Differences
This shows you the differences between the selected revisions of the page.
library_example_find_duplicate_files 2016-09-05 | library_example_find_duplicate_files 2022-06-16 (current) | ||
Line 6: | Line 6: | ||
You can install this script as a [[extension|WinSCP extension]] by using this page URL in the //[[ui_pref_commands#extensions|Add Extension]]// command. If you know that the server supports a [[protocols|protocol extension for calculating checksums]], you can improve the extension efficiency by [[#options|configuring it]] to ask the server for the checksum, sparing the file download. | You can install this script as a [[extension|WinSCP extension]] by using this page URL in the //[[ui_pref_commands#extensions|Add Extension]]// command. If you know that the server supports a [[protocols|protocol extension for calculating checksums]], you can improve the extension efficiency by [[#options|configuring it]] to ask the server for the checksum, sparing the file download. | ||
+ | |||
+ | ~~AD~~ | ||
To run the script manually use: | To run the script manually use: | ||
<code batch> | <code batch> | ||
- | powershell.exe -File C:\path\FindDuplicates.ps1 -remotePath "/path" -remoteChecksumAlg sha-1 | + | powershell.exe -File C:\path\FindDuplicates.ps1 -sessionUrl "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/" -remotePath "/path" -remoteChecksumAlg sha-1 |
</code> | </code> | ||
<code powershell - FindDuplicates.ps1> | <code powershell - FindDuplicates.ps1> | ||
# @name Find &Duplicates... | # @name Find &Duplicates... | ||
- | # @command powershell.exe -ExecutionPolicy Bypass -File "%EXTENSION_PATH%" -sessionUrl "!S" -remotePath "!/" -pause -remoteChecksumAlg "%RemoteChecksumAlg%" -sessionLogPath "%SessionLogPath%" | + | # @command powershell.exe -ExecutionPolicy Bypass -File "%EXTENSION_PATH%" ^ |
+ | # -sessionUrl "!E" -remotePath "!/" -pause ^ | ||
+ | # -remoteChecksumAlg "%RemoteChecksumAlg%" -sessionLogPath "%SessionLogPath%" | ||
# @description Searches for duplicate files on the server, starting from the current directory | # @description Searches for duplicate files on the server, starting from the current directory | ||
# @flag RemoteFiles | # @flag RemoteFiles | ||
- | # @version 2 | + | # @version 8 |
# @homepage ~~SELF~~ | # @homepage ~~SELF~~ | ||
- | # @require WinSCP 5.8.4 | + | # @require WinSCP 5.16 |
- | # @option RemoteChecksumAlg -config -run combobox "&Checksum:" "local" "local=Local sha-1" "sha1=Remote sha-1" "sha256=Remote sha-256" "md5=Remote md5" | + | # @option RemoteChecksumAlg -config -run combobox "&Checksum:" "local" ^ |
+ | # "local=Local sha-1" "sha1=Remote sha-1" "sha256=Remote sha-256" ^ | ||
+ | # "md5=Remote md5" | ||
# @option SessionLogPath -config sessionlogfile | # @option SessionLogPath -config sessionlogfile | ||
# @optionspage ~~SELF~~#options | # @optionspage ~~SELF~~#options | ||
+ | · | ||
param ( | param ( | ||
- | # Use Generate URL function to obtain a value for -sessionUrl parameter. | + | # Use Generate Session URL function to obtain a value for -sessionUrl parameter. |
- | $sessionUrl = "sftp://user:mypassword;fingerprint=ssh-rsa-xx-xx-xx@example.com/", | + | $sessionUrl = "sftp://user:mypassword;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/", |
- | [Parameter(Mandatory)] | + | [Parameter(Mandatory = $True)] |
$remotePath, | $remotePath, | ||
$remoteChecksumAlg = $Null, | $remoteChecksumAlg = $Null, | ||
$sessionLogPath = $Null, | $sessionLogPath = $Null, | ||
[Switch] | [Switch] | ||
- | $pause = $False | + | $pause |
) | ) | ||
+ | · | ||
function FileChecksum ($remotePath) | function FileChecksum ($remotePath) | ||
{ | { | ||
Line 42: | Line 48: | ||
if (!$remoteChecksumAlg -or ($remoteChecksumAlg -eq "local")) | if (!$remoteChecksumAlg -or ($remoteChecksumAlg -eq "local")) | ||
{ | { | ||
- | Write-Host ("Downloading file {0}..." -f $remotePath) | + | Write-Host "Downloading file $remotePath..." |
# Download file | # Download file | ||
$localPath = [System.IO.Path]::GetTempFileName() | $localPath = [System.IO.Path]::GetTempFileName() | ||
$transferResult = $session.GetFiles($remotePath, $localPath) | $transferResult = $session.GetFiles($remotePath, $localPath) | ||
+ | · | ||
if ($transferResult.IsSuccess) | if ($transferResult.IsSuccess) | ||
{ | { | ||
Line 53: | Line 59: | ||
$stream.Dispose() | $stream.Dispose() | ||
- | Write-Host ("Downloaded file {0} checksum is {1}" -f $remotePath, $checksum) | + | Write-Host "Downloaded file $remotePath checksum is $checksum" |
+ | |||
Remove-Item $localPath | Remove-Item $localPath | ||
} | } | ||
else | else | ||
{ | { | ||
- | Write-Host ("Error downloading file {0}: {1}" -f $remotePath, $transferResult.Failures[0]) | + | Write-Host ( |
+ | ····················"Error downloading file ${remotePath}: $($transferResult.Failures[0])") | ||
$checksum = $False | $checksum = $False | ||
} | } | ||
Line 65: | Line 72: | ||
else | else | ||
{ | { | ||
- | Write-Host ("Request checksum for file {0}..." -f $remotePath) | + | Write-Host "Request checksum for file $remotePath..." |
- | $checksum = [System.BitConverter]::ToString($session.CalculateFileChecksum($remoteChecksumAlg, $remotePath)) | + | ···········$buf = $session.CalculateFileChecksum($remoteChecksumAlg, $remotePath) |
- | Write-Host ("File {0} checksum is {1}" -f $remotePath, $checksum) | + | $checksum = [System.BitConverter]::ToString($buf) |
+ | Write-Host "File $remotePath checksum is $checksum" | ||
} | } | ||
+ | · | ||
$checksums[$remotePath] = $checksum | $checksums[$remotePath] = $checksum | ||
} | } | ||
+ | · | ||
return $checksums[$remotePath] | return $checksums[$remotePath] | ||
- | } | ||
- | |||
- | function FindDuplicatesInDirectory ($remotePath) | ||
- | { | ||
- | Write-Host ("Finding duplicates in directory {0} ..." -f $remotePath) | ||
- | |||
- | try | ||
- | { | ||
- | $directoryInfo = $session.ListDirectory($remotePath) | ||
- | |||
- | foreach ($fileInfo in $directoryInfo.Files) | ||
- | { | ||
- | $remoteFilePath = $session.CombinePaths($remotePath, $fileInfo.Name) | ||
- | |||
- | if ($fileInfo.IsDirectory) | ||
- | { | ||
- | # Skip references to current and parent directories | ||
- | if (($fileInfo.Name -ne ".") -and | ||
- | ($fileInfo.Name -ne "..")) | ||
- | { | ||
- | # Recurse into subdirectories | ||
- | FindDuplicatesInDirectory $remoteFilePath | ||
- | } | ||
- | } | ||
- | else | ||
- | { | ||
- | Write-Host ("Found file {0} with size {1}" -f $remoteFilePath, $fileInfo.Length) | ||
- | |||
- | if ($sizes.ContainsKey($fileInfo.Length)) | ||
- | { | ||
- | $checksum = FileChecksum($remoteFilePath) | ||
- | |||
- | foreach ($otherFilePath in $sizes[$fileInfo.Length]) | ||
- | { | ||
- | $otherChecksum = FileChecksum($otherFilePath) | ||
- | |||
- | if ($checksum -eq $otherChecksum) | ||
- | { | ||
- | Write-Host ("Checksums of files {0} and {1} are identical" -f $remoteFilePath, $otherFilePath) | ||
- | $duplicates[$remoteFilePath] = $otherFilePath | ||
- | } | ||
- | } | ||
- | } | ||
- | else | ||
- | { | ||
- | $sizes[$fileInfo.Length] = @() | ||
- | } | ||
- | |||
- | $sizes[$fileInfo.Length] += $remoteFilePath | ||
- | } | ||
- | } | ||
- | } | ||
- | catch [Exception] | ||
- | { | ||
- | Write-Host ("Error processing directory {0}: {1}" -f $remotePath, $_.Exception.Message) | ||
- | } | ||
} | } | ||
Line 147: | Line 99: | ||
{ | { | ||
$session.SessionLogPath = $sessionLogPath | $session.SessionLogPath = $sessionLogPath | ||
- | + | · | |
- | # Connect | + | Write-Host "Connecting..." |
$session.Open($sessionOptions) | $session.Open($sessionOptions) | ||
+ | # Handle errors when enumerating the files | ||
+ | $session.add_Failed( { | ||
+ | Write-Host "Error: $($_.Error.Message)" | ||
+ | } ) | ||
+ | |||
$sizes = @{} | $sizes = @{} | ||
$checksums = @{} | $checksums = @{} | ||
Line 156: | Line 113: | ||
$sha1 = [System.Security.Cryptography.SHA1]::Create() | $sha1 = [System.Security.Cryptography.SHA1]::Create() | ||
+ | |||
+ | $files = | ||
+ | $session.EnumerateRemoteFiles( | ||
+ | $remotePath, "*", [WinSCP.EnumerationOptions]::AllDirectories) | ||
+ | |||
+ | foreach ($fileInfo in $files) | ||
+ | { | ||
+ | Write-Host "Found file $($fileInfo.FullName) with size $($fileInfo.Length)" | ||
+ | |||
+ | if ($sizes.ContainsKey($fileInfo.Length)) | ||
+ | { | ||
+ | $checksum = FileChecksum($fileInfo.FullName) | ||
+ | |||
+ | foreach ($otherFilePath in $sizes[$fileInfo.Length]) | ||
+ | { | ||
+ | $otherChecksum = FileChecksum($otherFilePath) | ||
- | ········# Start recursion | + | ····················if ($checksum -eq $otherChecksum) |
- | ·······FindDuplicatesInDirectory $remotePath | + | ···················{ |
+ | Write-Host ( | ||
+ | "Checksums of files $($fileInfo.FullName) and " + | ||
+ | "$otherFilePath are identical") | ||
+ | $duplicates[$fileInfo.FullName] = $otherFilePath | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | $sizes[$fileInfo.Length] = @() | ||
+ | } | ||
+ | |||
+ | $sizes[$fileInfo.Length] += $fileInfo.FullName | ||
+ | } | ||
} | } | ||
finally | finally | ||
Line 165: | Line 152: | ||
$session.Dispose() | $session.Dispose() | ||
} | } | ||
+ | · | ||
# Print results | # Print results | ||
Write-Host | Write-Host | ||
+ | · | ||
if ($duplicates.Count -gt 0) | if ($duplicates.Count -gt 0) | ||
{ | { | ||
Write-Host "Duplicates found:" | Write-Host "Duplicates found:" | ||
+ | · | ||
foreach ($path1 in $duplicates.Keys) | foreach ($path1 in $duplicates.Keys) | ||
{ | { | ||
- | Write-Host ("{0} <=> {1}" -f $path1, $duplicates[$path1]) | + | Write-Host "$path1 <=> $($duplicates[$path1])" |
} | } | ||
} | } | ||
Line 182: | Line 169: | ||
Write-Host "No duplicates found." | Write-Host "No duplicates found." | ||
} | } | ||
+ | · | ||
$result = 0 | $result = 0 | ||
} | } | ||
- | catch [Exception] | + | catch |
{ | { | ||
- | Write-Host ("Error: {0}" -f $_.Exception.Message) | + | Write-Host "Error: $($_.Exception.Message)" |
$result = 1 | $result = 1 | ||
} | } | ||
Line 197: | Line 184: | ||
[System.Console]::ReadKey() | Out-Null | [System.Console]::ReadKey() | Out-Null | ||
} | } | ||
+ | · | ||
exit $result | exit $result | ||
</code> | </code> | ||
- | ===== Options ===== | + | ===== [[options]] Options ===== |
- | The //Checksum// selection allows you to choose, what checksum algorithm to use and is the checksum is to be calculated locally or remotely. Select the //Local sha-1// to calculate SHA-1 checksum locally. This is an universal option that will work with any server, but WinSCP will need to download all candidate files locally. If you know that the server supports [[protocols|a protocol extension for calculating checksums]], you can improve the extension efficiency by selecting a remote calculation. The list contains some common algorithms that some servers support. However you can type in name of any other algorithm supported by the server. | + | |
- | In the //Session log file// you can specify a path to a [[logging|session log file]]. The option is available on the [[ui_pref_commands|Preferences dialog]] only. | + | &screenshotpict(extension_find_duplicate_files) |
- | + | ||
+ | The //Checksum// selection allows you to choose what checksum algorithm to use and whether the checksum is to be calculated locally or remotely. Select the //Local sha-1// to calculate the SHA-1 checksum locally. This is a universal option that will work with any server, but WinSCP will need to download all candidate files locally. If you know that the server supports [[protocols|a protocol extension for calculating checksums]], you can improve the extension efficiency by selecting a remote calculation. The list contains some common algorithms that some servers support. However, you can type in the name of any other algorithm supported by the server. | ||
+ | |||
+ | In the //Session log file//, you can specify a path to a [[logging|session log file]]. The option is available on the [[ui_pref_commands|Preferences dialog]] only. | ||
+ | |||
+ | In the //Keyboard shortcut//, you can specify a [[custom_key_shortcuts|keyboard shortcut]] for the extension. The option is available on the [[ui_pref_commands|Preferences dialog]] only. |