This is an old revision of the document!
Automating download in parallel connections over SFTP/FTP protocol
C#
By default, the example opens three parallel connections and uses them to download a remote file tree to a local folder in parallel.
Advertisement
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading.Tasks;
using WinSCP;

/// <summary>
/// Downloads a remote directory tree to a local folder using several
/// parallel connections.
/// </summary>
/// <remarks>
/// One session enumerates the remote files lazily; a fixed number of worker
/// tasks, each with its own session, pull the next file from that shared
/// enumeration and download it.
/// </remarks>
class Example
{
    /// <summary>
    /// Runs the parallel download.
    /// </summary>
    /// <returns>0 on success, 1 when any step throws.</returns>
    public static int Main()
    {
        try
        {
            // Setup session options
            SessionOptions sessionOptions = new SessionOptions
            {
                Protocol = Protocol.Sftp,
                HostName = "example.com",
                UserName = "user",
                Password = "mypassword",
                SshHostKeyFingerprint = "ssh-rsa 2048 xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx"
            };

            const string localPath = "C:\\local\\path";
            const string remotePath = "/remote/path";
            const int batches = 3;

            // Stopwatch is monotonic, so the reported duration is not affected
            // by wall-clock adjustments during a long transfer.
            Stopwatch stopwatch = Stopwatch.StartNew();
            int count = 0;
            Int64 bytes = 0;

            using (Session session = new Session())
            {
                Console.WriteLine("Connecting...");
                session.Open(sessionOptions);

                Console.WriteLine("Starting files enumeration...");
                IEnumerable<RemoteFileInfo> files =
                    session.EnumerateRemoteFiles(
                        remotePath, null, EnumerationOptions.AllDirectories);

                // Guards the shared enumerator and the count/bytes totals.
                // The enumeration session is only ever touched while this lock
                // is held, so it is never used by two threads at once.
                object queueLock = new object();

                using (IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator())
                {
                    List<Task> tasks = new List<Task>();

                    for (int i = 1; i <= batches; i++)
                    {
                        // Copy the loop variable so each lambda captures its own number
                        int no = i;

                        Task task = new Task(
                            () =>
                            {
                                using (Session downloadSession = new Session())
                                {
                                    Console.WriteLine("Starting download {0}...", no);
                                    downloadSession.Open(sessionOptions);

                                    while (true)
                                    {
                                        string remoteFilePath;
                                        lock (queueLock)
                                        {
                                            if (!filesEnumerator.MoveNext())
                                            {
                                                break;
                                            }

                                            RemoteFileInfo file = filesEnumerator.Current;
                                            bytes += file.Length;
                                            count++;
                                            remoteFilePath = file.FullName;
                                        }

                                        // Use this worker's own session for the path helpers,
                                        // not the shared enumeration session.
                                        string localFilePath =
                                            downloadSession.TranslateRemotePathToLocal(
                                                remoteFilePath, remotePath, localPath);
                                        Console.WriteLine(
                                            "Downloading {0} to {1} in {2}...",
                                            remoteFilePath, localFilePath, no);
                                        Directory.CreateDirectory(Path.GetDirectoryName(localFilePath));
                                        downloadSession.GetFiles(
                                            downloadSession.EscapeFileMask(remoteFilePath),
                                            localFilePath).Check();
                                    }

                                    Console.WriteLine("Download {0} done", no);
                                }
                            },
                            // Each worker blocks on network I/O for the whole run
                            TaskCreationOptions.LongRunning);

                        tasks.Add(task);
                        task.Start();
                    }

                    Console.WriteLine("Waiting for downloads to complete...");
                    // WaitAll returns (or throws) only after every task has finished,
                    // so the enumerator and the session are disposed after the last use.
                    Task.WaitAll(tasks.ToArray());
                }
            }

            Console.WriteLine("Done");

            stopwatch.Stop();
            Console.WriteLine("Took {0}", stopwatch.Elapsed);
            Console.WriteLine("Downloaded {0} files, totaling {1:N0} bytes", count, bytes);

            return 0;
        }
        catch (Exception e)
        {
            Console.WriteLine("Error: {0}", e);
            return 1;
        }
    }
}
Advertisement
PowerShell
The code is not equivalent to the C# example above. The PowerShell code does not download subdirectories. It also splits the files into fixed batches up front (by cumulative size), instead of using a shared queue like the C# code.
# Downloads the files of a single remote directory (no subdirectories) in
# several parallel background jobs, each with its own WinSCP session.
#
# Parameters:
#   $sessionUrl - session URL parsed by SessionOptions.ParseUrl
#   $remotePath - remote directory to download from
#   $localPath  - local directory to download to
#   $batches    - target number of parallel batches
#
# Exits with 0 when the main script completes, 1 when it throws.
param (
    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xx-xx-xx@example.com/",
    $remotePath = "/home/user/",
    $localPath = "c:\downloaded\",
    $batches = 3
)

try
{
    $dllPath = (Join-Path $PSScriptRoot "WinSCPnet.dll")
    # Load WinSCP .NET assembly
    Add-Type -Path $dllPath

    # Setup session options
    $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)

    $started = Get-Date
    # Initialized up front so that the finally block can tell whether
    # the session was ever created
    $session = $Null
    # Jobs started by this script; only these are waited for
    $jobs = @()

    try
    {
        # Connect
        Write-Host "Connecting..."
        $session = New-Object WinSCP.Session
        $session.Open($sessionOptions)

        # Retrieve list of files and sort them from largest to smallest
        [array]$files =
            $session.ListDirectory($remotePath).Files |
            Where-Object { -Not $_.IsDirectory } |
            Sort-Object Length -Descending

        # Calculate total size of all files
        $total = ($files | Measure-Object -Property Length -Sum).Sum
        # And batch size.
        # [long], as an [int] cast throws once the average batch exceeds 2 GiB.
        $batch = [long]($total / $batches)

        Write-Host "Will download $($files.Count) files totaling $total bytes in $batches parallel batches, $batch bytes on average in each"

        $start = 0
        $sum = 0
        $no = 0

        for ($i = 0; $i -lt $files.Count; $i++)
        {
            $sum += $files[$i].Length

            # Found enough files for the next batch
            if (($sum -ge $batch) -or ($i -eq $files.Count - 1))
            {
                Write-Host "Starting batch $no to download $($i - $start + 1) files totaling $sum"

                # Pass plain file names to the job.
                # Note that a name containing ";" would be split incorrectly.
                $fileList = ($files[$start..$i] | ForEach-Object { $_.Name }) -join ";"

                # Start the background job for the batch
                $jobs += Start-Job -Name "Batch $no" -ArgumentList $dllPath, $sessionUrl, $remotePath, $localPath, $no, $fileList {
                    param (
                        [Parameter(Position = 0)]
                        $dllPath,
                        [Parameter(Position = 1)]
                        $sessionUrl,
                        [Parameter(Position = 2)]
                        $remotePath,
                        [Parameter(Position = 3)]
                        $localPath,
                        [Parameter(Position = 4)]
                        $no,
                        [Parameter(Position = 5)]
                        $fileList
                    )

                    try
                    {
                        Write-Host "Starting batch $no"

                        # Load WinSCP .NET assembly.
                        # Need to use an absolute path as the Job is started from user's documents folder.
                        Add-Type -Path $dllPath

                        # Setup session options
                        $sessionOptions = New-Object WinSCP.SessionOptions
                        $sessionOptions.ParseUrl($sessionUrl)

                        $session = $Null

                        try
                        {
                            Write-Host "Connecting batch $no..."
                            $session = New-Object WinSCP.Session
                            $session.Open($sessionOptions)

                            $files = $fileList -split ";"
                            # Avoid a doubled slash when $remotePath ends with one
                            $remoteDirectory = $remotePath.TrimEnd("/")

                            # Download the files selected for this batch
                            foreach ($file in $files)
                            {
                                $remoteFilePath = "$remoteDirectory/$file"
                                $localFilePath = [System.IO.Path]::Combine($localPath, $file)
                                Write-Host "Downloading $remoteFilePath to $localFilePath in $no"

                                $session.GetFiles($session.EscapeFileMask($remoteFilePath), $localFilePath).Check()
                            }
                        }
                        finally
                        {
                            # Disconnect, clean up
                            if ($session -ne $Null)
                            {
                                $session.Dispose()
                            }
                        }

                        Write-Host "Batch $no done"
                    }
                    catch [Exception]
                    {
                        Write-Host "Error: $($_.Exception.Message)"
                        exit 1
                    }
                }

                # Reset for the next batch
                $no++
                $sum = 0
                $start = $i + 1
            }
        }

        Write-Host "Waiting for batches to complete"
        if ($jobs.Count -gt 0)
        {
            # Wait only for the jobs started above and remove them once done
            $jobs | Receive-Job -Wait -AutoRemoveJob
        }

        Write-Host "Done"

        $ended = Get-Date
        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"
    }
    finally
    {
        # Disconnect, clean up
        if ($session -ne $Null)
        {
            $session.Dispose()
        }
    }

    exit 0
}
catch [Exception]
{
    Write-Host "Error: $($_.Exception.Message)"
    exit 1
}
Advertisement