This is an old revision of the document!

Automating download in parallel connections over SFTP/FTP protocol

C#

By default, the example opens three parallel connections and uses them to download a remote file tree to a local folder.

Advertisement

using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using WinSCP;
 
/// <summary>
/// Downloads a remote directory tree to a local folder using several
/// WinSCP sessions running in parallel.
/// </summary>
class Example
{
    /// <summary>
    /// Opens one session to enumerate the remote tree and <c>batches</c> further
    /// sessions that pull file paths from the shared enumerator and download them.
    /// </summary>
    /// <returns>0 when all downloads complete; 1 when any exception is caught.</returns>
    public static int Main()
    {
        try
        {
            // Setup session options
            SessionOptions sessionOptions = new SessionOptions
            {
                Protocol = Protocol.Sftp,
                HostName = "example.com",
                UserName = "user",
                Password = "mypassword",
                SshHostKeyFingerprint = "ssh-rsa 2048 xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx"
            };

            const string localPath = "C:\\local\\path";
            const string remotePath = "/remote/path";
            const int batches = 3;

            // UTC avoids a skewed duration if local time shifts (e.g. DST) mid-run.
            DateTime started = DateTime.UtcNow;
            int count = 0;
            Int64 bytes = 0;

            using (Session session = new Session())
            {
                Console.WriteLine("Connecting...");
                session.Open(sessionOptions);

                Console.WriteLine("Starting files enumeration...");
                // Fully qualified: both System.IO and WinSCP are imported, and newer
                // .NET versions also declare System.IO.EnumerationOptions.
                IEnumerable<RemoteFileInfo> files =
                    session.EnumerateRemoteFiles(remotePath, null, WinSCP.EnumerationOptions.AllDirectories);

                // Dedicated gate guarding the enumerator and the count/bytes totals.
                object enumerationLock = new object();

                // The enumerator is disposable; keep it alive until every worker has finished.
                using (IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator())
                {
                    List<Task> tasks = new List<Task>();

                    for (int i = 1; i <= batches; i++)
                    {
                        // Copy the loop variable so each closure sees its own number.
                        int no = i;

                        Task task = new Task(() =>
                            {
                                using (Session downloadSession = new Session())
                                {
                                    Console.WriteLine("Starting download {0}...", no);
                                    downloadSession.Open(sessionOptions);

                                    while (true)
                                    {
                                        string remoteFilePath;
                                        // Only one worker at a time may advance the shared enumerator.
                                        lock (enumerationLock)
                                        {
                                            if (!filesEnumerator.MoveNext())
                                            {
                                                break;
                                            }

                                            RemoteFileInfo file = filesEnumerator.Current;
                                            bytes += file.Length;
                                            count++;
                                            remoteFilePath = file.FullName;
                                        }

                                        // Use this worker's own session for the path helpers so the
                                        // enumerating session is only ever touched under the lock.
                                        string localFilePath =
                                            downloadSession.TranslateRemotePathToLocal(remoteFilePath, remotePath, localPath);
                                        Console.WriteLine("Downloading {0} to {1} in {2}...", remoteFilePath, localFilePath, no);
                                        Directory.CreateDirectory(Path.GetDirectoryName(localFilePath));
                                        downloadSession.GetFiles(downloadSession.EscapeFileMask(remoteFilePath), localFilePath).Check();
                                    }

                                    Console.WriteLine("Download {0} done", no);
                                }

                            });

                        tasks.Add(task);
                        task.Start();
                    }

                    Console.WriteLine("Waiting for downloads to complete...");
                    // Must complete inside both using blocks: the workers still read the enumerator.
                    Task.WaitAll(tasks.ToArray());
                }
            }

            Console.WriteLine("Done");

            DateTime ended = DateTime.UtcNow;
            Console.WriteLine("Took {0}", (ended - started));
            Console.WriteLine("Downloaded {0} files, totaling {1:N0} bytes", count, bytes);

            return 0;
        }
        catch (Exception e)
        {
            Console.WriteLine("Error: {0}", e);
            return 1;
        }
    }
}

Advertisement

PowerShell

The code is not equivalent to the C# example above. The PowerShell code does not download subdirectories. It also splits the files into batches up front, by their cumulative size, instead of using a queue like the C# code.

# Downloads the files located directly in $remotePath (no subdirectories) to
# $localPath. The files are sorted by size and split into batches of roughly
# equal total size; each batch is downloaded by a separate background job with
# its own WinSCP session.
#
# Parameters:
#   $sessionUrl  Session URL parsed by WinSCP.SessionOptions.ParseUrl
#   $remotePath  Remote directory to list and download from
#   $localPath   Local directory to download into
#   $batches     Divisor used to compute the target batch size
#
# Exits with 0 when the script runs to completion, 1 when an exception is caught.
param (
    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xx-xx-xx@example.com/",
    $remotePath = "/home/user/",
    $localPath = "c:\downloaded\",
    $batches = 3
)

try
{
    $dllPath = (Join-Path $PSScriptRoot "WinSCPnet.dll")
    # Load WinSCP .NET assembly
    Add-Type -Path $dllPath

    # Setup session options
    $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)

    $started = Get-Date

    try
    {
        # Connect
        Write-Host "Connecting..."
        $session = New-Object WinSCP.Session
        $session.Open($sessionOptions)

        # Retrieve list of files and sort them from largest to smallest
        [array]$files =
            $session.ListDirectory($remotePath).Files |
            Where-Object { -Not $_.IsDirectory } |
            Sort-Object Length -Descending

        # Calculate total size of all files
        $total = ($files | Measure-Object -Property Length -Sum).Sum

        # And batch size.
        # Use a 64-bit integer: an [int] cast throws once the per-batch size
        # exceeds Int32.MaxValue (about 2 GiB).
        $batch = [long]($total / $batches)

        Write-Host "Will download $($files.Count) files totaling $total bytes in $batches parallel batches, $batch bytes on average in each"

        $start = 0
        $sum = 0
        $no = 0

        for ($i = 0; $i -lt $files.Count; $i++)
        {
            $sum += $files[$i].Length

            # Found enough files for the next batch
            # (or ran out of files, so the remainder forms the last batch)
            if (($sum -ge $batch) -or ($i -eq $files.Count - 1))
            {
                Write-Host "Starting batch $no to download $($i - $start + 1) files totaling $sum"

                # Pass the batch to the job as a single ";"-separated string
                $fileList = $files[$start..$i] -join ";"

                # Start the background job for the batch
                Start-Job -Name "Batch $no" -ArgumentList $dllPath, $sessionUrl, $remotePath, $localPath, $no, $fileList {
                    param (
                        [Parameter(Position = 0)]
                        $dllPath,
                        [Parameter(Position = 1)]
                        $sessionUrl,
                        [Parameter(Position = 2)]
                        $remotePath,
                        [Parameter(Position = 3)]
                        $localPath,
                        [Parameter(Position = 4)]
                        $no,
                        [Parameter(Position = 5)]
                        $fileList
                    )

                    try
                    {
                        Write-Host "Starting batch $no"

                        # Load WinSCP .NET assembly.
                        # Need to use an absolute path as the Job is started from user's documents folder.
                        Add-Type -Path $dllPath

                        # Setup session options
                        $sessionOptions = New-Object WinSCP.SessionOptions
                        $sessionOptions.ParseUrl($sessionUrl)

                        try
                        {
                            Write-Host "Connecting batch $no..."
                            $session = New-Object WinSCP.Session

                            $session.Open($sessionOptions)

                            $files = $fileList -split ";"

                            # Download the files selected for this batch
                            foreach ($file in $files)
                            {
                                $remoteFilePath = "$remotePath/$file"
                                $localFilePath = "$localPath\$file"
                                Write-Host "Downloading $remoteFilePath to $localFilePath in $no"

                                $session.GetFiles($session.EscapeFileMask($remoteFilePath), $localFilePath).Check()
                            }
                        }
                        finally
                        {
                            # Disconnect, clean up.
                            # Guard against a failed New-Object so the original error is not masked.
                            if ($Null -ne $session)
                            {
                                $session.Dispose()
                            }
                        }

                        Write-Host "Batch $no done"
                    }
                    catch [Exception]
                    {
                        Write-Host "Error: $($_.Exception.Message)"
                        exit 1
                    }
                } | Out-Null

                # Reset for the next batch
                $no++
                $sum = 0
                $start = $i + 1
            }
        }

        Write-Host "Waiting for batches to complete"
        Get-Job | Receive-Job -Wait

        Write-Host "Done"

        $ended = Get-Date
        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"
    }
    finally
    {
        # Disconnect, clean up.
        # Guard against a failed New-Object so the original error is not masked.
        if ($Null -ne $session)
        {
            $session.Dispose()
        }
    }

    exit 0
}
catch [Exception]
{
    Write-Host "Error: $($_.Exception.Message)"
    exit 1
}

Advertisement

Last modified: by martin