Differences

This shows you the differences between the selected revisions of the page.

library_example_parallel_transfers 2016-06-30 library_example_parallel_transfers 2023-11-27 (current)
Line 1: Line 1:
-====== Automating download in parallel connections over SFTP/FTP protocol ======+====== Automating transfers or synchronization in parallel connections over SFTP/FTP protocol ======
-The following example uses [[library|WinSCP .NET assembly]] from a [[library_powershell|PowerShell]] script. If you have another preferred language, you can easily translate it.+===== Download =====
-The example opens by default three parallel connections and evenly splits the download of files from specified remote directory among these.+==== [[download_csharp]] C# ==== 
 + 
 +The example opens by default three parallel connections and uses them to download a remote file tree to a local folder in parallel. 
 + 
 +<code csharp> 
 +using System; 
 +using System.Collections.Generic; 
 +using System.IO; 
 +using System.Threading.Tasks; 
 +using WinSCP; 
 + 
 +class Example 
 +
 +    public static int Main() 
 +    { 
 +        try 
 +        { 
 +            // Setup session options 
 +            var sessionOptions = new SessionOptions 
 +            { 
 +                Protocol = Protocol.Sftp, 
 +                HostName = "example.com", 
 +                UserName = "user", 
 +                Password = "mypassword", 
 +                SshHostKeyFingerprint = "ssh-rsa 2048 xxxxxxxxxxx..." 
 +            }; 
 + 
 +            const string localPath = @"C:\local\path"; 
 +            const string remotePath = "/remote/path"; 
 +            const int batches = 3; 
 + 
 +            var started = DateTime.Now; 
 +            int count = 0; 
 +            long bytes = 0; 
 + 
 +            using (var session = new Session()) 
 +            { 
 +                Console.WriteLine("Connecting..."); 
 +                session.Open(sessionOptions); 
 + 
 +                Console.WriteLine("Starting files enumeration..."); 
 +                var opts = WinSCP.EnumerationOptions.AllDirectories; 
 +                IEnumerable<RemoteFileInfo> files = 
 +                    session.EnumerateRemoteFiles(remotePath, null, opts); 
 +                IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator(); 
 + 
 +                var tasks = new List<Task>(); 
 + 
 +                for (int i = 1; i <= batches; i++) 
 +                { 
 +                    int no = i; 
 + 
 +                    var task = new Task(() => 
 +                    { 
 +                        using (var downloadSession = new Session()) 
 +                        { 
 +                            Console.WriteLine($"Starting download {no}..."); 
 +                            downloadSession.Open(sessionOptions); 
 + 
 +                            while (true) 
 +                            { 
 +                                string remoteFilePath; 
 +                                lock (filesEnumerator) 
 +                                { 
 +                                    if (!filesEnumerator.MoveNext()) 
 +                                    { 
 +                                        break; 
 +                                    } 
 + 
 +                                    RemoteFileInfo file = filesEnumerator.Current; 
 +                                    bytes += file.Length; 
 +                                    count++; 
 +                                    remoteFilePath = file.FullName; 
 +                                } 
 + 
 +                                string localFilePath = 
 +                                    RemotePath.TranslateRemotePathToLocal( 
 +                                        remoteFilePath, remotePath, localPath); 
 +                                Console.WriteLine( 
 +                                    $"Downloading {remoteFilePath} to {localFilePath} in {no}..."); 
 +                                string localFileDir = Path.GetDirectoryName(localFilePath); 
 +                                Directory.CreateDirectory(localFileDir); 
 +                                downloadSession.GetFileToDirectory(remoteFilePath, localFileDir); 
 +                            } 
 + 
 +                            Console.WriteLine($"Download {no} done"); 
 +                        } 
 +                    }); 
 + 
 +                    tasks.Add(task); 
 +                    task.Start(); 
 +                } 
 + 
 +                Console.WriteLine("Waiting for downloads to complete..."); 
 +                Task.WaitAll(tasks.ToArray()); 
 +            } 
 + 
 +            Console.WriteLine("Done"); 
 + 
 +            var ended = DateTime.Now; 
 +            Console.WriteLine($"Took {ended - started}"); 
 +            Console.WriteLine($"Downloaded {count} files, totaling {bytes:N0} bytes"); 
 + 
 +            return 0; 
 +        } 
 +        catch (Exception e) 
 +        { 
 +            Console.WriteLine($"Error: {e}"); 
 +            return 1; 
 +        } 
 +    } 
 +
 +</code> 
 + 
 +==== [[powershell]] PowerShell ==== 
 + 
 +The following code uses [[ps>threadjob/start-threadjob|''Start-ThreadJob'' cmdlet]] from the ''ThreadJob'' module. The module is a part of PowerShell 6 and newer. In PowerShell 5, it can be installed using ''Install-Module ThreadJob''.
<code powershell> <code powershell>
param ( param (
-    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xx-xx-xx@example.com/", +    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/", 
-    $remotePath = "/home/user/", +    $remotePath = "/remote/path/", 
-    $localPath = "c:\downloaded\",+    $localPath = "c:\local\path\",
    $batches = 3     $batches = 3
) )
 +·
try try
{ {
-    $dllPath = (Join-Path $PSScriptRoot "WinSCPnet.dll")+    $assemblyFilePath = "WinSCPnet.dll"
    # Load WinSCP .NET assembly     # Load WinSCP .NET assembly
-    Add-Type -Path $dllPath +    Add-Type -Path $assemblyFilePath 
 +·
    # Setup session options     # Setup session options
    $sessionOptions = New-Object WinSCP.SessionOptions     $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)     $sessionOptions.ParseUrl($sessionUrl)
 +·
    $started = Get-Date     $started = Get-Date
 +    # Plain variables cannot be modified in job threads 
 +    $stats = @{ 
 +        count = 0 
 +        bytes = [long]0 
 +    } 
 +·
    try     try
    {     {
Line 32: Line 153:
        $session.Open($sessionOptions)         $session.Open($sessionOptions)
               
-        # Retrieve list of files and sort them from larges to smallest +        Write-Host "Starting files enumeration...
-········[array]$files = +········$files = 
-            $session.ListDirectory($remotePath).Files | +            $session.EnumerateRemoteFiles( 
- ···········Where-Object { -Not $_.IsDirectory } | +················$remotePath, $Null, [WinSCP.EnumerationOptions]::AllDirectories) 
-            Sort-Object Length -Descending+       $filesEnumerator = $files.GetEnumerator() 
 +  
 +········for ($i = 1; $i -le $batches; $i++) 
 +       { 
 +            Start-ThreadJob -Name "Batch $i" -ArgumentList $i { 
 +                param ($no)
-········# Calculate total size of all files +················try 
- ·······$total = ($files | Measure-Object -Property Length -Sum).Sum + ···············{ 
-        + ···················Write-Host "Starting download $no..."
-       # And batch size +
-········$batch = [int]($total / $batches)+
-········Write-Host ("Will download {0} files totaling {1} bytes in {2} parallel batches, {3} bytes on average in each" -f $files.Count, $total, $batches, $batch) +····················$downloadSession = New-Object WinSCP.Session 
-         + ···················$downloadSession.Open($using:sessionOptions)
-        $start = 0 +
-        $sum = 0 +
-        $no = 0+
-········for ($i = 0; $i -lt $files.Count; $i++)+····················while ($True) 
 +                    { 
 +                        [System.Threading.Monitor]::Enter($using:filesEnumerator) 
 +                        try 
 +                        { 
 +                            if (!($using:filesEnumerator).MoveNext()) 
 +                            { 
 +                                break 
 +                            } 
 + 
 +                            $file = ($using:filesEnumerator).Current 
 +                            ($using:stats).bytes += $file.Length 
 +                            ($using:stats).count++ 
 +                            $remoteFilePath = $file.FullName 
 +                        } 
 +                        finally 
 +                        { 
 +                            [System.Threading.Monitor]::<nohilite>Exit</nohilite>($using:filesEnumerator) 
 +                       } 
 + 
 +························$localFilePath
 +                            [WinSCP.RemotePath]::TranslateRemotePathToLocal( 
 +                                $remoteFilePath, $using:remotePath, $using:localPath) 
 +                        Write-Host "Downloading $remoteFilePath to $localFilePath in $no..."; 
 + ·······················$localFileDir = (Split-Path -Parent $localFilePath) 
 +                        New-Item -ItemType directory -Path $localFileDir -Force | Out-Null 
 +                        $downloadSession.GetFileToDirectory($remoteFilePath, $localFileDir) | 
 +                            Out-Null 
 +                    } 
 + 
 +                    Write-Host "Download $no done" 
 +                } 
 +                finally 
 +                { 
 +                    $downloadSession.Dispose() 
 +                } 
 +            } | Out-Null 
 +        } 
 + 
 +        Write-Host "Waiting for downloads to complete..." 
 +        Get-Job | Receive-Job -Wait -ErrorAction Stop 
 +  
 +        Write-Host "Done" 
 +  
 +        $ended = Get-Date 
 +        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)" 
 +        Write-Host ("Downloaded $($stats.count) files, " + 
 +                    "totaling $($stats.bytes.ToString("N0")) bytes") 
 +    } 
 +    finally 
 +    { 
 +        # Disconnect, clean up 
 +        $session.Dispose() 
 +    } 
 +  
 +    exit 0 
 +
 +catch 
 +
 +    Write-Host "Error: $($_.Exception.Message)" 
 +    exit 1 
 +
 +</code> 
 + 
 +===== Upload ===== 
 + 
 +==== [[upload_csharp]] C# ==== 
 + 
 +The example opens by default three parallel connections and uses them to upload a local file tree to a remote folder in parallel. 
 + 
 +<code csharp> 
 +using System; 
 +using System.Collections.Generic; 
 +using System.IO; 
 +using System.Threading.Tasks; 
 +using WinSCP; 
 + 
 +class Example 
 +
 +    static int Main() 
 +    { 
 +        try
        {         {
-            $sum += $files[$i].Length+            // Setup session options 
 +            SessionOptions sessionOptions = new SessionOptions 
 +············{ 
 +················Protocol = Protocol.Sftp, 
 +                HostName = "example.com", 
 +                UserName = "user", 
 +                Password = "password", 
 +                SshHostKeyFingerprint = "ssh-rsa 2048 xxxxxxxxxxx..." 
 +            };
-            # Found enough files for the next batch +            const string localPath = @"C:\local\path"; 
-            if (($sum -ge $batch) -or ($i -eq $files.Count - 1))+           const string remotePath = "/remote/path"; 
 +            const int batches = 3; 
 + 
 + ···········DateTime started = DateTime.Now; 
 +            int count = 0; 
 +            Int64 bytes = 0; 
 + 
 +            Console.WriteLine("Starting files enumeration...")
 + ···········IEnumerable<string> files = 
 +                Directory.EnumerateFiles(localPath, "*.*", SearchOption.AllDirectories); 
 +            IEnumerator<string> filesEnumerator = files.GetEnumerator(); 
 + 
 +           List<Task> tasks = new List<Task>()
 + 
 +            HashSet<string> existingRemotePaths = new HashSet<string>(); 
 + 
 +            for (int i = 1; i <= batches; i++)
            {             {
-                Write-Host ("Starting batch {0} to download {1} files totaling {2}" -f $no, ($i - $start + 1), $sum) +                int no = i;
-                 +
-                $fileList = $files[$start..$i] -join ";;" +
-                 +
-                # Start the background job for the batch +
-                Start-Job -Name "Batch $no" -ArgumentList $dllPath, $sessionUrl, $remotePath, $localPath, $no, $fileList { +
-                    param ( +
-                        [Parameter(Position = 0)] +
-                        $dllPath, +
-                        [Parameter(Position = 1)] +
-                        $sessionUrl, +
-                        [Parameter(Position = 2)] +
-                        $remotePath, +
-                        [Parameter(Position = 3)] +
-                        $localPath, +
-                        [Parameter(Position = 4)] +
-                        $no, +
-                        [Parameter(Position = 5)] +
-                        $fileList +
-                    )+
-····················try+················Task task = new Task(() => 
 +                { 
 +                    using (Session uploadSession = new Session())
                    {                     {
-                        Write-Host ("Starting batch {0}" -f $no)+                        while (true
 + ·······················{ 
 +                            string localFilePath; 
 +                           lock (filesEnumerator) 
 +····························{ 
 + ·······························if (!filesEnumerator.MoveNext()
 +                                { 
 +                                    break; 
 +                                }
-························# Load WinSCP .NET assembly. +································localFilePath = filesEnumerator.Current; 
- ·······················# Need to use an absolute path as the Job is started from user's documents folder+ ·······························bytes += new FileInfo(localFilePath).Length; 
- ·······················Add-Type -Path $dllPath+ ·······························count++
 + ···························}
-························# Setup session options +····························if (!uploadSession.Opened
-························$sessionOptions = New-Object WinSCP.SessionOptions + ···························{ 
-                        $sessionOptions.ParseUrl($sessionUrl+ ·······························Console.WriteLine("Starting upload {0}...", no); 
- ······················· + ·······························uploadSession.Open(sessionOptions); 
-                        try +                            }
- ·······················{ +
-····························Write-Host ("Connecting batch {0}..." -f $no) +
- ···························$session = New-Object WinSCP.Session+
-                            $session.Open($sessionOptions+                            string remoteFilePath = 
-                             +································RemotePath.TranslateLocalPathToRemote( 
- ···························$files = $fileList -split ";;"+····································localFilePath, localPath, remotePath); 
 +                            Console.WriteLine( 
 + ·······························"Uploading {0} to {1} in {2}...", 
 +································localFilePath, remoteFilePath, no);
-                            # Download the files selected for this batch +                            string path
-                            foreach ($file in $files)+                               remoteFilePath.Substring(0, remoteFilePath.LastIndexOf('/')); 
 +                           string current = ""; 
 + 
 +                            if (path.Substring(0, 1) == "/")
                            {                             {
-                                $remoteFilePath = "$remotePath/$file"+                                path = path.Substring(1)
- ·······························$localFilePath = "$localPath\$file" + ···························}
-                                Write-Host "Downloading $remoteFilePath to $localFilePath in $no"+
-································$session.GetFiles($session.EscapeFileMask($remoteFilePath), $localFilePath).Check()+····························while (!string.IsNullOrEmpty(path)) 
 +                            { 
 +                                int p = path.IndexOf('/')
 +                                current += '/'; 
 +                                if (p >= 0) 
 +                                { 
 +                                    current += path.Substring(0, p); 
 +                                    path = path.Substring(p + 1)
 +                                } 
 +                                else 
 +                                { 
 +                                    current += path; 
 +                                    path = ""; 
 +                                } 
 + 
 +                                lock (existingRemotePaths) 
 +                                { 
 +                                    if (!existingRemotePaths.Contains(current)) // optimization 
 +                                    { 
 +                                        if (!uploadSession.FileExists(current)) 
 +                                        { 
 +                                            Console.WriteLine("Creating {0}...", current); 
 +                                            uploadSession.CreateDirectory(current); 
 +                                        } 
 +                                        existingRemotePaths.Add(current); 
 +                                    } 
 +                                }
                            }                             }
 +
 +                            uploadSession.PutFiles(
 +                                localFilePath, RemotePath.EscapeFileMask(remoteFilePath)).
 +                                Check();
                        }                         }
-························finally+ 
 +························if (uploadSession.Opened)
                        {                         {
-                            # Disconnect, clean up +                            Console.WriteLine("Upload {0} done", no); 
- ···························$session.Dispose()+                       } 
 + ·······················else 
 +                        { 
 +                            Console.WriteLine("Upload {0} had nothing to do", no);
                        }                         }
-                         
-                        Write-Host ("Batch {0} done" -f $no) 
                    }                     }
-····················catch [Exception]+ 
 +                }); 
 + 
 +                tasks.Add(task); 
 +                task.Start(); 
 +            } 
 + 
 +            Console.WriteLine("Waiting for uploads to complete..."); 
 +            Task.WaitAll(tasks.ToArray()); 
 + 
 +            Console.WriteLine("Done"); 
 + 
 +            DateTime ended = DateTime.Now; 
 +            Console.WriteLine("Took {0}", (ended - started)); 
 +            Console.WriteLine("Uploaded {0} files, totaling {1:N0} bytes", count, bytes); 
 + 
 +            return 0; 
 +        } 
 +········catch (Exception e) 
 +        { 
 +            Console.WriteLine("Error: {0}", e); 
 +            return 1; 
 +        } 
 +    } 
 +
 +</code> 
 + 
 +===== [[synchronization]] Synchronization ===== 
 + 
 +==== [[synchronization_powershell]] PowerShell ==== 
 + 
 +//Regarding ''Start-ThreadJob'' cmdlet, see the comment in [[#powershell|Download section]].// 
 + 
 +<code powershell> 
 +param ( 
 +    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/", 
 +    $remotePath = "/remote/path/", 
 +    $localPath = "c:\local\path\", 
 +    $removeFiles = $False, 
 +    $connections = 3 
 +
 +  
 +try 
 +
 +    $assemblyFilePath = "WinSCPnet.dll" 
 +    # Load WinSCP .NET assembly 
 +    Add-Type -Path $assemblyFilePath 
 +  
 +    # Setup session options 
 +    $sessionOptions = New-Object WinSCP.SessionOptions 
 +    $sessionOptions.ParseUrl($sessionUrl) 
 +  
 +    $started = Get-Date 
 +    # Plain variables cannot be modified in job threads 
 +    $stats = @{ 
 +        count = 0 
 +    } 
 +  
 +    try 
 +    { 
 +        # Connect 
 +        Write-Host "Connecting..." 
 +        $session = New-Object WinSCP.Session 
 +        $session.Open($sessionOptions) 
 +         
 +        Write-Host "Comparing directories..." 
 +        $differences = 
 +            $session.CompareDirectories( 
 +                [WinSCP.SynchronizationMode]::Both, $localPath, $remotePath, $removeFiles) 
 +        if ($differences.Count -eq 0) 
 +        { 
 +            Write-Host "No changes found."   
 +        } 
 +        else 
 +        { 
 +            if ($differences.Count -lt $connections) 
 +            { 
 +                $connections = $differences.Count; 
 +            } 
 +            $differenceEnumerator = $differences.GetEnumerator() 
 +     
 +            for ($i = 1; $i -le $connections; $i++) 
 +            { 
 +                Start-ThreadJob -Name "Connection $i" -ArgumentList $i { 
 +                    param ($no) 
 +     
 +                    try
                    {                     {
-                        Write-Host ("Error: {0}" -f $_.Exception.Message+                        Write-Host "Starting connection $no..." 
-                        exit 1+     
 +                        $syncSession = New-Object WinSCP.Session 
 +                        $syncSession.Open($using:sessionOptions) 
 +      
 +                        while ($True) 
 +························{ 
 +                            [System.Threading.Monitor]::Enter($using:differenceEnumerator) 
 +                            try 
 +                            { 
 +                                if (!($using:differenceEnumerator).MoveNext()) 
 +                                { 
 +                                    break 
 +································} 
 +     
 +                                $difference = ($using:differenceEnumerator).Current 
 +                                ($using:stats).count++ 
 +                            } 
 +                            finally 
 +                            { 
 +                                [System.Threading.Monitor]::Exit($using:differenceEnumerator) 
 +                            } 
 + 
 +                            Write-Host "$difference in $no...
 +                            $difference.Resolve($syncSession) | Out-Null 
 +                       
 +      
 +                        Write-Host "Connection $no done" 
 +                    } 
 +                    finally 
 +                    { 
 +                        $syncSession.Dispose()
                    }                     }
                } | Out-Null                 } | Out-Null
-  
-                # Reset for the next batch 
-                $no++ 
-                $sum = 0 
-                $start = $i + 1 
            }             }
 +   
 +            Write-Host "Waiting for connections to complete..."
 +            Get-Job | Receive-Job -Wait -ErrorAction Stop
 +   
 +            Write-Host "Done"
        }         }
- 
-        Write-Host "Waiting for batches to complete" 
-        Get-Job | Receive-Job -Wait 
- 
-        Write-Host "Done" 
        $ended = Get-Date         $ended = Get-Date
-        Write-Host ("Took {0}" -f (New-TimeSpan -Start $started -End $ended))+        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)
 +        Write-Host "Synchronized $($stats.count) differences"
    }     }
    finally     finally
Line 144: Line 508:
        $session.Dispose()         $session.Dispose()
    }     }
 +·
    exit 0     exit 0
} }
-catch [Exception]+catch
{ {
-    Write-Host ("Error: {0}" -f $_.Exception.Message)+    Write-Host "Error: $($_.Exception.Message)"
    exit 1     exit 1
} }
</code> </code>
- 

Last modified: by martin