Differences

This shows you the differences between the selected revisions of the page.

library_example_parallel_transfers 2017-10-19 library_example_parallel_transfers 2023-11-27 (current)
Line 1: Line 1:
-====== Automating transfers in parallel connections over SFTP/FTP protocol ======+====== Automating transfers or synchronization in parallel connections over SFTP/FTP protocol ======
-===== [[csharp]] Download (C#) =====+===== Download ===== 
 + 
 +==== [[download_csharp]] C# ====
By default, the example opens three parallel connections and uses them to download a remote file tree to a local folder in parallel. By default, the example opens three parallel connections and uses them to download a remote file tree to a local folder in parallel.
Line 19: Line 21:
        {         {
            // Setup session options             // Setup session options
-            SessionOptions sessionOptions = new SessionOptions+            var sessionOptions = new SessionOptions
            {             {
                Protocol = Protocol.Sftp,                 Protocol = Protocol.Sftp,
Line 25: Line 27:
                UserName = "user",                 UserName = "user",
                Password = "mypassword",                 Password = "mypassword",
-                SshHostKeyFingerprint = "ssh-rsa 2048 xx:xx:xx:xx:xx:xx:xx:xx..."+                SshHostKeyFingerprint = "ssh-rsa 2048 xxxxxxxxxxx..."
            };             };
-            const string localPath = "C:\\local\\path";+            const string localPath = @"C:\local\path";
            const string remotePath = "/remote/path";             const string remotePath = "/remote/path";
            const int batches = 3;             const int batches = 3;
-            DateTime started = DateTime.Now;+            var started = DateTime.Now;
            int count = 0;             int count = 0;
-            Int64 bytes = 0;+            long bytes = 0;
-            using (Session session = new Session())+            using (var session = new Session())
            {             {
                Console.WriteLine("Connecting...");                 Console.WriteLine("Connecting...");
Line 42: Line 44:
                Console.WriteLine("Starting files enumeration...");                 Console.WriteLine("Starting files enumeration...");
 +                var opts = WinSCP.EnumerationOptions.AllDirectories;
                IEnumerable<RemoteFileInfo> files =                 IEnumerable<RemoteFileInfo> files =
-                    session.EnumerateRemoteFiles( +                    session.EnumerateRemoteFiles(remotePath, null, opts);
-························remotePath, null, EnumerationOptions.AllDirectories);+
                IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator();                 IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator();
-                List<Task> tasks = new List<Task>();+                var tasks = new List<Task>();
                for (int i = 1; i <= batches; i++)                 for (int i = 1; i <= batches; i++)
Line 53: Line 55:
                    int no = i;                     int no = i;
-                    Task task = new Task(() =>+                    var task = new Task(() => 
 +                    { 
 +                        using (var downloadSession = new Session())
                        {                         {
-                            using (Session downloadSession = new Session())+                            Console.WriteLine($"Starting download {no}..."); 
 +                           downloadSession.Open(sessionOptions)
 + 
 +                            while (true)
                            {                             {
-                                Console.WriteLine("Starting download {0}...";, no); +                                string remoteFilePath
-                                downloadSession.Open(sessionOptions); +                                lock (filesEnumerator)
- +
-                                while (true)+
                                {                                 {
-                                    string remoteFilePath; +                                    if (!filesEnumerator.MoveNext())
-····································lock (filesEnumerator)+
                                    {                                     {
-                                        if (!filesEnumerator.MoveNext()) +                                        break;
-                                        { +
-············································break+
-                                        } +
- +
-                                        RemoteFileInfo file = filesEnumerator.Current; +
-                                        bytes += file.Length; +
-                                        count++; +
-                                        remoteFilePath = file.FullName;+
                                    }                                     }
-                                    string localFilePath = +                                    RemoteFileInfo file = filesEnumerator.Current
-                                       session.TranslateRemotePathToLocal( +                                    bytes += file.Length
-                                            remoteFilePath, remotePath, localPath)+                                    count++
-                                    Console.WriteLine( +                                    remoteFilePath = file.FullName;
- ·······································"Downloading {0} to {1} in {2}...", +
-                                        remoteFilePath, localFilePath, no)+
-                                    Directory.CreateDirectory( +
-                                        Path.GetDirectoryName(localFilePath))+
-                                    downloadSession.GetFiles( +
-                                        session.EscapeFileMask(remoteFilePath), localFilePath). +
-                                        Check();+
                                }                                 }
-                                Console.WriteLine("Download {0} done", no);+                                string localFilePath = 
 +                                    RemotePath.TranslateRemotePathToLocal( 
 +                                        remoteFilePath, remotePath, localPath); 
 +································Console.WriteLine( 
 +                                    $"Downloading {remoteFilePath} to {localFilePath} in {no}..."); 
 +                                string localFileDir = Path.GetDirectoryName(localFilePath); 
 +                                Directory.CreateDirectory(localFileDir); 
 +                                downloadSession.GetFileToDirectory(remoteFilePath, localFileDir);
                            }                             }
-························});+                            Console.WriteLine($"Download {no} done"); 
 +                        } 
 +····················});
                    tasks.Add(task);                     tasks.Add(task);
Line 104: Line 102:
            Console.WriteLine("Done");             Console.WriteLine("Done");
-            DateTime ended = DateTime.Now; +            var ended = DateTime.Now; 
-            Console.WriteLine("Took {0}", (ended - started)); +            Console.WriteLine($"Took {ended - started}"); 
-            Console.WriteLine("Downloaded {0} files, totaling {1:N0} bytes", count, bytes);+            Console.WriteLine($"Downloaded {count} files, totaling {bytes:N0} bytes");
            return 0;             return 0;
Line 112: Line 110:
        catch (Exception e)         catch (Exception e)
        {         {
-            Console.WriteLine("Error: {0}", e);+            Console.WriteLine($"Error: {e}");
            return 1;             return 1;
        }         }
Line 119: Line 117:
</code> </code>
-===== Download (PowerShell) =====+==== [[powershell]] PowerShell ====
-The code is not equivalent to the C# example above. The PowerShell code does not download subdirectories. It also splits the files into batches by their count only, instead of using a queue like the C# code.+The following code uses the [[ps>threadjob/start-threadjob|''Start-ThreadJob'' cmdlet]] from the ''ThreadJob'' module. It is part of PowerShell 6 and newer. In PowerShell 5, it can be installed using ''Install-Module ThreadJob''.
<code powershell> <code powershell>
param ( param (
-    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xx-xx-xx@example.com/", +    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/", 
-    $remotePath = "/home/user/", +    $remotePath = "/remote/path/", 
-    $localPath = "c:\downloaded\",+    $localPath = "c:\local\path\",
    $batches = 3     $batches = 3
) )
 +·
try try
{ {
-    $dllPath = (Join-Path $PSScriptRoot "WinSCPnet.dll")+    $assemblyFilePath = "WinSCPnet.dll"
    # Load WinSCP .NET assembly     # Load WinSCP .NET assembly
-    Add-Type -Path $dllPath +    Add-Type -Path $assemblyFilePath 
 +·
    # Setup session options     # Setup session options
    $sessionOptions = New-Object WinSCP.SessionOptions     $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)     $sessionOptions.ParseUrl($sessionUrl)
 +·
    $started = Get-Date     $started = Get-Date
 +    # Plain variables cannot be modified in job threads 
 +    $stats = @{ 
 +        count = 0 
 +        bytes = [long]0 
 +    } 
 +·
    try     try
    {     {
Line 150: Line 153:
        $session.Open($sessionOptions)         $session.Open($sessionOptions)
               
-        # Retrieve list of files and sort them from largest to smallest +        Write-Host "Starting files enumeration..." 
- ·······[array]$files +        $files
-            $session.ListDirectory($remotePath).Files | + ···········$session.EnumerateRemoteFiles
-            Where-Object { -Not $_.IsDirectory } | + ···············$remotePath, $Null, [WinSCP.EnumerationOptions]::AllDirectories
-············Sort-Object Length -Descending +        $filesEnumerator = $files.GetEnumerator() 
- +· 
-        # Calculate total size of all files +        for ($i = 1; $i -le $batches; $i++)
-        $total = ($files | Measure-Object -Property Length -Sum).Sum +
-        +
-        # And batch size +
-········$batch = [int]($total / $batches) +
- +
- ·······Write-Host ( +
-            "Will download $($files.Count) files totaling $total bytes in " + +
-············"$batches parallel batches, $batch bytes on average in each") +
-········ +
-        $start = +
-········$sum = 0 +
-········$no = 0 +
- +
-        for ($i = 0; $i -lt $files.Count; $i++)+
        {         {
-            $sum += $files[$i].Length+            Start-ThreadJob -Name "Batch $i" -ArgumentList $i { 
 +                param ($no)
-············# Found enough files for the next batch +················try 
-           if (($sum -ge $batch) -or ($i -eq $files.Count - 1)) + ···············
-············+ ···················Write-Host "Starting download $no..."
- ···············Write-Host "Starting batch $no to download $($i - $start + 1) files totaling $sum" +
-                 +
-                $fileList = $files[$start..$i] -join ";" +
-                 +
-                # Start the background job for the batch +
-                Start-Job -Name "Batch $no" ` +
-                    -ArgumentList $dllPath, $sessionUrl, $remotePath, $localPath, $no, $fileList { +
-                    param ( +
-                        [Parameter(Position = 0)] +
-                        $dllPath, +
-                        [Parameter(Position = 1)] +
-                        $sessionUrl, +
-                        [Parameter(Position = 2)] +
-                        $remotePath, +
-                        [Parameter(Position = 3)] +
-                        $localPath, +
-                        [Parameter(Position = 4)] +
-                        $no, +
-                        [Parameter(Position = 5)] +
-                        $fileList +
-                    )+
-                    try+                    $downloadSession = New-Object WinSCP.Session 
 +                    $downloadSession.Open($using:sessionOptions) 
 + 
 +                    while ($True)
                    {                     {
-                        Write-Host "Starting batch $no" +                        [System.Threading.Monitor]::Enter($using:filesEnumerator)
- +
-                        # Load WinSCP .NET assembly. +
-························# Need to use an absolute path as the Job is started +
-························# from user's documents folder+
-                        Add-Type -Path $dllPath +
- +
-                        # Setup session options +
-                        $sessionOptions = New-Object WinSCP.SessionOptions +
-                        $sessionOptions.ParseUrl($sessionUrl) +
-························+
                        try                         try
                        {                         {
-                            Write-Host "Connecting batch $no..." +                            if (!($using:filesEnumerator).MoveNext())
-                            $session = New-Object WinSCP.Session +
- +
-                            $session.Open($sessionOptions) +
-                             +
-                            $files = $fileList -split ";" +
- +
-                            # Download the files selected for this batch +
-                            foreach ($file in $files)+
                            {                             {
-                                $remoteFilePath = "$remotePath/$file" +                                break 
-                               $localFilePath = "$localPath\$file" + ···························}
-                                Write-Host "Downloading $remoteFilePath to $localFilePath in $no";+
-································$session.GetFiles( +····························$file = ($using:filesEnumerator).Current 
-····································$session.EscapeFileMask($remoteFilePath), $localFilePath). +····························($using:stats).bytes += $file.Length 
-                                   Check() + ···························($using:stats).count++ 
-                            }+                            $remoteFilePath = $file.FullName
                        }                         }
                        finally                         finally
                        {                         {
-                            # Disconnect, clean up +                            [System.Threading.Monitor]::<nohilite>Exit</nohilite>($using:filesEnumerator)
-····························$session.Dispose()+
                        }                         }
-························ + 
-                        Write-Host "Batch $no done"+                        $localFilePath = 
 +                            [WinSCP.RemotePath]::TranslateRemotePathToLocal( 
 +                                $remoteFilePath, $using:remotePath, $using:localPath) 
 +                        Write-Host "Downloading $remoteFilePath to $localFilePath in $no..." 
 +                        $localFileDir = (Split-Path -Parent $localFilePath) 
 +                        New-Item -ItemType directory -Path $localFileDir -Force | Out-Null 
 +                        $downloadSession.GetFileToDirectory($remoteFilePath, $localFileDir) | 
 +                            Out-Null
                    }                     }
-                    catch [Exception] + 
-                    +                    Write-Host "Download $no done
-························Write-Host "Error: $($_.Exception.Message)+ ···············
-                       exit 1 +                finally 
-····················+                { 
-                } | Out-Null + ···················$downloadSession.Dispose() 
-· +                } 
-                # Reset for the next batch + ···········} | Out-Null
- ···············$no++ +
-················$sum = 0 +
-                $start = $i + 1 +
-············}+
        }         }
-        Write-Host "Waiting for batches to complete" +        Write-Host "Waiting for downloads to complete..."
-        Get-Job | Receive-Job -Wait +        Get-Job | Receive-Job -Wait -ErrorAction Stop 
 +·
        Write-Host "Done"         Write-Host "Done"
 +·
        $ended = Get-Date         $ended = Get-Date
        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"         Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"
 +        Write-Host ("Downloaded $($stats.count) files, " +
 +                    "totaling $($stats.bytes.ToString("N0")) bytes")
    }     }
    finally     finally
Line 268: Line 225:
        $session.Dispose()         $session.Dispose()
    }     }
 +·
    exit 0     exit 0
} }
-catch [Exception]+catch
{ {
    Write-Host "Error: $($_.Exception.Message)"     Write-Host "Error: $($_.Exception.Message)"
Line 278: Line 235:
</code> </code>
-===== [[upload_csharp]] Upload (C#) =====+===== Upload ===== 
 + 
 +==== [[upload_csharp]] C# ====
By default, the example opens three parallel connections and uses them to upload a local file tree to a remote folder in parallel. By default, the example opens three parallel connections and uses them to upload a local file tree to a remote folder in parallel.
Line 302: Line 261:
                UserName = "user",                 UserName = "user",
                Password = "password",                 Password = "password",
-                SshHostKeyFingerprint = "ssh-rsa 2048 xx:xx:xx:xx:xx:xx:xx:xx..."+                SshHostKeyFingerprint = "ssh-rsa 2048 xxxxxxxxxxx..."
            };             };
-            const string localPath = "C:\\local\\path";+            const string localPath = @"C:\local\path";
            const string remotePath = "/remote/path";             const string remotePath = "/remote/path";
            const int batches = 3;             const int batches = 3;
Line 352: Line 311:
                            string remoteFilePath =                             string remoteFilePath =
-                                uploadSession.TranslateLocalPathToRemote(+                                RemotePath.TranslateLocalPathToRemote(
                                    localFilePath, localPath, remotePath);                                     localFilePath, localPath, remotePath);
                            Console.WriteLine(                             Console.WriteLine(
Line 397: Line 356:
                            uploadSession.PutFiles(                             uploadSession.PutFiles(
-                                localFilePath, uploadSession.EscapeFileMask(remoteFilePath)).+                                localFilePath, RemotePath.EscapeFileMask(remoteFilePath)).
                                Check();                                 Check();
                        }                         }
Line 436: Line 395:
} }
</code> </code>
 +
 +===== [[synchronization]] Synchronization =====
 +
 +==== [[synchronization_powershell]] PowerShell ====
 +
 +//Regarding the ''Start-ThreadJob'' cmdlet, see the comment in the [[#powershell|Download section]].//
 +
 +<code powershell>
 +param (
 +    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/",
 +    $remotePath = "/remote/path/",
 +    $localPath = "c:\local\path\",
 +    $removeFiles = $False,
 +    $connections = 3
 +)
 +
 +try
 +{
 +    $assemblyFilePath = "WinSCPnet.dll"
 +    # Load WinSCP .NET assembly
 +    Add-Type -Path $assemblyFilePath
 +
 +    # Setup session options
 +    $sessionOptions = New-Object WinSCP.SessionOptions
 +    $sessionOptions.ParseUrl($sessionUrl)
 +
 +    $started = Get-Date
 +    # Plain variables cannot be modified in job threads
 +    $stats = @{
 +        count = 0
 +    }
 +
 +    try
 +    {
 +        # Connect
 +        Write-Host "Connecting..."
 +        $session = New-Object WinSCP.Session
 +        $session.Open($sessionOptions)
 +       
 +        Write-Host "Comparing directories..."
 +        $differences =
 +            $session.CompareDirectories(
 +                [WinSCP.SynchronizationMode]::Both, $localPath, $remotePath, $removeFiles)
 +        if ($differences.Count -eq 0)
 +        {
 +            Write-Host "No changes found." 
 +        }
 +        else
 +        {
 +            if ($differences.Count -lt $connections)
 +            {
 +                $connections = $differences.Count;
 +            }
 +            $differenceEnumerator = $differences.GetEnumerator()
 +   
 +            for ($i = 1; $i -le $connections; $i++)
 +            {
 +                Start-ThreadJob -Name "Connection $i" -ArgumentList $i {
 +                    param ($no)
 +   
 +                    try
 +                    {
 +                        Write-Host "Starting connection $no..."
 +   
 +                        $syncSession = New-Object WinSCP.Session
 +                        $syncSession.Open($using:sessionOptions)
 +   
 +                        while ($True)
 +                        {
 +                            [System.Threading.Monitor]::Enter($using:differenceEnumerator)
 +                            try
 +                            {
 +                                if (!($using:differenceEnumerator).MoveNext())
 +                                {
 +                                    break
 +                                }
 +   
 +                                $difference = ($using:differenceEnumerator).Current
 +                                ($using:stats).count++
 +                            }
 +                            finally
 +                            {
 +                                [System.Threading.Monitor]::Exit($using:differenceEnumerator)
 +                            }
 +
 +                            Write-Host "$difference in $no..."
 +                            $difference.Resolve($syncSession) | Out-Null
 +                        }
 +   
 +                        Write-Host "Connection $no done"
 +                    }
 +                    finally
 +                    {
 +                        $syncSession.Dispose()
 +                    }
 +                } | Out-Null
 +            }
 +   
 +            Write-Host "Waiting for connections to complete..."
 +            Get-Job | Receive-Job -Wait -ErrorAction Stop
 +   
 +            Write-Host "Done"
 +        }
 +
 +        $ended = Get-Date
 +        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"
 +        Write-Host "Synchronized $($stats.count) differences"
 +    }
 +    finally
 +    {
 +        # Disconnect, clean up
 +        $session.Dispose()
 +    }
 +
 +    exit 0
 +}
 +catch
 +{
 +    Write-Host "Error: $($_.Exception.Message)"
 +    exit 1
 +}
 +</code>
 +

Last modified: by martin