Differences

This shows you the differences between the selected revisions of the page.

library_example_parallel_transfers 2017-11-13 library_example_parallel_transfers 2023-11-27 (current)
Line 1: Line 1:
-====== Automating transfers in parallel connections over SFTP/FTP protocol ======+====== Automating transfers or synchronization in parallel connections over SFTP/FTP protocol ======
===== Download ===== ===== Download =====
 +
==== [[download_csharp]] C# ==== ==== [[download_csharp]] C# ====
Line 20: Line 21:
        {         {
            // Setup session options             // Setup session options
-            SessionOptions sessionOptions = new SessionOptions+            var sessionOptions = new SessionOptions
            {             {
                Protocol = Protocol.Sftp,                 Protocol = Protocol.Sftp,
Line 26: Line 27:
                UserName = "user",                 UserName = "user",
                Password = "mypassword",                 Password = "mypassword",
-                SshHostKeyFingerprint = "ssh-rsa 2048 xx:xx:xx:xx:xx:xx:xx:xx..."+                SshHostKeyFingerprint = "ssh-rsa 2048 xxxxxxxxxxx..."
            };             };
Line 33: Line 34:
            const int batches = 3;             const int batches = 3;
-            DateTime started = DateTime.Now;+            var started = DateTime.Now;
            int count = 0;             int count = 0;
-            Int64 bytes = 0;+            long bytes = 0;
-            using (Session session = new Session())+            using (var session = new Session())
            {             {
                Console.WriteLine("Connecting...");                 Console.WriteLine("Connecting...");
Line 43: Line 44:
                Console.WriteLine("Starting files enumeration...");                 Console.WriteLine("Starting files enumeration...");
 +                var opts = WinSCP.EnumerationOptions.AllDirectories;
                IEnumerable<RemoteFileInfo> files =                 IEnumerable<RemoteFileInfo> files =
-                    session.EnumerateRemoteFiles( +                    session.EnumerateRemoteFiles(remotePath, null, opts);
-························remotePath, null, EnumerationOptions.AllDirectories);+
                IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator();                 IEnumerator<RemoteFileInfo> filesEnumerator = files.GetEnumerator();
-                List<Task> tasks = new List<Task>();+                var tasks = new List<Task>();
                for (int i = 1; i <= batches; i++)                 for (int i = 1; i <= batches; i++)
Line 54: Line 55:
                    int no = i;                     int no = i;
-                    Task task = new Task(() =>+                    var task = new Task(() => 
 +                    { 
 +                        using (var downloadSession = new Session())
                        {                         {
-                            using (Session downloadSession = new Session())+                            Console.WriteLine($"Starting download {no}..."); 
 +                           downloadSession.Open(sessionOptions)
 + 
 +                            while (true)
                            {                             {
-                                Console.WriteLine("Starting download {0}...", no); +                                string remoteFilePath
-                                downloadSession.Open(sessionOptions); +                                lock (filesEnumerator)
- +
-                                while (true)+
                                {                                 {
-                                    string remoteFilePath; +                                    if (!filesEnumerator.MoveNext())
-····································lock (filesEnumerator)+
                                    {                                     {
-                                        if (!filesEnumerator.MoveNext()) +                                        break;
-                                        { +
-············································break+
-                                        } +
- +
-                                        RemoteFileInfo file = filesEnumerator.Current; +
-                                        bytes += file.Length; +
-                                        count++; +
-                                        remoteFilePath = file.FullName;+
                                    }                                     }
-                                    string localFilePath = +                                    RemoteFileInfo file = filesEnumerator.Current
-                                       downloadSession.TranslateRemotePathToLocal( +                                    bytes += file.Length
-                                            remoteFilePath, remotePath, localPath)+                                    count++
-                                    Console.WriteLine( +                                    remoteFilePath = file.FullName;
- ·······································"Downloading {0} to {1} in {2}...", +
-                                        remoteFilePath, localFilePath, no)+
-                                    Directory.CreateDirectory( +
-                                        Path.GetDirectoryName(localFilePath))+
-                                    downloadSession.GetFiles( +
-                                        downloadSession.EscapeFileMask(remoteFilePath), +
-                                       localFilePath).Check();+
                                }                                 }
-                                Console.WriteLine("Download {0} done", no);+                                string localFilePath = 
 +                                    RemotePath.TranslateRemotePathToLocal( 
 +                                        remoteFilePath, remotePath, localPath); 
 +································Console.WriteLine( 
 +                                    $"Downloading {remoteFilePath} to {localFilePath} in {no}..."); 
 +                                string localFileDir = Path.GetDirectoryName(localFilePath); 
 +                                Directory.CreateDirectory(localFileDir); 
 +                                downloadSession.GetFileToDirectory(remoteFilePath, localFileDir);
                            }                             }
-························});+                            Console.WriteLine($"Download {no} done"); 
 +                        } 
 +····················});
                    tasks.Add(task);                     tasks.Add(task);
Line 105: Line 102:
            Console.WriteLine("Done");             Console.WriteLine("Done");
-            DateTime ended = DateTime.Now; +            var ended = DateTime.Now; 
-            Console.WriteLine("Took {0}", (ended - started)); +            Console.WriteLine($"Took {ended - started}"); 
-            Console.WriteLine("Downloaded {0} files, totaling {1:N0} bytes", count, bytes);+            Console.WriteLine($"Downloaded {count} files, totaling {bytes:N0} bytes");
            return 0;             return 0;
Line 113: Line 110:
        catch (Exception e)         catch (Exception e)
        {         {
-            Console.WriteLine("Error: {0}", e);+            Console.WriteLine($"Error: {e}");
            return 1;             return 1;
        }         }
Line 122: Line 119:
==== [[powershell]] PowerShell ==== ==== [[powershell]] PowerShell ====
-The code is not equivalent to the C# example above. The PowerShell code does not download subdirectories. It also splits the files into batches by their count only, instead of using a queue like the C# code.+The following code uses [[ps>threadjob/start-threadjob|''Start-ThreadJob'' cmdlet]] from the ''ThreadJob'' module. It is a part of PowerShell 6 and newer. In PowerShell 5, it can be installed using ''Install-Module ThreadJob''.
<code powershell> <code powershell>
param ( param (
-    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xx-xx-xx@example.com/", +    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/", 
-    $remotePath = "/home/user/", +    $remotePath = "/remote/path/", 
-    $localPath = "c:\downloaded\",+    $localPath = "c:\local\path\",
    $batches = 3     $batches = 3
) )
 +·
try try
{ {
-    $dllPath = (Join-Path $PSScriptRoot "WinSCPnet.dll")+    $assemblyFilePath = "WinSCPnet.dll"
    # Load WinSCP .NET assembly     # Load WinSCP .NET assembly
-    Add-Type -Path $dllPath +    Add-Type -Path $assemblyFilePath 
 +·
    # Setup session options     # Setup session options
    $sessionOptions = New-Object WinSCP.SessionOptions     $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)     $sessionOptions.ParseUrl($sessionUrl)
 +·
    $started = Get-Date     $started = Get-Date
 +    # Plain variables cannot be modified in job threads 
 +    $stats = @{ 
 +        count = 0 
 +        bytes = [long]0 
 +    } 
 +·
    try     try
    {     {
Line 151: Line 153:
        $session.Open($sessionOptions)         $session.Open($sessionOptions)
               
-        # Retrieve list of files and sort them from largest to smallest +        Write-Host "Starting files enumeration..." 
- ·······[array]$files +        $files
-            $session.ListDirectory($remotePath).Files | + ···········$session.EnumerateRemoteFiles
-            Where-Object { -Not $_.IsDirectory } | + ···············$remotePath, $Null, [WinSCP.EnumerationOptions]::AllDirectories
-············Sort-Object Length -Descending +        $filesEnumerator = $files.GetEnumerator() 
- +· 
-        # Calculate total size of all files +        for ($i = 1; $i -le $batches; $i++)
-        $total = ($files | Measure-Object -Property Length -Sum).Sum +
-        +
-        # And batch size +
-········$batch = [int]($total / $batches) +
- +
- ·······Write-Host ( +
-            "Will download $($files.Count) files totaling $total bytes in " + +
-············"$batches parallel batches, $batch bytes on average in each") +
-········ +
-        $start = +
-········$sum = 0 +
-········$no = 0 +
- +
-        for ($i = 0; $i -lt $files.Count; $i++)+
        {         {
-            $sum += $files[$i].Length+            Start-ThreadJob -Name "Batch $i" -ArgumentList $i { 
 +                param ($no)
-············# Found enough files for the next batch +················try 
-           if (($sum -ge $batch) -or ($i -eq $files.Count - 1)) + ···············
-············+ ···················Write-Host "Starting download $no..."
- ···············Write-Host "Starting batch $no to download $($i - $start + 1) files totaling $sum" +
-                 +
-                $fileList = $files[$start..$i] -join ";" +
-                 +
-                # Start the background job for the batch +
-                Start-Job -Name "Batch $no" ` +
-                    -ArgumentList $dllPath, $sessionUrl, $remotePath, $localPath, $no, $fileList { +
-                    param ( +
-                        [Parameter(Position = 0)] +
-                        $dllPath, +
-                        [Parameter(Position = 1)] +
-                        $sessionUrl, +
-                        [Parameter(Position = 2)] +
-                        $remotePath, +
-                        [Parameter(Position = 3)] +
-                        $localPath, +
-                        [Parameter(Position = 4)] +
-                        $no, +
-                        [Parameter(Position = 5)] +
-                        $fileList +
-                    )+
-                    try+                    $downloadSession = New-Object WinSCP.Session 
 +                    $downloadSession.Open($using:sessionOptions) 
 + 
 +                    while ($True)
                    {                     {
-                        Write-Host "Starting batch $no" +                        [System.Threading.Monitor]::Enter($using:filesEnumerator)
- +
-                        # Load WinSCP .NET assembly. +
-························# Need to use an absolute path as the Job is started +
-························# from user's documents folder+
-                        Add-Type -Path $dllPath +
- +
-                        # Setup session options +
-                        $sessionOptions = New-Object WinSCP.SessionOptions +
-                        $sessionOptions.ParseUrl($sessionUrl) +
-························+
                        try                         try
                        {                         {
-                            Write-Host "Connecting batch $no..." +                            if (!($using:filesEnumerator).MoveNext())
-                            $session = New-Object WinSCP.Session +
- +
-                            $session.Open($sessionOptions) +
-                             +
-                            $files = $fileList -split ";" +
- +
-                            # Download the files selected for this batch +
-                            foreach ($file in $files)+
                            {                             {
-                                $remoteFilePath = "$remotePath/$file" +                                break 
-                               $localFilePath = "$localPath\$file" + ···························}
-                                Write-Host "Downloading $remoteFilePath to $localFilePath in $no";+
-································$session.GetFiles( +····························$file = ($using:filesEnumerator).Current 
-····································$session.EscapeFileMask($remoteFilePath), $localFilePath). +····························($using:stats).bytes += $file.Length 
-                                   Check() + ···························($using:stats).count++ 
-                            }+                            $remoteFilePath = $file.FullName
                        }                         }
                        finally                         finally
                        {                         {
-                            # Disconnect, clean up +                            [System.Threading.Monitor]::<nohilite>Exit</nohilite>($using:filesEnumerator)
-····························$session.Dispose()+
                        }                         }
-························ + 
-                        Write-Host "Batch $no done"+                        $localFilePath = 
 +                            [WinSCP.RemotePath]::TranslateRemotePathToLocal( 
 +                                $remoteFilePath, $using:remotePath, $using:localPath) 
 +                        Write-Host "Downloading $remoteFilePath to $localFilePath in $no..." 
 +                        $localFileDir = (Split-Path -Parent $localFilePath) 
 +                        New-Item -ItemType directory -Path $localFileDir -Force | Out-Null 
 +                        $downloadSession.GetFileToDirectory($remoteFilePath, $localFileDir) | 
 +                            Out-Null
                    }                     }
-                    catch + 
-                    +                    Write-Host "Download $no done
-························Write-Host "Error: $($_.Exception.Message)+ ···············
-                       exit 1 +                finally 
-····················+                { 
-                } | Out-Null + ···················$downloadSession.Dispose() 
-· +                } 
-                # Reset for the next batch + ···········} | Out-Null
- ···············$no++ +
-················$sum = 0 +
-                $start = $i + 1 +
-············}+
        }         }
-        Write-Host "Waiting for batches to complete" +        Write-Host "Waiting for downloads to complete...
-        Get-Job | Receive-Job -Wait +        Get-Job | Receive-Job -Wait -ErrorAction Stop 
 +·
        Write-Host "Done"         Write-Host "Done"
 +·
        $ended = Get-Date         $ended = Get-Date
        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"         Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"
 +        Write-Host ("Downloaded $($stats.count) files, " +
 +                    "totaling $($stats.bytes.ToString("N0")) bytes")
    }     }
    finally     finally
Line 269: Line 225:
        $session.Dispose()         $session.Dispose()
    }     }
 +·
    exit 0     exit 0
} }
Line 280: Line 236:
===== Upload ===== ===== Upload =====
 +
==== [[upload_csharp]] C# ==== ==== [[upload_csharp]] C# ====
Line 304: Line 261:
                UserName = "user",                 UserName = "user",
                Password = "password",                 Password = "password",
-                SshHostKeyFingerprint = "ssh-rsa 2048 xx:xx:xx:xx:xx:xx:xx:xx..."+                SshHostKeyFingerprint = "ssh-rsa 2048 xxxxxxxxxxx..."
            };             };
Line 354: Line 311:
                            string remoteFilePath =                             string remoteFilePath =
-                                uploadSession.TranslateLocalPathToRemote(+                                RemotePath.TranslateLocalPathToRemote(
                                    localFilePath, localPath, remotePath);                                     localFilePath, localPath, remotePath);
                            Console.WriteLine(                             Console.WriteLine(
Line 399: Line 356:
                            uploadSession.PutFiles(                             uploadSession.PutFiles(
-                                localFilePath, uploadSession.EscapeFileMask(remoteFilePath)).+                                localFilePath, RemotePath.EscapeFileMask(remoteFilePath)).
                                Check();                                 Check();
                        }                         }
Line 438: Line 395:
} }
</code> </code>
 +
 +===== [[synchronization]] Synchronization =====
 +
 +==== [[synchronization_powershell]] PowerShell ====
 +
 +//Regarding the ''Start-ThreadJob'' cmdlet, see the comment in the [[#powershell|Download section]].//
 +
 +<code powershell>
 +param (
 +    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xxxxxxxxxxx...@example.com/",
 +    $remotePath = "/remote/path/",
 +    $localPath = "c:\local\path\",
 +    $removeFiles = $False,
 +    $connections = 3
 +)
 +
 +try
 +{
 +    $assemblyFilePath = "WinSCPnet.dll"
 +    # Load WinSCP .NET assembly
 +    Add-Type -Path $assemblyFilePath
 +
 +    # Setup session options
 +    $sessionOptions = New-Object WinSCP.SessionOptions
 +    $sessionOptions.ParseUrl($sessionUrl)
 +
 +    $started = Get-Date
 +    # Plain variables cannot be modified in job threads
 +    $stats = @{
 +        count = 0
 +    }
 +
 +    try
 +    {
 +        # Connect
 +        Write-Host "Connecting..."
 +        $session = New-Object WinSCP.Session
 +        $session.Open($sessionOptions)
 +       
 +        Write-Host "Comparing directories..."
 +        $differences =
 +            $session.CompareDirectories(
 +                [WinSCP.SynchronizationMode]::Both, $localPath, $remotePath, $removeFiles)
 +        if ($differences.Count -eq 0)
 +        {
 +            Write-Host "No changes found." 
 +        }
 +        else
 +        {
 +            if ($differences.Count -lt $connections)
 +            {
 +                $connections = $differences.Count;
 +            }
 +            $differenceEnumerator = $differences.GetEnumerator()
 +   
 +            for ($i = 1; $i -le $connections; $i++)
 +            {
 +                Start-ThreadJob -Name "Connection $i" -ArgumentList $i {
 +                    param ($no)
 +   
 +                    try
 +                    {
 +                        Write-Host "Starting connection $no..."
 +   
 +                        $syncSession = New-Object WinSCP.Session
 +                        $syncSession.Open($using:sessionOptions)
 +   
 +                        while ($True)
 +                        {
 +                            [System.Threading.Monitor]::Enter($using:differenceEnumerator)
 +                            try
 +                            {
 +                                if (!($using:differenceEnumerator).MoveNext())
 +                                {
 +                                    break
 +                                }
 +   
 +                                $difference = ($using:differenceEnumerator).Current
 +                                ($using:stats).count++
 +                            }
 +                            finally
 +                            {
 +                                [System.Threading.Monitor]::Exit($using:differenceEnumerator)
 +                            }
 +
 +                            Write-Host "$difference in $no..."
 +                            $difference.Resolve($syncSession) | Out-Null
 +                        }
 +   
 +                        Write-Host "Connection $no done"
 +                    }
 +                    finally
 +                    {
 +                        $syncSession.Dispose()
 +                    }
 +                } | Out-Null
 +            }
 +   
 +            Write-Host "Waiting for connections to complete..."
 +            Get-Job | Receive-Job -Wait -ErrorAction Stop
 +   
 +            Write-Host "Done"
 +        }
 +
 +        $ended = Get-Date
 +        Write-Host "Took $(New-TimeSpan -Start $started -End $ended)"
 +        Write-Host "Synchronized $($stats.count) differences"
 +    }
 +    finally
 +    {
 +        # Disconnect, clean up
 +        $session.Dispose()
 +    }
 +
 +    exit 0
 +}
 +catch
 +{
 +    Write-Host "Error: $($_.Exception.Message)"
 +    exit 1
 +}
 +</code>
 +

Last modified: by martin