This is an old revision of the document!

Automating download in parallel connections over SFTP/FTP protocol

The following example uses WinSCP .NET assembly from a PowerShell script. If you have another preferred language, you can easily translate it.

The example opens by default three parallel connections and evenly splits the download of files from specified remote directory among these.

Advertisement

param (
    $sessionUrl = "sftp://user:password;fingerprint=ssh-rsa-xx-xx-xx@example.com/",
    $remotePath = "/home/user/",
    $localPath = "c:\downloaded\",
    $batches = 3
)
 
try
{
    $dllPath = (Join-Path $PSScriptRoot "WinSCPnet.dll")
    # Load WinSCP .NET assembly
    Add-Type -Path $dllPath
 
    # Setup session options
    $sessionOptions = New-Object WinSCP.SessionOptions
    $sessionOptions.ParseUrl($sessionUrl)
 
    $started = Get-Date
 
    try
    {
        # Connect
        Write-Host "Connecting..."
        $session = New-Object WinSCP.Session
        $session.Open($sessionOptions)
        
        # Retrieve list of files and sort them from larges to smallest
        $files =
            $session.ListDirectory($remotePath).Files |
            Where-Object { -Not $_.IsDirectory } |
            Sort-Object Length -Descending
 
        # Calculate total size of all files
        $total = ($files | Measure-Object -Property Length -Sum).Sum
        
        # And batch size
        $batch = [int]($total / $batches)
 
        Write-Host ("Will download {0} files totaling {1} bytes in {2} parallel batches, {3} bytes on average in each" -f $files.Count, $total, $batches, $batch)
        
        $start = 0
        $sum = 0
        $no = 0
 
        for ($i = 0; $i -lt $files.Count; $i++)
        {
            $sum += $files[$i].Length
 
            # Found enough files for the next batch
            if (($sum -ge $batch) -or ($i -eq $files.Count - 1))
            {
                Write-Host ("Starting batch {0} to download {1} files totaling {2}" -f $no, ($i - $start + 1), $sum)
                
                $fileList = $files[$start..$i] -join ";"
                
                # Start the background job for the batch
                Start-Job -Name "Batch $no" -ArgumentList $dllPath, $sessionUrl, $remotePath, $localPath, $no, $fileList {
                    param (
                        [Parameter(Position = 0)]
                        $dllPath,
                        [Parameter(Position = 1)]
                        $sessionUrl,
                        [Parameter(Position = 2)]
                        $remotePath,
                        [Parameter(Position = 3)]
                        $localPath,
                        [Parameter(Position = 4)]
                        $no,
                        [Parameter(Position = 5)]
                        $fileList
                    )
 
                    try
                    {
                        Write-Host ("Starting batch {0}" -f $no)
 
                        # Load WinSCP .NET assembly.
                        # Need to use an absolute path as the Job is started from user's documents folder.
                        Add-Type -Path $dllPath
 
                        # Setup session options
                        $sessionOptions = New-Object WinSCP.SessionOptions
                        $sessionOptions.ParseUrl($sessionUrl)
                        
                        try
                        {
                            Write-Host ("Connecting batch {0}..." -f $no)
                            $session = New-Object WinSCP.Session
 
                            $session.Open($sessionOptions)
                            
                            $files = $fileList -split ";"
 
                            # Download the files selected for this batch
                            foreach ($file in $files)
                            {
                                $remoteFilePath = "$remotePath/$file"
                                $localFilePath = "$localPath\$file"
                                Write-Host "Downloading $remoteFilePath to $localFilePath in $no"
 
                                $session.GetFiles($session.EscapeFileMask($remoteFilePath), $localFilePath).Check()
                            }
                        }
                        finally
                        {
                            # Disconnect, clean up
                            $session.Dispose()
                        }
                        
                        Write-Host ("Batch {0} done" -f $no)
                    }
                    catch [Exception]
                    {
                        Write-Host ("Error: {0}" -f $_.Exception.Message)
                        exit 1
                    }
                } | Out-Null
 
                # Reset for the next batch
                $no++
                $sum = 0
                $start = $i + 1
            }
        }
 
        Write-Host "Waiting for batches to complete"
        Get-Job | Receive-Job -Wait
 
        Write-Host "Done"
 
        $ended = Get-Date
        Write-Host ("Took {0}" -f (New-TimeSpan -Start $started -End $ended))
    }
    finally
    {
        # Disconnect, clean up
        $session.Dispose()
    }
 
    exit 0
}
catch [Exception]
{
    Write-Host ("Error: {0}" -f $_.Exception.Message)
    exit 1
}

Advertisement

Last modified: by martin