[PATCH] Force use UTF-8 for remote site in SFTP



[PATCH] Force use UTF-8 for remote site in SFTP

Is it possible to add this option? The sftp-server does not support version 4 of the protocol, but the locale on the SFTP server side uses UTF-8.

Here is the patch:

In this patch, I also removed the check for filenames containing '\\', because in Big5 Chinese (CP950) some Chinese characters contain this byte.

diff -Nur winscp.orig\core\SessionData.cpp winscp.patch\core\SessionData.cpp
--- winscp.orig\core\SessionData.cpp   Tue Feb 08 17:02:06 2005
+++ winscp.patch\core\SessionData.cpp   Tue Mar 01 16:12:34 2005
@@ -498,6 +498,7 @@
+    READ_SFTP_BUG(ForceUtf);
     #undef READ_SFTP_BUG
     SFTPMaxVersion = Storage->ReadInteger("SFTPMaxVersion", SFTPMaxVersion);
@@ -673,6 +674,7 @@
+      WRITE_SFTP_BUG(ForceUtf);
       #undef WRITE_SFTP_BUG
       WRITE_DATA(Integer, SFTPMaxVersion);
diff -Nur winscp.orig\core\SessionData.h winscp.patch\core\SessionData.h
--- winscp.orig\core\SessionData.h   Tue Feb 08 17:01:30 2005
+++ winscp.patch\core\SessionData.h   Tue Mar 01 16:11:48 2005
@@ -22,7 +22,7 @@
 enum TSshBug { sbIgnore1, sbPlainPW1, sbRSA1, sbHMAC2, sbDeriveKey2, sbRSAPad2,
   sbRekey2, sbPKSessID2 };
 #define BUG_COUNT (sbPKSessID2+1)
-enum TSftpBug { sbSymlink, sbUtf, sbSignedTS };
+enum TSftpBug { sbSymlink, sbUtf, sbSignedTS, sbForceUtf };
 #define SFTP_BUG_COUNT (sbSignedTS+1)
 enum TAutoSwitch { asOn, asOff, asAuto };
 enum TPingType { ptOff, ptNullPacket, ptDummyCommand };
diff -Nur winscp.orig\core\SftpFileSystem.cpp winscp.patch\core\SftpFileSystem.cpp
--- winscp.orig\core\SftpFileSystem.cpp   Wed Feb 09 09:41:30 2005
+++ winscp.patch\core\SftpFileSystem.cpp   Tue Mar 01 16:29:14 2005
@@ -292,7 +292,8 @@
   inline void AddPathString(const AnsiString Value, int Version, bool Utf)
-    AddString(Value, (Version >= 4) && Utf);
+//    AddString(Value, (Version >= 4) && Utf);
+    AddString(Value, Utf);
   void AddProperties(unsigned short * Rights, AnsiString * Owner,
@@ -458,7 +459,8 @@
   inline AnsiString GetPathString(int Version, bool Utf)
-    return GetString((Version >= 4) && Utf);
+//    return GetString((Version >= 4) && Utf);
+    return GetString(Utf);
   void GetFile(TRemoteFile * File, int Version, bool ConsiderDST, bool Utf, bool SignedTS)
@@ -1277,6 +1279,7 @@
   FAvoidBusy = false;
   FUtfStrings = false;
   FSignedTS = false;
+  FForceUtf = false;
   FSupport = new TSFTPSupport();
   FSupport->Extensions = new TStringList();
   FExtensions = new TStringList();
@@ -2242,23 +2245,32 @@
+  FForceUtf = (FTerminal->SessionData->SFTPBug[sbForceUtf] == asOn);
   if (FVersion >= 4)
-    FUtfStrings = (FTerminal->SessionData->SFTPBug[sbUtf] == asOff) ||
-      ((FTerminal->SessionData->SFTPBug[sbUtf] == asAuto) &&
-        (FTerminal->SshImplementation.Pos("Foxit-WAC-Server") != 1));
-    if (FUtfStrings)
+    if (FForceUtf)
-      FTerminal->LogEvent("We will use UTF-8 strings when appropriate");
+      FUtfStrings = true;
-      FTerminal->LogEvent("We believe the server has SFTP UTF-8 bug");
+      FUtfStrings = (FTerminal->SessionData->SFTPBug[sbUtf] == asOff) ||
+      ((FTerminal->SessionData->SFTPBug[sbUtf] == asAuto) &&
+        (FTerminal->SshImplementation.Pos("Foxit-WAC-Server") != 1));
-    FUtfStrings = false;
+    FUtfStrings = FForceUtf;
+  }
+  if (FUtfStrings)
+  {
+    FTerminal->LogEvent("We will use UTF-8 strings when appropriate");
+  }
+  else
+  {
+    FTerminal->LogEvent("We believe the server has SFTP UTF-8 bug");
@@ -2442,7 +2454,8 @@
           File = LoadFile(&ListingPacket, NULL, "");
           // security fix
           if (((File->FileName.Length() > 2) && IsDots(File->FileName)) || 
-              (File->FileName.Pos("/") > 0) || (File->FileName.Pos("\\") > 0))
+              (File->FileName.Pos("/") > 0))
+//              (File->FileName.Pos("/") > 0) || (File->FileName.Pos("\\") > 0))
             FTerminal->LogEvent(FORMAT("Ignored suspicious file '%s'", (File->FileName))); 
             delete File;
diff -Nur winscp.orig\core\SftpFileSystem.h winscp.patch\core\SftpFileSystem.h
--- winscp.orig\core\SftpFileSystem.h   Wed Feb 09 09:40:30 2005
+++ winscp.patch\core\SftpFileSystem.h   Tue Mar 01 16:13:25 2005
@@ -76,6 +76,7 @@
   TSFTPSupport * FSupport;
   bool FUtfStrings;
   bool FSignedTS;
+  bool FForceUtf;
   void __fastcall CustomReadFile(const AnsiString FileName,
     TRemoteFile *& File, char Type, TRemoteFile * ALinkedByFile = NULL,
diff -Nur winscp.orig\forms\Login.cpp winscp.patch\forms\Login.cpp
--- winscp.orig\forms\Login.cpp   Wed Feb 09 21:35:02 2005
+++ winscp.patch\forms\Login.cpp   Tue Mar 01 16:21:30 2005
@@ -94,6 +94,7 @@
+  InitializeBugsCombo(SFTPBugForceUtfCombo);
 void __fastcall TLoginDialog::Init()
@@ -231,6 +232,7 @@
       if (SFTPBug ## BUG ## Combo->ItemIndex < 0) SFTPBug ## BUG ## Combo->ItemIndex = 0
     // Authentication tab
@@ -495,6 +497,7 @@
   #define SAVE_SFTP_BUG_COMBO(BUG) aSessionData->SFTPBug[sb ## BUG] = (TAutoSwitch)(2 - SFTPBug ## BUG ## Combo->ItemIndex);
   // Proxy tab
diff -Nur winscp.orig\forms\Login.dfm winscp.patch\forms\Login.dfm
--- winscp.orig\forms\Login.dfm   Wed Feb 09 23:40:12 2005
+++ winscp.patch\forms\Login.dfm   Tue Mar 01 16:33:28 2005
@@ -969,7 +969,7 @@
           Left = 0
           Top = 6
           Width = 345
-          Height = 70
+          Height = 100
           Anchors = [akLeft, akTop, akRight]
           Caption = 'Detection of known bugs in SFTP servers'
           TabOrder = 0
@@ -992,6 +992,14 @@
             Caption = 'Does not use &UTF-8 for SFTP4 and newer'
             FocusControl = SFTPBugUtfCombo
+          object Label34: TLabel
+            Left = 12
+            Top = 68
+            Width = 115
+            Height = 13
+            Caption = 'Force remote use UTF-8'
+            FocusControl = SFTPBugForceUtfCombo
+          end
           object SFTPBugSymlinkCombo: TComboBox
             Left = 272
             Top = 15
@@ -1011,6 +1019,16 @@
             Anchors = [akLeft, akTop, akRight]
             ItemHeight = 0
             TabOrder = 1
+          end
+          object SFTPBugForceUtfCombo: TComboBox
+            Left = 272
+            Top = 63
+            Width = 61
+            Height = 21
+            Style = csDropDownList
+            Anchors = [akLeft, akTop, akRight]
+            ItemHeight = 0
+            TabOrder = 2
diff -Nur winscp.orig\forms\Login.h winscp.patch\forms\Login.h
--- winscp.orig\forms\Login.h   Thu Jan 27 21:27:00 2005
+++ winscp.patch\forms\Login.h   Tue Mar 01 16:20:50 2005
@@ -232,6 +232,8 @@
   TRadioButton *IPv6Button;
   TLabel *Label33;
   TComboBox *BugRekey2Combo;
+  TLabel *Label34;
+  TComboBox *SFTPBugForceUtfCombo;
   void __fastcall DataChange(TObject *Sender);
   void __fastcall FormShow(TObject *Sender);
   void __fastcall SessionListViewSelectItem(TObject *Sender,

Reply with quote


Site Admin
martin avatar
Prague, Czechia

Re: [PATCH] Force use UTF-8 for remote site in SFTP

Thanks for the patch!

It seems useful, so unless I find some problem with it, I'll add this to the next release.

Is there any way to autodetect the server for which the patch is necessary?

Can I somehow detect if the '\\' in the filename is really path delimiter or special character in Big5 encoding?

Reply with quote


As for autodetection, I don't know of a way, so I made this an option that the user can enable or disable for each session.

For Big5 encoding, I think there is no way to tell if we check the string byte by byte (Big5 is a double-byte character set).
I have another idea for checking this: if we find '\\', convert the filename to UTF-8 and check it again. After the conversion, a '\\' byte that was part of a regular Big5 character will have been converted to different (non-ASCII) bytes, whereas a real path delimiter will still be '\\'.

Like this:
diff -Nur winscp.orig\core\SftpFileSystem.cpp winscp.patch\core\SftpFileSystem.cpp
--- winscp.orig\core\SftpFileSystem.cpp   Wed Feb 09 09:41:30 2005
+++ winscp.patch\core\SftpFileSystem.cpp   Wed Mar 02 16:17:22 2005
@@ -2442,16 +2454,35 @@
           File = LoadFile(&ListingPacket, NULL, "");
           // security fix
           if (((File->FileName.Length() > 2) && IsDots(File->FileName)) || 
-              (File->FileName.Pos("/") > 0) || (File->FileName.Pos("\\") > 0))
+              (File->FileName.Pos("/") > 0))
             FTerminal->LogEvent(FORMAT("Ignored suspicious file '%s'", (File->FileName))); 
             delete File;
-            FileList->AddFile(File);
+            // if we find \\, try convert to UTF-8 then check again
+            if (File->FileName.Pos("\\") > 0)
+            {
+              AnsiString tmpstr = EncodeUTF(File->FileName);
+              if (tmpstr.Pos("\\") > 0)
+              {
+                FTerminal->LogEvent(FORMAT("Ignored suspicious file '%s'", (File->FileName))); 
+                delete File;
+              }
+              else
+              {
+                FileList->AddFile(File);
+                Total++;
+              }
+            }
+            else
+            {
+              FileList->AddFile(File);
-            Total++;
+              Total++;
+            }
           if (Total % 10 == 0)

Reply with quote

Site Admin
martin avatar
Prague, Czechia

twu2 wrote:

for autodetect? I don't know, so I make this an option for user to enable or disable it by himself for each session.
What kind of server is it? (OpenSSH?)

for big5 encoding, I think there is no way to check if we check the string byte by byte. (Big5 is 2 bytes character)
I've another ideal to check this one, just convert it to UTF-8 again if we find '\\', then check it again. Because after the convert, if '\\' included in a regular big5 word, it will convert to another ascii byte, if it is really path delimiter, it should still as '\\'.
I'm not sure I understand this. Do you mean that the first conversion, from UTF-8, converts the filename to another multibyte character set (Big5)? And what does the second conversion do?

Reply with quote


martin wrote:

What kind of server it is? (OpenSSH?)

martin wrote:

I'm not sure if I understand this. Do you mean that first conversion from UTF-8 converts the filename to another multibyte character set (Big5)? And what does the second conversion?

No. In ReadDirectory(), the filename we receive from the sftp-server is already in Big5 (even if the remote side sends UTF-8 to us, we convert it from UTF-8 to ANSI first; Windows uses Big5 as the default encoding for Chinese).

If we find \ in the filename, it may be either a regular path delimiter or a byte of a Chinese Big5 character.

To find out whether it belongs to a Chinese Big5 character, we just convert the string from Big5 to UTF-8. If it was a byte of a Big5 character, it will change to different (non-ASCII) bytes, so we check the UTF-8 string again to determine whether it is a path delimiter.

like this:
        for (unsigned long Index = 0; Index < Count; Index++)
          File = LoadFile(&ListingPacket, NULL, "");
          // security fix
          if (((File->FileName.Length() > 2) && IsDots(File->FileName)) ||
              (File->FileName.Pos("/") > 0))
//              (File->FileName.Pos("/") > 0) || (File->FileName.Pos("\\") > 0))
            FTerminal->LogEvent(FORMAT("Ignored suspicious file '%s'", (File->FileName)));
            delete File;
            // check \ in filename or not
            if (File->FileName.Pos("\\") > 0) {
                // find \, but we want to make sure it's not a byte of a MBCS word, like Chinese Big5
                // so we convert the filename to UTF-8
                // then check again
                AnsiString tmpstr = EncodeUTF(File->FileName);
            FTerminal->LogEvent(FORMAT("Finding \\ for '%s'", (File->FileName)));
            FTerminal->LogEvent(FORMAT("Convert to for '%s'", (tmpstr)));
                  if (tmpstr.Pos("\\") > 0) {
                       // still find \ in UTF-8 filename, so it is a path delimiter, ignore this one
            FTerminal->LogEvent(FORMAT("Ignored suspicious file '%s'", (File->FileName)));
            delete File;
                  else {
            else {

Reply with quote


Site Admin
martin avatar
Prague, Czechia

twu2 wrote:

martin wrote:

What kind of server it is? (OpenSSH?)
On what OS? Is it standard build or some patched version?

if we want to know it is belong to a Chinese big5 word or not, we just convert the string from Big5 to UTF-8, if that's a byte in Chinese big5 word, it will change to another ascii code, so we check the UTF-8 string again to make sure it's path delimiter or not.
Can you try if it helps to use IsPathDelimiter() function? From VCL help:

Call IsPathDelimiter to determine whether a position in the string S contains the path delimiter character ("\"). Positions are numbered from 1.

When working with a multi-byte character system (MBCS), IsPathDelimiter distinguishes between a true delimiter character, and the byte of the same value that can appear as the second byte of a double byte character.

To test for more characters than just the delimiter, use IsDelimiter.

Reply with quote


martin wrote:

twu2 wrote:

martin wrote:

What kind of server it is? (OpenSSH?)
On what OS? Is it standard build or some patched version?

Debian Sarge standard build.

martin wrote:

if we want to know it is belong to a Chinese big5 word or not, we just convert the string from Big5 to UTF-8, if that's a byte in Chinese big5 word, it will change to another ascii code, so we check the UTF-8 string again to make sure it's path delimiter or not.
Can you try if it helps to use IsPathDelimiter() function? From VCL help:

Call IsPathDelimiter to determine whether a position in the string S contains the path delimiter character ("\"). Positions are numbered from 1.

When working with a multi-byte character system (MBCS), IsPathDelimiter distinguishes between a true delimiter character, and the byte of the same value that can appear as the second byte of a double byte character.

To test for more characters than just the delimiter, use IsDelimiter.

No, IsPathDelimiter() does not always work.

For example, if we call IsPathDelimiter() for each index of the string:

"\xB3\x5C\xA5\x5C\xA5\x5C" => always false.
"\xB3\x5C\xA5\x5C\x5C\xA5\x5C" => true for index = 5.
"\xB3\x5C\xA5\x5C\xA5\x5C\x5C" => always false.

For the third string, it should return true, but IsPathDelimiter() returns false.

I think converting to UTF-8 and then checking again is easier.

Reply with quote

Site Admin
martin avatar
Prague, Czechia

twu2 wrote:

no, IsPathDelimiter() not always work.

for example, if we call IsPathDelimiter() for each index of string:

"\xB3\x5C\xA5\x5C\xA5\x5C" => always false.
"\xB3\x5C\xA5\x5C\x5C\xA5\x5C" => true for index = 5.
"\xB3\x5C\xA5\x5C\xA5\x5C\x5C" => always false.

for the third string, it should return true, but use IsPathDelimiter() will return false.

I think convert to UTF-8 then check it again is more easy.
And do you have any idea why it fails?

Reply with quote


martin wrote:

And have you any idea why does it fail?

Sorry, that was my mistake... the index should start from 1.
I tested it again.

IsPathDelimiter() works.

int i, len;
len = File->FileName.Length();
bool found = false;
for (i = 1; i <= len; i++) {
  if (IsPathDelimiter(File->FileName,i)) {
    found = true;
if (found) {
  FTerminal->LogEvent(FORMAT("Ignored suspicious file '%s'", (File->FileName)));

Reply with quote


Site Admin
martin avatar
Prague, Czechia

twu2 wrote:

sorry, that's my fault... the index should start from 1.
I test it again.

IsPathDelimiter() work.
That's good. IsPathDelimiter() seems like a more elegant solution.

Reply with quote


You can post new topics in this forum