 {
 ****************************************************************************
 CRW32 project
 Copyright (C) by Kuryakin Alexey, Sarov, Russia, 2003, <kouriakine@mail.ru>
 BackSlash encode/decode (also known as escape\unuscape).
 Modifications:
 20220209 - creation.
 ****************************************************************************
 }

unit _bsencode; { Encode/decode with backshash escape/unescape }

{$I _sysdef}

interface

uses sysutils, classes, windows, _alloc, _str, _crypt, _mime;

 {
 **************************************************************
 C/C++ - style encoding with backslash (\) escaping.
 Escape sequence    Description     Representation
 \a                 audible bell    byte 0x07 in ASCII encoding
 \b                 backspace       byte 0x08 in ASCII encoding
 \t                 horizontal tab  byte 0x09 in ASCII encoding
 \n                 line feed       byte 0x0a in ASCII encoding
 \v                 vertical tab    byte 0x0b in ASCII encoding
 \f                 form feed       byte 0x0c in ASCII encoding
 \r                 carriage return byte 0x0d in ASCII encoding
 \"                 double quote    byte 0x22 in ASCII encoding
 \'                 single quote    byte 0x27 in ASCII encoding
 \?                 question mark   byte 0x3f in ASCII encoding
 \\                 backslash       byte 0x5c in ASCII encoding
 \xnn               hexadecimal     byte nn
 Notes:
  1)    control chars \x00..\x1F,\x7F always passed as hex \xnn
  2)    any printable char (c) except of (a,b,t,n,v,f,r,",',?,\)
        may be passed as \c (use esclist to apply this pass).
  3)    any char (c) may be passed as hex \xnn (use hexlist).
  4)    current version don`t use octal codes, use hex instead.
 ***************************************************************
 }
function backslash_encode(const data:String; const esclist:TCharSet=[]; const hexlist:TCharSet=[]):String;
function backslash_decode(const data:String):String;

function StringOfCharRange(a,b:Char):String;

function backslash_encoder_self_test(n:Integer=1; const esclist:TCharSet=[]; const hexlist:TCharSet=[]):Boolean;
function backslash_encode_self_test_and_benchmark:String;

implementation

function backslash_encode(const data:String; const esclist:TCharSet=[]; const hexlist:TCharSet=[]):String;
const MaxSizeFactor=4; HexTab:PChar='0123456789ABCDEF';
var i,n,m,datalen,esclen,hexlen:Integer; buff:String; ch:Char; p:PChar;
 procedure EmitChr(ch:Char); begin p[0]:=ch; inc(p); inc(n); end;
 procedure EmitEsc(ch:Char); begin p[0]:='\'; p[1]:=ch; inc(p,2); inc(n,2); end;
 procedure EmitHex(ch:Char); begin p[0]:='\'; p[1]:='x'; p[2]:=HexTab[Byte(ch) shr 4]; p[3]:=HexTab[Byte(ch) and $F]; inc(p,4); inc(n,4); end;
begin
 Result:='';
 if (data='') then Exit;
 try
  datalen:=Length(data);
  esclen:=Ord(esclist<>[]);
  hexlen:=Ord(hexlist<>[]);
  SetLength(buff,datalen*MaxSizeFactor);
  p:=PChar(buff); n:=0;
  for i:=1 to datalen do begin
   ch:=data[i];
   if (esclen>0) and (ch in esclist) then m:=1 else
   if (hexlen>0) and (ch in hexlist) then m:=2 else m:=0;
   case ch of
    ASCII_NUL..ASCII_ACK,ASCII_SO..ASCII_US,ASCII_DEL : EmitHex(ch);
    ASCII_BEL           : case m of 0,1:EmitEsc('a'); 2:EmitHex(ch); end;
    ASCII_BS            : case m of 0,1:EmitEsc('b'); 2:EmitHex(ch); end;
    ASCII_HT            : case m of 0,1:EmitEsc('t'); 2:EmitHex(ch); end;
    ASCII_LF            : case m of 0,1:EmitEsc('n'); 2:EmitHex(ch); end;  
    ASCII_VT            : case m of 0,1:EmitEsc('v'); 2:EmitHex(ch); end;
    ASCII_FF            : case m of 0,1:EmitEsc('f'); 2:EmitHex(ch); end;
    ASCII_CR            : case m of 0,1:EmitEsc('r'); 2:EmitHex(ch); end;   
    '\'                 : case m of 0,1:EmitEsc('\'); 2:EmitHex(ch); end;
    QuoteMark           : case m of 0,1:EmitEsc(ch);  2:EmitHex(ch); end;
    Apostrophe          : case m of 0,1:EmitEsc(ch);  2:EmitHex(ch); end;
    '?'                 : case m of 0,1:EmitEsc(ch);  2:EmitHex(ch); end;
    'a','b','t','n'     : case m of 0,1:EmitChr(ch);  2:EmitHex(ch); end;
    'v','f','r','x'     : case m of 0,1:EmitChr(ch);  2:EmitHex(ch); end;
    else     case m of 0:EmitChr(ch); 1:EmitEsc(ch);  2:EmitHex(ch); end;
   end;
  end;
  SetLength(buff,n);
  Result:=buff;
 except
  on E:Exception do BugReport(E,nil,'backslash_encode');
 end;
end;

var TabHex:packed array[Char] of Byte;

procedure TabHexInit;
const HexTab='0123456789ABCDEF'; var c:Char; i:Integer;
begin
 for c:=Low(TabHex) to High(TabHex) do TabHex[c]:=255;
 for i:=1 to Length(HexTab) do begin
  c:=HexTab[i]; TabHex[c]:=i-1;
  c:=UpCase(c); TabHex[c]:=i-1;
  c:=LoCase(c); TabHex[c]:=i-1;
 end;
end;

function backslash_decode(const data:String):String;
var i,n,datalen:Integer; buff:String; ch,c1,c2,c3:Char; p:PChar; ph,p2,p3:Byte;
 procedure EmitChr(ch:Char); begin p[0]:=ch; inc(p); inc(n); end;
begin
 Result:='';
 if (data='') then Exit;
 try
  datalen:=Length(data);
  SetLength(buff,datalen);
  p:=PChar(buff); n:=0; i:=0;
  while (i<datalen) do begin
   inc(i);
   ch:=data[i];
   if (ch<>'\') then EmitChr(ch) else begin
    inc(i); if (i>datalen) then Exit;
    c1:=data[i];
    case c1 of
     'a' : EmitChr(ASCII_BEL);
     'b' : EmitChr(ASCII_BS);
     't' : EmitChr(ASCII_HT);
     'n' : EmitChr(ASCII_LF);
     'v' : EmitChr(ASCII_VT);
     'f' : EmitChr(ASCII_FF);
     'r' : EmitChr(ASCII_CR);
     'x' : begin
            inc(i); if (i>datalen) then Exit; c2:=data[i];
            inc(i); if (i>datalen) then Exit; c3:=data[i];
            p2:=TabHex[c2]; if (p2>$F) then Exit;
            p3:=TabHex[c3]; if (p3>$F) then Exit;
            ph:=(p2 shl 4) + (p3 and $F);
            EmitChr(Chr(ph));
           end;
     else  EmitChr(c1);
    end;
   end;
  end;
  SetLength(buff,n);
  Result:=buff;
 except
  on E:Exception do BugReport(E,nil,'backslash_decode');
 end;
end;

function StringOfCharRange(a,b:Char):String;
var c:Char; i:Integer;
begin
 Result:='';
 i:=0; for c:=a to b do inc(i);
 if (i>0) then SetLength(Result,i) else Exit;
 i:=0; for c:=a to b do begin inc(i); Result[i]:=c; end;
end;

function backslash_encoder_self_test(n:Integer=1; const esclist:TCharSet=[]; const hexlist:TCharSet=[]):Boolean;
var data:String; i:Integer;
begin
 data:='';
 for i:=1 to n do data:=data+StringOfCharRange(#0,#255);
 Result:=(backslash_decode(backslash_encode(data,esclist,hexlist))=data);
end;

function backslash_encode_self_test_and_benchmark:String;
var t0,dt:Cardinal; n:integer; data:String; escl,hexl:TCharSet; List:TStringList;
begin
 Result:='';
 try
  List:=TStringList.Create;
  try
   List.Add(backslash_encode('Hello, World!'+LineEnding,['!'],[' ']));
   List.Add(Format('Test1 = %d',[Ord(backslash_encoder_self_test(10,[],[]))]));
   List.Add(Format('Test2 = %d',[Ord(backslash_encoder_self_test(10,[#0..#255],[]))]));
   List.Add(Format('Test3 = %d',[Ord(backslash_encoder_self_test(10,[],[#0..#255]))]));
   List.Add(Format('Test4 = %d',[Ord(backslash_encoder_self_test(10,[#0..#255],[#0..#255]))]));
   List.Add(backslash_encode(StringOfCharRange(#0,#255)));
   List.Add(backslash_encode(backslash_decode(backslash_encode(StringOfCharRange(#0,#255)))));
   List.Add(backslash_encode(StringOfCharRange(#0,#255),[#0..#255],[]));
   List.Add(backslash_encode(StringOfCharRange(#0,#255),[],[#0..#255]));
   List.Add(backslash_encode(StringOfCharRange(#0,#255),[#0..#255],[#0..#255]));
   //
   data:=StringOfCharRange(#0,#255);
   escl:=[#0..#255]; hexl:=[#0..#255];
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin backslash_encode(data); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('backslash_encode %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin mime_encode(data); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('mime_encode %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin hex_encode(data); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('hex_encode %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin backslash_decode(backslash_encode(data)); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('backslash_encode/decode %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin mime_decode(mime_encode(data)); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('mime_encode/decode %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin hex_decode(hex_encode(data)); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('hex_encode/decode %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin backslash_decode(backslash_encode(data,escl,[])); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('backslash_encode(esc) %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   n:=0; t0:=GetTickCount;
   while GetTickCount-t0<200 do begin backslash_decode(backslash_encode(data,[],hexl)); inc(n); end;
   dt:=GetTickCount-t0;
   List.Add(Format('backslash_encode(hex) %1.3f mks per call, %1.3f ns per char',[dt*1e3/n,dt*1e6/n/length(data)]));
   //
   Result:=List.Text;
  finally
   List.Free;
  end;
 except
  on E:Exception do BugReport(E,nil,'backslash_encode_self_test_and_benchmark');
 end;
end;

initialization

 TabHexInit;

end.
