unit IBParser;

{$mode objfpc}{$H+}

{$I IBDef.inc}

interface

uses
  Classes, SysUtils, LazUTF8;

const
  dsEOF     = Char(0);
  dsSymbol  = Char(1);
  dsString  = Char(2);
  dsInteger = Char(3);
  dsHexInt  = Char(4);
  dsFloat   = Char(5);
  dsText    = Char(6);
  dsChar    = Char(7);
  dsUnknSym = Char(8);

type

  //TIBAnsiFunction = (iaf1250, iaf1251, iaf1252, iaf1253, iaf1254, iaf1255, iaf1256, iaf1257, iaf1258);

  TIdentToken = (
    ID_EOF,
    ID_Identifier,
    ID_SemiColon,
    ID_Comma,
    ID_Period,
    ID_Colon,
    ID_OpenRound,
    ID_CloseRound,
    ID_OpenBlock,
    ID_CloseBlock,
    ID_Assignment,
    ID_Inc,
    ID_Dec,
    ID_IncAssign,
    ID_DecAssign,
    ID_Equal,
    ID_NotEqual,
    ID_Greater,
    ID_GreaterEqual,
    ID_Less,
    ID_LessEqual,
    ID_Plus,
    ID_Minus,
    ID_Divide,
    ID_Multiply,
    ID_Integer,
    ID_Float,
    ID_String,
    ID_Dots,
    ID_and,
    ID_array,
    ID_begin,
    ID_case,
    ID_const,
    ID_div,
    ID_do,
    ID_downto,
    ID_else,
    ID_end,
    ID_for,
    ID_execute,
    ID_if,
    ID_in,
    ID_mod,
    ID_not,
    ID_of,
    ID_or,
    ID_procedure,
    ID_then,
    ID_to,
    ID_Break,
    ID_Continue,
    ID_Exit,
    ID_variable,
    ID_while,
    ID_As,
    ID_Is,
    ID_Exception,
    ID_On,
    ID_out,
    ID_nil,
    ID_False,
    ID_True,

    ID_Insert,
    ID_Update,
    ID_Delete,
    ID_Select,
    ID_From,
    ID_Block,
    ID_Values,
    ID_Value,
    ID_Left,
    ID_Right,
    ID_Outer,
    ID_Join,
    ID_Returning,
    ID_Suspend,
    ID_Declare,
    ID_Inner,
    ID_where,
    ID_Full,
    ID_Into,
    ID_Set,
    ID_Create,
    ID_Alter,
    ID_View,
    ID_Sequence,
    ID_Restart,
    ID_With,
    ID_Order,
    ID_By,
    ID_Desc,
    ID_Asc,
    ID_Collate,
    ID_Nulls,
    ID_First,
    ID_Last,

    ID_Unknown
    );


  PTokenRec = ^TTokenRec;
  TTokenRec = record
    TokenName: string;
    TokenID: TIdentToken;
    Col: Integer;
    Row: Integer;
    Pos: Integer;
    GlobalPos: LongInt;
  end;

  { TIBTokensList }

  TIBTokensList = class
  private
    FList: TList;
    FPos: Integer;
    function Get(Index: Integer): TTokenRec;
    function GetToken: TTokenRec;
    function GetPreviewNext: TTokenRec;
  protected
  public
    constructor Create;
    destructor Destroy; override;
    procedure Clear;
    function Count: Integer;
    procedure Next;
    procedure Prev;
    function Add(TokenName: string; TokenID: TIdentToken; Col, Row: Integer; GlobalPos, Pos: LongInt): Integer;
    property Pos: Integer read FPos write FPos;
    property Items[Index: Integer]: TTokenRec read Get;
    property Token: TTokenRec read GetToken;
    property PreviewNext: TTokenRec read GetPreviewNext;
  end;

  { TIBParser }

  TIBParser = class
  private
    FString: string;
    FStartPos: PChar;
    FEndPos: PChar;
    FRow: Longint;
    FStartCol: PChar;
    FGlStartPos: PChar;
    FTokenPos: Integer;
    FTokenEnd: Integer;
    FTokens: TIBTokensList;

    function GetCol: Integer;
    function GetGlPos: LongInt;
    function GetPoss: LongInt;
  protected
    function TokenString: string;
    function TokenText: string;
    function TokenFloat: Extended;
    function TokenInt: Integer;
  public
    //function PreviewToken: AnsiChar;

    constructor Create; virtual;
    function IsIdentChar(p: PChar): boolean;
    destructor Destroy; override;
    function NextToken: Char; virtual;
    procedure SetParseStr(S: String);
    function GetTokensList: TIBTokensList;
    property Row: LongInt read FRow;
    property Col: LongInt read GetCol;
    property GlPos: LongInt read GetGlPos;
    property Poss: LongInt read GetPoss;
    property TokenPos: Integer read FTokenPos;
    property TokenEnd: Integer read FTokenEnd;
    property AsString: String read TokenString;
    property AsFloat: Extended read TokenFloat;
    property AsInteger: Integer read TokenInt;
    property AsText: String read TokenText;
    property Tokens: TIBTokensList read FTokens;
  end;

//var
//  DefaultIBAnsiFunction: TIBAnsiFunction = iaf1251;

//function IBUTF8ToAnsi(S: UTF8String): AnsiString;
//function IBAnsiToUTF8(S: AnsiString): UTF8String;

implementation

uses character;

//function IBUTF8ToAnsi(S: UTF8String): AnsiString;
//begin
//  case DefaultIBAnsiFunction of
//    iaf1250: Result := UTF8ToCP1250(S);
//    iaf1251: Result := UTF8ToCP1251(S);
//    iaf1252: Result := UTF8ToCP1252(S);
//    iaf1253: Result := UTF8ToCP1253(S);
//    iaf1254: Result := UTF8ToCP1254(S);
//    iaf1255: Result := UTF8ToCP1255(S);
//    iaf1256: Result := UTF8ToCP1256(S);
//    iaf1257: Result := UTF8ToCP1257(S);
//    iaf1258: Result := UTF8ToCP1258(S);
//  else
//    Result := S;
//  end;
//end;
//
//function IBAnsiToUTF8(S: AnsiString): UTF8String;
//begin
//  case DefaultIBAnsiFunction of
//    iaf1250: Result := CP1250ToUTF8(S);
//    iaf1251: Result := CP1251ToUTF8(S);
//    iaf1252: Result := CP1252ToUTF8(S);
//    iaf1253: Result := CP1253ToUTF8(S);
//    iaf1254: Result := CP1254ToUTF8(S);
//    iaf1255: Result := CP1255ToUTF8(S);
//    iaf1256: Result := CP1256ToUTF8(S);
//    iaf1257: Result := CP1257ToUTF8(S);
//    iaf1258: Result := CP1258ToUTF8(S);
//  else
//    Result := S;
//  end;
//end;

constructor TIBParser.Create;
begin
  inherited Create;
  FTokens := TIBTokensList.Create;
end;

function TIBParser.IsIdentChar(p: PChar): boolean;
var
  u: UnicodeString;
  i: Integer;
  L: SizeUInt;
begin
  Result := False;
  Result := p^ in ['a'..'z','A'..'Z','0'..'9','_'];
  if Result then exit;

  if p^ <= #127 then exit;
  i := UTF8CharacterLength(p);
  SetLength(u, i);
  // wide chars of UTF-16 <= bytes of UTF-8 string
  if ConvertUTF8ToUTF16(PWideChar(u), i + 1, p, i, [toInvalidCharToSymbol], L) = trNoError
  then
  begin
    SetLength(u, L - 1);
    if L > 1 then
      Result := TCharacter.IsLetterOrDigit(u, 1);
  end;
end;

destructor TIBParser.Destroy;
begin
  FTokens.Free;
  inherited Destroy;
end;

procedure TIBParser.SetParseStr(S: String);
var
  I: Integer;
begin
  I := Length(S);
  FString := '';
  FRow := 0;
  FTokenPos := 0;
  FTokenEnd := 0;

  if I > 0 then
  begin
    FString := S;
    I := Length(FString);
    Inc(I);
    SetLength(FString, I);
    FString[I] := #0;
    FStartPos := @FString[1];
    FGlStartPos := FStartPos;
    FEndPos := FStartPos;
    FStartCol := FStartPos;
  end
  else
  begin
    FStartPos := nil;
    FEndPos := nil;
    FStartCol := nil;
  end;
end;



function TIBParser.NextToken: Char;
var
  P: PChar;
  I: Integer;

  procedure SkipBlanks;
  begin
    case P^ of
      ' ', #8:
      begin
        while (P^ = ' ') or (P^ = #8) do
        begin
          Inc(P);
        end;
      end;

      '-':
      begin
        Inc(P, 2);
        while not (P^ in [#0, #13, #10]) do Inc(P);
      end;

      '/':
      begin
        Inc(P, 2);
        while P^ <> dsEOF do
        begin
          case P^ of
            #10:
            begin
              Inc(FRow);
              FStartCol := P;
            end;
            '*':
            begin
              if (P + 1)^ = '/' then
              begin
                Inc(P, 2);
                Break;
              end;
            end;
          end;

          Inc(P);
        end;
      end;

      #13:
      begin
        Inc(P);
      end;

      #10:
      begin
        Inc(FRow);
        Inc(P);
        FStartCol := P;
      end;
    end;

  end;

begin
  if FStartPos = nil then
  begin
    Result := dsEOF;
    Exit;
  end;
  FStartPos := FEndPos;
  P := FStartPos;

  if P^ = #0 then
  begin
    Result := dsEOF;
    Exit;
  end;

  while (P^ = ' ') or (P^ = #8)
    or (P^ = #13) or (P^ = #10)
    or (((P^ = '-') and ((P + 1)^ = '-')))
    or ((P^ = '/') and ((P + 1)^ = '*')) do
  begin
    SkipBlanks;
  end;

  FStartPos := P;

  if P^ = #0 then
    Result := dsEOF
  else
  if P^ in ['0'..'9'] then
  begin
    Result := dsInteger;
    Inc(P);
    while (P^ in ['0'..'9', '.', 'E', 'e']) do
    begin
      if (P^ = '.') then
      begin
        if ((P + 1)^ in ['0'..'9']) then
          Result := dsFloat
        else Break;
      end
      else
      if (P^ = 'e') or (P^ = 'E') then
      begin
          Result := dsFloat;
          if ((P + 1)^ = '+') or ((P + 1)^ = '-') then Inc(P);
      end;
      Inc(P);
    end;
  end
  else
  if IsIdentChar(P) then
  begin
    I := UTF8CharacterLength(P);
    Inc(P, I);
    while IsIdentChar(P) or (P^ = '$') do
    begin
      I := UTF8CharacterLength(P);
      Inc(P, I);
    end;
    Result := dsString;
  end
  else
  if P^ = '''' then
  begin
    Inc(P);
    while (P^ <> #0) and (P^ <> #10) and (P^ <> #13) do
    begin
        if P^ = '''' then
        begin
          if (P + 1)^ = '''' then
          begin
            Inc(P, 2);
            Continue;
          end
          else
          begin
            Inc(P);
            Break;
          end;
        end;
        Inc(P);
    end;
    Result := dsText;
  end
  else
  begin
    if ((P^ = '=') or (P^ = '<') or (P^ = '>')) and ((P + 1)^ = '=') then Inc(P)
    else
    if (P^ = '<') and ((P + 1)^ = '>') then Inc(P);

    Inc(P);
    Result := dsSymbol;
  end;
  FEndPos := P;
end;

function TIBParser.TokenString: string;
var
  I: Integer;
begin
  if (FStartPos <> nil) or (FEndPos <> nil) then
  begin
    I := FEndPos - FStartPos;
    FTokenPos := FTokenEnd;
    FTokenEnd := FTokenPos + I;
    SetString(Result, FStartPos, I);
    //FStartPos := FEndPos;
  end;
end;

function TIBParser.TokenText: string;
var
  I: Integer;
  R: String;
begin
  R := '';
  if (FStartPos <> nil) or (FEndPos <> nil) then
  begin
    I := FEndPos - FStartPos;
    FTokenPos := FTokenEnd;
    FTokenEnd := FTokenPos + I;
    SetString(R, FStartPos + 1, I - 2);
    I := Length(R);
    while I > 0 do
    begin
      if (I > 1) and (R[I] = '''') then
      begin
        if R[I - 1] = '''' then
        begin
          Delete(R, I, 1);
          Dec(I);
        end;
      end;
      Dec(I);
    end;
  end;
  Result := R;
end;

function TIBParser.TokenFloat: Extended;
begin
  Result := StrToFloat(AsString);
end;

function TIBParser.TokenInt: Integer;
begin
  Result := StrToInt(AsString);
end;

function TIBParser.GetCol: Integer;
begin
  Result := FStartPos - FStartCol;
end;

function TIBParser.GetGlPos: LongInt;
begin
  Result := FEndPos - FGlStartPos;
end;

function TIBParser.GetPoss: LongInt;
begin
  Result := FStartPos - FGlStartPos;
end;

function HexToInt(S: string): integer;
var
  Tempt: string;
  I: integer;
begin
  Tempt := '';
  if S = '' then
  begin
    HexToInt := 0;
  end
  else
  begin
    for i := 1 to Length(s) do
    begin
      tempt := tempt + IntToHex(Ord(s[i]), 2);
    end;
    HexToInt := StrToInt('$' + tempt);
  end;
end;

function TIBParser.GetTokensList: TIBTokensList;
var
  C: Char;
  S, HS: String;
begin
  Result := FTokens;
  if FString = '' then Exit;
  C := NextToken;
  while True do
  begin
    case C of
      dsSymbol:
      begin
        S := AsString;
          if S = '>=' then Tokens.Add(S, ID_GreaterEqual, Col, Row, GlPos, Poss)
          else
          if S = '>' then Tokens.Add(S, ID_Greater, Col, Row, GlPos, Poss)
          else
          if S = '<' then Tokens.Add(S, ID_Less, Col, Row, GlPos, Poss)
          else
          if S = '<=' then Tokens.Add(S, ID_LessEqual, Col, Row, GlPos, Poss)
          else
          if S = '<>' then Tokens.Add(S, ID_NotEqual, Col, Row, GlPos, Poss)
          else
          if S = '(' then Tokens.Add(S, ID_OpenRound, Col, Row, GlPos, Poss)
          else
          if S = ')' then Tokens.Add(S, ID_CloseRound, Col, Row, GlPos, Poss)
          else
          if S = '[' then Tokens.Add(S, ID_OpenBlock, Col, Row, GlPos, Poss)
          else
          if S = ']' then Tokens.Add(S, ID_CloseBlock, Col, Row, GlPos, Poss)
          else
          if S = ',' then Tokens.Add(S, ID_Comma, Col, Row, GlPos, Poss)
          else
          if S = '.' then Tokens.Add(S, ID_Period, Col, Row, GlPos, Poss)
          else
          if S = '..' then Tokens.Add(S, ID_Dots, Col, Row, GlPos, Poss)
          else
          if S = ';' then Tokens.Add(S, ID_SemiColon, Col, Row, GlPos, Poss)
          else
          if S = ':' then Tokens.Add(S, ID_Colon, Col, Row, GlPos, Poss)
          else
          if S = '=' then Tokens.Add(S, ID_Assignment, Col, Row, GlPos, Poss)
          else
          if S = '+' then Tokens.Add(S, ID_Plus, Col, Row, GlPos, Poss)
          else
          if S = '-' then Tokens.Add(S, ID_Minus, Col, Row, GlPos, Poss)
          else
          if S = '*' then Tokens.Add(S, ID_Multiply, Col, Row, GlPos, Poss)
          else
          if S = '/' then Tokens.Add(S, ID_Divide, Col, Row, GlPos, Poss)
          else
            Tokens.Add(S, ID_Unknown, Col, Row, GlPos, Poss);
      end;
      dsChar:
      begin
        S := AsString;
        S := Chr(StrToInt(S));
        Tokens.Add(S, ID_String, Col, Row, GlPos, Poss);
      end;
      dsString:
      begin
        S := AsString;
        HS := LowerCase(S);
        if HS = 'begin' then
          Tokens.Add(S, ID_begin, Col, Row, GlPos, Poss)
        else if HS = 'end' then
          Tokens.Add(S, ID_end, Col, Row, GlPos, Poss)
        else if HS = 'execute' then
          Tokens.Add(S, ID_execute, Col, Row, GlPos, Poss)
        else if HS = 'block' then
          Tokens.Add(S, ID_Block, Col, Row, GlPos, Poss)
        else if HS = 'procedure' then
          Tokens.Add(S, ID_procedure, Col, Row, GlPos, Poss)
        else if HS = 'insert' then
          Tokens.Add(S, ID_Insert, Col, Row, GlPos, Poss)
        else if HS = 'update' then
          Tokens.Add(S, ID_Update, Col, Row, GlPos, Poss)
        else if HS = 'select' then
          Tokens.Add(S, ID_Select, Col, Row, GlPos, Poss)
        else if HS = 'from' then
          Tokens.Add(S, ID_From, Col, Row, GlPos, Poss)
        else if HS = 'delete' then
          Tokens.Add(S, ID_Delete, Col, Row, GlPos, Poss)
        else if HS = 'returning' then
          Tokens.Add(S, ID_Returning, Col, Row, GlPos, Poss)
        else if HS = 'if' then
          Tokens.Add(S, ID_if, Col, Row, GlPos, Poss)
        else if HS = 'then' then
          Tokens.Add(S, ID_then, Col, Row, GlPos, Poss)
        else if HS = 'for' then
          Tokens.Add(S, ID_for, Col, Row, GlPos, Poss)
        else if HS = 'while' then
          Tokens.Add(S, ID_while, Col, Row, GlPos, Poss)
        else if HS = 'do' then
          Tokens.Add(S, ID_do, Col, Row, GlPos, Poss)
        else if HS = 'suspend' then
          Tokens.Add(S, ID_Suspend, Col, Row, GlPos, Poss)
        else if HS = 'left' then
          Tokens.Add(S, ID_Left, Col, Row, GlPos, Poss)
        else if HS = 'right' then
          Tokens.Add(S, ID_Right, Col, Row, GlPos, Poss)
        else if HS = 'inner' then
          Tokens.Add(S, ID_Inner, Col, Row, GlPos, Poss)
        else if HS = 'outer' then
          Tokens.Add(S, ID_Outer, Col, Row, GlPos, Poss)
        else if HS = 'join' then
          Tokens.Add(S, ID_Join, Col, Row, GlPos, Poss)
        else if HS = 'where' then
          Tokens.Add(S, ID_where, Col, Row, GlPos, Poss)
        else if HS = 'on' then
          Tokens.Add(S, ID_On, Col, Row, GlPos, Poss)
        else if HS = 'full' then
          Tokens.Add(S, ID_Full, Col, Row, GlPos, Poss)
        else if HS = 'into' then
          Tokens.Add(S, ID_Into, Col, Row, GlPos, Poss)
        else if HS = 'exception' then
          Tokens.Add(S, ID_Exception, Col, Row, GlPos, Poss)
        else if HS = 'and' then
          Tokens.Add(S, ID_and, Col, Row, GlPos, Poss)
        else if HS = 'or' then
          Tokens.Add(S, ID_or, Col, Row, GlPos, Poss)
        else if HS = 'set' then
          Tokens.Add(S, ID_Set, Col, Row, GlPos, Poss)
        else if HS = 'create' then
          Tokens.Add(S, ID_Create, Col, Row, GlPos, Poss)
        else if HS = 'alter' then
          Tokens.Add(S, ID_Alter, Col, Row, GlPos, Poss)
        else if HS = 'view' then
          Tokens.Add(S, ID_View, Col, Row, GlPos, Poss)
        else if HS = 'sequence' then
          Tokens.Add(S, ID_Sequence, Col, Row, GlPos, Poss)
        else if HS = 'restrart' then
          Tokens.Add(S, ID_Restart, Col, Row, GlPos, Poss)
        else if HS = 'with' then
          Tokens.Add(S, ID_With, Col, Row, GlPos, Poss)
        else if HS = 'order' then
          Tokens.Add(S, ID_Order, Col, Row, GlPos, Poss)
        else if HS = 'by' then
          Tokens.Add(S, ID_By, Col, Row, GlPos, Poss)
        else if (HS = 'desc') or (HS = 'descending') then
          Tokens.Add(S, ID_Desc, Col, Row, GlPos, Poss)
        else if (HS = 'asc') or (HS = 'ascending') then
          Tokens.Add(S, ID_Asc, Col, Row, GlPos, Poss)
        else if (HS = 'collate') then
          Tokens.Add(S, ID_Collate, Col, Row, GlPos, Poss)
        else if (HS = 'nulls') then
          Tokens.Add(S, ID_Nulls, Col, Row, GlPos, Poss)
        else if (HS = 'first') then
          Tokens.Add(S, ID_First, Col, Row, GlPos, Poss)
        else if (HS = 'last') then
          Tokens.Add(S, ID_Last, Col, Row, GlPos, Poss)
        else
        begin
          Tokens.Add(S, ID_Identifier, Col, Row, GlPos, Poss);
        end;
      end;
      dsInteger:
      begin
        S := AsString;
        Tokens.Add(S, ID_Integer, Col, Row, GlPos, Poss);
      end;
      dsHexInt:
      begin
        S := AsString;
        Tokens.Add(S, ID_Integer, Col, Row, GlPos, Poss);
      end;
      dsFloat:
      begin
        S := AsString;
        Tokens.Add(S, ID_Float, Col, Row, GlPos, Poss);
      end;
      dsText:
      begin
        S := AsText;
        {if Length(S) = 1 then
          Tokens.Add(S, ID_String, Col, Row, GlPos, Poss)
        else}
        Tokens.Add(S, ID_String, Col, Row, GlPos, Poss);
      end;
      dsEOF:
      begin
        S := '';
        Tokens.Add(S, ID_EOF, Col, Row, 0, 0);
        Break;
      end;
      dsUnknSym:
      begin
        S := AsString;
        Tokens.Add(S, ID_Unknown, Col, Row, GlPos, Poss);
      end;
    end;
    C := NextToken;
  end;
end;

{TIBTokensList}

constructor TIBTokensList.Create;
begin
  FList := TList.Create;
  FPos := 0;
end;

destructor TIBTokensList.Destroy;
begin
  Clear;
  FList.Free;
  inherited Destroy;
end;

function TIBTokensList.Count: Integer;
begin
  Result := FList.Count;
end;

procedure TIBTokensList.Clear;
var
  PR: PTokenRec;
  I: Integer;
begin
  I := 0;
  while I < FList.Count do
  begin
    PR := PTokenRec(FList.Items[I]);
    Dispose(PR);
    FList.Items[I] := nil;
    Inc(I);
  end;
  FList.Clear;
end;

function TIBTokensList.Add(TokenName: string; TokenID: TIdentToken; Col,
  Row: Integer; GlobalPos, Pos: LongInt): Integer;
var
  PR: PTokenRec;
begin
  New(PR);
  PR^.TokenName := TokenName;
  PR^.TokenID := TokenID;
  PR^.Col := Col;
  PR^.Row := Row;
  PR^.Pos := Pos;
  PR^.GlobalPos := GlobalPos;
  Result := FList.Add(PR);
end;

function TIBTokensList.Get(Index: Integer): TTokenRec;
begin
  if (Index >= 0) and (Index < FList.Count) then
    Result := PTokenRec(FList.Items[Index])^
  else raise Exception.Create('Index out of bounds. ' + IntToStr(Index));
end;

function TIBTokensList.GetToken: TTokenRec;
begin
  Result := Get(FPos);
end;

function TIBTokensList.GetPreviewNext: TTokenRec;
begin
  Result := Get(FPos + 1);
end;

procedure TIBTokensList.Next;
begin
  if FPos < FList.Count - 1 then Inc(FPos);
end;

procedure TIBTokensList.Prev;
begin
  if FPos > 0 then Dec(FPos);
end;

end.

