/// Framework Core Low-Level Unicode UTF-8 UTF-16 Ansi Conversion
// - this unit is a part of the Open Source Synopse mORMot framework 2,
// licensed under a MPL/GPL/LGPL three license - see LICENSE.md
unit mormot.core.unicode;

{
  *****************************************************************************

   Efficient Unicode Conversion Classes shared by all framework units
   - UTF-8 Efficient Encoding / Decoding
   - UTF-8 / UTF-16 / Ansi Conversion Classes
   - Text File Loading with BOM/Unicode Support
   - Low-Level String Conversion Functions
   - Text Case-(in)sensitive Conversion and Comparison
   - UTF-8 String Manipulation Functions
   - TRawUtf8DynArray Processing Functions
   - Operating-System Independent Unicode Process

  *****************************************************************************
}

interface

{$I mormot.defines.inc}

uses
  classes,
  sysutils,
  mormot.core.base,
  mormot.core.os;


{ *************** UTF-8 Efficient Encoding / Decoding }

// some constants used for UTF-8 conversion, including surrogates
type
  // see http://floodyberry.wordpress.com/2007/04/14/utf-8-conversion-tricks
  {$ifdef USERECORDWITHMETHODS}
  TUtf8Table = record
  {$else}
  TUtf8Table = object
  {$endif USERECORDWITHMETHODS}
  public
    Lookup: array[byte] of byte;
    Extra: array[0..6] of record
      offset, minimum: cardinal;
    end;
    FirstByte: array[2..6] of byte;
    /// retrieve a >127 UCS4 CodePoint from UTF-8
    function GetHighUtf8Ucs4(var U: PUtf8Char): Ucs4CodePoint;
  end;
  PUtf8Table = ^TUtf8Table;

const
  UTF8_ASCII = 0;
  UTF8_INVALID = 6;
  UTF8_ZERO = 7;
  UTF8_TABLE: TUtf8Table = (
    Lookup: (
      7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6);
    Extra: (
      (offset: $00000000;  minimum: $00010000),
      (offset: $00003080;  minimum: $00000080),
      (offset: $000e2080;  minimum: $00000800),
      (offset: $03c82080;  minimum: $00010000),
      (offset: $fa082080;  minimum: $00200000),
      (offset: $82082080;  minimum: $04000000),
      (offset: $00000000;  minimum: $04000000));
    FirstByte: (
      $c0, $e0, $f0, $f8, $fc));

  UTF8_EXTRA_SURROGATE = 3;
  UTF16_HISURROGATE_MIN = $d800;
  UTF16_HISURROGATE_MAX = $dbff;
  UTF16_LOSURROGATE_MIN = $dc00;
  UTF16_LOSURROGATE_MAX = $dfff;
  UTF16_SURROGATE_OFFSET = $d7c0;

  /// replace any incoming character whose value is unrepresentable in Unicode
  // - set e.g. by GetUtf8WideChar(), Utf8UpperReference() or
  // RawUnicodeToUtf8() when ccfReplacementCharacterForUnmatchedSurrogate is set
  // - encoded as $ef $bf $bd bytes in UTF-8
  UNICODE_REPLACEMENT_CHARACTER = $fffd;


/// internal function, used to retrieve a >127 US4 CodePoint from UTF-8
// - not to be called directly, but from inlined higher-level functions
// - here U^ shall be always >= #80
// - typical use is as such:
// !  ch := ord(P^);
// !  if ch and $80=0 then
// !    inc(P) else
// !    ch := GetHighUtf8Ucs4(P);
function GetHighUtf8Ucs4(var U: PUtf8Char): Ucs4CodePoint;
  {$ifdef HASINLINE}inline;{$endif}

/// decode UTF-16 WideChar from UTF-8 input buffer
// - any surrogate (Ucs4>$ffff) is returned as UNICODE_REPLACEMENT_CHARACTER=$fffd
function GetUtf8WideChar(P: PUtf8Char): cardinal;
  {$ifdef HASINLINE}inline;{$endif}

/// get the UCS4 CodePoint stored in P^ (decode UTF-8 if necessary)
function NextUtf8Ucs4(var P: PUtf8Char): Ucs4CodePoint;
  {$ifdef HASINLINE}inline;{$endif}

/// UTF-8 encode one UTF-16 encoded UCS4 CodePoint into Dest
// - return the number of bytes written into Dest (i.e. from 1 up to 6)
// - Source will contain the next UTF-16 character
// - this method DOES properly handle UTF-16 surrogate pairs
function Utf16CharToUtf8(Dest: PUtf8Char; var Source: PWord): integer;

/// UTF-8 encode one UCS4 CodePoint into Dest
// - return the number of bytes written into Dest (i.e. from 1 up to 6)
// - this method DOES properly handle UTF-16 surrogate pairs
function Ucs4ToUtf8(ucs4: Ucs4CodePoint; Dest: PUtf8Char): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

type
  /// option set for RawUnicodeToUtf8() conversion
  TCharConversionFlags = set of (
    ccfNoTrailingZero,
    ccfReplacementCharacterForUnmatchedSurrogate);

/// convert a UTF-16 PWideChar buffer into a UTF-8 string
procedure RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  var result: RawUtf8; Flags: TCharConversionFlags = [ccfNoTrailingZero]); overload;

/// convert a UTF-16 PWideChar buffer into a UTF-8 temporary buffer
procedure RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  var result: TSynTempBuffer; Flags: TCharConversionFlags); overload;

/// convert a UTF-16 PWideChar buffer into a UTF-8 string
function RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  Flags: TCharConversionFlags = [ccfNoTrailingZero]): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert a UTF-16 PWideChar buffer into a UTF-8 buffer
// - replace system.UnicodeToUtf8 implementation, which is rather slow
// since Delphi 2009+
// - append a trailing #0 to the ending PUtf8Char, unless ccfNoTrailingZero is set
// - if ccfReplacementCharacterForUnmatchedSurrogate is set, this function will identify
// unmatched surrogate pairs and replace them with UNICODE_REPLACEMENT_CHARACTER -
// see https://en.wikipedia.org/wiki/Specials_(Unicode_block)
function RawUnicodeToUtf8(Dest: PUtf8Char; DestLen: PtrInt;
  Source: PWideChar; SourceLen: PtrInt; Flags: TCharConversionFlags): PtrInt; overload;

/// convert a UTF-16 PWideChar buffer into a UTF-8 string
// - this version doesn't resize the resulting RawUtf8 string, but return
// the new resulting RawUtf8 byte count into Utf8Length
function RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  out Utf8Length: integer): RawUtf8; overload;


/// convert an UTF-8 encoded text into a WideChar (UTF-16) buffer
// - faster than System.Utf8ToUnicode
// - sourceBytes can by 0, therefore length is computed from zero terminated source
// - enough place must be available in dest buffer (guess is sourceBytes*3+2)
// - a WideChar(#0) is added at the end (if something is written) unless
// NoTrailingZero is TRUE
// - returns the BYTE count written in dest, excluding the ending WideChar(#0)
function Utf8ToWideChar(dest: PWideChar; source: PUtf8Char; sourceBytes: PtrInt = 0;
  NoTrailingZero: boolean = false): PtrInt; overload;

/// convert an UTF-8 encoded text into a WideChar (UTF-16) buffer
// - faster than System.Utf8ToUnicode
// - this overloaded function expect a MaxDestChars parameter
// - sourceBytes can not be 0 for this function
// - enough place must be available in dest buffer (guess is sourceBytes*3+2)
// - a WideChar(#0) is added at the end (if something is written) unless
// NoTrailingZero is TRUE
// - returns the BYTE COUNT (not WideChar count) written in dest, excluding the
// ending WideChar(#0)
function Utf8ToWideChar(dest: PWideChar; source: PUtf8Char;
  MaxDestChars, sourceBytes: PtrInt; NoTrailingZero: boolean = false): PtrInt; overload;

/// direct conversion of a UTF-8 encoded buffer into a WinAnsi ShortString buffer
// - non WinAnsi chars are replaced by '?' placeholders
procedure Utf8ToShortString(var dest: ShortString; source: PUtf8Char);

/// calculate the UTF-16 Unicode characters count, UTF-8 encoded in source^
// - count may not match the UCS4 CodePoint, in case of UTF-16 surrogates
// - faster than System.Utf8ToUnicode with dest=nil
function Utf8ToUnicodeLength(source: PUtf8Char): PtrUInt;

/// returns TRUE if the supplied buffer has valid UTF-8 encoding
// - will also refuse #0 characters within the buffer
// - on Haswell AVX2 Intel/AMD CPUs, will use very efficient ASM
var
  IsValidUtf8Buffer: function(source: PUtf8Char; sourcelen: PtrInt): boolean;

/// returns TRUE if the supplied buffer has valid UTF-8 encoding
// - will also refuse #0 characters within the buffer
// - on Haswell AVX2 Intel/AMD CPUs, will use very efficient ASM, reaching e.g.
// 21 GB/s parsing speed on a Core i5-13500
function IsValidUtf8(const source: RawUtf8): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// returns TRUE if the supplied buffer has valid UTF-8 encoding
// - will stop when the buffer contains #0
// - just a wrapper around IsValidUtf8Buffer(source, StrLen(source)) so if you
// know the source length, you would better call IsValidUtf8Buffer() directly
// - on Haswell AVX2 Intel/AMD CPUs, will use very efficient ASM, reaching e.g.
// 15 GB/s parsing speed on a Core i5-13500 - StrLen() itself runs at 37 GB/s
function IsValidUtf8(source: PUtf8Char): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// detect UTF-8 content and mark the variable with the CP_UTF8 codepage
// - to circumvent FPC concatenation bug with CP_UTF8 and CP_RAWBYTESTRING
procedure DetectRawUtf8(var source: RawByteString);
  {$ifndef HASCODEPAGE}{$ifdef HASINLINE}inline;{$endif}{$endif}

/// returns TRUE if the supplied buffer has valid UTF-8 encoding with no #1..#31
// control characters
// - supplied input is a pointer to a #0 ended text buffer
function IsValidUtf8WithoutControlChars(source: PUtf8Char): boolean; overload;

/// returns TRUE if the supplied buffer has valid UTF-8 encoding with no #0..#31
// control characters
// - supplied input is a RawUtf8 variable
function IsValidUtf8WithoutControlChars(const source: RawUtf8): boolean; overload;

/// will truncate the supplied UTF-8 value if its length exceeds the specified
// UTF-16 Unicode characters count
// - count may not match the UCS4 CodePoint, in case of UTF-16 surrogates
// - returns FALSE if text was not truncated, TRUE otherwise
function Utf8TruncateToUnicodeLength(var text: RawUtf8; maxUtf16: integer): boolean;

/// will truncate the supplied UTF-8 value if its length exceeds the specified
// bytes count
// - this function will ensure that the returned content will contain only valid
// UTF-8 sequence, i.e. will trim the whole trailing UTF-8 sequence
// - returns FALSE if text was not truncated, TRUE otherwise
function Utf8TruncateToLength(var text: RawUtf8; maxBytes: PtrUInt): boolean;

/// compute the truncated length of the supplied UTF-8 value if it exceeds the
// specified bytes count
// - this function will ensure that the returned content will contain only valid
// UTF-8 sequence, i.e. will trim the whole trailing UTF-8 sequence
// - returns maxBytes if text was not truncated, or the number of fitting bytes
function Utf8TruncatedLength(const text: RawUtf8; maxBytes: PtrUInt): PtrInt; overload;

/// compute the truncated length of the supplied UTF-8 value if it exceeds the
// specified bytes count
// - this function will ensure that the returned content will contain only valid
// UTF-8 sequence, i.e. will trim the whole trailing UTF-8 sequence
// - returns maxBytes if text was not truncated, or the number of fitting bytes
function Utf8TruncatedLength(text: PAnsiChar;
  textlen, maxBytes: PtrUInt): PtrInt; overload;

/// calculate the UTF-16 Unicode characters count of the UTF-8 encoded first line
// - count may not match the UCS4 CodePoint, in case of UTF-16 surrogates
// - end the parsing at first #13 or #10 character
function Utf8FirstLineToUtf16Length(source: PUtf8Char): PtrInt;



{ **************** UTF-8 / UTF-16 / Ansi Conversion Classes }

type
  /// Exception raised by this unit in case of fatal conversion issue
  ESynUnicode = class(ExceptionWithProps);

  /// an abstract class to handle Ansi to/from Unicode translation
  // - implementations of this class will handle efficiently all Code Pages
  // - this default implementation will use the Operating System APIs
  // - you should not create your own class instance by yourself, but should
  // better retrieve an instance using TSynAnsiConvert.Engine(), which will
  // initialize either a TSynAnsiFixedWidth or a TSynAnsiConvert instance on need
  TSynAnsiConvert = class
  protected
    fCodePage: cardinal;
    fAnsiCharShift: byte;
  public
    /// initialize the internal conversion engine
    constructor Create(aCodePage: cardinal); reintroduce; virtual;
    /// returns the engine corresponding to a given code page
    // - a global list of TSynAnsiConvert instances is handled by the unit -
    // therefore, caller should not release the returned instance
    // - will return nil in case of unhandled code page
    // - is aCodePage is 0, will return CurrentAnsiConvert value
    class function Engine(aCodePage: cardinal): TSynAnsiConvert;
    /// direct conversion of a PAnsiChar buffer into an Unicode buffer
    // - Dest^ buffer must be reserved with at least SourceChars*2 bytes
    // - this default implementation will use the Operating System APIs
    // - will append a trailing #0 to the returned PWideChar, unless
    // NoTrailingZero is set
    function AnsiBufferToUnicode(Dest: PWideChar; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PWideChar; overload; virtual;
    /// direct conversion of a PAnsiChar buffer into a UTF-8 encoded buffer
    // - Dest^ buffer must be reserved with at least SourceChars*3 bytes
    // - will append a trailing #0 to the returned PUtf8Char, unless
    // NoTrailingZero is set
    // - this default implementation will use the Operating System APIs
    function AnsiBufferToUtf8(Dest: PUtf8Char; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PUtf8Char; overload; virtual;
    {$ifndef PUREMORMOT2}
    /// convert any Ansi Text into an UTF-16 Unicode String
    // - returns a value using our RawUnicode kind of string
    function AnsiToRawUnicode(const AnsiText: RawByteString): RawUnicode; overload;
    /// convert any Ansi buffer into an Unicode String
    // - returns a value using our RawUnicode kind of string
    function AnsiToRawUnicode(
      Source: PAnsiChar; SourceChars: cardinal): RawUnicode; overload; virtual;
    {$endif PUREMORMOT2}
    /// convert any Ansi buffer into an Unicode String
    // - returns a SynUnicode, i.e. Delphi 2009+ UnicodeString or a WideString
    function AnsiToUnicodeString(
      Source: PAnsiChar; SourceChars: cardinal): SynUnicode; overload;
    /// convert any Ansi buffer into an Unicode String
    // - returns a SynUnicode, i.e. Delphi 2009+ UnicodeString or a WideString
    function AnsiToUnicodeString(const Source: RawByteString): SynUnicode; overload;
    /// convert any Ansi Text into an UTF-8 encoded String
    // - internally calls AnsiBufferToUtf8 virtual method
    function AnsiToUtf8(const AnsiText: RawByteString): RawUtf8; virtual;
    /// direct conversion of a PAnsiChar buffer into a UTF-8 encoded string
    // - will call AnsiBufferToUnicode() overloaded virtual method
    procedure AnsiBufferToRawUtf8(Source: PAnsiChar;
      SourceChars: cardinal; out Value: RawUtf8); overload; virtual;
    /// direct conversion of an Unicode buffer into a PAnsiChar buffer
    // - Dest^ buffer must be reserved with at least SourceChars * 3 bytes
    // - will detect and ignore any trailing UTF-16LE BOM marker
    // - this default implementation will rely on the Operating System for
    // all non ASCII-7 chars
    function UnicodeBufferToAnsi(Dest: PAnsiChar; Source: PWideChar;
      SourceChars: cardinal): PAnsiChar; overload; virtual;
    /// direct conversion of an Unicode buffer into an Ansi Text
    function UnicodeBufferToAnsi(Source: PWideChar;
      SourceChars: cardinal): RawByteString; overload; virtual;
    /// convert any Unicode-encoded String into Ansi Text
    // - internally calls UnicodeBufferToAnsi virtual method
    function UnicodeStringToAnsi(const Source: SynUnicode): RawByteString;
      {$ifdef HASINLINE}inline;{$endif}
    {$ifndef PUREMORMOT2}
    /// convert any Unicode-encoded String into Ansi Text
    // - internally calls UnicodeBufferToAnsi virtual method
    function RawUnicodeToAnsi(const Source: RawUnicode): RawByteString;
    {$endif PUREMORMOT2}
    /// direct conversion of an UTF-8 encoded buffer into a PAnsiChar buffer
    // - Dest^ buffer must be reserved with at least SourceChars bytes
    // - no trailing #0 is appended to the buffer
    function Utf8BufferToAnsi(Dest: PAnsiChar; Source: PUtf8Char;
      SourceChars: cardinal): PAnsiChar; overload; virtual;
    /// convert any UTF-8 encoded buffer into Ansi Text
    // - internally calls Utf8BufferToAnsi virtual method
    function Utf8BufferToAnsi(Source: PUtf8Char;
      SourceChars: cardinal): RawByteString; overload;
      {$ifdef HASINLINE}inline;{$endif}
    /// convert any UTF-8 encoded buffer into Ansi Text
    // - internally calls Utf8BufferToAnsi virtual method
    procedure Utf8BufferToAnsi(Source: PUtf8Char;
      SourceChars: cardinal; var result: RawByteString); overload; virtual;
    /// convert any UTF-8 encoded String into Ansi Text
    // - internally calls Utf8BufferToAnsi virtual method
    function Utf8ToAnsi(const u: RawUtf8): RawByteString; virtual;
    /// direct conversion of a UTF-8 encoded string into a WinAnsi <2KB buffer
    // - will truncate the destination string to DestSize bytes (including the
    // trailing #0), with a maximum handled size of 2048 bytes
    // - returns the number of bytes stored in Dest^ (i.e. the position of #0)
    function Utf8ToAnsiBuffer2K(const S: RawUtf8;
      Dest: PAnsiChar; DestSize: integer): integer;
    /// convert any Ansi Text (providing a From converted) into Ansi Text
    function AnsiToAnsi(From: TSynAnsiConvert;
      const Source: RawByteString): RawByteString; overload;
    /// convert any Ansi buffer (providing a From converted) into Ansi Text
    function AnsiToAnsi(From: TSynAnsiConvert; Source: PAnsiChar;
      SourceChars: cardinal): RawByteString; overload;
    /// corresponding code page
    property CodePage: cardinal
      read fCodePage;
    /// corresponding length binary shift used for worst conversion case
    property AnsiCharShift: byte
      read fAnsiCharShift;
  end;

  /// a class to handle Ansi to/from Unicode translation of fixed width encoding
  // (i.e. non MBCS)
  // - this class will handle efficiently all Code Page availables without MBCS
  // encoding - like WinAnsi (1252) or Russian (1251)
  // - it will use internal fast look-up tables for such encodings
  // - this class could take some time to generate, and will consume more than
  // 64 KB of memory: you should not create your own class instance by yourself,
  // but should better retrieve an instance using TSynAnsiConvert.Engine(), which
  // will initialize either a TSynAnsiFixedWidth or a TSynAnsiConvert instance
  // on need
  // - this class has some additional methods (e.g. IsValid*) which take
  // advantage of the internal lookup tables to provide some fast process
  TSynAnsiFixedWidth = class(TSynAnsiConvert)
  protected
    fAnsiToWide: TWordDynArray;
    fWideToAnsi: TByteDynArray;
  public
    /// initialize the internal conversion engine
    constructor Create(aCodePage: cardinal); override;
    /// direct conversion of a PAnsiChar buffer into an Unicode buffer
    // - Dest^ buffer must be reserved with at least SourceChars*2 bytes
    // - will append a trailing #0 to the returned PWideChar, unless
    // NoTrailingZero is set
    function AnsiBufferToUnicode(Dest: PWideChar; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PWideChar; override;
    /// direct conversion of a PAnsiChar buffer into a UTF-8 encoded buffer
    // - Dest^ buffer must be reserved with at least SourceChars*3 bytes
    // - will append a trailing #0 to the returned PUtf8Char, unless
    // NoTrailingZero is set
    function AnsiBufferToUtf8(Dest: PUtf8Char; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PUtf8Char; override;
    {$ifndef PUREMORMOT2}
    /// convert any Ansi buffer into an Unicode String
    // - returns a value using our RawUnicode kind of string
    function AnsiToRawUnicode(Source: PAnsiChar;
      SourceChars: cardinal): RawUnicode; override;
    {$endif PUREMORMOT2}
    /// direct conversion of an Unicode buffer into a PAnsiChar buffer
    // - Dest^ buffer must be reserved with at least SourceChars * 3 bytes
    // - will detect and ignore any trailing UTF-16LE BOM marker
    // - this overridden version will use internal lookup tables for fast process
    function UnicodeBufferToAnsi(Dest: PAnsiChar;
      Source: PWideChar; SourceChars: cardinal): PAnsiChar; override;
    /// direct conversion of an UTF-8 encoded buffer into a PAnsiChar buffer
    // - Dest^ buffer must be reserved with at least SourceChars bytes
    // - no trailing #0 is appended to the buffer
    // - non Ansi compatible characters are replaced as '?'
    function Utf8BufferToAnsi(Dest: PAnsiChar; Source: PUtf8Char;
      SourceChars: cardinal): PAnsiChar; override;
    /// conversion of a wide char into the corresponding Ansi character
    // - return -1 for an unknown WideChar in the current code page
    function WideCharToAnsiChar(wc: cardinal): integer;
    /// return TRUE if the supplied unicode buffer only contains characters of
    // the corresponding Ansi code page
    // - i.e. if the text can be displayed using this code page
    function IsValidAnsi(WideText: PWideChar; Length: PtrInt): boolean; overload;
    /// return TRUE if the supplied unicode buffer only contains characters of
    // the corresponding Ansi code page
    // - i.e. if the text can be displayed using this code page
    function IsValidAnsi(WideText: PWideChar): boolean; overload;
    /// return TRUE if the supplied UTF-8 buffer only contains characters of
    // the corresponding Ansi code page
    // - i.e. if the text can be displayed using this code page
    function IsValidAnsiU(Utf8Text: PUtf8Char): boolean;
    /// return TRUE if the supplied UTF-8 buffer only contains 8-bit characters
    // of the corresponding Ansi code page
    // - i.e. if the text can be displayed with only 8-bit unicode characters
    // (e.g. no "tm" or such) within this code page
    function IsValidAnsiU8Bit(Utf8Text: PUtf8Char): boolean;
    /// direct access to the Ansi-To-Unicode lookup table
    // - use this array like AnsiToWide: array[byte] of word
    property AnsiToWide: TWordDynArray
      read fAnsiToWide;
    /// direct access to the Unicode-To-Ansi lookup table
    // - use this array like WideToAnsi: array[word] of byte
    // - any unhandled WideChar will return ord('?')
    property WideToAnsi: TByteDynArray
      read fWideToAnsi;
  end;

  /// a class to handle UTF-8 to/from Unicode translation
  // - match the TSynAnsiConvert signature, for code page CP_UTF8
  // - this class is mostly a non-operation for conversion to/from UTF-8
  TSynAnsiUtf8 = class(TSynAnsiConvert)
  public
    /// initialize the internal conversion engine
    constructor Create(aCodePage: cardinal); override;
    /// direct conversion of a PAnsiChar UTF-8 buffer into an Unicode buffer
    // - Dest^ buffer must be reserved with at least SourceChars*2 bytes
    // - will append a trailing #0 to the returned PWideChar, unless
    // NoTrailingZero is set
    function AnsiBufferToUnicode(Dest: PWideChar; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PWideChar; override;
    /// direct conversion of a PAnsiChar UTF-8 buffer into a UTF-8 encoded buffer
    // - Dest^ buffer must be reserved with at least SourceChars*3 bytes
    // - will append a trailing #0 to the returned PUtf8Char, unless
    // NoTrailingZero is set
    function AnsiBufferToUtf8(Dest: PUtf8Char; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PUtf8Char; override;
    {$ifndef PUREMORMOT2}
    /// convert any UTF-8 Ansi buffer into an Unicode String
    // - returns a value using our RawUnicode kind of string
    function AnsiToRawUnicode(Source: PAnsiChar;
      SourceChars: cardinal): RawUnicode; override;
    {$endif PUREMORMOT2}
    /// direct conversion of an Unicode buffer into a PAnsiChar UTF-8 buffer
    // - will detect and ignore any trailing UTF-16LE BOM marker
    // - Dest^ buffer must be reserved with at least SourceChars * 3 bytes
    function UnicodeBufferToAnsi(Dest: PAnsiChar; Source: PWideChar;
      SourceChars: cardinal): PAnsiChar; override;
    /// direct conversion of an Unicode buffer into an Ansi Text
    function UnicodeBufferToAnsi(Source: PWideChar;
      SourceChars: cardinal): RawByteString; override;
    /// direct conversion of an UTF-8 encoded buffer into a PAnsiChar UTF-8 buffer
    // - Dest^ buffer must be reserved with at least SourceChars bytes
    // - no trailing #0 is appended to the buffer
    function Utf8BufferToAnsi(Dest: PAnsiChar; Source: PUtf8Char;
      SourceChars: cardinal): PAnsiChar; override;
    /// convert any UTF-8 encoded buffer into Ansi Text
    procedure Utf8BufferToAnsi(Source: PUtf8Char; SourceChars: cardinal;
      var result: RawByteString); override;
    /// convert any UTF-8 encoded String into Ansi Text
    // - directly assign the input as result, since no conversion is needed
    function Utf8ToAnsi(const u: RawUtf8): RawByteString; override;
    /// convert any Ansi Text into an UTF-8 encoded String
    // - directly assign the input as result, since no conversion is needed
    function AnsiToUtf8(const AnsiText: RawByteString): RawUtf8; override;
    /// direct conversion of a PAnsiChar buffer into a UTF-8 encoded string
    procedure AnsiBufferToRawUtf8(Source: PAnsiChar;
      SourceChars: cardinal; out Value: RawUtf8); override;
  end;

  /// a class to handle UTF-16 to/from Unicode translation
  // - match the TSynAnsiConvert signature, for code page CP_UTF16
  // - even if UTF-16 is not an Ansi format, code page CP_UTF16 may have been
  // used to store UTF-16 encoded binary content
  // - this class is mostly a non-operation for conversion to/from Unicode
  TSynAnsiUtf16 = class(TSynAnsiConvert)
  public
    /// initialize the internal conversion engine
    constructor Create(aCodePage: cardinal); override;
    /// direct conversion of a PAnsiChar UTF-16 buffer into an Unicode buffer
    // - Dest^ buffer must be reserved with at least SourceChars*2 bytes
    // - will append a trailing #0 to the returned PWideChar, unless
    // NoTrailingZero is set
    function AnsiBufferToUnicode(Dest: PWideChar; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PWideChar; override;
    /// direct conversion of a PAnsiChar UTF-16 buffer into a UTF-8 encoded buffer
    // - Dest^ buffer must be reserved with at least SourceChars*3 bytes
    // - will append a trailing #0 to the returned PUtf8Char, unless
    // NoTrailingZero is set
    function AnsiBufferToUtf8(Dest: PUtf8Char; Source: PAnsiChar;
      SourceChars: cardinal; NoTrailingZero: boolean = false): PUtf8Char; override;
    {$ifndef PUREMORMOT2}
    /// convert any UTF-16 Ansi buffer into an Unicode String
    // - returns a value using our RawUnicode kind of string
    function AnsiToRawUnicode(Source: PAnsiChar;
      SourceChars: cardinal): RawUnicode; override;
    {$endif PUREMORMOT2}
    /// direct conversion of an Unicode buffer into a PAnsiChar UTF-16 buffer
    // - Dest^ buffer must be reserved with at least SourceChars * 3 bytes
    function UnicodeBufferToAnsi(Dest: PAnsiChar; Source: PWideChar;
      SourceChars: cardinal): PAnsiChar; override;
    /// direct conversion of an UTF-8 encoded buffer into a PAnsiChar UTF-16 buffer
    // - Dest^ buffer must be reserved with at least SourceChars bytes
    // - no trailing #0 is appended to the buffer
    function Utf8BufferToAnsi(Dest: PAnsiChar; Source: PUtf8Char;
      SourceChars: cardinal): PAnsiChar; override;
  end;


var
  /// global TSynAnsiConvert instance to handle WinAnsi encoding (code page 1252)
  // - this instance is global and instantied during the whole program life time
  // - it will be created from hard-coded values, and not using the system API,
  // since it appeared that some systems (e.g. in Russia) did tweak the registry
  // so that 1252 code page maps 1251 code page
  WinAnsiConvert: TSynAnsiFixedWidth;

  /// global TSynAnsiConvert instance to handle current system encoding
  // - this is the encoding as used by the AnsiString type, so will be used
  // before Delphi 2009 to speed-up RTL string handling (especially for UTF-8)
  // - this instance is global and instantied during the whole program life time
  CurrentAnsiConvert: TSynAnsiConvert;

  /// global TSynAnsiConvert instance to handle UTF-8 encoding (code page CP_UTF8)
  // - this instance is global and instantied during the whole program life time
  Utf8AnsiConvert: TSynAnsiUtf8;

  /// global TSynAnsiConvert instance with no encoding (RawByteString/RawBlob)
  RawByteStringConvert: TSynAnsiFixedWidth;

/// check if a code page is known to be of fixed width, i.e. not MBCS
// - i.e. will be implemented as a TSynAnsiFixedWidth
function IsFixedWidthCodePage(aCodePage: cardinal): boolean;

/// return a code page number into human-friendly text
function CodePageToText(aCodePage: cardinal): TShort16;


{ *************** Text File Loading with BOM/Unicode Support }

type
  /// text file layout, as returned by BomFile() and StringFromBomFile()
  // - bomNone means there was no BOM recognized
  // - bomUnicode stands for UTF-16 LE encoding (as on Windows products)
  // - bomUtf8 stands for a UTF-8 BOM (as on Windows products)
  TBomFile = (
    bomNone,
    bomUnicode,
    bomUtf8);

const
  /// UTF-16LE BOM WideChar marker, as existing e.g. in some UTF-16 Windows files
  BOM_UTF16LE = #$FEFF;

/// check the file BOM at the beginning of a file buffer
// - BOM is common only with Microsoft products
// - returns bomNone if no BOM was recognized
// - returns bomUnicode or bomUtf8 if UTF-16LE or UTF-8 BOM were recognized:
// and will adjust Buffer/BufferSize to ignore the leading 2 or 3 bytes
function BomFile(var Buffer: pointer; var BufferSize: PtrInt): TBomFile;

/// read a file into a temporary variable, check the BOM, and adjust the buffer
function StringFromBomFile(const FileName: TFileName; out FileContent: RawByteString;
  out Buffer: pointer; out BufferSize: PtrInt): TBomFile;

/// read a File content into a RawUtf8, detecting any leading BOM
// - will assume text file with no BOM is already UTF-8 encoded
// - an alternative to StringFromFile() if you want to handle UTF-8 content
// and the files are likely to be natively UTF-8 encoded, or with a BOM
function RawUtf8FromFile(const FileName: TFileName): RawUtf8;
  {$ifdef HASINLINE} inline; {$endif}

/// read a File content into a RawUtf8, detecting any leading BOM
// - assume file with no BOM is encoded with the current Ansi code page, not
// UTF-8, unless AssumeUtf8IfNoBom is true and it behaves like RawUtf8FromFile()
function AnyTextFileToRawUtf8(const FileName: TFileName;
  AssumeUtf8IfNoBom: boolean = false): RawUtf8;

/// read a File content into a RTL string, detecting any leading BOM
// - assume file with no BOM is encoded with the current Ansi code page, not UTF-8
// - if ForceUtf8 is true, won't detect the BOM but assume whole file is UTF-8
function AnyTextFileToString(const FileName: TFileName;
  ForceUtf8: boolean = false): string;

/// read a File content into SynUnicode string, detecting any leading BOM
// - assume file with no BOM is encoded with the current Ansi code page, not UTF-8
// - if ForceUtf8 is true, won't detect the BOM but assume whole file is UTF-8
function AnyTextFileToSynUnicode(const FileName: TFileName;
  ForceUtf8: boolean = false): SynUnicode;


{ *************** Low-Level String Conversion Functions }

/// will fast replace all #0 chars as ~
// - could be used after UniqueRawUtf8() on a in-placed modified JSON buffer,
// in which all values have been ended with #0
// - you can optionally specify a maximum size, in bytes (this won't reallocate
// the string, but just add a #0 at some point in the UTF-8 buffer)
// - could allow logging of parsed input e.g. after an exception
procedure UniqueRawUtf8ZeroToTilde(var u: RawUtf8; MaxSize: PtrInt = maxInt);

/// convert a binary buffer into a fake ASCII/UTF-8 content without any #0 input
// - will use ~ char to escape any #0 as ~0 pair (and plain ~ as ~~ pair)
// - output is just a bunch of non 0 bytes, so not trully valid UTF-8 content
// - may be used as an alternative to Base64 encoding if 8-bit chars are allowed
// - call ZeroedRawUtf8() as reverse function
function UnZeroed(const bin: RawByteString): RawUtf8;

/// convert a fake UTF-8 buffer without any #0 input back into its original binary
// - may be used as an alternative to Base64 decoding if 8-bit chars are allowed
// - call UnZeroedRawUtf8() as reverse function
function Zeroed(const u: RawUtf8): RawByteString;

/// conversion of a wide char into a WinAnsi (CodePage 1252) char
// - return '?' for an unknown WideChar in code page 1252
function WideCharToWinAnsiChar(wc: cardinal): AnsiChar;
  {$ifdef HASINLINE}inline;{$endif}

/// conversion of a wide char into a WinAnsi (CodePage 1252) char index
// - return -1 for an unknown WideChar in code page 1252
function WideCharToWinAnsi(wc: cardinal): integer;
  {$ifdef HASINLINE}inline;{$endif}

/// return TRUE if the supplied unicode buffer only contains WinAnsi characters
// - i.e. if the text can be displayed using ANSI_CHARSET
function IsWinAnsi(WideText: PWideChar): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// return TRUE if the supplied unicode buffer only contains WinAnsi characters
// - i.e. if the text can be displayed using ANSI_CHARSET
function IsWinAnsi(WideText: PWideChar; Length: integer): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// return TRUE if the supplied UTF-8 buffer only contains WinAnsi characters
// - i.e. if the text can be displayed using ANSI_CHARSET
function IsWinAnsiU(Utf8Text: PUtf8Char): boolean;
  {$ifdef HASINLINE}inline;{$endif}

/// return TRUE if the supplied UTF-8 buffer only contains WinAnsi 8-bit characters
// - i.e. if the text can be displayed using ANSI_CHARSET with only 8-bit unicode
// characters (e.g. no "tm" or such)
function IsWinAnsiU8Bit(Utf8Text: PUtf8Char): boolean;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of an AnsiString with an unknown code page into an
// UTF-8 encoded String
// - will assume CurrentAnsiConvert.CodePage prior to Delphi 2009
// - newer UNICODE versions of Delphi will retrieve the code page from string
procedure AnyAnsiToUtf8(const s: RawByteString; var result: RawUtf8); overload;

/// direct conversion of an AnsiString with an unknown code page into an
// UTF-8 encoded String
// - will assume CurrentAnsiConvert.CodePage prior to Delphi 2009
// - newer UNICODE versions of Delphi will retrieve the code page from string
// - use AnsiToUtf8() if you want to specify the codepage
function AnyAnsiToUtf8(const s: RawByteString): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert an AnsiString (of a given code page) into a UTF-8 string
// - use AnyAnsiToUtf8() if you want to use the codepage of the input string
// - wrapper around TSynAnsiConvert.Engine(CodePage).AnsiToUtf8()
function AnsiToUtf8(const Ansi: RawByteString; CodePage: integer): RawUtf8;
  {$ifdef HASINLINE}inline;{$endif}

/// convert an AnsiChar buffer (of a given code page) into a UTF-8 string
// - the destination code page should be supplied
// - wrapper around TSynAnsiConvert.Engine(CodePage).AnsiBufferToRawUtf8()
procedure AnsiCharToUtf8(P: PAnsiChar; L: integer; var result: RawUtf8;
  CodePage: integer);
  {$ifdef HASINLINE}inline;{$endif}

/// convert an AnsiString (of a given code page) into a RTL string
// - the destination code page should be supplied
// - wrapper around TSynAnsiConvert.Engine(CodePage) and string conversion
function AnsiToString(const Ansi: RawByteString; CodePage: integer): string;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a WinAnsi (CodePage 1252) string into a UTF-8 encoded String
// - faster than SysUtils: don't use Utf8Encode(WideString) -> no Windows.Global(),
// and use a fixed pre-calculated array for individual chars conversion
function WinAnsiToUtf8(const S: WinAnsiString): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a WinAnsi (CodePage 1252) string into a UTF-8 encoded String
// - faster than SysUtils: don't use Utf8Encode(WideString) -> no Windows.Global(),
// and use a fixed pre-calculated array for individual chars conversion
function WinAnsiToUtf8(WinAnsi: PAnsiChar; WinAnsiLen: PtrInt): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a WinAnsi PAnsiChar buffer into a UTF-8 encoded buffer
// - Dest^ buffer must be reserved with at least SourceChars*3
// - call internally WinAnsiConvert fast conversion class
function WinAnsiBufferToUtf8(Dest: PUtf8Char;
  Source: PAnsiChar; SourceChars: cardinal): PUtf8Char;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a WinAnsi ShortString into a UTF-8 text
// - call internally WinAnsiConvert fast conversion class
function ShortStringToUtf8(const source: ShortString): RawUtf8;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a WinAnsi (CodePage 1252) string into a Unicode buffer
// - very fast, by using a fixed pre-calculated array for individual chars conversion
// - text will be truncated if necessary to avoid buffer overflow in Dest[]
procedure WinAnsiToUnicodeBuffer(const S: WinAnsiString;
  Dest: PWordArray; DestLen: PtrInt);
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a UTF-8 encoded string into a WinAnsi String
function Utf8ToWinAnsi(const S: RawUtf8): WinAnsiString; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a UTF-8 encoded zero terminated buffer into a WinAnsi String
function Utf8ToWinAnsi(P: PUtf8Char): WinAnsiString; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a UTF-8 encoded zero terminated buffer into a RawUtf8 String
procedure Utf8ToRawUtf8(P: PUtf8Char; var result: RawUtf8);
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a UTF-8 encoded buffer into a WinAnsi PAnsiChar buffer
function Utf8ToWinPChar(dest: PAnsiChar; source: PUtf8Char; count: integer): integer;
  {$ifdef HASINLINE}inline;{$endif}

{$ifndef PUREMORMOT2}
/// direct conversion of a WinAnsi (CodePage 1252) string into a Unicode encoded String
// - very fast, by using a fixed pre-calculated array for individual chars conversion
function WinAnsiToRawUnicode(const S: WinAnsiString): RawUnicode;

/// convert a UTF-16 string into a WinAnsi (code page 1252) string
function RawUnicodeToWinAnsi(const Unicode: RawUnicode): WinAnsiString; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert a UTF-8 encoded buffer into a RawUnicode string
// - if L is 0, L is computed from zero terminated P buffer
// - RawUnicode is ended by a WideChar(#0)
// - faster than System.Utf8Decode() which uses slow widestrings
function Utf8DecodeToRawUnicode(P: PUtf8Char; L: integer): RawUnicode; overload;

/// convert a UTF-8 string into a RawUnicode string
function Utf8DecodeToRawUnicode(const S: RawUtf8): RawUnicode; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert a UTF-8 string into a RawUnicode string
// - this version doesn't resize the length of the result RawUnicode
// and is therefore useful before a Win32 Unicode API call (with nCount=-1)
// - if DestLen is not nil, the resulting length (in bytes) will be stored within
// - see also Utf8DecodeToUnicode() which uses a TSynTempBuffer for storage
function Utf8DecodeToRawUnicodeUI(const S: RawUtf8;
  DestLen: PInteger = nil): RawUnicode; overload;

/// convert a UTF-8 string into a RawUnicode string
// - returns the resulting length (in bytes) will be stored within Dest
// - see also Utf8DecodeToUnicode() which uses a TSynTempBuffer for storage
function Utf8DecodeToRawUnicodeUI(const S: RawUtf8;
  var Dest: RawUnicode): integer; overload;

/// convert a RawUnicode string into a UTF-8 string
function RawUnicodeToUtf8(const Unicode: RawUnicode): RawUtf8; overload;

/// convert any RawUnicode String into a generic SynUnicode Text
function RawUnicodeToSynUnicode(const Unicode: RawUnicode): SynUnicode; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string into a RawUnicode encoded String
// - it's prefered to use TLanguageFile.StringToUtf8() method in mORMoti18n,
// which will handle full i18n of your application
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function StringToRawUnicode(const S: string): RawUnicode; overload;

/// convert any RTL string into a RawUnicode encoded String
// - it's prefered to use TLanguageFile.StringToUtf8() method in mORMoti18n,
// which will handle full i18n of your application
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function StringToRawUnicode(P: PChar; L: integer): RawUnicode; overload;

/// convert any RawUnicode encoded string into a RTL string
// - uses StrLenW() and not length(U) to handle case when was used as buffer
function RawUnicodeToString(const U: RawUnicode): string; overload;
{$endif PUREMORMOT2}

/// convert a SynUnicode string into a UTF-8 string
function SynUnicodeToUtf8(const Unicode: SynUnicode): RawUtf8;

/// convert a WideString into a UTF-8 string
function WideStringToUtf8(const aText: WideString): RawUtf8;
  {$ifdef HASINLINE}inline;{$endif}

/// direct conversion of a UTF-16 encoded buffer into a WinAnsi PAnsiChar buffer
procedure RawUnicodeToWinPChar(dest: PAnsiChar;
  source: PWideChar; WideCharCount: integer);
  {$ifdef HASINLINE}inline;{$endif}

/// convert a UTF-16 PWideChar buffer into a WinAnsi (code page 1252) string
function RawUnicodeToWinAnsi(
  WideChar: PWideChar; WideCharCount: integer): WinAnsiString; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert a WideString into a WinAnsi (code page 1252) string
function WideStringToWinAnsi(const Wide: WideString): WinAnsiString;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-16 buffer into a generic SynUnicode Text
function RawUnicodeToSynUnicode(
  WideChar: PWideChar; WideCharCount: integer): SynUnicode; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert an Unicode buffer into a WinAnsi (code page 1252) string
procedure UnicodeBufferToWinAnsi(source: PWideChar; out Dest: WinAnsiString);

/// convert an Unicode buffer into a RTL string
function UnicodeBufferToString(source: PWideChar): string;

/// convert an Unicode buffer into a UTF-8 string
function UnicodeBufferToUtf8(source: PWideChar): RawUtf8;
  {$ifdef HASINLINE} inline; {$endif}

/// convert an Unicode buffer into a variant storing a UTF-8 string
// - could be used e.g. as TDocVariantData.AddValue() parameter
function UnicodeBufferToVariant(source: PWideChar): variant;

/// convert any RTL string into a variant storing a UTF-8 string
// - could be used e.g. as TDocVariantData.AddValue() parameter
function StringToVariant(const Txt: string): variant; overload;

/// convert any RTL string into a variant storing a UTF-8 string
// - could be used e.g. as TDocVariantData.AddValue() parameter
procedure StringToVariant(const Txt: string; var result: variant); overload;

{$ifdef HASVARUSTRING}

/// convert a Delphi 2009+ or FPC Unicode string into our UTF-8 string
function UnicodeStringToUtf8(const S: UnicodeString): RawUtf8; inline;

// this function is the same as direct RawUtf8=AnsiString(CP_UTF8) assignment
// but is faster, since it uses no Win32 API call
function Utf8DecodeToUnicodeString(const S: RawUtf8): UnicodeString; overload; inline;

/// convert an UTF-8 encoded buffer into a Delphi 2009+ or FPC Unicode string
// - this function is the same as direct assignment, since RawUtf8=AnsiString(CP_UTF8),
// but is faster, since use no Win32 API call
procedure Utf8DecodeToUnicodeString(P: PUtf8Char; L: integer;
  var result: UnicodeString); overload;

/// convert a Delphi 2009+ or FPC Unicode string into a WinAnsi (code page 1252) string
function UnicodeStringToWinAnsi(const S: UnicodeString): WinAnsiString; inline;

/// convert our UTF-8 encoded buffer into a Delphi 2009+ or FPC Unicode string
// - this function is the same as direct assignment, since RawUtf8=AnsiString(CP_UTF8),
// but is faster, since use no Win32 API call
function Utf8DecodeToUnicodeString(P: PUtf8Char; L: integer): UnicodeString; overload; inline;

/// convert a Win-Ansi encoded buffer into a Delphi 2009+ or FPC Unicode string
// - this function is faster than default RTL, since use no Win32 API call
function WinAnsiToUnicodeString(WinAnsi: PAnsiChar; WinAnsiLen: PtrInt): UnicodeString; overload;

/// convert a Win-Ansi string into a Delphi 2009+ or FPC Unicode string
// - this function is faster than default RTL, since use no Win32 API call
function WinAnsiToUnicodeString(const WinAnsi: WinAnsiString): UnicodeString; inline; overload;

{$endif HASVARUSTRING}

/// convert an UTF-8 encoded buffer into a UTF-16 encoded RawByteString buffer
// - could be used instead of deprecated RawUnicode when a temp UTF-16 buffer is needed
function Utf8DecodeToUnicodeRawByteString(P: PUtf8Char; L: integer): RawByteString; overload;

/// convert an UTF-8 encoded buffer into a UTF-16 encoded RawByteString buffer
// - could be used instead of deprecated RawUnicode when a temp UTF-16 buffer is needed
function Utf8DecodeToUnicodeRawByteString(const U: RawUtf8): RawByteString; overload;

/// convert an UTF-8 encoded buffer into a UTF-16 encoded stream of bytes
function Utf8DecodeToUnicodeStream(P: PUtf8Char; L: integer): TStream;

/// convert a Win-Ansi encoded buffer into a Delphi 2009+ or FPC Unicode string
// - this function is faster than default RTL, since use no Win32 API call
function WinAnsiToSynUnicode(WinAnsi: PAnsiChar; WinAnsiLen: PtrInt): SynUnicode; overload;

/// convert a Win-Ansi string into a Delphi 2009+ or FPC Unicode string
// - this function is faster than default RTL, since use no Win32 API call
function WinAnsiToSynUnicode(const WinAnsi: WinAnsiString): SynUnicode;
  {$ifdef HASINLINE}inline;{$endif} overload;

/// convert any RTL string into an UTF-8 encoded String
// - in the VCL context, it's prefered to use TLanguageFile.StringToUtf8()
//  method from mORMoti18n, which will handle full i18n of your application
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function StringToUtf8(const Text: string): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string buffer into an UTF-8 encoded String
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
procedure StringToUtf8(Text: PChar; TextLen: PtrInt; var result: RawUtf8); overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string into an UTF-8 encoded String
// - this overloaded function use a faster by-reference parameter for the result
procedure StringToUtf8(const Text: string; var result: RawUtf8); overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string into an UTF-8 encoded String
function ToUtf8(const Text: string): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string into an UTF-8 encoded TSynTempBuffer
// - returns the number of UTF-8 bytes available in Temp.buf
// - this overloaded function use a TSynTempBuffer for the result to avoid any
// memory allocation for the shorter content
// - caller should call Temp.Done to release any heap-allocated memory
function StringToUtf8(const Text: string; var Temp: TSynTempBuffer): integer; overload;

/// convert any Ansi memory buffer into UTF-8, using a TSynTempBuffer if needed
// - caller should release any memory by calling Temp.Done
// - returns a pointer to the UTF-8 converted buffer - which may be buf
function AnsiBufferToTempUtf8(var Temp: TSynTempBuffer;
  Buf: PAnsiChar; BufLen, CodePage: cardinal): PUtf8Char;

/// convert any UTF-8 encoded ShortString Text into an UTF-8 encoded String
// - expects the supplied content to be already ASCII-7 or UTF-8 encoded, e.g.
// a RTTI type or property name: it won't work with Ansi-encoded strings
function ToUtf8(const Ansi7Text: ShortString): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string buffer into an UTF-8 encoded buffer
// - Dest must be able to receive at least SourceChars*3 bytes
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function StringBufferToUtf8(Dest: PUtf8Char;
  Source: PChar; SourceChars: PtrInt): PUtf8Char; overload;

/// convert any RTL string 0-terminated Text buffer into an UTF-8 string
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
procedure StringBufferToUtf8(Source: PChar; out result: RawUtf8); overload;

/// convert any RTL string into a SynUnicode encoded String
// - it's prefered to use TLanguageFile.StringToUtf8() method in mORMoti18n,
// which will handle full i18n of your application
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function StringToSynUnicode(const S: string): SynUnicode; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any RTL string into a SynUnicode encoded String
// - overloaded to avoid a copy to a temporary result string of a function
procedure StringToSynUnicode(const S: string; var result: SynUnicode); overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-16 encoded buffer into a RTL string
function RawUnicodeToString(P: PWideChar; L: integer): string; overload;

/// convert any UTF-16 encoded buffer into a RTL string
procedure RawUnicodeToString(P: PWideChar; L: integer; var result: string); overload;

/// convert any SynUnicode encoded string into a RTL string
function SynUnicodeToString(const U: SynUnicode): string;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded String into a RTL string
// - it's prefered to use TLanguageFile.Utf8ToString() in mORMoti18n,
// which will handle full i18n of your application
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function Utf8ToString(const Text: RawUtf8): string;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded String into a RTL string
procedure Utf8ToStringVar(const Text: RawUtf8; var result: string);
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded String into a generic RTL file name string
procedure Utf8ToFileName(const Text: RawUtf8; var result: TFileName);
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded buffer into a RTL string
// - it's prefered to use TLanguageFile.Utf8ToString() in mORMoti18n,
// which will handle full i18n of your application
// - it will work as is with Delphi 2009+ (direct unicode conversion)
// - under older version of Delphi (no unicode), it will use the
// current RTL codepage, as with WideString conversion (but without slow
// WideString usage)
function Utf8DecodeToString(P: PUtf8Char; L: integer): string; overload;
  {$ifdef UNICODE}inline;{$endif}

/// convert any UTF-8 encoded buffer into a RTL string
procedure Utf8DecodeToString(P: PUtf8Char; L: integer; var result: string); overload;

/// convert any UTF-8 encoded String into a generic WideString Text
function Utf8ToWideString(const Text: RawUtf8): WideString; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded String into a generic WideString Text
procedure Utf8ToWideString(const Text: RawUtf8; var result: WideString); overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded String into a generic WideString Text
procedure Utf8ToWideString(Text: PUtf8Char; Len: PtrInt; var result: WideString); overload;

/// convert any UTF-8 encoded String into a generic SynUnicode Text
function Utf8ToSynUnicode(const Text: RawUtf8): SynUnicode; overload;

/// convert any UTF-8 encoded String into a generic SynUnicode Text
procedure Utf8ToSynUnicode(const Text: RawUtf8; var result: SynUnicode); overload;

/// convert any UTF-8 encoded buffer into a generic SynUnicode Text
procedure Utf8ToSynUnicode(Text: PUtf8Char; Len: PtrInt; var result: SynUnicode); overload;

/// convert any UTF-8 encoded string into an UTF-16 temporary buffer
// - returns the number of WideChar stored in temp (not bytes)
// - caller should make temp.Done after temp.buf has been used
function Utf8DecodeToUnicode(const Text: RawUtf8; var temp: TSynTempBuffer): PtrInt; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any UTF-8 encoded buffer into an UTF-16 temporary buffer
function Utf8DecodeToUnicode(Text: PUtf8Char; Len: PtrInt; var temp: TSynTempBuffer): PtrInt; overload;

/// convert any Ansi 7-bit encoded String into a RTL string
// - the Text content must contain only 7-bit pure ASCII characters
function Ansi7ToString(const Text: RawByteString): string; overload;
  {$ifndef UNICODE}{$ifdef HASINLINE}inline;{$endif}{$endif}

/// convert any Ansi 7-bit encoded String into a RTL string
// - the Text content must contain only 7-bit pure ASCII characters
function Ansi7ToString(Text: PWinAnsiChar; Len: PtrInt): string; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// convert any Ansi 7-bit encoded String into a RTL string
// - the Text content must contain only 7-bit pure ASCII characters
procedure Ansi7ToString(Text: PWinAnsiChar; Len: PtrInt; var result: string); overload;

/// convert any RTL string into Ansi 7-bit encoded String
// - the Text content must contain only 7-bit pure ASCII characters
function StringToAnsi7(const Text: string): RawByteString;
  {$ifndef UNICODE}{$ifdef HASINLINE}inline;{$endif}{$endif}

/// convert any RTL string into WinAnsi (Win-1252) 8-bit encoded String
function StringToWinAnsi(const Text: string): WinAnsiString;
  {$ifdef UNICODE}inline;{$endif}



{ **************** Text Case-(in)sensitive Conversion and Comparison }

type
  /// lookup table used for fast case conversion
  TNormTable = packed array[AnsiChar] of AnsiChar;
  /// pointer to a lookup table used for fast case conversion
  PNormTable = ^TNormTable;

  /// lookup table used for fast case conversion
  TNormTableByte = packed array[byte] of byte;
  /// pointer to a lookup table used for fast case conversion
  PNormTableByte = ^TNormTableByte;

var
  /// lookup table used for fast case conversion to uppercase
  // - handle 8-bit upper chars as in WinAnsi / code page 1252 (e.g. accents)
  // - is defined globally, since may be used from an inlined function
  NormToUpper: TNormTable;
  NormToUpperByte: TNormTableByte absolute NormToUpper;

  /// lookup table used for fast case conversion to lowercase
  // - handle 8-bit upper chars as in WinAnsi / code page 1252 (e.g. accents)
  // - is defined globally, since may be used from an inlined function
  NormToLower: TNormTable;
  NormToLowerByte: TNormTableByte absolute NormToLower;

  /// this table will convert 'a'..'z' into 'A'..'Z'
  // - so it will work with UTF-8 without decoding, whereas NormToUpper[] expects
  // WinAnsi encoding
  NormToUpperAnsi7: TNormTable;
  NormToUpperAnsi7Byte: TNormTableByte absolute NormToUpperAnsi7;

  /// this table will convert 'A'..'Z' into 'a'..'z'
  // - so it will work with UTF-8 without decoding, whereas NormToUpper[] expects
  // WinAnsi encoding
  NormToLowerAnsi7: TNormTable;
  NormToLowerAnsi7Byte: TNormTableByte absolute NormToLowerAnsi7;

  /// case sensitive NormToUpper[]/NormToLower[]-like table
  // - i.e. NormToNorm[c] = c
  NormToNorm: TNormTable;
  NormToNormByte: TNormTableByte absolute NormToNorm;

const
  NORM2CASE: array[boolean] of PNormTable = (nil, @NormToUpperAnsi7);

type
  /// character categories for text linefeed/word/identifier/uri parsing
  // - using such a set compiles into TEST [MEM], IMM so is more efficient
  // than a regular set of AnsiChar which generates much slower BT [MEM], IMM
  // - the same 256-byte memory will also be reused from L1 CPU cache
  // during the parsing of complex input
  TTextChar = set of (
    tcNot01013,
    tc1013,
    tcCtrlNotLF,
    tcCtrlNot0Comma,
    tcWord,
    tcIdentifierFirstChar,
    tcIdentifier,
    tcUriUnreserved);

  /// defines an AnsiChar lookup table used for branch-less text parsing
  TTextCharSet = array[AnsiChar] of TTextChar;
  /// points to an AnsiChar lookup table used for branch-less text parsing
  PTextCharSet = ^TTextCharSet;

  /// defines an Ordinal lookup table used for branch-less text parsing
  TTextByteSet = array[byte] of TTextChar;
  /// points to an Ordinal lookup table used for branch-less text parsing
  PTextByteSet = ^TTextByteSet;

var
  /// lookup table for text linefeed/word/identifier/uri branch-less parsing
  TEXT_CHARS: TTextCharSet;
  TEXT_BYTES: TTextByteSet absolute TEXT_CHARS;

/// returns TRUE if the given text buffer contains a..z,A..Z,0..9,_ characters
// - should match most usual property names values or other identifier names
// in the business logic source code
// - i.e. can be tested via IdemPropName*() functions, and the MongoDB-like
// extended JSON syntax as generated by dvoSerializeAsExtendedJson
// - following classic pascal naming convention, first char must be alphabetical
// or '_' (i.e. not a digit), following chars can be alphanumerical or '_'
function PropNameValid(P: PUtf8Char): boolean;

/// returns TRUE if the given text buffers contains A..Z,0..9,_ characters
// - use it with property names values (i.e. only including A..Z,0..9,_ chars)
// - this function allows numbers as first char, so won't check the first char
// the same way than PropNameValid() which refuses digits as pascal convention
function PropNamesValid(const Values: array of RawUtf8): boolean;

/// try to generate a PropNameValid() output from an incoming text
// - will trim all spaces, and replace most special chars by '_'
// - if it is not PropNameValid() after those replacements, will return fallback
function PropNameSanitize(const text, fallback: RawUtf8): RawUtf8;

/// case insensitive comparison of ASCII 7-bit identifiers
// - use it with property names values (i.e. only including A..Z,0..9,_ chars)
// - behavior is undefined with UTF-8 encoding (some false positive may occur)
function IdemPropName(const P1, P2: ShortString): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

  /// case insensitive comparison of ASCII 7-bit identifiers
  // - use it with property names values (i.e. only including A..Z,0..9,_ chars)
  // - behavior is undefined with UTF-8 encoding (some false positive may occur)
function IdemPropName(const P1: ShortString; P2: PUtf8Char; P2Len: PtrInt): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// case insensitive comparison of ASCII 7-bit identifiers
// - use it with property names values (i.e. only including A..Z,0..9,_ chars)
// - behavior is undefined with UTF-8 encoding (some false positive may occur)
// - this version expects P1 and P2 to be a PAnsiChar with specified lengths
function IdemPropName(P1, P2: PUtf8Char; P1Len, P2Len: PtrInt): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// case insensitive comparison of ASCII 7-bit identifiers
// - use it with property names values (i.e. only including A..Z,0..9,_ chars)
// - behavior is undefined with UTF-8 encoding (some false positive may occur)
// - this version expects P2 to be a PAnsiChar with specified length
function IdemPropNameU(const P1: RawUtf8; P2: PUtf8Char; P2Len: PtrInt): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// case insensitive comparison of ASCII 7-bit identifiers of same length
// - use it with property names values (i.e. only including A..Z,0..9,_ chars)
// - behavior is undefined with UTF-8 encoding (some false positive may occur)
// - this version expects P1 and P2 to be a PAnsiChar with an already checked
// identical length, so may be used for a faster process, e.g. in a loop
// - if P1 and P2 are RawUtf8, you should better call overloaded function
// IdemPropNameU(const P1,P2: RawUtf8), which would be slightly faster by
// using the length stored before the actual text buffer of each RawUtf8
function IdemPropNameUSameLenNotNull(P1, P2: PUtf8Char; P1P2Len: PtrInt): boolean;
  {$ifdef FPC}inline;{$endif} // Delphi does not like to inline goto

type
  TIdemPropNameUSameLen = function(P1, P2: pointer; P1P2Len: PtrInt): boolean;

var
  /// case (in)sensitive comparison of ASCII 7-bit identifiers of same length
  IdemPropNameUSameLen: array[{casesensitive=}boolean] of TIdemPropNameUSameLen;

/// case insensitive comparison of ASCII 7-bit identifiers
// - use it with property names values (i.e. only including A..Z,0..9,_ chars)
// - behavior is undefined with UTF-8 encoding (some false positive may occur)
// - is an alternative with PropNameEquals() to be used inlined e.g. in a loop
function IdemPropNameU(const P1, P2: RawUtf8): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// returns true if the beginning of p^ is the same as up^
// - ignore case - up^ must be already Upper
// - chars are compared as 7-bit Ansi only (no accentuated characters): but when
// you only need to search for field names e.g. IdemPChar() is prefered, because
// it'll be faster than IdemPCharU(), if UTF-8 decoding is not mandatory
// - if p is nil, will return FALSE
// - if up is nil, will return TRUE
function IdemPChar(p: PUtf8Char; up: PAnsiChar): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// returns true if the beginning of p^ is the same as up^
// - this overloaded function accept the uppercase lookup buffer as parameter
function IdemPChar(p: PUtf8Char; up: PAnsiChar; table: PNormTable): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// returns true if the beginning of p^ is the same as up^, ignoring white spaces
// - ignore case - up^ must be already Upper
// - any white space in the input p^ buffer is just ignored
// - chars are compared as 7-bit Ansi only (no accentuated characters): but when
// you only need to search for field names e.g. IdemPChar() is prefered, because
// it'll be faster than IdemPCharU(), if UTF-8 decoding is not mandatory
// - if p is nil, will return FALSE
// - if up is nil, will return TRUE
function IdemPCharWithoutWhiteSpace(p: PUtf8Char; up: PAnsiChar): boolean;

/// returns the index of a matching beginning of p^ in upArray[]
// - returns -1 if no item matched
// - ignore case - upArray^ must be already Upper
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
// - warning: this function expects upArray[] items to have AT LEAST TWO
// CHARS (it will use a fast 16-bit comparison of initial 2 bytes)
// - consider IdemPPChar() which is faster but a bit more verbose
function IdemPCharArray(p: PUtf8Char; const upArray: array of PAnsiChar): integer;

/// returns the index of a matching beginning of p^ in nil-terminated up^ array
// - returns -1 if no item matched
// - ignore case - each up^ must be already Upper
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
// - warning: this function expects up^ items to have AT LEAST TWO CHARS
// (it will use a fast 16-bit comparison of initial 2 bytes)
function IdemPPChar(p: PUtf8Char; up: PPAnsiChar): PtrInt;

/// returns the index of a matching beginning of p^ in upArray two characters
// - returns -1 if no item matched
// - ignore case - upArray^ must be already Upper
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
function IdemPCharArrayBy2(p: PUtf8Char; const upArrayBy2Chars: RawUtf8): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// returns true if the beginning of p^ is the same as up^
// - ignore case - up^ must be already Upper
// - this version will decode the UTF-8 content before using NormToUpper[], so
// it will be slower than the IdemPChar() function above, but will handle
// WinAnsi accentuated characters (e.g. 'e' acute will be matched as 'E')
function IdemPCharU(p, up: PUtf8Char): boolean;
  {$ifdef HASINLINE}inline;{$endif}

/// returns true if the beginning of p^ is same as up^
// - ignore case - up^ must be already Upper
// - this version expects p^ to point to an Unicode char array
function IdemPCharW(p: PWideChar; up: PUtf8Char): boolean;

/// check case-insensitive matching starting of text in upTextStart
// - returns true if the item matched
// - ignore case - upTextStart must be already in upper case
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
// - see StartWithExact() from mormot.core.text for a case-sensitive version
function StartWith(const text, upTextStart: RawUtf8): boolean;

/// check case-insensitive matching ending of text in upTextEnd
// - returns true if the item matched
// - ignore case - upTextEnd must be already in upper case
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
// - see EndWithExact() from mormot.core.text for a case-sensitive version
function EndWith(const text, upTextEnd: RawUtf8): boolean;

/// returns the index of a case-insensitive matching ending of p^ in upArray[]
// - returns -1 if no item matched
// - ignore case - upArray[] items must be already in upper case
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
function EndWithArray(const text: RawUtf8; const upArray: array of RawUtf8): integer;

/// returns true if the file name extension contained in p^ is the same same as extup^
// - ignore case - extup^ must be already Upper
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
// - could be used e.g. like IdemFileExt(aFileName,'.JP');
function IdemFileExt(p: PUtf8Char; extup: PAnsiChar; sepChar: AnsiChar = '.'): boolean;

/// returns matching file name extension index as extup^
// - ignore case - extup[] must be already Upper
// - chars are compared as 7-bit Ansi only (no accentuated chars, nor UTF-8)
// - could be used e.g. like IdemFileExts(aFileName,['.PAS','.INC']);
function IdemFileExts(p: PUtf8Char; const extup: array of PAnsiChar;
  sepChar: AnsiChar = '.'): integer;

/// fast retrieve the position of any value of a given set of characters
// - see also strspn() function which is likely to be faster
function PosCharAny(Str: PUtf8Char; Characters: PAnsiChar): PUtf8Char;

/// a non case-sensitive RawUtf8 version of Pos()
// - uppersubstr is expected to be already in upper case
// - this version handle only 7-bit ASCII (no accentuated characters)
// - see PosIU() if you want an UTF-8 version with accentuated chars support
function PosI(uppersubstr: PUtf8Char; const str: RawUtf8): PtrInt;

/// a non case-sensitive version of Pos()
// - uppersubstr is expected to be already in upper case
// - this version handle only 7-bit ASCII (no accentuated characters)
function StrPosI(uppersubstr, str: PUtf8Char): PUtf8Char;

/// a non case-sensitive RawUtf8 version of Pos()
// - substr is expected to be already in upper case
// - this version will decode the UTF-8 content before using NormToUpper[]
// - see PosI() for a non-accentuated, but faster version
function PosIU(substr: PUtf8Char; const str: RawUtf8): integer;

/// pure pascal version of strspn(), to be used with PUtf8Char/PAnsiChar
// - returns size of initial segment of s which appears in accept chars, e.g.
// ! strspn('abcdef','debca')=5
// - please note that this optimized version may read up to 3 bytes beyond
// accept but never after s end, so is safe e.g. over memory mapped files
function strspn(s, accept: pointer): integer;

/// pure pascal version of strcspn(), to be used with PUtf8Char/PAnsiChar
// - returns size of initial segment of s which doesn't appears in reject chars, e.g.
// ! strcspn('1234,6789',',')=4
// - please note that this optimized version may read up to 3 bytes beyond
// reject but never after s end, so is safe e.g. over memory mapped files
function strcspn(s, reject: pointer): integer;

/// our fast version of StrCompL(), to be used with PUtf8Char
// - i.e. make a binary comparison of two memory buffers, using supplied length
// - Default value is returned if both P1 and P2 buffers are equal
function StrCompL(P1, P2: pointer; L: PtrInt; Default: PtrInt = 0): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// our fast version of StrCompIL(), to be used with PUtf8Char
// - i.e. make a case-insensitive comparison of two memory buffers, using
// supplied length
// - Default value is returned if both P1 and P2 buffers are equal
function StrCompIL(P1, P2: pointer; L: PtrInt; Default: PtrInt = 0): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// our fast version of StrIComp(), to be used with PUtf8Char/PAnsiChar
function StrIComp(Str1, Str2: pointer): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// StrIComp-like function with a lookup table and Str1/Str2 expected not nil
function StrICompNotNil(Str1, Str2: pointer; Up: PNormTableByte): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// StrIComp-like function with a length, lookup table and Str1/Str2 expected not nil
function StrICompLNotNil(Str1, Str2: pointer; Up: PNormTableByte; L: PtrInt): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// StrIComp function with a length, lookup table and Str1/Str2 expected not nil
// - returns L for whole match, or < L for a partial match
function StrILNotNil(Str1, Str2: pointer; Up: PNormTableByte; L: PtrInt): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

type
  /// function prototype used internally for UTF-8 buffer comparison
  // - also used e.g. in mormot.core.variants unit
  TUtf8Compare = function(P1, P2: PUtf8Char): PtrInt;

var
  /// a quick wrapper to StrComp or StrIComp comparison functions
  StrCompByCase: array[{CaseInsensitive=}boolean] of TUtf8Compare;

/// retrieve the next UCS4 CodePoint stored in U, then update the U pointer
// - this function will decode the UTF-8 content before using NormToUpper[]
// - will return '?' if the UCS4 CodePoint is higher than #255: so use this function
// only if you need to deal with ASCII characters (e.g. it's used for Soundex
// and for ContainsUtf8 function)
function GetNextUtf8Upper(var U: PUtf8Char): Ucs4CodePoint;
  {$ifdef HASINLINE}inline;{$endif}

/// points to the beginning of the next word stored in U
// - returns nil if reached the end of U (i.e. #0 char)
// - here a "word" is a Win-Ansi word, i.e. '0'..'9', 'A'..'Z'
function FindNextUtf8WordBegin(U: PUtf8Char): PUtf8Char;

/// return true if UpperValue (Ansi) is contained in A^ (Ansi)
// - find UpperValue starting at word beginning, not inside words
function FindAnsi(A, UpperValue: PAnsiChar): boolean;

/// return true if UpperValue (Ansi) is contained in U^ (UTF-8 encoded)
// - find UpperValue starting at word beginning, not inside words
// - UTF-8 decoding is done on the fly (no temporary decoding buffer is used)
function FindUtf8(U: PUtf8Char; UpperValue: PAnsiChar): boolean;

/// return true if Upper (Unicode encoded) is contained in U^ (UTF-8 encoded)
// - will use the slow but accurate Operating System API (Win32 or ICU)
// to perform the comparison at Unicode-level
// - consider using StrPosIReference() for our faster Unicode 10.0 version
function FindUnicode(PW: PWideChar; Upper: PWideChar; UpperLen: PtrInt): boolean;

/// return true if up^ is contained inside the UTF-8 buffer p^
// - search up^ at the beginning of every UTF-8 word (aka in Soundex)
// - here a "word" is a Win-Ansi word, i.e. '0'..'9', 'A'..'Z'
// - up^ must be already Upper
function ContainsUtf8(p, up: PUtf8Char): boolean;

/// returns TRUE if the supplied uppercased text is contained in the text buffer
function GetLineContains(p, pEnd, up: PUtf8Char): boolean;
  {$ifdef FPC}inline;{$endif} // Delphi does not like inlining goto+label

/// copy source into a 256 chars dest^ buffer with 7-bit upper case conversion
// - used internally for short keys match or case-insensitive hash
// - returns final dest pointer
// - will copy up to 255 AnsiChar (expect the dest buffer to be defined e.g. as
// array[byte] of AnsiChar on the caller stack)
function UpperCopy255(dest: PAnsiChar; const source: RawUtf8): PAnsiChar; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// copy source^ into a 256 chars dest^ buffer with 7-bit upper case conversion
// - used internally for short keys match or case-insensitive hash
// - returns final dest pointer
// - will copy up to 255 AnsiChar (expect the dest buffer to be defined e.g. as
// array[byte] of AnsiChar on the caller stack)
function UpperCopy255Buf(dest: PAnsiChar; source: PUtf8Char; sourceLen: PtrInt): PAnsiChar;

/// copy source into dest^ with WinAnsi 8-bit upper case conversion
// - used internally for short keys match or case-insensitive hash
// - returns final dest pointer
// - will copy up to 255 AnsiChar (expect the dest buffer to be array[byte] of
// AnsiChar)
function UpperCopyWin255(dest: PWinAnsiChar; const source: RawUtf8): PWinAnsiChar;

/// copy UTF-16 source into dest^ with ASCII 7-bit upper case conversion
// - used internally for short keys match or case-insensitive hash
// - returns final dest pointer
// - will copy up to 255 AnsiChar (expect the dest buffer to be array[byte] of
// AnsiChar), replacing any non WinAnsi character by '?'
function UpperCopy255W(dest: PAnsiChar; const source: SynUnicode): PAnsiChar; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// copy WideChar source into dest^ with upper case conversion
// - used internally for short keys match or case-insensitive hash
// - returns final dest pointer
// - will copy up to 255 AnsiChar (expect the dest buffer to be array[byte] of
// AnsiChar), replacing any non WinAnsi character by '?'
function UpperCopy255W(dest: PAnsiChar; source: PWideChar; L: PtrInt): PAnsiChar; overload;

/// copy source into dest^ with ASCII 7-bit upper case conversion
// - returns final dest pointer
// - will copy up to the source buffer end: so Dest^ should be big enough -
// which will the case e.g. if Dest := pointer(source)
function UpperCopy(dest: PAnsiChar; const source: RawUtf8): PAnsiChar;

/// copy source into dest^ with ASCII 7-bit upper case conversion
// - returns final dest pointer
// - this special version expect source to be a ShortString
function UpperCopyShort(dest: PAnsiChar; const source: ShortString): PAnsiChar;

/// fast UTF-8 comparison handling WinAnsi CP-1252 case folding
// - this version expects u1 and u2 to be zero-terminated
// - decode the UTF-8 content before using NormToUpper[] lookup table
// - match the our SYSTEMNOCASE custom (and default) SQLite 3 collation
// - consider Utf8ICompReference() for Unicode 10.0 support
function Utf8IComp(u1, u2: PUtf8Char): PtrInt;

/// fast UTF-8 comparison handling WinAnsi CP-1252 case folding
// - this version expects u1 and u2 not to be necessary zero-terminated, but
// uses L1 and L2 as length for u1 and u2 respectively
// - decode the UTF-8 content before using NormToUpper[] lookup table
// - consider Utf8ILCompReference() for Unicode 10.0 support
function Utf8ILComp(u1, u2: PUtf8Char; L1, L2: cardinal): PtrInt;

/// copy UTF-8 buffer into dest^ handling WinAnsi CP-1252 NormToUpper[] folding
// - returns the final dest pointer
// - current implementation handles UTF-16 surrogates
function Utf8UpperCopy(Dest, Source: PUtf8Char; SourceChars: cardinal): PUtf8Char;

/// copy UTF-8 buffer into dest^ handling WinAnsi CP-1252 NormToUpper[] folding
// - returns the final dest pointer
// - will copy up to 255 AnsiChar (expect the dest buffer to be array[byte] of
// AnsiChar), with UTF-8 encoding
function Utf8UpperCopy255(dest: PAnsiChar; const source: RawUtf8): PUtf8Char;
  {$ifdef HASINLINE}inline;{$endif}

/// fast case-insensitive Unicode comparison handling ASCII 7-bit chars
// - use the NormToUpperAnsi7Byte[] array, i.e. compare 'a'..'z' as 'A'..'Z'
// - this version expects u1 and u2 to be zero-terminated
function AnsiICompW(u1, u2: PWideChar): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// compare two "array of AnsiString" elements, with no case sensitivity
// - just a wrapper around inlined StrIComp()
function SortDynArrayAnsiStringI(const A, B): integer;

/// compare two "array of PUtf8Char/PAnsiChar" elements, with no case sensitivity
// - just a wrapper around inlined StrIComp()
function SortDynArrayPUtf8CharI(const A, B): integer;

/// compare two "array of RTL string" elements, with no case sensitivity
// - the expected string type is the RTL string
// - just a wrapper around StrIComp() for AnsiString or AnsiICompW() for UNICODE
function SortDynArrayStringI(const A, B): integer;

/// compare two "array of WideString/UnicodeString" elements, with no case sensitivity
// - implemented here since would call AnsiICompW()
function SortDynArrayUnicodeStringI(const A, B): integer;

var
  /// a quick wrapper to SortDynArrayAnsiString or SortDynArrayAnsiStringI
  // comparison functions
  SortDynArrayAnsiStringByCase: array[{CaseInsensitive=}boolean] of TDynArraySortCompare;

/// SameText() overloaded function with proper UTF-8 decoding
// - fast version using NormToUpper[] array for all WinAnsi characters
// - this version will decode each UTF-8 glyph before using NormToUpper[]
// - current implementation handles UTF-16 surrogates as Utf8IComp()
function SameTextU(const S1, S2: RawUtf8): boolean;
  {$ifdef HASINLINE}inline;{$endif}

/// fast conversion of the supplied text into 8-bit uppercase
// - this will not only convert 'a'..'z' into 'A'..'Z', but also accentuated
// latin characters ('e' acute into 'E' e.g.), using NormToUpper[] array
// - it will therefore decode the supplied UTF-8 content to handle more than
// 7-bit of ascii characters (so this function is dedicated to WinAnsi code page
// 1252 characters set)
function UpperCaseU(const S: RawUtf8): RawUtf8;

/// fast conversion of the supplied text into 8-bit lowercase
// - this will not only convert 'A'..'Z' into 'a'..'z', but also accentuated
// latin characters ('E' acute into 'e' e.g.), using NormToLower[] array
// - it will therefore decode the supplied UTF-8 content to handle more than
// 7-bit of ascii characters
function LowerCaseU(const S: RawUtf8): RawUtf8;

/// fast conversion of the supplied text into 8-bit case sensitivity
// - convert the text in-place, returns the resulting length
// - it will decode the supplied UTF-8 content to handle more than 7-bit
// of ascii characters during the conversion (leaving not WinAnsi characters
// untouched)
// - will not set the last char to #0 (caller must do that if necessary)
function ConvertCaseUtf8(P: PUtf8Char; const Table: TNormTableByte): PtrInt;

/// check if the supplied text has some case-insentitive 'a'..'z','A'..'Z' chars
// - will therefore be correct with true UTF-8 content, but only for 7-bit
function IsCaseSensitive(const S: RawUtf8): boolean; overload;

/// check if the supplied text has some case-insentitive 'a'..'z','A'..'Z' chars
// - will therefore be correct with true UTF-8 content, but only for 7-bit
function IsCaseSensitive(P: PUtf8Char; PLen: PtrInt): boolean; overload;

/// low-level function called when inlining UpperCase(Copy) and LowerCase(Copy)
procedure CaseCopy(Text: PUtf8Char; Len: PtrInt; Table: PNormTable;
  var Dest: RawUtf8);

/// low-level function called when inlining UpperCaseSelf and LowerCaseSelf
procedure CaseSelf(var S: RawUtf8; Table: PNormTable);

/// fast conversion of the supplied text into uppercase
// - this will only convert 'a'..'z' into 'A'..'Z' (no NormToUpper use), and
// will therefore be correct with true UTF-8 content, but only for 7-bit
function UpperCase(const S: RawUtf8): RawUtf8;
  {$ifdef HASINLINE} inline; {$endif}

/// fast conversion of the supplied text into uppercase
// - this will only convert 'a'..'z' into 'A'..'Z' (no NormToUpper use), and
// will therefore be correct with true UTF-8 content, but only for 7-bit
procedure UpperCaseCopy(Text: PUtf8Char; Len: PtrInt; var Dest: RawUtf8); overload;
  {$ifdef HASINLINE} inline; {$endif}

/// fast conversion of the supplied text into uppercase
// - this will only convert 'a'..'z' into 'A'..'Z' (no NormToUpper use), and
// will therefore be correct with true UTF-8 content, but only for 7-bit
procedure UpperCaseCopy(const Source: RawUtf8; var Dest: RawUtf8); overload;
  {$ifdef HASINLINE} inline; {$endif}

/// fast in-place conversion of the supplied variable text into uppercase
// - this will only convert 'a'..'z' into 'A'..'Z' (no NormToUpper use), and
// will therefore be correct with true UTF-8 content, but only for 7-bit
procedure UpperCaseSelf(var S: RawUtf8);
  {$ifdef HASINLINE} inline; {$endif}

/// fast conversion of the supplied text into lowercase
// - this will only convert 'A'..'Z' into 'a'..'z' (no NormToLower use), and
// will therefore be correct with true UTF-8 content
function LowerCase(const S: RawUtf8): RawUtf8;
  {$ifdef HASINLINE} inline; {$endif}

/// fast conversion of the supplied text into lowercase
// - this will only convert 'A'..'Z' into 'a'..'z' (no NormToLower use), and
// will therefore be correct with true UTF-8 content
procedure LowerCaseCopy(Text: PUtf8Char; Len: PtrInt; var Dest: RawUtf8);
  {$ifdef HASINLINE} inline; {$endif}

/// fast in-place conversion of the supplied variable text into lowercase
// - this will only convert 'A'..'Z' into 'a'..'z' (no NormToLower use), and
// will therefore be correct with true UTF-8 content, but only for 7-bit
procedure LowerCaseSelf(var S: RawUtf8);
  {$ifdef HASINLINE} inline; {$endif}

/// accurate conversion of the supplied UTF-8 content into the corresponding
// upper-case Unicode characters
// - will use the available API (e.g. Win32 or ICU), so may not be consistent on
// all systems - consider UpperCaseReference() to use our Unicode 10.0 tables
// - will temporary decode S into and from UTF-16 so is likely to be slower
function UpperCaseUnicode(const S: RawUtf8): RawUtf8;

/// accurate conversion of the supplied UTF-8 content into the corresponding
// lower-case Unicode characters
// - will use the available API (e.g. Win32 or ICU), so may not be consistent on
// all systems - and also slower than LowerCase/LowerCaseU versions
function LowerCaseUnicode(const S: RawUtf8): RawUtf8;

/// use the RTL to convert the SynUnicode text to UpperCase
function UpperCaseSynUnicode(const S: SynUnicode): SynUnicode;

/// use the RTL to convert the SynUnicode text to LowerCase
function LowerCaseSynUnicode(const S: SynUnicode): SynUnicode;

/// fast WinAnsi comparison using the NormToUpper[] array for all 8-bit values
function AnsiIComp(Str1, Str2: pointer): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// internal function used when inlining PosExI()
function PosExIPas(pSub, p: PUtf8Char; Offset: PtrUInt;
  Lookup: PNormTable): PtrInt;

/// a ASCII-7 case-insensitive version of PosEx()
// - will use the NormToUpperAnsi7 lookup table for character conversion
function PosExI(const SubStr, S: RawUtf8; Offset: PtrUInt): PtrInt; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// a case-insensitive version of PosEx() with a specified lookup table
// - redirect to mormot.core.base PosEx() if Lookup = nil
function PosExI(const SubStr, S: RawUtf8; Offset: PtrUInt;
  Lookup: PNormTable): PtrInt; overload;
  {$ifdef HASINLINE}inline;{$endif}


{ ************ UTF-8 String Manipulation Functions }

type
  /// used to store a set of 8-bit encoded characters
  TSynAnsicharSet = set of AnsiChar;

  /// used to store a set of 8-bit unsigned integers
  TSynByteSet = set of byte;

  /// a generic callback, which can be used to translate some text on the fly
  // - maps procedure TLanguageFile.Translate(var English: string) signature
  // as defined in mORMoti18n.pas
  // - can be used e.g. for TSynMustache's {{"English text}} callback
  TOnStringTranslate = procedure(var English: string) of object;


/// check case-sensitive matching starting of text in start
// - returns true if the item matched
// - see StartWith() from mormot.core.unicode for a case-insensitive version
function StartWithExact(const text, textStart: RawUtf8): boolean;
  {$ifdef HASINLINE} inline; {$endif}

/// check case-sensitive matching ending of text in ending
// - returns true if the item matched
// - see EndWith() from mormot.core.unicode for a case-insensitive version
function EndWithExact(const text, textEnd: RawUtf8): boolean;
  {$ifdef HASINLINE} inline; {$endif}

/// extract a line from source array of chars
// - next will contain the beginning of next line, or nil if source has ended
function GetNextLine(source: PUtf8Char; out next: PUtf8Char;
  andtrim: boolean = false): RawUtf8;

/// returns n leading characters
function LeftU(const S: RawUtf8; n: PtrInt): RawUtf8;
  {$ifdef HASINLINE} inline; {$endif}

/// returns n trailing characters
function RightU(const S: RawUtf8; n: PtrInt): RawUtf8;

/// trims leading whitespace characters from the string by removing
// new line, space, and tab characters
function TrimLeft(const S: RawUtf8): RawUtf8;

/// trims trailing whitespace characters from the string by removing trailing
// newline, space, and tab characters
function TrimRight(const S: RawUtf8): RawUtf8;

/// trims leading whitespaces of every lines of the UTF-8 text
// - also delete void lines
// - could be used e.g. before FindNameValue() call
// - modification is made in-place so S will be modified
procedure TrimLeftLines(var S: RawUtf8);

/// trim some trailing and ending chars
// - if S is unique (RefCnt=1), will modify the RawUtf8 in place
// - faster alternative to S := copy(S, Left + 1, length(S) - Left - Right)
procedure TrimChars(var S: RawUtf8; Left, Right: PtrInt);

/// returns the supplied text content, without any specified char
// - specify a custom char set to be excluded, e.g. as [#0 .. ' ']
function TrimChar(const text: RawUtf8; const exclude: TSynAnsicharSet): RawUtf8;

/// returns the supplied text content, without one specified char
function TrimOneChar(const text: RawUtf8; exclude: AnsiChar): RawUtf8;

/// returns the supplied text content, without any other char than specified
// - specify a custom char set to be included, e.g. as ['A'..'Z']
function OnlyChar(const text: RawUtf8; const only: TSynAnsicharSet): RawUtf8;

/// returns the supplied text content, without any control char
// - here control chars have an ASCII code in [#0 .. ' '], i.e. text[] <= ' '
function TrimControlChars(const text: RawUtf8): RawUtf8;

/// split a RawUtf8 string into two strings, according to SepStr separator
// - returns true and LeftStr/RightStr if they were separated by SepStr
// - if SepStr is not found, LeftStr=Str and RightStr='' and returns false
// - if ToUpperCase is TRUE, then LeftStr and RightStr will be made uppercase
function Split(const Str, SepStr: RawUtf8; var LeftStr, RightStr: RawUtf8;
  ToUpperCase: boolean = false): boolean; overload;

/// split a RawUtf8 string into two strings, according to SepStr separator
// - this overloaded function returns the right string as function result
// - if SepStr is not found, LeftStr=Str and result=''
// - if ToUpperCase is TRUE, then LeftStr and result will be made uppercase
function Split(const Str, SepStr: RawUtf8; var LeftStr: RawUtf8;
  ToUpperCase: boolean = false): RawUtf8; overload;

/// split a RawUtf8 string into several strings, according to SepStr separator
// - this overloaded function will fill a DestPtr[] array of PRawUtf8
// - if any DestPtr[]=nil, the item will be skipped
// - if input Str end before al SepStr[] are found, DestPtr[] is set to ''
// - returns the number of values extracted into DestPtr[]
function Split(const Str: RawUtf8; const SepStr: array of RawUtf8;
  const DestPtr: array of PRawUtf8): PtrInt; overload;

/// returns the last occurrence of the given SepChar separated context
// - e.g. SplitRight('01/2/34','/')='34'
// - if SepChar doesn't appear, will return Str, e.g. SplitRight('123','/')='123'
// - if LeftStr is supplied, the RawUtf8 it points to will be filled with
// the left part just before SepChar ('' if SepChar doesn't appear)
function SplitRight(const Str: RawUtf8; SepChar: AnsiChar; LeftStr: PRawUtf8 = nil): RawUtf8;

/// returns the last occurrence of the given SepChar separated context
// - e.g. SplitRight('path/one\two/file.ext','/\')='file.ext', i.e.
// SepChars='/\' will be like ExtractFileName() over RawUtf8 string
// - if SepChar doesn't appear, will return Str, e.g. SplitRight('123','/')='123'
function SplitRights(const Str, SepChar: RawUtf8): RawUtf8;

/// check all character within text are spaces or control chars
// - i.e. a faster alternative to  if TrimU(text)='' then
function IsVoid(const text: RawUtf8): boolean;

/// fill all bytes of this memory buffer with zeros, i.e. 'toto' -> #0#0#0#0
// - will write the memory buffer directly, if this string instance is not shared
// (i.e. has refcount = 1), to avoid zeroing still-used values
// - may be used to cleanup stack-allocated content
// ! ... finally FillZero(secret); end;
procedure FillZero(var secret: RawByteString); overload;

/// fill all bytes of this UTF-8 string with zeros, i.e. 'toto' -> #0#0#0#0
// - will write the memory buffer directly, if this string instance is not shared
// (i.e. has refcount = 1), to avoid zeroing still-used values
// - may be used to cleanup stack-allocated content
// ! ... finally FillZero(secret); end;
procedure FillZero(var secret: RawUtf8); overload;
  {$ifdef HASINLINE}inline;{$endif}

/// fill all bytes of this UTF-8 string with zeros, i.e. 'toto' -> #0#0#0#0
// - SpiUtf8 type has been defined explicitly to store Sensitive Personal
// Information
procedure FillZero(var secret: SpiUtf8); overload;
  {$ifdef HASINLINE}inline;{$endif}

/// fill all bytes of this dynamic array of bytes with zeros
// - will write the memory buffer directly, if this array instance is not shared
// (i.e. has refcount = 1), to avoid zeroing still-used values
procedure FillZero(var secret: TBytes); overload;

{$ifdef HASVARUSTRING}
/// fill all bytes of this UTF-16 string with zeros, i.e. 'toto' -> #0#0#0#0
procedure FillZero(var secret: UnicodeString); overload;
{$endif HASVARUSTRING}

/// actual replacement function called by StringReplaceAll() on first match
// - not to be called as such, but defined globally for proper inlining
function StringReplaceAllProcess(const S, OldPattern, NewPattern: RawUtf8;
  found: integer; Lookup: PNormTable): RawUtf8;

/// fast version of StringReplace(S, OldPattern, NewPattern, [rfReplaceAll]);
function StringReplaceAll(const S, OldPattern, NewPattern: RawUtf8;
  Lookup: PNormTable = nil): RawUtf8; overload;

/// case-sensitive (or not) StringReplace(S, OldPattern, NewPattern,[rfReplaceAll])
// - calls plain StringReplaceAll() version for CaseInsensitive = false
// - calls StringReplaceAll(.., NormToUpperAnsi7) if CaseInsensitive = true
function StringReplaceAll(const S, OldPattern, NewPattern: RawUtf8;
  CaseInsensitive: boolean): RawUtf8; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// fast version of several cascaded StringReplaceAll()
function StringReplaceAll(const S: RawUtf8;
  const OldNewPatternPairs: array of RawUtf8;
  CaseInsensitive: boolean = false): RawUtf8; overload;

/// fast replace of a specified char by a given string
function StringReplaceChars(const Source: RawUtf8; OldChar, NewChar: AnsiChar): RawUtf8;

/// fast replace of all #9 chars by a given string
function StringReplaceTabs(const Source, TabText: RawUtf8): RawUtf8;

/// UTF-8 dedicated (and faster) alternative to StringOfChar((Ch,Count))
function RawUtf8OfChar(Ch: AnsiChar; Count: integer): RawUtf8;

/// format a text content with SQL-like quotes
// - this function implements what is specified in the official SQLite3
// documentation: "A string constant is formed by enclosing the string in single
// quotes ('). A single quote within the string can be encoded by putting two
// single quotes in a row - as in Pascal."
function QuotedStr(const S: RawUtf8; Quote: AnsiChar = ''''): RawUtf8; overload;

/// format a text content with SQL-like quotes
procedure QuotedStr(const S: RawUtf8; Quote: AnsiChar; var result: RawUtf8); overload;

/// format a text buffer with SQL-like quotes
procedure QuotedStr(P: PUtf8Char; PLen: PtrInt; Quote: AnsiChar;
  var result: RawUtf8); overload;

/// unquote a SQL-compatible string
// - the first character in P^ must be either ' or " then internal double quotes
// are transformed into single quotes
// - 'text '' end'   -> text ' end
// - "text "" end"   -> text " end
// - returns nil if P doesn't contain a valid SQL string
// - returns a pointer just after the quoted text otherwise
function UnQuoteSqlStringVar(P: PUtf8Char; out Value: RawUtf8): PUtf8Char;

/// unquote a SQL-compatible string
function UnQuoteSqlString(const Value: RawUtf8): RawUtf8;

/// unquote a SQL-compatible symbol name
// - e.g. '[symbol]' -> 'symbol' or '"symbol"' -> 'symbol'
function UnQuotedSqlSymbolName(const ExternalDBSymbol: RawUtf8): RawUtf8;

/// get the next character after a quoted buffer
// - the first character in P^ must be either ', either "
// - it will return the latest quote position, ignoring double quotes within
function GotoEndOfQuotedString(P: PUtf8Char): PUtf8Char;
  {$ifdef HASINLINE}inline;{$endif}

/// get the next character not in [#1..' ']
function GotoNextNotSpace(P: PUtf8Char): PUtf8Char;
  {$ifdef HASINLINE}inline;{$endif}

/// get the next character not in [#9,' ']
function GotoNextNotSpaceSameLine(P: PUtf8Char): PUtf8Char;
  {$ifdef HASINLINE}inline;{$endif}

/// get the next character in [#0..' ']
function GotoNextSpace(P: PUtf8Char): PUtf8Char;
  {$ifdef HASINLINE}inline;{$endif}

/// check if the next character not in [#1..' '] matchs a given value
// - first ignore any non space character
// - then returns TRUE if P^=ch, setting P to the character after ch
// - or returns FALSE if P^<>ch, leaving P at the level of the unexpected char
function NextNotSpaceCharIs(var P: PUtf8Char; ch: AnsiChar): boolean;
  {$ifdef HASINLINE}inline;{$endif}

/// retrieve the next SQL-like identifier within the UTF-8 buffer
// - will also trim any space (or line feeds) and trailing ';'
// - any comment like '/*nocache*/' will be ignored
// - returns true if something was set to Prop
function GetNextFieldProp(var P: PUtf8Char; var Prop: RawUtf8): boolean;

/// retrieve the next identifier within the UTF-8 buffer on the same line
// - GetNextFieldProp() will just handle line feeds (and ';') as spaces - which
// is fine e.g. for SQL, but not for regular config files with name/value pairs
// - returns true if something was set to Prop
function GetNextFieldPropSameLine(var P: PUtf8Char; var Prop: ShortString): boolean;

/// return true if IdemPChar(source,searchUp), and go to the next line of source
function IdemPCharAndGetNextLine(var source: PUtf8Char; searchUp: PAnsiChar): boolean;

/// search for a value from its uppercased named entry
// - i.e. iterate IdemPChar(source,UpperName) over every line of the source
// - returns the text just after UpperName if it has been found at line beginning
// - returns nil if UpperName was not found at any line beginning
// - could be used e.g. to efficently extract a value from HTTP headers, whereas
// FindIniNameValue() is tuned for [section]-oriented INI files
function FindNameValue(P: PUtf8Char; UpperName: PAnsiChar): PUtf8Char; overload;

/// search and returns a value from its uppercased named entry
// - i.e. iterate IdemPChar(source,UpperName) over every line of the source
// - returns true and the trimmed text just after UpperName into Value
// if it has been found at line beginning
// - returns false and set Value := '' if UpperName was not found (or leave
// Value untouched if KeepNotFoundValue is true)
// - could be used e.g. to efficently extract a value from HTTP headers, whereas
// FindIniNameValue() is tuned for [section]-oriented INI files
// - do TrimLeftLines(NameValuePairs) first if the lines start with spaces/tabs
function FindNameValue(const NameValuePairs: RawUtf8; UpperName: PAnsiChar;
  var Value: RawUtf8; KeepNotFoundValue: boolean = false;
  UpperNameSeparator: AnsiChar = #0): boolean; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// search and returns a PUtf8Char value from its uppercased named entry
// - as called when inlining FindNameValue()
// - won't make any memory allocation, so could be fine for a quick lookup
function FindNameValuePointer(NameValuePairs: PUtf8Char; UpperName: PAnsiChar;
  out FoundLen: PtrInt; UpperNameSeparator: AnsiChar): PUtf8Char;

/// compute the line length from source array of chars
// - if PEnd = nil, end counting at either #0, #13 or #10
// - otherwise, end counting at either #13 or #10
// - just a wrapper around BufferLineLength() checking PEnd=nil case
function GetLineSize(P, PEnd: PUtf8Char): PtrUInt;
  {$ifdef HASINLINE}inline;{$endif}

/// returns true if the line length from source array of chars is not less than
// the specified count
function GetLineSizeSmallerThan(P, PEnd: PUtf8Char; aMinimalCount: integer): boolean;

{$ifndef PUREMORMOT2}
/// return next string delimited with #13#10 from P, nil if no more
// - this function returns a RawUnicode string type
function GetNextStringLineToRawUnicode(var P: PChar): RawUnicode;
{$endif PUREMORMOT2}

/// trim first lowercase chars ('otDone' will return 'Done' e.g.)
// - return a PUtf8Char to avoid any memory allocation
function TrimLeftLowerCase(const V: RawUtf8): PUtf8Char;

/// trim first lowercase chars ('otDone' will return 'Done' e.g.)
// - return an RawUtf8 string: enumeration names are pure 7-bit ANSI with Delphi 7
// to 2007, and UTF-8 encoded with Delphi 2009+
function TrimLeftLowerCaseShort(V: PShortString): RawUtf8;

/// trim first lowercase chars ('otDone' will return 'Done' e.g.)
// - return a ShortString: enumeration names are pure 7-bit ANSI with Delphi 7
// to 2007, and UTF-8 encoded with Delphi 2009+
function TrimLeftLowerCaseToShort(V: PShortString): ShortString; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// trim first lowercase chars ('otDone' will return 'Done' e.g.)
// - return a ShortString: enumeration names are pure 7-bit ANSI with Delphi 7
// to 2007, and UTF-8 encoded with Delphi 2009+
procedure TrimLeftLowerCaseToShort(V: PShortString; out result: ShortString); overload;

/// fast append some UTF-8 text into a ShortString, with an ending ','
procedure AppendShortComma(text: PAnsiChar; len: PtrInt; var result: ShortString;
  trimlowercase: boolean);   {$ifdef FPC} inline; {$endif}

/// fast search of an exact case-insensitive match of a RTTI's PShortString array
function FindShortStringListExact(List: PShortString; MaxValue: integer;
  aValue: PUtf8Char; aValueLen: PtrInt): integer;

/// fast case-insensitive search of a left-trimmed lowercase match
// of a RTTI's PShortString array
function FindShortStringListTrimLowerCase(List: PShortString; MaxValue: integer;
  aValue: PUtf8Char; aValueLen: PtrInt): integer;

/// fast case-sensitive search of a left-trimmed lowercase match
// of a RTTI's PShortString array
function FindShortStringListTrimLowerCaseExact(List: PShortString; MaxValue: integer;
  aValue: PUtf8Char; aValueLen: PtrInt): integer;

/// convert a CamelCase string into a space separated one
// - 'OnLine' will return 'On line' e.g., and 'OnMyLINE' will return 'On my LINE'
// - will handle capital words at the beginning, middle or end of the text, e.g.
// 'KLMFlightNumber' will return 'KLM flight number' and 'GoodBBCProgram' will
// return 'Good BBC program'
// - will handle a number at the beginning, middle or end of the text, e.g.
// 'Email12' will return 'Email 12'
// - '_' char is transformed into ' - '
// - '__' chars are transformed into ': '
// - return an RawUtf8 string: enumeration names are pure 7-bit ANSI with Delphi
// up to 2007, and UTF-8 encoded with Delphi 2009+
function UnCamelCase(const S: RawUtf8): RawUtf8; overload;

/// convert a CamelCase string into a space separated one
// - 'OnLine' will return 'On line' e.g., and 'OnMyLINE' will return 'On my LINE'
// - will handle capital words at the beginning, middle or end of the text, e.g.
// 'KLMFlightNumber' will return 'KLM flight number' and 'GoodBBCProgram' will
// return 'Good BBC program'
// - will handle a number at the beginning, middle or end of the text, e.g.
// 'Email12' will return 'Email 12'
// - return the char count written into D^
// - D^ and P^ are expected to be UTF-8 encoded: enumeration and property names
// are pure 7-bit ANSI with Delphi 7 to 2007, and UTF-8 encoded with Delphi 2009+
// - '_' char is transformed into ' - '
// - '__' chars are transformed into ': '
function UnCamelCase(D, P: PUtf8Char): integer; overload;

/// convert a string into an human-friendly CamelCase identifier
// - replacing spaces or punctuations by an uppercase character
// - as such, it is not the reverse function to UnCamelCase()
procedure CamelCase(P: PAnsiChar; len: PtrInt; var s: RawUtf8;
  const isWord: TSynByteSet = [ord('0')..ord('9'), ord('a')..ord('z'), ord('A')..ord('Z')]); overload;

/// convert a string into an human-friendly CamelCase identifier
// - replacing spaces or punctuations by an uppercase character
// - as such, it is not the reverse function to UnCamelCase()
procedure CamelCase(const text: RawUtf8; var s: RawUtf8;
  const isWord: TSynByteSet = [ord('0')..ord('9'), ord('a')..ord('z'), ord('A')..ord('Z')]); overload;
  {$ifdef HASINLINE}inline;{$endif}

var
  /// these procedure type must be defined if a default system.pas is used
  // - expect generic "string" type, i.e. UnicodeString for Delphi 2009+
  LoadResStringTranslate: procedure(var Text: string) = nil;

/// UnCamelCase and translate a char buffer
// - P is expected to be #0 ended
// - return "string" type, i.e. UnicodeString for Delphi 2009+
procedure GetCaptionFromPCharLen(P: PUtf8Char; out result: string);


{ ************ TRawUtf8DynArray Processing Functions }

/// returns TRUE if Value is nil or all supplied Values[] equal ''
function IsZero(const Values: TRawUtf8DynArray): boolean; overload;

/// quick helper to initialize a dynamic array of RawUtf8 from some constants
// - can be used e.g. as:
// ! MyArray := TRawUtf8DynArrayFrom(['a','b','c']);
function TRawUtf8DynArrayFrom(const Values: array of RawUtf8): TRawUtf8DynArray;

/// low-level efficient search of Value in Values[]
// - CaseSensitive=false will use StrICmp() for A..Z / a..z equivalence
function FindRawUtf8(Values: PRawUtf8; const Value: RawUtf8; ValuesCount: integer;
  CaseSensitive: boolean): integer; overload;

/// return the index of Value in Values[], -1 if not found
// - CaseSensitive=false will use StrICmp() for A..Z / a..z equivalence
function FindRawUtf8(const Values: TRawUtf8DynArray; const Value: RawUtf8;
  CaseSensitive: boolean = true): integer; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// return the index of Value in Values[], -1 if not found
// - CaseSensitive=false will use StrICmp() for A..Z / a..z equivalence
function FindRawUtf8(const Values: array of RawUtf8; const Value: RawUtf8;
  CaseSensitive: boolean = true): integer; overload;

/// true if Value was added successfully in Values[]
function AddRawUtf8(var Values: TRawUtf8DynArray; const Value: RawUtf8;
  NoDuplicates: boolean = false; CaseSensitive: boolean = true): boolean; overload;

/// add the Value to Values[], with an external count variable, for performance
function AddRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  const Value: RawUtf8): PtrInt; overload;

/// add Value[] items to Values[]
procedure AddRawUtf8(var Values: TRawUtf8DynArray; const Value: TRawUtf8DynArray); overload;

/// add Value[] items to Values[], with an external count variable, for performance
procedure AddRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  const Value: TRawUtf8DynArray); overload;

/// true if both TRawUtf8DynArray are the same
// - comparison is case-sensitive
function RawUtf8DynArrayEquals(const A, B: TRawUtf8DynArray): boolean; overload;

/// true if both TRawUtf8DynArray are the same for a given number of items
// - A and B are expected to have at least Count items
// - comparison is case-sensitive
function RawUtf8DynArrayEquals(const A, B: TRawUtf8DynArray;
  Count: integer): boolean; overload;

/// add the Value to Values[] string array
function AddString(var Values: TStringDynArray; const Value: string): PtrInt;

/// convert the string dynamic array into a dynamic array of UTF-8 strings
procedure StringDynArrayToRawUtf8DynArray(const Source: TStringDynArray;
  var result: TRawUtf8DynArray);

/// convert the string list into a dynamic array of UTF-8 strings
procedure StringListToRawUtf8DynArray(Source: TStringList;
  var result: TRawUtf8DynArray);

/// retrieve the index where to insert a PUtf8Char in a sorted PUtf8Char array
// - R is the last index of available entries in P^ (i.e. Count-1)
// - string comparison is case-sensitive StrComp (so will work with any PAnsiChar)
// - returns -1 if the specified Value was found (i.e. adding will duplicate a value)
// - will use fast O(log(n)) binary search algorithm
function FastLocatePUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char): PtrInt; overload;
  {$ifdef HASINLINE}inline;{$endif}

/// retrieve the index where to insert a PUtf8Char in a sorted PUtf8Char array
// - this overloaded function accept a custom comparison function for sorting
// - R is the last index of available entries in P^ (i.e. Count-1)
// - string comparison is case-sensitive (so will work with any PAnsiChar)
// - returns -1 if the specified Value was found (i.e. adding will duplicate a value)
// - will use fast O(log(n)) binary search algorithm
function FastLocatePUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char; Compare: TUtf8Compare): PtrInt; overload;

/// retrieve the index where is located a PUtf8Char in a sorted PUtf8Char array
// - R is the last index of available entries in P^ (i.e. Count-1)
// - string comparison is case-sensitive StrComp (so will work with any PAnsiChar)
// - returns -1 if the specified Value was not found
// - will use inlined binary search algorithm with optimized x86_64 branchless asm
// - slightly faster than plain FastFindPUtf8CharSorted(P,R,Value,@StrComp)
function FastFindPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char): PtrInt; overload;

/// retrieve the index where is located a PUtf8Char in a sorted uppercase array
// - P[] array is expected to be already uppercased
// - searched Value is converted to uppercase before search via UpperCopy255Buf(),
// so is expected to be short, i.e. length < 250
// - R is the last index of available entries in P^ (i.e. Count-1)
// - returns -1 if the specified Value was not found
// - will use fast O(log(n)) binary search algorithm
// - slightly faster than plain FastFindPUtf8CharSorted(P,R,Value,@StrIComp)
function FastFindUpperPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char; ValueLen: PtrInt): PtrInt;
  {$ifdef HASINLINE}inline;{$endif}

/// retrieve the index where is located a PUtf8Char in a sorted PUtf8Char array
// - R is the last index of available entries in P^ (i.e. Count-1)
// - string comparison will use the specified Compare function
// - returns -1 if the specified Value was not found
// - will use fast O(log(n)) binary search algorithm
function FastFindPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char; Compare: TUtf8Compare): PtrInt; overload;

/// retrieve the index of a PUtf8Char in a PUtf8Char array via a sort indexed
// - will use fast O(log(n)) binary search algorithm
function FastFindIndexedPUtf8Char(P: PPUtf8CharArray; R: PtrInt;
  var SortedIndexes: TCardinalDynArray; Value: PUtf8Char;
  ItemComp: TUtf8Compare): PtrInt;

/// add a RawUtf8 value in an alphaticaly sorted dynamic array of RawUtf8
// - returns the index where the Value was added successfully in Values[]
// - returns -1 if the specified Value was already present in Values[]
//  (we must avoid any duplicate for O(log(n)) binary search)
// - if CoValues is set, its content will be moved to allow inserting a new
// value at CoValues[result] position - a typical usage of CoValues is to store
// the corresponding ID to each RawUtf8 item
// - if FastLocatePUtf8CharSorted() has been already called, this index can
// be set to optional ForceIndex parameter
// - by default, exact (case-sensitive) match is used; you can specify a custom
// compare function if needed in Compare optional parameter
function AddSortedRawUtf8(var Values: TRawUtf8DynArray;
  var ValuesCount: integer; const Value: RawUtf8;
  CoValues: PIntegerDynArray = nil; ForcedIndex: PtrInt = -1;
  Compare: TUtf8Compare = nil): PtrInt;

/// delete a RawUtf8 item in a dynamic array of RawUtf8
// - if CoValues is set, the integer item at the same index is also deleted
function DeleteRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  Index: integer; CoValues: PIntegerDynArray = nil): boolean; overload;

/// delete a RawUtf8 item in a dynamic array of RawUtf8;
function DeleteRawUtf8(var Values: TRawUtf8DynArray;
  Index: PtrInt): boolean; overload;

/// sort a dynamic array of RawUtf8 items
// - if CoValues is set, the integer items are also synchronized
// - by default, exact (case-sensitive) match is used; you can specify a custom
// compare function if needed in Compare optional parameter
procedure QuickSortRawUtf8(var Values: TRawUtf8DynArray; ValuesCount: integer;
  CoValues: PIntegerDynArray = nil; Compare: TUtf8Compare = nil); overload;

/// sort a RawUtf8 array, low values first
procedure QuickSortRawUtf8(Values: PRawUtf8Array; L, R: PtrInt;
  caseInsensitive: boolean = false); overload;

{$ifdef OSPOSIX}
type
  /// monitor a POSIX folder for all its file names, and allow efficient
  // case-insensitive search, as it would on a Windows file system
  // - will use our fast PosixFileNames() low-level API to read the names
  // and store them into its in-memory cache (until Flush or after FlushSeconds)
  TPosixFileCaseInsensitive = class
  protected
    fSafe: TRWLightLock;
    fFiles: TRawUtf8DynArray;
    fFolder: TFileName;
    fNextTix, fFlushSeconds: integer;
    fSubFolders: boolean;
    procedure SetFolder(const aFolder: TFileName);
    procedure SetSubFolders(aSubFolders: boolean);
  public
    /// initialize the file names lookup
    constructor Create(const aFolder: TFileName; aSubFolders: boolean); reintroduce;
    /// to be called on a regular pace (e.g. every second) to perform FlushSeconds
    procedure OnIdle(tix64: Int64);
    /// clear the internal list to force full reload of the directory
    procedure Flush;
    /// case-insensitive search for a given TFileName in the folder
    // - returns '' if not found, or the exact file name in the POSIX folder
    // - is thread-safe and non blocking during its lookup
    // - can optionally return MicroSec spent for actual filenames read on disk
    function Find(const aSearched: TFileName; aReadMs: PInteger = nil): TFileName;
    /// how many file entries are currently in the internal list
    function Count: PtrInt;
    /// make a dynamic array copy of the internal file names, sorted by StrIComp
    function Files: TRawUtf8DynArray;
    /// allow to change the monitored folder at runtime
    property Folder: TFileName
      read fFolder write SetFolder;
    /// define if sub-folders should also be included to the internal list
    property SubFolders: boolean
      read fSubFolders write SetSubFolders;
    /// after how many seconds OnIdle() should flush the internal cache
    // - default is 60, i.e. 1 minute
    // - you can set 0 to disable any auto-flush from OnIdle()
    property FlushSeconds: integer
      read fFlushSeconds write fFlushSeconds;
  end;
{$endif OSPOSIX}


{ ************** Operating-System Independent Unicode Process }

/// UpperCase conversion of a UTF-8 buffer using our Unicode 10.0 tables
// - won't call the Operating System, so is consistent on all platforms,
// whereas UpperCaseUnicode() may vary depending on each library implementation
// - some codepoints enhance in length, so D^ should be at least twice than S^
// - any invalid input is replaced by UNICODE_REPLACEMENT_CHARACTER=$fffd
// - won't use temporary UTF-16 decoding, and optimized for plain ASCII content
function Utf8UpperReference(S, D: PUtf8Char): PUtf8Char; overload;

/// UpperCase conversion of a UTF-8 buffer using our Unicode 10.0 tables
// - won't call the Operating System, so is consistent on all platforms,
// whereas UpperCaseUnicode() may vary depending on each library implementation
// - some codepoints enhance in length, so D^ should be at least twice than S^
// - any invalid input is replaced by UNICODE_REPLACEMENT_CHARACTER=$fffd
// - won't use temporary UTF-16 decoding, and optimized for plain ASCII content
// - knowing the Source length, this function will handle any ASCII 7-bit input
// by quad, for efficiency
function Utf8UpperReference(S, D: PUtf8Char; SLen: PtrUInt): PUtf8Char; overload;

/// UpperCase conversion of a UTF-8 string using our Unicode 10.0 tables
// - won't call the Operating System, so is consistent on all platforms,
// whereas UpperCaseUnicode() may vary depending on each library implementation
// - won't use temporary UTF-16 decoding, and optimized for plain ASCII content
function UpperCaseReference(const S: RawUtf8): RawUtf8;

/// UTF-8 comparison using our Unicode 10.0 tables
// - this version expects u1 and u2 to be zero-terminated
// - Utf8IComp() handles WinAnsi CP-1252 latin accents - this one is Unicode
// - won't call the Operating System, so is consistent on all platforms, and
// don't require any temporary UTF-16 decoding
// - has a branchless optimized process of 7-bit ASCII charset [a..z] -> [A..Z]
function Utf8ICompReference(u1, u2: PUtf8Char): PtrInt;

/// UTF-8 comparison using our Unicode 10.0 tables
// - this version expects u1 and u2 not to be necessary zero-terminated, but
// uses L1 and L2 as length for u1 and u2 respectively
// - Utf8ILComp() handles WinAnsi CP-1252 latin accents - this one is Unicode
// - won't call the Operating System, so is consistent on all platforms, and
// don't require any temporary UTF-16 decoding
// - has a branchless optimized process of 7-bit ASCII charset [a..z] -> [A..Z]
function Utf8ILCompReference(u1, u2: PUtf8Char; L1, L2: integer): PtrInt;

/// UpperCase conversion of UTF-8 into UCS4 using our Unicode 10.0 tables
// - won't call the Operating System, so is consistent on all platforms,
// whereas UpperCaseUnicode() may vary depending on each library implementation
function UpperCaseUcs4Reference(const S: RawUtf8): RawUcs4;

/// UTF-8 Unicode 10.0 case-insensitive Pattern search within UTF-8 buffer
// - returns nil if no match, or the Pattern position found inside U^
// - Up should have been already converted using UpperCaseUcs4Reference()
// - won't call the Operating System, so is consistent on all platforms, and
// don't require any temporary UTF-16 decoding
function StrPosIReference(U: PUtf8Char; const Up: RawUcs4): PUtf8Char;


implementation


{ *************** UTF-8 Efficient Encoding / Decoding }

{ TUtf8Table }

function TUtf8Table.GetHighUtf8Ucs4(var U: PUtf8Char): Ucs4CodePoint;
var
  x, i: PtrInt;
  v: byte;
  c: PtrUInt;
begin
  result := 0;
  c := byte(U^); // here c=U^>=#80
  inc(U);
  x := Lookup[c];
  if x = UTF8_INVALID then
    exit;
  i := 0;
  repeat
    v := byte(U[i]);
    if v and $c0 <> $80 then
      exit; // invalid input content
    c := (c shl 6) + v;
    inc(i);
  until i = x;
  inc(U, x);
  with Extra[x] do
  begin
    dec(c, offset);
    if c < minimum then
      exit; // invalid input content
  end;
  result := c;
end;

function GetHighUtf8Ucs4(var U: PUtf8Char): Ucs4CodePoint;
begin
  result := UTF8_TABLE.GetHighUtf8Ucs4(U);
end;

function GetUtf8WideChar(P: PUtf8Char): cardinal;
begin
  if P <> nil then
  begin
    result := byte(P^);
    if result > $7f then
    begin
      result := UTF8_TABLE.GetHighUtf8Ucs4(P);
      if result > $ffff then
        // surrogates can't be stored in a single UTF-16 WideChar
        result := UNICODE_REPLACEMENT_CHARACTER;
    end;
  end
  else
    result := PtrUInt(P);
end;

function NextUtf8Ucs4(var P: PUtf8Char): Ucs4CodePoint;
begin
  if P <> nil then
  begin
    result := byte(P[0]);
    if result <= 127 then
      inc(P)
    else
      if result and $20 = 0 then
      begin
        // fast $0..$7ff process
        result := (result shl 6) + byte(P[1]) - $3080;
        inc(P, 2);
      end
      else
        // complex but efficient wrapper handling even UTF-16 surrogates
        result := UTF8_TABLE.GetHighUtf8Ucs4(P);
  end
  else
    result := 0;
end;

function Ucs4ToUtf8(ucs4: Ucs4CodePoint; Dest: PUtf8Char): PtrInt;
var
  j: PtrInt;
begin
  if ucs4 <= $7f then
  begin
    Dest^ := AnsiChar(ucs4);
    result := 1;
  end
  else if ucs4 <= $7ff then
  begin
    Dest[0] := AnsiChar($C0 or (ucs4 shr 6));
    Dest[1] := AnsiChar($80 or (ucs4 and $3F));
    result := 2;
  end
  else if ucs4 <= $ffff then
  begin
    Dest[0] := AnsiChar($E0 or (ucs4 shr 12));
    Dest[1] := AnsiChar($80 or ((ucs4 shr 6) and $3F));
    Dest[2] := AnsiChar($80 or (ucs4 and $3F));
    result := 3;
  end
  else
  begin
    // here ucs4 > $ffff
    if ucs4 <= $1FFFFF then
      result := 4
    else if ucs4 <= $3FFFFFF then
      result := 5
    else
      result := 6;
    j := result - 1;
    repeat
      Dest[j] := AnsiChar((ucs4 and $3f) or $80);
      ucs4 := ucs4 shr 6;
      dec(j);
    until j = 0;
    Dest^ := AnsiChar(byte(ucs4) or UTF8_TABLE.FirstByte[result]);
  end;
end;

function Utf16CharToUtf8(Dest: PUtf8Char; var Source: PWord): integer;
var
  c: cardinal;
begin
  c := Source^;
  inc(Source);
  case c of
    0..$7f:
      begin
        Dest^ := AnsiChar(c);
        result := 1;
        exit;
      end;
    UTF16_HISURROGATE_MIN..UTF16_HISURROGATE_MAX:
      begin
        c := ((c - UTF16_SURROGATE_OFFSET) shl 10) or
              (Source^ xor UTF16_LOSURROGATE_MIN);
        inc(Source);
      end;
    UTF16_LOSURROGATE_MIN..UTF16_LOSURROGATE_MAX:
      begin
        c := ((cardinal(Source^) - UTF16_SURROGATE_OFFSET) shl 10) or
              (c xor UTF16_LOSURROGATE_MIN);
        inc(Source);
      end;
  end;
  // now c is the UTF-32/UCS4 code point
  result := Ucs4ToUtf8(c, Dest);
end;

function RawUnicodeToUtf8(Dest: PUtf8Char; DestLen: PtrInt; Source: PWideChar;
  SourceLen: PtrInt; Flags: TCharConversionFlags): PtrInt;
var
  c: cardinal;
  Tail: PWideChar;
  i, j: integer;
label
  unmatch;
begin
  result := PtrInt(Dest);
  inc(DestLen, PtrInt(Dest));
  if (Source <> nil) and
     (SourceLen > 0) and
     (Dest <> nil) then
  begin
    // ignore any trailing BOM (do exist on Windows files)
    if Source^ = BOM_UTF16LE then
    begin
      inc(Source);
      dec(SourceLen);
    end;
    // first handle 7-bit ASCII WideChars, by pairs (Sha optimization)
    SourceLen := SourceLen * 2 + PtrInt(PtrUInt(Source));
    Tail := PWideChar(SourceLen) - 2;
    if (PtrInt(PtrUInt(Dest)) < DestLen) and
       (Source <= Tail) then
      repeat
        c := PCardinal(Source)^;
        if c and $ff80ff80 <> 0 then
          break; // break on first non ASCII pair
        inc(Source, 2);
        c := c shr 8 or c;
        PWord(Dest)^ := c;
        inc(Dest, 2);
      until (Source > Tail) or
            (PtrInt(PtrUInt(Dest)) >= DestLen);
    // generic loop, handling one UCS4 CodePoint per iteration
    if (PtrInt(PtrUInt(Dest)) < DestLen) and
       (PtrInt(PtrUInt(Source)) < SourceLen) then
      repeat
      // inlined Utf16CharToUtf8() with bufferoverlow check and $FFFD on unmatch
        c := cardinal(Source^);
        inc(Source);
        case c of
          0..$7f:
            begin
              Dest^ := AnsiChar(c);
              inc(Dest);
              if (PtrInt(PtrUInt(Dest)) < DestLen) and
                 (PtrInt(PtrUInt(Source)) < SourceLen) then
                continue
              else
                break;
            end;
          UTF16_HISURROGATE_MIN..UTF16_HISURROGATE_MAX:
            if (PtrInt(PtrUInt(Source)) >= SourceLen) or
               ((cardinal(Source^) < UTF16_LOSURROGATE_MIN) or
                (cardinal(Source^) > UTF16_LOSURROGATE_MAX)) then
            begin
unmatch:      if (PtrInt(PtrUInt(@Dest[3])) > DestLen) or
                 not (ccfReplacementCharacterForUnmatchedSurrogate in Flags) then
                break;
              PWord(Dest)^ := $BFEF; // UTF-8 UNICODE_REPLACEMENT_CHARACTER
              Dest[2] := AnsiChar($BD);
              inc(Dest, 3);
              if (PtrInt(PtrUInt(Dest)) < DestLen) and
                 (PtrInt(PtrUInt(Source)) < SourceLen) then
                continue
              else
                break;
            end
            else
            begin
              c := ((c - UTF16_SURROGATE_OFFSET) shl 10) or
                   (cardinal(Source^) xor UTF16_LOSURROGATE_MIN);
              inc(Source);
            end;
          UTF16_LOSURROGATE_MIN..UTF16_LOSURROGATE_MAX:
            if (PtrInt(PtrUInt(Source)) >= SourceLen) or
               ((cardinal(Source^) < UTF16_HISURROGATE_MIN) or
                (cardinal(Source^) > UTF16_HISURROGATE_MAX)) then
              goto unmatch
            else
            begin
              c := ((cardinal(Source^) - UTF16_SURROGATE_OFFSET) shl 10) or
                   (c xor UTF16_LOSURROGATE_MIN);
              inc(Source);
            end;
        end; // now c is the UTF-32/UCS4 code point
        if c <= $7ff then
          i := 2
        else if c <= $ffff then
          i := 3
        else if c <= $1FFFFF then
          i := 4
        else if c <= $3FFFFFF then
          i := 5
        else
          i := 6;
        if PtrInt(PtrUInt(Dest)) + i > DestLen then
          break;
        j := i - 1;
        repeat
          Dest[j] := AnsiChar((c and $3f) or $80);
          c := c shr 6;
          dec(j);
        until j = 0;
        Dest^ := AnsiChar(byte(c) or UTF8_TABLE.FirstByte[i]);
        inc(Dest, i);
        if (PtrInt(PtrUInt(Dest)) < DestLen) and
           (PtrInt(PtrUInt(Source)) < SourceLen) then
          continue
        else
          break;
      until false;
    if not (ccfNoTrailingZero in Flags) then
      Dest^ := #0;
  end;
  result := PtrInt(PtrUInt(Dest)) - result;
end;

procedure RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  var result: TSynTempBuffer; Flags: TCharConversionFlags);
begin
  if (WideChar = nil) or
     (WideCharCount = 0) then
    result.Init(0)
  else
  begin
    result.Init(WideCharCount * 3);
    result.Len := RawUnicodeToUtf8(
      result.buf, result.len, WideChar, WideCharCount, Flags);
  end;
end;

procedure RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  var result: RawUtf8; Flags: TCharConversionFlags);
var
  tmp: TSynTempBuffer;
begin
  RawUnicodeToUtf8(WideChar, WideCharCount, tmp, Flags);
  FastSetString(result, tmp.buf, tmp.len);
  tmp.Done;
end;

function RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  Flags: TCharConversionFlags): RawUtf8;
begin
  RawUnicodeToUtf8(WideChar, WideCharCount, result, Flags);
end;

function RawUnicodeToUtf8(WideChar: PWideChar; WideCharCount: integer;
  out Utf8Length: integer): RawUtf8;
var
  LW: integer;
begin
  result := ''; // somewhat faster if result is freed before any SetLength()
  if WideCharCount = 0 then
    exit;
  LW := WideCharCount * 3; // maximum resulting length
  SetLength(result, LW);
  Utf8Length := RawUnicodeToUtf8(pointer(result), LW + 1,
    WideChar, WideCharCount, [ccfNoTrailingZero]);
  if Utf8Length <= 0 then
    result := '';
end;

procedure Utf8ToShortString(var dest: ShortString; source: PUtf8Char);
var
  c: cardinal;
  len, extra, i: integer;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  len := 0;
  if source <> nil then
    repeat
      c := byte(source^);
      inc(source);
      if c = 0 then
        break
      else if c <= 127 then
      begin
        inc(len);
        dest[len] := AnsiChar(c);
        if len < 253 then
          continue
        else
          break;
      end
      else
      begin
        extra := utf8.Lookup[c];
        if extra = UTF8_INVALID then
          break;
        i := extra;
        repeat
          if byte(source^) and $c0 <> $80 then
          begin
            dest[0] := AnsiChar(len);
            exit; // invalid UTF-8 content
          end;
          c := (c shl 6) + byte(source^);
          inc(source);
          dec(i);
        until i = 0;
        dec(c, utf8.Extra[extra].offset);
        // #256.. -> slower but accurate conversion
        inc(len);
        if c > $ffff then
          dest[len] := '?'
        else
          dest[len] := AnsiChar(WinAnsiConvert.fWideToAnsi[c]);
        if len < 253 then
          continue
        else
          break;
      end;
    until false;
  dest[0] := AnsiChar(len);
end;

function Utf8ToWideChar(dest: PWideChar; source: PUtf8Char;
  MaxDestChars, sourceBytes: PtrInt; NoTrailingZero: boolean): PtrInt;
// faster than System.Utf8ToUnicode()
var
  c: cardinal;
  begd: PWideChar;
  endSource: PUtf8Char;
  endDest: PWideChar;
  i, extra: integer;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  Quit, NoSource;
begin
  result := 0;
  if dest = nil then
    exit;
  if source = nil then
    goto NoSource;
  if sourceBytes = 0 then
  begin
    if source^ = #0 then
      goto NoSource;
    sourceBytes := StrLen(source);
  end;
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  endSource := source + sourceBytes;
  endDest := dest + MaxDestChars;
  begd := dest;
  repeat
    c := byte(source^);
    inc(source);
    if c <= 127 then
    begin
      PWord(dest)^ := c; // much faster than dest^ := WideChar(c) for FPC
      inc(dest);
      if (source < endSource) and
         (dest < endDest) then
        continue
      else
        break;
    end;
    extra := utf8.Lookup[c];
    if (extra = UTF8_INVALID) or
       (source + extra > endSource) then
      break;
    i := 0;
    repeat
      if byte(source^) and $c0 <> $80 then
        goto Quit; // invalid input content
      c := (c shl 6) + byte(source[i]);
      inc(i);
    until i = extra;
    inc(source, extra);
    with utf8.Extra[extra] do
    begin
      dec(c, offset);
      if c < minimum then
        break; // invalid input content
    end;
    if c <= $ffff then
    begin
      PWord(dest)^ := c;
      inc(dest);
      if (source < endSource) and
         (dest < endDest) then
        continue
      else
        break;
    end;
    dec(c, $10000); // store as UTF-16 surrogates
    PWordArray(dest)[0] := (c shr 10) or UTF16_HISURROGATE_MIN;
    PWordArray(dest)[1] := (c and $3FF) or UTF16_LOSURROGATE_MIN;
    inc(dest, 2);
    if (source >= endSource) or
       (dest >= endDest) then
      break;
  until false;
Quit:
  result := PtrUInt(dest) - PtrUInt(begd); // dest-begd return byte length
NoSource:
  if not NoTrailingZero then
    dest^ := #0; // always append a WideChar(0) to the end of the buffer
end;

function Utf8ToWideChar(dest: PWideChar; source: PUtf8Char; sourceBytes: PtrInt;
  NoTrailingZero: boolean): PtrInt;
// faster than System.UTF8Decode()
var
  c: cardinal;
  begd: PWideChar;
  endSource, endSourceBy4: PUtf8Char;
  i, extra: PtrInt;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  quit, nosource, by1, by4;
begin
  result := 0;
  if dest = nil then
    exit;
  if source = nil then
    goto nosource;
  if sourceBytes = 0 then
  begin
    if source^ = #0 then
      goto nosource;
    sourceBytes := StrLen(source);
  end;
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  begd := dest;
  endSource := source + sourceBytes;
  endSourceBy4 := endSource - 4;
  if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(source) and 3 = 0) and{$endif}
     (source <= endSourceBy4) then
    repeat // handle 7-bit ASCII chars, by quad
      c := PCardinal(source)^;
      if c and $80808080 <> 0 then
        goto by1; // break on first non ASCII quad
by4:  inc(source, 4);
      PCardinal(dest)^ := (c shl 8 or (c and $FF)) and $00ff00ff;
      c := c shr 16;
      PCardinal(dest + 2)^ := (c shl 8 or c) and $00ff00ff;
      inc(dest, 4);
    until source > endSourceBy4;
  if source < endSource then
    repeat
by1:  c := byte(source^);
      inc(source);
      if c <= 127 then
      begin
        PWord(dest)^ := c; // much faster than dest^ := WideChar(c) for FPC
        inc(dest);
        if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(source) and 3 = 0) and{$endif}
           (source <= endSourceBy4) then
        begin
          c := PCardinal(source)^;
          if c and $80808080 = 0 then
            goto by4
          else
            continue;
        end;
        if source < endSource then
          continue
        else
          break;
      end;
      extra := utf8.Lookup[c];
      if (extra = UTF8_INVALID) or
         (source + extra > endSource) then
        break;
      i := 0;
      repeat
        if byte(source^) and $c0 <> $80 then
          goto quit; // invalid input content
        c := (c shl 6) + byte(source[i]);
        inc(i);
      until i = extra;
      inc(source, extra);
      with utf8.Extra[extra] do
      begin
        dec(c, offset);
        if c < minimum then
          break; // invalid input content
      end;
      if c <= $ffff then
      begin
        PWord(dest)^ := c;
        inc(dest);
        if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(source) and 3 = 0) and{$endif}
           (source <= endSourceBy4) then
        begin
          c := PCardinal(source)^;
          if c and $80808080 = 0 then
            goto by4;
          continue;
        end;
        if source < endSource then
          continue
        else
          break;
      end;
      dec(c, $10000); // store as UTF-16 surrogates
      PWordArray(dest)[0] := (c shr 10) or UTF16_HISURROGATE_MIN;
      PWordArray(dest)[1] := (c and $3FF) or UTF16_LOSURROGATE_MIN;
      inc(dest, 2);
      if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(source) and 3 = 0) and{$endif}
         (source <= endSourceBy4) then
      begin
        c := PCardinal(source)^;
        if c and $80808080 = 0 then
          goto by4;
        continue;
      end;
      if source >= endSource then
        break;
    until false;
quit:
  result := PtrUInt(dest) - PtrUInt(begd); // dest-begd returns bytes length
nosource:
  if not NoTrailingZero then
    dest^ := #0; // always append a WideChar(0) to the end of the buffer
end;

function IsValidUtf8Pas(source: PUtf8Char; sourcelen: PtrInt): boolean;
var
  c: byte;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  done;
begin
  inc(PtrUInt(sourcelen), PtrUInt(source) - 4);
  if source = nil then
    goto done;
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  repeat
    if PtrUInt(source) <= PtrUInt(sourcelen) then
    begin
      if utf8.Lookup[ord(source[0])] = UTF8_ASCII then
        if utf8.Lookup[ord(source[1])] = UTF8_ASCII then
          if utf8.Lookup[ord(source[2])] = UTF8_ASCII then
            if utf8.Lookup[ord(source[3])] = UTF8_ASCII then
            begin
              inc(source, 4); // optimized for JSON-like content
              continue;
            end
            else
              inc(source, 3)
          else
            inc(source, 2)
        else
          inc(source);
    end
    else if PtrUInt(source) >= PtrUInt(sourcelen) + 4 then
      break;
    c := utf8.Lookup[ord(source^)];
    inc(source);
    if c = UTF8_ASCII then
      continue
    else if c >= UTF8_INVALID then
      // UTF8_INVALID=6, UTF8_ZERO=7 means unexpected end of input
      break;
    // c = extras -> check valid UTF-8 content
    repeat
      if byte(source^) and $c0 <> $80 then
        goto done;
      inc(source);
      dec(c);
    until c = 0;
  until false;
done:
  result := PtrUInt(source) = PtrUInt(sourcelen) + 4;
end;

function IsValidUtf8(source: PUtf8Char): boolean;
begin
  result := IsValidUtf8Buffer(source, StrLen(source));
end;

function IsValidUtf8(const source: RawUtf8): boolean;
begin
  result := IsValidUtf8Buffer(pointer(source), length(source));
end;

procedure DetectRawUtf8(var source: RawByteString);
begin
  {$ifdef HASCODEPAGE} // do nothing on oldest Delphi
  if (source <> '') and
     IsValidUtf8(source) then
    EnsureRawUtf8(source);
  {$endif HASCODEPAGE}
end;

function IsValidUtf8WithoutControlChars(source: PUtf8Char): boolean;
var
  c: byte;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  result := false;
  if source <> nil then
    repeat
      c := byte(source^);
      inc(source);
      if c <= $7f then
        if c < 32 then
          if c = 0 then
            break // reached end of input
          else
            exit // disallow #1..#31 control char
        else
         continue;
      c := utf8.Lookup[c];
      if c = UTF8_INVALID then
        exit;
      // check valid UTF-8 content
      repeat
        if byte(source^) and $c0 <> $80 then
          exit;
        inc(source);
        dec(c);
      until c = 0;
    until false;
  result := true;
end;

function IsValidUtf8WithoutControlChars(const source: RawUtf8): boolean;
var
  s, len: PtrInt;
  c: byte;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  result := false;
  s := 1;
  len := Length(source);
  while s <= len do
  begin
    c := byte(source[s]);
    inc(s);
    if c < 32 then
      exit // disallow #0..#31 control char
    else if c > $7f then
    begin
      c := utf8.Lookup[c];
      if c = UTF8_INVALID then
        exit;
      // check valid UTF-8 content
      repeat
        if byte(source[s]) and $c0 <> $80 then
          exit;
        inc(s);
        dec(c);
      until c = 0;
    end;
  end;
  result := true;
end;

function Utf8ToUnicodeLength(source: PUtf8Char): PtrUInt;
var
  c: byte;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  result := 0;
  if source <> nil then
    repeat
      c := utf8.Lookup[byte(source^)];
      inc(source);
      if c = UTF8_ASCII then
        inc(result)
      else if c = UTF8_ZERO then
        break
      else if c = UTF8_INVALID then
        exit
      else
      begin
        inc(result, 1 + ord(c >= UTF8_EXTRA_SURROGATE));
        // check valid UTF-8 content
        repeat
          if byte(source^) and $c0 <> $80 then
            exit;
          inc(source);
          dec(c);
        until c = 0;
      end;
    until false;
end;

function Utf8TruncateToUnicodeLength(var text: RawUtf8; maxUtf16: integer): boolean;
var
  c: byte;
  source: PUtf8Char;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  trunc;
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  source := pointer(text);
  if (source <> nil) and
     (cardinal(maxUtf16) < cardinal(Length(text))) then
    repeat
      c := utf8.Lookup[byte(source^)];
      inc(source);
      if c = UTF8_ASCII then
      begin
        dec(maxUtf16);
        if maxUtf16 <> 0 then
          continue;
trunc:  SetLength(text, source - pointer(text));
        result := true;
        exit;
      end
      else if (c = UTF8_ZERO) or
              (c = UTF8_INVALID) then
        break
      else
      begin
        dec(maxUtf16, 1 + ord(c >= UTF8_EXTRA_SURROGATE));
        if maxUtf16 < 0 then
          goto trunc; // not enough place for this UTF-8 codepoint
        // check valid UTF-8 content
        repeat
          if byte(source^) and $c0 <> $80 then
            break;
          inc(source);
          dec(c);
        until c = 0;
        if maxUtf16 = 0 then
          goto trunc;
      end;
    until false;
  result := false;
end;

function Utf8TruncateToLength(var text: RawUtf8; maxBytes: PtrUInt): boolean;
begin
  if PtrUInt(Length(text)) < maxBytes then
  begin
    result := false;
    exit; // nothing to truncate
  end;
  while (maxBytes > 0) and
        (ord(text[maxBytes]) and $c0 = $80) do
    dec(maxBytes);
  if (maxBytes > 0) and
     (text[maxBytes] > #$7f) then
    dec(maxBytes);
  SetLength(text, maxBytes);
  result := true;
end;

function Utf8TruncatedLength(const text: RawUtf8; maxBytes: PtrUInt): PtrInt;
begin
  result := Length(text);
  if PtrUInt(result) < maxBytes then
    exit;
  result := maxBytes;
  if (result = 0) or
     (text[result] <= #$7f) then
    exit; 
  while (result > 0) and
        (ord(text[result]) and $c0 = $80) do
    dec(result);
  if (result > 0) and
     (text[result] > #$7f) then
    dec(result);
end;

function Utf8TruncatedLength(text: PAnsiChar; textlen, maxBytes: PtrUInt): PtrInt;
begin
  result := textlen;
  if textlen < maxBytes then
    exit;
  result := maxBytes;
  while (result > 0) and
        (ord(text[result]) and $c0 = $80) do
    dec(result);
  if (result > 0) and
     (text[result] > #$7f) then
    dec(result);
end;

function Utf8FirstLineToUtf16Length(source: PUtf8Char): PtrInt;
var
  c: PtrUInt;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  result := 0;
  if source <> nil then
    repeat
      c := byte(source^);
      inc(source);
      if c <= $7f then
        if byte(c) in [0, 10, 13] then
          break // #0, #10 or #13 stop the count
        else
          inc(result)
      else
      begin
        c := utf8.Lookup[c];
        if c = UTF8_INVALID then
          exit; // invalid leading byte
        inc(result, 1 + ord(c >= UTF8_EXTRA_SURROGATE));
        inc(source, c); // a bit less safe, but faster
      end;
    until false;
end;



{ **************** UTF-8 / Unicode / Ansi Conversion Classes }

var
  // internal list of TSynAnsiConvert instances
  SynAnsiConvertList: array of TSynAnsiConvert;
  SynAnsiConvertListLock: TRWLightLock;
  SynAnsiConvertListCount: integer;
  SynAnsiConvertListCodePage: TWordDynArray; // for fast lookup in CPU L1 cache


{ TSynAnsiConvert }

function TSynAnsiConvert.AnsiBufferToUnicode(Dest: PWideChar;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PWideChar;
var
  c: cardinal;
begin
  // first handle trailing 7-bit ASCII chars, by quad (Sha optimization)
  if SourceChars >= 4 then
    repeat
      c := PCardinal(Source)^;
      if c and $80808080 <> 0 then
        break; // break on first non ASCII quad
      dec(SourceChars, 4);
      inc(Source, 4);
      PCardinal(Dest)^ := (c shl 8 or (c and $FF)) and $00ff00ff;
      c := c shr 16;
      PCardinal(Dest + 2)^ := (c shl 8 or c) and $00ff00ff;
      inc(Dest, 4);
    until SourceChars < 4;
  if (SourceChars > 0) and
     (ord(Source^) < 128) then
    repeat
      dec(SourceChars);
      PWord(Dest)^ := ord(Source^); // faster than dest^ := WideChar(c) on FPC
      inc(Source);
      inc(Dest);
    until (SourceChars = 0) or
          (ord(Source^) >= 128);
  if SourceChars > 0 then
    // rely on the Operating System for all remaining ASCII characters
    inc(Dest,
      Unicode_AnsiToWide(Source, Dest, SourceChars, SourceChars, fCodePage));
  if not NoTrailingZero then
    Dest^ := #0;
  result := Dest;
end;

function TSynAnsiConvert.AnsiBufferToUtf8(Dest: PUtf8Char; Source: PAnsiChar;
  SourceChars: cardinal; NoTrailingZero: boolean): PUtf8Char;
var
  tmp: TSynTempBuffer;
  c: cardinal;
  U: PWideChar;
begin
  // first handle trailing 7-bit ASCII chars, by quad (Sha optimization)
  if SourceChars >= 4 then
    repeat
      c := PCardinal(Source)^;
      if c and $80808080 <> 0 then
        break; // break on first non ASCII quad
      PCardinal(Dest)^ := c;
      dec(SourceChars, 4);
      inc(Source, 4);
      inc(Dest, 4);
    until SourceChars < 4;
  if (SourceChars > 0) and
     (ord(Source^) < 128) then
    repeat
      Dest^ := Source^;
      dec(SourceChars);
      inc(Source);
      inc(Dest);
    until (SourceChars = 0) or
          (ord(Source^) >= 128);
  // rely on the Operating System for all remaining ASCII characters
  if SourceChars = 0 then
    result := Dest
  else
  begin
    U := AnsiBufferToUnicode(tmp.Init(SourceChars * 3), Source, SourceChars);
    result := Dest + RawUnicodeToUtf8(Dest, SourceChars * 3, tmp.buf,
      (PtrUInt(U) - PtrUInt(tmp.buf)) shr 1, [ccfNoTrailingZero]);
    tmp.Done;
  end;
  if not NoTrailingZero then
    result^ := #0;
end;

// UTF-8 is AT MOST 50% bigger than UTF-16 in bytes in range U+0800..U+FFFF
// see http://stackoverflow.com/a/7008095 -> bytes=WideCharCount*3 below

{$ifndef PUREMORMOT2}

function TSynAnsiConvert.AnsiToRawUnicode(const AnsiText: RawByteString): RawUnicode;
begin
  result := AnsiToRawUnicode(pointer(AnsiText), length(AnsiText));
end;

function TSynAnsiConvert.AnsiToRawUnicode(Source: PAnsiChar;
  SourceChars: cardinal): RawUnicode;
var
  U: PWideChar;
  tmp: TSynTempBuffer;
begin
  if SourceChars = 0 then
    result := ''
  else
  begin
    U := AnsiBufferToUnicode(tmp.Init(SourceChars * 2), Source, SourceChars);
    U^ := #0;
    SetString(result, PAnsiChar(tmp.buf), PtrUInt(U) - PtrUInt(tmp.buf) + 1);
    tmp.Done;
  end;
end;

{$endif PUREMORMOT2}

function TSynAnsiConvert.AnsiToUnicodeString(Source: PAnsiChar;
  SourceChars: cardinal): SynUnicode;
var
  tmp: TSynTempBuffer;
  U: PWideChar;
begin
  if SourceChars = 0 then
    result := ''
  else
  begin
    U := AnsiBufferToUnicode(tmp.Init(SourceChars * 2), Source, SourceChars);
    SetString(result, PWideChar(tmp.buf), (PtrUInt(U) - PtrUInt(tmp.buf)) shr 1);
    tmp.Done;
  end;
end;

function TSynAnsiConvert.AnsiToUnicodeString(const Source: RawByteString): SynUnicode;
var
  tmp: TSynTempBuffer;
  U: PWideChar;
begin
  if Source = '' then
    result := ''
  else
  begin
    tmp.Init(length(Source) * 2); // max dest size in bytes
    U := AnsiBufferToUnicode(tmp.buf, pointer(Source), length(Source));
    SetString(result, PWideChar(tmp.buf), (PtrUInt(U) - PtrUInt(tmp.buf)) shr 1);
    tmp.Done;
  end;
end;

function TSynAnsiConvert.AnsiToUtf8(const AnsiText: RawByteString): RawUtf8;
begin
  AnsiBufferToRawUtf8(pointer(AnsiText), length(AnsiText), result);
end;

procedure TSynAnsiConvert.AnsiBufferToRawUtf8(Source: PAnsiChar;
  SourceChars: cardinal; out Value: RawUtf8);
var
  tmp: TSynTempBuffer;
  P: PUtf8Char;
begin
  if (Source = nil) or
     (SourceChars = 0) then
    exit;
  P := AnsiBufferToUtf8(tmp.Init(SourceChars * 3), Source, SourceChars);
  FastSetString(Value, tmp.buf, P - tmp.buf);
  tmp.Done;
end;

constructor TSynAnsiConvert.Create(aCodePage: cardinal);
begin
  fCodePage := aCodePage;
  fAnsiCharShift := 1; // default is safe
end;

function GetEngine(aCodePage: cardinal): TSynAnsiConvert;
  {$ifdef HASINLINE} inline; {$endif}
var
  i: PtrInt;
begin
  SynAnsiConvertListLock.ReadLock; // concurrent read lock
  i := WordScanIndex(pointer(SynAnsiConvertListCodePage),
    SynAnsiConvertListCount, aCodePage); // SSE2 asm on i386 and x86_64
  if i >= 0 then
    result := SynAnsiConvertList[i]
  else
    result := nil;
  SynAnsiConvertListLock.ReadUnLock;
end;

function NewEngine(aCodePage: cardinal): TSynAnsiConvert;
var
  i: PtrInt;
begin
  SynAnsiConvertListLock.WriteLock;
  try
    i := WordScanIndex(pointer(SynAnsiConvertListCodePage),
      SynAnsiConvertListCount, aCodePage); // search again for thread safety
    if i >= 0 then
    begin
      result := SynAnsiConvertList[i]; // avoid any (unlikely) race condition
      exit;
    end;
    if aCodePage = CP_UTF8 then
      result := TSynAnsiUtf8.Create(CP_UTF8)
    else if aCodePage = CP_UTF16 then
      result := TSynAnsiUtf16.Create(CP_UTF16)
    else if IsFixedWidthCodePage(aCodePage) then
      result := TSynAnsiFixedWidth.Create(aCodePage)
    else
      result := TSynAnsiConvert.Create(aCodePage);
    RegisterGlobalShutdownRelease(result);
    ObjArrayAdd(SynAnsiConvertList, result);
    AddWord(SynAnsiConvertListCodePage, SynAnsiConvertListCount, aCodePage);
  finally
    SynAnsiConvertListLock.WriteUnLock;
  end;
end;

class function TSynAnsiConvert.Engine(aCodePage: cardinal): TSynAnsiConvert;
begin
  if aCodePage <> CP_ACP then
  begin
    result := GetEngine(aCodePage);
    if result = nil then
      if aCodePage = CP_RAWBLOB then
        result := RawByteStringConvert // CP_RAWBLOB is internal -> no engine
      else
        result := NewEngine(aCodePage)
  end
  else
    result := CurrentAnsiConvert;
end;

function TSynAnsiConvert.UnicodeBufferToAnsi(Dest: PAnsiChar;
  Source: PWideChar; SourceChars: cardinal): PAnsiChar;
var
  c: cardinal;
begin
  if (Source <> nil) and
     (SourceChars <> 0) then
  begin
    // ignore any trailing BOM (do exist on Windows files)
    if Source^ = BOM_UTF16LE then
    begin
      inc(Source);
      dec(SourceChars);
    end;
    // first handle trailing 7-bit ASCII chars, by pairs (Sha optimization)
    if SourceChars >= 2 then
      repeat
        c := PCardinal(Source)^;
        if c and $ff80ff80 <> 0 then
          break; // break on first non ASCII pair
        dec(SourceChars, 2);
        inc(Source, 2);
        c := c shr 8 or c;
        PWord(Dest)^ := c;
        inc(Dest, 2);
      until SourceChars < 2;
    if (SourceChars > 0) and
       (ord(Source^) < 128) then
      repeat
        Dest^ := AnsiChar(ord(Source^));
        dec(SourceChars);
        inc(Source);
        inc(Dest);
      until (SourceChars = 0) or
            (ord(Source^) >= 128);
    // rely on the Operating System for all remaining ASCII characters
    if SourceChars <> 0 then
      inc(Dest,
        Unicode_WideToAnsi(Source, Dest, SourceChars, SourceChars * 3, fCodePage));
  end;
  result := Dest;
end;

function TSynAnsiConvert.Utf8BufferToAnsi(Dest: PAnsiChar;
  Source: PUtf8Char; SourceChars: cardinal): PAnsiChar;
var
  tmp: TSynTempBuffer;
begin
  if (Source = nil) or
     (SourceChars = 0) then
    result := Dest
  else
  begin
    tmp.Init((SourceChars + 1) shl fAnsiCharShift);
    result := UnicodeBufferToAnsi(Dest, tmp.buf,
      Utf8ToWideChar(tmp.buf, Source, SourceChars) shr 1);
    tmp.Done;
  end;
end;

function TSynAnsiConvert.Utf8BufferToAnsi(
  Source: PUtf8Char; SourceChars: cardinal): RawByteString;
begin
  Utf8BufferToAnsi(Source, SourceChars, result);
end;

procedure TSynAnsiConvert.Utf8BufferToAnsi(Source: PUtf8Char; SourceChars: cardinal;
  var result: RawByteString);
var
  tmp: array[word] of AnsiChar;
  max: PtrInt;
begin
  if (Source = nil) or
     (SourceChars = 0) then
    result := ''
  else
  begin
    max := (SourceChars + 1) shl fAnsiCharShift;
    if max < SizeOf(tmp) then
      // use a temporary stack buffer up to 64KB
      FastSetStringCP(result, @tmp,
        Utf8BufferToAnsi(@tmp, Source, SourceChars) - PAnsiChar(@tmp), fCodePage)
    else
    begin
      // huge strings will be allocated once and truncated, not resized
      FastSetStringCP(result, nil, max, fCodePage);
      FakeLength(result,
        Utf8BufferToAnsi(pointer(result), Source, SourceChars) - pointer(result));
    end;
  end;
end;

function TSynAnsiConvert.Utf8ToAnsi(const u: RawUtf8): RawByteString;
begin
  Utf8BufferToAnsi(pointer(u), length(u), result);
end;

function TSynAnsiConvert.Utf8ToAnsiBuffer2K(const S: RawUtf8;
  Dest: PAnsiChar; DestSize: integer): integer;
var
  tmp: array[0..2047] of AnsiChar; // truncated to 2KB as documented
begin
  if (DestSize <= 0) or
     (Dest = nil) then
  begin
    result := 0;
    exit;
  end;
  result := length(S);
  if result > 0 then
  begin
    if result > SizeOf(tmp) then
      result := SizeOf(tmp);
    result := Utf8BufferToAnsi(tmp{%H-}, pointer(S), result) - {%H-}tmp;
    if result >= DestSize then
      result := DestSize - 1;
    MoveFast(tmp, Dest^, result);
  end;
  Dest[result] := #0;
end;

function TSynAnsiConvert.UnicodeBufferToAnsi(Source: PWideChar;
  SourceChars: cardinal): RawByteString;
var
  tmp: TSynTempBuffer;
begin
  if (Source = nil) or
     (SourceChars = 0) then
    result := ''
  else
  begin
    tmp.Init(SourceChars * 3);
    FastSetStringCP(result, tmp.buf, UnicodeBufferToAnsi(
      tmp.buf, Source, SourceChars) - PAnsiChar(tmp.buf), fCodePage);
    tmp.Done;
  end;
end;

function TSynAnsiConvert.UnicodeStringToAnsi(const Source: SynUnicode): RawByteString;
begin
  result := UnicodeBufferToAnsi(pointer(Source), length(Source));
end;

{$ifndef PUREMORMOT2}
function TSynAnsiConvert.RawUnicodeToAnsi(const Source: RawUnicode): RawByteString;
begin
  result := UnicodeBufferToAnsi(pointer(Source), length(Source) shr 1);
end;
{$endif PUREMORMOT2}

function TSynAnsiConvert.AnsiToAnsi(From: TSynAnsiConvert;
  const Source: RawByteString): RawByteString;
begin
  if From = self then
    result := Source
  else
    result := AnsiToAnsi(From, pointer(Source), length(Source));
end;

function TSynAnsiConvert.AnsiToAnsi(From: TSynAnsiConvert;
  Source: PAnsiChar; SourceChars: cardinal): RawByteString;
var
  tmp: TSynTempBuffer;
  U: PWideChar;
begin
  if From.fCodePage = fCodePage then
    FastSetStringCP(result, Source, SourceChars, fCodePage)
  else if (Source = nil) or
          (SourceChars = 0) then
    result := ''
  else
  begin
    U := tmp.Init(SourceChars * 2 + 2);
    result := UnicodeBufferToAnsi(U,
      From.AnsiBufferToUnicode(U, Source, SourceChars) - U);
    tmp.Done;
  end;
end;


{ TSynAnsiFixedWidth }

function TSynAnsiFixedWidth.AnsiBufferToUnicode(Dest: PWideChar;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PWideChar;
var
  i: integer;
  tab: PWordArray;
begin
  // PWord*(Dest)[] is much faster than dest^ := WideChar(c) for FPC
  tab := pointer(fAnsiToWide);
  for i := 1 to SourceChars shr 2 do
  begin
    PWordArray(Dest)[0] := tab[Ord(Source[0])];
    PWordArray(Dest)[1] := tab[Ord(Source[1])];
    PWordArray(Dest)[2] := tab[Ord(Source[2])];
    PWordArray(Dest)[3] := tab[Ord(Source[3])];
    inc(Source, 4);
    inc(Dest, 4);
  end;
  for i := 1 to SourceChars and 3 do
  begin
    PWord(Dest)^ := tab[Ord(Source^)];
    inc(Dest);
    inc(Source);
  end;
  if not NoTrailingZero then
    Dest^ := #0;
  result := Dest;
end;

function TSynAnsiFixedWidth.AnsiBufferToUtf8(Dest: PUtf8Char;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PUtf8Char;
var
  EndSource, EndSourceBy4: PAnsiChar;
  c: cardinal;
label
  by4, by1; // ugly but faster
begin
  if (self = nil) or
     (Dest = nil) then
  begin
    result := nil;
    Exit;
  end
  else if (Source <> nil) and
          (SourceChars > 0) then
  begin
    // handle 7-bit ASCII WideChars, by quads
    EndSource := Source + SourceChars;
    EndSourceBy4 := EndSource - 4;
    if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
       (Source <= EndSourceBy4) then
      repeat
        c := PCardinal(Source)^;
        if c and $80808080 <> 0 then
          goto by1; // break on first non ASCII quad
by4:    inc(Source, 4);
        PCardinal(Dest)^ := c;
        inc(Dest, 4);
      until Source > EndSourceBy4;
    // generic loop, handling one WideChar per iteration
    if Source < EndSource then
      repeat
by1:    c := byte(Source^);
        inc(Source);
        if c <= $7F then
        begin
          Dest^ := AnsiChar(c); // 0..127 don't need any translation
          Inc(Dest);
          if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
             (Source <= EndSourceBy4) then
         begin
           c := PCardinal(Source)^;
           if c and $80808080 = 0 then
             goto by4;
           continue;
         end;
          if Source < EndSource then
            continue
          else
            break;
        end
        else
        begin
          // no surrogate is expected in TSynAnsiFixedWidth charsets
          c := fAnsiToWide[c]; // convert FixedAnsi char into Unicode char
          if c > $7ff then
          begin
            Dest[0] := AnsiChar($E0 or (c shr 12));
            Dest[1] := AnsiChar($80 or ((c shr 6) and $3F));
            Dest[2] := AnsiChar($80 or (c and $3F));
            Inc(Dest, 3);
            if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
               (Source <= EndSourceBy4) then
            begin
              c := PCardinal(Source)^;
              if c and $80808080 = 0 then
                goto by4;
              continue;
            end;
            if Source < EndSource then
              continue
            else
              break;
          end
          else
          begin
            Dest[0] := AnsiChar($C0 or (c shr 6));
            Dest[1] := AnsiChar($80 or (c and $3F));
            Inc(Dest, 2);
            if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
               (Source < EndSourceBy4) then
            begin
              c := PCardinal(Source)^;
              if c and $80808080 = 0 then
                goto by4;
              continue;
            end;
            if Source < EndSource then
              continue
            else
              break;
          end;
        end;
      until false;
  end;
  if not NoTrailingZero then
    Dest^ := #0;
  {$ifdef ISDELPHI104}
  exit(Dest); // circumvent Delphi 10.4 optimizer bug
  {$else}
  result := Dest;
  {$endif ISDELPHI104}
end;

{$ifndef PUREMORMOT2}
function TSynAnsiFixedWidth.AnsiToRawUnicode(Source: PAnsiChar;
  SourceChars: cardinal): RawUnicode;
begin
  if SourceChars = 0 then
    result := ''
  else
  begin
    SetString(result, nil, SourceChars * 2 + 1);
    AnsiBufferToUnicode(pointer(result), Source, SourceChars);
  end;
end;
{$endif PUREMORMOT2}

const
  /// reference set for WinAnsi to Unicode conversion
  // - this table contains all the Unicode codepoints corresponding to
  // the Ansi Code Page 1252 (i.e. WinAnsi), which Unicode value are > 255
  // - values taken from MultiByteToWideChar(1252,0,@Tmp,256,@WinAnsiTable,256)
  // so are available outside the Windows platforms (e.g. Linux/BSD) and even
  // if the system has been tweaked as such:
  // http://www.fas.harvard.edu/~chgis/data/chgis/downloads/v4/howto/cyrillic.html
  WinAnsiUnicodeChars: packed array[128..159] of word = (
    8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338,
    141, 381, 143, 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482,
    353, 8250, 339, 157, 382, 376);

constructor TSynAnsiFixedWidth.Create(aCodePage: cardinal);
var
  i, len: PtrInt;
  A256: array[0..255] of AnsiChar;
  U256: array[0..255] of WideChar;
begin
  inherited;
  if not IsFixedWidthCodePage(aCodePage) then
    // warning: CreateUtf8() uses Utf8ToString() -> call CreateFmt() here
    raise ESynUnicode.CreateFmt('%s.Create - Invalid code page %d',
      [ClassNameShort(self)^, fCodePage]);
  // create internal look-up tables
  SetLength(fAnsiToWide, 256);
  if (aCodePage = CP_WINANSI) or
     (aCodePage = CP_LATIN1) or
     (aCodePage >= CP_RAWBLOB) then
  begin
    // Win1252 has its own table, LATIN1 and RawByteString map 8-bit Unicode
    for i := 0 to 255 do
      fAnsiToWide[i] := i;
    if aCodePage = CP_WINANSI then
      // do not trust the Windows API for the 1252 code page :(
      for i := low(WinAnsiUnicodeChars) to high(WinAnsiUnicodeChars) do
        fAnsiToWide[i] := WinAnsiUnicodeChars[i];
  end
  else
  begin
    // initialize table from Operating System returned values
    for i := 0 to 255 do
      A256[i] := AnsiChar(i);
    FillcharFast(U256, SizeOf(U256), 0);
    // call mormot.core.os cross-platform Unicode_AnsiToWide()
    len := PtrUInt(inherited AnsiBufferToUnicode(U256, A256, 256)) - PtrUInt(@U256);
    if (len < 500) or
       (len > 512) then
      // warning: CreateUtf8() uses Utf8ToString() -> call CreateFmt() now
      raise ESynUnicode.CreateFmt('OS error for %s.Create(%d) [%d]',
        [ClassNameShort(self)^, aCodePage, len]);
    MoveFast(U256[0], fAnsiToWide[0], 512);
  end;
  SetLength(fWideToAnsi, 65536);
  for i := 1 to 126 do
    fWideToAnsi[i] := i;
  FillcharFast(fWideToAnsi[127], 65536 - 127, ord('?')); // '?' for unknown char
  for i := 127 to 255 do
    if (fAnsiToWide[i] <> 0) and
       (fAnsiToWide[i] <> ord('?')) then
      fWideToAnsi[fAnsiToWide[i]] := i;
  // fixed width Ansi will never be bigger than UTF-8
  fAnsiCharShift := 0;
end;

function TSynAnsiFixedWidth.IsValidAnsi(WideText: PWideChar; Length: PtrInt): boolean;
var
  i: PtrInt;
  wc: PtrUInt;
begin
  result := false;
  if WideText <> nil then
    for i := 0 to Length - 1 do
    begin
      wc := PtrUInt(WideText[i]);
      if wc = 0 then
        break
      else if wc < 256 then
        if fAnsiToWide[wc] < 256 then
          continue
        else
          exit
      else if fWideToAnsi[wc] = ord('?') then
        exit
      else
        continue;
    end;
  result := true;
end;

function TSynAnsiFixedWidth.IsValidAnsi(WideText: PWideChar): boolean;
var
  wc: PtrUInt;
begin
  result := false;
  if WideText <> nil then
    repeat
      wc := PtrUInt(WideText^);
      inc(WideText);
      if wc = 0 then
        break
      else if wc < 256 then
        if fAnsiToWide[wc] < 256 then
          continue
        else
          exit
      else if fWideToAnsi[wc] = ord('?') then
        exit
      else
        continue;
    until false;
  result := true;
end;

function TSynAnsiFixedWidth.IsValidAnsiU(Utf8Text: PUtf8Char): boolean;
var
  extra: byte;
  c, n: cardinal;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  result := false;
  if Utf8Text <> nil then
    repeat
      extra := utf8.Lookup[ord(Utf8Text^)];
      inc(Utf8Text);
      if extra = UTF8_ASCII then
        continue
      else if extra = UTF8_ZERO then
        break
      else if extra = UTF8_INVALID then
        exit
      else
      begin
        if utf8.Extra[extra].minimum > $ffff then
          exit;
        n := extra;
        c := ord(Utf8Text[-1]);
        repeat
          if byte(Utf8Text^) and $c0 <> $80 then
            exit; // invalid UTF-8 content
          c := (c shl 6) + byte(Utf8Text^);
          inc(Utf8Text);
          dec(n)
        until n = 0;
        dec(c, utf8.Extra[extra].offset);
        if (c > $ffff) or
           (fWideToAnsi[c] = ord('?')) then
          exit; // invalid char in the WinAnsi code page
      end;
    until false;
  result := true;
end;

function TSynAnsiFixedWidth.IsValidAnsiU8Bit(Utf8Text: PUtf8Char): boolean;
var
  extra: byte;
  c, n: cardinal;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  result := false;
  if Utf8Text <> nil then
    repeat
      extra := utf8.Lookup[ord(Utf8Text^)];
      inc(Utf8Text);
      if extra = UTF8_ASCII then
        continue
      else if extra = UTF8_ZERO then
        break
      else if extra = UTF8_INVALID then
        exit
      else
      begin
        if utf8.Extra[extra].minimum > $ffff then
          exit;
        n := extra;
        c := ord(Utf8Text[-1]);
        repeat
          if byte(Utf8Text^) and $c0 <> $80 then
            exit; // invalid UTF-8 content
          c := (c shl 6) + byte(Utf8Text^);
          inc(Utf8Text);
          dec(n)
        until n = 0;
        dec(c, utf8.Extra[extra].offset);
        if (c > 255) or
           (fAnsiToWide[c] > 255) then
          exit; // not 8-bit char (like "tm" or such) is marked invalid
      end;
    until false;
  result := true;
end;

function TSynAnsiFixedWidth.UnicodeBufferToAnsi(Dest: PAnsiChar;
  Source: PWideChar; SourceChars: cardinal): PAnsiChar;
var
  c: cardinal;
  tab: PAnsiChar;
begin
  if (Source <> nil) and
     (SourceChars <> 0) then
  begin
    // ignore any trailing BOM (do exist on Windows files)
    if Source^ = BOM_UTF16LE then
    begin
      inc(Source);
      dec(SourceChars);
    end;
    // first handle trailing 7-bit ASCII chars, by pairs (Sha optimization)
    if SourceChars >= 2 then
      repeat
        c := PCardinal(Source)^;
        if c and $ff80ff80 <> 0 then
          break; // break on first non ASCII pair
        dec(SourceChars, 2);
        inc(Source, 2);
        c := c shr 8 or c;
        PWord(Dest)^ := c;
        inc(Dest, 2);
      until SourceChars < 2;
    // use internal lookup tables for fast process of remaining chars
    tab := pointer(fWideToAnsi);
    for c := 1 to SourceChars shr 2 do
    begin
      Dest[0] := tab[Ord(Source[0])];
      Dest[1] := tab[Ord(Source[1])];
      Dest[2] := tab[Ord(Source[2])];
      Dest[3] := tab[Ord(Source[3])];
      inc(Source, 4);
      inc(Dest, 4);
    end;
    for c := 1 to SourceChars and 3 do
    begin
      Dest^ := tab[Ord(Source^)];
      inc(Dest);
      inc(Source);
    end;
  end;
  result := Dest;
end;

function TSynAnsiFixedWidth.Utf8BufferToAnsi(Dest: PAnsiChar;
  Source: PUtf8Char; SourceChars: cardinal): PAnsiChar;
var
  c: cardinal;
  endSource, endSourceBy4: PUtf8Char;
  i, extra: PtrInt;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  by1, by4, quit; // ugly but faster
begin
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  // first handle trailing 7-bit ASCII chars, by quad (Sha optimization)
  endSource := Source + SourceChars;
  endSourceBy4 := endSource - 4;
  if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
     (Source <= endSourceBy4) then
    repeat
      c := PCardinal(Source)^;
      if c and $80808080 <> 0 then
        goto by1; // break on first non ASCII quad
by4:  PCardinal(Dest)^ := c;
      inc(Source, 4);
      inc(Dest, 4);
    until Source > endSourceBy4;
  // generic loop, handling one UTF-8 code per iteration
  if Source < endSource then
  begin
    repeat
by1:  c := byte(Source^);
      inc(Source);
      if ord(c) <= 127 then
      begin
        Dest^ := AnsiChar(c);
        inc(Dest);
        if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
           (Source <= endSourceBy4) then
        begin
          c := PCardinal(Source)^;
          if c and $80808080 = 0 then
            goto by4;
          continue;
        end;
        if Source < endSource then
          continue
        else
          break;
      end
      else
      begin
        extra := utf8.Lookup[c];
        if (extra = UTF8_INVALID) or
           (Source + extra > endSource) then
          break;
        i := extra;
        repeat
          if byte(Source^) and $c0 <> $80 then
            goto quit; // invalid UTF-8 content
          c := (c shl 6) + byte(Source^);
          inc(Source);
          dec(i);
        until i = 0;
        dec(c, utf8.Extra[extra].offset);
        if c > $ffff then
          Dest^ := '?' // '?' as in unknown fWideToAnsi[] items
        else
          Dest^ := AnsiChar(fWideToAnsi[c]);
        inc(Dest);
        if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
           (Source <= endSourceBy4) then
        begin
          c := PCardinal(Source)^;
          if c and $80808080 = 0 then
            goto by4;
          continue;
        end;
        if Source < endSource then
          continue
        else
          break;
      end;
    until false;
  end;
quit:
  result := Dest;
end;

function TSynAnsiFixedWidth.WideCharToAnsiChar(wc: cardinal): integer;
begin
  if wc < 256 then
    if fAnsiToWide[wc] < 256 then
      result := wc
    else
      result := -1
  else if wc <= 65535 then
  begin
    result := fWideToAnsi[wc];
    if result = ord('?') then
      result := -1;
  end
  else
    result := -1;
end;


{ TSynAnsiUtf8 }

function TSynAnsiUtf8.AnsiBufferToUnicode(Dest: PWideChar;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PWideChar;
begin
  result := Dest + (Utf8ToWideChar(Dest,
    PUtf8Char(Source), SourceChars, NoTrailingZero) shr 1);
end;

function TSynAnsiUtf8.AnsiBufferToUtf8(Dest: PUtf8Char;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PUtf8Char;
begin
  MoveFast(Source^, Dest^, SourceChars);
  if not NoTrailingZero then
    Dest[SourceChars] := #0;
  result := Dest + SourceChars;
end;

{$ifndef PUREMORMOT2}
function TSynAnsiUtf8.AnsiToRawUnicode(Source: PAnsiChar;
  SourceChars: cardinal): RawUnicode;
begin
  result := Utf8DecodeToRawUniCode(PUtf8Char(Source), SourceChars);
end;
{$endif PUREMORMOT2}

constructor TSynAnsiUtf8.Create(aCodePage: cardinal);
begin
  if aCodePage <> CP_UTF8 then
    raise ESynUnicode.CreateFmt('%s.Create(%d)', [ClassNameShort(self)^, aCodePage]);
  inherited Create(aCodePage);
end;

function TSynAnsiUtf8.UnicodeBufferToAnsi(Dest: PAnsiChar;
  Source: PWideChar; SourceChars: cardinal): PAnsiChar;
begin
  result := Dest + RawUnicodeToUtf8(PUtf8Char(Dest), SourceChars * 3,
    Source, SourceChars, [ccfNoTrailingZero]);
end;

function TSynAnsiUtf8.UnicodeBufferToAnsi(Source: PWideChar;
  SourceChars: cardinal): RawByteString;
var
  tmp: TSynTempBuffer;
begin
  if (Source = nil) or
     (SourceChars = 0) then
    result := ''
  else
  begin
    tmp.Init(SourceChars * 3);
    FastSetStringCP(result, tmp.buf, RawUnicodeToUtf8(tmp.buf,
      SourceChars * 3, Source, SourceChars, [ccfNoTrailingZero]), fCodePage);
    tmp.Done;
  end;
end;

function TSynAnsiUtf8.Utf8BufferToAnsi(Dest: PAnsiChar;
  Source: PUtf8Char; SourceChars: cardinal): PAnsiChar;
begin
  MoveFast(Source^, Dest^, SourceChars);
  result := Dest + SourceChars;
end;

procedure TSynAnsiUtf8.Utf8BufferToAnsi(Source: PUtf8Char; SourceChars: cardinal;
  var result: RawByteString);
begin
  FastSetString(RawUtf8(result), Source, SourceChars);
end;

function TSynAnsiUtf8.Utf8ToAnsi(const u: RawUtf8): RawByteString;
begin
  result := u; // may be read-only: no FastAssignUtf8/FakeCodePage
  EnsureRawUtf8(result);
end;

function TSynAnsiUtf8.AnsiToUtf8(const AnsiText: RawByteString): RawUtf8;
begin
  result := AnsiText; // may be read-only: no FastAssignUtf8/FakeCodePage
  EnsureRawUtf8(result);
end;

procedure TSynAnsiUtf8.AnsiBufferToRawUtf8(
  Source: PAnsiChar; SourceChars: cardinal; out Value: RawUtf8);
begin
  FastSetString(Value, Source, SourceChars);
end;


{ TSynAnsiUtf16 }

function TSynAnsiUtf16.AnsiBufferToUnicode(Dest: PWideChar;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PWideChar;
begin
  MoveFast(Source^, Dest^, SourceChars);
  result := Pointer(PtrUInt(Dest) + SourceChars);
  if not NoTrailingZero then
    result^ := #0;
end;

const
  NOTRAILING: array[boolean] of TCharConversionFlags = (
    [], [ccfNoTrailingZero]);

function TSynAnsiUtf16.AnsiBufferToUtf8(Dest: PUtf8Char;
  Source: PAnsiChar; SourceChars: cardinal; NoTrailingZero: boolean): PUtf8Char;
begin
  SourceChars := SourceChars shr 1; // from byte count to WideChar count
  result := Dest + RawUnicodeToUtf8(Dest,
    SourceChars * 3, PWideChar(Source), SourceChars, NOTRAILING[NoTrailingZero]);
end;

{$ifndef PUREMORMOT2}
function TSynAnsiUtf16.AnsiToRawUnicode(Source: PAnsiChar;
  SourceChars: cardinal): RawUnicode;
begin
  SetString(result, Source, SourceChars); // byte count
end;
{$endif PUREMORMOT2}

constructor TSynAnsiUtf16.Create(aCodePage: cardinal);
begin
  if aCodePage <> CP_UTF16 then
    raise ESynUnicode.CreateFmt('%s.Create(%d)', [ClassNameShort(self)^, aCodePage]);
  inherited Create(aCodePage);
end;

function TSynAnsiUtf16.UnicodeBufferToAnsi(Dest: PAnsiChar;
  Source: PWideChar; SourceChars: cardinal): PAnsiChar;
begin
  SourceChars := SourceChars shl 1; // from WideChar count to byte count
  MoveFast(Source^, Dest^, SourceChars);
  result := Dest + SourceChars;
end;

function TSynAnsiUtf16.Utf8BufferToAnsi(Dest: PAnsiChar;
  Source: PUtf8Char; SourceChars: cardinal): PAnsiChar;
begin
  result := Dest + Utf8ToWideChar(PWideChar(Dest), Source, SourceChars, true);
end;


function IsFixedWidthCodePage(aCodePage: cardinal): boolean;
begin
  result := ((aCodePage >= 1250) and
             (aCodePage <= 1258)) or
            (aCodePage = CP_LATIN1) or
            (aCodePage >= CP_RAWBLOB);
end;

function CodePageToText(aCodePage: cardinal): TShort16;
begin
  case aCodePage of
    CP_UTF8:
      result := 'utf8';
    CODEPAGE_US:
      result := 'WinAnsi';
  else
    begin
      PCardinal(@result)^ := 2 + ord('c') shl 8 + ord('p') shl 16;
      AppendShortCardinal(aCodePage, result);
    end;
  end;
end;


{ *************** Text File Loading with BOM/Unicode Support }

function BomFile(var Buffer: pointer; var BufferSize: PtrInt): TBomFile;
begin
  result := bomNone;
  if (Buffer <> nil) and
     (BufferSize >= 2) then
    case PWord(Buffer)^ of
      ord(BOM_UTF16LE):
        begin
          inc(PByte(Buffer), 2);
          dec(BufferSize, 2);
          result := bomUnicode; // UTF-16 LE
        end;
      $BBEF:
        if (BufferSize >= 3) and
           (PByteArray(Buffer)[2] = $BF) then
        begin
          inc(PByte(Buffer), 3);
          dec(BufferSize, 3);
          result := bomUtf8;
        end;
    end;
end;

function StringFromBomFile(const FileName: TFileName; out FileContent: RawByteString;
  out Buffer: pointer; out BufferSize: PtrInt): TBomFile;
begin
  FileContent := StringFromFile(FileName);
  Buffer := pointer(FileContent);
  BufferSize := length(FileContent);
  result := BomFile(Buffer, BufferSize);
end;

function RawUtf8FromFile(const FileName: TFileName): RawUtf8;
begin
  result := AnyTextFileToRawUtf8(FileName, {AssumeUtf8IfNoBom=}true);
end;

function AnyTextFileToRawUtf8(const FileName: TFileName; AssumeUtf8IfNoBom: boolean): RawUtf8;
var
  tmp: RawByteString;
  buf: pointer;
  len: PtrInt;
begin
  case StringFromBomFile(FileName, tmp, buf, len) of
    bomNone:
      if AssumeUtf8IfNoBom then
        FastAssignUtf8(result, tmp)
      else
        CurrentAnsiConvert.AnsiBufferToRawUtf8(buf, len, result);
    bomUnicode:
      RawUnicodeToUtf8(PWideChar(buf), len shr 1, result);
    bomUtf8:
      if len = 0 then
        result := ''
      else
      begin
        MoveFast(buf^, pointer(tmp)^, len); // fast in-place delete(bom)
        FakeLength(tmp, len);
        FastAssignUtf8(result, tmp)
      end;
  end;
end;

function AnyTextFileToSynUnicode(const FileName: TFileName; ForceUtf8: boolean): SynUnicode;
var
  tmp: RawByteString;
  buf: pointer;
  len: PtrInt;
begin
  if ForceUtf8 then
    Utf8ToSynUnicode(StringFromFile(FileName), result)
  else
    case StringFromBomFile(FileName, tmp, buf, len) of
      bomNone:
        result := CurrentAnsiConvert.AnsiToUnicodeString(buf, len);
      bomUnicode:
        SetString(result, PWideChar(buf), len shr 1);
      bomUtf8:
        Utf8ToSynUnicode(buf, len, result);
    end;
end;

function AnyTextFileToString(const FileName: TFileName; ForceUtf8: boolean): string;
var
  tmp: RawByteString;
  buf: pointer;
  len: PtrInt;
begin

  {$ifdef UNICODE}
  if ForceUtf8 then
    Utf8ToStringVar(StringFromFile(FileName), result)
  else
    case StringFromBomFile(FileName, tmp, buf, len) of
      bomNone:
        result := CurrentAnsiConvert.AnsiToUnicodeString(buf, len);
      bomUnicode:
        SetString(result, PWideChar(buf), len shr 1);
      bomUtf8:
        Utf8DecodeToString(buf, len, result);
    end;
  {$else}
  if ForceUtf8 then
    result := CurrentAnsiConvert.Utf8ToAnsi(StringFromFile(FileName))
  else
    case StringFromBomFile(FileName, tmp, buf, len) of
      bomNone:
        SetString(result, PAnsiChar(buf), len);
      bomUnicode:
        result := CurrentAnsiConvert.UnicodeBufferToAnsi(buf, len shr 1);
      bomUtf8:
        result := CurrentAnsiConvert.Utf8BufferToAnsi(buf, len);
    end;
  {$endif UNICODE}
end;


{ *************** Low-Level String Conversion Functions }

procedure AnyAnsiToUtf8(const s: RawByteString; var result: RawUtf8);
{$ifdef HASCODEPAGE}
var
  cp: cardinal;
{$endif HASCODEPAGE}
begin
  if s = '' then
    result := ''
  else
  {$ifdef HASCODEPAGE}
  begin
    cp := GetCodePage(s);
    if cp = CP_ACP then
    begin
      cp := Unicode_CodePage;
      {$ifdef FPC}
      if cp = CP_UTF8 then // happens on POSIX and with Lazarus - so FPC only
      begin
        if PStrRec(PAnsiChar(pointer(s)) - _STRRECSIZE)^.refCnt >= 0 then
        begin
          result := s; // not a read-only constant: assign by ref
          FakeCodePage(RawByteString(result), cp); // override 0 by CP_UTF8
        end
        else
          FastSetString(result, pointer(s), length(s)); // realloc constant
        exit;
      end;
      {$endif FPC}
    end;
    if cp = CP_UTF8 then
      result := s
    else if cp >= CP_RAWBLOB then
      FastSetString(result, pointer(s), length(s)) // no convert, just copy
    else
      TSynAnsiConvert.Engine(cp).AnsiBufferToRawUtf8(pointer(s), length(s), result);
  end;
  {$else}
  CurrentAnsiConvert.AnsiBufferToRawUtf8(pointer(s), length(s), result);
  {$endif HASCODEPAGE}
end;

function AnyAnsiToUtf8(const s: RawByteString): RawUtf8;
begin
  AnyAnsiToUtf8(s, result);
end;

function WinAnsiBufferToUtf8(Dest: PUtf8Char;
  Source: PAnsiChar; SourceChars: cardinal): PUtf8Char;
begin
  result := WinAnsiConvert.AnsiBufferToUtf8(Dest, Source, SourceChars);
end;

function ShortStringToUtf8(const source: ShortString): RawUtf8;
begin
  WinAnsiConvert.AnsiBufferToRawUtf8(@source[1], ord(source[0]), result);
end;

procedure WinAnsiToUnicodeBuffer(const S: WinAnsiString; Dest: PWordArray; DestLen: PtrInt);
var
  L: PtrInt;
begin
  L := length(S);
  if L <> 0 then
  begin
    if L >= DestLen then
      L := DestLen - 1; // truncate to avoid buffer overflow
    WinAnsiConvert.AnsiBufferToUnicode(PWideChar(Dest), pointer(S), L);
    // including last #0
  end
  else
    Dest^[0] := 0;
end;

{$ifndef PUREMORMOT2}
function WinAnsiToRawUnicode(const S: WinAnsiString): RawUnicode;
begin
  result := WinAnsiConvert.AnsiToRawUnicode(S);
end;
{$endif PUREMORMOT2}

function WinAnsiToUtf8(const S: WinAnsiString): RawUtf8;
begin
  WinAnsiConvert.AnsiBufferToRawUtf8(pointer(S), length(S), result);
end;

function WinAnsiToUtf8(WinAnsi: PAnsiChar; WinAnsiLen: PtrInt): RawUtf8;
begin
  WinAnsiConvert.AnsiBufferToRawUtf8(WinAnsi, WinAnsiLen, result);
end;

function WideCharToWinAnsiChar(wc: cardinal): AnsiChar;
begin
  wc := WinAnsiConvert.WideCharToAnsiChar(wc);
  if integer(wc) = -1 then
    result := '?'
  else
    result := AnsiChar(wc);
end;

function WideCharToWinAnsi(wc: cardinal): integer;
begin
  result := WinAnsiConvert.WideCharToAnsiChar(wc);
end;

function IsWinAnsi(WideText: PWideChar; Length: integer): boolean;
begin
  result := WinAnsiConvert.IsValidAnsi(WideText, Length);
end;

function IsWinAnsi(WideText: PWideChar): boolean;
begin
  result := WinAnsiConvert.IsValidAnsi(WideText);
end;

function IsWinAnsiU(Utf8Text: PUtf8Char): boolean;
begin
  result := WinAnsiConvert.IsValidAnsiU(Utf8Text);
end;

function IsWinAnsiU8Bit(Utf8Text: PUtf8Char): boolean;
begin
  result := WinAnsiConvert.IsValidAnsiU8Bit(Utf8Text);
end;

function Utf8ToWinPChar(dest: PAnsiChar; source: PUtf8Char; count: integer): integer;
begin
  result := WinAnsiConvert.Utf8BufferToAnsi(dest, source, count) - dest;
end;

function Utf8ToWinAnsi(const S: RawUtf8): WinAnsiString;
begin
  result := WinAnsiConvert.Utf8ToAnsi(S);
end;

function Utf8ToWinAnsi(P: PUtf8Char): WinAnsiString;
begin
  result := WinAnsiConvert.Utf8ToAnsi(P);
end;

procedure Utf8ToRawUtf8(P: PUtf8Char; var result: RawUtf8);
begin
  // fast and Delphi 2009+ ready
  FastSetString(result, P, StrLen(P));
end;

{$ifndef PUREMORMOT2}

function Utf8DecodeToRawUnicode(P: PUtf8Char; L: integer): RawUnicode;
var
  tmp: TSynTempBuffer;
begin
  result := ''; // somewhat faster if result is freed before any SetLength()
  if L = 0 then
    L := StrLen(P);
  if L = 0 then
    exit;
  // +1 below is for #0 ending -> true WideChar(#0) ending
  tmp.Init(L * 3); // maximum posible unicode size (if all <#128)
  SetString(result, PAnsiChar(tmp.buf), Utf8ToWideChar(tmp.buf, P, L) + 1);
  tmp.Done;
end;

function Utf8DecodeToRawUnicode(const S: RawUtf8): RawUnicode;
begin
  if S = '' then
    result := ''
  else
    result := Utf8DecodeToRawUnicode(pointer(S), Length(S));
end;

function Utf8DecodeToRawUnicodeUI(const S: RawUtf8; DestLen: PInteger): RawUnicode;
var
  L: integer;
begin
  L := Utf8DecodeToRawUnicodeUI(S, result);
  if DestLen <> nil then
    DestLen^ := L;
end;

function Utf8DecodeToRawUnicodeUI(const S: RawUtf8; var Dest: RawUnicode): integer;
begin
  Dest := ''; // somewhat faster if Dest is freed before any SetLength()
  if S = '' then
  begin
    result := 0;
    exit;
  end;
  result := Length(S);
  SetLength(Dest, result * 2 + 2);
  result := Utf8ToWideChar(pointer(Dest), Pointer(S), result);
end;

function RawUnicodeToUtf8(const Unicode: RawUnicode): RawUtf8;
begin
  RawUnicodeToUtf8(pointer(Unicode), Length(Unicode) shr 1, result);
end;

function RawUnicodeToSynUnicode(const Unicode: RawUnicode): SynUnicode;
begin
  SetString(result, PWideChar(pointer(Unicode)), Length(Unicode) shr 1);
end;

function RawUnicodeToWinAnsi(const Unicode: RawUnicode): WinAnsiString;
begin
  result := WinAnsiConvert.UnicodeBufferToAnsi(pointer(Unicode), Length(Unicode) shr 1);
end;

{$endif PUREMORMOT2}

function SynUnicodeToUtf8(const Unicode: SynUnicode): RawUtf8;
begin
  RawUnicodeToUtf8(pointer(Unicode), Length(Unicode), result);
end;

function RawUnicodeToSynUnicode(WideChar: PWideChar; WideCharCount: integer): SynUnicode;
begin
  SetString(result, WideChar, WideCharCount);
end;

procedure RawUnicodeToWinPChar(dest: PAnsiChar; source: PWideChar; WideCharCount: integer);
begin
  WinAnsiConvert.UnicodeBufferToAnsi(dest, source, WideCharCount);
end;

function RawUnicodeToWinAnsi(WideChar: PWideChar; WideCharCount: integer): WinAnsiString;
begin
  result := WinAnsiConvert.UnicodeBufferToAnsi(WideChar, WideCharCount);
end;

function WideStringToWinAnsi(const Wide: WideString): WinAnsiString;
begin
  result := WinAnsiConvert.UnicodeBufferToAnsi(pointer(Wide), Length(Wide));
end;

procedure UnicodeBufferToWinAnsi(source: PWideChar; out Dest: WinAnsiString);
var
  L: integer;
begin
  L := StrLenW(source);
  SetLength(Dest, L);
  WinAnsiConvert.UnicodeBufferToAnsi(pointer(Dest), source, L);
end;

function UnicodeBufferToString(source: PWideChar): string;
begin
  result := RawUnicodeToString(source, StrLenW(source));
end;

function UnicodeBufferToUtf8(source: PWideChar): RawUtf8;
begin
  RawUnicodeToUtf8(source, StrLenW(source), result);
end;

function UnicodeBufferToVariant(source: PWideChar): variant;
begin
  ClearVariantForString(result);
  if Source <> nil then
    RawUnicodeToUtf8(source, StrLenW(source), RawUtf8(TVarData(result).VAny));
end;

function StringToVariant(const Txt: string): variant;
begin
  StringToVariant(Txt, result);
end;

procedure StringToVariant(const Txt: string; var result: variant);
begin
  ClearVariantForString(result);
  if Txt <> '' then
    {$ifndef UNICODE}
    if (Unicode_CodePage = CP_UTF8) or
       IsAnsiCompatible(Txt) then
    begin
      RawByteString(TVarData(result).VAny) := Txt;
      EnsureRawUtf8(RawByteString(TVarData(result).VAny));
    end
    else
    {$endif UNICODE}
      StringToUtf8(Txt, RawUtf8(TVarData(result).VAny));
end;

procedure AnsiCharToUtf8(P: PAnsiChar; L: integer; var result: RawUtf8;
  CodePage: integer);
begin
  TSynAnsiConvert.Engine(CodePage).AnsiBufferToRawUtf8(P, L, result);
end;

function AnsiToUtf8(const Ansi: RawByteString; CodePage: integer): RawUtf8;
begin
  if Ansi = '' then
    result := ''
  else
    result := TSynAnsiConvert.Engine(CodePage).AnsiToUtf8(Ansi);
end;

function AnsiToString(const Ansi: RawByteString; CodePage: integer): string;
begin
  if Ansi = '' then
    result := ''
  else
    {$ifdef UNICODE}
    result := TSynAnsiConvert.Engine(CodePage).AnsiToUnicodeString(Ansi);
    {$else}
    result := CurrentAnsiConvert.AnsiToAnsi(TSynAnsiConvert.Engine(CodePage), Ansi);
    {$endif UNICODE}
end;

function AnsiBufferToTempUtf8(var Temp: TSynTempBuffer; Buf: PAnsiChar; BufLen,
  CodePage: cardinal): PUtf8Char;
begin
  if (BufLen = 0) or
     (CodePage = CP_UTF8) or
     (CodePage >= CP_RAWBLOB) or
     IsAnsiCompatible(Buf, BufLen) then
  begin
    temp.Buf := nil;
    temp.len := BufLen;
    result := PUtf8Char(Buf);
  end
  else
  begin
    temp.Init(BufLen * 3);
    Buf := pointer(TSynAnsiConvert.Engine(CodePage).
      AnsiBufferToUtf8(temp.Buf, Buf, BufLen));
    temp.len := Buf - PAnsiChar(temp.Buf);
    result := temp.Buf;
  end;
end;

{$ifdef UNICODE}

function Ansi7ToString(const Text: RawByteString): string;
var
  i: PtrInt;
begin
  SetString(result, nil, Length(Text));
  for i := 0 to Length(Text) - 1 do
    PWordArray(result)[i] := PByteArray(Text)[i]; // no conversion for 7-bit Ansi
end;

function Ansi7ToString(Text: PWinAnsiChar; Len: PtrInt): string;
begin
  Ansi7ToString(Text, Len, result);
end;

procedure Ansi7ToString(Text: PWinAnsiChar; Len: PtrInt; var result: string);
var
  i: PtrInt;
begin
  SetString(result, nil, Len);
  for i := 0 to Len - 1 do
    PWordArray(result)[i] := PByteArray(Text)[i]; // no conversion for 7-bit Ansi
end;

function StringToAnsi7(const Text: string): RawByteString;
var
  i: PtrInt;
begin
  SetString(result, nil, Length(Text));
  for i := 0 to Length(Text) - 1 do
    PByteArray(result)[i] := PWordArray(Text)[i]; // no conversion for 7-bit Ansi
end;

function StringToWinAnsi(const Text: string): WinAnsiString;
begin
  result := RawUnicodeToWinAnsi(Pointer(Text), Length(Text));
end;

function StringBufferToUtf8(Dest: PUtf8Char; Source: PChar; SourceChars: PtrInt): PUtf8Char;
begin
  result := Dest + RawUnicodeToUtf8(Dest, SourceChars * 3, PWideChar(Source), SourceChars, []);
end;

procedure StringBufferToUtf8(Source: PChar; out result: RawUtf8);
begin
  RawUnicodeToUtf8(Source, StrLenW(Source), result);
end;

function StringToUtf8(const Text: string): RawUtf8;
begin
  RawUnicodeToUtf8(pointer(Text), Length(Text), result);
end;

procedure StringToUtf8(Text: PChar; TextLen: PtrInt; var result: RawUtf8);
begin
  RawUnicodeToUtf8(Text, TextLen, result);
end;

procedure StringToUtf8(const Text: string; var result: RawUtf8);
begin
  RawUnicodeToUtf8(pointer(Text), Length(Text), result);
end;

function StringToUtf8(const Text: string; var Temp: TSynTempBuffer): integer;
var
  len: integer;
begin
  len := length(Text);
  Temp.Init(len * 3);
  result := RawUnicodeToUtf8(Temp.buf, Temp.len + 1, pointer(Text), len, []);
end;

function ToUtf8(const Text: string): RawUtf8;
begin
  RawUnicodeToUtf8(pointer(Text), Length(Text), result);
end;

{$ifndef PUREMORMOT2}

function StringToRawUnicode(const S: string): RawUnicode;
begin
  SetString(result, PAnsiChar(pointer(S)), length(S) * 2 + 1); // +1 for last wide #0
end;

function StringToRawUnicode(P: PChar; L: integer): RawUnicode;
begin
  SetString(result, PAnsiChar(P), L * 2 + 1); // +1 for last wide #0
end;

function RawUnicodeToString(const U: RawUnicode): string;
begin
  // uses StrLenW() and not length(U) to handle case when was used as buffer
  SetString(result, PWideChar(pointer(U)), StrLenW(Pointer(U)));
end;

{$endif PUREMORMOT2}

function StringToSynUnicode(const S: string): SynUnicode;
begin
  result := S;
end;

procedure StringToSynUnicode(const S: string; var result: SynUnicode);
begin
  result := S;
end;

function RawUnicodeToString(P: PWideChar; L: integer): string;
begin
  SetString(result, P, L);
end;

procedure RawUnicodeToString(P: PWideChar; L: integer; var result: string);
begin
  SetString(result, P, L);
end;

function SynUnicodeToString(const U: SynUnicode): string;
begin
  result := U;
end;

function Utf8DecodeToString(P: PUtf8Char; L: integer): string;
begin
  Utf8DecodeToUnicodeString(P, L, result);
end;

procedure Utf8DecodeToString(P: PUtf8Char; L: integer; var result: string);
begin
  Utf8DecodeToUnicodeString(P, L, result);
end;

function Utf8ToString(const Text: RawUtf8): string;
begin
  Utf8DecodeToUnicodeString(pointer(Text), length(Text), result);
end;

procedure Utf8ToStringVar(const Text: RawUtf8; var result: string);
begin
  Utf8DecodeToUnicodeString(pointer(Text), length(Text), result);
end;

procedure Utf8ToFileName(const Text: RawUtf8; var result: TFileName);
begin
  Utf8DecodeToUnicodeString(pointer(Text), length(Text), string(result));
end;

{$else}

function Ansi7ToString(const Text: RawByteString): string;
begin
  result := Text; // if we are SURE this text is 7-bit Ansi -> direct assign
  {$ifdef FPC} // if Text is CP_RAWBYTESTRING then FPC won't handle it properly
  SetCodePage(RawByteString(result), DefaultSystemCodePage, false);
  {$endif FPC} // no FakeCodePage() since Text may be read-only
end;

function Ansi7ToString(Text: PWinAnsiChar; Len: PtrInt): string;
begin
  SetString(result, PAnsiChar(Text), Len);
end;

procedure Ansi7ToString(Text: PWinAnsiChar; Len: PtrInt; var result: string);
begin
  SetString(result, PAnsiChar(Text), Len);
end;

function StringToAnsi7(const Text: string): RawByteString;
begin
  result := Text; // if we are SURE this text is 7-bit Ansi -> direct assign
end;

function StringToWinAnsi(const Text: string): WinAnsiString;
begin
  result := WinAnsiConvert.AnsiToAnsi(CurrentAnsiConvert, Text);
end;

function StringBufferToUtf8(Dest: PUtf8Char; Source: PChar; SourceChars: PtrInt): PUtf8Char;
begin
  result := CurrentAnsiConvert.AnsiBufferToUtf8(Dest, Source, SourceChars);
end;

procedure StringBufferToUtf8(Source: PChar; out result: RawUtf8);
begin
  CurrentAnsiConvert.AnsiBufferToRawUtf8(Source, StrLen(Source), result);
end;

function StringToUtf8(const Text: string): RawUtf8;
begin
  result := CurrentAnsiConvert.AnsiToUtf8(Text);
end;

procedure StringToUtf8(Text: PChar; TextLen: PtrInt; var result: RawUtf8);
begin
  CurrentAnsiConvert.AnsiBufferToRawUtf8(Text, TextLen, result);
end;

procedure StringToUtf8(const Text: string; var result: RawUtf8);
begin
  result := CurrentAnsiConvert.AnsiToUtf8(Text);
end;

function StringToUtf8(const Text: string; var Temp: TSynTempBuffer): integer;
var
  len: integer;
begin
  len := length(Text);
  Temp.Init(len * 3);
  if len <> 0 then
    result := CurrentAnsiConvert.
      AnsiBufferToUtf8(Temp.buf, pointer(Text), len) - PUtf8Char(Temp.buf)
  else
    result := 0;
end;

function ToUtf8(const Text: string): RawUtf8;
begin
  result := CurrentAnsiConvert.AnsiToUtf8(Text);
end;

{$ifndef PUREMORMOT2}

function StringToRawUnicode(const S: string): RawUnicode;
begin
  result := CurrentAnsiConvert.AnsiToRawUnicode(S);
end;

function StringToRawUnicode(P: PChar; L: integer): RawUnicode;
begin
  result := CurrentAnsiConvert.AnsiToRawUnicode(P, L);
end;

function RawUnicodeToString(const U: RawUnicode): string;
begin
  // uses StrLenW() and not length(U) to handle case when was used as buffer
  result := CurrentAnsiConvert.UnicodeBufferToAnsi(Pointer(U), StrLenW(Pointer(U)));
end;

{$endif PUREMORMOT2}

function StringToSynUnicode(const S: string): SynUnicode;
begin
  result := CurrentAnsiConvert.AnsiToUnicodeString(pointer(S), length(S));
end;

procedure StringToSynUnicode(const S: string; var result: SynUnicode);
begin
  result := CurrentAnsiConvert.AnsiToUnicodeString(pointer(S), length(S));
end;

function RawUnicodeToString(P: PWideChar; L: integer): string;
begin
  result := CurrentAnsiConvert.UnicodeBufferToAnsi(P, L);
end;

procedure RawUnicodeToString(P: PWideChar; L: integer; var result: string);
begin
  result := CurrentAnsiConvert.UnicodeBufferToAnsi(P, L);
end;

function SynUnicodeToString(const U: SynUnicode): string;
begin
  result := CurrentAnsiConvert.UnicodeBufferToAnsi(Pointer(U), length(U));
end;

function Utf8DecodeToString(P: PUtf8Char; L: integer): string;
begin
  CurrentAnsiConvert.Utf8BufferToAnsi(P, L, RawByteString(result));
end;

procedure Utf8DecodeToString(P: PUtf8Char; L: integer; var result: string);
begin
  CurrentAnsiConvert.Utf8BufferToAnsi(P, L, RawByteString(result));
end;

function Utf8ToString(const Text: RawUtf8): string;
begin
  result := CurrentAnsiConvert.Utf8ToAnsi(Text);
end;

procedure Utf8ToStringVar(const Text: RawUtf8; var result: string);
begin
  result := CurrentAnsiConvert.Utf8ToAnsi(Text);
end;

procedure Utf8ToFileName(const Text: RawUtf8; var result: TFileName);
begin
  result := CurrentAnsiConvert.Utf8ToAnsi(Text);
end;

{$endif UNICODE}

function ToUtf8(const Ansi7Text: ShortString): RawUtf8;
begin
  FastSetString(result, @Ansi7Text[1], ord(Ansi7Text[0]));
end;

{$ifdef HASVARUSTRING} // some UnicodeString dedicated functions

function UnicodeStringToUtf8(const S: UnicodeString): RawUtf8;
begin
  RawUnicodeToUtf8(pointer(S), Length(S), result);
end;

function Utf8DecodeToUnicodeString(const S: RawUtf8): UnicodeString;
begin
  Utf8DecodeToUnicodeString(pointer(S), Length(S), result);
end;

procedure Utf8DecodeToUnicodeString(P: PUtf8Char; L: integer; var result: UnicodeString);
var
  tmp: TSynTempBuffer;
begin
  if (P = nil) or
     (L = 0) then
    result := ''
  else
  begin
    tmp.Init(L * 3); // maximum posible unicode size (if all <#128)
    SetString(result, PWideChar(tmp.buf), Utf8ToWideChar(tmp.buf, P, L) shr 1);
    tmp.Done;
  end;
end;

function UnicodeStringToWinAnsi(const S: UnicodeString): WinAnsiString;
begin
  result := WinAnsiConvert.UnicodeBufferToAnsi(pointer(S), Length(S));
end;

function Utf8DecodeToUnicodeString(P: PUtf8Char; L: integer): UnicodeString;
begin
  Utf8DecodeToUnicodeString(P, L, result);
end;

function WinAnsiToUnicodeString(WinAnsi: PAnsiChar; WinAnsiLen: PtrInt): UnicodeString;
begin
  SetString(result, nil, WinAnsiLen);
  WinAnsiConvert.AnsiBufferToUnicode(pointer(result), WinAnsi, WinAnsiLen);
end;

function WinAnsiToUnicodeString(const WinAnsi: WinAnsiString): UnicodeString;
begin
  result := WinAnsiToUnicodeString(pointer(WinAnsi), Length(WinAnsi));
end;

{$endif HASVARUSTRING}

function Utf8DecodeToUnicodeRawByteString(P: PUtf8Char; L: integer): RawByteString;
begin
  if (P <> nil) and
     (L <> 0) then
  begin
    FastNewRawByteString(result, L * 3);
    FakeSetLength(result, Utf8ToWideChar(pointer(result), P, L));
  end
  else
    result := '';
end;

function Utf8DecodeToUnicodeRawByteString(const U: RawUtf8): RawByteString;
begin
  result := Utf8DecodeToUnicodeRawByteString(pointer(U), length(U));
end;

function Utf8DecodeToUnicodeStream(P: PUtf8Char; L: integer): TStream;
begin
  result := TRawByteStringStream.Create(Utf8DecodeToUnicodeRawByteString(P, L));
end;

function WinAnsiToSynUnicode(WinAnsi: PAnsiChar; WinAnsiLen: PtrInt): SynUnicode;
begin
  SetString(result, nil, WinAnsiLen);
  WinAnsiConvert.AnsiBufferToUnicode(pointer(result), WinAnsi, WinAnsiLen);
end;

function WinAnsiToSynUnicode(const WinAnsi: WinAnsiString): SynUnicode;
begin
  result := WinAnsiToSynUnicode(pointer(WinAnsi), Length(WinAnsi));
end;

procedure UniqueRawUtf8ZeroToTilde(var u: RawUtf8; MaxSize: PtrInt);
var
  i: PtrInt;
begin
  i := length(u);
  if i > MaxSize then
    PByteArray(u)[MaxSize] := 0
  else
    MaxSize := i;
  for i := 0 to MaxSize - 1 do
    if PByteArray(u)[i] = 0 then
      PByteArray(u)[i] := ord('~');
end;

const
  ZEROED_CW = '~'; // any byte would do - followed by ~ or 0

function UnZeroed(const bin: RawByteString): RawUtf8;
var
  len, z, c: PtrInt;
  a: AnsiChar;
  s, d: PAnsiChar;
begin
  result := '';
  len := length(bin);
  if len = 0 then
    exit;
  s := pointer(bin);
  z := StrLen(s);
  c := ByteScanIndex(pointer(s), len, ord(ZEROED_CW));
  if (z = len) and
     (c < 0) then
  begin
    result := bin; // nothing to convert
    exit;
  end;
  if (c < 0) or
     (z < c) then
    c := z;
  FastSetString(result, len shl 1);
  d := pointer(result);
  MoveFast(s^, d^, c);
  inc(s, c);
  inc(d, c);
  dec(len, c);
  repeat
    a := s^;
    if a = #0 then
    begin
      d^ := ZEROED_CW;
      inc(d);
      a := '0';
    end
    else if a = ZEROED_CW then
    begin
      d^ := ZEROED_CW;
      inc(d);
    end;
    d^ := a;
    inc(d);
    inc(s);
    dec(len);
  until len = 0;
  FakeLength(result, d - pointer(result));
end;

function Zeroed(const u: RawUtf8): RawByteString;
var
  len, c: PtrInt;
  a: AnsiChar;
  s, d: PAnsiChar;
begin
  result := '';
  len := length(u);
  if len = 0 then
    exit;
  s := pointer(u);
  c := ByteScanIndex(pointer(s), len, ord(ZEROED_CW));
  if c < 0 then
  begin
    result := u;
    exit;
  end;
  FastNewRawByteString(result, len);
  d := pointer(result);
  MoveFast(s^, d^, c);
  inc(s, c);
  inc(d, c);
  dec(len, c);
  repeat
    a := s^;
    if a = ZEROED_CW then
    begin
      inc(s);
      dec(len);
      if s^ = '0' then
        a := #0;
    end;
    d^ := a;
    inc(d);
    inc(s);
    dec(len);
  until len = 0;
  FakeLength(result, d - pointer(result));
end;

procedure Utf8ToWideString(const Text: RawUtf8; var result: WideString);
begin
  Utf8ToWideString(pointer(Text), Length(Text), result);
end;

function Utf8ToWideString(const Text: RawUtf8): WideString;
begin
  {$ifdef FPC}
  Finalize(result);
  {$endif FPC}
  Utf8ToWideString(pointer(Text), Length(Text), result);
end;

procedure Utf8ToWideString(Text: PUtf8Char; Len: PtrInt; var result: WideString);
var
  tmp: TSynTempBuffer;
begin
  if (Text = nil) or
     (Len = 0) then
    result := ''
  else
  begin
    tmp.Init(Len * 3); // maximum posible unicode size (if all <#128)
    SetString(result, PWideChar(tmp.buf), Utf8ToWideChar(tmp.buf, Text, Len) shr 1);
    tmp.Done;
  end;
end;

function WideStringToUtf8(const aText: WideString): RawUtf8;
begin
  RawUnicodeToUtf8(pointer(aText), length(aText), result);
end;

function Utf8ToSynUnicode(const Text: RawUtf8): SynUnicode;
begin
  Utf8ToSynUnicode(pointer(Text), length(Text), result);
end;

procedure Utf8ToSynUnicode(const Text: RawUtf8; var result: SynUnicode);
begin
  Utf8ToSynUnicode(pointer(Text), length(Text), result);
end;

procedure Utf8ToSynUnicode(Text: PUtf8Char; Len: PtrInt; var result: SynUnicode);
var
  tmp: TSynTempBuffer;
  n: PtrInt;
begin
  n := Utf8DecodeToUnicode(Text, Len, tmp);
  SetString(result, PWideChar(tmp.buf), n);
  if n <> 0 then
    tmp.Done;
end;

function Utf8DecodeToUnicode(const Text: RawUtf8; var temp: TSynTempBuffer): PtrInt;
begin
  result := Utf8DecodeToUnicode(pointer(Text), length(Text), temp);
end;

function Utf8DecodeToUnicode(Text: PUtf8Char; Len: PtrInt; var temp: TSynTempBuffer): PtrInt;
begin
  if (Text = nil) or
     (Len <= 0) then
  begin
    temp.buf := nil;
    temp.len := 0;
    result := 0;
  end
  else
  begin
    temp.Init(Len * 3); // maximum posible unicode size (if all <#128)
    result := Utf8ToWideChar(temp.buf, Text, Len) shr 1;
  end;
end;


{ **************** Text Case-(in)sensitive Conversion and Comparison }

function IdemPropNameUSameLenNotNull(P1, P2: PUtf8Char; P1P2Len: PtrInt): boolean;
label
  zero;
begin
  {$ifndef CPUX86}
  result := false;
  {$endif CPUX86}
  pointer(P1P2Len) := @P1[P1P2Len - SizeOf(cardinal)];
  dec(PtrUInt(P2), PtrUInt(P1));
  while PtrUInt(P1P2Len) >= PtrUInt(P1) do
    // compare 4 Bytes per loop
    if (PCardinal(P1)^ xor PCardinal(@P2[PtrUInt(P1)])^) and $dfdfdfdf <> 0 then
      goto zero
    else
      inc(PCardinal(P1));
  inc(P1P2Len, SizeOf(cardinal));
  while PtrUInt(P1) < PtrUInt(P1P2Len) do
    if (ord(P1^) xor ord(P2[PtrUInt(P1)])) and $df <> 0 then
      goto zero
    else
      inc(PByte(P1));
  result := true;
  exit;
zero:
  {$ifdef CPUX86}
  result := false;
  {$endif CPUX86}
end;

function PropNameValid(P: PUtf8Char): boolean;
var
  tab: PTextCharSet;
{%H-}begin
  tab := @TEXT_CHARS;
  if (P <> nil) and
     (tcIdentifierFirstChar in tab[P^]) then
    // first char must be in ['_', 'a'..'z', 'A'..'Z']
    repeat
      inc(P); // following chars can be ['_', '0'..'9', 'a'..'z', 'A'..'Z']
      if tcIdentifier in tab[P^] then
        continue;
      result := P^ = #0;
      exit;
    until false
  else
    result := false;
end;

function PropNamesValid(const Values: array of RawUtf8): boolean;
var
  i, j: PtrInt;
  tab: PTextCharSet;
begin
  result := false;
  tab := @TEXT_CHARS;
  for i := 0 to high(Values) do
    for j := 1 to length(Values[i]) do
      if not (tcIdentifier in tab[Values[i][j]]) then
        exit; // not ['_', '0'..'9', 'a'..'z', 'A'..'Z']
  result := true;
end;

function PropNameSanitize(const text, fallback: RawUtf8): RawUtf8;
var
  i: PtrInt;
begin
  result := text;
  for i := length(result) downto 1 do
    if result[i] in [#0 .. ' ', '#', '"', '''', '*'] then
      delete(result, i, 1);
  for i := 1 to length(result) do
    if result[i] in ['[', ']', '/', '\', '&', '@', '+', '-', '.'] then
      result[i] := '_';
  if not PropNameValid(pointer(result)) then
    result := fallback; // it was not good enough
end;

function IdemPropName(const P1, P2: ShortString): boolean;
begin
  result := (P1[0] = P2[0]) and
            ((P1[0] = #0) or
             (((ord(P1[1]) xor ord(P2[1])) and $df = 0) and
              IdemPropNameUSameLenNotNull(@P1[1], @P2[1], ord(P2[0]))));
end;

function IdemPropName(const P1: ShortString; P2: PUtf8Char; P2Len: PtrInt): boolean;
begin
  result := (ord(P1[0]) = P2Len) and
            ((P2Len = 0) or
             IdemPropNameUSameLenNotNull(@P1[1], P2, P2Len));
end;

function IdemPropName(P1, P2: PUtf8Char; P1Len, P2Len: PtrInt): boolean;
begin
  result := (P1Len = P2Len) and
            ((P1Len = 0) or
             IdemPropNameUSameLenNotNull(P1, P2, P1Len));
end;

function IdemPropNameU(const P1: RawUtf8; P2: PUtf8Char; P2Len: PtrInt): boolean;
begin
  if PtrUInt(P1) <> 0 then
    result := (PStrLen(PAnsiChar(pointer(P1)) - _STRLEN)^ = P2Len) and
              ((PByte(P1)^ xor PByte(P2)^) and $df = 0) and
              IdemPropNameUSameLenNotNull(pointer(P1), pointer(P2), P2Len)
  else
    result := P2Len = 0;
end;

function IdemPropNameU(const P1, P2: RawUtf8): boolean;
var
  L: TStrLen;
begin
  if PtrUInt(P1) <> PtrUInt(P2) then
    if (PtrUInt(P1) <> 0) and
       (PtrUInt(P2) <> 0) then
    begin
      L := PStrLen(PAnsiChar(pointer(P1)) - _STRLEN)^;
      result := (PStrLen(PAnsiChar(pointer(P2)) - _STRLEN)^ = L) and
                ((PByte(P1)^ xor PByte(P2)^) and $df = 0) and
                IdemPropNameUSameLenNotNull(pointer(P1), pointer(P2), L);
    end
    else
      result := false
  else
    result := true;
end;

function IdemPChar(p: PUtf8Char; up: PAnsiChar): boolean;
var
  {$ifdef CPUX86NOTPIC}
  table: TNormTable absolute NormToUpperAnsi7;
  {$else}
  table: PNormTable; // faster on PIC/ARM and x86_64
  {$endif CPUX86NOTPIC}
begin
  result := false;
  if p = nil then
    exit;
  if up <> nil then
  begin
    dec(PtrUInt(p), PtrUInt(up));
    {$ifndef CPUX86NOTPIC}
    table := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    while true do
      if up^ = #0 then
        break
      else if table[up[PtrUInt(p)]] <> up^ then
        exit
      else
        inc(up);
  end;
  result := true;
end;

function IdemPChar(p: PUtf8Char; up: PAnsiChar; table: PNormTable): boolean;
begin
  result := false;
  if p = nil then
    exit;
  if up <> nil then
  begin
    dec(PtrUInt(p), PtrUInt(up));
    while true do
      if up^ = #0 then
        break
      else if table[up[PtrUInt(p)]] <> up^ then
        exit
      else
        inc(up);
  end;
  result := true;
end;

function IdemPCharAnsi(
  {$ifdef CPUX86NOTPIC}
  const table: TNormTable;
  {$else}
  const table: PNormTable;
  {$endif CPUX86NOTPIC}
  p: PUtf8Char; up: PAnsiChar): boolean;
  {$ifdef HASINLINE}inline;{$endif}
begin
  // in this local IdemPChar() version, p and up are expected to be <> nil
  result := false;
  dec(PtrUInt(p), PtrUInt(up));
  while true do
    if up^ = #0 then
      break
    else if table[up[PtrUInt(p)]] <> up^ then
      exit
    else
      inc(up);
  result := true;
end;

function IdemPCharByte(
  {$ifdef CPUX86NOTPIC}
  const table: TNormTableByte;
  {$else}
  const table: PByteArray;
  {$endif CPUX86NOTPIC}
  p: PUtf8Char; up: PAnsiChar): boolean;
  {$ifdef HASINLINE}inline;{$endif}
begin
  // in this local IdemPChar() version, p and up are expected to be <> nil
  result := false;
  dec(PtrUInt(p), PtrUInt(up));
  while true do
    if up^ = #0 then
      break
    else if table[PtrInt(up[PtrUInt(p)])] <> PByte(up)^ then
      exit
    else
      inc(up);
  result := true;
end;

function IdemPCharWithoutWhiteSpace(p: PUtf8Char; up: PAnsiChar): boolean;
begin
  result := False;
  if p = nil then
    exit;
  if up <> nil then
    while up^ <> #0 do
    begin
      while p^ <= ' ' do // trim white space
        if p^ = #0 then
          exit
        else
          inc(p);
      if up^ <> NormToUpperAnsi7[p^] then
        exit;
      inc(up);
      inc(p);
    end;
  result := true;
end;

function IdemPCharArray(p: PUtf8Char; const upArray: array of PAnsiChar): integer;
var
  w: word;
  up: ^PAnsiChar;
  {$ifdef CPUX86NOTPIC}
  tab: TNormTableByte absolute NormToUpperAnsi7;
  {$else}
  tab: PByteArray; // faster on PIC/ARM and x86_64
  {$endif CPUX86NOTPIC}
begin
  if p <> nil then
  begin
    {$ifndef CPUX86NOTPIC}
    tab := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    w := tab[ord(p[0])] + tab[ord(p[1])] shl 8;
    up := @upArray[0];
    for result := 0 to high(upArray) do
      if (PWord(up^)^ = w) and
         IdemPCharByte(tab, p + 2, up^ + 2) then
        exit
      else
        inc(up);
  end;
  result := -1;
end;

function IdemPPChar(p: PUtf8Char; up: PPAnsiChar): PtrInt;
var
  w: word;
  u: PAnsiChar;
  p2: PtrUInt;
  c: byte;
  {$ifdef CPUX86NOTPIC}
  tab: TNormTableByte absolute NormToUpperAnsi7;
  {$else}
  tab: PByteArray; // faster on PIC/ARM and x86_64
  {$endif CPUX86NOTPIC}
begin
  if p <> nil then
  begin
    // uppercase the first two p^ chars
    {$ifndef CPUX86NOTPIC}
    tab := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    w := tab[ord(p[0])] + tab[ord(p[1])] shl 8;
    result := 0;
    repeat
      // quickly check the first 2 up^[result] chars
      u := PPointerArray(up)[result];
      if u = nil then
        break
      else if PWord(u)^ <> w then
      begin
        inc(result);
        continue;
      end;
      // inlined if IdemPCharByte(tab, p + 2, up^ + 2) then exit
      p2 := PtrUInt(p);
      dec(p2, PtrUInt(u));
      inc(u, 2);
      repeat
        c := PByte(u)^;
        if c = 0 then
          exit   // found IdemPChar(p^, up^[result])
        else if tab[PtrUInt(u[p2])] <> c then
          break; // at least one char doesn't match
        inc(u);
      until false;
      inc(result);
    until false;
  end;
  result := -1;
end;

function IdemPCharArrayBy2(p: PUtf8Char; const upArrayBy2Chars: RawUtf8): PtrInt;
begin
  if p <> nil then
    result := WordScanIndex(pointer(upArrayBy2Chars), length(upArrayBy2Chars) shr 1,
      NormToUpperAnsi7Byte[ord(p[0])] + NormToUpperAnsi7Byte[ord(p[1])] shl 8)
  else
    result := -1;
end;

function IdemPCharU(p, up: PUtf8Char): boolean;
begin
  result := false;
  if (p = nil) or
     (up = nil) then
    exit;
  while up^ <> #0 do
  begin
    if GetNextUtf8Upper(p) <> ord(up^) then
      exit;
    inc(up);
  end;
  result := true;
end;

function IdemPCharW(p: PWideChar; up: PUtf8Char): boolean;
begin
  result := false;
  if (p = nil) or
     (up = nil) then
    exit;
  while up^ <> #0 do
  begin
    if (p^ > #255) or
       (up^ <> AnsiChar(NormToUpperByte[ord(p^)])) then
      exit;
    inc(up);
    inc(p);
  end;
  result := true;
end;

function StartWith(const text, upTextStart: RawUtf8): boolean;
begin
  result := (PtrUInt(text) <> 0) and
            (PtrUInt(upTextStart) <> 0) and
            (PStrLen(PAnsiChar(pointer(text)) - _STRLEN)^ >=
              PStrLen(PAnsiChar(pointer(upTextStart)) - _STRLEN)^) and
            IdemPChar(pointer(text), pointer(upTextStart));
end;

function EndWith(const text, upTextEnd: RawUtf8): boolean;
var
  o: PtrInt;
begin
  o := length(text) - length(upTextEnd);
  result := (o >= 0) and
            IdemPChar(PUtf8Char(pointer(text)) + o, pointer(upTextEnd));
end;

function EndWithArray(const text: RawUtf8; const upArray: array of RawUtf8): integer;
var
  t, o: PtrInt;
  {$ifdef CPUX86NOTPIC}
  tab: TNormTableByte absolute NormToUpperAnsi7;
  {$else}
  tab: PByteArray; // faster on PIC/ARM and x86_64
  {$endif CPUX86NOTPIC}
begin
  t := length(text);
  if t > 0 then
  begin
    {$ifndef CPUX86NOTPIC}
    tab := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    for result := 0 to high(upArray) do
    begin
      o := t - length(upArray[result]);
      if (o >= 0) and
         ((upArray[result] = '') or
          IdemPCharByte(tab, PUtf8Char(pointer(text)) + o,
            pointer(upArray[result]))) then
        exit;
    end;
  end;
  result := -1;
end;

function IdemFileExt(p: PUtf8Char; extup: PAnsiChar; sepChar: AnsiChar): boolean;
var
  ext: PUtf8Char;
begin
  if (p <> nil) and
     (extup <> nil) then
  begin
    ext := nil;
    repeat
      if p^ = sepChar then
        ext := p; // get last '.' position from p into ext
      inc(p);
    until p^ = #0;
    result := IdemPChar(ext, extup);
  end
  else
    result := false;
end;

function IdemFileExts(p: PUtf8Char; const extup: array of PAnsiChar;
  sepChar: AnsiChar): integer;
var
  ext: PUtf8Char;
begin
  result := -1;
  if (p <> nil) and
     (high(extup) > 0) then
  begin
    ext := nil;
    repeat
      if p^ = sepChar then
        ext := p; // get last '.' position from p into ext
      inc(p);
    until p^ = #0;
    if ext <> nil then
      result := IdemPCharArray(ext, extup);
  end;
end;

function PosCharAny(Str: PUtf8Char; Characters: PAnsiChar): PUtf8Char;
var
  s: PAnsiChar;
  c: AnsiChar;
begin
  if (Str <> nil) and
     (Characters <> nil) and
     (Characters^ <> #0) then
    repeat
      c := Str^;
      if c = #0 then
        break;
      result := Str;
      s := Characters;
      repeat
        if s^ = c then
          exit;
        inc(s);
      until s^ = #0;
      inc(Str);
    until false;
  result := nil;
end;

function PosI(uppersubstr: PUtf8Char; const str: RawUtf8): PtrInt;
var
  u: AnsiChar;
  {$ifdef CPUX86NOTPIC}
  table: TNormTable absolute NormToUpperAnsi7;
  {$else}
  table: PNormTable;
  {$endif CPUX86NOTPIC}
begin
  if uppersubstr <> nil then
  begin
    {$ifndef CPUX86NOTPIC}
    table := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    u := uppersubstr^;
    for result := 1 to Length(str) do
      if table[str[result]] = u then
        if IdemPCharAnsi(table, @PUtf8Char(pointer(str))[result],
             PAnsiChar(uppersubstr) + 1) then
          exit;
  end;
  result := 0;
end;

function StrPosI(uppersubstr, str: PUtf8Char): PUtf8Char;
var
  u: AnsiChar;
  {$ifdef CPUX86NOTPIC}
  table: TNormTable absolute NormToUpperAnsi7;
  {$else}
  table: PNormTable;
  {$endif CPUX86NOTPIC}
begin
  if (uppersubstr <> nil) and
     (str <> nil) then
  begin
    {$ifndef CPUX86NOTPIC}
    table := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    u := uppersubstr^;
    inc(uppersubstr);
    result := str;
    while result^ <> #0 do
    begin
      if table[result^] = u then
        if IdemPCharAnsi(table, result + 1, PAnsiChar(uppersubstr)) then
          exit;
      inc(result);
    end;
  end;
  result := nil;
end;

function PosIU(substr: PUtf8Char; const str: RawUtf8): integer;
var
  p: PUtf8Char;
begin
  if (substr <> nil) and
     (str <> '') then
  begin
    p := pointer(str);
    repeat
      if GetNextUtf8Upper(p) = ord(substr^) then
        if IdemPCharU(p, substr + 1) then
        begin
          result := p - pointer(str);
          exit;
        end;
    until p^ = #0;
  end;
  result := 0;
end;

function strspn(s, accept: pointer): integer;
// FPC is efficient at compiling this code, but is SLOWER when inlined
var
  p: PCardinal;
  c: AnsiChar;
  d: cardinal;
begin
  // returns size of initial segment of s which are in accept
  result := 0;
  repeat
    c := PAnsiChar(s)[result];
    if c = #0 then
      break;
    p := accept;
    repeat // stop as soon as we find any character not from accept
      d := p^;
      inc(p);
      if AnsiChar(d) = c then
        break
      else if AnsiChar(d) = #0 then
        exit;
      d := d shr 8;
      if AnsiChar(d) = c then
        break
      else if AnsiChar(d) = #0 then
        exit;
      d := d shr 8;
      if AnsiChar(d) = c then
        break
      else if AnsiChar(d) = #0 then
        exit;
      d := d shr 8;
      if AnsiChar(d) = c then
        break
      else if AnsiChar(d) = #0 then
        exit;
    until false;
    inc(result);
  until false;
end;

function strcspn(s, reject: pointer): integer;
// FPC is efficient at compiling this code, but is SLOWER when inlined
var
  p: PCardinal;
  c: AnsiChar;
  d: cardinal;
begin
  // returns size of initial segment of s which are not in reject
  result := 0;
  repeat
    c := PAnsiChar(s)[result];
    if c = #0 then
      break;
    p := reject;
    repeat // stop as soon as we find any character from reject
      d := p^;
      inc(p);
      if AnsiChar(d) = c then
        exit
      else if AnsiChar(d) = #0 then
        break;
      d := d shr 8;
      if AnsiChar(d) = c then
        exit
      else if AnsiChar(d) = #0 then
        break;
      d := d shr 8;
      if AnsiChar(d) = c then
        exit
      else if AnsiChar(d) = #0 then
        break;
      d := d shr 8;
      if AnsiChar(d) = c then
        exit
      else if AnsiChar(d) = #0 then
        break;
    until false;
    inc(result);
  until false;
end;

function StrCompL(P1, P2: pointer; L, Default: PtrInt): PtrInt;
var
  i: PtrInt;
begin
  i := 0;
  repeat
    result := PByteArray(P1)[i] - PByteArray(P2)[i];
    if result = 0 then
    begin
      inc(i);
      if i < L then
        continue
      else
        break;
    end;
    exit;
  until false;
  result := Default;
end;

function StrCompIL(P1, P2: pointer; L, Default: PtrInt): PtrInt;
var
  i: PtrInt;
  {$ifdef CPUX86NOTPIC}
  tab: TNormTableByte absolute NormToUpperAnsi7Byte;
  {$else}
  tab: PByteArray; // faster on PIC/ARM and x86_64
  {$endif CPUX86NOTPIC}
begin
  i := 0;
  {$ifndef CPUX86NOTPIC}
  tab := @NormToUpperAnsi7Byte;
  {$endif CPUX86NOTPIC}
  repeat
    if tab[PByteArray(P1)[i]] = tab[PByteArray(P2)[i]] then
    begin
      inc(i);
      if i < L then
        continue
      else
        break;
    end;
    result := PByteArray(P1)[i] - PByteArray(P2)[i];
    exit;
  until false;
  result := Default;
end;

function StrICompNotNil(Str1, Str2: pointer; Up: PNormTableByte): PtrInt;
var
  C1, C2: byte; // integer/PtrInt are actually slower on FPC
begin
  result := PtrInt(PtrUInt(Str2)) - PtrInt(PtrUInt(Str1));
  if result <> 0 then
  begin
    repeat
      C1 := Up[PByteArray(Str1)[0]];
      C2 := Up[PByteArray(Str1)[result]];
      inc(PByte(Str1));
    until (C1 = 0) or
          (C1 <> C2);
    result := C1 - C2;
  end;
end;

function StrICompLNotNil(Str1, Str2: pointer; Up: PNormTableByte; L: PtrInt): PtrInt;
begin
  result := 0;
  repeat
    if Up[PByteArray(Str1)[result]] = Up[PByteArray(Str2)[result]] then
    begin
      inc(result);
      if result < L then
        continue
      else
        break;
    end;
    result := PByteArray(Str1)[result] - PByteArray(Str2)[result];
    exit;
  until false;
  result := 0;
end;

function StrILNotNil(Str1, Str2: pointer; Up: PNormTableByte; L: PtrInt): PtrInt;
begin
  result := 0;
  repeat
    if Up[PByteArray(Str1)[result]] <> Up[PByteArray(Str2)[result]] then
      exit;
    inc(result);
  until result = L;
end;


function StrIComp(Str1, Str2: pointer): PtrInt;
var
  C1, C2: byte; // integer/PtrInt are actually slower on FPC
  {$ifdef CPUX86NOTPIC}
  table: TNormTableByte absolute NormToUpperAnsi7Byte;
  {$else}
  table: PByteArray;
  {$endif CPUX86NOTPIC}
begin
  result := PtrInt(PtrUInt(Str2)) - PtrInt(PtrUInt(Str1));
  if result <> 0 then
    if Str1 <> nil then
      if Str2 <> nil then
      begin
        {$ifndef CPUX86NOTPIC}
        table := @NormToUpperAnsi7Byte;
        {$endif CPUX86NOTPIC}
        repeat
          C1 := table[PByteArray(Str1)[0]];
          C2 := table[PByteArray(Str1)[result]];
          inc(PByte(Str1));
        until (C1 = 0) or
              (C1 <> C2);
        result := C1 - C2;
      end
      else
        // Str2=''
        result := 1
    else
      // Str1=''
      result := -1;
end;

function GetLineContains(p, pEnd, up: PUtf8Char): boolean;
var
  i: PtrInt;
  {$ifdef CPUX86NOTPIC}
  table: TNormTable absolute NormToUpperAnsi7Byte;
  {$else}
  table: PNormTable;
  {$endif CPUX86NOTPIC}
label
  Fnd1, LF1, Fnd2, LF2, Ok; // ugly but fast
begin
  if (p <> nil) and
     (up <> nil) then
  begin
    {$ifndef CPUX86NOTPIC}
    table := @NormToUpperAnsi7;
    {$endif CPUX86NOTPIC}
    if pEnd = nil then
      repeat
        if p^ <= #13 then // p^ into a temp var is slower
          goto LF1
        else if table[p^] = up^ then
          goto Fnd1;
        inc(p);
        continue;
LF1:    if (p^ = #0) or
           (p^ = #13) or
           (p^ = #10) then
          break;
        inc(p);
        continue;
Fnd1:   i := 0;
        repeat
          inc(i);
          if up[i] <> #0 then
            if up[i] = table[p[i]] then
              continue
            else
              break
          else
          begin
Ok:         result := true; // found
            exit;
          end;
        until false;
        inc(p);
      until false
    else
      repeat
        if p >= pEnd then
          break;
        if p^ <= #13 then
          goto LF2
        else if table[p^] = up^ then
          goto Fnd2;
        inc(p);
        continue;
LF2:    if (p^ = #13) or
           (p^ = #10) then
          break;
        inc(p);
        continue;
Fnd2:   i := 0;
        repeat
          inc(i);
          if up[i] = #0 then
            goto Ok;
          if p + i >= pEnd then
            break;
        until up[i] <> table[p[i]];
        inc(p);
      until false;
  end;
  result := false;
end;

function ContainsUtf8(p, up: PUtf8Char): boolean;
var
  u: PByte;
begin
  if (p <> nil) and
     (up <> nil) and
     (up^ <> #0) then
  begin
    result := true;
    repeat
      u := pointer(up);
      repeat
        if GetNextUtf8Upper(p) <> u^ then
          break
        else
          inc(u);
        if u^ = 0 then
          exit; // up^ was found inside p^
      until false;
      p := FindNextUtf8WordBegin(p);
    until p = nil;
  end;
  result := false;
end;

function GetNextUtf8Upper(var U: PUtf8Char): Ucs4CodePoint;
begin
  result := ord(U^);
  if result = 0 then
    exit;
  if result <= 127 then
  begin
    inc(U);
    result := NormToUpperByte[result];
    exit;
  end;
  result := UTF8_TABLE.GetHighUtf8Ucs4(U);
  if (result <= 255) and
     (WinAnsiConvert.AnsiToWide[result] <= 255) then
    result := NormToUpperByte[result];
end;

function FindNextUtf8WordBegin(U: PUtf8Char): PUtf8Char;
var
  c: cardinal;
  V: PUtf8Char;
begin
  result := nil;
  repeat
    c := GetNextUtf8Upper(U);
    if c = 0 then
      exit;
  until (c >= 127) or
        not (tcWord in TEXT_BYTES[c]); // not ['0'..'9', 'a'..'z', 'A'..'Z']
  repeat
    V := U;
    c := GetNextUtf8Upper(U);
    if c = 0 then
      exit;
  until (c < 127) and
        (tcWord in TEXT_BYTES[c]);
  result := V;
end;

function AnsiICompW(u1, u2: PWideChar): PtrInt;
var
  C1, C2: PtrInt;
  {$ifdef CPUX86NOTPIC}
  table: TNormTableByte absolute NormToUpperAnsi7Byte;
  {$else}
  table: PByteArray;
  {$endif CPUX86NOTPIC}
begin
  if u1 <> u2 then
    if u1 <> nil then
      if u2 <> nil then
      begin
        {$ifndef CPUX86NOTPIC}
        table := @NormToUpperAnsi7Byte;
        {$endif CPUX86NOTPIC}
        repeat
          C1 := PtrInt(u1^);
          C2 := PtrInt(u2^);
          result := C1 - C2;
          if result <> 0 then
          begin
            if (C1 > 255) or
               (C2 > 255) then
              exit;
            result := table[C1] - table[C2];
            if result <> 0 then
              exit;
          end;
          if (C1 = 0) or
             (C2 = 0) then
            break;
          inc(u1);
          inc(u2);
        until false;
      end
      else
        result := 1
    else  // u2=''
      result := -1
  else // u1=''
    result := 0;      // u1=u2
end;

function AnsiIComp(Str1, Str2: pointer): PtrInt;
var
  C1, C2: byte; // integer/PtrInt are actually slower on FPC
  lookupper: PByteArray; // better x86-64 / PIC asm generation
begin
  result := PtrInt(PtrUInt(Str2)) - PtrInt(PtrUInt(Str1));
  if result <> 0 then
    if Str1 <> nil then
      if Str2 <> nil then
      begin
        lookupper := @NormToUpperByte;
        repeat
          C1 := lookupper[PByteArray(Str1)[0]];
          C2 := lookupper[PByteArray(Str1)[result]];
          inc(PByte(Str1));
        until (C1 = 0) or
              (C1 <> C2);
        result := C1 - C2;
      end
      else
        result := 1
    else  // Str2=''
      result := -1;     // Str1=''
end;

function SortDynArrayAnsiStringI(const A, B): integer;
begin
  result := StrIComp(PUtf8Char(A), PUtf8Char(B)); // very agressively inlined
end;

function SortDynArrayPUtf8CharI(const A, B): integer;
begin
  result := StrIComp(PUtf8Char(A), PUtf8Char(B));
end;

function SortDynArrayStringI(const A, B): integer;
begin
  {$ifdef UNICODE}
  result := AnsiICompW(PWideChar(A), PWideChar(B));
  {$else}
  result := StrIComp(PUtf8Char(A), PUtf8Char(B));
  {$endif UNICODE}
end;

function SortDynArrayUnicodeStringI(const A, B): integer;
begin
  result := AnsiICompW(PWideChar(A), PWideChar(B));
end;

function ConvertCaseUtf8(P: PUtf8Char; const Table: TNormTableByte): PtrInt;
var
  D, S: PUtf8Char;
  c: PtrUInt;
  extra, i: PtrInt;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
begin
  result := 0;
  if P = nil then
    exit;
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  D := P;
  repeat
    c := byte(P[0]);
    inc(P);
    if c = 0 then
      break;
    if c <= 127 then
    begin
      D[result] := AnsiChar(Table[c]);
      inc(result);
    end
    else
    begin
      extra := utf8.Lookup[c];
      if extra = UTF8_INVALID then
        exit; // invalid leading byte
      i := 0;
      repeat
        if byte(P[i]) and $c0 <> $80 then
          exit; // invalid input content
        c := (c shl 6) + byte(P[i]);
        inc(i);
      until i = extra;
      with utf8.Extra[extra] do
      begin
        dec(c, offset);
        if c < minimum then
          exit; // invalid input content
      end;
      if (c <= 255) and
         (Table[c] <= 127) then
      begin
        D[result] := AnsiChar(Table[c]);
        inc(result);
        inc(P, extra);
        continue;
      end;
      S := P - 1;
      inc(P, extra);
      inc(extra);
      MoveByOne(S, D + result, extra);
      inc(result, extra);
    end;
  until false;
end;

function UpperCaseU(const S: RawUtf8): RawUtf8;
var
  LS, LD: integer;
begin
  LS := length(S);
  FastSetString(result, pointer(S), LS);
  LD := ConvertCaseUtf8(pointer(result), NormToUpperByte);
  if LS <> LD then
    SetLength(result, LD);
end;

function LowerCaseU(const S: RawUtf8): RawUtf8;
var
  LS, LD: integer;
begin
  LS := length(S);
  FastSetString(result, pointer(S), LS);
  LD := ConvertCaseUtf8(pointer(result), NormToLowerByte);
  if LS <> LD then
    SetLength(result, LD);
end;

function Utf8IComp(u1, u2: PUtf8Char): PtrInt;
var
  c2: PtrInt;
  {$ifdef CPUX86NOTPIC}
  table: TNormTableByte absolute NormToUpperByte;
  {$else}
  table: PByteArray;
  {$endif CPUX86NOTPIC}
label
  c2low;
begin
  // fast UTF-8 comparison using the NormToUpper[] array for all 8-bit values
  {$ifndef CPUX86NOTPIC}
  table := @NormToUpperByte;
  {$endif CPUX86NOTPIC}
  if u1 <> u2 then
    if u1 <> nil then
      if u2 <> nil then
        repeat
          result := ord(u1^);
          c2 := ord(u2^);
          if result <= 127 then
            if result <> 0 then
            begin
              inc(u1);
              result := table[result];
              if c2 <= 127 then
              begin
c2low:          if c2 = 0 then
                  exit; // u1>u2 -> return u1^
                inc(u2);
                dec(result, table[c2]);
                if result <> 0 then
                  exit;
                continue;
              end;
            end
            else
            begin
              // u1^=#0 -> end of u1 reached
              if c2 <> 0 then    // end of u2 reached -> u1=u2 -> return 0
                result := -1;    // u1<u2
              exit;
            end
          else
          begin
            if result and $20 = 0 then // fast $0..$7ff process
            begin
              result := (result shl 6) + byte(u1[1]) - $3080;
              inc(u1, 2);
            end
            else
              result := UTF8_TABLE.GetHighUtf8Ucs4(u1);
            if result <= 255 then
              result := table[result]; // 8-bit to upper, 32-bit as is
          end;
          if c2 <= 127 then
            goto c2low
          else if c2 and $20 = 0 then // fast $0..$7ff process
          begin
            c2 := (c2 shl 6) + byte(u2[1]) - $3080;
            inc(u2, 2);
          end
          else
            c2 := UTF8_TABLE.GetHighUtf8Ucs4(u2);
          if c2 <= 255 then
            c2 := table[c2]; // 8-bit to upper
          dec(result, c2);
          if result <> 0 then
            exit;
        until false
      else
        result := 1 // u2=''
    else
      result := -1  // u1=''
  else
    result := 0;    // u1=u2
end;

function Utf8ILComp(u1, u2: PUtf8Char; L1, L2: cardinal): PtrInt;
var
  c2: PtrInt;
  extra, i: integer;
  {$ifdef CPUX86NOTPIC}
  table: TNormTableByte absolute NormToUpperByte;
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  table: PByteArray;
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  neg, pos;
begin
  // fast UTF-8 comparison using the NormToUpper[] array for all 8-bit values
  {$ifndef CPUX86NOTPIC}
  table := @NormToUpperByte;
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  if u1 <> u2 then
    if (u1 <> nil) and
       (L1 <> 0) then
      if (u2 <> nil) and
         (L2 <> 0) then
        repeat
          result := ord(u1^);
          c2 := ord(u2^);
          inc(u1);
          dec(L1);
          if result <= 127 then
          begin
            result := table[result];
            if c2 <= 127 then
            begin
              // 'a'..'z' / 'A'..'Z' case insensitive comparison
              dec(result, table[c2]);
              dec(L2);
              inc(u2);
              if result <> 0 then
                // found unmatching char
                exit
              else if L1 <> 0 then
                if L2 <> 0 then
                  // L1>0 and L2>0 -> next char
                  continue
                else
                  // L1>0 and L2=0 -> u1>u2
                  goto pos
              else
              if L2 <> 0 then
                // L1=0 and L2>0 -> u1<u2
                goto neg
              else
                // L1=0 and L2=0 -> u1=u2 -> returns 0
                exit;
            end;
          end
          else
          begin
            // Win-1252 case insensitive comparison
            extra := utf8.Lookup[result];
            if extra = UTF8_INVALID then
              // invalid leading byte
              goto neg;
            dec(L1, extra);
            if integer(L1) < 0 then
              goto neg;
            i := 0;
            repeat
              result := result shl 6;
              inc(result, ord(u1[i]));
              inc(i);
            until i = extra;
            inc(u1, extra);
            dec(result, utf8.Extra[extra].offset);
            if result and $ffffff00 = 0 then
              // 8-bit to upper conversion, 32-bit as is
              result := table[result];
          end;
          // here result=NormToUpper[u1^]
          inc(u2);
          dec(L2);
          if c2 <= 127 then
          begin
            dec(result, table[c2]);
            if result <> 0 then
              // found unmatching char
              exit;
          end
          else
          begin
            extra := utf8.Lookup[c2];
            if extra = UTF8_INVALID then
              goto pos;
            dec(L2, extra);
            if integer(L2) < 0 then
              goto pos;
            i := 0;
            repeat
              c2 := c2 shl 6;
              inc(c2, ord(u2[i]));
              inc(i);
            until i = extra;
            inc(u2, extra);
            dec(c2, utf8.Extra[extra].offset);
            if c2 and $ffffff00 = 0 then
              // 8-bit to upper
              dec(result, table[c2])
            else
              // returns 32-bit diff
              dec(result, c2);
            if result <> 0 then
              // found unmatching char
              exit;
          end;
          // here we have result=NormToUpper[u2^]-NormToUpper[u1^]=0
          if L1 = 0 then
            // test if we reached end of u1 or end of u2
            if L2 = 0 then
              // u1=u2
              exit
            else
              // u1<u2
              goto neg
          else
          if L2 = 0 then
            // u1>u2
            goto pos;
        until false
      else
pos:    // u2='' or u1>u2
        result := 1
    else
neg:  // u1='' or u1<u2
      result := -1
  else
    // u1=u2
    result := 0;
end;

function SameTextU(const S1, S2: RawUtf8): boolean;
// checking UTF-8 lengths is not accurate: surrogates may be confusing
begin
  result := Utf8IComp(pointer(S1), pointer(S2)) = 0;
end;

function FindAnsi(A, UpperValue: PAnsiChar): boolean;
var
  ValueStart: PAnsiChar;
begin
  result := false;
  if (A = nil) or
     (UpperValue = nil) then
    exit;
  ValueStart := UpperValue;
  repeat
    // test beginning of word
    repeat
      if A^ = #0 then
        exit
      else if tcWord in TEXT_CHARS[NormToUpper[A^]] then
        break
      else
        inc(A);
    until false;
    // check if this word is the UpperValue
    UpperValue := ValueStart;
    repeat
      if NormToUpper[A^] <> UpperValue^ then
        break;
      inc(UpperValue);
      if UpperValue^ = #0 then
      begin
        result := true; // UpperValue found!
        exit;
      end;
      inc(A);
      if A^ = #0 then
        exit;
    until false;
    // find beginning of next word
    repeat
      if A^ = #0 then
        exit
      else if not (tcWord in TEXT_CHARS[NormToUpper[A^]]) then
        break
      else
        inc(A);
    until false;
  until false;
end;

function FindUnicode(PW, Upper: PWideChar; UpperLen: PtrInt): boolean;
var
  Start: PWideChar;
  w: PtrUInt;
begin
  result := false;
  if (PW = nil) or
     (Upper = nil) then
    exit;
  repeat
    // go to beginning of next word
    repeat
      w := ord(PW^);
      if w = 0 then
        exit
      else if (w > 126) or
              (tcWord in TEXT_BYTES[w]) then
        Break;
      inc(PW);
    until false;
    Start := PW;
    // search end of word matching UpperLen characters
    repeat
      inc(PW);
      w := ord(PW^);
    until (PW - Start >= UpperLen) or
          (w = 0) or
          ((w < 127) and
           not (tcWord in TEXT_BYTES[w]));
    if PW - Start >= UpperLen then
      if Unicode_CompareString(Start, Upper, UpperLen, UpperLen,
           {ignorecase=}true) = 2 then
      begin
        result := true; // case-insensitive match found
        exit;
      end;
    // not found: go to end of current word
    repeat
      w := ord(PW^);
      if w = 0 then
        exit
      else if ((w < 127) and
              not (tcWord in TEXT_BYTES[w])) then
        Break;
      inc(PW);
    until false;
  until false;
end;

function FindUtf8(U: PUtf8Char; UpperValue: PAnsiChar): boolean;
var
  ValueStart: PAnsiChar;
  c: PtrUInt;
  FirstChar: AnsiChar;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  Next;
begin
  result := false;
  if (U = nil) or
     (UpperValue = nil) then
    exit;
  // handles 8-bits WinAnsi chars inside UTF-8 encoded data
  {$ifndef CPUX86NOTPIC}
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  FirstChar := UpperValue^;
  ValueStart := UpperValue + 1;
  repeat
    // test beginning of word
    repeat
      c := byte(U^);
      inc(U);
      if c = 0 then
        exit;
      if c <= 127 then
      begin
        if tcWord in TEXT_BYTES[c] then
          if PAnsiChar(@NormToUpper)[c] <> FirstChar then
            goto Next
          else
            break;
      end
      else if c and $20 = 0 then
      begin
        // fast direct process of $0..$7ff codepoints including accents
        c := ((c shl 6) + byte(U^)) - $3080;
        inc(U);
        if c <= 255 then
        begin
          c := NormToUpperByte[c];
          if tcWord in TEXT_BYTES[c] then
            if AnsiChar(c) <> FirstChar then
              goto Next
            else
              break;
        end;
      end
      else
      begin
        c := utf8.Lookup[c];
        if c = UTF8_INVALID then
          exit
        else
          // just ignore surrogates for soundex
          inc(U, c);
      end;
      until false;
    // here we had the first char match -> check if this word match UpperValue
    UpperValue := ValueStart;
    repeat
      if UpperValue^ = #0 then
      begin
        result := true; // UpperValue found!
        exit;
      end;
      c := byte(U^);
      inc(U); // next chars
      if c = 0 then
        exit
      else if c <= 127 then
      begin
        if PAnsiChar(@NormToUpper)[c] <> UpperValue^ then
          break;
      end
      else if c and $20 = 0 then
      begin
        c := ((c shl 6) + byte(U^)) - $3080;
        inc(U);
        if (c > 255) or
           (PAnsiChar(@NormToUpper)[c] <> UpperValue^) then
          break;
      end
      else
      begin
        c := utf8.Lookup[c];
        if c = UTF8_INVALID then
          exit
        else
          inc(U, c);
        break;
      end;
      inc(UpperValue);
    until false;
Next: // find beginning of next word
    U := FindNextUtf8WordBegin(U);
  until U = nil;
end;

function UpperCopy255(dest: PAnsiChar; const source: RawUtf8): PAnsiChar;
begin
  if source <> '' then
    result := UpperCopy255Buf(
      dest, pointer(source), PStrLen(PAnsiChar(pointer(source)) - _STRLEN)^)
  else
    result := dest;
end;

function UpperCopy255Buf(dest: PAnsiChar; source: PUtf8Char; sourceLen: PtrInt): PAnsiChar;
var
  i, c, d {$ifdef CPU64}, _80, _61, _7b {$endif}: PtrUInt;
begin
  if sourceLen <> 0 then
  begin
    if sourceLen > 248 then
      sourceLen := 248; // avoid buffer overflow
    // we allow to copy up to 3/7 more chars in Dest^ since its size is 255
    {$ifdef CPU64}
    // unbranched uppercase conversion of 8 chars blocks
    _80 := PtrUInt($8080808080808080); // use registers for constants
    _61 := $6161616161616161;
    _7b := $7b7b7b7b7b7b7b7b;
    for i := 0 to (sourceLen - 1) shr 3 do
    begin
      c := PPtrUIntArray(source)^[i];
      d := c or _80;
      PPtrUIntArray(dest)^[i] := c - ((d - _61) and
        not (d - _7b)) and ((not c) and _80) shr 2;
    end;
    {$else}
    // unbranched uppercase conversion of 4 chars blocks
    for i := 0 to (sourceLen - 1) shr 2 do
    begin
      c := PPtrUIntArray(source)^[i];
      d := c or PtrUInt($80808080);
      PPtrUIntArray(dest)^[i] := c - ((d - PtrUInt($61616161)) and
        not(d - PtrUInt($7b7b7b7b))) and ((not c) and PtrUInt($80808080)) shr 2;
    end;
    {$endif CPU64}
  end;
  result := dest + sourceLen; // return the exact size
end;

function UpperCopyWin255(dest: PWinAnsiChar; const source: RawUtf8): PWinAnsiChar;
var
  i, L: PtrInt;
  {$ifdef CPUX86NOTPIC}
  tab: TNormTableByte absolute NormToUpperByte;
  {$else}
  tab: PByteArray; // faster on PIC/ARM and x86_64
  {$endif CPUX86NOTPIC}
begin
  if source = '' then
    result := dest
  else
  begin
    L := PStrLen(PAnsiChar(pointer(source)) - _STRLEN)^;
    if L > 250 then
      L := 250; // avoid buffer overflow
    result := dest + L;
    {$ifndef CPUX86NOTPIC}
    tab := @NormToUpperByte;
    {$endif CPUX86NOTPIC}
    for i := 0 to L - 1 do
      dest[i] := AnsiChar(tab[PByteArray(source)[i]]);
  end;
end;

function Utf8UpperCopy(Dest, Source: PUtf8Char; SourceChars: cardinal): PUtf8Char;
var
  c: cardinal;
  endSource, endSourceBy4, up: PUtf8Char;
  extra, i: PtrInt;
  {$ifdef CPUX86NOTPIC}
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  by1, by4, set1; // ugly but faster
begin
  if (Source <> nil) and
     (Dest <> nil) then
  begin
    {$ifndef CPUX86NOTPIC}
    utf8 := @UTF8_TABLE;
    {$endif CPUX86NOTPIC}
    // first handle trailing 7-bit ASCII chars, by quad (Sha optimization)
    endSource := Source + SourceChars;
    endSourceBy4 := endSource - 4;
    up := @NormToUpper;
    if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
       (Source <= endSourceBy4) then
      repeat
        c := PCardinal(Source)^;
        if c and $80808080 <> 0 then
          goto by1; // break on first non ASCII quad
by4:    inc(Source, 4);
        Dest[0] := up[ToByte(c)];
        Dest[1] := up[ToByte(c shr 8)];
        Dest[2] := up[ToByte(c shr 16)];
        Dest[3] := up[ToByte(c shr 24)];
        inc(Dest, 4);
      until Source > endSourceBy4;
    // generic loop, handling one UCS4 CodePoint per iteration
    if Source < endSource then
      repeat
by1:    c := byte(Source^);
        inc(Source);
        if c <= 127 then
        begin
          Dest^ := up[c];
set1:     inc(Dest);
          if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(Source) and 3 = 0) and{$endif}
             (Source <= endSourceBy4) then
          begin
            c := PCardinal(Source)^;
            if c and $80808080 = 0 then
              goto by4;
            continue;
          end
          else if Source < endSource then
            continue
          else
            break;
        end
        else
        begin
          extra := utf8.Lookup[c];
          if (extra = UTF8_INVALID) or
             (Source + extra > endSource) then
            break;
          i := 0;
          repeat
            c := (c shl 6) + byte(Source[i]);
            inc(i)
          until i = extra;
          with utf8.Extra[extra] do
          begin
            dec(c, offset);
            if c < minimum then
              break; // invalid input content
          end;
          if (c <= 255) and
             (up[c] <= #127) then
          begin
            Dest^ := up[c];
            inc(Source, extra);
            goto set1;
          end;
          Dest^ := Source[-1];
          repeat // here we now extra>0 - just copy UTF-8 input untouched
            inc(Dest);
            Dest^ := Source^;
            inc(Source);
            dec(extra);
            if extra = 0 then
              goto set1;
          until false;
        end;
      until false;
  end;
  result := Dest;
end;

function Utf8UpperCopy255(dest: PAnsiChar; const source: RawUtf8): PUtf8Char;
var
  L: integer;
begin
  L := length(source);
  if L > 0 then
  begin
    if L > 250 then
      L := 250; // avoid buffer overflow
    result := Utf8UpperCopy(pointer(dest), pointer(source), L);
  end
  else
    result := pointer(dest);
end;

function UpperCopy255W(dest: PAnsiChar; const source: SynUnicode): PAnsiChar;
begin
  result := UpperCopy255W(dest, pointer(source), length(source));
end;

function UpperCopy255W(dest: PAnsiChar; source: PWideChar; L: PtrInt): PAnsiChar;
var
  c: PtrUInt;
  d: byte;
  lookupper: PByteArray; // better x86-64 / PIC asm generation
begin
  if L > 0 then
  begin
    if L > 250 then
      L := 250; // avoid buffer overflow
    lookupper := @NormToUpperAnsi7Byte;
    repeat
      c := PWord(source)^;
      d := ord('?');
      if c < 255 then
        d := lookupper[c];
      dest^ := AnsiChar(d);
      inc(dest);
      inc(source);
      dec(L);
    until L = 0;
  end;
  result := dest;
end;

function UpperCopy(dest: PAnsiChar; const source: RawUtf8): PAnsiChar;
var
  s: PAnsiChar;
  c: byte;
  lookupper: PByteArray; // better x86-64 / PIC asm generation
begin
  s := pointer(source);
  if s <> nil then
  begin
    lookupper := @NormToUpperAnsi7Byte;
    repeat
      c := lookupper[ord(s^)];
      if c = 0 then
        break;
      dest^ := AnsiChar(c);
      inc(s);
      inc(dest);
    until false;
  end;
  result := dest;
end;

function UpperCopyShort(dest: PAnsiChar; const source: ShortString): PAnsiChar;
var
  s: PByteArray;
  i: PtrInt;
  lookupper: PByteArray; // better x86-64 / PIC asm generation
begin
  s := @source;
  lookupper := @NormToUpperAnsi7Byte;
  for i := 1 to s[0] do
  begin
    dest^ := AnsiChar(lookupper[s[i]]);
    inc(dest);
  end;
  result := dest;
end;

function UpperCaseUnicode(const S: RawUtf8): RawUtf8;
var
  tmp: TSynTempBuffer;
  len: integer;
begin
  if S = '' then
  begin
    result := '';
    exit;
  end;
  tmp.Init(length(s) * 2);
  len := Utf8ToWideChar(tmp.buf, pointer(S), length(S)) shr 1;
  RawUnicodeToUtf8(tmp.buf, Unicode_InPlaceUpper(tmp.buf, len), result);
  tmp.Done;
end;

function UpperCaseSynUnicode(const S: SynUnicode): SynUnicode;
begin
  {$ifdef UNICODE}
  result := SysUtils.UpperCase(S);
  {$else}
  {$ifdef HASVARUSTRING}
  result := UnicodeUpperCase(S);
  {$else}
  result := WideUpperCase(s);
  {$endif HASVARUSTRING}
  {$endif UNICODE}
end;

function LowerCaseSynUnicode(const S: SynUnicode): SynUnicode;
begin
  {$ifdef UNICODE}
  result := SysUtils.LowerCase(S);
  {$else}
  {$ifdef HASVARUSTRING}
  result := UnicodeLowerCase(S);
  {$else}
  result := WideLowerCase(s);
  {$endif HASVARUSTRING}
  {$endif UNICODE}
end;

function LowerCaseUnicode(const S: RawUtf8): RawUtf8;
var
  tmp: TSynTempBuffer;
  len: integer;
begin
  if S = '' then
  begin
    result := '';
    exit;
  end;
  tmp.Init(length(s) * 2);
  len := Utf8ToWideChar(tmp.buf, pointer(S), length(S)) shr 1;
  RawUnicodeToUtf8(tmp.buf, Unicode_InPlaceLower(tmp.buf, len),result);
  tmp.Done;
end;

function IsCaseSensitive(const S: RawUtf8): boolean;
begin
  result := IsCaseSensitive(pointer(S), length(S));
end;

function IsCaseSensitive(P: PUtf8Char; PLen: PtrInt): boolean;
begin
  result := true;
  if (P <> nil) and
     (PLen > 0) then
    repeat
      if ord(P^) in [ord('a')..ord('z'), ord('A')..ord('Z')] then
        exit;
      inc(P);
      dec(PLen);
    until PLen = 0;
  result := false;
end;

procedure CaseCopy(Text: PUtf8Char; Len: PtrInt; Table: PNormTable;
  var Dest: RawUtf8);
var
  i: PtrInt;
  tmp: PAnsiChar;
begin
  tmp := FastNewString(Len, CP_UTF8);
  for i := 0 to Len - 1 do
    tmp[i] := Table[Text[i]]; // branchless conversion
  FastAssignNew(Dest, tmp);
end;

procedure CaseSelf(var S: RawUtf8; Table: PNormTable);
var
  i: PtrInt;
  P: PUtf8Char;
begin
  P := UniqueRawUtf8(S);
  for i := 0 to length(S) - 1 do
    P[i] := Table[P[i]]; // branchless conversion
end;

function UpperCase(const S: RawUtf8): RawUtf8;
begin
  CaseCopy(pointer(S), length(S), @NormToUpperAnsi7, result);
end;

procedure UpperCaseCopy(Text: PUtf8Char; Len: PtrInt; var Dest: RawUtf8);
begin
  CaseCopy(Text, Len, @NormToUpperAnsi7, Dest);
end;

procedure UpperCaseCopy(const Source: RawUtf8; var Dest: RawUtf8);
begin
  CaseCopy(pointer(Source), length(Source), @NormToUpperAnsi7, Dest);
end;

procedure UpperCaseSelf(var S: RawUtf8);
begin
  CaseSelf(S, @NormToUpperAnsi7);
end;

function LowerCase(const S: RawUtf8): RawUtf8;
begin
  CaseCopy(pointer(S), length(S), @NormToLowerAnsi7, result);
end;

procedure LowerCaseCopy(Text: PUtf8Char; Len: PtrInt; var Dest: RawUtf8);
begin
  CaseCopy(Text, Len, @NormToLowerAnsi7, Dest);
end;

procedure LowerCaseSelf(var S: RawUtf8);
begin
  CaseSelf(S, @NormToLowerAnsi7);
end;

function PosExIPas(pSub, p: PUtf8Char; Offset: PtrUInt;
  Lookup: PNormTable): PtrInt;
var
  len, lenSub: PtrInt;
  ch: AnsiChar;
  pStart, pStop: PUtf8Char;
label
  s2, s6, tt, t0, t1, t2, t3, t4, s0, s1, fnd, quit;
begin
  result := 0;
  if (p = nil) or
     (pSub = nil) or
     (PtrInt(Offset) <= 0) or
     (Lookup = nil) then
    goto quit;
  len := PStrLen(p - _STRLEN)^;
  lenSub := PStrLen(pSub - _STRLEN)^ - 1;
  if (len < lenSub + PtrInt(Offset)) or
     (lenSub < 0) then
    goto quit;
  pStop := p + len;
  inc(p, lenSub);
  inc(pSub, lenSub);
  pStart := p;
  p := @p[Offset + 3];
  ch := Lookup[pSub[0]];
  lenSub := -lenSub;
  if p < pStop then
    goto s6;
  dec(p, 4);
  goto s2;
s6: // check 6 chars per loop iteration with O(1) case comparison
  if ch = Lookup[p[-4]] then
    goto t4;
  if ch = Lookup[p[-3]] then
    goto t3;
  if ch = Lookup[p[-2]] then
    goto t2;
  if ch = Lookup[p[-1]] then
    goto t1;
s2:if ch = Lookup[p[0]] then
    goto t0;
s1:if ch = Lookup[p[1]] then
    goto tt;
s0:inc(p, 6);
  if p < pStop then
    goto s6;
  dec(p, 4);
  if p >= pStop then
    goto quit;
  goto s2;
t4:dec(p, 2);
t2:dec(p, 2);
  goto t0;
t3:dec(p, 2);
t1:dec(p, 2);
tt:len := lenSub;
  if lenSub <> 0 then
    repeat
      if (Lookup[pSub[len]] <> Lookup[p[len + 1]]) or
         (Lookup[pSub[len + 1]] <> Lookup[p[len + 2]]) then
        goto s0;
      inc(len, 2);
    until len >= 0;
  inc(p, 2);
  if p <= pStop then
    goto fnd;
  goto quit;
t0:len := lenSub;
  if lenSub <> 0 then
    repeat
      if (Lookup[pSub[len]] <> Lookup[p[len]]) or
         (Lookup[pSub[len + 1]] <> Lookup[p[len + 1]]) then
        goto s1;
      inc(len, 2);
    until len >= 0;
  inc(p);
fnd:
  result := p - pStart;
quit:
end;

function PosExI(const SubStr, S: RawUtf8; Offset: PtrUInt): PtrInt;
begin
  result := PosExIPas(pointer(SubStr), pointer(S), Offset, @NormToUpperAnsi7);
end;

function PosExI(const SubStr, S: RawUtf8; Offset: PtrUInt;
  Lookup: PNormTable): PtrInt;
begin
  if Lookup = nil then
    {$ifdef CPUX86}
    result := PosEx(SubStr, S, Offset)
    {$else}
    result := PosExPas(pointer(SubStr), pointer(S), Offset)
    {$endif CPUX86}
  else
    result := PosExIPas(pointer(SubStr), pointer(S), Offset, Lookup);
end;


{ ************ UTF-8 String Manipulation Functions }

function StartWithExact(const text, textStart: RawUtf8): boolean;
var
  l: PtrInt;
begin
  l := length(textStart);
  result := (length(text) >= l) and
            CompareMem(pointer(text), pointer(textStart), l);
end;

function EndWithExact(const text, textEnd: RawUtf8): boolean;
var
  l, o: PtrInt;
begin
  l := length(textEnd);
  o := length(text) - l;
  result := (o >= 0) and
            CompareMem(PUtf8Char(pointer(text)) + o, pointer(textEnd), l);
end;

function GetNextLine(source: PUtf8Char; out next: PUtf8Char; andtrim: boolean): RawUtf8;
var
  beg: PUtf8Char;
begin
  if source = nil then
  begin
    {$ifdef FPC}
    FastAssignNew(result);
    {$else}
    result := '';
    {$endif FPC}
    next := source;
    exit;
  end;
  if andtrim then // optional trim left
    while source^ in [#9, ' '] do
      inc(source);
  beg := source;
  repeat // just here to avoid a goto
    if source[0] > #13 then
      if source[1] > #13 then
        if source[2] > #13 then
          if source[3] > #13 then
          begin
            inc(source, 4); // fast process 4 chars per loop
            continue;
          end
          else
            inc(source, 3)
        else
          inc(source, 2)
      else
        inc(source);
    case source^ of
      #0:
        next := nil;
      #10:
        next := source + 1;
      #13:
        if source[1] = #10 then
          next := source + 2
        else
          next := source + 1;
    else
      begin
        inc(source);
        continue;
      end;
    end;
    if andtrim then // optional trim right
      while (source > beg) and
            (source[-1] in [#9, ' ']) do
        dec(source);
    FastSetString(result, beg, source - beg);
    exit;
  until false;
end;

function LeftU(const S: RawUtf8; n: PtrInt): RawUtf8;
begin
  result := Copy(S, 1, n);
end;

function RightU(const S: RawUtf8; n: PtrInt): RawUtf8;
var
  L: PtrInt;
begin
  L := length(S);
  if n > L then
    n := L;
  result := Copy(S, L + 1 - n, n);
end;

function TrimLeft(const S: RawUtf8): RawUtf8;
var
  i, l: PtrInt;
begin
  l := Length(S);
  i := 1;
  while (i <= l) and
        (S[i] <= ' ') do
    inc(i);
  if i = 1 then
    result := S
  else
    FastSetString(result, @PByteArray(S)[i - 1], l - i);
end;

function TrimRight(const S: RawUtf8): RawUtf8;
var
  i: PtrInt;
begin
  i := Length(S);
  while (i > 0) and
        (S[i] <= ' ') do
    dec(i);
  FastSetString(result, pointer(S), i);
end;

procedure TrimLeftLines(var S: RawUtf8);
var
  P, D: PUtf8Char;
begin
  if S = '' then
    exit;
  P := UniqueRawUtf8(S);
  D := P; // in-place process
  repeat
    while (P^ <= ' ') and
          (P^ <> #0) do
      inc(P);
    while not (P^ in [#0, #10, #13]) do
    begin
      D^ := P^;
      inc(P);
      inc(D);
    end;
    if P^ = #0 then
      break;
    D^ := #10;
    inc(D);
  until false;
  if D = pointer(S) then
    S := ''
  else
    FakeLength(S, D); // no SetLength needed
end;

procedure TrimChars(var S: RawUtf8; Left, Right: PtrInt);
var
  P: PUtf8Char;
begin
  P := pointer(S);
  if P = nil then
    exit;
  if Left < 0 then
    Left := 0;
  if Right < 0 then
    Right := 0;
  inc(Right, Left);
  if Right = 0 then
    exit; // nothing to trim
  Right := PStrLen(P - _STRLEN)^ - Right; // compute new length
  if Right > 0 then
    if PStrCnt(P - _STRCNT)^ = 1 then // RefCnt=1 ?
    begin
      PStrLen(P - _STRLEN)^ := Right; // we can modify it in-place
      if Left <> 0 then
        MoveFast(P[Left], P^, Right);
      P[Right] := #0;
    end
    else
      FastSetString(S, P + Left, Right) // create a new unique string
  else
    FastAssignNew(S);
end;

function SplitRight(const Str: RawUtf8; SepChar: AnsiChar; LeftStr: PRawUtf8): RawUtf8;
var
  i: PtrInt;
begin
  for i := length(Str) downto 1 do
    if Str[i] = SepChar then
    begin
      FastSetString(result, @PByteArray(Str)[i], length(Str) - i);
      if LeftStr <> nil then
        FastSetString(LeftStr^, pointer(Str), i - 1);
      exit;
    end;
  result := Str;
  if LeftStr <> nil then
    FastAssignNew(LeftStr^);
end;

function SplitRights(const Str, SepChar: RawUtf8): RawUtf8;
var
  i, j, sep: PtrInt;
  c: AnsiChar;
begin
  sep := length(SepChar);
  if sep > 0 then
    if sep = 1 then
      result := SplitRight(Str, SepChar[1])
    else
    begin
      for i := length(Str) downto 1 do
      begin
        c := Str[i];
        for j := 1 to sep do
          if c = SepChar[j] then
          begin
            FastSetString(result, @PByteArray(Str)[i], length(Str) - i);
            exit;
          end;
      end;
    end;
  result := Str;
end;

function Split(const Str, SepStr: RawUtf8; var LeftStr, RightStr: RawUtf8;
  ToUpperCase: boolean): boolean;
var
  i: PtrInt;
  tmp: pointer; // may be called as Split(Str,SepStr,Str,RightStr)
begin
  if length(SepStr) = 1 then
    i := PosExChar(SepStr[1], Str) // may use SSE2 on i386/x86_64
  else
    i := PosEx(SepStr, Str);
  if i = 0 then
  begin
    LeftStr := Str;
    RightStr := '';
    result := false;
  end
  else
  begin
    dec(i);
    tmp := nil;
    FastSetString(RawUtf8(tmp), pointer(Str), i);
    inc(i, length(SepStr));
    FastSetString(RightStr, @PByteArray(Str)[i], length(Str) - i);
    FastAssignNew(LeftStr, tmp);
    result := true;
  end;
  if ToUpperCase then
  begin
    UpperCaseSelf(LeftStr);
    UpperCaseSelf(RightStr);
  end;
end;

function Split(const Str, SepStr: RawUtf8; var LeftStr: RawUtf8;
  ToUpperCase: boolean): RawUtf8;
begin
  Split(Str, SepStr, LeftStr, result, ToUpperCase);
end;

function Split(const Str: RawUtf8; const SepStr: array of RawUtf8;
  const DestPtr: array of PRawUtf8): PtrInt;
var
  s, i, j: PtrInt;
  P: pointer;
begin
  j := 1;
  result := 0;
  s := 0;
  if high(SepStr) >= 0 then
    while result <= high(DestPtr) do
    begin
      P := @PByteArray(Str)[j - 1];
      i := PosEx(SepStr[s], Str, j);
      if i = 0 then
      begin
        if DestPtr[result] <> nil then
          FastSetString(DestPtr[result]^, P, length(Str) - j + 1);
        inc(result);
        break;
      end;
      if DestPtr[result] <> nil then
        FastSetString(DestPtr[result]^, P, i - j);
      inc(result);
      if s < high(SepStr) then
        inc(s);
      j := i + 1;
    end;
  for i := result to high(DestPtr) do
    if DestPtr[i] <> nil then
      FastAssignNew(DestPtr[i]^);
end;

function IsVoid(const text: RawUtf8): boolean;
var
  i: PtrInt;
begin
  result := false;
  for i := 1 to length(text) do
    if text[i] > ' ' then
      exit;
  result := true;
end;

function TrimControlChars(const text: RawUtf8): RawUtf8;
var
  len, i, j, n: PtrInt;
  P: PAnsiChar;
begin
  len := length(text);
  for i := 1 to len do
    if text[i] <= ' ' then
    begin
      n := i - 1;
      FastSetString(result, len);
      P := pointer(result);
      if n > 0 then
        MoveFast(pointer(text)^, P^, n);
      for j := i + 1 to len do
        if text[j] > ' ' then
        begin
          P[n] := text[j];
          inc(n);
        end;
      FakeSetLength(result, n);
      exit;
    end;
  result := text; // no control char found
end;

function TrimChar(const text: RawUtf8; const exclude: TSynAnsicharSet): RawUtf8;
var
  len, i, j, n: PtrInt;
  P: PAnsiChar;
begin
  len := length(text);
  for i := 1 to len do
    if text[i] in exclude then
    begin
      n := i - 1;
      FastSetString(result, len - 1);
      P := pointer(result);
      if n > 0 then
        MoveFast(pointer(text)^, P^, n);
      for j := i + 1 to len do
        if not (text[j] in exclude) then
        begin
          P[n] := text[j];
          inc(n);
        end;
      FakeSetLength(result, n);
      exit;
    end;
  result := text; // no exclude char found
end;

function TrimOneChar(const text: RawUtf8; exclude: AnsiChar): RawUtf8;
var
  first, len, i: PtrInt;
  c: AnsiChar;
  P: PAnsiChar;
begin
  len := length(text);
  first := ByteScanIndex(pointer(text), len, ord(exclude));
  if first < 0 then
  begin
    result := text; // no exclude char found
    exit;
  end;
  FastSetString(result, len - 1);
  P := pointer(result);
  MoveFast(pointer(text)^, P^, first);
  inc(P, first);
  for i := first + 1 to len do
  begin
    c := text[i];
    if c <> exclude then
    begin
      P^ := c;
      inc(P);
    end;
  end;
  FakeSetLength(result, P - pointer(result));
end;

function OnlyChar(const text: RawUtf8; const only: TSynAnsicharSet): RawUtf8;
var
  i: PtrInt;
  exclude: array[0..(SizeOf(only) shr POINTERSHR) - 1] of PtrInt;
begin // reverse bits in local stack copy before calling TrimChar()
  for i := 0 to (SizeOf(only) shr POINTERSHR) - 1 do
    exclude[i] := not PPtrIntArray(@only)[i];
  result := TrimChar(text, TSynAnsicharSet(exclude));
end;

procedure FillZero(var secret: RawByteString);
begin
  if secret <> '' then
    with PStrRec(Pointer(PtrInt(secret) - _STRRECSIZE))^ do
      if refCnt = 1 then // avoid GPF if const
        FillCharFast(pointer(secret)^, length, 0);
  FastAssignNew(secret); // dec refCnt
end;

procedure FillZero(var secret: RawUtf8);
begin
  FillZero(RawByteString(secret));
end;

procedure FillZero(var secret: SpiUtf8);
begin
  FillZero(RawByteString(secret));
end;

{$ifdef HASVARUSTRING}
procedure FillZero(var secret: UnicodeString);
begin
  if secret <> '' then
    with PStrRec(Pointer(PtrInt(secret) - _STRRECSIZE))^ do
      if refCnt = 1 then // avoid GPF if const
        FillCharFast(pointer(secret)^, length * SizeOf(WideChar), 0);
  Finalize(secret); // dec refCnt
end;
{$endif HASVARUSTRING}

procedure FillZero(var secret: TBytes);
begin
  if secret <> nil then
    with PDynArrayRec(Pointer(PtrInt(secret) - _DARECSIZE))^ do
      if refCnt = 1 then // avoid GPF if const
        FillCharFast(pointer(secret)^, length, 0);
  secret := nil; // dec refCnt
end;

function StringReplaceAllProcess(const S, OldPattern, NewPattern: RawUtf8;
  found: integer; Lookup: PNormTable): RawUtf8;
var
  i, last, oldlen, newlen, sharedlen: PtrInt;
  posCount: integer;
  pos: TIntegerDynArray;
  src, dst: PAnsiChar;
begin
  oldlen := length(OldPattern);
  newlen := length(NewPattern);
  SetLength(pos, 64);
  pos[0] := found;
  posCount := 1;
  repeat
    found := PosExI(OldPattern, S, found + oldlen, Lookup);
    if found = 0 then
      break;
    AddInteger(pos, posCount, found);
  until false;
  FastSetString(result, Length(S) + (newlen - oldlen) * posCount);
  last := 1;
  src := pointer(S);
  dst := pointer(result);
  for i := 0 to posCount - 1 do
  begin
    sharedlen := pos[i] - last;
    MoveFast(src^, dst^, sharedlen);
    inc(src, sharedlen + oldlen);
    inc(dst, sharedlen);
    if newlen > 0 then
    begin
      MoveByOne(pointer(NewPattern), dst, newlen);
      inc(dst, newlen);
    end;
    last := pos[i] + oldlen;
  end;
  MoveFast(src^, dst^, length(S) - last + 1);
end;

function StringReplaceAll(const S, OldPattern, NewPattern: RawUtf8;
  Lookup: PNormTable): RawUtf8;
var
  found: integer;
begin
  if (S = '') or
     (OldPattern = '') or
     (OldPattern = NewPattern) then
    result := S
  else
  begin
    if (Lookup = nil) and
       (length(OldPattern) = 1) then
      found := ByteScanIndex(pointer(S), {%H-}PStrLen(PtrUInt(S) - _STRLEN)^,
        byte(OldPattern[1])) + 1
    else
      found := PosExI(OldPattern, S, 1, Lookup); // handle Lookup=nil
    if found = 0 then
      result := S
    else
      result := StringReplaceAllProcess(S, OldPattern, NewPattern, found, Lookup);
  end;
end;

function StringReplaceAll(const S, OldPattern, NewPattern: RawUtf8;
  CaseInsensitive: boolean): RawUtf8;
begin
  result := StringReplaceAll(S, OldPattern, NewPattern, NORM2CASE[CaseInsensitive]);
end;

function StringReplaceAll(const S: RawUtf8;
  const OldNewPatternPairs: array of RawUtf8; CaseInsensitive: boolean): RawUtf8;
var
  n, i: PtrInt;
  tab: PNormTable;
begin
  result := S;
  n := high(OldNewPatternPairs);
  if (n <= 0) or
     (n and 1 <> 1) then
    exit;
  tab := NORM2CASE[CaseInsensitive];
  for i := 0 to n shr 1 do
    result := StringReplaceAll(result,
      OldNewPatternPairs[i * 2], OldNewPatternPairs[i * 2 + 1], tab);
end;

function StringReplaceChars(const Source: RawUtf8; OldChar, NewChar: AnsiChar): RawUtf8;
var
  i, j, n: PtrInt;
  P: PAnsiChar;
begin
  if (OldChar <> NewChar) and
     (Source <> '') then
  begin
    n := length(Source);
    i := ByteScanIndex(pointer(Source), n, ord(OldChar));
    if i >= 0 then
    begin
      FastSetString(result, pointer(Source), n);
      P := pointer(result);
      for j := i to n - 1 do
        if P[j] = OldChar then
          P[j] := NewChar;
      exit;
    end;
  end;
  result := Source;
end;

function StringReplaceTabs(const Source, TabText: RawUtf8): RawUtf8;

  procedure Process(S, D, T: PAnsiChar; TLen: integer);
  begin
    repeat
      if S^ = #0 then
        break
      else if S^ <> #9 then
      begin
        D^ := S^;
        inc(D);
        inc(S);
      end
      else
      begin
        if TLen > 0 then
        begin
          MoveByOne(T, D, TLen);
          inc(D, TLen);
        end;
        inc(S);
      end;
    until false;
  end;

var
  L, i, n, ttl: PtrInt;
begin
  ttl := length(TabText);
  L := Length(Source);
  n := 0;
  if ttl <> 0 then
    for i := 1 to L do
      if Source[i] = #9 then
        inc(n);
  if n = 0 then
  begin
    result := Source;
    exit;
  end;
  FastSetString(result, L + n * pred(ttl));
  Process(pointer(Source), pointer(result), pointer(TabText), ttl);
end;

function RawUtf8OfChar(Ch: AnsiChar; Count: integer): RawUtf8;
begin
  if Count <= 0 then
    FastAssignNew(result)
  else
  begin
    FastSetString(result, Count);
    FillCharFast(pointer(result)^, Count, byte(Ch));
  end;
end;

function QuotedStr(const S: RawUtf8; Quote: AnsiChar): RawUtf8;
begin
  QuotedStr(pointer(S), length(S), Quote, result);
end;

procedure QuotedStr(const S: RawUtf8; Quote: AnsiChar; var result: RawUtf8);
var
  P: PUtf8Char;
  tmp: pointer; // will hold a RawUtf8 with no try..finally exception block
begin
  tmp := nil;
  P := pointer(S);
  if (P <> nil) and
     (P = pointer(result)) then
  begin
    RawUtf8(tmp) := S; // make private ref-counted copy for QuotedStr(U,'"',U)
    P := pointer(tmp);
  end;
  QuotedStr(P, length(S), Quote, result);
  if tmp <> nil then
    {$ifdef FPC}
    FastAssignNew(tmp);
    {$else}
    RawUtf8(tmp) := '';
    {$endif FPC}
end;

procedure QuotedStr(P: PUtf8Char; PLen: PtrInt; Quote: AnsiChar;
  var result: RawUtf8);
var
  i, quote1, nquote: PtrInt;
  R: PUtf8Char;
  c: AnsiChar;
begin
  nquote := 0;
  quote1 := ByteScanIndex(pointer(P), PLen, byte(Quote)); // asm if available
  if quote1 >= 0 then
    for i := quote1 to PLen - 1 do
      if P[i] = Quote then
        inc(nquote);
  FastSetString(result, PLen + nquote + 2);
  R := pointer(result);
  R^ := Quote;
  inc(R);
  if nquote = 0 then
  begin
    MoveFast(P^, R^, PLen);
    R[PLen] := Quote;
  end
  else
  begin
    MoveFast(P^, R^, quote1);
    inc(R, quote1);
    inc(PLen, PtrInt(PtrUInt(P))); // efficient use of registers on FPC
    inc(quote1, PtrInt(PtrUInt(P)));
    repeat
      if quote1 = PLen then
        break;
      c := PAnsiChar(quote1)^;
      inc(quote1);
      R^ := c;
      inc(R);
      if c <> Quote then
        continue;
      R^ := c;
      inc(R);
    until false;
    R^ := Quote;
  end;
end;

function GotoEndOfQuotedString(P: PUtf8Char): PUtf8Char;
var
  quote: AnsiChar;
begin
  // P^='"' or P^='''' at function call
  quote := P^;
  inc(P);
  repeat
    if P^ = #0 then
      break
    else if P^ <> quote then
      inc(P)
    else if P[1] = quote then // allow double quotes inside string
      inc(P, 2)
    else
      break; // end quote
  until false;
  result := P;
end; // P^='"' or P^=#0 at function return

function GotoNextNotSpace(P: PUtf8Char): PUtf8Char;
begin
  {$ifdef FPC}
  while (P^ <= ' ') and
        (P^ <> #0) do
    inc(P);
  {$else}
  if P^ in [#1..' '] then
    repeat
      inc(P)
    until not (P^ in [#1..' ']);
  {$endif FPC}
  result := P;
end;

function GotoNextNotSpaceSameLine(P: PUtf8Char): PUtf8Char;
begin
  while P^ in [#9, ' '] do
    inc(P);
  result := P;
end;

function GotoNextSpace(P: PUtf8Char): PUtf8Char;
begin
  if P^ > ' ' then
    repeat
      inc(P)
    until P^ <= ' ';
  result := P;
end;

function NextNotSpaceCharIs(var P: PUtf8Char; ch: AnsiChar): boolean;
begin
  while (P^ <= ' ') and
        (P^ <> #0) do
    inc(P);
  if P^ = ch then
  begin
    inc(P);
    result := true;
  end
  else
    result := false;
end;

function GotoNextSqlIdentifier(P: PUtf8Char; tab: PTextCharSet): PUtf8Char;
  {$ifdef HASINLINE} inline; {$endif}
begin
  while tcCtrlNot0Comma in tab[P^] do // in [#1..' ', ';']
    inc(P);
  if PWord(P)^ = ord('/') + ord('*') shl 8 then
  begin
    // detect and ignore e.g. '/*nocache*/'
    repeat
      inc(P);
      if PWord(P)^ = ord('*') + ord('/') shl 8 then
      begin
        inc(P, 2);
        break;
      end;
    until P^ = #0;
    while tcCtrlNot0Comma in tab[P^] do
      inc(P);
  end;
  result := P;
end;

function GetNextFieldProp(var P: PUtf8Char; var Prop: RawUtf8): boolean;
var
  B: PUtf8Char;
  tab: PTextCharSet;
begin
  tab := @TEXT_CHARS;
  P := GotoNextSqlIdentifier(P, tab); // handle /*comment*/
  B := P;
  while tcIdentifier in tab[P^] do
    inc(P); // go to end of ['_', '0'..'9', 'a'..'z', 'A'..'Z'] chars
  FastSetString(Prop, B, P - B);
  P := GotoNextSqlIdentifier(P, tab);
  result := Prop <> '';
end;

function GetNextFieldPropSameLine(var P: PUtf8Char; var Prop: ShortString): boolean;
var
  B: PUtf8Char;
  tab: PTextCharSet;
begin
  tab := @TEXT_CHARS;
  while tcCtrlNotLF in tab[P^] do
    inc(P); // ignore [#1..#9, #11, #12, #14..' ']
  B := P;
  while tcIdentifier in tab[P^] do
    inc(P); // go to end of field name
  SetString(Prop, PAnsiChar(B), P - B);
  while tcCtrlNotLF in TEXT_CHARS[P^] do
    inc(P);
  result := Prop <> '';
end;

function UnQuoteSqlStringVar(P: PUtf8Char; out Value: RawUtf8): PUtf8Char;
var
  quote: AnsiChar;
  PBeg, PS: PUtf8Char;
  internalquote: PtrInt;
begin
  result := nil;
  if P = nil then
    exit;
  quote := P^; // " or '
  inc(P);
  // compute unquoted string length
  PBeg := P;
  internalquote := 0;
  P := PosChar(P, quote); // fast SSE2 search on x86_64
  if P = nil then
    exit; // we need at least an ending quote
  while true do
    if P^ = #0 then
      exit // where is my quote?
    else if P^ <> quote then
      inc(P)
    else if P[1] = quote then
    begin
      inc(P, 2); // allow double quotes inside string
      inc(internalquote);
    end
    else
      break; // end quote
  // create unquoted string
  if internalquote = 0 then
    // no quote within
    FastSetString(Value, PBeg, P - PBeg)
  else
  begin
    // unescape internal quotes
    pointer(Value) := FastNewString(P - PBeg - internalquote, CP_UTF8);
    P := PBeg;
    PS := pointer(Value);
    repeat
      if P[0] = quote then
        if P[1] = quote then
          // allow double quotes inside string
          inc(P)
        else
          // end quote
          break;
      PS^ := P[0];
      inc(PS);
      inc(P);
    until false;
  end;
  result := P + 1;
end;

function UnQuoteSqlString(const Value: RawUtf8): RawUtf8;
begin
  UnQuoteSqlStringVar(pointer(Value), result);
end;

function UnQuotedSqlSymbolName(const ExternalDBSymbol: RawUtf8): RawUtf8;
begin
  if (ExternalDBSymbol <> '') and
     (ExternalDBSymbol[1] in ['[', '"', '''', '(']) then
    // e.g. for ZDBC's GetFields()
    result := copy(ExternalDBSymbol, 2, length(ExternalDBSymbol) - 2)
  else
    result := ExternalDBSymbol;
end;

function IdemPCharAndGetNextLine(var source: PUtf8Char; searchUp: PAnsiChar): boolean;
begin
  if source = nil then
    result := false
  else
  begin
    result := IdemPChar(source, searchUp);
    source := GotoNextLine(source);
  end;
end;

function FindNameValue(P: PUtf8Char; UpperName: PAnsiChar): PUtf8Char;
var
  table: PNormTable; // faster even on i386
  u: PAnsiChar;
label
  eof, eol;
begin
  if (P = nil) or
     (UpperName = nil) then
    goto eof;
  table := @NormToUpperAnsi7;
  repeat
    if table[P^] <> UpperName^ then // first character is likely not to match
      repeat // quickly go to end of current line
        repeat
eol:      if P^ <= #13 then
            break;
          inc(P);
        until false;
        if (P^ = #13) or
           (P^ = #10) then
        begin
          repeat
            inc(P);
          until (P^ <> #10) and
                (P^ <> #13);
          if P^ = #0 then
            goto eof;
          break; // handle next line
        end
        else if P^ <> #0 then
          continue; // e.g. #9
eof:    result := nil; // reached P^=#0 -> not found
        exit;
      until false
    else
    begin
      // first char did match -> try other chars
      inc(P);
      u := UpperName + 1;
      repeat
        if u^ = #0 then
          break
        else if u^ <> table[P^] then
          goto eol;
        inc(P);
        inc(u);
      until false;
      result := P; // if found, points just after UpperName
      exit;
    end;
  until false;
end;

function FindNameValuePointer(NameValuePairs: PUtf8Char; UpperName: PAnsiChar;
  out FoundLen: PtrInt; UpperNameSeparator: AnsiChar): PUtf8Char;
var
  P: PUtf8Char;
  L: PtrInt;
begin
  P := FindNameValue(NameValuePairs, UpperName);
  if P <> nil then
    repeat
      if UpperNameSeparator <> #0 then
        if P^ = UpperNameSeparator then
          inc(P) // e.g. THttpSocket.HeaderGetValue uses UpperNameSeparator=':'
        else
          break;
      while P^ in [#9, ' '] do // trim left
        inc(P);
      L := 0;
      while P[L] > #13 do      // end of line/value
        inc(L);
      while P[L - 1] = ' ' do  // trim right
        dec(L);
      FoundLen := L;
      break;
    until false;
  result := P;
end;

function FindNameValue(const NameValuePairs: RawUtf8; UpperName: PAnsiChar;
  var Value: RawUtf8; KeepNotFoundValue: boolean; UpperNameSeparator: AnsiChar): boolean;
var
  P: PUtf8Char;
  L: PtrInt;
begin
  P := FindNameValuePointer(pointer(NameValuePairs), UpperName, L, UpperNameSeparator);
  if P <> nil then
  begin
    FastSetString(Value, P, L);
    result := true;
  end
  else
  begin
    if not KeepNotFoundValue then
      {$ifdef FPC}
      FastAssignNew(Value);
      {$else}
      Value := '';
      {$endif FPC}
    result := false;
  end;
end;

function GetLineSize(P, PEnd: PUtf8Char): PtrUInt;
var
  c: byte;
begin
  {$ifdef CPUX64}
  if PEnd <> nil then
  begin
    result := BufferLineLength(P, PEnd); // use branchless SSE2 on x86_64
    exit;
  end;
  result := PtrUInt(P) - 1;
  {$else}
  result := PtrUInt(P) - 1;
  if PEnd <> nil then
    repeat // inlined BufferLineLength()
      inc(result);
      if PtrUInt(result) < PtrUInt(PEnd) then
      begin
        c := PByte(result)^;
        if (c > 13) or
           ((c <> 10) and
            (c <> 13)) then
          continue;
      end;
      break;
    until false
  else
  {$endif CPUX64}
    repeat // inlined BufferLineLength() ending at #0 for PEnd=nil
      inc(result);
      c := PByte(result)^;
      if (c > 13) or
         ((c <> 0) and (c <> 10) and (c <> 13)) then
        continue;
      break;
    until false;
  dec(result, PtrUInt(P)); // returns length
end;

function GetLineSizeSmallerThan(P, PEnd: PUtf8Char; aMinimalCount: integer): boolean;
begin
  result := false;
  if P <> nil then
    while (P < PEnd) and
          (P^ <> #10) and
          (P^ <> #13) do
      if aMinimalCount = 0 then
        exit
      else
      begin
        dec(aMinimalCount);
        inc(P);
      end;
  result := true;
end;

{$ifndef PUREMORMOT2}
function GetNextStringLineToRawUnicode(var P: PChar): RawUnicode;
var
  S: PChar;
begin
  if P = nil then
    result := ''
  else
  begin
    S := P;
    while S^ >= ' ' do
      inc(S);
    result := StringToRawUnicode(P, S - P);
    while (S^ <> #0) and
          (S^ < ' ') do
      inc(S); // ignore e.g. #13 or #10
    if S^ <> #0 then
      P := S
    else
      P := nil;
  end;
end;
{$endif PUREMORMOT2}

function TrimLeftLowerCase(const V: RawUtf8): PUtf8Char;
begin
  result := Pointer(V);
  if result <> nil then
  begin
    while result^ in ['a'..'z'] do
      inc(result);
    if result^ = #0 then
      result := Pointer(V);
  end;
end;

function TrimLeftLowerCaseToShort(V: PShortString): ShortString;
begin
  TrimLeftLowerCaseToShort(V, result);
end;

procedure TrimLeftLowerCaseToShort(V: PShortString; out result: ShortString);
var
  P: PAnsiChar;
  L: integer;
begin
  L := length(V^);
  P := @V^[1];
  while (L > 0) and
        (P^ in ['a'..'z']) do
  begin
    inc(P);
    dec(L);
  end;
  if L = 0 then
    result := V^
  else
    SetString(result, P, L);
end;

function TrimLeftLowerCaseShort(V: PShortString): RawUtf8;
var
  P: PAnsiChar;
  L: integer;
begin
  L := length(V^);
  P := @V^[1];
  while (L > 0) and
        (P^ in ['a'..'z']) do
  begin
    inc(P);
    dec(L);
  end;
  if L = 0 then
    FastSetString(result, @V^[1], length(V^))
  else
    FastSetString(result, P, L);
end;

procedure AppendShortComma(text: PAnsiChar; len: PtrInt; var result: ShortString;
  trimlowercase: boolean);
begin
  if trimlowercase then
    while text^ in ['a'..'z'] do
      if len = 1 then
        exit
      else
      begin
        inc(text);
        dec(len);
      end;
  if integer(ord(result[0])) + len >= 255 then
    exit;
  if len > 0 then
    MoveByOne(text, @result[ord(result[0]) + 1], len);
  inc(result[0], len + 1);
  result[ord(result[0])] := ',';
end;

function IdemPropNameUSmallNotVoid(P1, P2, P1P2Len: PtrInt): boolean;
  {$ifdef HASINLINE}inline;{$endif}
begin
  inc(P1P2Len, P1);
  dec(P2, P1);
  repeat
    result := (PByte(P1)^ xor ord(PAnsiChar(P1)[P2])) and $df = 0;
    if not result then
      exit;
    inc(P1);
  until P1 >= P1P2Len;
end;

function FindShortStringListExact(List: PShortString; MaxValue: integer;
  aValue: PUtf8Char; aValueLen: PtrInt): integer;
var
  PLen: PtrInt;
begin
  if aValueLen <> 0 then
    for result := 0 to MaxValue do
    begin
      PLen := PByte(List)^;
      if (PLen = aValueLen) and
         IdemPropNameUSmallNotVoid(PtrInt(@List^[1]), PtrInt(aValue), PLen) then
        exit;
      List := pointer(@PAnsiChar(PLen)[PtrUInt(List) + 1]); // next
    end;
  result := -1;
end;

function FindShortStringListTrimLowerCase(List: PShortString; MaxValue: integer;
  aValue: PUtf8Char; aValueLen: PtrInt): integer;
var
  PLen: PtrInt;
begin
  if aValueLen <> 0 then
    for result := 0 to MaxValue do
    begin
      PLen := ord(List^[0]);
      inc(PUtf8Char(List));
      repeat // trim lower case
        if not (PUtf8Char(List)^ in ['a'..'z']) then
          break;
        inc(PUtf8Char(List));
        dec(PLen);
      until PLen = 0;
      if (PLen = aValueLen) and
         IdemPropNameUSmallNotVoid(PtrInt(aValue), PtrInt(List), PLen) then
        exit;
      inc(PUtf8Char(List), PLen); // next
    end;
  result := -1;
end;

function FindShortStringListTrimLowerCaseExact(List: PShortString;
  MaxValue: integer; aValue: PUtf8Char; aValueLen: PtrInt): integer;
var
  PLen: PtrInt;
begin
  if aValueLen <> 0 then
    for result := 0 to MaxValue do
    begin
      PLen := ord(List^[0]);
      inc(PUtf8Char(List));
      repeat
        if not (PUtf8Char(List)^ in ['a'..'z']) then
          break;
        inc(PUtf8Char(List));
        dec(PLen);
      until PLen = 0;
      if (PLen = aValueLen) and
         CompareMemSmall(aValue, List, PLen) then
        exit;
      inc(PUtf8Char(List), PLen);
    end;
  result := -1;
end;

function UnCamelCase(const S: RawUtf8): RawUtf8;
var
  tmp: TSynTempBuffer;
  destlen: PtrInt;
begin
  if S = '' then
    result := ''
  else
  begin
    destlen := UnCamelCase(tmp.Init(length(S) * 2), pointer(S));
    tmp.Done(PAnsiChar(tmp.buf) + destlen, result);
  end;
end;

function UnCamelCase(D, P: PUtf8Char): integer;
var
  Space, SpaceBeg, DBeg: PUtf8Char;
  CapitalCount: integer;
  Number: boolean;
label
  Next;
begin
  DBeg := D;
  if (D <> nil) and
     (P <> nil) then
  begin
    // avoid GPF
    Space := D;
    SpaceBeg := D;
    repeat
      CapitalCount := 0;
      Number := P^ in ['0'..'9'];
      if Number then
        repeat
          inc(CapitalCount);
          D^ := P^;
          inc(P);
          inc(D);
        until not (P^ in ['0'..'9'])
      else
        repeat
          inc(CapitalCount);
          D^ := P^;
          inc(P);
          inc(D);
        until not (P^ in ['A'..'Z']);
      if P^ = #0 then
        break; // no lowercase conversion of last fully uppercased word
      if (CapitalCount > 1) and
         not Number then
      begin
        dec(P);
        dec(D);
      end;
      while P^ in ['a'..'z'] do
      begin
        D^ := P^;
        inc(D);
        inc(P);
      end;
      if P^ = '_' then
        if P[1] = '_' then
        begin
          D^ := ':';
          inc(P);
          inc(D);
          goto Next;
        end
        else
        begin
          PWord(D)^ := ord(' ') + ord('-') shl 8;
          inc(D, 2);
Next:     if Space = SpaceBeg then
            SpaceBeg := D + 1;
          inc(P);
          Space := D + 1;
        end
      else
        Space := D;
      if P^ = #0 then
        break;
      D^ := ' ';
      inc(D);
    until false;
    if Space > DBeg then
      dec(Space);
    while Space > SpaceBeg do
    begin
      if Space^ in ['A'..'Z'] then
        if not (Space[1] in ['A'..'Z', ' ']) then
          inc(Space^, 32); // lowercase conversion of not last fully uppercased word
      dec(Space);
    end;
  end;
  result := D - DBeg;
end;

procedure CamelCase(P: PAnsiChar; len: PtrInt; var s: RawUtf8; const isWord: TSynByteSet);
var
  i: PtrInt;
  d: PAnsiChar;
  tmp: array[byte] of AnsiChar;
begin
  if len > SizeOf(tmp) then
    len := SizeOf(tmp);
  for i := 0 to len - 1 do
    if not (ord(P[i]) in isWord) then
    begin
      if i > 0 then
      begin
        MoveFast(P^, tmp, i);
        inc(P, i);
        dec(len, i);
      end;
      d := @tmp[i];
      while len > 0 do
      begin
        while (len > 0) and
              not (ord(P^) in isWord) do
        begin
          inc(P);
          dec(len);
        end;
        if len = 0 then
          break;
        d^ := NormToUpperAnsi7[P^];
        inc(d);
        repeat
          inc(P);
          dec(len);
          if not (ord(P^) in isWord) then
            break;
          d^ := P^;
          inc(d);
        until len = 0;
      end;
      P := @tmp;
      len := d - tmp;
      break;
    end;
  FastSetString(s, P, len);
end;

procedure CamelCase(const text: RawUtf8; var s: RawUtf8; const isWord: TSynByteSet);
begin
  CamelCase(pointer(text), length(text), s, isWord);
end;

procedure GetCaptionFromPCharLen(P: PUtf8Char; out result: string);
var
  Temp: array[byte] of AnsiChar;
begin
  if P = nil then
    exit;
  {$ifdef UNICODE}
  Utf8DecodeToUnicodeString(Temp, UnCamelCase(@Temp, P), result);
  {$else}
  SetString(result, PAnsiChar(@Temp), UnCamelCase(@Temp, P));
  {$endif UNICODE}
  if Assigned(LoadResStringTranslate) then
    LoadResStringTranslate(result);
end;


{ ************ TRawUtf8DynArray Processing Functions }

function IsZero(const Values: TRawUtf8DynArray): boolean;
var
  i: PtrInt;
begin
  result := false;
  for i := 0 to length(Values) - 1 do
    if Values[i] <> '' then
      exit;
  result := true;
end;

function TRawUtf8DynArrayFrom(const Values: array of RawUtf8): TRawUtf8DynArray;
var
  i: PtrInt;
begin
  Finalize(result);
  SetLength(result, length(Values));
  for i := 0 to high(Values) do
    result[i] := Values[i];
end;

function FindRawUtf8(Values: PRawUtf8; const Value: RawUtf8; ValuesCount: integer;
  CaseSensitive: boolean): integer;
var
  ValueLen: TStrLen;
begin
  dec(ValuesCount);
  ValueLen := length(Value);
  if ValueLen = 0 then
    for result := 0 to ValuesCount do
      if Values^ = '' then
        exit
      else
        inc(Values)
  else if CaseSensitive then
    for result := 0 to ValuesCount do
      if (PtrUInt(Values^) <> 0) and
         ({%H-}PStrLen(PtrUInt(Values^) - _STRLEN)^ = ValueLen) and
         CompareMemFixed(pointer(PtrInt(Values^)), pointer(Value), ValueLen) then
        exit
      else
        inc(Values)
  else
    for result := 0 to ValuesCount do
      if (PtrUInt(Values^) <> 0) and // StrIComp() won't change length
         ({%H-}PStrLen(PtrUInt(Values^) - _STRLEN)^ = ValueLen) and
         (StrIComp(pointer(Values^), pointer(Value)) = 0) then
        exit
      else
        inc(Values);
  result := -1;
end;

function FindRawUtf8(const Values: TRawUtf8DynArray; const Value: RawUtf8;
  CaseSensitive: boolean): integer;
begin
  result := FindRawUtf8(pointer(Values), Value, length(Values), CaseSensitive);
end;

function FindRawUtf8(const Values: array of RawUtf8; const Value: RawUtf8;
  CaseSensitive: boolean): integer;
begin
  result := high(Values);
  if result >= 0 then
    result := FindRawUtf8(@Values[0], Value, result + 1, CaseSensitive);
end;

function AddRawUtf8(var Values: TRawUtf8DynArray; const Value: RawUtf8;
  NoDuplicates, CaseSensitive: boolean): boolean;
var
  i: PtrInt;
begin
  if NoDuplicates then
  begin
    i := FindRawUtf8(Values, Value, CaseSensitive);
    if i >= 0 then
    begin
      result := false;
      exit;
    end;
  end;
  i := length(Values);
  SetLength(Values, i + 1);
  Values[i] := Value;
  result := true;
end;

function AddRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  const Value: RawUtf8): PtrInt;
begin
  result := ValuesCount;
  if result = Length(Values) then
    SetLength(Values, NextGrow(result));
  Values[result] := Value;
  inc(ValuesCount);
end;

procedure AddRawUtf8(var Values: TRawUtf8DynArray; const Value: TRawUtf8DynArray);
var
  n, o, i: PtrInt;
begin
  n := length(Value);
  if n = 0 then
    exit;
  o := length(Values);
  SetLength(Values, o + n);
  for i := 0 to n - 1 do
    Values[o + i] := Value[i];
end;

procedure AddRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  const Value: TRawUtf8DynArray);
var
  n, o, i: PtrInt;
begin
  n := length(Value);
  o := ValuesCount;
  inc(ValuesCount, n);
  if ValuesCount > Length(Values) then
    SetLength(Values, NextGrow(ValuesCount));
  for i := 0 to n - 1 do
    Values[o + i] := Value[i];
end;

function RawUtf8DynArrayEquals(const A, B: TRawUtf8DynArray): boolean;
var
  n, i: PtrInt;
begin
  result := false;
  n := length(A);
  if n <> length(B) then
    exit;
  for i := 0 to n - 1 do
    if A[i] <> B[i] then
      exit;
  result := true;
end;

function RawUtf8DynArrayEquals(const A, B: TRawUtf8DynArray; Count: integer): boolean;
var
  i: PtrInt;
begin
  result := false;
  for i := 0 to Count - 1 do
    if A[i] <> B[i] then
      exit;
  result := true;
end;

function AddString(var Values: TStringDynArray; const Value: string): PtrInt;
begin
  result := length(Values);
  SetLength(Values, result + 1);
  Values[result] := Value;
end;

procedure StringDynArrayToRawUtf8DynArray(const Source: TStringDynArray;
  var Result: TRawUtf8DynArray);
var
  i: PtrInt;
begin
  Finalize(Result);
  SetLength(Result, length(Source));
  for i := 0 to length(Source) - 1 do
    StringToUtf8(Source[i], Result[i]);
end;

procedure StringListToRawUtf8DynArray(Source: TStringList; var Result: TRawUtf8DynArray);
var
  i: PtrInt;
begin
  Finalize(Result);
  SetLength(Result, Source.Count);
  for i := 0 to Source.Count - 1 do
    StringToUtf8(Source[i], Result[i]);
end;

function FastLocatePUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt; Value: PUtf8Char): PtrInt;
begin
  result := FastLocatePUtf8CharSorted(P, R, Value, TUtf8Compare(@StrComp));
end;

function FastLocatePUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char; Compare: TUtf8Compare): PtrInt;
var
  L, i, cmp: PtrInt;
begin
  // fast O(log(n)) binary search
  if (not Assigned(Compare)) or
     (R < 0) then
    result := 0
  else if Compare(P^[R], Value) < 0 then // quick return if already sorted
    result := R + 1
  else
  begin
    L := 0;
    result := -1; // return -1 if found
    repeat
      {$ifdef CPUX64}
      i := L + R;
      i := i shr 1;
      {$else}
      i := (L + R) shr 1;
      {$endif CPUX64}
      cmp := Compare(P^[i], Value);
      if cmp = 0 then
        exit;
      if cmp < 0 then
        L := i + 1
      else
        R := i - 1;
    until L > R;
    while (i >= 0) and (Compare(P^[i], Value) >= 0) do
      dec(i);
    result := i + 1; // return the index where to insert
  end;
end;

function FastFindPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char; Compare: TUtf8Compare): PtrInt;
var
  L, cmp: PtrInt;
begin
  // fast O(log(n)) binary search
  L := 0;
  if Assigned(Compare) and (R >= 0) then
    repeat
      {$ifdef CPUX64}
      result := L + R;
      result := result shr 1;
      {$else}
      result := (L + R) shr 1;
      {$endif CPUX64}
      cmp := Compare(P^[result], Value);
      if cmp = 0 then
        exit;
      if cmp < 0 then
      begin
        L := result + 1;
        if L <= R then
          continue;
        break;
      end;
      R := result - 1;
      if L <= R then
        continue;
      break;
    until false;
  result := -1;
end;

{$ifdef CPUX64}

function FastFindPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt; Value: PUtf8Char): PtrInt;
{$ifdef FPC} assembler; nostackframe; asm {$else} asm .noframe {$endif}
        {$ifdef win64}  // P=rcx/rdi R=rdx/rsi Value=r8/rdx
        push    rdi
        mov     rdi, P  // P=rdi
        {$endif win64}
        push    r12
        push    r13
        xor     r9, r9  // L=r9
        test    R, R
        jl      @err
        test    Value, Value
        jz      @void
        mov     cl, byte ptr [Value]  // to check first char (likely diverse)
@s:     lea     rax, qword ptr [r9 + R]
        shr     rax, 1
        lea     r12, qword ptr [rax - 1]  // branchless main loop
        lea     r13, qword ptr [rax + 1]
        mov     r10, qword ptr [rdi + rax * 8]
        test    r10, r10
        jz      @lt
        cmp     cl, byte ptr [r10]
        je      @eq
        cmovc   R, r12
        cmovnc  r9, r13
@nxt:   cmp     r9, R
        jle     @s
@err:   mov     rax, -1
@found: pop     r13
        pop     r12
        {$ifdef win64}
        pop     rdi
        {$endif win64}
        ret
@lt:    mov     r9, r13 // very unlikely P[rax]=nil
        jmp     @nxt
@eq:    mov     r11, Value // first char equal -> check others
@sub:   mov     cl, byte ptr [r10]
        add     r10, 1
        add     r11, 1
        test    cl, cl
        jz      @found
        mov     cl, byte ptr [r11]
        cmp     cl, byte ptr [r10]
        je      @sub
        mov     cl, byte ptr [Value]  // reset first char
        cmovc   R, r12
        cmovnc  r9, r13
        cmp     r9, R
        jle     @s
        jmp     @err
@void:  mov     rax, -1
        cmp     qword ptr [P], 0
        cmove   rax, Value
        jmp     @found
end;

{$else}

function FastFindPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt; Value: PUtf8Char): PtrInt;
var
  L: PtrInt;
  c: byte;
  piv, val: PByte;
begin
  // fast O(log(n)) binary search using inlined StrCompFast()
  if R >= 0 then
    if Value <> nil then
    begin
      L := 0;
      repeat
        result := L + R;
        result := result shr 1;
        piv := pointer(P^[result]);
        if piv <> nil then
        begin
          val := pointer(Value);
          c := piv^;
          if c = val^ then
            repeat
              if c = 0 then
                exit;  // StrComp(P^[result],Value)=0
              inc(piv);
              inc(val);
              c := piv^;
            until c <> val^;
          if c > val^ then
          begin
            R := result - 1;  // StrComp(P^[result],Value)>0
            if L <= R then
              continue;
            break;
          end;
        end;
        L := result + 1;  // StrComp(P^[result],Value)<0
        if L <= R then
          continue;
        break;
      until false;
    end
    else if P^[0] = nil then
    begin
      // '' should be in lowest P[] slot
      result := 0;
      exit;
    end;
  result := -1;
end;

{$endif CPUX64}

function FastFindUpperPUtf8CharSorted(P: PPUtf8CharArray; R: PtrInt;
  Value: PUtf8Char; ValueLen: PtrInt): PtrInt;
var
  tmp: array[byte] of AnsiChar;
begin
  UpperCopy255Buf(@tmp, Value, ValueLen)^ := #0;
  result := FastFindPUtf8CharSorted(P, R, @tmp);
end;

function FastFindIndexedPUtf8Char(P: PPUtf8CharArray; R: PtrInt;
  var SortedIndexes: TCardinalDynArray; Value: PUtf8Char;
  ItemComp: TUtf8Compare): PtrInt;
var
  L, cmp: PtrInt;
begin
  // fast O(log(n)) binary search
  L := 0;
  if 0 <= R then
    repeat
      {$ifdef CPUX64}
      result := L + R;
      result := result shr 1;
      {$else}
      result := (L + R) shr 1;
      {$endif CPUX64}
      cmp := ItemComp(P^[SortedIndexes[result]], Value);
      if cmp = 0 then
      begin
        result := SortedIndexes[result];
        exit;
      end;
      if cmp < 0 then
      begin
        L := result + 1;
        if L <= R then
          continue;
        break;
      end;
      R := result - 1;
      if L <= R then
        continue;
      break;
    until false;
  result := -1;
end;

function AddSortedRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  const Value: RawUtf8; CoValues: PIntegerDynArray; ForcedIndex: PtrInt;
  Compare: TUtf8Compare): PtrInt;
var
  n: PtrInt;
begin
  if ForcedIndex >= 0 then
    result := ForcedIndex
  else
  begin
    if not Assigned(Compare) then
      Compare := @StrComp;
    result := FastLocatePUtf8CharSorted(pointer(Values), ValuesCount - 1,
      pointer(Value), Compare);
    if result < 0 then
      exit; // Value exists -> fails
  end;
  n := Length(Values);
  if ValuesCount = n then
  begin
    n := NextGrow(n);
    SetLength(Values, n);
    if CoValues <> nil then
      SetLength(CoValues^, n);
  end;
  n := ValuesCount;
  if result < n then
  begin
    n := (n - result) * SizeOf(pointer);
    MoveFast(Pointer(Values[result]), Pointer(Values[result + 1]), n);
    PtrInt(Values[result]) := 0; // avoid GPF
    if CoValues <> nil then
    begin
      {$ifdef CPU64} n := n shr 1; {$endif} // 64-bit pointer to 32-bit integer
      MoveFast(CoValues^[result], CoValues^[result + 1], n);
    end;
  end
  else
    result := n;
  Values[result] := Value;
  inc(ValuesCount);
end;

type
  /// used internally for faster quick sort
  {$ifdef USERECORDWITHMETHODS}
  TQuickSortRawUtf8 = record
  {$else}
  TQuickSortRawUtf8 = object
  {$endif USERECORDWITHMETHODS}
  public
    Compare: TUtf8Compare;
    CoValues: PIntegerArray;
    pivot: pointer;
    procedure Sort(Values: PPointerArray; L, R: PtrInt);
  end;

procedure TQuickSortRawUtf8.Sort(Values: PPointerArray; L, R: PtrInt);
var
  I, J, P: PtrInt;
  tmp: Pointer;
  int: integer;
begin
  if L < R then
    repeat
      I := L;
      J := R;
      P := (L + R) shr 1;
      repeat
        pivot := Values^[P];
        while Compare(Values^[I], pivot) < 0 do
          inc(I);
        while Compare(Values^[J], pivot) > 0 do
          dec(J);
        if I <= J then
        begin
          tmp := Values^[J];
          Values^[J] := Values^[I];
          Values^[I] := tmp;
          if CoValues <> nil then
          begin
            int := CoValues^[J];
            CoValues^[J] := CoValues^[I];
            CoValues^[I] := int;
          end;
          if P = I then
            P := J
          else if P = J then
            P := I;
          inc(I);
          dec(J);
        end;
      until I > J;
      if J - L < R - I then
      begin
        // use recursion only for smaller range
        if L < J then
          Sort(Values, L, J);
        L := I;
      end
      else
      begin
        if I < R then
          Sort(Values, I, R);
        R := J;
      end;
    until L >= R;
end;

procedure QuickSortRawUtf8(var Values: TRawUtf8DynArray; ValuesCount: integer;
  CoValues: PIntegerDynArray; Compare: TUtf8Compare);
var
  QS: TQuickSortRawUtf8;
begin
  if Assigned(Compare) then
    QS.Compare := Compare
  else
    QS.Compare := @StrComp;
  if CoValues = nil then
    QS.CoValues := nil
  else
    QS.CoValues := pointer(CoValues^);
  QS.Sort(pointer(Values), 0, ValuesCount - 1);
end;

procedure QuickSortRawUtf8(Values: PRawUtf8Array; L, R: PtrInt;
  caseInsensitive: boolean);
var
  QS: TQuickSortRawUtf8;
begin
  QS.Compare := StrCompByCase[caseInsensitive];
  QS.CoValues := nil;
  QS.Sort(pointer(Values), L, R);
end;

procedure MakeUniqueArray(var Values: TRawUtf8DynArray);
begin
  Values := copy(Values); // sub-proc to avoid try..finally
end;

function DeleteRawUtf8(var Values: TRawUtf8DynArray; Index: PtrInt): boolean;
var
  n: PtrInt;
begin
  n := length(Values);
  if PtrUInt(Index) >= PtrUInt(n) then
    result := false
  else
  begin
    dec(n);
    if PDACnt(PAnsiChar(pointer(Values)) - _DACNT)^ > 1 then
      MakeUniqueArray(Values);
    Values[Index] := ''; // avoid GPF
    if n > Index then
    begin
      MoveFast(pointer(Values[Index + 1]), pointer(Values[Index]),
        (n - Index) * SizeOf(pointer));
      PtrUInt(Values[n]) := 0; // avoid GPF
    end;
    SetLength(Values, n);
    result := true;
  end;
end;

function DeleteRawUtf8(var Values: TRawUtf8DynArray; var ValuesCount: integer;
  Index: integer; CoValues: PIntegerDynArray): boolean;
var
  n: integer;
begin
  n := ValuesCount;
  if cardinal(Index) >= cardinal(n) then
    result := false
  else
  begin
    dec(n);
    ValuesCount := n;
    if PDACnt(PAnsiChar(pointer(Values)) - _DACNT)^ > 1 then
      MakeUniqueArray(Values);
    Values[Index] := ''; // avoid GPF
    dec(n, Index);
    if n > 0 then
    begin
      if CoValues <> nil then
        MoveFast(CoValues^[Index + 1], CoValues^[Index], n * SizeOf(integer));
      MoveFast(pointer(Values[Index + 1]), pointer(Values[Index]), n * SizeOf(pointer));
      PtrUInt(Values[ValuesCount]) := 0; // avoid GPF
    end;
    result := true;
  end;
end;

{$ifdef OSPOSIX}

{ TPosixFileCaseInsensitive }

constructor TPosixFileCaseInsensitive.Create(
  const aFolder: TFileName; aSubFolders: boolean);
begin
  fFolder := aFolder;
  fSubFolders := aSubFolders;
  fFlushSeconds := 60;
end;

procedure TPosixFileCaseInsensitive.SetFolder(const aFolder: TFileName);
begin
  if self = nil then
    exit;
  fSafe.WriteLock;
  try
    fFiles := nil; // force list refresh
    fFolder := aFolder;
  finally
    fSafe.WriteUnLock;
  end;
end;

procedure TPosixFileCaseInsensitive.SetSubFolders(aSubFolders: boolean);
begin
  if (self = nil) or
     (fSubFolders = aSubFolders) then
    exit;
  fSubFolders := aSubFolders;
  Flush;
end;

procedure TPosixFileCaseInsensitive.OnIdle(tix64: Int64);
begin
  if (self = nil) or
     (fFiles = nil) or
     (fFlushSeconds = 0) then
    exit;
  if tix64 = 0 then
    tix64 := GetTickCount64;
  if tix64 shr 10 < fNextTix then
    exit;
  fSafe.WriteLock;
  try
    fFiles := nil; // force list refresh
  finally
    fSafe.WriteUnLock;
  end;
end;

procedure TPosixFileCaseInsensitive.Flush;
begin
  SetFolder(fFolder);
end;

function TPosixFileCaseInsensitive.Find(const aSearched: TFileName;
  aReadMs: PInteger): TFileName;
var
  start, stop: Int64;
  i: PtrInt;
  fn: RawUtf8;
begin
  result := '';
  if aReadMs <> nil then
    aReadMs^ := 0;
  if (self = nil) or
     (fFolder = '') or
     (aSearched = '') then
    exit;
  if fFiles = nil then // need to refresh the cache
  begin
    fSafe.WriteLock;
    try
      if fFiles = nil then
      begin
        if aReadMs <> nil then
          QueryPerformanceMicroSeconds(start);
        fFiles := PosixFileNames(fFolder, fSubFolders); // fast syscall
        QuickSortRawUtf8(fFiles, length(fFiles), nil, @StrIComp);
        // e.g. 4392 filenames from /home/ab/dev/lib/ in 7.20ms
        if aReadMs <> nil then
        begin
          QueryPerformanceMicroSeconds(stop);
          aReadMs^ := stop - start;
        end;
        if fFlushSeconds <> 0 then
          fNextTix := (GetTickCount64 shr 10) + fFlushSeconds;
      end;
    finally
      fSafe.WriteUnLock;
    end;
  end;
  StringToUtf8(aSearched, fn);
  fSafe.ReadLock; // non-blocking lookup
  try
    i := FastFindPUtf8CharSorted( // efficient O(log(n)) binary search
      pointer(fFiles), high(fFiles), pointer(fn), @StrIComp);
    if i >= 0 then
      Utf8ToFileName(fFiles[i], result); // use exact file name case from OS
  finally
    fSafe.ReadUnLock;
  end;
end;

function TPosixFileCaseInsensitive.Count: PtrInt;
begin
  if self = nil then
    result := 0
  else
    result := length(fFiles);
end;

function TPosixFileCaseInsensitive.Files: TRawUtf8DynArray;
begin
  result := nil;
  if (self = nil) or
     (fFiles = nil) then
    exit;
  fSafe.ReadLock;
  try
    result := copy(fFiles); // make a copy for thread safety
  finally
    fSafe.ReadUnLock;
  end;
end;

{$endif OSPOSIX}


{ ************** Operating-System Independent Unicode Process }

// freely inspired by Bero's PUCU library, released under zlib license
//  https://github.com/BeRo1985/pucu  (C)2016-2020 Benjamin Rosseaux

{$define UU_COMPRESSED}
// 1KB compressed static table in the exe renders into our 20KB UU[] array :)

type
  // 20,016 bytes for full Unicode 10.0 case folding branchless conversion
  {$ifdef USERECORDWITHMETHODS}
  TUnicodeUpperTable = record
  {$else}
  TUnicodeUpperTable = object
  {$endif USERECORDWITHMETHODS}
  public
    Block: array[0..37, 0..127] of integer;
    IndexHi: array[0..271] of byte;
    IndexLo: array[0..8, 0..31] of byte;
    // branchless Unicode 10.0 uppercase folding using our internal tables
    // caller should have checked that c <= UU_MAX
    function Ucs4Upper(c: PtrUInt): PtrUInt;
      {$ifdef HASINLINE} inline; {$endif}
  end;
  {$ifndef CPUX86NOTPIC}
  PUnicodeUpperTable = ^TUnicodeUpperTable;
  {$endif CPUX86NOTPIC}

const
  UU_BLOCK_HI = 7;
  UU_BLOCK_LO = 127;
  UU_INDEX_HI = 5;
  UU_INDEX_LO = 31;
  UU_MAX = $10ffff;

var
  {$ifdef UU_COMPRESSED}
  UU: TUnicodeUpperTable;
  {$else}
  UU: TUnicodeUpperTable = (
    Block: (
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 743, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -32, -32, -
      32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -232, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, -300),
     (195, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, 0, -1, 0,
      0, 97, 0, 0, 0, -1, 163, 0, 0, 0, 130, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0,
      0, 0, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 56, 0,
      0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, -1, 0, -1, 0, -1, 0, -
      1, 0, -1, 0, -1, 0, -1, -79, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 10815,
      10815, 0, -1, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 10783, 10780,
      10782, -210, -206, 0, -205, -205, 0, -202, 0, -203, 42319, 0, 0, 0, -205,
      42315, 0, -207, 0, 42280, 42308, 0, -209, -211, 42308, 10743, 42305, 0, 0,
      -211, 0, 10749, -213, 0, 0, -214, 0, 0, 0, 0, 0, 0, 0, 10727, 0, 0),
     (-218, 0, 0, -218, 0, 0, 0, 42282, -218, -69, -217, -217, -71, 0, 0, 0, 0, 0,
      -219, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42261, 42258, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, 130, 130, 130, 0, 0),
     (0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -38, -37, -37, -37, 0, -32,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -31, -32, -32, -32, -32, -32, -32, -32, -32, -32, -64, -63, -63, 0,
      -62, -57, 0, 0, 0, -47, -54, -8, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -86, -80, 7, -116, 0, -96, 0, 0,
      -1, 0, 0, -1, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, -80, -80, -80, -80, -80, -80, -80, -80, -80,
      -80, -80, -80, -80, -80, -80, -80, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1),
     (0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -15, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
      -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
      -48, -48, -48, -48, -48),
     (-48, -48, -48, -48, -48, -48, -48, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8,
      -8, -8, -8, 0, 0),
     (-6254, -6253, -6244, -6242, -6242, -6243, -6236, -6181, 35266, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35332, 0, 0, 0, 3814, 0, 0),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, 0, 0, 0, 0, -59, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1),
     (8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0,
      8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8,
      8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 86, 86, 86, 86, 100, 100,
      128, 128, 112, 112, 126, 126, 0, 0),
     (8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0,
      0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8,
      8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      -7205, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -28, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
      -16, -16, -16, -16, -16),
     (0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -26, -26, -26, -26,
      -26, -26, -26, -26, -26, -26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
      -26, -26, -26, -26, -26, -26, -26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
      -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
      -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48, -48,
      -48, -48, -48, -48, -48, -48, -48, 0, 0, -1, 0, 0, 0, -10795, -10792, 0, -1,
      0, -1, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0,
      0, -1, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (-7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264,
      -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264,
      -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264, -7264,
      -7264, -7264, -7264, -7264, -7264, 0, -7264, 0, 0, 0, 0, 0, -7264, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, -1),
     (0, -1, 0, -1, 0,  -1, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, -1, 0, 0,
      0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -928, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864),
     (-38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864, -38864,
      -38864, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, -40, -40, -40,
      -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
      -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
      -40, -40, -40, -40, -40, -40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -40, -40,
      -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
      -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40, -40,
      -40, -40, -40, -40, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
      -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
      -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
      -64, -64, -64, -64, -64, -64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
      -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
     (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
      -34, -34, -34, -34, -34, -34, -34, -34, -34, -34, -34, -34, -34, -34, -34,
      -34, -34, -34, -34, -34, -34, -34, -34, -34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    );
    IndexHi: (0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, 6, 7, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
    IndexLo: ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 12, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (12, 12, 12, 12, 12,
      12, 12, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      12, 14, 15, 12, 16, 17, 18, 19), (12, 12, 20, 21, 12, 12, 12, 12, 12, 22,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 23, 24, 25, 12, 12,
      12, 12, 12), (12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 26, 27, 28, 29, 12, 12, 12, 12,
      12, 12, 30, 31, 12, 12, 12, 12, 12, 12, 12, 12), (12, 12, 12, 12, 12, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      12, 12, 12, 12, 12, 32, 12), (12, 12, 12, 12, 12, 12, 12, 12, 33, 34, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 35, 12, 12, 12, 12,
      12, 12), (12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (12, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 37, 12, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12));
  );
  {$endif UU_COMPRESSED}

function TUnicodeUpperTable.Ucs4Upper(c: PtrUInt): PtrUInt;
var
  i: PtrUInt;
begin
  // branchless conversion of 0..UU_MAX = $10ffff Unicode codepoints
  i := c shr UU_BLOCK_HI;
  result := PtrInt(c) +
            Block[IndexLo[IndexHi[i shr UU_INDEX_HI], i and UU_INDEX_LO],
                  c and UU_BLOCK_LO];
end;

function Utf8UpperReference(S, D: PUtf8Char): PUtf8Char;
var
  c: PtrUInt;
  S2: PUtf8Char;
  {$ifdef CPUX86NOTPIC}
  tab: TUnicodeUpperTable absolute UU;
  {$else}
  tab: PUnicodeUpperTable;
  {$endif CPUX86NOTPIC}
begin
  {$ifndef CPUX86NOTPIC}
  tab := @UU;
  {$endif CPUX86NOTPIC}
  if S <> nil then
    repeat
      c := ord(S^);
      if c <= 127 then
        if c = 0 then
          break
        else
        begin
          inc(c, tab.Block[0, c]); // branchless a..z -> A..Z
          D^ := AnsiChar(c);
          inc(S);
          inc(D);
          continue;
        end
      else if c and $20 = 0 then
      begin
        c := (c shl 6) + byte(S[1]) - $3080; // fast process $0..$7ff
        inc(S, 2);
      end
      else
      begin
        S2 := S;
        c := UTF8_TABLE.GetHighUtf8Ucs4(S2); // handle even surrogates
        S := S2;
        if c = 0 then
          c := UNICODE_REPLACEMENT_CHARACTER; // =$fffd for invalid input
      end;
      if c <= UU_MAX then
        c := tab.Ucs4Upper(c);
      inc(D, Ucs4ToUtf8(c, D));
    until false;
  D^ := #0;
  result := D;
end;

function Utf8UpperReference(S, D: PUtf8Char; SLen: PtrUInt): PUtf8Char;
var
  c: PtrUInt;
  endSBy4: PUtf8Char;
  extra, i: PtrInt;
  {$ifdef CPUX86NOTPIC}
  tab: TUnicodeUpperTable absolute UU;
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  tab: PUnicodeUpperTable;
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  by1, by4; // ugly but faster
begin
  if (S <> nil) and
     (D <> nil) then
  begin
    {$ifndef CPUX86NOTPIC}
    tab := @UU;
    utf8 := @UTF8_TABLE;
    {$endif CPUX86NOTPIC}
    // first handle trailing 7-bit ASCII chars, by quad
    inc(SLen, PtrUInt(S));
    endSBy4 := PUtf8Char(SLen) - 4;
    if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(S) and 3 = 0) and {$endif}
       (S <= endSBy4) then
      repeat
        if PCardinal(S)^ and $80808080 <> 0 then
          goto by1; // break on first non ASCII quad
by4:    i := byte(S[0]);
        inc(i, tab.Block[0, i]); // branchless a..z -> A..Z
        D[0] := AnsiChar(i);
        i := byte(S[1]);
        inc(i, tab.Block[0, i]);
        D[1] := AnsiChar(i);
        i := byte(S[2]);
        inc(i, tab.Block[0, i]);
        D[2] := AnsiChar(i);
        i := byte(S[3]);
        inc(i, tab.Block[0, i]);
        D[3] := AnsiChar(i);
        inc(S, 4);
        inc(D, 4);
      until S > endSBy4;
    // generic loop, handling one UCS4 CodePoint per iteration
    if S < PUtf8Char(SLen) then
      repeat
by1:    c := byte(S^);
        inc(S);
        if c <= 127 then
        begin
          inc(c, tab.Block[0, c]); // branchless a..z -> A..Z
          D^ := AnsiChar(c);
          inc(D);
          if {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}(PtrUInt(S) and 3 = 0) and{$endif}
             (S <= endSBy4) then
            if PCardinal(S)^ and $80808080 = 0 then
              goto By4
            else
              continue
          else if S < PUtf8Char(SLen) then
            continue
          else
            break;
        end
        else
        begin
          extra := utf8.Lookup[c];
          if (extra = UTF8_INVALID) or
             (S + extra > PUtf8Char(SLen)) then
            break;
          i := 0;
          repeat
            c := (c shl 6) + byte(S[i]);
            inc(i)
          until i = extra;
          inc(S, extra);
          with utf8.Extra[extra] do
          begin
            dec(c, offset);
            if c < minimum then
              break; // invalid input content
          end;
          if c <= UU_MAX then
            c := tab.Ucs4Upper(c);
          inc(D, Ucs4ToUtf8(c, D));
          if S < PUtf8Char(SLen) then
            continue
          else
            break;
        end;
      until false;
    D^ := #0;
  end;
  result := D;
end;

function UpperCaseReference(const S: RawUtf8): RawUtf8;
var
  len: integer;
  tmp: TSynTempBuffer;
begin
  len := length(S);
  tmp.Init(len * 2); // some codepoints enhance in length
  tmp.Done(Utf8UpperReference(pointer(S), tmp.buf, len), result);
end;

function UpperCaseUcs4Reference(const S: RawUtf8): RawUcs4;
var
  c, n: PtrUInt;
  U: PUtf8Char;
begin
  if S = '' then
  begin
    result := nil;
    exit;
  end;
  SetLength(result, length(S) + 1);
  U := pointer(S);
  n := 0;
  repeat
    c := NextUtf8Ucs4(U);
    if c = 0 then
      break;
    if c <= UU_MAX then
      c := UU.Ucs4Upper(c);
    result[n] := c;
    inc(n);
  until false;
  if n = 0 then
    result := nil
  else
  begin
    result[n] := 0; // always end with a 0
    DynArrayFakeLength(result, n); // faster than SetLength()
  end;
end;

function Utf8ICompReference(u1, u2: PUtf8Char): PtrInt;
var
  c2: PtrInt;
  {$ifdef CPUX86NOTPIC}
  tab: TUnicodeUpperTable absolute UU;
  {$else}
  tab: PUnicodeUpperTable;
  {$endif CPUX86NOTPIC}
label
  c2low;
begin
  {$ifndef CPUX86NOTPIC}
  tab := @UU;
  {$endif CPUX86NOTPIC}
  if u1 <> u2 then
    if u1 <> nil then
      if u2 <> nil then
        repeat
          result := ord(u1^);
          c2 := ord(u2^);
          if result <= 127 then
            if result <> 0 then
            begin
              inc(u1);
              inc(result, tab.Block[0, result]); // branchless a..z -> A..Z
              if c2 <= 127 then
              begin
c2low:          if c2 = 0 then
                  exit; // u1>u2 -> return u1^
                inc(u2);
                inc(c2, tab.Block[0, c2]);
                dec(result, c2);
                if result <> 0 then
                  exit;
                continue;
              end;
            end
            else
            begin
              // result=u1^=#0 -> end of u1 reached
              if c2 <> 0 then    // end of u2 reached -> u1=u2 -> return 0
                result := -1;    // u1<u2
              exit;
            end
          else
          begin
            // fast Unicode 10.0 uppercase conversion
            if result and $20 = 0 then // $0..$7ff common case
            begin
              result := (result shl 6) + byte(u1[1]) - $3080;
              inc(u1, 2);
            end
            else
              result := UTF8_TABLE.GetHighUtf8Ucs4(u1);
            if PtrUInt(result) <= UU_MAX then
              result := tab.Ucs4Upper(result);
          end;
          if c2 <= 127 then
            goto c2low
          else if c2 and $20 = 0 then // $0..$7ff common case
          begin
            c2 := (c2 shl 6) + byte(u2[1]) - $3080;
            inc(u2, 2);
          end
          else
            c2 := UTF8_TABLE.GetHighUtf8Ucs4(u2);
          if PtrUInt(c2) <= UU_MAX then
            c2 := tab.Ucs4Upper(c2);
          dec(result, c2);
          if result <> 0 then
            exit;
        until false
      else
        result := 1 // u2=''
    else
      result := -1  // u1=''
  else
    result := 0;    // u1=u2
end;

function Utf8ILCompReference(u1, u2: PUtf8Char; L1, L2: integer): PtrInt;
var
  c2: PtrInt;
  extra, i: integer;
  {$ifdef CPUX86NOTPIC}
  tab: TUnicodeUpperTable absolute UU;
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  tab: PUnicodeUpperTable;
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  neg, pos;
begin
  {$ifndef CPUX86NOTPIC}
  tab := @UU;
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  if u1 <> u2 then
    if (u1 <> nil) and
       (L1 <> 0) then
      if (u2 <> nil) and
         (L2 <> 0) then
        repeat
          result := ord(u1^);
          c2 := ord(u2^);
          inc(u1);
          dec(L1);
          if result <= 127 then
          begin
            inc(result, tab.Block[0, result]); // branchless a..z -> A..Z
            if c2 <= 127 then
            begin
              inc(c2, tab.Block[0, c2]);
              dec(L2);
              inc(u2);
              dec(result, c2);
              if result <> 0 then
                // found unmatching char
                exit
              else if L1 <> 0 then
                if L2 <> 0 then
                  // L1>0 and L2>0 -> next char
                  continue
                else
                  // L1>0 and L2=0 -> u1>u2
                  goto pos
              else
              if L2 <> 0 then
                // L1=0 and L2>0 -> u1<u2
                goto neg
              else
                // L1=0 and L2=0 -> u1=u2 -> returns 0
                exit;
            end;
          end
          else
          begin
            // fast Unicode 10.0 uppercase conversion
            extra := utf8.Lookup[result];
            if extra = UTF8_INVALID then
              // invalid leading byte
              goto neg;
            dec(L1, extra);
            if L1 < 0 then
              goto neg;
            i := 0;
            repeat
              result := result shl 6;
              inc(result, ord(u1[i]));
              inc(i);
            until i = extra;
            inc(u1, extra);
            dec(result, utf8.Extra[extra].offset);
            if PtrUInt(result) <= UU_MAX then
              result := tab.Ucs4Upper(result);
          end;
          // here result=NormToUpper[u1^]
          inc(u2);
          dec(L2);
          if c2 <= 127 then
          begin
            inc(c2, tab.Block[0, c2]);
            dec(result, c2);
            if result <> 0 then
              // found unmatching codepoint
              exit;
          end
          else
          begin
            extra := utf8.Lookup[c2];
            if extra = UTF8_INVALID then
              goto pos;
            dec(L2, extra);
            if L2 < 0 then
              goto pos;
            i := 0;
            repeat
              c2 := c2 shl 6;
              inc(c2, ord(u2[i]));
              inc(i);
            until i = extra;
            inc(u2, extra);
            dec(c2, utf8.Extra[extra].offset);
            if PtrUInt(c2) <= UU_MAX then
              c2 := tab.Ucs4Upper(c2);
            dec(result, c2);
            if result <> 0 then
              // found unmatching codepoint
              exit;
          end;
          // here we have result=0
          if L1 = 0 then
            // test if we reached end of u1 or end of u2
            if L2 = 0 then
              // u1=u2
              exit
            else
              // u1<u2
              goto neg
          else
          if L2 = 0 then
            // u1>u2
            goto pos;
        until false
      else
pos:    // u2='' or u1>u2
        result := 1
    else
neg:  // u1='' or u1<u2
      result := -1
  else
    // u1=u2
    result := 0;
end;

function StrPosIReference(U: PUtf8Char; const Up: RawUcs4): PUtf8Char;
var
  c, extra, i: PtrInt;
  u0, u2: PUtf8Char;
  up2: PInteger;
  {$ifdef CPUX86NOTPIC}
  tab: TUnicodeUpperTable absolute UU;
  utf8: TUtf8Table absolute UTF8_TABLE;
  {$else}
  tab: PUnicodeUpperTable;
  utf8: PUtf8Table;
  {$endif CPUX86NOTPIC}
label
  nxt;
begin
  result := nil;
  if (U = nil) or
     (Up = nil) then
    exit;
  {$ifndef CPUX86NOTPIC}
  tab := @UU;
  utf8 := @UTF8_TABLE;
  {$endif CPUX86NOTPIC}
  repeat
    // fast search for the first character
nxt:u0 := U;
    c := byte(U^);
    inc(U);
    if c <= 127 then
    begin
      if c = 0 then
        exit; // not found -> return nil
      inc(c, tab.Block[0, c]); // branchless a..z -> A..Z
      if c <> Up[0] then
        continue;
    end
    else
    begin
      extra := utf8.Lookup[c];
      if extra = UTF8_INVALID then
        exit;
      i := 0;
      repeat
        c := c shl 6;
        inc(c, ord(U[i]));
        inc(i);
      until i = extra;
      inc(U, extra);
      dec(c, utf8.Extra[extra].offset);
      if PtrUInt(c) <= UU_MAX then
        c := tab.Ucs4Upper(c);
      if c <> Up[0] then
        continue;
    end;
    // if we reached here, U^ and Up^ first UCS4 CodePoint do match
    u2 := U;
    up2 := @Up[1];
    repeat
      if up2^ = 0 then
      begin
        result := u0; // found -> return position in U
        exit;
      end;
      c := byte(u2^);
      inc(u2);
      if c <= 127 then
      begin
        if c = 0 then
          exit; // not found -> return nil
        inc(c, tab.Block[0, c]);
        if c <> up2^ then
          goto nxt;
        inc(up2);
      end
      else
      begin
        extra := utf8.Lookup[c];
        if extra = UTF8_INVALID then
          exit; // invalid input
        i := 0;
        repeat
          c := c shl 6;
          inc(c, ord(u2[i]));
          inc(i);
        until i = extra;
        inc(u2, extra);
        dec(c, utf8.Extra[extra].offset);
        if PtrUInt(c) <= UU_MAX then
          c := tab.Ucs4Upper(c);
        if c <> up2^ then
          goto nxt;
        inc(up2);
      end;
    until false;
  until false;
end;




const
  // reference 8-bit upper chars as in WinAnsi/CP1252 for NormToUpper/Lower[]
  WinAnsiToUp: array[138..255] of byte = (
    83,  139, 140, 141, 90,  143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
    153, 83,  155, 140, 157,  90,  89, 160, 161, 162, 163, 164, 165, 166, 167,
    168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
    183, 184, 185, 186, 187, 188, 189, 190, 191, 65,  65,  65,  65,  65,  65,
    198, 67,  69,  69,  69,  69,  73,  73,  73,  73,  68,  78,  79,  79,  79,
    79,  79,  215, 79,  85,  85,  85,  85,  89,  222, 223, 65,  65,  65,  65,
    65,  65,  198, 67,  69,  69,  69,  69,  73,  73,  73,  73,  68,  78,  79,
    79,  79,  79,  79,  247, 79,  85,  85,  85,  85,  89,  222, 89);

{$ifdef UU_COMPRESSED}

  // 1KB compressed buffer which renders into our 20,016 bytes UU[] array
  UU_: array[byte] of cardinal = (
    $040019FD, $FF5A6024, $00855A00, $FFFFFFE0, $5A5201F0, $02E700E8, $FFE0AA5A,
    $E0045A4B, $5A790BFF, $045A0007, $A045A1FF, $DB1878BA, $01A82B01, $0145A000,
    $1DA45008, $041E5A80, $401DA450, $5A8F185A, $FFFFFED4, $590B5AC3, $0C5A84A4,
    $5314A453, $610008A4, $A4520F5A, $82F5A1A3, $F1EBB5AB, $5A44DDF7, $52105A84,
    $5A845AA4, $5A4A5AC4, $5A385AC6, $11A45217, $10ABA500, $45A00200, $4F5A4401,
    $0000B15A, $A05A4F04, $5A830145, $C65A0018, $5EBAA05A, $20245AC0, $85A1A452,
    $5BB55700, $00002A3F, $065A2A3F, $5B04A453, $1F055A40, $A11C02A1, $02A11E02,
    $3200012E, $45A10001, $C2000133, $3645A10C, $45A10001, $4F000135, $00550690,
    $000E5AA5, $A54B0CC2, $013165A1, $2845A100, $440000A5, $012FAC02, $00012D00,
    $F70A5144, $41000029, $000A5AA5, $2AC4A16B, $45A10D22, $2B0291FD, $85A10001,
    $1C00022A, $A129E700, $000226A5, $0D930008, $512A000C, $BB0D920A, $05270001,
    $0001B900, $0644145A, $25000107, $00280002, $120A5115, $F1F552A5, $54009C55,
    $A453AF5A, $5AC45A44, $0082000C, $5A208400, $FFDA00BB, $AD6EFFFF, $09DB15D5,
    $F045A100, $01E13201, $1201F000, $C10001C0, $45A10005, $C70001C2, $C5A10001,
    $CA0001D1, $01F80001, $A045A100, $01AA33BA, $0001B000, $D0000007, $001EFBFB,
    $A2FFFF8C, $0002A0AB, $85A15A83, $E0D0BAA2, $01F00BFF, $2E01F012, $04F004F2,
    $3645A02A, $240D45A0, $5A44A459, $847E5A40, $115A405A, $400002F1, $45A0115A,
    $CC5A400D, $1FD000C4, $01255507, $8202F000, $55F1F555, $00C955F4, $700001F8,
    $85A10200, $FFFFE792, $9C018193, $859E0181, $01819D01, $DB0181A4, $89C20181,
    $00C5F558, $3C90F1E4, $E5A18A04, $E5A10EE6, $5A40BAA2, $CC5A40CC, $01C50014,
    $A045B100, $45AAFFBA, $00000008, $5A070080, $04800023, $80002B5A, $80000604,
    $00000635, $BC2F5A0E, $00F75B6D, $D875A108, $050A30A7, $014A0009, $5604A200,
    $056A0001, $42000164, $00018006, $01700802, $7E070200, $A17E0001, $070080B5,
    $80080001, $00022635, $35860002, $C4304825, $810975A1, $01E3DBB5, $090010A5,
    $8004335A, $80053B5A, $5A07000F, $F1CA0137, $E4006C55, $84A501FF, $0001F000,
    $8E2A00F0, $5AEDC05B, $55F15B04, $002F55F4, $900001E6, $F5525201, $87FFD019,
    $5A1202F0, $000C5A84, $FFFFD5D5, $AA02A1D8, $1845A545, $5A84A453, $40A45328,
    $5A40CC5A, $FB5FC736, $445804BF, $305B045A, $01C1A000, $A182C5E0, $B1C5E245,
    $F4C5E345, $A4504E55, $A4504C77, $1E55F441, $C417A450, $405A445A, $588A9C5A,
    $5A405A84, $045B0406, $C05A445B, $592C2A5A, $C6F021A4, $6E55F49B, $01FC6000,
    $300070A5, $60FFFF68, $0970FF7C, $F1F55219, $FFE00655, $35F55257, $0001D800,
    $528A0270, $2255F1F5, $527F7FD0, $F20011F5, $00063B03, $B603F000, $E035F552,
    $01F057FF, $09F55206, $0001DE00, $5A720210, $020100F1, $0403075A, $0503045A,
    $0C5A0706, $F15A0803, $00000012, $03120103, $08675104, $5A0B0A09, $5A0D0C1B,
    $0F0E0C11, $1211100C, $140C0C13, $0C055A15, $00005A16, $0C0E0000, $5A191817,
    $1B1A0C31, $065A1D1C, $5A1F1E0C, $5A200C26, $22210C09, $230C0F5A, $0000175A,
    $240C0000, $250C205A, $000C0D5A, $00000000);

procedure InitializeUU;
var
  tmp: array[0..7000] of byte; // need only 6653 bytes
begin
  // Uppercase Unicode table RLE + SynLZ decompression from 1KB to 20KB :)
  if (RleUnCompress(@tmp, @UU, SynLZdecompress1(@UU_, 1019, @tmp)) <> SizeOf(UU)) or
     (crc32c(0, @UU, SizeOf(UU)) <> $7343D053) then
    raise ESynUnicode.Create('UU Table Decompression Failed'); // paranoid
end;

{$endif UU_COMPRESSED}

(*
procedure doUU;
var
  tmp1, tmp2: array[0..5500] of cardinal;
  rle, lz, i: PtrInt;
  l: RawUtf8;
begin
  rle := RleCompress(@UU, @tmp1, SizeOf(UU), SizeOf(tmp1));
  lz := SynLZCompress1(@tmp1, rle, @tmp2);
  writeln(SizeOf(UU)); writeln(rle); writeln(lz);
  writeln('UU_ = array[byte] of cardinal = ('); l := '  ';
  for i := 0 to 255 do
  begin
    l := l + '$' + HexStr(tmp2[i], 8) + ',';
    if length(l) > 70 then
    begin
      writeln(l);
      l := '  ';
    end;
  end;
  writeln(l, ');');
end;
*)

procedure InitializeUnit;
var
  i: PtrInt;
  c: AnsiChar;
begin
  // decompress 1KB static in the exe into 20KB UU[] array for Unicode Uppercase
  {$ifdef UU_COMPRESSED}
  InitializeUU;
  {$endif UU_COMPRESSED}
  // initialize internal lookup tables for various text conversions
  for i := 0 to 255 do
    NormToNormByte[i] := i;
  NormToUpperAnsi7Byte := NormToNormByte;
  for i := ord('a') to ord('z') do
    dec(NormToUpperAnsi7Byte[i], 32);
  NormToLowerAnsi7Byte := NormToNormByte;
  for i := ord('A') to ord('Z') do
    inc(NormToLowerAnsi7Byte[i], 32);
  MoveFast(NormToUpperAnsi7, NormToUpper, 138);
  MoveFast(WinAnsiToUp, NormToUpperByte[138], SizeOf(WinAnsiToUp));
  for i := 0 to 255 do
  begin
    c := NormToUpper[AnsiChar(i)];
    if c in ['A'..'Z'] then
      inc(c, 32);
    NormToLower[AnsiChar(i)] := c;
  end;
  for c := low(c) to high(c) do
  begin
    if not (c in [#0, #10, #13]) then
      include(TEXT_CHARS[c], tcNot01013);
    if c in [#10, #13] then
      include(TEXT_CHARS[c], tc1013);
    if c in ['0'..'9', 'a'..'z', 'A'..'Z'] then
      include(TEXT_CHARS[c], tcWord);
    if c in ['_', 'a'..'z', 'A'..'Z'] then
      include(TEXT_CHARS[c], tcIdentifierFirstChar);
    if c in ['_', '0'..'9', 'a'..'z', 'A'..'Z'] then
      include(TEXT_CHARS[c], tcIdentifier);
    if c in ['_', '-', '.', '0'..'9', 'a'..'z', 'A'..'Z'] then
      // '~' is part of the RFC 3986 but should be escaped in practice
      // see https://blog.synopse.info/?post/2020/08/11/The-RFC%2C-The-URI%2C-and-The-Tilde
      include(TEXT_CHARS[c], tcUriUnreserved);
    if c in [#1..#9, #11, #12, #14..' '] then
      include(TEXT_CHARS[c], tcCtrlNotLF);
    if c in [#1..' ', ';'] then
      include(TEXT_CHARS[c], tcCtrlNot0Comma);
  end;
  // setup sorting functions redirection
  StrCompByCase[false] := @StrComp;
  StrCompByCase[true] := @StrIComp;
  {$ifdef CPUINTEL}
  SortDynArrayAnsiStringByCase[false] := @SortDynArrayAnsiString;
  {$else}
  SortDynArrayAnsiStringByCase[false] := @SortDynArrayRawByteString;
  {$endif CPUINTEL}
  SortDynArrayAnsiStringByCase[true] := @SortDynArrayAnsiStringI;
  IdemPropNameUSameLen[false] := @IdemPropNameUSameLenNotNull;
  IdemPropNameUSameLen[true] := @mormot.core.base.CompareMem;

  // setup basic Unicode conversion engines
  SetLength(SynAnsiConvertListCodePage, 16); // no resize -> more thread safe
  CurrentAnsiConvert   := NewEngine(Unicode_CodePage);
  WinAnsiConvert       := NewEngine(CP_WINANSI) as TSynAnsiFixedWidth;
  Utf8AnsiConvert      := NewEngine(CP_UTF8) as TSynAnsiUtf8;
  RawByteStringConvert := NewEngine(CP_RAWBYTESTRING) as TSynAnsiFixedWidth;
  // setup optimized ASM functions
  IsValidUtf8Buffer := @IsValidUtf8Pas;
  {$ifdef ASMX64AVXNOCONST}
  if cpuHaswell in X64CpuFeatures then
    // Haswell CPUs can use simdjson AVX2 asm for IsValidUtf8()
    IsValidUtf8Buffer := @IsValidUtf8Avx2;
  {$endif ASMX64AVXNOCONST}
end;


initialization
  InitializeUnit;


end.