mirror of
https://github.com/danieleteti/delphimvcframework.git
synced 2024-11-15 15:55:54 +01:00
6329 lines
191 KiB
ObjectPascal
6329 lines
191 KiB
ObjectPascal
/// Framework Core Text, Binary and Time Search Engines
|
|
// - this unit is a part of the Open Source Synopse mORMot framework 2,
|
|
// licensed under a MPL/GPL/LGPL three license - see LICENSE.md
|
|
unit mormot.core.search;
|
|
|
|
{
|
|
*****************************************************************************
|
|
|
|
Several Indexing and Search Engines, as used by other parts of the framework
|
|
- Files Search in Folders
|
|
- ScanUtf8, GLOB and SOUNDEX Text Search
|
|
- Efficient CSV Parsing using RTTI
|
|
- Versatile Expression Search Engine
|
|
- Bloom Filter Probabilistic Index
|
|
- Binary Buffers Delta Compression
|
|
- TDynArray Low-Level Binary Search and Iteration
|
|
- TSynFilter and TSynValidate Processing Classes
|
|
- Cross-Platform TSynTimeZone Time Zones
|
|
|
|
*****************************************************************************
|
|
}
|
|
|
|
interface
|
|
|
|
{$I mormot.defines.inc}
|
|
|
|
uses
|
|
classes,
|
|
sysutils,
|
|
mormot.core.base,
|
|
mormot.core.os,
|
|
mormot.core.rtti,
|
|
mormot.core.unicode,
|
|
mormot.core.text,
|
|
mormot.core.buffers,
|
|
mormot.core.datetime,
|
|
mormot.core.data,
|
|
mormot.core.json;
|
|
|
|
|
|
{ ****************** Files Search in Folders }
|
|
|
|
type
|
|
{$A-}
|
|
/// define one file found result item, as returned by FindFiles()
|
|
// - Delphi "object" is buggy on stack -> also defined as record with methods
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TFindFiles = record
|
|
{$else}
|
|
TFindFiles = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
public
|
|
/// the matching file name
|
|
// - including its folder name unless ffoExcludesDir is set
|
|
Name: TFileName;
|
|
/// the matching file attributes
|
|
Attr: integer;
|
|
/// the matching file size
|
|
Size: Int64;
|
|
/// the matching file local date/time
|
|
Timestamp: TDateTime;
|
|
/// fill the item properties from a FindFirst/FindNext's TSearchRec
|
|
procedure FromSearchRec(const Directory: TFileName; const F: TSearchRec);
|
|
/// returns some ready-to-be-loggued text
|
|
function ToText: ShortString;
|
|
end;
|
|
{$A+}
|
|
|
|
/// result list, as returned by FindFiles()
|
|
TFindFilesDynArray = array of TFindFiles;
|
|
|
|
/// one optional feature of FindFiles()
|
|
// - ffoSortByName will sort the result files by extension then name
|
|
// - ffoExcludesDir won't include the path in TFindFiles.Name
|
|
// - ffoSubFolder will search within nested folders
|
|
TFindFilesOption = (
|
|
ffoSortByName,
|
|
ffoExcludesDir,
|
|
ffoSubFolder);
|
|
/// the optional features of FindFiles()
|
|
TFindFilesOptions = set of TFindFilesOption;
|
|
|
|
/// search for matching files by names
|
|
// - just an enhanced wrapper around FindFirst/FindNext with some options
|
|
// - you may specify several masks in Mask, e.g. as '*.jpg;*.jpeg'
|
|
function FindFiles(const Directory: TFileName;
|
|
const Mask: TFileName = FILES_ALL; const IgnoreFileName: TFileName = '';
|
|
Options: TFindFilesOptions = []): TFindFilesDynArray;
|
|
|
|
/// search for matching file names
|
|
// - just a wrapper around FindFilesDynArrayToFileNames(FindFiles())
|
|
function FileNames(const Directory: TFileName;
|
|
const Mask: TFileName = FILES_ALL; Options: TFindFilesOptions = [];
|
|
const IgnoreFileName: TFileName = ''): TFileNameDynArray; overload;
|
|
|
|
/// search for matching file names from path-delimited content
|
|
// - is a wrapper around FindFileNames(MakePath())
|
|
function FileNames(const Path: array of const; const Mask: TFileName = FILES_ALL;
|
|
Options: TFindFilesOptions = []): TFileNameDynArray; overload;
|
|
|
|
/// convert a result list, as returned by FindFiles(), into an array of Files[].Name
|
|
function FindFilesDynArrayToFileNames(const Files: TFindFilesDynArray): TFileNameDynArray;
|
|
|
|
/// sort a FindFiles() result list by its TFindFiles[].Timestamp field
|
|
procedure FindFilesSortByTimestamp(var Files: TFindFilesDynArray);
|
|
|
|
type
|
|
/// one optional feature of SynchFolders()
|
|
// - process recursively nested folders if sfoSubFolder is included
|
|
// - use file content instead of file date check if sfoByContent is included
|
|
// - display synched file name on console if sfoWriteFileNameToConsole is included
|
|
TSynchFoldersOption = (
|
|
sfoSubFolder,
|
|
sfoByContent,
|
|
sfoWriteFileNameToConsole);
|
|
/// the optional features of SynchFolders()
|
|
TSynchFoldersOptions = set of TSynchFoldersOption;
|
|
|
|
/// ensure all files in Dest folder(s) do match the one in Reference
|
|
// - won't copy all files from Reference folders, but will update files already
|
|
// existing in Dest, which did change since last synchronization
|
|
// - file copy will use in-memory loading, so won't work well with huge files
|
|
// - returns the number of files copied during the process
|
|
function SynchFolders(const Reference, Dest: TFileName;
|
|
Options: TSynchFoldersOptions = []): integer;
|
|
|
|
/// copy all files from a source folder to a destination folder
|
|
// - will copy only new or changed files, keeping existing identical files
|
|
// - file copy will use stream loading, so would cope with huge files
|
|
// - returns the number of fields copied during the process, -1 on error
|
|
function CopyFolder(const Source, Dest: TFileName;
|
|
Options: TSynchFoldersOptions = []): integer;
|
|
|
|
|
|
{ ****************** ScanUtf8, GLOB and SOUNDEX Text Search }
|
|
|
|
/// read and store text into values[] according to fmt specifiers
|
|
// - %d as PInteger, %D as PInt64, %u as PCardinal, %U as PQWord, %f as PDouble,
|
|
// %F as PCurrency, %x as 8 hexa chars to PInteger, %X as 16 hexa chars to PInt64,
|
|
// %s as PShortString (UTF-8 encoded), %S as PRawUtf8, %L as PRawUtf8 (getting
|
|
// all text until the end of the line)
|
|
// - optionally, specifiers and any whitespace separated identifiers may be
|
|
// extracted and stored into the ident[] array, e.g. '%dFirstInt %s %DOneInt64'
|
|
// will store ['dFirstInt','s','DOneInt64'] into ident[] dynamic array
|
|
function ScanUtf8(const text, fmt: RawUtf8; const values: array of pointer;
|
|
ident: PRawUtf8DynArray = nil): integer; overload;
|
|
|
|
/// read text from P/PLen and store it into values[] according to fmt specifiers
|
|
function ScanUtf8(P: PUtf8Char; PLen: PtrInt; const fmt: RawUtf8;
|
|
const values: array of pointer; ident: PRawUtf8DynArray): integer; overload;
|
|
|
|
|
|
type
|
|
PMatch = ^TMatch;
|
|
|
|
// used when inlining TMatch.Match
|
|
TMatchSearchFunction = function(aMatch: PMatch;
|
|
aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
|
|
/// low-level structure used by IsMatch() for actual GLOB search
|
|
// - you can use this object to prepare a given pattern, e.g. in a loop
|
|
// - implemented as a fast brute-force state-machine without any heap allocation
|
|
// - some common patterns ('exactmatch', 'startwith*', '*endwith', '*contained*')
|
|
// are handled with dedicated code, optionally with case-insensitive search
|
|
// - PrepareContains() is the most efficient method for '*contained*' search
|
|
// - consider using TMatchs (or SetMatchs/TMatchDynArray) if you expect to
|
|
// search for several patterns, or even TExprParserMatch for expression search
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TMatch = record
|
|
{$else}
|
|
TMatch = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
private
|
|
Pattern, Text: PUtf8Char;
|
|
P, T, PMax, TMax: PtrInt;
|
|
Upper: PNormTable;
|
|
State: (sNONE, sABORT, sEND, sLITERAL, sPATTERN, sRANGE, sVALID);
|
|
procedure MatchAfterStar;
|
|
procedure MatchMain;
|
|
public
|
|
/// published for proper inlining
|
|
Search: TMatchSearchFunction;
|
|
/// initialize the internal fields for a given glob search pattern
|
|
// - note that the aPattern instance should remain in memory, since it will
|
|
// be pointed to by the PatternText private field of this object
|
|
procedure Prepare(const aPattern: RawUtf8;
|
|
aCaseInsensitive, aReuse: boolean); overload;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
/// initialize the internal fields for a given glob search pattern
|
|
// - note that the aPattern buffer should remain in memory, since it will
|
|
// be pointed to by the PatternText private field of this object
|
|
procedure Prepare(aPattern: PUtf8Char; aPatternLen: integer;
|
|
aCaseInsensitive, aReuse: boolean); overload;
|
|
/// initialize low-level internal fields for'*aPattern*' search
|
|
// - this method is faster than a regular Prepare('*' + aPattern + '*'),
|
|
// since it may use the SBNDMQ2 algorithm for patterns of length 2..31
|
|
// - warning: the supplied aPattern variable may be modified in-place to be
|
|
// filled with some lookup buffer, when SBNDMQ2 is triggered
|
|
procedure PrepareContains(var aPattern: RawUtf8;
|
|
aCaseInsensitive: boolean); overload;
|
|
/// initialize low-level internal fields for a custom search algorithm
|
|
procedure PrepareRaw(aPattern: PUtf8Char; aPatternLen: integer;
|
|
aSearch: TMatchSearchFunction);
|
|
/// returns TRUE if the supplied content matches the prepared glob pattern
|
|
// - this method is not thread-safe
|
|
function Match(const aText: RawUtf8): boolean; overload;
|
|
{$ifdef FPC} inline;{$endif}
|
|
/// returns TRUE if the supplied content matches the prepared glob pattern
|
|
// - this method is not thread-safe
|
|
function Match(aText: PUtf8Char; aTextLen: PtrInt): boolean; overload;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
/// returns TRUE if the supplied content matches the prepared glob pattern
|
|
// - this method IS thread-safe, and won't lock
|
|
function MatchThreadSafe(const aText: RawUtf8): boolean;
|
|
/// returns TRUE if the supplied VCL/LCL content matches the prepared glob pattern
|
|
// - this method IS thread-safe, will use on-stack UTF-8 temporary conversion
|
|
// if possible, and won't lock
|
|
function MatchString(const aText: string): boolean;
|
|
/// returns TRUE if this search pattern matches another
|
|
function Equals(const aAnother: TMatch): boolean;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
/// access to the pattern length as stored in PMax + 1
|
|
function PatternLength: integer;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
/// access to the pattern text as stored in Pattern
|
|
function PatternText: PUtf8Char;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
/// check if the pattern search was defined as case-insensitive
|
|
function CaseInsensitive: boolean;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
end;
|
|
|
|
/// stores an array of GLOB search engines
|
|
// - use SetMatchs() to initialize such an array from a CSV pattern text
|
|
TMatchDynArray = array of TMatch;
|
|
|
|
/// TMatch descendant owning a copy of the Pattern string to avoid GPF issues
|
|
TMatchStore = record
|
|
/// access to the research criteria
|
|
// - defined as a nested record (and not an object) to circumvent Delphi bug
|
|
Pattern: TMatch;
|
|
/// Pattern.Pattern PUtf8Char will point to this instance
|
|
PatternInstance: RawUtf8;
|
|
end;
|
|
|
|
TMatchStoreDynArray = array of TMatchStore;
|
|
|
|
/// stores several TMatch instances, from a set of glob patterns
|
|
TMatchs = class(TSynPersistent)
|
|
protected
|
|
fMatch: TMatchStoreDynArray;
|
|
fMatchCount: integer;
|
|
public
|
|
/// add once some glob patterns to the internal TMach list
|
|
// - aPatterns[] follows the IsMatch() syntax
|
|
constructor Create(const aPatterns: TRawUtf8DynArray;
|
|
CaseInsensitive: boolean); reintroduce; overload;
|
|
/// add once some glob patterns to the internal TMach list
|
|
// - aPatterns[] follows the IsMatch() syntax
|
|
procedure Subscribe(const aPatterns: TRawUtf8DynArray;
|
|
CaseInsensitive: boolean); overload; virtual;
|
|
/// add once some glob patterns to the internal TMach list
|
|
// - each CSV item in aPatterns follows the IsMatch() syntax
|
|
procedure Subscribe(const aPatternsCsv: RawUtf8;
|
|
CaseInsensitive: boolean); overload;
|
|
/// search patterns in the supplied UTF-8 text
|
|
// - returns -1 if no filter has been subscribed
|
|
// - returns -2 if there is no match on any previous pattern subscription
|
|
// - returns fMatch[] index, i.e. >= 0 number on first matching pattern
|
|
// - this method is thread-safe
|
|
function Match(const aText: RawUtf8): integer; overload;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
/// search patterns in the supplied UTF-8 text buffer
|
|
function Match(aText: PUtf8Char; aLen: integer): integer; overload;
|
|
/// search patterns in the supplied RTL string text
|
|
// - could be used on a TFileName for instance
|
|
// - will avoid any memory allocation if aText is small enough
|
|
function MatchString(const aText: string): integer;
|
|
end;
|
|
|
|
/// store a decoded URI as full path and file/resource name
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TUriMatchName = record
|
|
{$else}
|
|
TUriMatchName = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
public
|
|
Path, Name: TValuePUtf8Char;
|
|
/// to be called once Path has been populated to compute Name
|
|
procedure ParsePath;
|
|
end;
|
|
|
|
/// efficient GLOB path or resource name lockup for an URI
|
|
// - using mORMot fast TMatch engine
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TUriMatch = record
|
|
{$else}
|
|
TUriMatch = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
private
|
|
Init: TLightLock;
|
|
Names, Paths: TMatchDynArray;
|
|
procedure DoInit(csv: PUtf8Char; caseinsensitive: boolean);
|
|
public
|
|
/// main entry point of the GLOB resource/path URI pattern matching
|
|
// - will thread-safe initialize the internal TMatch instances if necessary
|
|
function Check(const csv: RawUtf8; const uri: TUriMatchName;
|
|
caseinsensitive: boolean): boolean;
|
|
end;
|
|
|
|
|
|
|
|
/// fill the Match[] dynamic array with all glob patterns supplied as CSV
|
|
// - returns how many patterns have been set in Match[|]
|
|
// - note that the CsvPattern instance should remain in memory, since it will
|
|
// be pointed to by the Match[].Pattern private field
|
|
function SetMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean;
|
|
out Match: TMatchDynArray; CsvSep: AnsiChar = ','): integer; overload;
|
|
|
|
/// fill the Match[0..MatchMax] static array with all glob patterns supplied as CSV
|
|
// - note that the CsvPattern instance should remain in memory, since it will
|
|
// be pointed to by the Match[].Pattern private field
|
|
function SetMatchs(CsvPattern: PUtf8Char; CaseInsensitive: boolean;
|
|
Match: PMatch; MatchMax: integer; CsvSep: AnsiChar = ','): integer; overload;
|
|
|
|
/// fill a TMatch instance with the next glob pattern supplied as CSV
|
|
function SetNextMatch(P: PUtf8Char; var Dest: TMatch;
|
|
CaseInsensitive, Reuse: boolean; CsvSep: AnsiChar): PUtf8Char;
|
|
|
|
/// search if one TMach is already registered in the Several[] dynamic array
|
|
function MatchExists(const One: TMatch; const Several: TMatchDynArray): boolean;
|
|
|
|
/// add one TMach if not already registered in the Several[] dynamic array
|
|
function MatchAdd(const One: TMatch; var Several: TMatchDynArray): boolean;
|
|
|
|
/// allocate one TMach in the Several[] dynamic array
|
|
function MatchNew(var Several: TMatchDynArray): PMatch;
|
|
|
|
/// returns TRUE if Match=nil or if any Match[].Match(Text) is TRUE
|
|
function MatchAny(const Match: TMatchDynArray; const Text: RawUtf8): boolean; overload;
|
|
{$ifdef HASINLINE} inline; {$endif}
|
|
|
|
/// returns TRUE if Match=nil or if any Match[].Match(Text, TextLen) is TRUE
|
|
function MatchAny(Match: PMatch; Text: PUtf8Char; TextLen: PtrInt): boolean; overload;
|
|
|
|
/// apply the CSV-supplied glob patterns to an array of RawUtf8
|
|
// - any text not matching the pattern will be deleted from the array
|
|
// - the patterns are specified as CSV, separated by ','
|
|
procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean;
|
|
var Values: TRawUtf8DynArray; CsvSep: AnsiChar = ','); overload;
|
|
|
|
/// apply the CSV-supplied glob patterns to an array of string
|
|
// - any text not matching the pattern will be deleted from the array
|
|
// - the patterns are specified as CSV, separated by ','
|
|
procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean;
|
|
var Values: TStringDynArray; CsvSep: AnsiChar = ','); overload;
|
|
|
|
/// return TRUE if the supplied content matches a glob pattern
|
|
// - ? Matches any single character
|
|
// - * Matches any contiguous characters
|
|
// - [abc] Matches a or b or c at that position
|
|
// - [^abc] Matches anything but a or b or c at that position
|
|
// - [!abc] Matches anything but a or b or c at that position
|
|
// - [a-e] Matches a through e at that position
|
|
// - [abcx-z] Matches a or b or c or x or y or or z, as does [a-cx-z]
|
|
// - 'ma?ch.*' would match match.exe, mavch.dat, march.on, etc..
|
|
// - 'this [e-n]s a [!zy]est' would match 'this is a test', but would not
|
|
// match 'this as a test' nor 'this is a zest'
|
|
// - consider using TMatch or TMatchs if you expect to reuse the pattern
|
|
function IsMatch(const Pattern, Text: RawUtf8;
|
|
CaseInsensitive: boolean = false): boolean;
|
|
|
|
/// return TRUE if the supplied content matches a glob pattern, using RTL strings
|
|
// - is a wrapper around IsMatch() with fast UTF-8 conversion
|
|
function IsMatchString(const Pattern, Text: string;
|
|
CaseInsensitive: boolean = false): boolean;
|
|
|
|
/// return TRUE if the supplied content matches one or several glob patterns
|
|
// - the patterns are specified as CSV, separated by ','
|
|
function IsMatchs(const CsvPattern, Text: RawUtf8;
|
|
CaseInsensitive: boolean = false; CsvSep: AnsiChar = ','): boolean; overload;
|
|
|
|
/// return TRUE if the supplied content matches one or several glob patterns
|
|
// - the patterns are specified as CSV, separated by ','
|
|
function IsMatchs(CsvPattern, Text: PUtf8Char; TextLen: PtrInt;
|
|
CaseInsensitive: boolean = false; CsvSep: AnsiChar = ','): boolean; overload;
|
|
|
|
type
|
|
/// available pronunciations for our fast Soundex implementation
|
|
TSynSoundExPronunciation = (
|
|
sndxEnglish,
|
|
sndxFrench,
|
|
sndxSpanish,
|
|
sndxNone);
|
|
|
|
TSoundExValues = array[0..ord('Z') - ord('B')] of byte;
|
|
|
|
PSoundExValues = ^TSoundExValues;
|
|
PSynSoundEx = ^TSynSoundEx;
|
|
|
|
/// fast search of a text value, using the Soundex approximation mechanism
|
|
// - Soundex is a phonetic algorithm for indexing names by sound,
|
|
// as pronounced in a given language. The goal is for homophones to be
|
|
// encoded to the same representation so that they can be matched despite
|
|
// minor differences in spelling
|
|
// - this implementation is very fast and can be used e.g. to parse and search
|
|
// in a huge text buffer
|
|
// - this version also handles french and spanish pronunciations on request,
|
|
// which differs from default Soundex, i.e. English
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TSynSoundEx = record
|
|
{$else}
|
|
TSynSoundEx = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
private
|
|
Search, FirstChar: cardinal;
|
|
fValues: PSoundExValues;
|
|
public
|
|
/// prepare for a Soundex search
|
|
// - you can specify another language pronunciation than default english
|
|
function Prepare(UpperValue: PAnsiChar;
|
|
Lang: TSynSoundExPronunciation = sndxEnglish): boolean; overload;
|
|
/// prepare for a custom Soundex search
|
|
// - you can specify any language pronunciation from raw TSoundExValues array
|
|
function Prepare(UpperValue: PAnsiChar; Lang: PSoundExValues): boolean; overload;
|
|
/// return true if prepared value is contained in a text buffer
|
|
// (UTF-8 encoded), by using the SoundEx comparison algorithm
|
|
// - search prepared value at every word beginning in U^
|
|
function Utf8(U: PUtf8Char): boolean;
|
|
/// return true if prepared value is contained in a ANSI text buffer
|
|
// by using the SoundEx comparison algorithm
|
|
// - search prepared value at every word beginning in A^
|
|
function Ansi(A: PAnsiChar): boolean;
|
|
end;
|
|
|
|
|
|
/// Retrieve the Soundex value of a text word, from Ansi buffer
|
|
// - Return the soundex value as an easy to use cardinal value, 0 if the
|
|
// incoming string contains no valid word
|
|
// - if next is defined, its value is set to the end of the encoded word
|
|
// (so that you can call again this function to encode a full sentence)
|
|
function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar = nil;
|
|
Lang: TSynSoundExPronunciation = sndxEnglish): cardinal; overload;
|
|
|
|
/// Retrieve the Soundex value of a text word, from Ansi buffer
|
|
// - Return the soundex value as an easy to use cardinal value, 0 if the
|
|
// incoming string contains no valid word
|
|
// - if next is defined, its value is set to the end of the encoded word
|
|
// (so that you can call again this function to encode a full sentence)
|
|
function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar;
|
|
Lang: PSoundExValues): cardinal; overload;
|
|
|
|
/// Retrieve the Soundex value of a text word, from UTF-8 buffer
|
|
// - Return the soundex value as an easy to use cardinal value, 0 if the
|
|
// incoming string contains no valid word
|
|
// - if next is defined, its value is set to the end of the encoded word
|
|
// (so that you can call again this function to encode a full sentence)
|
|
// - very fast: all UTF-8 decoding is handled on the fly
|
|
function SoundExUtf8(U: PUtf8Char; next: PPUtf8Char = nil;
|
|
Lang: TSynSoundExPronunciation = sndxEnglish): cardinal;
|
|
|
|
const
|
|
/// number of bits to use for each interesting soundex char
|
|
// - default is to use 8-bit, i.e. 4 soundex chars, which is the
|
|
// standard approach
|
|
// - for a more detailed soundex, use 4 bits resolution, which will
|
|
// compute up to 7 soundex chars in a cardinal (that's our choice)
|
|
SOUNDEX_BITS = 4;
|
|
|
|
|
|
{ ****************** Efficient CSV Parsing using RTTI }
|
|
|
|
/// parse a CSV buffer into a TDynArray of records using its RTTI fields
|
|
// - TypeInfo should have proper fields description, e.g. from Delphi 2010
|
|
// extended RTTI or mormot.core.rtti.pas' Rtti.RegisterFromText()
|
|
// - first CSV line has headers matching the needed case-insensitive field names
|
|
// - following CSV lines will be read and parsed into the dynamic array records
|
|
// - any unknown header name within the RTTI fields will be ignored
|
|
// - you can optionally intern all RawUtf8 values to reduce memory consumption
|
|
function TDynArrayLoadCsv(var Value: TDynArray; Csv: PUtf8Char;
|
|
Intern: TRawUtf8Interning = nil): boolean;
|
|
|
|
/// parse a CSV UTF-8 string into a dynamic array of records using its RTTI fields
|
|
// - just a wrapper around DynArrayLoadCsv() with a temporary TDynArray
|
|
function DynArrayLoadCsv(var Value; const Csv: RawUtf8; TypeInfo: PRttiInfo;
|
|
Intern: TRawUtf8Interning = nil): boolean;
|
|
|
|
|
|
{ ****************** Versatile Expression Search Engine }
|
|
|
|
type
|
|
/// exception type used by TExprParser
|
|
EExprParser = class(ESynException);
|
|
|
|
/// identify an expression search engine node type, as used by TExprParser
|
|
TExprNodeType = (
|
|
entWord,
|
|
entNot,
|
|
entOr,
|
|
entAnd);
|
|
|
|
/// results returned by TExprParserAbstract.Parse method
|
|
TExprParserResult = (
|
|
eprSuccess,
|
|
eprNoExpression,
|
|
eprMissingParenthesis,
|
|
eprTooManyParenthesis,
|
|
eprMissingFinalWord,
|
|
eprInvalidExpression,
|
|
eprUnknownVariable,
|
|
eprUnsupportedOperator,
|
|
eprInvalidConstantOrVariable);
|
|
|
|
TParserAbstract = class;
|
|
|
|
/// stores an expression search engine node, as used by TExprParser
|
|
TExprNode = class(TSynPersistent)
|
|
protected
|
|
fNext: TExprNode;
|
|
fNodeType: TExprNodeType;
|
|
function Append(node: TExprNode): boolean;
|
|
public
|
|
/// initialize a node for the search engine
|
|
constructor Create(nodeType: TExprNodeType); reintroduce;
|
|
/// recursively destroys the linked list of nodes (i.e. Next)
|
|
destructor Destroy; override;
|
|
/// browse all nodes until Next = nil
|
|
function Last: TExprNode;
|
|
/// points to the next node in the parsed tree
|
|
property Next: TExprNode
|
|
read fNext;
|
|
/// what is actually stored in this node
|
|
property NodeType: TExprNodeType
|
|
read fNodeType;
|
|
end;
|
|
|
|
/// abstract class to handle word search, as used by TExprParser
|
|
TExprNodeWordAbstract = class(TExprNode)
|
|
protected
|
|
fOwner: TParserAbstract;
|
|
fWord: RawUtf8;
|
|
/// should be set from actual data before TExprParser.Found is called
|
|
fFound: boolean;
|
|
function ParseWord: TExprParserResult; virtual; abstract;
|
|
public
|
|
/// you should override this virtual constructor for proper initialization
|
|
constructor Create(aOwner: TParserAbstract; const aWord: RawUtf8); reintroduce; virtual;
|
|
end;
|
|
|
|
/// class-reference type (metaclass) for a TExprNode
|
|
// - allow to customize the actual searching process for entWord
|
|
TExprNodeWordClass = class of TExprNodeWordAbstract;
|
|
|
|
/// parent class of TExprParserAbstract
|
|
TParserAbstract = class(TSynPersistent)
|
|
protected
|
|
fExpression, fCurrentWord, fAndWord, fOrWord, fNotWord: RawUtf8;
|
|
fCurrent: PUtf8Char;
|
|
fCurrentError: TExprParserResult;
|
|
fFirstNode: TExprNode;
|
|
fWordClass: TExprNodeWordClass;
|
|
fWords: array of TExprNodeWordAbstract;
|
|
fWordCount: integer;
|
|
fNoWordIsAnd: boolean;
|
|
fFoundStack: array[byte] of boolean; // simple stack-based virtual machine
|
|
procedure ParseNextCurrentWord; virtual; abstract;
|
|
function ParseExpr: TExprNode;
|
|
function ParseFactor: TExprNode;
|
|
function ParseTerm: TExprNode;
|
|
procedure Clear; virtual;
|
|
// override this method to initialize fWordClass and fAnd/Or/NotWord
|
|
procedure Initialize; virtual; abstract;
|
|
/// perform the expression search over TExprNodeWord.fFound flags
|
|
// - warning: caller should check that fFirstNode<>nil (e.g. WordCount>0)
|
|
function Execute: boolean;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
public
|
|
/// initialize an expression parser
|
|
constructor Create; override;
|
|
/// finalize the expression parser
|
|
destructor Destroy; override;
|
|
/// initialize the parser from a given text expression
|
|
function Parse(const aExpression: RawUtf8): TExprParserResult;
|
|
/// try this parser class on a given text expression
|
|
// - returns '' on success, or an explicit error message (e.g.
|
|
// 'Missing parenthesis')
|
|
class function ParseError(const aExpression: RawUtf8): RawUtf8;
|
|
/// the associated text expression used to define the search
|
|
property Expression: RawUtf8
|
|
read fExpression;
|
|
/// how many words did appear in the search expression
|
|
property WordCount: integer
|
|
read fWordCount;
|
|
end;
|
|
|
|
/// abstract class to parse a text expression into nodes
|
|
// - you should inherit this class to provide actual text search
|
|
// - searched expressions can use parenthesis and &=AND -=WITHOUT +=OR operators,
|
|
// e.g. '((w1 & w2) - w3) + w4' means ((w1 and w2) without w3) or w4
|
|
// - no operator is handled like a AND, e.g. 'w1 w2' = 'w1 & w2'
|
|
TExprParserAbstract = class(TParserAbstract)
|
|
protected
|
|
procedure ParseNextCurrentWord; override;
|
|
// may be overridden to provide custom words escaping (e.g. handle quotes)
|
|
procedure ParseNextWord; virtual;
|
|
procedure Initialize; override;
|
|
end;
|
|
|
|
/// search expression engine using TMatch for the actual word searches
|
|
TExprParserMatch = class(TExprParserAbstract)
|
|
protected
|
|
fCaseSensitive: boolean;
|
|
fMatchedLastSet: integer;
|
|
procedure Initialize; override;
|
|
public
|
|
/// initialize the search engine
|
|
constructor Create(aCaseSensitive: boolean = true); reintroduce;
|
|
/// returns TRUE if the expression is within the text buffer
|
|
function Search(aText: PUtf8Char; aTextLen: PtrInt): boolean; overload;
|
|
/// returns TRUE if the expression is within the text buffer
|
|
function Search(const aText: RawUtf8): boolean; overload;
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
end;
|
|
|
|
|
|
const
|
|
/// may be used when overriding TExprParserAbstract.ParseNextWord method
|
|
PARSER_STOPCHAR = ['&', '+', '-', '(', ')'];
|
|
|
|
function ToText(r: TExprParserResult): PShortString; overload;
|
|
function ToUtf8(r: TExprParserResult): RawUtf8; overload;
|
|
|
|
|
|
|
|
{ ****************** Bloom Filter Probabilistic Index }
|
|
|
|
type
|
|
/// implements a thread-safe Bloom Filter storage
|
|
// - a "Bloom Filter" is a space-efficient probabilistic data structure,
|
|
// that is used to test whether an element is a member of a set. False positive
|
|
// matches are possible, but false negatives are not. Elements can be added to
|
|
// the set, but not removed. Typical use cases are to avoid unnecessary
|
|
// slow disk or network access if possible, when a lot of items are involved.
|
|
// - memory use is very low, when compared to storage of all values: fewer
|
|
// than 10 bits per element are required for a 1% false positive probability,
|
|
// independent of the size or number of elements in the set - for instance,
|
|
// storing 10,000,000 items presence with 1% of false positive ratio
|
|
// would consume only 11.5 MB of memory, using 7 hash functions
|
|
// - use Insert() methods to add an item to the internal bits array, and
|
|
// Reset() to clear all bits array, if needed
|
|
// - MayExist() function would check if the supplied item was probably set
|
|
// - SaveTo() and LoadFrom() methods allow transmission of the bits array,
|
|
// for a disk/database storage or transmission over a network
|
|
// - internally, several (hardware-accelerated) crc32c hash functions will be
|
|
// used, with some random seed values, to simulate several hashing functions;
|
|
// you can customize the hash function if needed
|
|
// - all methods are thread-safe, and MayExist can be concurrent (via a TRWLock)
|
|
TSynBloomFilter = class(TSynPersistent)
|
|
private
|
|
fSafe: TRWLock; // need an upgradable lock for TSynBloomFilterDiff
|
|
fHasher: THasher;
|
|
fSize: cardinal;
|
|
fBits: cardinal;
|
|
fHashFunctions: cardinal;
|
|
fInserted: cardinal;
|
|
fFalsePositivePercent: double;
|
|
fStore: RawByteString;
|
|
public
|
|
/// don't call this raw constructor, but its overloads
|
|
constructor Create; overload; override;
|
|
/// initialize the internal bits storage for a given number of items
|
|
// - by default, internal bits array size will be guess from a 1 % false
|
|
// positive rate - but you may specify another value, to reduce memory use
|
|
// - this constructor would compute and initialize Bits and HashFunctions
|
|
// corresponding to the expected false positive ratio
|
|
// - you can specify a custom hash function if you find that the default
|
|
// crc32c() has too many collisions: but SaveTo/LoadFrom will be tied to it;
|
|
// see e.g. CryptCrc32(caMd5/caSha1) from mormot.crypt.secure
|
|
constructor Create(aSize: integer; aFalsePositivePercent: double = 1;
|
|
aHasher: THasher = nil); reintroduce; overload;
|
|
/// initialize the internal bits storage from a SaveTo() binary buffer
|
|
// - this constructor will initialize the internal bits array calling LoadFrom()
|
|
// - you can specify a custom hash function to match with the one used before
|
|
constructor Create(const aSaved: RawByteString; aMagic: cardinal = $B1003F11;
|
|
aHasher: THasher = nil); reintroduce; overload;
|
|
/// add an item in the internal bits array storage
|
|
// - this method is thread-safe
|
|
procedure Insert(const aValue: RawByteString); overload;
|
|
/// add an item in the internal bits array storage
|
|
// - this method is thread-safe
|
|
procedure Insert(aValue: pointer; aValueLen: integer); overload; virtual;
|
|
/// clear the internal bits array storage
|
|
// - you may call this method after some time, if some items may have
|
|
// been removed, to reduce false positives
|
|
// - this method is thread-safe
|
|
procedure Reset; virtual;
|
|
/// returns TRUE if the supplied items was probably set via Insert()
|
|
// - some false positive may occur, but not much than FalsePositivePercent
|
|
// - this method is thread-safe, and allow concurrent calls (via a TRWLock)
|
|
function MayExist(const aValue: RawByteString): boolean; overload;
|
|
{$ifdef HASINLINE} inline; {$endif}
|
|
/// returns TRUE if the supplied items was probably set via Insert()
|
|
// - some false positive may occur, but not much than FalsePositivePercent
|
|
// - this method is thread-safe, and allow concurrent calls (via a TRWLock)
|
|
function MayExist(aValue: pointer; aValueLen: integer): boolean; overload;
|
|
/// store the internal bits array into a binary buffer
|
|
// - may be used to transmit or store the state of a dataset, avoiding
|
|
// to recompute all Insert() at program startup, or to synchronize
|
|
// networks nodes information and reduce the number of remote requests
|
|
// - this method is thread-safe, and won't block MayExist (via a TRWLock)
|
|
function SaveTo(aMagic: cardinal = $B1003F11): RawByteString; overload;
|
|
/// store the internal bits array into a binary buffer
|
|
// - may be used to transmit or store the state of a dataset, avoiding
|
|
// to recompute all Insert() at program startup, or to synchronize
|
|
// networks nodes information and reduce the number of remote requests
|
|
// - this method is thread-safe, and won't block MayExist (via a TRWLock)
|
|
procedure SaveTo(aDest: TBufferWriter;
|
|
aMagic: cardinal = $B1003F11); overload;
|
|
/// read the internal bits array from a binary buffer
|
|
// - as previously serialized by the SaveTo method
|
|
// - may be used to transmit or store the state of a dataset
|
|
function LoadFrom(const aSaved: RawByteString;
|
|
aMagic: cardinal = $B1003F11): boolean; overload;
|
|
/// read the internal bits array from a binary buffer
|
|
// - as previously serialized by the SaveTo method
|
|
// - may be used to transmit or store the state of a dataset
|
|
function LoadFrom(P: PByte; PLen: integer;
|
|
aMagic: cardinal = $B1003F11): boolean; overload; virtual;
|
|
published
|
|
/// maximum number of items which are expected to be inserted
|
|
property Size: cardinal
|
|
read fSize;
|
|
/// expected percentage (1..100) of false positive results for MayExists()
|
|
property FalsePositivePercent: double
|
|
read fFalsePositivePercent;
|
|
/// number of bits stored in the internal bits array
|
|
property Bits: cardinal
|
|
read fBits;
|
|
/// how many hash functions would be applied for each Insert()
|
|
property HashFunctions: cardinal
|
|
read fHashFunctions;
|
|
/// how many times the Insert() method has been called
|
|
property Inserted: cardinal
|
|
read fInserted;
|
|
end;
|
|
|
|
/// implements a thread-safe differential Bloom Filter storage
|
|
// - this inherited class is able to compute incremental serialization of
|
|
// its internal bits array, to reduce network use
|
|
// - an obfuscated revision counter is used to identify storage history
|
|
TSynBloomFilterDiff = class(TSynBloomFilter)
|
|
protected
|
|
fRevision: Int64;
|
|
fSnapShotAfterMinutes: cardinal;
|
|
fSnapshotAfterInsertCount: cardinal;
|
|
fSnapshotTimestamp: Int64;
|
|
fSnapshotInsertCount: cardinal;
|
|
fKnownRevision: Int64;
|
|
fKnownStore: RawByteString;
|
|
public
|
|
/// add an item in the internal bits array storage
|
|
// - this overloaded thread-safe method would compute fRevision
|
|
procedure Insert(aValue: pointer; aValueLen: integer); override;
|
|
/// clear the internal bits array storage
|
|
// - this overloaded thread-safe method would reset fRevision
|
|
procedure Reset; override;
|
|
/// store the internal bits array into an incremental binary buffer
|
|
// - here the difference from a previous SaveToDiff revision will be computed
|
|
// - if aKnownRevision is outdated (e.g. if equals 0), the whole bits array
|
|
// would be returned, and around 10 bits per item would be transmitted
|
|
// (for 1% false positive ratio)
|
|
// - incremental retrieval would then return around 10 bytes per newly added
|
|
// item since the last snapshot reference state (with 1% ratio, i.e. 7 hash
|
|
// functions)
|
|
function SaveToDiff(const aKnownRevision: Int64): RawByteString;
|
|
/// use the current internal bits array state as known revision
|
|
// - is done the first time SaveToDiff() is called, then after 1/32th of
|
|
// the filter size has been inserted (see SnapshotAfterInsertCount property),
|
|
// or after SnapShotAfterMinutes property timeout period
|
|
procedure DiffSnapshot;
|
|
/// retrieve the revision number from an incremental binary buffer
|
|
// - returns 0 if the supplied binary buffer does not match this bloom filter
|
|
function DiffKnownRevision(const aDiff: RawByteString): Int64;
|
|
/// read the internal bits array from an incremental binary buffer
|
|
// - as previously serialized by the SaveToDiff() method
|
|
// - may be used to transmit or store the state of a dataset
|
|
// - returns false if the supplied content is incorrect, e.g. if the known
|
|
// revision is deprecated
|
|
function LoadFromDiff(const aDiff: RawByteString): boolean;
|
|
/// the opaque revision number of this internal storage
|
|
// - is in fact the Unix timestamp shifted by 31 bits, and an incremental
|
|
// counter: this pattern will allow consistent IDs over several ServPanels
|
|
property Revision: Int64
|
|
read fRevision;
|
|
/// after how many Insert() the internal bits array storage should be
|
|
// promoted as known revision
|
|
// - equals Size div 32 by default
|
|
property SnapshotAfterInsertCount: cardinal
|
|
read fSnapshotAfterInsertCount write fSnapshotAfterInsertCount;
|
|
/// after how many time the internal bits array storage should be
|
|
// promoted as known revision
|
|
// - equals 30 minutes by default
|
|
property SnapShotAfterMinutes: cardinal
|
|
read fSnapShotAfterMinutes write fSnapShotAfterMinutes;
|
|
end;
|
|
|
|
|
|
/// RLE compression of a memory buffer containing mostly zeros
|
|
// - will store the number of consecutive zeros instead of plain zero bytes
|
|
// - used for spare bit sets, e.g. TSynBloomFilter serialization
|
|
// - will also compute the crc32c of the supplied content
|
|
// - use ZeroDecompress() to expand the compressed result
|
|
// - resulting content would be at most 14 bytes bigger than the input
|
|
// - you may use this function before SynLZ compression
|
|
procedure ZeroCompress(P: PAnsiChar; Len: integer; Dest: TBufferWriter);
|
|
|
|
/// RLE uncompression of a memory buffer containing mostly zeros
|
|
// - returns Dest='' if P^ is not a valid ZeroCompress() function result
|
|
// - used for spare bit sets, e.g. TSynBloomFilter serialization
|
|
// - will also check the crc32c of the supplied content
|
|
procedure ZeroDecompress(P: PByte; Len: integer; var Dest: RawByteString);
|
|
|
|
/// RLE compression of XORed memory buffers resulting in mostly zeros
|
|
// - will perform ZeroCompress(Dest^ := New^ xor Old^) without any temporary
|
|
// memory allocation
|
|
// - is used e.g. by TSynBloomFilterDiff.SaveToDiff() in incremental mode
|
|
// - will also compute the crc32c of the supplied content
|
|
procedure ZeroCompressXor(New, Old: PAnsiChar; Len: cardinal;
|
|
Dest: TBufferWriter);
|
|
|
|
/// RLE uncompression and ORing of a memory buffer containing mostly zeros
|
|
// - will perform Dest^ := Dest^ or ZeroDecompress(P^) without any temporary
|
|
// memory allocation
|
|
// - is used e.g. by TSynBloomFilterDiff.LoadFromDiff() in incremental mode
|
|
// - returns false if P^ is not a valid ZeroCompress/ZeroCompressXor() result
|
|
// - will also check the crc32c of the supplied content
|
|
function ZeroDecompressOr(P, Dest: PAnsiChar; Len, DestLen: integer): boolean;
|
|
|
|
|
|
{ ****************** Binary Buffers Delta Compression }
|
|
|
|
const
|
|
/// normal pattern search depth for DeltaCompress()
|
|
// - gives good results on most content
|
|
DELTA_LEVEL_FAST = 100;
|
|
/// brutal pattern search depth for DeltaCompress()
|
|
// - may become very slow, with minor benefit, on huge content
|
|
DELTA_LEVEL_BEST = 500;
|
|
/// 2MB as internal chunks/window default size for DeltaCompress()
|
|
// - will use up to 9 MB of RAM during DeltaCompress() - none in DeltaExtract()
|
|
DELTA_BUF_DEFAULT = 2 shl 20;
|
|
|
|
/// compute difference of two binary buffers
|
|
// - returns '=' for equal buffers, or an optimized binary delta
|
|
// - DeltaExtract() could be used later on to compute New from Old + Delta
|
|
function DeltaCompress(const New, Old: RawByteString;
|
|
Level: integer = DELTA_LEVEL_FAST;
|
|
BufSize: integer = DELTA_BUF_DEFAULT): RawByteString; overload;
|
|
|
|
/// compute difference of two binary buffers
|
|
// - returns '=' for equal buffers, or an optimized binary delta
|
|
// - DeltaExtract() could be used later on to compute New from Old
|
|
function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize: integer;
|
|
Level: integer = DELTA_LEVEL_FAST;
|
|
BufSize: integer = DELTA_BUF_DEFAULT): RawByteString; overload;
|
|
|
|
/// compute difference of two binary buffers
|
|
// - returns '=' for equal buffers, or an optimized binary delta
|
|
// - DeltaExtract() could be used later on to compute New from Old + Delta
|
|
// - caller should call Freemem(Delta) once finished with the output buffer
|
|
function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize: integer;
|
|
out Delta: PAnsiChar; Level: integer = DELTA_LEVEL_FAST;
|
|
BufSize: integer = DELTA_BUF_DEFAULT): integer; overload;
|
|
|
|
type
|
|
/// result of function DeltaExtract()
|
|
TDeltaError = (
|
|
dsSuccess,
|
|
dsCrcCopy,
|
|
dsCrcComp,
|
|
dsCrcBegin,
|
|
dsCrcEnd,
|
|
dsCrcExtract,
|
|
dsFlag,
|
|
dsLen);
|
|
|
|
/// returns how many bytes a DeltaCompress() result will expand to
|
|
function DeltaExtractSize(const Delta: RawByteString): integer; overload;
|
|
|
|
/// returns how many bytes a DeltaCompress() result will expand to
|
|
function DeltaExtractSize(Delta: PAnsiChar): integer; overload;
|
|
|
|
/// apply the delta binary as computed by DeltaCompress()
|
|
// - decompression don't use any RAM, will perform crc32c check, and is very fast
|
|
// - return dsSuccess if was uncompressed to aOutUpd as expected
|
|
function DeltaExtract(const Delta, Old: RawByteString;
|
|
out New: RawByteString): TDeltaError; overload;
|
|
|
|
/// low-level apply the delta binary as computed by DeltaCompress()
|
|
// - New should already be allocated with DeltaExtractSize(Delta) bytes
|
|
// - as such, expect Delta, Old and New to be <> nil, and Delta <> '='
|
|
// - return dsSuccess if was uncompressed to aOutUpd as expected
|
|
function DeltaExtract(Delta, Old, New: PAnsiChar): TDeltaError; overload;
|
|
|
|
function ToText(err: TDeltaError): PShortString; overload;
|
|
|
|
|
|
{ ****************** TDynArray Low-Level Binary Search and Iteration }
|
|
|
|
/// wrap a simple dynamic array BLOB content as stored by TDynArray.SaveTo
|
|
// - a "simple" dynamic array contains data with no reference count, e.g. byte,
|
|
// word, integer, cardinal, Int64, double or Currency
|
|
// - same as TDynArray.LoadFrom() with no memory allocation nor memory copy: so
|
|
// is much faster than creating a temporary dynamic array to load the data
|
|
// - will return nil if no or invalid data, or a pointer to the data
|
|
// array otherwise, with the items number stored in Count and the individual
|
|
// element size in ElemSize (e.g. 2 for a TWordDynArray)
|
|
// - note: mORMot 1.18 Hash32 is not stored any more
|
|
function SimpleDynArrayLoadFrom(Source: PAnsiChar; aTypeInfo: PRttiInfo;
|
|
out Count, ElemSize: PtrInt): pointer;
|
|
|
|
/// wrap an integer dynamic array BLOB content as stored by TDynArray.SaveTo
|
|
// - same as TDynArray.LoadFrom() with no memory allocation nor memory copy: so
|
|
// is much faster than creating a temporary dynamic array to load the data
|
|
// - will return nil if no or invalid data, or a pointer to the integer
|
|
// array otherwise, with the items number stored in Count
|
|
// - slightly faster than SimpleDynArrayLoadFrom(Source,TypeInfo(TIntegerDynArray),Count)
|
|
function IntegerDynArrayLoadFrom(Source: PAnsiChar; var Count: integer): PIntegerArray;
|
|
|
|
/// search in a RawUtf8 dynamic array BLOB content as stored by TDynArray.SaveTo
|
|
// - same as search within TDynArray.LoadFrom() with no memory allocation nor
|
|
// memory copy: so is much faster
|
|
// - will return -1 if no match or invalid data, or the matched entry index
|
|
function RawUtf8DynArrayLoadFromContains(Source: PAnsiChar;
|
|
Value: PUtf8Char; ValueLen: PtrInt; CaseSensitive: boolean): PtrInt;
|
|
|
|
type
|
|
/// allows to iterate over a TDynArray.SaveTo binary buffer
|
|
// - may be used as alternative to TDynArray.LoadFrom, if you don't want
|
|
// to allocate all items at once, but retrieve items one by one
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TDynArrayLoadFrom = record
|
|
{$else}
|
|
TDynArrayLoadFrom = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
private
|
|
ArrayLoad: TRttiBinaryLoad;
|
|
public
|
|
/// how many items were saved in the TDynArray.SaveTo binary buffer
|
|
// - equals -1 if Init() failed to deserialize its header
|
|
Count: integer;
|
|
/// the zero-based index of the current item pointed by next Step() call
|
|
// - is in range 0..Count-1 until Step() returns false
|
|
Current: integer;
|
|
/// current read position in the TDynArray.SaveTo binary buffer
|
|
// - after Step() returned false, points just after the binary buffer,
|
|
// like a regular TDynArray.LoadFrom
|
|
Reader: TFastReader;
|
|
/// RTTI information of the deserialized dynamic array
|
|
ArrayRtti: TRttiCustom;
|
|
/// initialize iteration over a TDynArray.SaveTo binary buffer
|
|
// - returns true on success, with Count and Position being set
|
|
// - returns false if the supplied binary buffer is not correct
|
|
// - you should specify SourceMaxLen to avoid any buffer overflow
|
|
function Init(ArrayTypeInfo: PRttiInfo; Source: PAnsiChar;
|
|
SourceMaxLen: PtrInt): boolean; overload;
|
|
/// initialize iteration over a TDynArray.SaveTo binary buffer
|
|
// - returns true on success, with Count and Position being set
|
|
// - returns false if the supplied binary buffer is not correct
|
|
function Init(ArrayTypeInfo: PRttiInfo;
|
|
const Source: RawByteString): boolean; overload;
|
|
/// iterate over the current stored item
|
|
// - Item should point to a variable of the exact item type stored in this
|
|
// dynamic array
|
|
// - returns true if Item was filled with one value, or false if all
|
|
// items were read, and Position contains the end of the binary buffer
|
|
function Step(Item: pointer): boolean;
|
|
/// extract the first field value of the current stored item
|
|
// - this function won't increase the internal Current pointer
|
|
// - returns true if Field was filled with one value, or false if all
|
|
// items were read, and Position contains the end of the binary buffer
|
|
// - Field is expected to be of ArrayRtti.ArrayFirstField type
|
|
// - could be called before Step(), to pre-allocate a new item instance,
|
|
// or update an existing instance
|
|
function FirstField(Field: pointer): boolean;
|
|
end;
|
|
|
|
|
|
{ ****************** TSynFilter and TSynValidate Processing Classes }
|
|
|
|
type
|
|
TSynFilterOrValidate = class;
|
|
|
|
TSynFilterOrValidateObjArray = array of TSynFilterOrValidate;
|
|
TSynFilterOrValidateObjArrayArray = array of TSynFilterOrValidateObjArray;
|
|
|
|
/// will define a filter (transformation) or a validation process to be
|
|
// applied to a database Record content (typically a TOrm)
|
|
// - the optional associated parameters are to be supplied JSON-encoded
|
|
TSynFilterOrValidate = class
|
|
protected
|
|
fParameters: RawUtf8;
|
|
/// children must override this method in order to parse the JSON-encoded
|
|
// parameters, and store it in protected field values
|
|
procedure SetParameters(const Value: RawUtf8); virtual;
|
|
public
|
|
/// add the filter or validation process to a list, checking if not present
|
|
// - if an instance with the same class type and parameters is already
|
|
// registered, will call aInstance.Free and return the exising instance
|
|
// - if there is no similar instance, will add it to the list and return it
|
|
function AddOnce(var aObjArray: TSynFilterOrValidateObjArray;
|
|
aFreeIfAlreadyThere: boolean = true): TSynFilterOrValidate;
|
|
public
|
|
/// initialize the filter (transformation) or validation instance
|
|
// - most of the time, optional parameters may be specified as JSON,
|
|
// possibly with the extended MongoDB syntax
|
|
constructor Create(const aParameters: RawUtf8 = ''); overload; virtual;
|
|
/// initialize the filter or validation instance
|
|
/// - this overloaded constructor will allow to easily set the parameters
|
|
constructor CreateUtf8(const Format: RawUtf8; const Args, Params: array of const); overload;
|
|
/// the optional associated parameters, supplied as JSON-encoded
|
|
property Parameters: RawUtf8
|
|
read fParameters write SetParameters;
|
|
end;
|
|
|
|
/// will define a validation to be applied to a Record (typically a TOrm)
|
|
// field content
|
|
// - a typical usage is to validate an email or IP address e.g.
|
|
// - the optional associated parameters are to be supplied JSON-encoded
|
|
TSynValidate = class(TSynFilterOrValidate)
|
|
public
|
|
/// perform the validation action to the specified value
|
|
// - the value is expected by be UTF-8 text, as generated by
|
|
// TPropInfo.GetValue e.g.
|
|
// - if the validation failed, must return FALSE and put some message in
|
|
// ErrorMsg (translated into the current language: you could e.g. use
|
|
// a resourcestring and a SysUtils.Format() call for automatic translation
|
|
// via the mORMoti18n unit - you can leave ErrorMsg='' to trigger a
|
|
// generic error message from clas name ('"Validate email" rule failed'
|
|
// for TSynValidateEmail class e.g.)
|
|
// - if the validation passed, will return TRUE
|
|
function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean;
|
|
virtual; abstract;
|
|
end;
|
|
|
|
/// points to a TSynValidate variable
|
|
// - used e.g. as optional parameter to TOrm.Validate/FilterAndValidate
|
|
PSynValidate = ^TSynValidate;
|
|
|
|
/// IP v4 address validation to be applied to a Record field content
|
|
// (typically a TOrm)
|
|
// - this versions expect no parameter
|
|
TSynValidateIPAddress = class(TSynValidate)
|
|
protected
|
|
public
|
|
/// perform the IP Address validation action to the specified value
|
|
function Process(aFieldIndex: integer; const Value: RawUtf8;
|
|
var ErrorMsg: string): boolean; override;
|
|
end;
|
|
|
|
/// IP address validation to be applied to a Record field content
|
|
// (typically a TOrm)
|
|
// - optional JSON encoded parameters are "AllowedTLD" or "ForbiddenTLD",
|
|
// expecting a CSV lis of Top-Level-Domain (TLD) names, e.g.
|
|
// $ '{"AllowedTLD":"com,org,net","ForbiddenTLD":"fr"}'
|
|
// $ '{AnyTLD:true,ForbiddenDomains:"mailinator.com,yopmail.com"}'
|
|
// - this will process a validation according to RFC 822 (calling the
|
|
// IsValidEmail() function) then will check for the TLD to be in one of
|
|
// the Top-Level domains ('.com' and such) or a two-char country, and
|
|
// then will check the TLD according to AllowedTLD and ForbiddenTLD
|
|
TSynValidateEmail = class(TSynValidate)
|
|
private
|
|
fAllowedTLD: RawUtf8;
|
|
fForbiddenTLD: RawUtf8;
|
|
fForbiddenDomains: RawUtf8;
|
|
fAnyTLD: boolean;
|
|
protected
|
|
/// decode all published properties from their JSON representation
|
|
procedure SetParameters(const Value: RawUtf8); override;
|
|
public
|
|
/// perform the Email Address validation action to the specified value
|
|
// - call IsValidEmail() function and check for the supplied TLD
|
|
function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; override;
|
|
/// allow any TLD to be allowed, even if not a generic TLD (.com,.net ...)
|
|
// - this may be mandatory since already over 1,300 new gTLD names or
|
|
// "strings" could become available in the next few years: there is a
|
|
// growing list of new gTLDs available at
|
|
// @http://newgtlds.icann.org/en/program-status/delegated-strings
|
|
// - the only restriction is that it should be ascii characters
|
|
property AnyTLD: boolean
|
|
read fAnyTLD write fAnyTLD;
|
|
/// a CSV list of allowed TLD
|
|
// - if accessed directly, should be set as lower case values
|
|
// - e.g. 'com,org,net'
|
|
property AllowedTLD: RawUtf8
|
|
read fAllowedTLD write fAllowedTLD;
|
|
/// a CSV list of forbidden TLD
|
|
// - if accessed directly, should be set as lower case values
|
|
// - e.g. 'fr'
|
|
property ForbiddenTLD: RawUtf8
|
|
read fForbiddenTLD write fForbiddenTLD;
|
|
/// a CSV list of forbidden domain names
|
|
// - if accessed directly, should be set as lower case values
|
|
// - not only the TLD, but whole domains like 'cracks.ru,hotmail.com' or such
|
|
property ForbiddenDomains: RawUtf8
|
|
read fForbiddenDomains write fForbiddenDomains;
|
|
end;
|
|
|
|
/// glob case-sensitive pattern validation of a Record field content
|
|
// - parameter is NOT JSON encoded, but is some basic TMatch glob pattern
|
|
// - ? Matches any single character
|
|
// - * Matches any contiguous characters
|
|
// - [abc] Matches a or b or c at that position
|
|
// - [^abc] Matches anything but a or b or c at that position
|
|
// - [!abc] Matches anything but a or b or c at that position
|
|
// - [a-e] Matches a through e at that position
|
|
// - [abcx-z] Matches a or b or c or x or y or or z, as does [a-cx-z]
|
|
// - 'ma?ch.*' would match match.exe, mavch.dat, march.on, etc..
|
|
// - 'this [e-n]s a [!zy]est' would match 'this is a test', but would not
|
|
// match 'this as a test' nor 'this is a zest'
|
|
// - pattern check IS case sensitive (TSynValidatePatternI is not)
|
|
// - this class is not as complete as PCRE regex for example,
|
|
// but code overhead is very small, and speed good enough in practice
|
|
TSynValidatePattern = class(TSynValidate)
|
|
protected
|
|
fMatch: TMatch;
|
|
procedure SetParameters(const Value: RawUtf8); override;
|
|
public
|
|
/// perform the pattern validation to the specified value
|
|
// - pattern can be e.g. '[0-9][0-9]:[0-9][0-9]:[0-9][0-9]'
|
|
// - this method will implement both TSynValidatePattern and
|
|
// TSynValidatePatternI, checking the current class
|
|
function Process(aFieldIndex: integer; const Value: RawUtf8;
|
|
var ErrorMsg: string): boolean; override;
|
|
end;
|
|
|
|
/// glob case-insensitive pattern validation of a text field content
|
|
// (typically a TOrm)
|
|
// - parameter is NOT JSON encoded, but is some basic TMatch glob pattern
|
|
// - same as TSynValidatePattern, but is NOT case sensitive
|
|
TSynValidatePatternI = class(TSynValidatePattern);
|
|
|
|
/// text validation to ensure that to any text field would not be ''
|
|
TSynValidateNonVoidText = class(TSynValidate)
|
|
public
|
|
/// perform the non void text validation action to the specified value
|
|
function Process(aFieldIndex: integer; const Value: RawUtf8;
|
|
var ErrorMsg: string): boolean; override;
|
|
end;
|
|
|
|
TSynValidateTextProps = array[0..15] of cardinal;
|
|
|
|
{$M+} // to have existing RTTI for published properties
|
|
/// text validation to be applied to any Record field content
|
|
// - default MinLength value is 1, MaxLength is maxInt: so a blank
|
|
// TSynValidateText.Create('') is the same as TSynValidateNonVoidText
|
|
// - MinAlphaCount, MinDigitCount, MinPunctCount, MinLowerCount and
|
|
// MinUpperCount allow you to specify the minimal count of respectively
|
|
// alphabetical [a-zA-Z], digit [0-9], punctuation [_!;.,/:?%$="#@(){}+-*],
|
|
// lower case or upper case characters
|
|
// - expects optional JSON parameters of the allowed text length range as
|
|
// $ '{"MinLength":5,"MaxLength":10,"MinAlphaCount":1,"MinDigitCount":1,
|
|
// $ "MinPunctCount":1,"MinLowerCount":1,"MinUpperCount":1}
|
|
TSynValidateText = class(TSynValidate)
|
|
private
|
|
/// used to store all associated validation properties by index
|
|
fProps: TSynValidateTextProps;
|
|
fUtf8Length: boolean;
|
|
protected
|
|
/// use sInvalidTextChar resourcestring to create a translated error message
|
|
procedure SetErrorMsg(fPropsIndex, InvalidTextIndex, MainIndex: integer;
|
|
var result: string);
|
|
/// decode "MinLength", "MaxLength", and other parameters into fProps[]
|
|
procedure SetParameters(const Value: RawUtf8); override;
|
|
public
|
|
/// perform the text length validation action to the specified value
|
|
function Process(aFieldIndex: integer; const Value: RawUtf8;
|
|
var ErrorMsg: string): boolean; override;
|
|
published
|
|
/// Minimal length value allowed for the text content
|
|
// - the length is calculated with UTF-16 Unicode codepoints, unless
|
|
// Utf8Length has been set to TRUE so that the UTF-8 byte count is checked
|
|
// - default is 1, i.e. a void text will not pass the validation
|
|
property MinLength: cardinal
|
|
read fProps[0] write fProps[0];
|
|
/// Maximal length value allowed for the text content
|
|
// - the length is calculated with UTF-16 Unicode codepoints, unless
|
|
// Utf8Length has been set to TRUE so that the UTF-8 byte count is checked
|
|
// - default is maxInt, i.e. no maximum length is set
|
|
property MaxLength: cardinal
|
|
read fProps[1] write fProps[1];
|
|
/// Minimal alphabetical character [a-zA-Z] count
|
|
// - default is 0, i.e. no minimum set
|
|
property MinAlphaCount: cardinal
|
|
read fProps[2] write fProps[2];
|
|
/// Maximal alphabetical character [a-zA-Z] count
|
|
// - default is maxInt, i.e. no Maximum set
|
|
property MaxAlphaCount: cardinal
|
|
read fProps[10] write fProps[10];
|
|
/// Minimal digit character [0-9] count
|
|
// - default is 0, i.e. no minimum set
|
|
property MinDigitCount: cardinal
|
|
read fProps[3] write fProps[3];
|
|
/// Maximal digit character [0-9] count
|
|
// - default is maxInt, i.e. no Maximum set
|
|
property MaxDigitCount: cardinal
|
|
read fProps[11] write fProps[11];
|
|
/// Minimal punctuation sign [_!;.,/:?%$="#@(){}+-*] count
|
|
// - default is 0, i.e. no minimum set
|
|
property MinPunctCount: cardinal
|
|
read fProps[4] write fProps[4];
|
|
/// Maximal punctuation sign [_!;.,/:?%$="#@(){}+-*] count
|
|
// - default is maxInt, i.e. no Maximum set
|
|
property MaxPunctCount: cardinal
|
|
read fProps[12] write fProps[12];
|
|
/// Minimal alphabetical lower case character [a-z] count
|
|
// - default is 0, i.e. no minimum set
|
|
property MinLowerCount: cardinal
|
|
read fProps[5] write fProps[5];
|
|
/// Maximal alphabetical lower case character [a-z] count
|
|
// - default is maxInt, i.e. no Maximum set
|
|
property MaxLowerCount: cardinal
|
|
read fProps[13] write fProps[13];
|
|
/// Minimal alphabetical upper case character [A-Z] count
|
|
// - default is 0, i.e. no minimum set
|
|
property MinUpperCount: cardinal
|
|
read fProps[6] write fProps[6];
|
|
/// Maximal alphabetical upper case character [A-Z] count
|
|
// - default is maxInt, i.e. no Maximum set
|
|
property MaxUpperCount: cardinal
|
|
read fProps[14] write fProps[14];
|
|
/// Minimal space count inside the value text
|
|
// - default is 0, i.e. any space number allowed
|
|
property MinSpaceCount: cardinal
|
|
read fProps[7] write fProps[7];
|
|
/// Maximal space count inside the value text
|
|
// - default is maxInt, i.e. any space number allowed
|
|
property MaxSpaceCount: cardinal
|
|
read fProps[15] write fProps[15];
|
|
/// Maximal space count allowed on the Left side
|
|
// - default is maxInt, i.e. any Left space allowed
|
|
property MaxLeftTrimCount: cardinal
|
|
read fProps[8] write fProps[8];
|
|
/// Maximal space count allowed on the Right side
|
|
// - default is maxInt, i.e. any Right space allowed
|
|
property MaxRightTrimCount: cardinal
|
|
read fProps[9] write fProps[9];
|
|
/// defines if lengths parameters expects UTF-8 or UTF-16 codepoints number
|
|
// - with default FALSE, the length is calculated with UTF-16 Unicode
|
|
// codepoints - MaxLength may not match the UCS4 CodePoint, in case of
|
|
// UTF-16 surrogates
|
|
// - you can set this property to TRUE so that the UTF-8 byte count would
|
|
// be used for truncation against the MaxLength parameter
|
|
property Utf8Length: boolean
|
|
read fUtf8Length write fUtf8Length;
|
|
end;
|
|
{$M-}
|
|
|
|
/// strong password validation for a Record field content (typically a TOrm)
|
|
// - the following parameters are set by default to
|
|
// $ '{"MinLength":5,"MaxLength":20,"MinAlphaCount":1,"MinDigitCount":1,
|
|
// $ "MinPunctCount":1,"MinLowerCount":1,"MinUpperCount":1,"MaxSpaceCount":0}'
|
|
// - you can specify some JSON encoded parameters to change this default
|
|
// values, which will validate the text field only if it contains from 5 to 10
|
|
// characters, with at least one digit, one upper case letter, one lower case
|
|
// letter, and one punctuation sign, with no space allowed inside
|
|
TSynValidatePassWord = class(TSynValidateText)
|
|
protected
|
|
/// set password specific parameters
|
|
procedure SetParameters(const Value: RawUtf8); override;
|
|
end;
|
|
|
|
{ C++Builder doesn't support array elements as properties (RSP-12595).
|
|
For now, simply exclude the relevant classes from C++Builder. }
|
|
{$NODEFINE TSynValidateTextProps}
|
|
{$NODEFINE TSynValidateText }
|
|
{$NODEFINE TSynValidatePassWord }
|
|
|
|
/// will define a transformation to be applied to a Record field content
|
|
// (typically a TOrm)
|
|
// - here "filter" means that content would be transformed according to a
|
|
// set of defined rules
|
|
// - a typical usage is to convert to lower or upper case, or
|
|
// trim any time or date value in a TDateTime field
|
|
// - the optional associated parameters are to be supplied JSON-encoded
|
|
TSynFilter = class(TSynFilterOrValidate)
|
|
protected
|
|
public
|
|
/// perform the transformation to the specified value
|
|
// - the value is converted into UTF-8 text, as expected by
|
|
// TPropInfo.GetValue / TPropInfo.SetValue e.g.
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); virtual; abstract;
|
|
end;
|
|
|
|
/// class-reference type (metaclass) for a TSynFilter or a TSynValidate
|
|
TSynFilterOrValidateClass = class of TSynFilterOrValidate;
|
|
|
|
/// class-reference type (metaclass) of a record filter (transformation)
|
|
TSynFilterClass = class of TSynFilter;
|
|
|
|
/// convert the value into ASCII Upper Case characters
|
|
// - UpperCase conversion is made for ASCII-7 only, i.e. 'a'..'z' characters
|
|
// - this version expects no parameter
|
|
TSynFilterUpperCase = class(TSynFilter)
|
|
public
|
|
/// perform the case conversion to the specified value
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); override;
|
|
end;
|
|
|
|
/// convert the value into WinAnsi Upper Case characters
|
|
// - UpperCase conversion is made for all latin characters in the WinAnsi
|
|
// code page only, e.g. 'e' acute will be converted to 'E'
|
|
// - this version expects no parameter
|
|
TSynFilterUpperCaseU = class(TSynFilter)
|
|
public
|
|
/// perform the case conversion to the specified value
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); override;
|
|
end;
|
|
|
|
/// convert the value into ASCII Lower Case characters
|
|
// - LowerCase conversion is made for ASCII-7 only, i.e. 'A'..'Z' characters
|
|
// - this version expects no parameter
|
|
TSynFilterLowerCase = class(TSynFilter)
|
|
public
|
|
/// perform the case conversion to the specified value
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); override;
|
|
end;
|
|
|
|
/// convert the value into WinAnsi Lower Case characters
|
|
// - LowerCase conversion is made for all latin characters in the WinAnsi
|
|
// code page only, e.g. 'E' acute will be converted to 'e'
|
|
// - this version expects no parameter
|
|
TSynFilterLowerCaseU = class(TSynFilter)
|
|
public
|
|
/// perform the case conversion to the specified value
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); override;
|
|
end;
|
|
|
|
/// trim any space character left or right to the value
|
|
// - this versions expect no parameter
|
|
TSynFilterTrim = class(TSynFilter)
|
|
public
|
|
/// perform the space trimming conversion to the specified value
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); override;
|
|
end;
|
|
|
|
/// truncate a text above a given maximum length
|
|
// - expects optional JSON parameters of the allowed text length range as
|
|
// $ '{MaxLength":10}
|
|
TSynFilterTruncate = class(TSynFilter)
|
|
protected
|
|
fMaxLength: cardinal;
|
|
fUtf8Length: boolean;
|
|
/// decode the MaxLength: and Utf8Length: parameters
|
|
procedure SetParameters(const Value: RawUtf8); override;
|
|
public
|
|
/// perform the length truncation of the specified value
|
|
procedure Process(aFieldIndex: integer; var Value: RawUtf8); override;
|
|
/// Maximal length value allowed for the text content
|
|
// - the length is calculated with UTF-16 Unicode codepoints, unless
|
|
// Utf8Length has been set to TRUE so that the UTF-8 byte count is checked
|
|
// - default is 0, i.e. no maximum length is forced
|
|
property MaxLength: cardinal
|
|
read fMaxLength write fMaxLength;
|
|
/// defines if MaxLength is stored as UTF-8 or UTF-16 codepoints number
|
|
// - with default FALSE, the length is calculated with UTF-16 Unicode
|
|
// codepoints - MaxLength may not match the UCS4 CodePoint, in case of
|
|
// UTF-16 surrogates
|
|
// - you can set this property to TRUE so that the UTF-8 byte count would
|
|
// be used for truncation against the MaxLength parameter
|
|
property Utf8Length: boolean
|
|
read fUtf8Length write fUtf8Length;
|
|
end;
|
|
|
|
{$ifdef ISDELPHI}
|
|
resourcestring
|
|
{$else}
|
|
const
|
|
{$endif ISDELPHI}
|
|
sInvalidIPAddress = '"%s" is an invalid IP v4 address';
|
|
sInvalidEmailAddress = '"%s" is an invalid email address';
|
|
sInvalidPattern = '"%s" does not match the expected pattern';
|
|
sCharacter01n = 'character,character,characters';
|
|
sInvalidTextLengthMin = 'Expect at least %d %s';
|
|
sInvalidTextLengthMax = 'Expect up to %d %s';
|
|
sInvalidTextChar = 'Expect at least %d %s %s,Expect up to %d %s %s,'+
|
|
'alphabetical,digital,punctuation,lowercase,uppercase,space,'+
|
|
'Too much spaces on the left,Too much spaces on the right';
|
|
sValidationFailed = '"%s" rule failed';
|
|
sValidationFieldVoid = 'An unique key field must not be void';
|
|
sValidationFieldDuplicate = 'Value already used for this unique key field';
|
|
|
|
|
|
/// return TRUE if the supplied content is a valid IP v4 address
|
|
function IsValidIP4Address(P: PUtf8Char): boolean;
|
|
|
|
/// return TRUE if the supplied content is a valid email address
|
|
// - follows RFC 822, to validate local-part@domain email format
|
|
function IsValidEmail(P: PUtf8Char): boolean;
|
|
|
|
|
|
{ ***************** Cross-Platform TSynTimeZone Time Zones }
|
|
|
|
type
|
|
{$A-} { make all records packed for cross-platform binary serialization }
|
|
|
|
/// used to store Time Zone bias in TSynTimeZone
|
|
// - map how low-level information is stored in the Windows Registry
|
|
TTimeZoneInfo = record
|
|
Bias: integer;
|
|
bias_std: integer;
|
|
bias_dlt: integer;
|
|
change_time_std: TSynSystemTime;
|
|
change_time_dlt: TSynSystemTime;
|
|
end;
|
|
PTimeZoneInfo = ^TTimeZoneInfo;
|
|
|
|
/// text identifier of a Time Zone, following Microsoft Windows naming
|
|
TTimeZoneID = type RawUtf8;
|
|
|
|
/// used to store Time Zone information for a single area in TSynTimeZone
|
|
// - Delphi "object" is buggy on stack -> also defined as record with methods
|
|
{$ifdef USERECORDWITHMETHODS}
|
|
TTimeZoneData = record
|
|
{$else}
|
|
TTimeZoneData = object
|
|
{$endif USERECORDWITHMETHODS}
|
|
public
|
|
id: TTimeZoneID;
|
|
display: RawUtf8;
|
|
tzi: TTimeZoneInfo;
|
|
dyn: array of packed record
|
|
year: integer;
|
|
tzi: TTimeZoneInfo;
|
|
end;
|
|
/// search for the TTimeZoneInfo of a given year
|
|
function GetTziFor(year: integer): PTimeZoneInfo;
|
|
end;
|
|
|
|
/// used to store the Time Zone information of a TSynTimeZone class
|
|
TTimeZoneDataDynArray = array of TTimeZoneData;
|
|
|
|
{$A+}
|
|
|
|
/// handle cross-platform time conversions, following Microsoft time zones
|
|
// - is able to retrieve accurate information from the Windows registry,
|
|
// or from a binary compressed file on other platforms (which should have been
|
|
// saved from a Windows system first)
|
|
// - for Linux/POSIX our mORMot 2 repository supplies a ready-to-use
|
|
// ! {$R mormot.tz.res}
|
|
// - each time zone will be identified by its TzId string, as defined by
|
|
// Microsoft for its Windows Operating system
|
|
// - note that each instance is thread-safe
|
|
TSynTimeZone = class
|
|
protected
|
|
fSafe: TRWLightLock;
|
|
fZone: TTimeZoneDataDynArray;
|
|
fZoneCount: integer;
|
|
fZones: TDynArrayHashed;
|
|
fLastZone: TTimeZoneID;
|
|
fLastIndex: integer;
|
|
fIds: TStringList;
|
|
fDisplays: TStringList;
|
|
function LockedFindZoneIndex(const TzId: TTimeZoneID): PtrInt;
|
|
public
|
|
/// initialize the internal storage
|
|
// - but no data is available, until Load* methods are called
|
|
constructor Create;
|
|
/// retrieve the time zones from Windows registry, or from a local file
|
|
// - under Linux, the file should be located with the executable, renamed
|
|
// with a .tz extension - may have been created via SaveToFile(''), or
|
|
// from a 'TSynTimeZone' bound resource
|
|
// - "dummycpp" parameter exists only to disambiguate constructors for C++
|
|
constructor CreateDefault(dummycpp: integer = 0);
|
|
/// finalize the instance
|
|
destructor Destroy; override;
|
|
/// will retrieve the default shared TSynTimeZone instance
|
|
// - locally created via the CreateDefault constructor
|
|
// - see also the NowToLocal/LocalToUtc/UtcToLocal global functions
|
|
class function Default: TSynTimeZone;
|
|
{$ifdef OSWINDOWS}
|
|
/// read time zone information from the Windows registry
|
|
procedure LoadFromRegistry;
|
|
{$endif OSWINDOWS}
|
|
/// read time zone information from a compressed file
|
|
// - if no file name is supplied, a ExecutableName.tz file would be used
|
|
procedure LoadFromFile(const FileName: TFileName = '');
|
|
/// read time zone information from a compressed memory buffer
|
|
procedure LoadFromBuffer(const Buffer: RawByteString);
|
|
/// read time zone information from a 'TSynTimeZone' resource
|
|
// - the resource should contain the SaveToBuffer compressed binary content
|
|
// - is no resource matching the TSynTimeZone class name and ResType=10
|
|
// do exist, nothing would be loaded
|
|
// - the resource could be created as such, from a Windows system:
|
|
// ! TSynTimeZone.Default.SaveToFile('TSynTimeZone.data');
|
|
// then compile the resource as expected, with a brcc32 .rc entry:
|
|
// ! TSynTimeZone 10 "TSynTimeZone.data"
|
|
// - you can specify a library (dll) resource instance handle, if needed
|
|
// - for Linux/POSIX our mORMot 2 repository supplies a ready-to-use
|
|
// ! {$R mormot.tz.res}
|
|
procedure LoadFromResource(Instance: TLibHandle = 0);
|
|
/// write then time zone information into a compressed file
|
|
// - if no file name is supplied, a ExecutableName.tz file would be created
|
|
procedure SaveToFile(const FileName: TFileName);
|
|
/// write then time zone information into a compressed memory buffer
|
|
function SaveToBuffer: RawByteString;
|
|
/// retrieve the time bias (in minutes) for a given date/time on a TzId
|
|
function GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID;
|
|
out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean = false): boolean;
|
|
/// retrieve the display text corresponding to a TzId
|
|
// - returns '' if the supplied TzId is not recognized
|
|
function GetDisplay(const TzId: TTimeZoneID): RawUtf8;
|
|
/// compute the UTC date/time corrected for a given TzId
|
|
function UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime;
|
|
/// compute the current date/time corrected for a given TzId
|
|
function NowToLocal(const TzId: TTimeZoneID): TDateTime;
|
|
/// compute the UTC date/time for a given local TzId value
|
|
// - by definition, a local time may correspond to two UTC times, during the
|
|
// time bias period, so the returned value is informative only, and any
|
|
// stored value should be following UTC
|
|
function LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime;
|
|
/// direct access to the low-level time zone information
|
|
property Zone: TTimeZoneDataDynArray
|
|
read fZone;
|
|
/// direct access to the wrapper over the time zone information array
|
|
property Zones: TDynArrayHashed
|
|
read fZones;
|
|
/// returns a TStringList of all TzID values
|
|
// - could be used to fill any UI component to select the time zone
|
|
// - order in Ids[] array follows the Zone[].id information
|
|
function Ids: TStrings;
|
|
/// returns a TStringList of all Display text values
|
|
// - could be used to fill any UI component to select the time zone
|
|
// - order in Displays[] array follows the Zone[].display information
|
|
function Displays: TStrings;
|
|
end;
|
|
|
|
/// retrieve the time bias (in minutes) for a given date/time on a TzId
|
|
// - will use a global shared thread-safe TSynTimeZone instance for the request
|
|
function GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID;
|
|
out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean = false): boolean;
|
|
|
|
/// retrieve the display text corresponding to a TzId
|
|
// - returns '' if the supplied TzId is not recognized
|
|
// - will use a global shared thread-safe TSynTimeZone instance for the request
|
|
function GetDisplay(const TzId: TTimeZoneID): RawUtf8;
|
|
|
|
/// compute the UTC date/time corrected for a given TzId
|
|
// - will use a global shared thread-safe TSynTimeZone instance for the request
|
|
function UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime;
|
|
{$ifdef HASINLINE} inline; {$endif}
|
|
|
|
/// compute the current date/time corrected for a given TzId
|
|
// - will use a global shared thread-safe TSynTimeZone instance for the request
|
|
function NowToLocal(const TzId: TTimeZoneID): TDateTime;
|
|
{$ifdef HASINLINE} inline; {$endif}
|
|
|
|
/// compute the UTC date/time for a given local TzId value
|
|
// - by definition, a local time may correspond to two UTC times, during the
|
|
// time bias period, so the returned value is informative only, and any
|
|
// stored value should be following UTC
|
|
// - will use a global shared thread-safe TSynTimeZone instance for the request
|
|
function LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime;
|
|
{$ifdef HASINLINE} inline; {$endif}
|
|
|
|
|
|
implementation
|
|
|
|
|
|
{ ****************** Files Search in Folders }
|
|
|
|
procedure TFindFiles.FromSearchRec(const Directory: TFileName; const F: TSearchRec);
|
|
begin
|
|
Name := Directory + TFileName(F.Name);
|
|
{$ifdef OSWINDOWS}
|
|
{$ifdef HASINLINE} // FPC or Delphi 2006+
|
|
Size := F.Size;
|
|
{$else} // F.Size was limited to 32-bit on older Delphi
|
|
PInt64Rec(@Size)^.Lo := F.FindData.nFileSizeLow;
|
|
PInt64Rec(@Size)^.Hi := F.FindData.nFileSizeHigh;
|
|
{$endif HASINLINE}
|
|
{$else}
|
|
Size := F.Size;
|
|
{$endif OSWINDOWS}
|
|
Attr := F.Attr;
|
|
Timestamp := SearchRecToDateTime(F);
|
|
end;
|
|
|
|
function TFindFiles.ToText: ShortString;
|
|
begin
|
|
FormatShort('% % %', [Name, KB(Size), DateTimeToFileShort(Timestamp)], result);
|
|
end;
|
|
|
|
function FindFiles(const Directory, Mask, IgnoreFileName: TFileName;
|
|
Options: TFindFilesOptions): TFindFilesDynArray;
|
|
var
|
|
m, count: integer;
|
|
dir: TFileName;
|
|
da: TDynArray;
|
|
masks: TRawUtf8DynArray;
|
|
masked: TFindFilesDynArray;
|
|
|
|
procedure SearchFolder(const folder: TFileName);
|
|
var
|
|
F: TSearchRec;
|
|
ff: TFindFiles;
|
|
fold, name: TFileName; // FPC requires these implicit local variables :(
|
|
begin
|
|
fold := dir + folder;
|
|
name := fold + Mask;
|
|
if FindFirst(name, faAnyfile - faDirectory, F) = 0 then
|
|
begin
|
|
repeat
|
|
if SearchRecValidFile(F) and
|
|
((IgnoreFileName = '') or
|
|
(AnsiCompareFileName(F.Name, IgnoreFileName) <> 0)) then
|
|
begin
|
|
if ffoExcludesDir in Options then
|
|
ff.FromSearchRec(folder, F)
|
|
else
|
|
ff.FromSearchRec(fold, F);
|
|
da.Add(ff);
|
|
end;
|
|
until FindNext(F) <> 0;
|
|
FindClose(F);
|
|
end;
|
|
if (ffoSubFolder in Options) and
|
|
(FindFirst(fold + '*', faDirectory, F) = 0) then
|
|
begin
|
|
// recursive SearchFolder() call for nested directories
|
|
repeat
|
|
if SearchRecValidFolder(F) and
|
|
((IgnoreFileName = '') or
|
|
(AnsiCompareFileName(F.Name, IgnoreFileName) <> 0)) then
|
|
SearchFolder(IncludeTrailingPathDelimiter(folder + F.Name));
|
|
until FindNext(F) <> 0;
|
|
FindClose(F);
|
|
end;
|
|
end;
|
|
|
|
begin
|
|
Finalize(result);
|
|
da.Init(TypeInfo(TFindFilesDynArray), result, @count);
|
|
if Pos(';', Mask) > 0 then
|
|
CsvToRawUtf8DynArray(pointer(StringToUtf8(Mask)), masks, ';');
|
|
if masks <> nil then
|
|
begin
|
|
// recursive calls for each masks[]
|
|
if ffoSortByName in Options then
|
|
QuickSortRawUtf8(masks, length(masks), nil,
|
|
{$ifdef OSWINDOWS} @StrIComp {$else} @StrComp {$endif});
|
|
for m := 0 to length(masks) - 1 do
|
|
begin
|
|
masked := FindFiles(
|
|
Directory, Utf8ToString(masks[m]), IgnoreFileName, Options);
|
|
da.AddArray(masked);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
// single mask search
|
|
if Directory <> '' then
|
|
dir := IncludeTrailingPathDelimiter(Directory);
|
|
SearchFolder('');
|
|
if (ffoSortByName in Options) and
|
|
(da.Count > 1) then
|
|
da.Sort(SortDynArrayFileName);
|
|
end;
|
|
if count <> 0 then
|
|
DynArrayFakeLength(result, count);
|
|
end;
|
|
|
|
function FileNames(const Directory, Mask: TFileName;
|
|
Options: TFindFilesOptions; const IgnoreFileName: TFileName): TFileNameDynArray;
|
|
begin
|
|
result := FindFilesDynArrayToFileNames(
|
|
FindFiles(Directory, Mask, IgnoreFileName, Options));
|
|
end;
|
|
|
|
function FileNames(const Path: array of const; const Mask: TFileName;
|
|
Options: TFindFilesOptions): TFileNameDynArray;
|
|
var
|
|
dir: TFileName;
|
|
begin
|
|
dir := MakePath(Path, {endwithdelim=}true);
|
|
result := FileNames(dir, Mask, Options);
|
|
end;
|
|
|
|
function FindFilesDynArrayToFileNames(const Files: TFindFilesDynArray): TFileNameDynArray;
|
|
var
|
|
i, n: PtrInt;
|
|
begin
|
|
Finalize(result);
|
|
if Files = nil then
|
|
exit;
|
|
n := length(Files);
|
|
SetLength(result, n);
|
|
for i := 0 to n - 1 do
|
|
result[i] := Files[i].Name;
|
|
end;
|
|
|
|
function SortFindFileTimestamp(const A, B): integer;
|
|
begin
|
|
result := CompareFloat(TFindFiles(A).Timestamp, TFindFiles(B).Timestamp);
|
|
end;
|
|
|
|
procedure FindFilesSortByTimestamp(var Files: TFindFilesDynArray);
|
|
begin
|
|
DynArray(TypeInfo(TFindFilesDynArray), Files).Sort(SortFindFileTimestamp);
|
|
end;
|
|
|
|
function SynchFolders(const Reference, Dest: TFileName;
|
|
Options: TSynchFoldersOptions): integer;
|
|
var
|
|
ref, dst, reffn, dstfn: TFileName;
|
|
fdst: TSearchRec;
|
|
refsize: Int64;
|
|
reftime: TUnixMSTime;
|
|
s: RawByteString;
|
|
begin
|
|
result := 0;
|
|
ref := IncludeTrailingPathDelimiter(Reference);
|
|
dst := IncludeTrailingPathDelimiter(Dest);
|
|
if DirectoryExists(ref) and
|
|
(FindFirst(dst + FILES_ALL, faAnyFile, fdst) = 0) then
|
|
begin
|
|
repeat
|
|
if SearchRecValidFile(fdst) then
|
|
begin
|
|
reffn := ref + fdst.Name;
|
|
if not FileInfoByName(reffn, refsize, reftime) then
|
|
continue; // only update existing files
|
|
if not (sfoByContent in Options) then
|
|
if (refsize = fdst.Size) and
|
|
(reftime = SearchRecToUnixTimeUtc(fdst)) then
|
|
continue;
|
|
dstfn := dst + fdst.Name;
|
|
s := StringFromFile(reffn);
|
|
if (s = '') or
|
|
((sfoByContent in Options) and
|
|
(length(s) = fdst.Size) and
|
|
(DefaultHasher(0, pointer(s), fdst.Size) = HashFile(dstfn))) then
|
|
continue;
|
|
FileFromString(s, dstfn);
|
|
FileSetDateFromUnixUtc(dstfn, reftime div MSecsPerSec);
|
|
inc(result);
|
|
if sfoWriteFileNameToConsole in Options then
|
|
ConsoleWrite('synched %', [dstfn]);
|
|
end
|
|
else if (sfoSubFolder in Options) and
|
|
SearchRecValidFolder(fdst) then
|
|
inc(result, SynchFolders(ref + fdst.Name, dst + fdst.Name, Options));
|
|
until FindNext(fdst) <> 0;
|
|
FindClose(fdst);
|
|
end;
|
|
end;
|
|
|
|
function CopyFolder(const Source, Dest: TFileName;
|
|
Options: TSynchFoldersOptions): integer;
|
|
var
|
|
src, dst, reffn, dstfn: TFileName;
|
|
sr: TSearchRec;
|
|
dsize: Int64;
|
|
dtime: TUnixMSTime;
|
|
nested: integer;
|
|
begin
|
|
result := 0;
|
|
src := IncludeTrailingPathDelimiter(Source);
|
|
if not DirectoryExists(src) then
|
|
exit;
|
|
dst := EnsureDirectoryExists(Dest);
|
|
if (dst = '') or
|
|
(FindFirst(src + FILES_ALL, faAnyFile, sr) <> 0) then
|
|
exit;
|
|
repeat
|
|
reffn := src + sr.Name;
|
|
dstfn := dst + sr.Name;
|
|
if SearchRecValidFile(sr) then
|
|
begin
|
|
if FileInfoByName(dstfn, dsize, dtime) and // fast single syscall
|
|
(sr.Size = dsize) then
|
|
if sfoByContent in Options then
|
|
begin
|
|
if SameFileContent(reffn, dstfn) then
|
|
continue;
|
|
end
|
|
else if abs(SearchRecToUnixTimeUtc(sr) * 1000 - dtime) < 1000 then
|
|
continue; // allow error of 1 second timestamp resolution
|
|
if not CopyFile(reffn, dstfn, {failsifexists=}false) then
|
|
result := -1;
|
|
end
|
|
else if not SearchRecValidFolder(sr) then
|
|
continue
|
|
else if sfoSubFolder in Options then
|
|
begin
|
|
nested := CopyFolder(reffn, dstfn, Options);
|
|
if nested < 0 then
|
|
result := nested
|
|
else
|
|
inc(result, nested);
|
|
end;
|
|
if result < 0 then
|
|
break;
|
|
inc(result);
|
|
if sfoWriteFileNameToConsole in Options then
|
|
ConsoleWrite('copied %', [reffn]);
|
|
until (FindNext(sr) <> 0);
|
|
FindClose(sr);
|
|
end;
|
|
|
|
|
|
{ ****************** ScanUtf8, GLOB and SOUNDEX Text Search }
|
|
|
|
function ScanUtf8(P: PUtf8Char; PLen: PtrInt; const fmt: RawUtf8;
|
|
const values: array of pointer; ident: PRawUtf8DynArray): integer;
|
|
var
|
|
v, w: PtrInt;
|
|
F, FEnd, PEnd: PUtf8Char;
|
|
tab: PTextCharSet;
|
|
label
|
|
next;
|
|
begin
|
|
result := 0;
|
|
if (fmt = '') or
|
|
(P = nil) or
|
|
(PLen <= 0) or
|
|
(high(values) < 0) then
|
|
exit;
|
|
if ident <> nil then
|
|
SetLength(ident^, length(values));
|
|
F := pointer(fmt);
|
|
FEnd := F + length(fmt);
|
|
PEnd := P + PLen;
|
|
for v := 0 to high(values) do
|
|
repeat
|
|
if (P^ <= ' ') and
|
|
(P^ <> #0) then
|
|
// ignore any whitespace char in text
|
|
repeat
|
|
inc(P);
|
|
if P = PEnd then
|
|
exit;
|
|
until (P^ > ' ') or
|
|
(P^ = #0);
|
|
while (F^ <= ' ') and
|
|
(F^ <> #0) do
|
|
begin
|
|
// ignore any whitespace char in fmt
|
|
inc(F);
|
|
if F = FEnd then
|
|
exit;
|
|
end;
|
|
if F^ = '%' then
|
|
begin
|
|
// handle format specifier
|
|
inc(F);
|
|
if F = FEnd then
|
|
exit;
|
|
case F^ of
|
|
'd':
|
|
PInteger(values[v])^ := GetNextItemInteger(P, #0);
|
|
'D':
|
|
PInt64(values[v])^ := GetNextItemInt64(P, #0);
|
|
'u':
|
|
PCardinal(values[v])^ := GetNextItemCardinal(P, #0);
|
|
'U':
|
|
PQword(values[v])^ := GetNextItemQword(P, #0);
|
|
'f':
|
|
unaligned(PDouble(values[v])^) := GetNextItemDouble(P, #0);
|
|
'F':
|
|
GetNextItemCurrency(P, PCurrency(values[v])^, #0);
|
|
'x':
|
|
if not GetNextItemHexDisplayToBin(P, values[v], 4, #0) then
|
|
exit;
|
|
'X':
|
|
if not GetNextItemHexDisplayToBin(P, values[v], 8, #0) then
|
|
exit;
|
|
's', 'S':
|
|
begin
|
|
w := 0;
|
|
while (P[w] > ' ') and
|
|
(P + w <= PEnd) do
|
|
inc(w);
|
|
if F^ = 's' then
|
|
SetString(PShortString(values[v])^, PAnsiChar(P), w)
|
|
else
|
|
FastSetString(PRawUtf8(values[v])^, P, w);
|
|
inc(P, w);
|
|
while (P^ <= ' ') and
|
|
(P^ <> #0) and
|
|
(P <= PEnd) do
|
|
inc(P);
|
|
end;
|
|
'L':
|
|
begin
|
|
w := 0;
|
|
tab := @TEXT_CHARS;
|
|
while (tcNot01013 in tab[P[w]]) and
|
|
(P + w <= PEnd) do
|
|
inc(w);
|
|
FastSetString(PRawUtf8(values[v])^, P, w);
|
|
inc(P, w);
|
|
end;
|
|
'%':
|
|
goto next;
|
|
else
|
|
raise ESynException.CreateUtf8(
|
|
'ScanUtf8: unknown ''%'' specifier [%]', [F^, fmt]);
|
|
end;
|
|
inc(result);
|
|
tab := @TEXT_CHARS;
|
|
if (tcIdentifier in tab[F[1]]) or
|
|
(ident <> nil) then
|
|
begin
|
|
w := 0;
|
|
repeat
|
|
inc(w)
|
|
until not (tcIdentifier in tab[F[w]]) or
|
|
(F + w = FEnd);
|
|
if ident <> nil then
|
|
FastSetString(ident^[v], F, w);
|
|
inc(F, w);
|
|
end
|
|
else
|
|
inc(F);
|
|
if (F >= FEnd) or
|
|
(P >= PEnd) then
|
|
exit;
|
|
break;
|
|
end
|
|
else
|
|
begin
|
|
next: while (P^ <> F^) and
|
|
(P <= PEnd) do
|
|
inc(P);
|
|
inc(F);
|
|
inc(P);
|
|
if (F >= FEnd) or
|
|
(P >= PEnd) then
|
|
exit;
|
|
end;
|
|
until false;
|
|
end;
|
|
|
|
function ScanUtf8(const text, fmt: RawUtf8; const values: array of pointer;
|
|
ident: PRawUtf8DynArray): integer;
|
|
begin
|
|
result := ScanUtf8(pointer(text), length(text), fmt, values, ident);
|
|
end;
|
|
|
|
|
|
// inspired by ZMatchPattern.pas - http://www.zeoslib.sourceforge.net
|
|
procedure TMatch.MatchMain;
|
|
var
|
|
RangeStart, RangeEnd: PtrInt;
|
|
c: AnsiChar;
|
|
flags: set of (Invert, MemberMatch);
|
|
begin
|
|
while ((State = sNONE) and
|
|
(P <= PMax)) do
|
|
begin
|
|
c := Upper[Pattern[P]];
|
|
if T > TMax then
|
|
begin
|
|
if (c = '*') and
|
|
(P + 1 > PMax) then
|
|
State := sVALID
|
|
else
|
|
State := sABORT;
|
|
exit;
|
|
end
|
|
else
|
|
case c of
|
|
'?':
|
|
;
|
|
'*':
|
|
MatchAfterStar;
|
|
'[':
|
|
begin
|
|
inc(P);
|
|
byte(flags) := 0;
|
|
if Pattern[P] in ['!', '^'] then
|
|
begin
|
|
include(flags, Invert);
|
|
inc(P);
|
|
end;
|
|
if Pattern[P] = ']' then
|
|
begin
|
|
State := sPATTERN;
|
|
exit;
|
|
end;
|
|
c := Upper[Text[T]];
|
|
while Pattern[P] <> ']' do
|
|
begin
|
|
RangeStart := P;
|
|
RangeEnd := P;
|
|
inc(P);
|
|
if P > PMax then
|
|
begin
|
|
State := sPATTERN;
|
|
exit;
|
|
end;
|
|
if Pattern[P] = '-' then
|
|
begin
|
|
inc(P);
|
|
RangeEnd := P;
|
|
if (P > PMax) or
|
|
(Pattern[RangeEnd] = ']') then
|
|
begin
|
|
State := sPATTERN;
|
|
exit;
|
|
end;
|
|
inc(P);
|
|
end;
|
|
if P > PMax then
|
|
begin
|
|
State := sPATTERN;
|
|
exit;
|
|
end;
|
|
if RangeStart < RangeEnd then
|
|
begin
|
|
if (c >= Upper[Pattern[RangeStart]]) and
|
|
(c <= Upper[Pattern[RangeEnd]]) then
|
|
begin
|
|
include(flags, MemberMatch);
|
|
break;
|
|
end;
|
|
end
|
|
else if (c >= Upper[Pattern[RangeEnd]]) and
|
|
(c <= Upper[Pattern[RangeStart]]) then
|
|
begin
|
|
include(flags, MemberMatch);
|
|
break;
|
|
end;
|
|
end;
|
|
if ((Invert in flags) and
|
|
(MemberMatch in flags)) or
|
|
not ((Invert in flags) or
|
|
(MemberMatch in flags)) then
|
|
begin
|
|
State := sRANGE;
|
|
exit;
|
|
end;
|
|
if MemberMatch in flags then
|
|
while (P <= PMax) and
|
|
(Pattern[P] <> ']') do
|
|
inc(P);
|
|
if P > PMax then
|
|
begin
|
|
State := sPATTERN;
|
|
exit;
|
|
end;
|
|
end;
|
|
else
|
|
if c <> Upper[Text[T]] then
|
|
State := sLITERAL;
|
|
end;
|
|
inc(P);
|
|
inc(T);
|
|
end;
|
|
if State = sNONE then
|
|
if T <= TMax then
|
|
State := sEND
|
|
else
|
|
State := sVALID;
|
|
end;
|
|
|
|
procedure TMatch.MatchAfterStar;
|
|
var
|
|
retryT, retryP: PtrInt;
|
|
begin
|
|
if (TMax = 1) or
|
|
(P = PMax) then
|
|
begin
|
|
State := sVALID;
|
|
exit;
|
|
end
|
|
else if (PMax = 0) or
|
|
(TMax = 0) then
|
|
begin
|
|
State := sABORT;
|
|
exit;
|
|
end;
|
|
while (T <= TMax) and
|
|
(P < PMax) and
|
|
(Pattern[P] in ['?', '*']) do
|
|
begin
|
|
if Pattern[P] = '?' then
|
|
inc(T);
|
|
inc(P);
|
|
end;
|
|
if T >= TMax then
|
|
begin
|
|
State := sABORT;
|
|
exit;
|
|
end
|
|
else if P >= PMax then
|
|
begin
|
|
State := sVALID;
|
|
exit;
|
|
end;
|
|
repeat
|
|
if (Upper[Pattern[P]] = Upper[Text[T]]) or
|
|
(Pattern[P] = '[') then
|
|
begin
|
|
retryT := T;
|
|
retryP := P;
|
|
MatchMain;
|
|
if State = sVALID then
|
|
break;
|
|
State := sNONE; // retry until end of Text, (check below) or State valid
|
|
T := retryT;
|
|
P := retryP;
|
|
end;
|
|
inc(T);
|
|
if (T > TMax) or
|
|
(P > PMax) then
|
|
begin
|
|
State := sABORT;
|
|
exit;
|
|
end;
|
|
until State <> sNONE;
|
|
end;
|
|
|
|
function SearchAny(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
aMatch.State := sNONE;
|
|
aMatch.P := 0;
|
|
aMatch.T := 0;
|
|
aMatch.Text := aText;
|
|
aMatch.TMax := aTextLen - 1;
|
|
aMatch.MatchMain;
|
|
result := aMatch.State = sVALID;
|
|
end;
|
|
|
|
// faster alternative (without recursion) for only * ? (but not [...])
|
|
|
|
{$ifdef CPU32} // less registers on this CPU - also circumvent ARM problems (Alf)
|
|
|
|
function SearchNoRange(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
var
|
|
c: AnsiChar;
|
|
pat, txt: PtrInt; // use local registers
|
|
begin
|
|
aMatch.T := 0; // aMatch.P/T are used for retry positions after *
|
|
aMatch.Text := aText;
|
|
aMatch.TMax := aTextLen - 1;
|
|
pat := 0;
|
|
txt := 0;
|
|
repeat
|
|
if pat <= aMatch.PMax then
|
|
begin
|
|
c := aMatch.Pattern[pat];
|
|
case c of
|
|
'?':
|
|
if txt <= aMatch.TMax then
|
|
begin
|
|
inc(pat);
|
|
inc(txt);
|
|
continue;
|
|
end;
|
|
'*':
|
|
begin
|
|
aMatch.P := pat;
|
|
aMatch.T := txt + 1;
|
|
inc(pat);
|
|
continue;
|
|
end;
|
|
else
|
|
if (txt <= aMatch.TMax) and
|
|
(c = aMatch.Text[txt]) then
|
|
begin
|
|
inc(pat);
|
|
inc(txt);
|
|
continue;
|
|
end;
|
|
end;
|
|
end
|
|
else if txt > aMatch.TMax then
|
|
break;
|
|
txt := aMatch.T;
|
|
if (txt > 0) and
|
|
(txt <= aMatch.TMax + 1) then
|
|
begin
|
|
inc(aMatch.T);
|
|
pat := aMatch.P + 1;
|
|
continue;
|
|
end;
|
|
result := false;
|
|
exit;
|
|
until false;
|
|
result := true;
|
|
end;
|
|
|
|
{$else} // optimized for x86_64/ARM with more registers
|
|
|
|
function SearchNoRange(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
var
|
|
c: AnsiChar;
|
|
pat, patend, txtend, txtretry, patretry: PUtf8Char;
|
|
label
|
|
fin;
|
|
begin
|
|
pat := pointer(aMatch.Pattern);
|
|
if pat = nil then
|
|
goto fin;
|
|
patend := pat + aMatch.PMax;
|
|
patretry := nil;
|
|
txtend := aText + aTextLen - 1;
|
|
txtretry := nil;
|
|
repeat
|
|
if pat <= patend then
|
|
begin
|
|
c := pat^;
|
|
if c <> '*' then
|
|
if c <> '?' then
|
|
begin
|
|
if (aText <= txtend) and
|
|
(c = aText^) then
|
|
begin
|
|
inc(pat);
|
|
inc(aText);
|
|
continue;
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
// '?'
|
|
if aText <= txtend then
|
|
begin
|
|
inc(pat);
|
|
inc(aText);
|
|
continue;
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
// '*'
|
|
inc(pat);
|
|
txtretry := aText + 1;
|
|
patretry := pat;
|
|
continue;
|
|
end;
|
|
end
|
|
else if aText > txtend then
|
|
break;
|
|
if (PtrInt(PtrUInt(txtretry)) > 0) and
|
|
(txtretry <= txtend + 1) then
|
|
begin
|
|
aText := txtretry;
|
|
inc(txtretry);
|
|
pat := patretry;
|
|
continue;
|
|
end;
|
|
fin:result := false;
|
|
exit;
|
|
until false;
|
|
result := true;
|
|
end;
|
|
|
|
{$endif CPUX86}
|
|
|
|
function SearchNoRangeU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
var
|
|
c: AnsiChar;
|
|
pat, txt: PtrInt;
|
|
begin
|
|
aMatch.T := 0;
|
|
aMatch.Text := aText;
|
|
aMatch.TMax := aTextLen - 1;
|
|
pat := 0;
|
|
txt := 0;
|
|
repeat
|
|
if pat <= aMatch.PMax then
|
|
begin
|
|
c := aMatch.Pattern[pat];
|
|
case c of
|
|
'?':
|
|
if txt <= aMatch.TMax then
|
|
begin
|
|
inc(pat);
|
|
inc(txt);
|
|
continue;
|
|
end;
|
|
'*':
|
|
begin
|
|
aMatch.P := pat;
|
|
aMatch.T := txt + 1;
|
|
inc(pat);
|
|
continue;
|
|
end;
|
|
else
|
|
if (txt <= aMatch.TMax) and
|
|
(aMatch.Upper[c] = aMatch.Upper[aMatch.Text[txt]]) then
|
|
begin
|
|
inc(pat);
|
|
inc(txt);
|
|
continue;
|
|
end;
|
|
end;
|
|
end
|
|
else if txt > aMatch.TMax then
|
|
break;
|
|
txt := aMatch.T;
|
|
if (txt > 0) and
|
|
(txt <= aMatch.TMax + 1) then
|
|
begin
|
|
inc(aMatch.T);
|
|
pat := aMatch.P + 1;
|
|
continue;
|
|
end;
|
|
result := false;
|
|
exit;
|
|
until false;
|
|
result := true;
|
|
end;
|
|
|
|
function SimpleContainsU(t, tend, p: PUtf8Char; pmax: PtrInt; up: PNormTable): boolean;
|
|
{$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label
|
|
// brute force case-insensitive search p[0..pmax] in t..tend-1
|
|
var
|
|
first: AnsiChar;
|
|
i: PtrInt;
|
|
label
|
|
next;
|
|
begin
|
|
first := up[p^];
|
|
repeat
|
|
if up[t^] <> first then
|
|
begin
|
|
next: inc(t);
|
|
if t < tend then
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
for i := 1 to pmax do
|
|
if up[t[i]] <> up[p[i]] then
|
|
goto next;
|
|
result := true;
|
|
exit;
|
|
until false;
|
|
result := false;
|
|
end;
|
|
|
|
{$ifdef CPU64} // naive but very efficient code generation on FPC x86-64
|
|
|
|
function SimpleContains8(t, tend, p: PUtf8Char; pmax: PtrInt): boolean;
|
|
{$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label
|
|
label
|
|
next;
|
|
var
|
|
i, first: PtrInt;
|
|
begin
|
|
first := PPtrInt(p)^;
|
|
repeat
|
|
if PPtrInt(t)^ <> first then
|
|
begin
|
|
next: inc(t);
|
|
if t < tend then
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
for i := 8 to pmax do
|
|
if t[i] <> p[i] then
|
|
goto next;
|
|
result := true;
|
|
exit;
|
|
until false;
|
|
result := false;
|
|
end;
|
|
|
|
{$endif CPU64}
|
|
|
|
|
|
{$ifdef CPUX86}
|
|
|
|
function SimpleContains1(t, tend, p: PUtf8Char; pmax: PtrInt): boolean;
|
|
{$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label
|
|
label
|
|
next;
|
|
var
|
|
i: PtrInt;
|
|
begin
|
|
repeat
|
|
if t^ <> p^ then
|
|
begin
|
|
next: inc(t);
|
|
if t < tend then
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
for i := 1 to pmax do
|
|
if t[i] <> p[i] then
|
|
goto next;
|
|
result := true;
|
|
exit;
|
|
until false;
|
|
result := false;
|
|
end;
|
|
|
|
function SimpleContains4(t, tend, p: PUtf8Char; pmax: PtrInt): boolean;
|
|
{$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label
|
|
label
|
|
next;
|
|
var
|
|
i: PtrInt;
|
|
begin
|
|
repeat
|
|
if PCardinal(t)^ <> PCardinal(p)^ then
|
|
begin
|
|
next: inc(t);
|
|
if t < tend then
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
for i := 1 to pmax do
|
|
if t[i] <> p[i] then
|
|
goto next;
|
|
result := true;
|
|
exit;
|
|
until false;
|
|
result := false;
|
|
end;
|
|
|
|
{$else}
|
|
|
|
function SimpleContains1(t, tend, p: PUtf8Char; pmax: PtrInt): boolean;
|
|
{$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label
|
|
label
|
|
next;
|
|
var
|
|
i: PtrInt;
|
|
first: AnsiChar;
|
|
begin
|
|
first := p^;
|
|
repeat
|
|
if t^ <> first then
|
|
begin
|
|
next: inc(t);
|
|
if t < tend then
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
for i := 1 to pmax do
|
|
if t[i] <> p[i] then
|
|
goto next;
|
|
result := true;
|
|
exit;
|
|
until false;
|
|
result := false;
|
|
end;
|
|
|
|
function SimpleContains4(t, tend, p: PUtf8Char; pmax: PtrInt): boolean;
|
|
{$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label
|
|
label
|
|
next;
|
|
var
|
|
i: PtrInt;
|
|
first: cardinal;
|
|
begin
|
|
first := PCardinal(p)^;
|
|
repeat
|
|
if PCardinal(t)^ <> first then
|
|
begin
|
|
next: inc(t);
|
|
if t < tend then
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
for i := 1 to pmax do
|
|
if t[i] <> p[i] then
|
|
goto next;
|
|
result := true;
|
|
exit;
|
|
until false;
|
|
result := false;
|
|
end;
|
|
|
|
{$endif CPUX86}
|
|
|
|
function CompareMemU(P1, P2: PUtf8Char; len: PtrInt; U: PNormTable): boolean;
|
|
{$ifdef FPC} inline;{$endif}
|
|
begin
|
|
// here we know that len>0
|
|
result := false;
|
|
repeat
|
|
dec(len);
|
|
if U[P1[len]] <> U[P2[len]] then
|
|
exit;
|
|
until len = 0;
|
|
result := true;
|
|
end;
|
|
|
|
function SearchVoid(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
result := aTextLen = 0;
|
|
end;
|
|
|
|
function SearchNoPattern(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
result := (aMatch.PMax + 1 = aTextLen) and
|
|
mormot.core.base.CompareMem(aText, aMatch.Pattern, aTextLen);
|
|
end;
|
|
|
|
function SearchNoPatternU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
result := (aMatch.PMax + 1 = aTextLen) and
|
|
CompareMemU(aText, aMatch.Pattern, aTextLen, aMatch.Upper);
|
|
end;
|
|
|
|
function SearchContainsValid(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
result := true;
|
|
end;
|
|
|
|
function SearchContainsU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
dec(aTextLen, aMatch.PMax);
|
|
if aTextLen > 0 then
|
|
result := SimpleContainsU(aText, aText + aTextLen,
|
|
aMatch.Pattern, aMatch.PMax, aMatch.Upper)
|
|
else
|
|
result := false;
|
|
end;
|
|
|
|
function SearchContains1(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
dec(aTextLen, aMatch.PMax);
|
|
if aTextLen > 0 then
|
|
result := SimpleContains1(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax)
|
|
else
|
|
result := false;
|
|
end;
|
|
|
|
function SearchContains4(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
dec(aTextLen, aMatch.PMax);
|
|
if aTextLen > 0 then
|
|
result := SimpleContains4(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax)
|
|
else
|
|
result := false;
|
|
end;
|
|
|
|
{$ifdef CPU64}
|
|
function SearchContains8(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
// optimized e.g. to search an IP address as '*12.34.56.78*' in logs
|
|
dec(aTextLen, aMatch.PMax);
|
|
if aTextLen > 0 then
|
|
result := SimpleContains8(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax)
|
|
else
|
|
result := false;
|
|
end;
|
|
{$endif CPU64}
|
|
|
|
function SearchStartWith(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
result := (aMatch.PMax < aTextLen) and
|
|
mormot.core.base.CompareMem(aText, aMatch.Pattern, aMatch.PMax + 1);
|
|
end;
|
|
|
|
function SearchStartWithU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
result := (aMatch.PMax < aTextLen) and
|
|
CompareMemU(aText, aMatch.Pattern, aMatch.PMax + 1, aMatch.Upper);
|
|
end;
|
|
|
|
function SearchEndWith(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
dec(aTextLen, aMatch.PMax);
|
|
result := (aTextLen >= 0) and
|
|
mormot.core.base.CompareMem(aText + aTextLen, aMatch.Pattern, aMatch.PMax);
|
|
end;
|
|
|
|
function SearchEndWithU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
dec(aTextLen, aMatch.PMax);
|
|
result := (aTextLen >= 0) and
|
|
CompareMemU(aText + aTextLen, aMatch.Pattern, aMatch.PMax, aMatch.Upper);
|
|
end;
|
|
|
|
procedure TMatch.Prepare(const aPattern: RawUtf8;
|
|
aCaseInsensitive, aReuse: boolean);
|
|
begin
|
|
Prepare(pointer(aPattern), length(aPattern), aCaseInsensitive, aReuse);
|
|
end;
|
|
|
|
procedure TMatch.Prepare(aPattern: PUtf8Char; aPatternLen: integer;
|
|
aCaseInsensitive, aReuse: boolean);
|
|
const
|
|
SPECIALS: PUtf8Char = '*?[';
|
|
begin
|
|
Pattern := aPattern;
|
|
pmax := aPatternLen - 1; // search in Pattern[0..PMax]
|
|
if Pattern = nil then
|
|
begin
|
|
Search := SearchVoid;
|
|
exit;
|
|
end;
|
|
if aCaseInsensitive and
|
|
not IsCaseSensitive(aPattern, aPatternLen) then
|
|
aCaseInsensitive := false; // don't slow down e.g. number or IP search
|
|
if aCaseInsensitive then
|
|
Upper := @NormToUpperAnsi7
|
|
else
|
|
Upper := @NormToNorm;
|
|
Search := nil;
|
|
if aReuse then
|
|
if strcspn(Pattern, SPECIALS) > pmax then
|
|
if aCaseInsensitive then
|
|
Search := SearchNoPatternU
|
|
else
|
|
Search := SearchNoPattern
|
|
else if pmax > 0 then
|
|
begin
|
|
if Pattern[pmax] = '*' then
|
|
begin
|
|
if strcspn(Pattern + 1, SPECIALS) = pmax - 1 then
|
|
case Pattern[0] of
|
|
'*':
|
|
begin
|
|
// *something*
|
|
inc(Pattern);
|
|
dec(pmax, 2); // trim trailing and ending *
|
|
if pmax < 0 then
|
|
Search := SearchContainsValid
|
|
else if aCaseInsensitive then
|
|
Search := SearchContainsU
|
|
{$ifdef CPU64}
|
|
else if pmax >= 7 then
|
|
Search := SearchContains8
|
|
{$endif CPU64}
|
|
else if pmax >= 3 then
|
|
Search := SearchContains4
|
|
else
|
|
Search := SearchContains1;
|
|
end;
|
|
'?':
|
|
// ?something*
|
|
if aCaseInsensitive then
|
|
Search := SearchNoRangeU
|
|
else
|
|
Search := SearchNoRange;
|
|
'[':
|
|
Search := SearchAny;
|
|
else
|
|
begin
|
|
dec(pmax); // trim trailing *
|
|
if aCaseInsensitive then
|
|
Search := SearchStartWithU
|
|
else
|
|
Search := SearchStartWith;
|
|
end;
|
|
end;
|
|
end
|
|
else if (Pattern[0] = '*') and
|
|
(strcspn(Pattern + 1, SPECIALS) >= pmax) then
|
|
begin
|
|
inc(Pattern); // jump leading *
|
|
if aCaseInsensitive then
|
|
Search := SearchEndWithU
|
|
else
|
|
Search := SearchEndWith;
|
|
end;
|
|
end
|
|
else if Pattern[0] in ['*', '?'] then
|
|
Search := SearchContainsValid;
|
|
if not Assigned(Search) then
|
|
begin
|
|
aPattern := PosChar(Pattern, '[');
|
|
if (aPattern = nil) or
|
|
(aPattern - Pattern > pmax) then
|
|
if aCaseInsensitive then
|
|
Search := SearchNoRangeU
|
|
else
|
|
Search := SearchNoRange
|
|
else
|
|
Search := SearchAny;
|
|
end;
|
|
end;
|
|
|
|
type
|
|
// Holub and Durian (2005) SBNDM2 algorithm
|
|
// see http://www.cri.haifa.ac.il/events/2005/string/presentations/Holub.pdf
|
|
TSBNDMQ2Mask = array[AnsiChar] of cardinal;
|
|
PSBNDMQ2Mask = ^TSBNDMQ2Mask;
|
|
|
|
function SearchSBNDMQ2ComputeMask(const Pattern: RawUtf8;
|
|
u: PNormTable): RawByteString;
|
|
var
|
|
i: PtrInt;
|
|
p: PAnsiChar absolute Pattern;
|
|
m: PSBNDMQ2Mask absolute result;
|
|
c: PCardinal;
|
|
begin
|
|
FastNewRawByteString(result, SizeOf(m^));
|
|
FillCharFast(m^, SizeOf(m^), 0);
|
|
for i := 0 to length(Pattern) - 1 do
|
|
begin
|
|
c := @m^[u[p[i]]]; // for FPC code generation
|
|
c^ := c^ or cardinal(1 shl i);
|
|
end;
|
|
end;
|
|
|
|
function SearchSBNDMQ2(aMatch: PMatch;
|
|
aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
var
|
|
mask: PSBNDMQ2Mask;
|
|
max, i, j: PtrInt;
|
|
state: cardinal;
|
|
begin
|
|
mask := pointer(aMatch^.Pattern); // was filled by SearchSBNDMQ2ComputeMask()
|
|
max := aMatch^.pmax;
|
|
i := max - 1;
|
|
dec(aTextLen);
|
|
if i < aTextLen then
|
|
begin
|
|
repeat
|
|
state := mask[aText[i + 1]] shr 1; // in two steps for better FPC codegen
|
|
state := state and mask[aText[i]];
|
|
if state = 0 then
|
|
begin
|
|
inc(i, max); // fast skip
|
|
if i >= aTextLen then
|
|
break;
|
|
continue;
|
|
end;
|
|
j := i - max;
|
|
repeat
|
|
dec(i);
|
|
if i < 0 then
|
|
break;
|
|
state := (state shr 1) and mask[aText[i]];
|
|
until state = 0;
|
|
if i = j then
|
|
begin
|
|
result := true;
|
|
exit;
|
|
end;
|
|
inc(i, max);
|
|
if i >= aTextLen then
|
|
break;
|
|
until false;
|
|
end;
|
|
result := false;
|
|
end;
|
|
|
|
function SearchSBNDMQ2U(aMatch: PMatch;
|
|
aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
var
|
|
u: PNormTable;
|
|
mask: PSBNDMQ2Mask;
|
|
max, i, j: PtrInt;
|
|
state: cardinal;
|
|
begin
|
|
mask := pointer(aMatch^.Pattern);
|
|
max := aMatch^.pmax;
|
|
u := aMatch^.Upper;
|
|
i := max - 1;
|
|
dec(aTextLen);
|
|
if i < aTextLen then
|
|
begin
|
|
repeat
|
|
state := mask[u[aText[i + 1]]] shr 1;
|
|
state := state and mask[u[aText[i]]];
|
|
if state = 0 then
|
|
begin
|
|
inc(i, max);
|
|
if i >= aTextLen then
|
|
break;
|
|
continue;
|
|
end;
|
|
j := i - max;
|
|
repeat
|
|
dec(i);
|
|
if i < 0 then
|
|
break;
|
|
state := (state shr 1) and mask[u[aText[i]]];
|
|
until state = 0;
|
|
if i = j then
|
|
begin
|
|
result := true;
|
|
exit;
|
|
end;
|
|
inc(i, max);
|
|
if i >= aTextLen then
|
|
break;
|
|
until false;
|
|
end;
|
|
result := false;
|
|
end;
|
|
|
|
procedure TMatch.PrepareContains(var aPattern: RawUtf8;
|
|
aCaseInsensitive: boolean);
|
|
begin
|
|
pmax := length(aPattern) - 1;
|
|
if aCaseInsensitive and
|
|
not IsCaseSensitive(pointer(aPattern), pmax + 1) then
|
|
aCaseInsensitive := false;
|
|
if aCaseInsensitive then
|
|
Upper := @NormToUpperAnsi7
|
|
else
|
|
Upper := @NormToNorm;
|
|
if pmax < 0 then
|
|
Search := SearchContainsValid
|
|
else if pmax > 30 then
|
|
if aCaseInsensitive then
|
|
Search := SearchContainsU
|
|
else
|
|
{$ifdef CPU64}
|
|
Search := SearchContains8
|
|
{$else}
|
|
Search := SearchContains4
|
|
{$endif CPU64}
|
|
else if pmax >= 1 then
|
|
begin
|
|
// PMax=[1..30] -> len=[2..31] -> aPattern becomes a SBNDMQ2 lookup table
|
|
aPattern := SearchSBNDMQ2ComputeMask(aPattern, Upper);
|
|
if aCaseInsensitive then
|
|
Search := SearchSBNDMQ2U
|
|
else
|
|
Search := SearchSBNDMQ2;
|
|
end
|
|
else if aCaseInsensitive then
|
|
Search := SearchContainsU
|
|
else
|
|
Search := SearchContains1; // todo: use ByteScanIndex() asm?
|
|
Pattern := pointer(aPattern);
|
|
end;
|
|
|
|
procedure TMatch.PrepareRaw(aPattern: PUtf8Char; aPatternLen: integer;
|
|
aSearch: TMatchSearchFunction);
|
|
begin
|
|
Pattern := aPattern;
|
|
pmax := aPatternLen - 1; // search in Pattern[0..PMax]
|
|
Search := aSearch;
|
|
end;
|
|
|
|
function TMatch.Match(const aText: RawUtf8): boolean;
|
|
begin
|
|
if pointer(aText) <> nil then
|
|
result := Search(@self,
|
|
pointer(aText), PStrLen(PAnsiChar(pointer(aText)) - _STRLEN)^)
|
|
else
|
|
result := pmax < 0;
|
|
end;
|
|
|
|
function TMatch.Match(aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
begin
|
|
if (aText <> nil) and
|
|
(aTextLen > 0) then
|
|
result := Search(@self, aText, aTextLen)
|
|
else
|
|
result := pmax < 0;
|
|
end;
|
|
|
|
function TMatch.MatchThreadSafe(const aText: RawUtf8): boolean;
|
|
var
|
|
local: TMatch; // thread-safe with no lock!
|
|
begin
|
|
local := self;
|
|
if aText <> '' then
|
|
result := local.Search(@local, pointer(aText), length(aText))
|
|
else
|
|
result := local.PMax < 0;
|
|
end;
|
|
|
|
function TMatch.MatchString(const aText: string): boolean;
|
|
var
|
|
local: TMatch; // thread-safe with no lock!
|
|
temp: TSynTempBuffer;
|
|
len: integer;
|
|
begin
|
|
if aText = '' then
|
|
begin
|
|
result := pmax < 0;
|
|
exit;
|
|
end;
|
|
len := length(aText);
|
|
temp.Init(len * 3);
|
|
{$ifdef UNICODE}
|
|
len := RawUnicodeToUtf8(temp.buf, temp.len + 1, pointer(aText), len, [ccfNoTrailingZero]);
|
|
{$else}
|
|
len := CurrentAnsiConvert.AnsiBufferToUtf8(temp.buf, pointer(aText), len) - temp.buf;
|
|
{$endif UNICODE}
|
|
local := self;
|
|
result := local.Search(@local, temp.buf, len);
|
|
temp.Done;
|
|
end;
|
|
|
|
function TMatch.Equals(const aAnother: TMatch): boolean;
|
|
begin
|
|
result := (pmax = TMatch(aAnother).pmax) and
|
|
(Upper = TMatch(aAnother).Upper) and
|
|
mormot.core.base.CompareMem(Pattern, TMatch(aAnother).Pattern, pmax + 1);
|
|
end;
|
|
|
|
function TMatch.PatternLength: integer;
|
|
begin
|
|
result := pmax + 1;
|
|
end;
|
|
|
|
function TMatch.PatternText: PUtf8Char;
|
|
begin
|
|
result := Pattern;
|
|
end;
|
|
|
|
function TMatch.CaseInsensitive: boolean;
|
|
begin
|
|
result := Upper = @NormToUpperAnsi7;
|
|
end;
|
|
|
|
|
|
{ TUriMatchName }
|
|
|
|
procedure TUriMatchName.ParsePath;
|
|
var
|
|
i: PtrInt;
|
|
begin
|
|
Name := Path;
|
|
i := Name.Len;
|
|
while i > 0 do // retrieve
|
|
begin
|
|
dec(i);
|
|
if Name.Text[i] <> '/' then
|
|
continue;
|
|
inc(i);
|
|
inc(Name.Text, i);
|
|
dec(Name.Len, i);
|
|
break;
|
|
end;
|
|
end;
|
|
|
|
|
|
{ TUriMatch }
|
|
|
|
procedure TUriMatch.DoInit(csv: PUtf8Char; caseinsensitive: boolean);
|
|
var
|
|
s: PUtf8Char;
|
|
m: ^TMatchDynArray;
|
|
begin
|
|
if csv <> nil then
|
|
repeat
|
|
m := @Names; // default 'file.ext' pattern
|
|
csv := GotoNextNotSpace(csv);
|
|
s := csv;
|
|
repeat
|
|
case csv^ of
|
|
#0,
|
|
',':
|
|
break;
|
|
'/':
|
|
m := @Paths; // is a 'path/to/file.ext' pattern
|
|
end;
|
|
inc(csv);
|
|
until false;
|
|
if csv <> s then
|
|
MatchNew(m^)^.Prepare(s, csv - s, caseinsensitive, true);
|
|
if csv^ = #0 then
|
|
break;
|
|
inc(csv);
|
|
until false;
|
|
end;
|
|
|
|
function TUriMatch.Check(const csv: RawUtf8;
|
|
const uri: TUriMatchName; caseinsensitive: boolean): boolean;
|
|
begin
|
|
if Init.TryLock then // thread-safe init once from supplied csv
|
|
DoInit(pointer(csv), caseinsensitive);
|
|
result := ((Names <> nil) and
|
|
MatchAny(pointer(Names), uri.Name.Text, uri.Name.Len)) or
|
|
((Paths <> nil) and
|
|
MatchAny(pointer(Paths), uri.Path.Text, uri.Path.Len));
|
|
end;
|
|
|
|
|
|
function IsMatch(const Pattern, Text: RawUtf8; CaseInsensitive: boolean): boolean;
|
|
var
|
|
match: TMatch;
|
|
begin
|
|
match.Prepare(pointer(Pattern), length(Pattern), CaseInsensitive, {reuse=}false);
|
|
result := match.Match(Text);
|
|
end;
|
|
|
|
function IsMatchString(const Pattern, Text: string;
|
|
CaseInsensitive: boolean): boolean;
|
|
var
|
|
match: TMatch;
|
|
pat, txt: RawUtf8;
|
|
begin
|
|
StringToUtf8(Pattern, pat); // local variable is mandatory for FPC
|
|
StringToUtf8(Text, txt);
|
|
match.Prepare(pat, CaseInsensitive, {reuse=}false);
|
|
result := match.Match(txt);
|
|
end;
|
|
|
|
function SetNextMatch(P: PUtf8Char; var Dest: TMatch;
|
|
CaseInsensitive, Reuse: boolean; CsvSep: AnsiChar): PUtf8Char;
|
|
begin
|
|
result := P;
|
|
repeat
|
|
while not (result^ in [#0, CsvSep]) do
|
|
inc(result);
|
|
if result <> P then
|
|
begin
|
|
Dest.Prepare(P, result - P, CaseInsensitive, Reuse);
|
|
if result^ = CsvSep then
|
|
inc(result); // go to next CSV
|
|
exit;
|
|
end;
|
|
until result^ = #0;
|
|
result := nil; // indicates Dest.Prepare() was not called
|
|
end;
|
|
|
|
function IsMatchs(CsvPattern, Text: PUtf8Char; TextLen: PtrInt;
|
|
CaseInsensitive: boolean; CsvSep: AnsiChar): boolean;
|
|
var
|
|
match: TMatch;
|
|
begin
|
|
result := (CsvPattern <> nil) and (TextLen > 0);
|
|
if not result then
|
|
exit;
|
|
repeat
|
|
CsvPattern := SetNextMatch(
|
|
CsvPattern, match, CaseInsensitive, {reuse=}false, CsvSep);
|
|
if CsvPattern = nil then
|
|
break;
|
|
if match.Search(@match, Text, TextLen) then
|
|
exit;
|
|
until CsvPattern^ = #0;
|
|
result := false;
|
|
end;
|
|
|
|
function IsMatchs(const CsvPattern, Text: RawUtf8; CaseInsensitive: boolean;
|
|
CsvSep: AnsiChar): boolean;
|
|
begin
|
|
result := IsMatchs(pointer(CsvPattern), pointer(Text), length(Text),
|
|
CaseInsensitive, CsvSep);
|
|
end;
|
|
|
|
function SetMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean;
|
|
out Match: TMatchDynArray; CsvSep: AnsiChar): integer;
|
|
var
|
|
P, S: PUtf8Char;
|
|
begin
|
|
P := pointer(CsvPattern);
|
|
if P <> nil then
|
|
repeat
|
|
S := P;
|
|
while not (P^ in [#0, CsvSep]) do
|
|
inc(P);
|
|
if P <> S then
|
|
MatchNew(Match)^.Prepare(S, P - S, CaseInsensitive, {reuse=}true);
|
|
if P^ = #0 then
|
|
break;
|
|
inc(P);
|
|
until false;
|
|
result := length(Match);
|
|
end;
|
|
|
|
function SetMatchs(CsvPattern: PUtf8Char; CaseInsensitive: boolean;
|
|
Match: PMatch; MatchMax: integer; CsvSep: AnsiChar): integer;
|
|
var
|
|
S: PUtf8Char;
|
|
begin
|
|
result := 0;
|
|
if (CsvPattern <> nil) and
|
|
(MatchMax >= 0) then
|
|
repeat
|
|
S := CsvPattern;
|
|
while not (CsvPattern^ in [#0, CsvSep]) do
|
|
inc(CsvPattern);
|
|
if CsvPattern <> S then
|
|
begin
|
|
Match^.Prepare(S, CsvPattern - S, CaseInsensitive, {reuse=}true);
|
|
inc(result);
|
|
if result > MatchMax then
|
|
break;
|
|
inc(Match);
|
|
end;
|
|
if CsvPattern^ = #0 then
|
|
break;
|
|
inc(CsvPattern);
|
|
until false;
|
|
end;
|
|
|
|
function MatchExists(const One: TMatch; const Several: TMatchDynArray): boolean;
|
|
var
|
|
i: PtrInt;
|
|
begin
|
|
result := true;
|
|
for i := 0 to length(Several) - 1 do
|
|
if Several[i].Equals(One) then
|
|
exit;
|
|
result := false;
|
|
end;
|
|
|
|
function MatchAdd(const One: TMatch; var Several: TMatchDynArray): boolean;
|
|
begin
|
|
result := not MatchExists(One, Several);
|
|
if result then
|
|
MatchNew(Several)^ := One;
|
|
end;
|
|
|
|
function MatchNew(var Several: TMatchDynArray): PMatch;
|
|
var
|
|
n: PtrInt;
|
|
begin
|
|
n := length(Several);
|
|
SetLength(Several, n + 1);
|
|
result := @Several[n];
|
|
end;
|
|
|
|
function MatchAny(const Match: TMatchDynArray; const Text: RawUtf8): boolean;
|
|
begin
|
|
result := MatchAny(pointer(Match), pointer(Text), length(Text));
|
|
end;
|
|
|
|
function MatchAny(Match: PMatch; Text: PUtf8Char; TextLen: PtrInt): boolean;
|
|
var
|
|
n: integer;
|
|
begin
|
|
result := true;
|
|
if Match = nil then
|
|
exit;
|
|
if TextLen <= 0 then
|
|
Text := nil;
|
|
n := PDALen(PAnsiChar(pointer(Match)) - _DALEN)^ + (_DAOFF - 1);
|
|
repeat
|
|
// inlined Match^.Match() to avoid internal error on Delphi
|
|
if Text <> nil then
|
|
begin
|
|
if Match^.Search(Match, Text, TextLen) then
|
|
exit;
|
|
end
|
|
else if Match^.pmax < 0 then
|
|
exit;
|
|
inc(Match);
|
|
dec(n);
|
|
until n = 0;
|
|
result := false;
|
|
end;
|
|
|
|
procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean;
|
|
var Values: TRawUtf8DynArray; CsvSep: AnsiChar);
|
|
var
|
|
match: TMatchDynArray;
|
|
m, n, i: PtrInt;
|
|
begin
|
|
if SetMatchs(CsvPattern, CaseInsensitive, match, CsvSep) = 0 then
|
|
exit;
|
|
n := 0;
|
|
for i := 0 to high(Values) do
|
|
for m := 0 to high(match) do
|
|
if match[m].Match(Values[i]) then
|
|
begin
|
|
if i <> n then
|
|
Values[n] := Values[i];
|
|
inc(n);
|
|
break;
|
|
end;
|
|
if n <> length(Values) then
|
|
SetLength(Values, n);
|
|
end;
|
|
|
|
procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean;
|
|
var Values: TStringDynArray; CsvSep: AnsiChar);
|
|
var
|
|
match: TMatchDynArray;
|
|
m, n, i: PtrInt;
|
|
begin
|
|
if SetMatchs(CsvPattern, CaseInsensitive, match, CsvSep) = 0 then
|
|
exit;
|
|
n := 0;
|
|
for i := 0 to high(Values) do
|
|
for m := 0 to high(match) do
|
|
if match[m].MatchString(Values[i]) then
|
|
begin
|
|
if i <> n then
|
|
Values[n] := Values[i];
|
|
inc(n);
|
|
break;
|
|
end;
|
|
if n <> length(Values) then
|
|
SetLength(Values, n);
|
|
end;
|
|
|
|
|
|
{ TMatchs }
|
|
|
|
constructor TMatchs.Create(const aPatterns: TRawUtf8DynArray; CaseInsensitive: boolean);
|
|
begin
|
|
inherited Create; // may have been overriden
|
|
Subscribe(aPatterns, CaseInsensitive);
|
|
end;
|
|
|
|
function TMatchs.Match(const aText: RawUtf8): integer;
|
|
begin
|
|
result := Match(pointer(aText), length(aText));
|
|
end;
|
|
|
|
function TMatchs.Match(aText: PUtf8Char; aLen: integer): integer;
|
|
var
|
|
one: ^TMatchStore;
|
|
local: TMatch; // thread-safe with no lock!
|
|
begin
|
|
if (self = nil) or
|
|
(fMatch = nil) then
|
|
result := -1 // no filter by name -> allow e.g. to process everything
|
|
else
|
|
begin
|
|
one := pointer(fMatch);
|
|
if aLen <> 0 then
|
|
begin
|
|
for result := 0 to fMatchCount - 1 do
|
|
begin
|
|
local := one^.Pattern;
|
|
if local.Search(@local, aText, aLen) then
|
|
exit;
|
|
inc(one);
|
|
end;
|
|
end
|
|
else
|
|
for result := 0 to fMatchCount - 1 do
|
|
if one^.Pattern.PMax < 0 then
|
|
exit
|
|
else
|
|
inc(one);
|
|
result := -2;
|
|
end;
|
|
end;
|
|
|
|
function TMatchs.MatchString(const aText: string): integer;
|
|
var
|
|
temp: TSynTempBuffer;
|
|
len: integer;
|
|
begin
|
|
len := StringToUtf8(aText, temp);
|
|
result := Match(temp.buf, len);
|
|
temp.Done;
|
|
end;
|
|
|
|
procedure TMatchs.Subscribe(const aPatternsCsv: RawUtf8; CaseInsensitive: boolean);
|
|
var
|
|
patterns: TRawUtf8DynArray;
|
|
begin
|
|
CsvToRawUtf8DynArray(pointer(aPatternsCsv), patterns);
|
|
Subscribe(patterns, CaseInsensitive);
|
|
end;
|
|
|
|
procedure TMatchs.Subscribe(const aPatterns: TRawUtf8DynArray; CaseInsensitive: boolean);
|
|
var
|
|
i, j, m, n: integer;
|
|
found: ^TMatchStore;
|
|
pat: PRawUtf8;
|
|
begin
|
|
m := length(aPatterns);
|
|
if m = 0 then
|
|
exit;
|
|
n := fMatchCount;
|
|
SetLength(fMatch, n + m);
|
|
pat := pointer(aPatterns);
|
|
for i := 1 to m do
|
|
begin
|
|
found := pointer(fMatch);
|
|
for j := 1 to n do
|
|
if StrComp(pointer(found^.PatternInstance), pointer(pat^)) = 0 then
|
|
begin
|
|
found := nil;
|
|
break;
|
|
end
|
|
else
|
|
inc(found);
|
|
if found <> nil then
|
|
with fMatch[n] do
|
|
begin
|
|
PatternInstance := pat^; // avoid GPF if aPatterns[] is released
|
|
Pattern.Prepare(PatternInstance, CaseInsensitive, {reuse=}true);
|
|
inc(n);
|
|
end;
|
|
inc(pat);
|
|
end;
|
|
fMatchCount := n;
|
|
if n <> length(fMatch) then
|
|
SetLength(fMatch, n);
|
|
end;
|
|
|
|
|
|
|
|
procedure SoundExComputeAnsi(var p: PAnsiChar; var result: cardinal;
|
|
Values: PSoundExValues);
|
|
var
|
|
n, v, old: PtrUInt;
|
|
begin
|
|
n := 0;
|
|
old := 0;
|
|
if Values <> nil then
|
|
repeat
|
|
v := NormToUpperByte[ord(p^)]; // also handle 8-bit WinAnsi (1252 accents)
|
|
if not (tcWord in TEXT_BYTES[v]) then
|
|
break;
|
|
inc(p);
|
|
dec(v, ord('B'));
|
|
if v > high(TSoundExValues) then
|
|
continue;
|
|
v := Values[v]; // get soundex value
|
|
if (v = 0) or
|
|
(v = old) then
|
|
continue; // invalid or dopple value
|
|
old := v;
|
|
result := result shl SOUNDEX_BITS;
|
|
inc(result, v);
|
|
inc(n);
|
|
if n = ((32 - 8) div SOUNDEX_BITS) then // first char use up to 8-bit
|
|
break; // result up to a cardinal size
|
|
until false;
|
|
end;
|
|
|
|
function SoundExComputeFirstCharAnsi(var p: PAnsiChar): cardinal;
|
|
label
|
|
err;
|
|
begin
|
|
if p = nil then
|
|
begin
|
|
err:result := 0;
|
|
exit;
|
|
end;
|
|
repeat
|
|
result := NormToUpperByte[ord(p^)]; // also handle 8-bit WinAnsi (CP 1252)
|
|
if result = 0 then
|
|
goto err; // end of input text, without a word
|
|
inc(p);
|
|
// trim initial spaces or 'H'
|
|
until AnsiChar(result) in ['A'..'G', 'I'..'Z'];
|
|
end;
|
|
|
|
procedure SoundExComputeUtf8(var U: PUtf8Char; var result: cardinal; Values: PSoundExValues);
|
|
var
|
|
n, v, old: cardinal;
|
|
begin
|
|
n := 0;
|
|
old := 0;
|
|
if Values <> nil then
|
|
repeat
|
|
v := GetNextUtf8Upper(U);
|
|
if not (tcWord in TEXT_BYTES[v]) then
|
|
break;
|
|
dec(v, ord('B'));
|
|
if v > high(TSoundExValues) then
|
|
continue;
|
|
v := Values[v]; // get soundex value
|
|
if (v = 0) or
|
|
(v = old) then
|
|
continue; // invalid or dopple value
|
|
old := v;
|
|
result := result shl SOUNDEX_BITS;
|
|
inc(result, v);
|
|
inc(n);
|
|
if n = ((32 - 8) div SOUNDEX_BITS) then // first char use up to 8-bit
|
|
break; // result up to a cardinal size
|
|
until false;
|
|
end;
|
|
|
|
function SoundExComputeFirstCharUtf8(var U: PUtf8Char): cardinal;
|
|
label
|
|
err;
|
|
begin
|
|
if U = nil then
|
|
begin
|
|
err:result := 0;
|
|
exit;
|
|
end;
|
|
repeat
|
|
result := GetNextUtf8Upper(U);
|
|
if result = 0 then
|
|
goto err; // end of input text, without a word
|
|
// trim initial spaces or 'H'
|
|
until AnsiChar(result) in ['A'..'G', 'I'..'Z'];
|
|
end;
|
|
|
|
|
|
{ TSynSoundEx }
|
|
|
|
const
|
|
/// english Soundex pronunciation scores
|
|
// - defines the default values used for the SoundEx() function below
|
|
// (used if Values parameter is nil)
|
|
ValueEnglish: TSoundExValues =
|
|
// B C D E F G H I J K L M N O P Q R S T U V W X Y Z
|
|
(1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2);
|
|
|
|
/// french Soundex pronunciation scores
|
|
// - can be used to override default values used for the SoundEx()
|
|
// function below
|
|
ValueFrench: TSoundExValues =
|
|
// B C D E F G H I J K L M N O P Q R S T U V W X Y Z
|
|
(1, 2, 3, 0, 9, 7, 0, 0, 7, 2, 4, 5, 5, 0, 1, 2, 6, 8, 3, 0, 9, 0, 8, 0, 8);
|
|
|
|
/// spanish Soundex pronunciation scores
|
|
// - can be used to override default values used for the SoundEx()
|
|
// function below
|
|
ValueSpanish: TSoundExValues =
|
|
// B C D E F G H I J K L M N O P Q R S T U V W X Y Z
|
|
(1, 2, 3, 0, 1, 2, 0, 0, 0, 2, 0, 5, 5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2);
|
|
|
|
SOUNDEXVALUES: array[TSynSoundExPronunciation] of PSoundExValues = (
|
|
@ValueEnglish,
|
|
@ValueFrench,
|
|
@ValueSpanish,
|
|
@ValueEnglish);
|
|
|
|
|
|
function TSynSoundEx.Ansi(A: PAnsiChar): boolean;
|
|
var
|
|
Value, c: cardinal;
|
|
begin
|
|
result := false;
|
|
if A = nil then
|
|
exit;
|
|
repeat
|
|
// test beginning of word
|
|
c := SoundExComputeFirstCharAnsi(A);
|
|
if c = 0 then
|
|
exit
|
|
else if c = FirstChar then
|
|
begin
|
|
// here we had the first char match -> check if word match UpperValue
|
|
Value := c - (ord('A') - 1);
|
|
SoundExComputeAnsi(A, Value, fValues);
|
|
if Value = search then
|
|
begin
|
|
result := true; // UpperValue found!
|
|
exit;
|
|
end;
|
|
end
|
|
else
|
|
repeat
|
|
if A^ = #0 then
|
|
exit
|
|
else if not (tcWord in TEXT_CHARS[NormToUpper[A^]]) then
|
|
break
|
|
else
|
|
inc(A);
|
|
until false;
|
|
// find beginning of next word
|
|
repeat
|
|
if A^ = #0 then
|
|
exit
|
|
else if tcWord in TEXT_CHARS[NormToUpper[A^]] then
|
|
break
|
|
else
|
|
inc(A);
|
|
until false;
|
|
until false;
|
|
end;
|
|
|
|
function TSynSoundEx.Utf8(U: PUtf8Char): boolean;
|
|
var
|
|
Value, c: cardinal;
|
|
V: PUtf8Char;
|
|
begin
|
|
result := false;
|
|
if U = nil then
|
|
exit;
|
|
repeat
|
|
// find beginning of word
|
|
c := SoundExComputeFirstCharUtf8(U);
|
|
if c = 0 then
|
|
exit
|
|
else if c = FirstChar then
|
|
begin
|
|
// here we had the first char match -> check if word match UpperValue
|
|
Value := c - (ord('A') - 1);
|
|
SoundExComputeUtf8(U, Value, fValues);
|
|
if Value = search then
|
|
begin
|
|
result := true; // UpperValue found!
|
|
exit;
|
|
end;
|
|
end
|
|
else
|
|
repeat
|
|
c := GetNextUtf8Upper(U);
|
|
if c = 0 then
|
|
exit;
|
|
until not (tcWord in TEXT_BYTES[c]);
|
|
// find beginning of next word
|
|
repeat
|
|
if U = nil then
|
|
exit;
|
|
V := U;
|
|
c := GetNextUtf8Upper(U);
|
|
if c = 0 then
|
|
exit;
|
|
until tcWord in TEXT_BYTES[c];
|
|
U := V;
|
|
until U = nil;
|
|
end;
|
|
|
|
function TSynSoundEx.Prepare(UpperValue: PAnsiChar; Lang: PSoundExValues): boolean;
|
|
begin
|
|
fValues := Lang;
|
|
Search := SoundExAnsi(UpperValue, nil, Lang);
|
|
if Search = 0 then
|
|
result := false
|
|
else
|
|
begin
|
|
FirstChar := SoundExComputeFirstCharAnsi(UpperValue);
|
|
result := true;
|
|
end;
|
|
end;
|
|
|
|
function TSynSoundEx.Prepare(UpperValue: PAnsiChar;
|
|
Lang: TSynSoundExPronunciation): boolean;
|
|
begin
|
|
result := Prepare(UpperValue, SOUNDEXVALUES[Lang]);
|
|
end;
|
|
|
|
function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar; Lang: PSoundExValues): cardinal;
|
|
begin
|
|
result := SoundExComputeFirstCharAnsi(A);
|
|
if result <> 0 then
|
|
begin
|
|
dec(result, ord('A') - 1); // first Soundex char is first char
|
|
SoundExComputeAnsi(A, result, Lang);
|
|
end;
|
|
if next <> nil then
|
|
begin
|
|
while tcWord in TEXT_CHARS[NormToUpper[A^]] do
|
|
inc(A); // go to end of word
|
|
next^ := A;
|
|
end;
|
|
end;
|
|
|
|
function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar;
|
|
Lang: TSynSoundExPronunciation): cardinal;
|
|
begin
|
|
result := SoundExAnsi(A, next, SOUNDEXVALUES[Lang]);
|
|
end;
|
|
|
|
function SoundExUtf8(U: PUtf8Char; next: PPUtf8Char;
|
|
Lang: TSynSoundExPronunciation): cardinal;
|
|
begin
|
|
result := SoundExComputeFirstCharUtf8(U);
|
|
if result <> 0 then
|
|
begin
|
|
dec(result, ord('A') - 1); // first Soundex char is first char
|
|
SoundExComputeUtf8(U, result, SOUNDEXVALUES[Lang]);
|
|
end;
|
|
if next <> nil then
|
|
next^ := FindNextUtf8WordBegin(U);
|
|
end;
|
|
|
|
|
|
{ ****************** Efficient CSV Parsing using RTTI }
|
|
|
|
function TDynArrayLoadCsv(var Value: TDynArray; Csv: PUtf8Char;
|
|
Intern: TRawUtf8Interning): boolean;
|
|
var
|
|
rt: TRttiCustom;
|
|
pr: PRttiCustomProp;
|
|
p, v: PUtf8Char;
|
|
s: RawUtf8;
|
|
mapcount, mapped: PtrInt;
|
|
rec: PAnsiChar;
|
|
map: PRttiCustomPropDynArray;
|
|
m: ^PRttiCustomProp;
|
|
extcount, mcount: integer;
|
|
ext: PInteger;
|
|
begin
|
|
result := false;
|
|
rt := Value.Info.ArrayRtti;
|
|
if (rt = nil) or
|
|
(rt.Parser <> ptRecord) or
|
|
(rt.Props.Count = 0) then
|
|
exit;
|
|
// parse the CSV headers
|
|
mapped := 0;
|
|
mapcount := 0;
|
|
SetLength(map, 32);
|
|
p := pointer(GetNextLine(Csv, Csv));
|
|
if Csv = nil then
|
|
exit; // no data
|
|
while p <> nil do
|
|
begin
|
|
GetNextItem(p, ',', '"', s);
|
|
if s = '' then
|
|
exit; // we don't support void headers
|
|
if mapcount = length(map) then
|
|
SetLength(map, NextGrow(mapcount));
|
|
pr := rt.Props.Find(s);
|
|
if pr <> nil then
|
|
begin
|
|
map[mapcount] := pr; // found a matching field
|
|
inc(mapped);
|
|
end;
|
|
inc(mapcount);
|
|
end;
|
|
if mapped = 0 then
|
|
exit; // no field matching any header
|
|
// parse the value rows
|
|
extcount := 0;
|
|
ext := Value.CountExternal;
|
|
if ext = nil then
|
|
Value.UseExternalCount(@extcount); // faster Value.NewPtr
|
|
v := Csv;
|
|
while v^ in [#10, #13] do
|
|
inc(v);
|
|
while v^ <> #0 do
|
|
begin
|
|
rec := Value.NewPtr;
|
|
m := pointer(map);
|
|
mcount := mapcount;
|
|
repeat
|
|
// parse next value
|
|
Csv := v;
|
|
if v^ = '"' then
|
|
v := GotoEndOfQuotedString(v); // special handling of double quotes
|
|
while (v^ <> ',') and
|
|
(v^ > #13) do
|
|
inc(v);
|
|
if mcount <> 0 then
|
|
begin
|
|
if m^ <> nil then // not matching fields are just ignored
|
|
begin
|
|
if Csv^ = '"' then
|
|
begin
|
|
UnQuoteSqlStringVar(Csv, s);
|
|
if Intern <> nil then
|
|
Intern.UniqueText(s);
|
|
end
|
|
else
|
|
Intern.Unique(s, Csv, v - Csv);
|
|
m^.Value.ValueSetText(rec + m^.OffsetSet, s);
|
|
end;
|
|
inc(m);
|
|
dec(mcount);
|
|
end;
|
|
if v^ <> ',' then
|
|
break;
|
|
inc(v);
|
|
until v^ in [#0, #10, #13];
|
|
// go to next row
|
|
while v^ in [#10, #13] do
|
|
inc(v);
|
|
end;
|
|
if Value.Count = 0 then
|
|
Value.Capacity := 0
|
|
else
|
|
DynArrayFakeLength(Value.Value^, Value.Count);
|
|
Value.UseExternalCount(ext); // restore fCountP if local n count was used
|
|
result := true;
|
|
end;
|
|
|
|
function DynArrayLoadCsv(var Value; const Csv: RawUtf8; TypeInfo: PRttiInfo;
|
|
Intern: TRawUtf8Interning): boolean;
|
|
var
|
|
da: TDynArray;
|
|
begin
|
|
da.Init(TypeInfo, Value);
|
|
result := TDynArrayLoadCsv(da, pointer(CSV), Intern);
|
|
end;
|
|
|
|
|
|
{ ****************** Versatile Expression Search Engine }
|
|
|
|
|
|
function ToText(r: TExprParserResult): PShortString;
|
|
begin
|
|
result := GetEnumName(TypeInfo(TExprParserResult), ord(r));
|
|
end;
|
|
|
|
function ToUtf8(r: TExprParserResult): RawUtf8;
|
|
begin
|
|
result := UnCamelCase(TrimLeftLowerCaseShort(ToText(r)));
|
|
end;
|
|
|
|
|
|
{ TExprNode }
|
|
|
|
function TExprNode.Append(node: TExprNode): boolean;
|
|
begin
|
|
result := node <> nil;
|
|
if result then
|
|
Last.fNext := node;
|
|
end;
|
|
|
|
constructor TExprNode.Create(nodeType: TExprNodeType);
|
|
begin
|
|
inherited Create; // may have been overriden
|
|
fNodeType := nodeType;
|
|
end;
|
|
|
|
destructor TExprNode.Destroy;
|
|
begin
|
|
fNext.Free;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
function TExprNode.Last: TExprNode;
|
|
begin
|
|
result := self;
|
|
while result.Next <> nil do
|
|
result := result.Next;
|
|
end;
|
|
|
|
|
|
{ TParserAbstract }
|
|
|
|
constructor TParserAbstract.Create;
|
|
begin
|
|
inherited Create; // may have been overriden
|
|
Initialize;
|
|
end;
|
|
|
|
destructor TParserAbstract.Destroy;
|
|
begin
|
|
Clear;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TParserAbstract.Clear;
|
|
begin
|
|
fWordCount := 0;
|
|
fWords := nil;
|
|
fExpression := '';
|
|
FreeAndNil(fFirstNode);
|
|
end;
|
|
|
|
function TParserAbstract.ParseExpr: TExprNode;
|
|
begin
|
|
result := ParseFactor;
|
|
ParseNextCurrentWord;
|
|
if (fCurrentWord = '') or
|
|
(fCurrentWord = ')') then
|
|
exit;
|
|
if PropNameEquals(fCurrentWord, fAndWord) then
|
|
begin
|
|
// w1 & w2 = w1 AND w2
|
|
ParseNextCurrentWord;
|
|
if result.Append(ParseExpr) then
|
|
result.Append(TExprNode.Create(entAnd));
|
|
exit;
|
|
end
|
|
else if PropNameEquals(fCurrentWord, fOrWord) then
|
|
begin
|
|
// w1 + w2 = w1 OR w2
|
|
ParseNextCurrentWord;
|
|
if result.Append(ParseExpr) then
|
|
result.Append(TExprNode.Create(entOr));
|
|
exit;
|
|
end
|
|
else if fNoWordIsAnd and result.Append(ParseExpr) then
|
|
// 'w1 w2' = 'w1 & w2'
|
|
result.Append(TExprNode.Create(entAnd));
|
|
end;
|
|
|
|
function TParserAbstract.ParseFactor: TExprNode;
|
|
begin
|
|
if fCurrentError <> eprSuccess then
|
|
result := nil
|
|
else if PropNameEquals(fCurrentWord, fNotWord) then
|
|
begin
|
|
ParseNextCurrentWord;
|
|
result := ParseFactor;
|
|
if fCurrentError <> eprSuccess then
|
|
exit;
|
|
result.Append(TExprNode.Create(entNot));
|
|
end
|
|
else
|
|
result := ParseTerm;
|
|
end;
|
|
|
|
function TParserAbstract.ParseTerm: TExprNode;
|
|
begin
|
|
result := nil;
|
|
if fCurrentError <> eprSuccess then
|
|
exit;
|
|
if fCurrentWord = '(' then
|
|
begin
|
|
ParseNextCurrentWord;
|
|
result := ParseExpr;
|
|
if fCurrentError <> eprSuccess then
|
|
exit;
|
|
if fCurrentWord <> ')' then
|
|
begin
|
|
FreeAndNil(result);
|
|
fCurrentError := eprMissingParenthesis;
|
|
end;
|
|
end
|
|
else if fCurrentWord = '' then
|
|
begin
|
|
result := nil;
|
|
fCurrentError := eprMissingFinalWord;
|
|
end
|
|
else
|
|
try // calls meta-class overriden constructor
|
|
result := fWordClass.Create(self, fCurrentWord);
|
|
fCurrentError := TExprNodeWordAbstract(result).ParseWord;
|
|
if fCurrentError <> eprSuccess then
|
|
begin
|
|
FreeAndNil(result);
|
|
exit;
|
|
end;
|
|
SetLength(fWords, fWordCount + 1);
|
|
fWords[fWordCount] := TExprNodeWordAbstract(result);
|
|
inc(fWordCount);
|
|
except
|
|
FreeAndNil(result);
|
|
fCurrentError := eprInvalidExpression;
|
|
end;
|
|
end;
|
|
|
|
function TParserAbstract.Parse(const aExpression: RawUtf8): TExprParserResult;
|
|
var
|
|
depth: integer;
|
|
n: TExprNode;
|
|
begin
|
|
Clear;
|
|
fCurrentError := eprSuccess;
|
|
fCurrent := pointer(aExpression);
|
|
ParseNextCurrentWord;
|
|
if fCurrentWord = '' then
|
|
begin
|
|
result := eprNoExpression;
|
|
exit;
|
|
end;
|
|
fFirstNode := ParseExpr;
|
|
result := fCurrentError;
|
|
if result = eprSuccess then
|
|
begin
|
|
depth := 0;
|
|
n := fFirstNode;
|
|
while n <> nil do
|
|
begin
|
|
case n.NodeType of
|
|
entWord:
|
|
begin
|
|
inc(depth);
|
|
if depth > high(fFoundStack) then
|
|
begin
|
|
result := eprTooManyParenthesis;
|
|
break;
|
|
end;
|
|
end;
|
|
entOr, entAnd:
|
|
dec(depth);
|
|
end;
|
|
n := n.Next;
|
|
end;
|
|
end;
|
|
if result = eprSuccess then
|
|
fExpression := aExpression
|
|
else
|
|
Clear;
|
|
fCurrent := nil;
|
|
end;
|
|
|
|
class function TParserAbstract.ParseError(const aExpression: RawUtf8): RawUtf8;
|
|
var
|
|
parser: TParserAbstract;
|
|
res: TExprParserResult;
|
|
begin
|
|
parser := Create;
|
|
try
|
|
res := parser.Parse(aExpression);
|
|
if res = eprSuccess then
|
|
result := ''
|
|
else
|
|
result := ToUtf8(res);
|
|
finally
|
|
parser.Free;
|
|
end;
|
|
end;
|
|
|
|
function TParserAbstract.Execute: boolean;
|
|
var
|
|
n: TExprNode;
|
|
st: PBoolean;
|
|
begin
|
|
// code below compiles very efficiently on FPC/x86-64
|
|
st := @fFoundStack;
|
|
n := fFirstNode;
|
|
repeat
|
|
case n.NodeType of
|
|
entWord:
|
|
begin
|
|
st^ := TExprNodeWordAbstract(n).fFound;
|
|
inc(st); // see eprTooManyParenthesis above to avoid buffer overflow
|
|
end;
|
|
entNot:
|
|
PAnsiChar(st)[-1] := AnsiChar(ord(PAnsiChar(st)[-1]) xor 1);
|
|
entOr:
|
|
begin
|
|
dec(st);
|
|
PAnsiChar(st)[-1] := AnsiChar(st^ or boolean(PAnsiChar(st)[-1]));
|
|
end; { TODO : optimize TExprParser OR when left member is already TRUE }
|
|
entAnd:
|
|
begin
|
|
dec(st);
|
|
PAnsiChar(st)[-1] := AnsiChar(st^ and boolean(PAnsiChar(st)[-1]));
|
|
end;
|
|
end;
|
|
n := n.Next;
|
|
until n = nil;
|
|
result := boolean(PAnsiChar(st)[-1]);
|
|
end;
|
|
|
|
|
|
{ TExprParserAbstract }
|
|
|
|
procedure TExprParserAbstract.Initialize;
|
|
begin
|
|
fAndWord := '&';
|
|
fOrWord := '+';
|
|
fNotWord := '-';
|
|
fNoWordIsAnd := true;
|
|
end;
|
|
|
|
procedure TExprParserAbstract.ParseNextCurrentWord;
|
|
var
|
|
P: PUtf8Char;
|
|
begin
|
|
fCurrentWord := '';
|
|
P := fCurrent;
|
|
if P = nil then
|
|
exit;
|
|
while P^ in [#1..' '] do
|
|
inc(P);
|
|
if P^ = #0 then
|
|
exit;
|
|
if P^ in PARSER_STOPCHAR then
|
|
begin
|
|
FastSetString(fCurrentWord, P, 1);
|
|
fCurrent := P + 1;
|
|
end
|
|
else
|
|
begin
|
|
fCurrent := P;
|
|
ParseNextWord;
|
|
end;
|
|
end;
|
|
|
|
procedure TExprParserAbstract.ParseNextWord;
|
|
const
|
|
STOPCHAR = PARSER_STOPCHAR + [#0, ' '];
|
|
var
|
|
P: PUtf8Char;
|
|
begin
|
|
P := fCurrent;
|
|
while not (P^ in STOPCHAR) do
|
|
inc(P);
|
|
FastSetString(fCurrentWord, fCurrent, P - fCurrent);
|
|
fCurrent := P;
|
|
end;
|
|
|
|
|
|
{ TExprNodeWordAbstract }
|
|
|
|
constructor TExprNodeWordAbstract.Create(aOwner: TParserAbstract;
|
|
const aWord: RawUtf8);
|
|
begin
|
|
inherited Create(entWord);
|
|
fWord := aWord;
|
|
fOwner := aOwner;
|
|
end;
|
|
|
|
|
|
{ TExprParserMatchNode }
|
|
|
|
type
|
|
TExprParserMatchNode = class(TExprNodeWordAbstract)
|
|
protected
|
|
fMatch: TMatch;
|
|
function ParseWord: TExprParserResult; override;
|
|
end;
|
|
|
|
function TExprParserMatchNode.ParseWord: TExprParserResult;
|
|
begin
|
|
fMatch.Prepare(fWord, (fOwner as TExprParserMatch).fCaseSensitive, {reuse=}true);
|
|
result := eprSuccess;
|
|
end;
|
|
|
|
|
|
{ TExprParserMatch }
|
|
|
|
var
|
|
// equals 1 for ['0'..'9', 'A'..'Z', 'a'..'z', #$80..#$ff]
|
|
ROUGH_UTF8: TAnsiCharToByte;
|
|
|
|
constructor TExprParserMatch.Create(aCaseSensitive: boolean);
|
|
var
|
|
c: AnsiChar;
|
|
begin
|
|
inherited Create;
|
|
if ROUGH_UTF8['0'] = 0 then // ensure is initialized for Search()
|
|
for c := low(c) to high(c) do
|
|
if c in ['0'..'9', 'A'..'Z', 'a'..'z', #$80..#$ff] then
|
|
ROUGH_UTF8[c] := 1;
|
|
fCaseSensitive := aCaseSensitive;
|
|
end;
|
|
|
|
procedure TExprParserMatch.Initialize;
|
|
begin
|
|
inherited Initialize;
|
|
fWordClass := TExprParserMatchNode;
|
|
end;
|
|
|
|
function TExprParserMatch.Search(const aText: RawUtf8): boolean;
|
|
begin
|
|
result := Search(pointer(aText), length(aText));
|
|
end;
|
|
|
|
function TExprParserMatch.Search(aText: PUtf8Char; aTextLen: PtrInt): boolean;
|
|
var
|
|
P, PEnd: PUtf8Char;
|
|
n: PtrInt;
|
|
tab: PAnsiCharToByte;
|
|
begin
|
|
P := aText;
|
|
if (P = nil) or
|
|
(fWords = nil) then
|
|
begin
|
|
result := false;
|
|
exit;
|
|
end;
|
|
// reset any previous resultset
|
|
if fMatchedLastSet > 0 then
|
|
begin
|
|
n := fWordCount;
|
|
repeat
|
|
dec(n);
|
|
fWords[n].fFound := false;
|
|
until n = 0;
|
|
fMatchedLastSet := 0;
|
|
end;
|
|
PEnd := P + aTextLen;
|
|
while (P < PEnd) and
|
|
(fMatchedLastSet < fWordCount) do
|
|
begin
|
|
// recognize next word boudaries
|
|
tab := @ROUGH_UTF8;
|
|
while tab[P^] = 0 do
|
|
begin
|
|
inc(P);
|
|
if P = PEnd then
|
|
break;
|
|
end;
|
|
if P = PEnd then
|
|
break;
|
|
aText := P;
|
|
repeat
|
|
inc(P);
|
|
until (P = PEnd) or
|
|
(tab[P^] = 0);
|
|
// apply the expression nodes to this word
|
|
aTextLen := P - aText; // now aText/aTextLen point to a word
|
|
n := fWordCount;
|
|
repeat
|
|
dec(n);
|
|
with TExprParserMatchNode(fWords[n]) do
|
|
if not fFound and
|
|
fMatch.Match(aText, aTextLen) then
|
|
begin
|
|
fFound := true;
|
|
inc(fMatchedLastSet);
|
|
end;
|
|
until n = 0;
|
|
end;
|
|
result := Execute;
|
|
end;
|
|
|
|
|
|
|
|
{ ****************** Bloom Filter Probabilistic Index }
|
|
|
|
{ TSynBloomFilter }
|
|
|
|
const
|
|
BLOOM_VERSION = 0;
|
|
BLOOM_MAXHASH = 32; // only 7 is needed for 1% false positive ratio
|
|
|
|
constructor TSynBloomFilter.Create;
|
|
begin
|
|
fHasher := @crc32c; // default/standard/mORMot1 hash function
|
|
end;
|
|
|
|
constructor TSynBloomFilter.Create(aSize: integer;
|
|
aFalsePositivePercent: double; aHasher: THasher);
|
|
const
|
|
LN2 = 0.69314718056;
|
|
begin
|
|
Create; // set fHasher := crc32c + may have been overriden
|
|
if aSize < 0 then
|
|
fSize := 1000
|
|
else
|
|
fSize := aSize;
|
|
if aFalsePositivePercent <= 0 then
|
|
fFalsePositivePercent := 1
|
|
else if aFalsePositivePercent > 100 then
|
|
fFalsePositivePercent := 100
|
|
else
|
|
fFalsePositivePercent := aFalsePositivePercent;
|
|
if @aHasher <> nil then
|
|
fHasher := aHasher;
|
|
// see http://stackoverflow.com/a/22467497
|
|
fBits := Round(-ln(fFalsePositivePercent / 100) * aSize / (LN2 * LN2));
|
|
fHashFunctions := Round(fBits / fSize * LN2);
|
|
if fHashFunctions = 0 then
|
|
fHashFunctions := 1
|
|
else if fHashFunctions > BLOOM_MAXHASH then
|
|
fHashFunctions := BLOOM_MAXHASH;
|
|
Reset;
|
|
end;
|
|
|
|
constructor TSynBloomFilter.Create(const aSaved: RawByteString;
|
|
aMagic: cardinal; aHasher: THasher);
|
|
begin
|
|
Create; // set fHasher := crc32c + may have been overriden
|
|
if @aHasher <> nil then
|
|
fHasher := aHasher;
|
|
if not LoadFrom(aSaved, aMagic) then // will load fSize+fBits+fHashFunctions
|
|
raise ESynException.CreateUtf8('%.Create with invalid aSaved content', [self]);
|
|
end;
|
|
|
|
procedure TSynBloomFilter.Insert(const aValue: RawByteString);
|
|
begin
|
|
Insert(pointer(aValue), length(aValue));
|
|
end;
|
|
|
|
procedure TSynBloomFilter.Insert(aValue: pointer; aValueLen: integer);
|
|
var
|
|
h: integer;
|
|
h1, h2: cardinal; // https://goo.gl/Pls5wi
|
|
begin
|
|
if (self = nil) or
|
|
(aValueLen <= 0) or
|
|
(fBits = 0) then
|
|
exit;
|
|
h1 := fHasher(0, aValue, aValueLen);
|
|
if fHashFunctions = 1 then
|
|
h2 := 0
|
|
else
|
|
h2 := fHasher(h1, aValue, aValueLen);
|
|
fSafe.WriteLock;
|
|
try
|
|
for h := 0 to fHashFunctions - 1 do
|
|
begin
|
|
SetBitPtr(pointer(fStore), h1 mod fBits);
|
|
inc(h1, h2);
|
|
end;
|
|
inc(fInserted);
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
function TSynBloomFilter.MayExist(const aValue: RawByteString): boolean;
|
|
begin
|
|
result := MayExist(pointer(aValue), length(aValue));
|
|
end;
|
|
|
|
function TSynBloomFilter.MayExist(aValue: pointer; aValueLen: integer): boolean;
|
|
var
|
|
h: integer;
|
|
h1, h2: cardinal; // https://goo.gl/Pls5wi
|
|
begin
|
|
result := false;
|
|
if (self = nil) or
|
|
(aValueLen <= 0) or
|
|
(fBits = 0) then
|
|
exit;
|
|
h1 := fHasher(0, aValue, aValueLen);
|
|
if fHashFunctions = 1 then
|
|
h2 := 0
|
|
else
|
|
h2 := fHasher(h1, aValue, aValueLen);
|
|
fSafe.ReadOnlyLock; // allow concurrent reads
|
|
try
|
|
for h := 0 to fHashFunctions - 1 do
|
|
if GetBitPtr(pointer(fStore), h1 mod fBits) then
|
|
inc(h1, h2)
|
|
else
|
|
exit;
|
|
finally
|
|
fSafe.ReadOnlyUnLock;
|
|
end;
|
|
result := true;
|
|
end;
|
|
|
|
procedure TSynBloomFilter.Reset;
|
|
begin
|
|
fSafe.WriteLock;
|
|
try
|
|
if fStore = '' then
|
|
SetLength(fStore, (fBits shr 3) + 1);
|
|
FillcharFast(pointer(fStore)^, length(fStore), 0);
|
|
fInserted := 0;
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
function TSynBloomFilter.SaveTo(aMagic: cardinal): RawByteString;
|
|
var
|
|
W: TBufferWriter;
|
|
BufLen: integer;
|
|
temp: array[word] of byte;
|
|
begin
|
|
BufLen := length(fStore) + 100;
|
|
if BufLen <= SizeOf(temp) then
|
|
W := TBufferWriter.Create(TRawByteStringStream, @temp, SizeOf(temp))
|
|
else
|
|
W := TBufferWriter.Create(TRawByteStringStream, BufLen);
|
|
try
|
|
SaveTo(W, aMagic);
|
|
W.Flush;
|
|
result := TRawByteStringStream(W.Stream).DataString;
|
|
finally
|
|
W.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure TSynBloomFilter.SaveTo(aDest: TBufferWriter; aMagic: cardinal);
|
|
begin
|
|
aDest.Write4(aMagic);
|
|
aDest.Write1(BLOOM_VERSION);
|
|
fSafe.ReadOnlyLock;
|
|
try
|
|
aDest.Write8(@fFalsePositivePercent);
|
|
aDest.Write4(fSize);
|
|
aDest.Write4(fBits);
|
|
aDest.Write1(fHashFunctions);
|
|
aDest.Write4(fInserted);
|
|
// warning: fHasher is NOT persisted yet
|
|
ZeroCompress(pointer(fStore), Length(fStore), aDest);
|
|
finally
|
|
fSafe.ReadOnlyUnLock;
|
|
end;
|
|
end;
|
|
|
|
function TSynBloomFilter.LoadFrom(const aSaved: RawByteString; aMagic: cardinal): boolean;
|
|
begin
|
|
result := LoadFrom(pointer(aSaved), length(aSaved));
|
|
end;
|
|
|
|
function TSynBloomFilter.LoadFrom(P: PByte; PLen: integer; aMagic: cardinal): boolean;
|
|
var
|
|
start: PByte;
|
|
version: integer;
|
|
begin
|
|
result := false;
|
|
start := P;
|
|
if (P = nil) or
|
|
(PLen < 32) or
|
|
(PCardinal(P)^ <> aMagic) then
|
|
exit;
|
|
inc(P, 4);
|
|
version := P^;
|
|
inc(P);
|
|
if version > BLOOM_VERSION then
|
|
exit;
|
|
fSafe.WriteLock;
|
|
try
|
|
fFalsePositivePercent := unaligned(PDouble(P)^);
|
|
inc(P, 8);
|
|
if (fFalsePositivePercent <= 0) or
|
|
(fFalsePositivePercent > 100) then
|
|
exit;
|
|
fSize := PCardinal(P)^;
|
|
inc(P, 4);
|
|
fBits := PCardinal(P)^;
|
|
inc(P, 4);
|
|
if fBits < fSize then
|
|
exit;
|
|
fHashFunctions := P^;
|
|
inc(P);
|
|
if fHashFunctions - 1 >= BLOOM_MAXHASH then
|
|
exit;
|
|
Reset;
|
|
fInserted := PCardinal(P)^;
|
|
inc(P, 4);
|
|
ZeroDecompress(P, PLen - (PAnsiChar(P) - PAnsiChar(start)), fStore);
|
|
result := length(fStore) = integer(fBits shr 3) + 1;
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
|
|
{ TSynBloomFilterDiff }
|
|
|
|
type
|
|
TBloomDiffHeaderKind = (bdDiff, bdFull, bdUpToDate);
|
|
TBloomDiffHeader = packed record
|
|
kind: TBloomDiffHeaderKind;
|
|
size: cardinal;
|
|
inserted: cardinal;
|
|
revision: Int64;
|
|
crc: cardinal;
|
|
end;
|
|
|
|
procedure TSynBloomFilterDiff.Insert(aValue: pointer; aValueLen: integer);
|
|
begin
|
|
fSafe.WriteLock;
|
|
try
|
|
inherited Insert(aValue, aValueLen);
|
|
inc(fRevision);
|
|
inc(fSnapshotInsertCount);
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
procedure TSynBloomFilterDiff.Reset;
|
|
begin
|
|
fSafe.WriteLock;
|
|
try
|
|
inherited Reset;
|
|
fSnapshotAfterInsertCount := fSize shr 5;
|
|
fSnapShotAfterMinutes := 30;
|
|
fSnapshotTimestamp := 0;
|
|
fSnapshotInsertCount := 0;
|
|
fRevision := UnixTimeUtc shl 31;
|
|
fKnownRevision := 0;
|
|
fKnownStore := '';
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
procedure TSynBloomFilterDiff.DiffSnapshot;
|
|
begin
|
|
fSafe.WriteLock;
|
|
try
|
|
fKnownRevision := fRevision;
|
|
fSnapshotInsertCount := 0;
|
|
FastSetRawByteString(fKnownStore, pointer(fStore), length(fStore));
|
|
if fSnapShotAfterMinutes = 0 then
|
|
fSnapshotTimestamp := 0
|
|
else
|
|
fSnapshotTimestamp := GetTickCount64 + fSnapShotAfterMinutes * 60000;
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
function TSynBloomFilterDiff.SaveToDiff(const aKnownRevision: Int64): RawByteString;
|
|
var
|
|
head: TBloomDiffHeader;
|
|
W: TBufferWriter;
|
|
temp: array[word] of byte;
|
|
begin
|
|
fSafe.ReadWriteLock; // DiffSnapshot makes a WriteLock
|
|
try
|
|
if aKnownRevision = fRevision then
|
|
head.kind := bdUpToDate
|
|
else if (fKnownRevision = 0) or
|
|
(fSnapshotInsertCount > fSnapshotAfterInsertCount) or
|
|
((fSnapshotInsertCount > 0) and
|
|
(fSnapshotTimestamp <> 0) and
|
|
(GetTickCount64 > fSnapshotTimestamp)) then
|
|
begin
|
|
DiffSnapshot;
|
|
head.kind := bdFull;
|
|
end
|
|
else if (aKnownRevision < fKnownRevision) or
|
|
(aKnownRevision > fRevision) then
|
|
head.kind := bdFull
|
|
else
|
|
head.kind := bdDiff;
|
|
head.size := length(fStore);
|
|
head.inserted := fInserted;
|
|
head.revision := fRevision;
|
|
head.crc := fHasher(0, @head, SizeOf(head) - SizeOf(head.crc));
|
|
if head.kind = bdUpToDate then
|
|
begin
|
|
FastSetRawByteString(result, @head, SizeOf(head));
|
|
exit;
|
|
end;
|
|
if head.size + 100 <= SizeOf(temp) then
|
|
W := TBufferWriter.Create(TRawByteStringStream, @temp, SizeOf(temp))
|
|
else
|
|
W := TBufferWriter.Create(TRawByteStringStream, head.size + 100);
|
|
try
|
|
W.Write(@head, SizeOf(head));
|
|
case head.kind of
|
|
bdFull:
|
|
SaveTo(W);
|
|
bdDiff:
|
|
ZeroCompressXor(pointer(fStore), pointer(fKnownStore), head.size, W);
|
|
end;
|
|
result := W.FlushTo;
|
|
finally
|
|
W.Free;
|
|
end;
|
|
finally
|
|
fSafe.ReadWriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
function TSynBloomFilterDiff.DiffKnownRevision(const aDiff: RawByteString): Int64;
|
|
var
|
|
head: ^TBloomDiffHeader absolute aDiff;
|
|
begin
|
|
if (length(aDiff) < SizeOf(head^)) or
|
|
(head.kind > high(TBloomDiffHeaderKind)) or
|
|
(head.size <> cardinal(length(fStore))) or
|
|
(head.crc <> fHasher(0, pointer(head), SizeOf(head^) - SizeOf(head.crc))) then
|
|
result := 0
|
|
else
|
|
result := head.Revision;
|
|
end;
|
|
|
|
function TSynBloomFilterDiff.LoadFromDiff(const aDiff: RawByteString): boolean;
|
|
var
|
|
head: ^TBloomDiffHeader absolute aDiff;
|
|
P: PByte;
|
|
PLen: integer;
|
|
begin
|
|
result := false;
|
|
P := pointer(aDiff);
|
|
PLen := length(aDiff);
|
|
if (PLen < SizeOf(head^)) or
|
|
(head.kind > high(head.kind)) or
|
|
(head.crc <> fHasher(0, pointer(head), SizeOf(head^) - SizeOf(head.crc))) then
|
|
exit;
|
|
if (fStore <> '') and
|
|
(head.size <> cardinal(length(fStore))) then
|
|
exit;
|
|
inc(P, SizeOf(head^));
|
|
dec(PLen, SizeOf(head^));
|
|
fSafe.WriteLock;
|
|
try
|
|
case head.kind of
|
|
bdFull:
|
|
result := LoadFrom(P, PLen);
|
|
bdDiff:
|
|
if fStore <> '' then
|
|
result := ZeroDecompressOr(pointer(P), Pointer(fStore), PLen, head.size);
|
|
bdUpToDate:
|
|
result := true;
|
|
end;
|
|
if result then
|
|
begin
|
|
fRevision := head.revision;
|
|
fInserted := head.inserted;
|
|
end;
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure ZeroCompress(P: PAnsiChar; Len: integer; Dest: TBufferWriter);
|
|
var
|
|
PEnd, beg, zero: PAnsiChar;
|
|
crc: cardinal;
|
|
begin
|
|
Dest.WriteVarUInt32(Len);
|
|
PEnd := P + Len;
|
|
beg := P;
|
|
crc := 0;
|
|
while P < PEnd do
|
|
begin
|
|
while (P^ <> #0) and
|
|
(P < PEnd) do
|
|
inc(P);
|
|
zero := P;
|
|
while (P^ = #0) and
|
|
(P < PEnd) do
|
|
inc(P);
|
|
if P - zero > 3 then
|
|
begin
|
|
Len := zero - beg;
|
|
crc := crc32c(crc, beg, Len);
|
|
Dest.WriteVarUInt32(Len);
|
|
Dest.Write(beg, Len);
|
|
Len := P - zero;
|
|
crc := crc32c(crc, @Len, SizeOf(Len));
|
|
Dest.WriteVarUInt32(Len - 3);
|
|
beg := P;
|
|
end;
|
|
end;
|
|
Len := P - beg;
|
|
if Len > 0 then
|
|
begin
|
|
crc := crc32c(crc, beg, Len);
|
|
Dest.WriteVarUInt32(Len);
|
|
Dest.Write(beg, Len);
|
|
end;
|
|
Dest.Write4(crc);
|
|
end;
|
|
|
|
procedure ZeroCompressXor(New, Old: PAnsiChar; Len: cardinal; Dest: TBufferWriter);
|
|
var
|
|
beg, same, index, crc, L: cardinal;
|
|
begin
|
|
Dest.WriteVarUInt32(Len);
|
|
beg := 0;
|
|
index := 0;
|
|
crc := 0;
|
|
while index < Len do
|
|
begin
|
|
while (New[index] <> Old[index]) and
|
|
(index < Len) do
|
|
inc(index);
|
|
same := index;
|
|
while (New[index] = Old[index]) and
|
|
(index < Len) do
|
|
inc(index);
|
|
L := index - same;
|
|
if L > 3 then
|
|
begin
|
|
Dest.WriteVarUInt32(same - beg);
|
|
Dest.WriteXor(New + beg, Old + beg, same - beg, @crc);
|
|
crc := crc32c(crc, @L, SizeOf(L));
|
|
Dest.WriteVarUInt32(L - 3);
|
|
beg := index;
|
|
end;
|
|
end;
|
|
L := index - beg;
|
|
if L > 0 then
|
|
begin
|
|
Dest.WriteVarUInt32(L);
|
|
Dest.WriteXor(New + beg, Old + beg, L, @crc);
|
|
end;
|
|
Dest.Write4(crc);
|
|
end;
|
|
|
|
procedure ZeroDecompress(P: PByte; Len: integer; var Dest: RawByteString);
|
|
var
|
|
PEnd, D, DEnd: PAnsiChar;
|
|
DestLen, crc: cardinal;
|
|
begin
|
|
PEnd := PAnsiChar(P) + Len - 4;
|
|
DestLen := FromVarUInt32(P);
|
|
FastNewRawByteString(Dest, DestLen);
|
|
D := pointer(Dest);
|
|
DEnd := D + DestLen;
|
|
crc := 0;
|
|
while PAnsiChar(P) < PEnd do
|
|
begin
|
|
Len := FromVarUInt32(P);
|
|
if D + Len > DEnd then
|
|
break;
|
|
MoveFast(P^, D^, Len);
|
|
crc := crc32c(crc, D, Len);
|
|
inc(P, Len);
|
|
inc(D, Len);
|
|
if PAnsiChar(P) >= PEnd then
|
|
break;
|
|
Len := FromVarUInt32(P) + 3;
|
|
if D + Len > DEnd then
|
|
break;
|
|
FillCharFast(D^, Len, 0);
|
|
crc := crc32c(crc, @Len, SizeOf(Len));
|
|
inc(D, Len);
|
|
end;
|
|
if crc <> PCardinal(P)^ then
|
|
Dest := '';
|
|
end;
|
|
|
|
function ZeroDecompressOr(P, Dest: PAnsiChar; Len, DestLen: integer): boolean;
|
|
var
|
|
PEnd, DEnd: PAnsiChar;
|
|
crc: cardinal;
|
|
begin
|
|
PEnd := P + Len - 4;
|
|
if cardinal(DestLen) <> FromVarUInt32(PByte(P)) then
|
|
begin
|
|
result := false;
|
|
exit;
|
|
end;
|
|
DEnd := Dest + DestLen;
|
|
crc := 0;
|
|
while (P < PEnd) and
|
|
(Dest < DEnd) do
|
|
begin
|
|
Len := FromVarUInt32(PByte(P));
|
|
if Dest + Len > DEnd then
|
|
break;
|
|
crc := crc32c(crc, P, Len);
|
|
OrMemory(pointer(Dest), pointer(P), Len);
|
|
inc(P, Len);
|
|
inc(Dest, Len);
|
|
if P >= PEnd then
|
|
break;
|
|
Len := FromVarUInt32(PByte(P)) + 3;
|
|
crc := crc32c(crc, @Len, SizeOf(Len));
|
|
inc(Dest, Len);
|
|
end;
|
|
result := crc = PCardinal(P)^;
|
|
end;
|
|
|
|
|
|
{ ****************** Binary Buffers Delta Compression }
|
|
|
|
function Max(a, b: PtrInt): PtrInt; {$ifdef HASINLINE}inline;{$endif}
|
|
begin
|
|
if a > b then
|
|
result := a
|
|
else
|
|
result := b;
|
|
end;
|
|
|
|
function Min(a, b: PtrInt): PtrInt; {$ifdef HASINLINE}inline;{$endif}
|
|
begin
|
|
if a < b then
|
|
result := a
|
|
else
|
|
result := b;
|
|
end;
|
|
|
|
{$ifdef HASINLINE}
|
|
function Comp(a, b: PAnsiChar; len: PtrInt): PtrInt; inline;
|
|
var
|
|
lenptr: PtrInt;
|
|
begin
|
|
result := 0;
|
|
lenptr := len - SizeOf(PtrInt);
|
|
if lenptr >= 0 then
|
|
repeat
|
|
if PPtrInt(a + result)^ <> PPtrInt(b + result)^ then
|
|
break;
|
|
inc(result, SizeOf(PtrInt));
|
|
until result > lenptr;
|
|
if result < len then
|
|
repeat
|
|
if a[result] <> b[result] then
|
|
exit;
|
|
inc(result);
|
|
until result = len;
|
|
end;
|
|
{$else} // eax = a, edx = b, ecx = len
|
|
function Comp(a, b: PAnsiChar; len: PtrInt): PtrInt;
|
|
asm // the 'rep cmpsb' version is slower on Intel Core CPU (not AMD)
|
|
or ecx, ecx
|
|
push ebx
|
|
push ecx
|
|
jz @ok
|
|
|
|
@1: mov bx, [eax]
|
|
lea eax, [eax + 2]
|
|
cmp bl, [edx]
|
|
jne @ok
|
|
dec ecx
|
|
jz @ok
|
|
cmp bh, [edx + 1]
|
|
lea edx, [edx + 2]
|
|
jne @ok
|
|
dec ecx
|
|
jnz @1
|
|
|
|
@ok: pop eax
|
|
sub eax, ecx
|
|
pop ebx
|
|
end;
|
|
{$endif HASINLINE}
|
|
|
|
function CompReverse(a, b: pointer; len: PtrInt): PtrInt;
|
|
begin
|
|
result := 0;
|
|
if len > 0 then
|
|
repeat
|
|
if PByteArray(a)[-result] <> PByteArray(b)[-result] then
|
|
exit;
|
|
inc(result);
|
|
until result = len;
|
|
end;
|
|
|
|
function WriteCurOfs(curofs, curlen, curofssize: integer; sp: PAnsiChar): PAnsiChar;
|
|
begin
|
|
if curlen = 0 then
|
|
begin
|
|
sp^ := #0;
|
|
inc(sp);
|
|
end
|
|
else
|
|
begin
|
|
sp := Pointer(ToVarUInt32(curlen, PByte(sp)));
|
|
PInteger(sp)^ := curofs;
|
|
inc(sp, curofssize);
|
|
end;
|
|
result := sp;
|
|
end;
|
|
|
|
{$ifdef CPUINTEL}
|
|
// crc32c SSE4.2 hardware accellerated dword hash
|
|
{$ifdef CPUX86}
|
|
function crc32c32sse42(buf: pointer): cardinal;
|
|
{$ifdef FPC} nostackframe; assembler; {$endif}
|
|
asm
|
|
mov edx, eax
|
|
xor eax, eax
|
|
{$ifdef HASAESNI}
|
|
crc32 eax, dword ptr [edx]
|
|
{$else}
|
|
db $F2, $0F, $38, $F1, $02
|
|
{$endif HASAESNI}
|
|
end;
|
|
{$else}
|
|
function crc32c32sse42(buf: pointer): cardinal;
|
|
{$ifdef FPC}nostackframe; assembler; asm {$else}
|
|
asm // ecx=buf (Linux: edi=buf)
|
|
.noframe
|
|
{$endif FPC}
|
|
xor eax, eax
|
|
crc32 eax, dword ptr [buf]
|
|
end;
|
|
{$endif CPUX86}
|
|
{$endif CPUINTEL}
|
|
|
|
function hash32prime(buf: pointer): cardinal;
|
|
begin
|
|
// inlined xxHash32Mixup - won't pollute L1 cache with crc lookup tables
|
|
result := PCardinal(buf)^;
|
|
result := result xor (result shr 15);
|
|
result := result * 2246822519;
|
|
result := result xor (result shr 13);
|
|
result := result * 3266489917;
|
|
result := result xor (result shr 16);
|
|
end;
|
|
|
|
const
|
|
HTabBits = 18; // fits well with DeltaCompress(..,BufSize=2MB)
|
|
HTabMask = (1 shl HTabBits) - 1; // =$3ffff
|
|
HListMask = $ffffff; // HTab[]=($ff,$ff,$ff)
|
|
|
|
type
|
|
PHTab = ^THTab; // HTabBits=18 -> SizeOf=767KB
|
|
THTab = packed array[0..HTabMask] of array[0..2] of byte;
|
|
|
|
function DeltaCompute(NewBuf, OldBuf, OutBuf, WorkBuf: PAnsiChar;
|
|
NewBufSize, OldBufSize, MaxLevel: PtrInt; HList, HTab: PHTab): PAnsiChar;
|
|
var
|
|
i, curofs, curlen, curlevel, match, curofssize, h, oldh: PtrInt;
|
|
sp, pInBuf, pOut: PAnsiChar;
|
|
ofs: cardinal;
|
|
spb: PByte absolute sp;
|
|
hash: function(buf: pointer): cardinal;
|
|
begin
|
|
// 1. fill HTab[] with hashes for all old data
|
|
{$ifdef CPUINTEL}
|
|
if cfSSE42 in CpuFeatures then
|
|
hash := @crc32c32sse42
|
|
else
|
|
{$endif CPUINTEL}
|
|
hash := @hash32prime;
|
|
FillCharFast(HTab^, SizeOf(HTab^), $ff); // HTab[]=HListMask by default
|
|
pInBuf := OldBuf;
|
|
oldh := -1; // force calculate first hash
|
|
sp := pointer(HList);
|
|
for i := 0 to OldBufSize - 3 do
|
|
begin
|
|
h := hash(pInBuf) and HTabMask;
|
|
inc(pInBuf);
|
|
if h = oldh then
|
|
continue;
|
|
oldh := h;
|
|
h := PtrInt(@HTab^[h]); // fast 24-bit data process
|
|
PCardinal(sp)^ := PCardinal(h)^;
|
|
PCardinal(h)^ := cardinal(i) or (PCardinal(h)^ and $ff000000);
|
|
inc(sp, 3);
|
|
end;
|
|
// 2. compression init
|
|
if OldBufSize <= $ffff then
|
|
curofssize := 2
|
|
else
|
|
curofssize := 3;
|
|
curlen := -1;
|
|
curofs := 0;
|
|
pOut := OutBuf + 7;
|
|
sp := WorkBuf;
|
|
// 3. handle identical leading bytes
|
|
match := Comp(OldBuf, NewBuf, Min(OldBufSize, NewBufSize));
|
|
if match > 2 then
|
|
begin
|
|
sp := WriteCurOfs(0, match, curofssize, sp);
|
|
sp^ := #0;
|
|
inc(sp);
|
|
inc(NewBuf, match);
|
|
dec(NewBufSize, match);
|
|
end;
|
|
pInBuf := NewBuf;
|
|
// 4. main loop: identify longest sequences using hash, and store reference
|
|
if NewBufSize >= 8 then
|
|
repeat
|
|
// hash 4 next bytes from NewBuf, and find longest match in OldBuf
|
|
ofs := PCardinal(@HTab^[hash(NewBuf) and HTabMask])^ and HListMask;
|
|
if ofs <> HListMask then
|
|
begin
|
|
// brute force search loop of best hash match
|
|
curlevel := MaxLevel;
|
|
repeat
|
|
with PHash128Rec(OldBuf + ofs)^ do
|
|
// always test 8 bytes at once
|
|
{$ifdef CPU64}
|
|
if PHash128Rec(NewBuf)^.Lo = Lo then
|
|
{$else}
|
|
if (PHash128Rec(NewBuf)^.c0 = c0) and
|
|
(PHash128Rec(NewBuf)^.c1 = c1) then
|
|
{$endif CPU64}
|
|
begin
|
|
// test remaining bytes
|
|
match := Comp(@PHash128Rec(NewBuf)^.c2, @c2,
|
|
Min(PtrUInt(OldBufSize) - ofs, NewBufSize) - 8);
|
|
if match > curlen then
|
|
begin
|
|
// found a longer sequence
|
|
curlen := match;
|
|
curofs := ofs;
|
|
end;
|
|
end;
|
|
dec(curlevel);
|
|
ofs := PCardinal(@HList^[ofs])^ and HListMask;
|
|
until (ofs = HListMask) or
|
|
(curlevel = 0);
|
|
end;
|
|
// curlen = longest sequence length
|
|
if curlen < 0 then
|
|
begin
|
|
// no sequence found -> copy one byte
|
|
dec(NewBufSize);
|
|
pOut^ := NewBuf^;
|
|
inc(NewBuf);
|
|
inc(pOut);
|
|
if NewBufSize > 8 then // >=8 may overflow
|
|
continue
|
|
else
|
|
break;
|
|
end;
|
|
inc(curlen, 8);
|
|
sp := WriteCurOfs(curofs, curlen, curofssize, sp);
|
|
spb := ToVarUInt32(NewBuf - pInBuf, spb);
|
|
inc(NewBuf, curlen); // continue to search after the sequence
|
|
dec(NewBufSize, curlen);
|
|
curlen := -1;
|
|
pInBuf := NewBuf;
|
|
if NewBufSize > 8 then // >=8 may overflow
|
|
continue
|
|
else
|
|
break;
|
|
until false;
|
|
// 5. write remaining bytes
|
|
if NewBufSize > 0 then
|
|
begin
|
|
MoveFast(NewBuf^, pOut^, NewBufSize);
|
|
inc(pOut, NewBufSize);
|
|
inc(NewBuf, NewBufSize);
|
|
end;
|
|
sp^ := #0;
|
|
inc(sp);
|
|
spb := ToVarUInt32(NewBuf - pInBuf, spb);
|
|
// 6. write header
|
|
PInteger(OutBuf)^ := pOut - OutBuf - 7;
|
|
h := sp - WorkBuf;
|
|
PInteger(OutBuf + 3)^ := h;
|
|
OutBuf[6] := AnsiChar(curofssize);
|
|
// 7. copy commands
|
|
MoveFast(WorkBuf^, pOut^, h);
|
|
result := pOut + h;
|
|
end;
|
|
|
|
function ExtractBuf(GoodCRC: cardinal; p: PAnsiChar;
|
|
var aUpd, Delta: PAnsiChar; Old: PAnsiChar): TDeltaError;
|
|
var
|
|
pEnd, buf, upd, src: PAnsiChar;
|
|
bufsize, datasize, leading, srclen: PtrUInt;
|
|
curofssize: byte;
|
|
begin
|
|
// 1. decompression init
|
|
upd := aUpd;
|
|
bufsize := PCardinal(p)^ and $00ffffff;
|
|
inc(p, 3);
|
|
datasize := PCardinal(p)^ and $00ffffff;
|
|
inc(p, 3);
|
|
curofssize := ord(p^);
|
|
inc(p);
|
|
buf := p;
|
|
inc(p, bufsize);
|
|
pEnd := p + datasize;
|
|
src := nil;
|
|
// 2. main loop
|
|
while p < pEnd do
|
|
begin
|
|
// src/srclen = sequence to be copied
|
|
srclen := FromVarUInt32(PByte(p));
|
|
if srclen > 0 then
|
|
if curofssize = 2 then
|
|
begin
|
|
src := Old + PWord(p)^;
|
|
inc(p, 2);
|
|
end
|
|
else
|
|
begin
|
|
src := Old + PCardinal(p)^ and $00ffffff;
|
|
inc(p, 3);
|
|
end;
|
|
// copy leading bytes
|
|
leading := FromVarUInt32(PByte(p));
|
|
if leading <> 0 then
|
|
begin
|
|
MoveFast(buf^, upd^, leading);
|
|
inc(buf, leading);
|
|
inc(upd, leading);
|
|
end;
|
|
// copy sequence
|
|
if srclen <> 0 then
|
|
begin
|
|
if PtrUInt(upd - src) < srclen then
|
|
MoveByOne(src, upd, srclen)
|
|
else
|
|
MoveFast(src^, upd^, srclen);
|
|
inc(upd, srclen);
|
|
end;
|
|
end;
|
|
// 3. result check
|
|
Delta := p;
|
|
if (p = pEnd) and
|
|
(crc32c(0, aUpd, upd - aUpd) = GoodCRC) then
|
|
// whole CRC is faster than incremental
|
|
result := dsSuccess
|
|
else
|
|
result := dsCrcExtract;
|
|
aUpd := upd;
|
|
end;
|
|
|
|
procedure WriteByte(var P: PAnsiChar; V: byte);
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
begin
|
|
PByte(P)^ := V;
|
|
inc(P);
|
|
end;
|
|
|
|
procedure WriteInt(var P: PAnsiChar; V: cardinal);
|
|
{$ifdef HASINLINE}inline;{$endif}
|
|
begin
|
|
PCardinal(P)^ := V;
|
|
inc(P, 4);
|
|
end;
|
|
|
|
const
|
|
FLAG_COPIED = 0;
|
|
FLAG_COMPRESS = 1;
|
|
FLAG_BEGIN = 2;
|
|
FLAG_END = 3;
|
|
|
|
function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize: integer;
|
|
out Delta: PAnsiChar; Level, BufSize: integer): integer;
|
|
var
|
|
HTab, HList: PHTab;
|
|
d, workbuf: PAnsiChar;
|
|
db: PByte absolute d;
|
|
BufRead, OldRead, Trailing, NewSizeSave: PtrInt;
|
|
bigfile: boolean;
|
|
|
|
procedure CreateCopied;
|
|
begin
|
|
Getmem(Delta, NewSizeSave + 17); // 17 = 4*integer + 1*byte
|
|
d := Delta;
|
|
db := ToVarUInt32(0, ToVarUInt32(NewSizeSave, db));
|
|
WriteByte(d, FLAG_COPIED); // block copied flag
|
|
db := ToVarUInt32(NewSizeSave, db);
|
|
WriteInt(d, crc32c(0, New, NewSizeSave));
|
|
MoveFast(New^, d^, NewSizeSave);
|
|
inc(d, NewSizeSave);
|
|
result := d - Delta;
|
|
end;
|
|
|
|
begin
|
|
// 1. special cases
|
|
if (NewSize = OldSize) and
|
|
mormot.core.base.CompareMem(Old, New, NewSize) then
|
|
begin
|
|
Getmem(Delta, 1);
|
|
Delta^ := '=';
|
|
result := 1;
|
|
exit;
|
|
end;
|
|
NewSizeSave := NewSize;
|
|
if OldSize = 0 then
|
|
begin
|
|
// Delta from nothing -> direct copy of whole block
|
|
CreateCopied;
|
|
exit;
|
|
end;
|
|
// 2. compression init
|
|
bigfile := OldSize > BufSize;
|
|
if BufSize > NewSize then
|
|
BufSize := NewSize;
|
|
if BufSize > HListMask then
|
|
BufSize := HListMask; // we store offsets with 2..3 bytes -> max 16MB chunk
|
|
Trailing := 0;
|
|
Getmem(workbuf, BufSize); // compression temporary buffers
|
|
Getmem(HList, BufSize * SizeOf({%H-}HList[0]));
|
|
Getmem(HTab, SizeOf({%H-}HTab^));
|
|
Getmem(Delta, Max(NewSize, OldSize) + 4096); // Delta size max evalulation
|
|
try
|
|
d := Delta;
|
|
db := ToVarUInt32(NewSize, db); // Destination Size
|
|
// 3. handle leading and trailing identical bytes (for biggest files)
|
|
if bigfile then
|
|
begin
|
|
// test initial same chars
|
|
BufRead := Comp(New, Old, Min(NewSize, OldSize));
|
|
if BufRead > 9 then
|
|
begin
|
|
// it happens very often: modification is usually in the middle/end
|
|
db := ToVarUInt32(BufRead, db); // blockSize = Size BufIdem
|
|
WriteByte(d, FLAG_BEGIN);
|
|
WriteInt(d, crc32c(0, New, BufRead));
|
|
inc(New, BufRead);
|
|
dec(NewSize, BufRead);
|
|
inc(Old, BufRead);
|
|
dec(OldSize, BufRead);
|
|
end;
|
|
// test trailing same chars
|
|
BufRead := CompReverse(New + NewSize - 1, Old + OldSize - 1,
|
|
Min(NewSize, OldSize));
|
|
if BufRead > 5 then
|
|
begin
|
|
if NewSize = BufRead then
|
|
dec(BufRead); // avoid block overflow
|
|
dec(OldSize, BufRead);
|
|
dec(NewSize, BufRead);
|
|
Trailing := BufRead;
|
|
end;
|
|
end;
|
|
// 4. main loop
|
|
repeat
|
|
BufRead := Min(BufSize, NewSize);
|
|
dec(NewSize, BufRead);
|
|
if (BufRead = 0) and
|
|
(Trailing > 0) then
|
|
begin
|
|
db := ToVarUInt32(Trailing, db);
|
|
WriteByte(d, FLAG_END); // block idem end flag
|
|
WriteInt(d, crc32c(0, New, Trailing));
|
|
break;
|
|
end;
|
|
OldRead := Min(BufSize, OldSize);
|
|
dec(OldSize, OldRead);
|
|
db := ToVarUInt32(OldRead, db);
|
|
if (BufRead < 4) or
|
|
(OldRead < 4) or
|
|
(BufRead shr 2 > OldRead) then
|
|
begin
|
|
WriteByte(d, FLAG_COPIED); // block copied flag
|
|
db := ToVarUInt32(BufRead, db);
|
|
if BufRead = 0 then
|
|
break;
|
|
WriteInt(d, crc32c(0, New, BufRead));
|
|
MoveFast(New^, d^, BufRead);
|
|
inc(New, BufRead);
|
|
inc(d, BufRead);
|
|
end
|
|
else
|
|
begin
|
|
WriteByte(d, FLAG_COMPRESS); // block compressed flag
|
|
WriteInt(d, crc32c(0, New, BufRead));
|
|
WriteInt(d, crc32c(0, Old, OldRead));
|
|
d := DeltaCompute(New, Old, d, workbuf, BufRead, OldRead, Level, HList, HTab);
|
|
inc(New, BufRead);
|
|
inc(Old, OldRead);
|
|
end;
|
|
until false;
|
|
// 5. release temp memory
|
|
finally
|
|
result := d - Delta;
|
|
Freemem(HTab);
|
|
Freemem(HList);
|
|
Freemem(workbuf);
|
|
end;
|
|
if result >= NewSizeSave + 17 then
|
|
begin
|
|
// Delta didn't compress well -> store it (with up to 17 bytes overhead)
|
|
Freemem(Delta);
|
|
CreateCopied;
|
|
end;
|
|
end;
|
|
|
|
function DeltaCompress(const New, Old: RawByteString;
|
|
Level, BufSize: integer): RawByteString;
|
|
begin
|
|
result := DeltaCompress(pointer(New), pointer(Old),
|
|
length(New), length(Old), Level, BufSize);
|
|
end;
|
|
|
|
function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize,
|
|
Level, BufSize: integer): RawByteString;
|
|
var
|
|
Delta: PAnsiChar;
|
|
DeltaLen: integer;
|
|
begin
|
|
DeltaLen := DeltaCompress(New, Old, NewSize, OldSize, Delta, Level, BufSize);
|
|
FastSetRawByteString(result, Delta, DeltaLen);
|
|
Freemem(Delta);
|
|
end;
|
|
|
|
function DeltaExtract(Delta, Old, New: PAnsiChar): TDeltaError;
|
|
var
|
|
BufCRC: cardinal;
|
|
Code: byte;
|
|
Len, BufRead, OldRead: PtrInt;
|
|
db: PByte absolute Delta;
|
|
Upd: PAnsiChar;
|
|
begin
|
|
result := dsSuccess;
|
|
Len := FromVarUInt32(db);
|
|
Upd := New;
|
|
repeat
|
|
OldRead := FromVarUInt32(db);
|
|
Code := db^;
|
|
inc(db);
|
|
case Code of
|
|
FLAG_COPIED:
|
|
begin
|
|
// block copied flag - copy new from Delta
|
|
BufRead := FromVarUInt32(db);
|
|
if BufRead = 0 then
|
|
break;
|
|
if crc32c(0, Delta + 4, BufRead) <> PCardinal(Delta)^ then
|
|
begin
|
|
result := dsCrcCopy;
|
|
exit;
|
|
end;
|
|
inc(Delta, 4);
|
|
MoveFast(Delta^, New^, BufRead);
|
|
if BufRead >= Len then
|
|
exit; // if Old=nil -> only copy new
|
|
inc(Delta, BufRead);
|
|
inc(New, BufRead);
|
|
end;
|
|
FLAG_COMPRESS:
|
|
begin
|
|
// block compressed flag - extract Delta from Old
|
|
BufCRC := PCardinal(Delta)^;
|
|
inc(Delta, 4);
|
|
if crc32c(0, Old, OldRead) <> PCardinal(Delta)^ then
|
|
begin
|
|
result := dsCrcComp;
|
|
exit;
|
|
end;
|
|
inc(Delta, 4);
|
|
result := ExtractBuf(BufCRC, Delta, New, Delta, Old);
|
|
if result <> dsSuccess then
|
|
exit;
|
|
end;
|
|
FLAG_BEGIN:
|
|
begin
|
|
// block idem begin flag
|
|
if crc32c(0, Old, OldRead) <> PCardinal(Delta)^ then
|
|
begin
|
|
result := dsCrcBegin;
|
|
exit;
|
|
end;
|
|
inc(Delta, 4);
|
|
MoveFast(Old^, New^, OldRead);
|
|
inc(New, OldRead);
|
|
end;
|
|
FLAG_END:
|
|
begin
|
|
// block idem end flag
|
|
if crc32c(0, Old, OldRead) <> PCardinal(Delta)^ then
|
|
result := dsCrcEnd;
|
|
MoveFast(Old^, New^, OldRead);
|
|
inc(New, OldRead);
|
|
break;
|
|
end;
|
|
else
|
|
begin
|
|
result := dsFlag;
|
|
exit;
|
|
end;
|
|
end; // Case Code of
|
|
inc(Old, OldRead);
|
|
until false;
|
|
if New - Upd <> Len then
|
|
result := dsLen;
|
|
end;
|
|
|
|
function DeltaExtract(const Delta, Old: RawByteString;
|
|
out New: RawByteString): TDeltaError;
|
|
begin
|
|
if (Delta = '') or
|
|
(Delta = '=') then
|
|
begin
|
|
New := Old;
|
|
result := dsSuccess;
|
|
end
|
|
else
|
|
begin
|
|
SetLength(New, DeltaExtractSize(pointer(Delta)));
|
|
result := DeltaExtract(pointer(Delta), pointer(Old), pointer(New));
|
|
end;
|
|
end;
|
|
|
|
function DeltaExtractSize(const Delta: RawByteString): integer;
|
|
begin
|
|
result := DeltaExtractSize(pointer(Delta));
|
|
end;
|
|
|
|
function DeltaExtractSize(Delta: PAnsiChar): integer;
|
|
begin
|
|
if Delta = nil then
|
|
result := 0
|
|
else
|
|
result := FromVarUInt32(PByte(Delta));
|
|
end;
|
|
|
|
function ToText(err: TDeltaError): PShortString;
|
|
begin
|
|
result := GetEnumName(TypeInfo(TDeltaError), ord(err));
|
|
end;
|
|
|
|
|
|
{ ****************** TDynArray Low-Level Binary Search }
|
|
|
|
function SimpleDynArrayLoadFrom(Source: PAnsiChar; aTypeInfo: PRttiInfo;
|
|
out Count, ElemSize: PtrInt): pointer;
|
|
var
|
|
Hash: PCardinalArray absolute Source;
|
|
iteminfo: PRttiInfo;
|
|
begin
|
|
result := nil;
|
|
if (aTypeInfo = nil) or
|
|
(aTypeInfo^.Kind <> rkDynArray) then
|
|
exit;
|
|
iteminfo := aTypeInfo^.DynArrayItemType(ElemSize);
|
|
if (iteminfo <> nil) or
|
|
(Source = nil) or
|
|
// (Source[0] <> AnsiChar(ElemSize)) or mORMot 2 stores elemsize=0
|
|
(Source[1] <> #0) then
|
|
exit; // invalid type information or Source content
|
|
inc(Source,2);
|
|
Count := FromVarUInt32(PByte(Source)); // dynamic array count
|
|
if Count <> 0 then
|
|
result := @Hash[1]; // returns valid Source content
|
|
end;
|
|
|
|
function IntegerDynArrayLoadFrom(Source: PAnsiChar;
|
|
var Count: integer): PIntegerArray;
|
|
var
|
|
Hash: PCardinalArray absolute Source;
|
|
begin
|
|
result := nil;
|
|
if (Source = nil) or
|
|
// (Source[0] <> #4) or mORMot 2 stores elemsize=0
|
|
(Source[1] <> #0) then
|
|
exit; // invalid Source content
|
|
inc(Source, 2);
|
|
Count := FromVarUInt32(PByte(Source)); // dynamic array count
|
|
if Count <> 0 then
|
|
result := @Hash[1]; // returns valid Source content
|
|
end;
|
|
|
|
function RawUtf8DynArrayLoadFromContains(Source: PAnsiChar;
|
|
Value: PUtf8Char; ValueLen: PtrInt; CaseSensitive: boolean): PtrInt;
|
|
var
|
|
Count, Len: PtrInt;
|
|
begin
|
|
if (Value = nil) or
|
|
(ValueLen = 0) or
|
|
(Source = nil) then
|
|
// (Source[0] <> AnsiChar(SizeOf(PtrInt))) mORMot 2 stores elemsize=0
|
|
// {$ifdef ISDELPHI} or (Source[1] <> AnsiChar(rkLString)){$endif}
|
|
begin
|
|
result := -1;
|
|
exit; // invalid Source or Value content
|
|
end;
|
|
inc(Source, 2);
|
|
Count := FromVarUInt32(PByte(Source)); // dynamic array count
|
|
inc(Source, SizeOf(cardinal)); // ignore Hash32 security checksum
|
|
for result := 0 to Count - 1 do
|
|
begin
|
|
Len := FromVarUInt32(PByte(Source));
|
|
if CaseSensitive then
|
|
begin
|
|
if (Len = ValueLen) and
|
|
CompareMemFixed(Value, Source, Len) then
|
|
exit;
|
|
end
|
|
else if Utf8ILComp(Value, pointer(Source), ValueLen, Len) = 0 then
|
|
exit;
|
|
inc(Source, Len);
|
|
end;
|
|
result := -1;
|
|
end;
|
|
|
|
|
|
{ TDynArrayLoadFrom }
|
|
|
|
function TDynArrayLoadFrom.Init(ArrayTypeInfo: PRttiInfo; Source: PAnsiChar;
|
|
SourceMaxLen: PtrInt): boolean;
|
|
begin
|
|
result := false;
|
|
Count := 0;
|
|
Current := 0;
|
|
Reader.Init(Source, SourceMaxLen);
|
|
ArrayRtti := Rtti.RegisterType(ArrayTypeInfo);
|
|
if (ArrayRtti.Parser <> ptDynArray) or
|
|
Reader.EOF then
|
|
exit;
|
|
if ArrayRtti.Cache.ItemInfo = nil then
|
|
ArrayLoad := nil
|
|
else
|
|
ArrayLoad := RTTI_BINARYLOAD[ArrayRtti.Cache.ItemInfo^.Kind];
|
|
Count := DynArrayLoadHeader(Reader, ArrayRtti.Info, ArrayRtti.Cache.ItemInfo);
|
|
result := true;
|
|
end;
|
|
|
|
function TDynArrayLoadFrom.Init(ArrayTypeInfo: PRttiInfo;
|
|
const Source: RawByteString): boolean;
|
|
begin
|
|
result := Init(ArrayTypeInfo, pointer(Source), length(Source));
|
|
end;
|
|
|
|
function TDynArrayLoadFrom.Step(Item: pointer): boolean;
|
|
begin
|
|
if (Current < Count) and
|
|
not Reader.EOF then
|
|
begin
|
|
if Assigned(ArrayLoad) then
|
|
ArrayLoad(Item, Reader, ArrayRtti.Cache.ItemInfo)
|
|
else
|
|
Reader.Copy(Item, ArrayRtti.Cache.ItemSize);
|
|
inc(Current);
|
|
result := true;
|
|
end
|
|
else
|
|
result := false;
|
|
end;
|
|
|
|
function TDynArrayLoadFrom.FirstField(Field: pointer): boolean;
|
|
var
|
|
load: TRttiBinaryLoad;
|
|
info: PRttiInfo;
|
|
noiteration: TFastReader;
|
|
begin
|
|
if (Current < Count) and
|
|
not Reader.EOF then
|
|
begin
|
|
info := PT_INFO[ArrayRtti.ArrayFirstField];
|
|
if info <> nil then
|
|
begin
|
|
load := RTTI_BINARYLOAD[info^.Kind];
|
|
if Assigned(load) then
|
|
begin
|
|
noiteration := Reader;
|
|
load(Field, noiteration, info);
|
|
result := true;
|
|
exit;
|
|
end;
|
|
end;
|
|
end;
|
|
result := false;
|
|
end;
|
|
|
|
|
|
|
|
{ ****************** TSynFilter and TSynValidate Processing Classes }
|
|
|
|
function IsValidIP4Address(P: PUtf8Char): boolean;
|
|
var
|
|
ndot: PtrInt;
|
|
V: PtrUInt;
|
|
begin
|
|
result := false;
|
|
if (P = nil) or
|
|
not (P^ in ['0'..'9']) then
|
|
exit;
|
|
V := 0;
|
|
ndot := 0;
|
|
repeat
|
|
case P^ of
|
|
#0:
|
|
break;
|
|
'.':
|
|
if (P[-1] = '.') or
|
|
(V > 255) then
|
|
exit
|
|
else
|
|
begin
|
|
inc(ndot);
|
|
V := 0;
|
|
end;
|
|
'0'..'9':
|
|
V := (V * 10) + ord(P^) - 48;
|
|
else
|
|
exit;
|
|
end;
|
|
inc(P);
|
|
until false;
|
|
if (ndot = 3) and
|
|
(V <= 255) and
|
|
(P[-1] <> '.') then
|
|
result := true;
|
|
end;
|
|
|
|
function IsValidEmail(P: PUtf8Char): boolean;
|
|
// Initial Author: Ernesto D'Spirito - UTF-8 version by AB
|
|
// http://www.howtodothings.com/computers/a1169-validating-email-addresses-in-delphi.html
|
|
const
|
|
// Valid characters in an "atom"
|
|
atom_chars: TSynAnsicharSet = [#33..#255] -
|
|
['(', ')', '<', '>', '@', ',', ';', ':', '\', '/', '"', '.', '[', ']', #127];
|
|
// Valid characters in a "quoted-string"
|
|
quoted_string_chars: TSynAnsicharSet =
|
|
[#0..#255] - ['"', #13, '\'];
|
|
// Valid characters in a subdomain
|
|
letters_digits: TSynAnsicharSet =
|
|
['0'..'9', 'A'..'Z', 'a'..'z'];
|
|
type
|
|
States = (
|
|
STATE_BEGIN,
|
|
STATE_ATOM,
|
|
STATE_QTEXT,
|
|
STATE_QCHAR,
|
|
STATE_QUOTE,
|
|
STATE_LOCAL_PERIOD,
|
|
STATE_EXPECTING_SUBDOMAIN,
|
|
STATE_SUBDOMAIN,
|
|
STATE_HYPHEN);
|
|
var
|
|
State: States;
|
|
subdomains: integer;
|
|
c: AnsiChar;
|
|
ch: PtrInt;
|
|
begin
|
|
State := STATE_BEGIN;
|
|
subdomains := 1;
|
|
if P <> nil then
|
|
repeat
|
|
ch := ord(P^);
|
|
if ch and $80 = 0 then
|
|
inc(P)
|
|
else
|
|
ch := UTF8_TABLE.GetHighUtf8Ucs4(P);
|
|
if (ch <= 255) and
|
|
(WinAnsiConvert.AnsiToWide[ch] <= 255) then
|
|
// convert into WinAnsi char
|
|
c := AnsiChar(ch)
|
|
else
|
|
// invalid char
|
|
c := #127;
|
|
case State of
|
|
STATE_BEGIN:
|
|
if c in atom_chars then
|
|
State := STATE_ATOM
|
|
else if c = '"' then
|
|
State := STATE_QTEXT
|
|
else
|
|
break;
|
|
STATE_ATOM:
|
|
if c = '@' then
|
|
State := STATE_EXPECTING_SUBDOMAIN
|
|
else if c = '.' then
|
|
State := STATE_LOCAL_PERIOD
|
|
else if not (c in atom_chars) then
|
|
break;
|
|
STATE_QTEXT:
|
|
if c = '\' then
|
|
State := STATE_QCHAR
|
|
else if c = '"' then
|
|
State := STATE_QUOTE
|
|
else if not (c in quoted_string_chars) then
|
|
break;
|
|
STATE_QCHAR:
|
|
State := STATE_QTEXT;
|
|
STATE_QUOTE:
|
|
if c = '@' then
|
|
State := STATE_EXPECTING_SUBDOMAIN
|
|
else if c = '.' then
|
|
State := STATE_LOCAL_PERIOD
|
|
else
|
|
break;
|
|
STATE_LOCAL_PERIOD:
|
|
if c in atom_chars then
|
|
State := STATE_ATOM
|
|
else if c = '"' then
|
|
State := STATE_QTEXT
|
|
else
|
|
break;
|
|
STATE_EXPECTING_SUBDOMAIN:
|
|
if c in letters_digits then
|
|
State := STATE_SUBDOMAIN
|
|
else
|
|
break;
|
|
STATE_SUBDOMAIN:
|
|
if c = '.' then
|
|
begin
|
|
inc(subdomains);
|
|
State := STATE_EXPECTING_SUBDOMAIN
|
|
end
|
|
else if c = '-' then
|
|
State := STATE_HYPHEN
|
|
else if not (c in letters_digits) then
|
|
break;
|
|
STATE_HYPHEN:
|
|
if c in letters_digits then
|
|
State := STATE_SUBDOMAIN
|
|
else if c <> '-' then
|
|
break;
|
|
end;
|
|
if P^ = #0 then
|
|
begin
|
|
P := nil;
|
|
break;
|
|
end;
|
|
until false;
|
|
result := (State = STATE_SUBDOMAIN) and
|
|
(subdomains >= 2);
|
|
end;
|
|
|
|
|
|
{ TSynFilterOrValidate }
|
|
|
|
constructor TSynFilterOrValidate.Create(const aParameters: RawUtf8);
|
|
begin
|
|
inherited Create;
|
|
SetParameters(aParameters); // should parse the JSON-encoded parameters
|
|
end;
|
|
|
|
constructor TSynFilterOrValidate.CreateUtf8(const Format: RawUtf8; const Args,
|
|
Params: array of const);
|
|
begin
|
|
Create(FormatJson(Format, Args, Params));
|
|
end;
|
|
|
|
procedure TSynFilterOrValidate.SetParameters(const value: RawUtf8);
|
|
begin
|
|
fParameters := value;
|
|
end;
|
|
|
|
function TSynFilterOrValidate.AddOnce(var aObjArray: TSynFilterOrValidateObjArray;
|
|
aFreeIfAlreadyThere: boolean): TSynFilterOrValidate;
|
|
var
|
|
i: integer;
|
|
begin
|
|
if self <> nil then
|
|
begin
|
|
for i := 0 to length(aObjArray) - 1 do
|
|
if (PPointer(aObjArray[i])^ = PPointer(self)^) and
|
|
(aObjArray[i].fParameters = fParameters) then
|
|
begin
|
|
if aFreeIfAlreadyThere then
|
|
Free;
|
|
result := aObjArray[i];
|
|
exit;
|
|
end;
|
|
ObjArrayAdd(aObjArray, self);
|
|
end;
|
|
result := self;
|
|
end;
|
|
|
|
|
|
{ TSynFilterUpperCase }
|
|
|
|
procedure TSynFilterUpperCase.Process(aFieldIndex: integer; var value: RawUtf8);
|
|
begin
|
|
value := mormot.core.unicode.UpperCase(value);
|
|
end;
|
|
|
|
|
|
{ TSynFilterUpperCaseU }
|
|
|
|
procedure TSynFilterUpperCaseU.Process(aFieldIndex: integer; var value: RawUtf8);
|
|
begin
|
|
value := UpperCaseU(value);
|
|
end;
|
|
|
|
|
|
{ TSynFilterLowerCase }
|
|
|
|
procedure TSynFilterLowerCase.Process(aFieldIndex: integer; var value: RawUtf8);
|
|
begin
|
|
value := LowerCase(value);
|
|
end;
|
|
|
|
|
|
{ TSynFilterLowerCaseU }
|
|
|
|
procedure TSynFilterLowerCaseU.Process(aFieldIndex: integer; var value: RawUtf8);
|
|
begin
|
|
value := LowerCaseU(value);
|
|
end;
|
|
|
|
|
|
{ TSynFilterTrim }
|
|
|
|
procedure TSynFilterTrim.Process(aFieldIndex: integer; var value: RawUtf8);
|
|
begin
|
|
TrimSelf(value);
|
|
end;
|
|
|
|
|
|
{ TSynFilterTruncate}
|
|
|
|
procedure TSynFilterTruncate.SetParameters(const value: RawUtf8);
|
|
var
|
|
V: array[0..1] of TValuePUtf8Char;
|
|
tmp: TSynTempBuffer;
|
|
begin
|
|
tmp.Init(value);
|
|
JsonDecode(tmp.buf, [
|
|
'MaxLength', // 0
|
|
'Utf8Length' // 1
|
|
], @V);
|
|
fMaxLength := V[0].ToCardinal(0);
|
|
fUtf8Length := V[1].ToBoolean;
|
|
tmp.Done;
|
|
end;
|
|
|
|
procedure TSynFilterTruncate.Process(aFieldIndex: integer; var value: RawUtf8);
|
|
begin
|
|
if fMaxLength - 1 < cardinal(maxInt) then
|
|
if fUtf8Length then
|
|
Utf8TruncateToLength(value, fMaxLength)
|
|
else
|
|
Utf8TruncateToUnicodeLength(value, fMaxLength);
|
|
end;
|
|
|
|
|
|
{ TSynValidateIPAddress }
|
|
|
|
function TSynValidateIPAddress.Process(aFieldIndex: integer; const value:
|
|
RawUtf8; var ErrorMsg: string): boolean;
|
|
begin
|
|
result := IsValidIP4Address(pointer(value));
|
|
if not result then
|
|
ErrorMsg := Format(sInvalidIPAddress, [Utf8ToString(value)]);
|
|
end;
|
|
|
|
|
|
{ TSynValidateEmail }
|
|
|
|
function TSynValidateEmail.Process(aFieldIndex: integer; const value: RawUtf8;
|
|
var ErrorMsg: string): boolean;
|
|
var
|
|
TLD, DOM: RawUtf8;
|
|
i: integer;
|
|
const
|
|
TopLevelTLD: array[0..20] of PUtf8Char = (
|
|
// see http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
|
|
'aero', 'asia', 'biz', 'cat', 'com', 'coop', 'edu', 'gov', 'info', 'int',
|
|
'jobs', 'mil', 'mobi', 'museum', 'name', 'net', 'org', 'pro', 'site', 'tel',
|
|
'travel'); // no xxx !
|
|
begin
|
|
if IsValidEmail(pointer(value)) then
|
|
repeat
|
|
DOM := lowercase(copy(value, PosExChar('@', value) + 1, 100));
|
|
if length(DOM) > 63 then
|
|
break; // exceeded 63-character limit of a DNS name
|
|
if (ForbiddenDomains <> '') and
|
|
CsvContains(ForbiddenDomains, DOM) then
|
|
break;
|
|
i := length(value);
|
|
while (i > 0) and
|
|
(value[i] <> '.') do
|
|
dec(i);
|
|
TLD := lowercase(copy(value, i + 1, 100));
|
|
if (AllowedTLD <> '') and
|
|
not CsvContains(AllowedTLD, TLD) then
|
|
break;
|
|
if (ForbiddenTLD <> '') and
|
|
CsvContains(ForbiddenTLD, TLD) then
|
|
break;
|
|
if not fAnyTLD then
|
|
if FastFindPUtf8CharSorted(@TopLevelTLD, high(TopLevelTLD), pointer(TLD)) < 0 then
|
|
if length(TLD) <> 2 then
|
|
break; // assume a two chars string is a ISO 3166-1 alpha-2 code
|
|
result := true;
|
|
exit;
|
|
until true;
|
|
ErrorMsg := Format(sInvalidEmailAddress, [Utf8ToString(value)]);
|
|
result := false;
|
|
end;
|
|
|
|
procedure TSynValidateEmail.SetParameters(const value: RawUtf8);
|
|
var
|
|
V: array[0..3] of TValuePUtf8Char;
|
|
tmp: TSynTempBuffer;
|
|
begin
|
|
inherited;
|
|
tmp.Init(value);
|
|
JsonDecode(tmp.buf, [
|
|
'AllowedTLD', // 0
|
|
'ForbiddenTLD', // 1
|
|
'ForbiddenDomains', // 2
|
|
'AnyTLD' // 3
|
|
], @V);
|
|
LowerCaseCopy(V[0].Text, V[0].Len, fAllowedTLD);
|
|
LowerCaseCopy(V[1].Text, V[1].Len, fForbiddenTLD);
|
|
LowerCaseCopy(V[2].Text, V[2].Len, fForbiddenDomains);
|
|
AnyTLD := V[3].ToBoolean;
|
|
tmp.Done;
|
|
end;
|
|
|
|
|
|
{ TSynValidatePattern }
|
|
|
|
procedure TSynValidatePattern.SetParameters(const Value: RawUtf8);
|
|
begin
|
|
inherited SetParameters(Value);
|
|
fMatch.Prepare(Value, ClassType = TSynValidatePatternI, {reuse=}true);
|
|
end;
|
|
|
|
function TSynValidatePattern.Process(aFieldIndex: integer; const value: RawUtf8;
|
|
var ErrorMsg: string): boolean;
|
|
|
|
procedure SetErrorMsg;
|
|
begin
|
|
ErrorMsg := Format(sInvalidPattern, [Utf8ToString(value)]);
|
|
end;
|
|
|
|
begin
|
|
result := fMatch.Match(value);
|
|
if not result then
|
|
SetErrorMsg;
|
|
end;
|
|
|
|
|
|
{ TSynValidateNonVoidText }
|
|
|
|
function Character01n(n: integer): string;
|
|
begin
|
|
if n < 0 then
|
|
n := 0
|
|
else if n > 1 then
|
|
n := 2;
|
|
result := GetCsvItemString(pointer(string(sCharacter01n)), n);
|
|
end;
|
|
|
|
procedure InvalidTextLengthMin(min: integer; var result: string);
|
|
begin
|
|
result := Format(sInvalidTextLengthMin, [min, Character01n(min)]);
|
|
end;
|
|
|
|
function TSynValidateNonVoidText.Process(aFieldIndex: integer; const value:
|
|
RawUtf8; var ErrorMsg: string): boolean;
|
|
begin
|
|
if value = '' then
|
|
begin
|
|
InvalidTextLengthMin(1, ErrorMsg);
|
|
result := false;
|
|
end
|
|
else
|
|
result := true;
|
|
end;
|
|
|
|
|
|
{ TSynValidateText }
|
|
|
|
procedure TSynValidateText.SetErrorMsg(fPropsIndex, InvalidTextIndex, MainIndex:
|
|
integer; var result: string);
|
|
var
|
|
P: PChar;
|
|
begin
|
|
P := pointer(string(sInvalidTextChar));
|
|
result := GetCsvItemString(P, MainIndex);
|
|
if fPropsIndex > 0 then
|
|
result := Format(result, [fProps[fPropsIndex],
|
|
GetCsvItemString(P, InvalidTextIndex), Character01n(fProps[fPropsIndex])]);
|
|
end;
|
|
|
|
function TSynValidateText.Process(aFieldIndex: integer; const value: RawUtf8;
|
|
var ErrorMsg: string): boolean;
|
|
var
|
|
i, L: cardinal;
|
|
Min: array[2..7] of cardinal;
|
|
begin
|
|
result := false;
|
|
if fUtf8Length then
|
|
L := length(value)
|
|
else
|
|
L := Utf8ToUnicodeLength(pointer(value));
|
|
if L < MinLength then
|
|
InvalidTextLengthMin(MinLength, ErrorMsg)
|
|
else if L > MaxLength then
|
|
ErrorMsg := Format(sInvalidTextLengthMax, [MaxLength, Character01n(MaxLength)])
|
|
else
|
|
begin
|
|
FillCharFast(Min, SizeOf(Min), 0);
|
|
L := length(value);
|
|
for i := 1 to L do
|
|
case value[i] of
|
|
' ':
|
|
inc(Min[7]);
|
|
'a'..'z':
|
|
begin
|
|
inc(Min[2]);
|
|
inc(Min[5]);
|
|
end;
|
|
'A'..'Z':
|
|
begin
|
|
inc(Min[2]);
|
|
inc(Min[6]);
|
|
end;
|
|
'0'..'9':
|
|
inc(Min[3]);
|
|
'_', '!', ';', '.', ',', '/', ':', '?', '%', '$', '=', '"', '#', '@',
|
|
'(', ')', '{', '}', '+', '''', '-', '*':
|
|
inc(Min[4]);
|
|
end;
|
|
for i := 2 to 7 do
|
|
if Min[i] < fProps[i] then
|
|
begin
|
|
SetErrorMsg(i, i, 0, ErrorMsg);
|
|
exit;
|
|
end
|
|
else if Min[i] > fProps[i + 8] then
|
|
begin
|
|
SetErrorMsg(i + 8, i, 1, ErrorMsg);
|
|
exit;
|
|
end;
|
|
if value <> '' then
|
|
begin
|
|
if MaxLeftTrimCount < cardinal(maxInt) then
|
|
begin
|
|
// if MaxLeftTrimCount is set, check against Value
|
|
i := 0;
|
|
while (i < L) and
|
|
(value[i + 1] = ' ') do
|
|
inc(i);
|
|
if i > MaxLeftTrimCount then
|
|
begin
|
|
SetErrorMsg(0, 0, 8, ErrorMsg);
|
|
exit;
|
|
end;
|
|
end;
|
|
if MaxRightTrimCount < cardinal(maxInt) then
|
|
begin
|
|
// if MaxRightTrimCount is set, check against Value
|
|
i := 0;
|
|
while (i < L) and
|
|
(value[L - i] = ' ') do
|
|
dec(i);
|
|
if i > MaxRightTrimCount then
|
|
begin
|
|
SetErrorMsg(0, 0, 9, ErrorMsg);
|
|
exit;
|
|
end;
|
|
end;
|
|
end;
|
|
result := true;
|
|
end;
|
|
end;
|
|
|
|
procedure TSynValidateText.SetParameters(const value: RawUtf8);
|
|
var
|
|
V: array[0..high(TSynValidateTextProps) + {Utf8Length} 1] of TValuePUtf8Char;
|
|
i: PtrInt;
|
|
tmp: TSynTempBuffer;
|
|
const
|
|
DEFAULT: TSynValidateTextProps = (
|
|
1, // MinLength
|
|
maxInt, // MaxLength
|
|
0, // MinAlphaCount
|
|
0, // MinDigitCount
|
|
0, // MinPunctCount
|
|
0, // MinLowerCount
|
|
0, // MinUpperCount
|
|
0, // MinSpaceCount
|
|
maxInt, // MaxLeftTrimCount
|
|
maxInt, // MaxRightTrimCount
|
|
maxInt, // MaxAlphaCount
|
|
maxInt, // MaxDigitCount
|
|
maxInt, // MaxPunctCount
|
|
maxInt, // MaxLowerCount
|
|
maxInt, // MaxUpperCount
|
|
maxInt); // MaxSpaceCount
|
|
begin
|
|
if (MinLength = 0) and
|
|
(MaxLength = 0) then // if not previously set
|
|
fProps := DEFAULT;
|
|
inherited SetParameters(value);
|
|
if value = '' then
|
|
exit;
|
|
tmp.Init(value);
|
|
try
|
|
JsonDecode(tmp.buf, [
|
|
'MinLength',
|
|
'MaxLength',
|
|
'MinAlphaCount',
|
|
'MinDigitCount',
|
|
'MinPunctCount',
|
|
'MinLowerCount',
|
|
'MinUpperCount',
|
|
'MinSpaceCount',
|
|
'MaxLeftTrimCount',
|
|
'MaxRightTrimCount',
|
|
'MaxAlphaCount',
|
|
'MaxDigitCount',
|
|
'MaxPunctCount',
|
|
'MaxLowerCount',
|
|
'MaxUpperCount',
|
|
'MaxSpaceCount',
|
|
'Utf8Length'], @V);
|
|
for i := 0 to high(fProps) do
|
|
fProps[i] := V[i].ToCardinal(fProps[i]);
|
|
with V[high(V)] do
|
|
fUtf8Length := ToBoolean;
|
|
finally
|
|
tmp.Done;
|
|
end;
|
|
end;
|
|
|
|
|
|
{ TSynValidatePassWord }
|
|
|
|
procedure TSynValidatePassWord.SetParameters(const value: RawUtf8);
|
|
const
|
|
DEFAULT: TSynValidateTextProps = (
|
|
5, // MinLength
|
|
20, // MaxLength
|
|
1, // MinAlphaCount
|
|
1, // MinDigitCount
|
|
1, // MinPunctCount
|
|
1, // MinLowerCount
|
|
1, // MinUpperCount
|
|
0, // MinSpaceCount
|
|
maxInt, // MaxLeftTrimCount
|
|
maxInt, // MaxRightTrimCount
|
|
maxInt, // MaxAlphaCount
|
|
maxInt, // MaxDigitCount
|
|
maxInt, // MaxPunctCount
|
|
maxInt, // MaxLowerCount
|
|
maxInt, // MaxUpperCount
|
|
0); // MaxSpaceCount
|
|
begin
|
|
// set default values for validating a strong password
|
|
fProps := DEFAULT;
|
|
fUtf8Length := false;
|
|
// read custom parameters
|
|
inherited;
|
|
end;
|
|
|
|
|
|
|
|
{ ***************** Cross-Platform TSynTimeZone Time Zones }
|
|
|
|
{ TTimeZoneData }
|
|
|
|
function TTimeZoneData.GetTziFor(year: integer): PTimeZoneInfo;
|
|
var
|
|
i, last: PtrInt;
|
|
begin
|
|
if dyn = nil then
|
|
result := @tzi
|
|
else if year <= dyn[0].year then
|
|
result := @dyn[0].tzi
|
|
else
|
|
begin
|
|
last := high(dyn);
|
|
if year >= dyn[last].year then
|
|
result := @dyn[last].tzi
|
|
else
|
|
begin
|
|
for i := 1 to last do
|
|
if year < dyn[i].year then
|
|
begin
|
|
result := @dyn[i - 1].tzi;
|
|
exit;
|
|
end;
|
|
result := @tzi; // should never happen, but makes compiler happy
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
{ TTimeZoneInformation }
|
|
|
|
constructor TSynTimeZone.Create;
|
|
begin
|
|
fZones.InitSpecific(TypeInfo(TTimeZoneDataDynArray),
|
|
fZone, ptRawUtf8, @fZoneCount);
|
|
end;
|
|
|
|
constructor TSynTimeZone.CreateDefault(dummycpp: integer);
|
|
begin
|
|
Create;
|
|
{$ifdef OSWINDOWS}
|
|
LoadFromRegistry;
|
|
{$else}
|
|
LoadFromFile;
|
|
if fZoneCount = 0 then
|
|
LoadFromResource; // if no .tz file is available, try if bound to executable
|
|
{$endif OSWINDOWS}
|
|
end;
|
|
|
|
destructor TSynTimeZone.Destroy;
|
|
begin
|
|
inherited Destroy;
|
|
fIds.Free;
|
|
fDisplays.Free;
|
|
end;
|
|
|
|
var
|
|
SharedSynTimeZone: TSynTimeZone;
|
|
|
|
class function TSynTimeZone.Default: TSynTimeZone;
|
|
begin
|
|
if SharedSynTimeZone = nil then
|
|
begin
|
|
GlobalLock; // RegisterGlobalShutdownRelease() will use it anyway
|
|
try
|
|
if SharedSynTimeZone = nil then
|
|
SharedSynTimeZone :=
|
|
RegisterGlobalShutdownRelease(TSynTimeZone.CreateDefault);
|
|
finally
|
|
GlobalUnLock;
|
|
end;
|
|
end;
|
|
result := SharedSynTimeZone;
|
|
end;
|
|
|
|
function TSynTimeZone.SaveToBuffer: RawByteString;
|
|
begin
|
|
fSafe.ReadLock;
|
|
try
|
|
result := AlgoSynLZ.Compress(fZones.SaveTo);
|
|
finally
|
|
fSafe.ReadUnLock;
|
|
end;
|
|
end;
|
|
|
|
procedure TSynTimeZone.SaveToFile(const FileName: TFileName);
|
|
var
|
|
FN: TFileName;
|
|
begin
|
|
if FileName = '' then
|
|
FN := ChangeFileExt(Executable.ProgramFileName, '.tz')
|
|
else
|
|
FN := FileName;
|
|
FileFromString(SaveToBuffer, FN);
|
|
end;
|
|
|
|
procedure TSynTimeZone.LoadFromBuffer(const Buffer: RawByteString);
|
|
begin
|
|
if Buffer = '' then
|
|
exit;
|
|
fSafe.WriteLock;
|
|
try
|
|
fZones.LoadFromBinary(AlgoSynLZ.Decompress(Buffer));
|
|
fZones.ForceReHash;
|
|
FreeAndNil(fIds);
|
|
FreeAndNil(fDisplays);
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
procedure TSynTimeZone.LoadFromFile(const FileName: TFileName);
|
|
var
|
|
FN: TFileName;
|
|
begin
|
|
if FileName = '' then
|
|
FN := ChangeFileExt(Executable.ProgramFileName, '.tz')
|
|
else
|
|
FN := FileName;
|
|
LoadFromBuffer(StringFromFile(FN));
|
|
end;
|
|
|
|
procedure TSynTimeZone.LoadFromResource(Instance: TLibHandle);
|
|
var
|
|
buf: RawByteString;
|
|
begin
|
|
ResourceToRawByteString(ClassName, PChar(10), buf, Instance);
|
|
if buf <> '' then
|
|
LoadFromBuffer(buf);
|
|
end;
|
|
|
|
{$ifdef OSWINDOWS}
|
|
|
|
procedure TSynTimeZone.LoadFromRegistry;
|
|
const
|
|
REGKEY = 'Software\Microsoft\Windows NT\CurrentVersion\Time Zones\';
|
|
var
|
|
reg: TWinRegistry;
|
|
keys: TRawUtf8DynArray;
|
|
i, first, last, year, n: integer;
|
|
item: TTimeZoneData;
|
|
begin
|
|
fSafe.WriteLock;
|
|
try
|
|
fZones.Clear;
|
|
if reg.ReadOpen(wrLocalMachine, REGKEY) then
|
|
keys := reg.ReadEnumEntries
|
|
else
|
|
keys := nil; // make Delphi 6 happy
|
|
n := length(keys);
|
|
fZones.Capacity := n;
|
|
for i := 0 to n - 1 do
|
|
begin
|
|
Finalize(item);
|
|
FillcharFast(item.tzi, SizeOf(item.tzi), 0);
|
|
if reg.ReadOpen(wrLocalMachine, REGKEY + keys[i], {reopen=}true) then
|
|
begin
|
|
item.id := keys[i]; // registry keys are genuine by definition
|
|
item.display := reg.ReadString('Display');
|
|
reg.ReadBuffer('TZI', @item.tzi, SizeOf(item.tzi));
|
|
if reg.ReadOpen(wrLocalMachine, REGKEY + keys[i] + '\Dynamic DST', true) then
|
|
begin
|
|
// warning: never defined on XP/2003, and not for all entries
|
|
first := reg.ReadDword('FirstEntry');
|
|
last := reg.ReadDword('LastEntry');
|
|
if (first > 0) and
|
|
(last >= first) then
|
|
begin
|
|
n := 0;
|
|
SetLength(item.dyn, last - first + 1);
|
|
for year := first to last do
|
|
if reg.ReadBuffer(Utf8ToSynUnicode(UInt32ToUtf8(year)),
|
|
@item.dyn[n].tzi, SizeOf(TTimeZoneInfo)) then
|
|
begin
|
|
item.dyn[n].year := year;
|
|
inc(n);
|
|
end;
|
|
SetLength(item.dyn, n);
|
|
end;
|
|
end;
|
|
fZones.Add(item);
|
|
end;
|
|
end;
|
|
reg.Close;
|
|
fZones.ForceReHash;
|
|
FreeAndNil(fIds);
|
|
FreeAndNil(fDisplays);
|
|
finally
|
|
fSafe.WriteUnLock;
|
|
end;
|
|
end;
|
|
|
|
{$endif OSWINDOWS}
|
|
|
|
function TSynTimeZone.LockedFindZoneIndex(const TzId: TTimeZoneID): PtrInt;
|
|
begin
|
|
if TzId = '' then
|
|
result := -1
|
|
else
|
|
begin
|
|
if TzId = fLastZone then
|
|
result := fLastIndex
|
|
else
|
|
begin
|
|
result := fZones.FindHashed(TzId);
|
|
fLastZone := TzId;
|
|
flastIndex := result;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TSynTimeZone.GetDisplay(const TzId: TTimeZoneID): RawUtf8;
|
|
var
|
|
ndx: PtrInt;
|
|
begin
|
|
fSafe.ReadLock;
|
|
ndx := LockedFindZoneIndex(TzId);
|
|
if ndx < 0 then
|
|
if TzId = 'UTC' then // e.g. on XP
|
|
result := TzId
|
|
else
|
|
result := ''
|
|
else
|
|
result := fZone[ndx].display;
|
|
fSafe.ReadUnLock;
|
|
end;
|
|
|
|
function TSynTimeZone.GetBiasForDateTime(const Value: TDateTime;
|
|
const TzId: TTimeZoneID; out Bias: integer; out HaveDaylight: boolean;
|
|
ValueIsUtc: boolean): boolean;
|
|
var
|
|
ndx: PtrInt;
|
|
d: TSynSystemTime;
|
|
tzi: PTimeZoneInfo;
|
|
std, dlt: TDateTime;
|
|
begin
|
|
fSafe.ReadLock;
|
|
try
|
|
ndx := LockedFindZoneIndex(TzId);
|
|
if ndx < 0 then
|
|
begin
|
|
Bias := 0;
|
|
HaveDaylight := false;
|
|
result := TzId = 'UTC'; // e.g. on XP
|
|
exit;
|
|
end;
|
|
d.FromDate(Value); // faster than DecodeDate
|
|
tzi := fZone[ndx].GetTziFor(d.Year);
|
|
if tzi.change_time_std.IsZero then
|
|
begin
|
|
HaveDaylight := false;
|
|
Bias := tzi.Bias + tzi.bias_std;
|
|
end
|
|
else
|
|
begin
|
|
HaveDaylight := true;
|
|
std := tzi.change_time_std.EncodeForTimeChange(d.Year);
|
|
dlt := tzi.change_time_dlt.EncodeForTimeChange(d.Year);
|
|
if ValueIsUtc then
|
|
begin
|
|
// STD shifts by the DLT bias to convert to UTC
|
|
std := ((std * MinsPerDay) + tzi.Bias + tzi.bias_dlt) / MinsPerDay;
|
|
// DLT shifts by the STD bias
|
|
dlt := ((dlt * MinsPerDay) + tzi.Bias + tzi.bias_std) / MinsPerDay;
|
|
end;
|
|
if std < dlt then
|
|
if (std <= Value) and
|
|
(Value < dlt) then
|
|
Bias := tzi.Bias + tzi.bias_std
|
|
else
|
|
Bias := tzi.Bias + tzi.bias_dlt
|
|
else if (dlt <= Value) and
|
|
(Value < std) then
|
|
Bias := tzi.Bias + tzi.bias_dlt
|
|
else
|
|
Bias := tzi.Bias + tzi.bias_std;
|
|
end;
|
|
result := true;
|
|
finally
|
|
fSafe.ReadUnLock;
|
|
end;
|
|
end;
|
|
|
|
function TSynTimeZone.UtcToLocal(const UtcDateTime: TDateTime;
|
|
const TzId: TTimeZoneID): TDateTime;
|
|
var
|
|
Bias: integer;
|
|
HaveDaylight: boolean;
|
|
begin
|
|
if (self = nil) or
|
|
(TzId = '') then
|
|
result := UtcDateTime
|
|
else
|
|
begin
|
|
GetBiasForDateTime(UtcDateTime, TzId, Bias, HaveDaylight, {fromutc=}true);
|
|
result := ((UtcDateTime * MinsPerDay) - Bias) / MinsPerDay;
|
|
end;
|
|
end;
|
|
|
|
function TSynTimeZone.NowToLocal(const TzId: TTimeZoneID): TDateTime;
|
|
begin
|
|
result := UtcToLocal(NowUtc, TzId);
|
|
end;
|
|
|
|
function TSynTimeZone.LocalToUtc(const LocalDateTime: TDateTime;
|
|
const TzID: TTimeZoneID): TDateTime;
|
|
var
|
|
Bias: integer;
|
|
HaveDaylight: boolean;
|
|
begin
|
|
if (self = nil) or
|
|
(TzID = '') then
|
|
result := LocalDateTime
|
|
else
|
|
begin
|
|
GetBiasForDateTime(LocalDateTime, TzID, Bias, HaveDaylight);
|
|
result := ((LocalDateTime * MinsPerDay) + Bias) / MinsPerDay;
|
|
end;
|
|
end;
|
|
|
|
function TSynTimeZone.Ids: TStrings;
|
|
var
|
|
i: PtrInt;
|
|
begin
|
|
if fIDs = nil then
|
|
begin
|
|
fIDs := TStringList.Create;
|
|
fSafe.ReadLock;
|
|
for i := 0 to length(fZone) - 1 do
|
|
fIDs.Add(Utf8ToString(RawUtf8(fZone[i].id)));
|
|
fSafe.ReadUnLock;
|
|
end;
|
|
result := fIDs;
|
|
end;
|
|
|
|
function TSynTimeZone.Displays: TStrings;
|
|
var
|
|
i: PtrInt;
|
|
begin
|
|
if fDisplays = nil then
|
|
begin
|
|
fDisplays := TStringList.Create;
|
|
fSafe.ReadLock;
|
|
for i := 0 to length(fZone) - 1 do
|
|
fDisplays.Add(Utf8ToString(fZone[i].display));
|
|
fSafe.ReadUnLock;
|
|
end;
|
|
result := fDisplays;
|
|
end;
|
|
|
|
|
|
function GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID;
|
|
out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean): boolean;
|
|
begin
|
|
result := TSynTimeZone.Default.
|
|
GetBiasForDateTime(Value, TzId, Bias, HaveDaylight, ValueIsUtc);
|
|
end;
|
|
|
|
function GetDisplay(const TzId: TTimeZoneID): RawUtf8;
|
|
begin
|
|
result := TSynTimeZone.Default.GetDisplay(TzId);
|
|
end;
|
|
|
|
function UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime;
|
|
begin
|
|
result := TSynTimeZone.Default.UtcToLocal(UtcDateTime, TzId);
|
|
end;
|
|
|
|
function NowToLocal(const TzId: TTimeZoneID): TDateTime;
|
|
begin
|
|
result := TSynTimeZone.Default.NowToLocal(TzId);
|
|
end;
|
|
|
|
function LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime;
|
|
begin
|
|
result := TSynTimeZone.Default.LocalToUtc(LocalDateTime, TzId);
|
|
end;
|
|
|
|
|
|
end.
|