/// Framework Core Text, Binary and Time Search Engines // - this unit is a part of the Open Source Synopse mORMot framework 2, // licensed under a MPL/GPL/LGPL three license - see LICENSE.md unit mormot.core.search; { ***************************************************************************** Several Indexing and Search Engines, as used by other parts of the framework - Files Search in Folders - ScanUtf8, GLOB and SOUNDEX Text Search - Efficient CSV Parsing using RTTI - Versatile Expression Search Engine - Bloom Filter Probabilistic Index - Binary Buffers Delta Compression - TDynArray Low-Level Binary Search and Iteration - TSynFilter and TSynValidate Processing Classes - Cross-Platform TSynTimeZone Time Zones ***************************************************************************** } interface {$I mormot.defines.inc} uses classes, sysutils, mormot.core.base, mormot.core.os, mormot.core.rtti, mormot.core.unicode, mormot.core.text, mormot.core.buffers, mormot.core.datetime, mormot.core.data, mormot.core.json; { ****************** Files Search in Folders } type {$A-} /// define one file found result item, as returned by FindFiles() // - Delphi "object" is buggy on stack -> also defined as record with methods {$ifdef USERECORDWITHMETHODS} TFindFiles = record {$else} TFindFiles = object {$endif USERECORDWITHMETHODS} public /// the matching file name // - including its folder name unless ffoExcludesDir is set Name: TFileName; /// the matching file attributes Attr: integer; /// the matching file size Size: Int64; /// the matching file local date/time Timestamp: TDateTime; /// fill the item properties from a FindFirst/FindNext's TSearchRec procedure FromSearchRec(const Directory: TFileName; const F: TSearchRec); /// returns some ready-to-be-loggued text function ToText: ShortString; end; {$A+} /// result list, as returned by FindFiles() TFindFilesDynArray = array of TFindFiles; /// one optional feature of FindFiles() // - ffoSortByName will sort the result files by extension then name // - ffoExcludesDir won't include the path in TFindFiles.Name // - ffoSubFolder will search within nested folders TFindFilesOption = ( ffoSortByName, ffoExcludesDir, ffoSubFolder); /// the optional features of FindFiles() TFindFilesOptions = set of TFindFilesOption; /// search for matching files by names // - just an enhanced wrapper around FindFirst/FindNext with some options // - you may specify several masks in Mask, e.g. as '*.jpg;*.jpeg' function FindFiles(const Directory: TFileName; const Mask: TFileName = FILES_ALL; const IgnoreFileName: TFileName = ''; Options: TFindFilesOptions = []): TFindFilesDynArray; /// search for matching file names // - just a wrapper around FindFilesDynArrayToFileNames(FindFiles()) function FileNames(const Directory: TFileName; const Mask: TFileName = FILES_ALL; Options: TFindFilesOptions = []; const IgnoreFileName: TFileName = ''): TFileNameDynArray; overload; /// search for matching file names from path-delimited content // - is a wrapper around FindFileNames(MakePath()) function FileNames(const Path: array of const; const Mask: TFileName = FILES_ALL; Options: TFindFilesOptions = []): TFileNameDynArray; overload; /// convert a result list, as returned by FindFiles(), into an array of Files[].Name function FindFilesDynArrayToFileNames(const Files: TFindFilesDynArray): TFileNameDynArray; /// sort a FindFiles() result list by its TFindFiles[].Timestamp field procedure FindFilesSortByTimestamp(var Files: TFindFilesDynArray); type /// one optional feature of SynchFolders() // - process recursively nested folders if sfoSubFolder is included // - use file content instead of file date check if sfoByContent is included // - display synched file name on console if sfoWriteFileNameToConsole is included TSynchFoldersOption = ( sfoSubFolder, sfoByContent, sfoWriteFileNameToConsole); /// the optional features of SynchFolders() TSynchFoldersOptions = set of TSynchFoldersOption; /// ensure all files in Dest folder(s) do match the one in Reference // - won't copy all files from Reference folders, but will update files already // existing in Dest, which did change since last synchronization // - file copy will use in-memory loading, so won't work well with huge files // - returns the number of files copied during the process function SynchFolders(const Reference, Dest: TFileName; Options: TSynchFoldersOptions = []): integer; /// copy all files from a source folder to a destination folder // - will copy only new or changed files, keeping existing identical files // - file copy will use stream loading, so would cope with huge files // - returns the number of fields copied during the process, -1 on error function CopyFolder(const Source, Dest: TFileName; Options: TSynchFoldersOptions = []): integer; { ****************** ScanUtf8, GLOB and SOUNDEX Text Search } /// read and store text into values[] according to fmt specifiers // - %d as PInteger, %D as PInt64, %u as PCardinal, %U as PQWord, %f as PDouble, // %F as PCurrency, %x as 8 hexa chars to PInteger, %X as 16 hexa chars to PInt64, // %s as PShortString (UTF-8 encoded), %S as PRawUtf8, %L as PRawUtf8 (getting // all text until the end of the line) // - optionally, specifiers and any whitespace separated identifiers may be // extracted and stored into the ident[] array, e.g. '%dFirstInt %s %DOneInt64' // will store ['dFirstInt','s','DOneInt64'] into ident[] dynamic array function ScanUtf8(const text, fmt: RawUtf8; const values: array of pointer; ident: PRawUtf8DynArray = nil): integer; overload; /// read text from P/PLen and store it into values[] according to fmt specifiers function ScanUtf8(P: PUtf8Char; PLen: PtrInt; const fmt: RawUtf8; const values: array of pointer; ident: PRawUtf8DynArray): integer; overload; type PMatch = ^TMatch; // used when inlining TMatch.Match TMatchSearchFunction = function(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; /// low-level structure used by IsMatch() for actual GLOB search // - you can use this object to prepare a given pattern, e.g. in a loop // - implemented as a fast brute-force state-machine without any heap allocation // - some common patterns ('exactmatch', 'startwith*', '*endwith', '*contained*') // are handled with dedicated code, optionally with case-insensitive search // - PrepareContains() is the most efficient method for '*contained*' search // - consider using TMatchs (or SetMatchs/TMatchDynArray) if you expect to // search for several patterns, or even TExprParserMatch for expression search {$ifdef USERECORDWITHMETHODS} TMatch = record {$else} TMatch = object {$endif USERECORDWITHMETHODS} private Pattern, Text: PUtf8Char; P, T, PMax, TMax: PtrInt; Upper: PNormTable; State: (sNONE, sABORT, sEND, sLITERAL, sPATTERN, sRANGE, sVALID); procedure MatchAfterStar; procedure MatchMain; public /// published for proper inlining Search: TMatchSearchFunction; /// initialize the internal fields for a given glob search pattern // - note that the aPattern instance should remain in memory, since it will // be pointed to by the PatternText private field of this object procedure Prepare(const aPattern: RawUtf8; aCaseInsensitive, aReuse: boolean); overload; {$ifdef HASINLINE}inline;{$endif} /// initialize the internal fields for a given glob search pattern // - note that the aPattern buffer should remain in memory, since it will // be pointed to by the PatternText private field of this object procedure Prepare(aPattern: PUtf8Char; aPatternLen: integer; aCaseInsensitive, aReuse: boolean); overload; /// initialize low-level internal fields for'*aPattern*' search // - this method is faster than a regular Prepare('*' + aPattern + '*'), // since it may use the SBNDMQ2 algorithm for patterns of length 2..31 // - warning: the supplied aPattern variable may be modified in-place to be // filled with some lookup buffer, when SBNDMQ2 is triggered procedure PrepareContains(var aPattern: RawUtf8; aCaseInsensitive: boolean); overload; /// initialize low-level internal fields for a custom search algorithm procedure PrepareRaw(aPattern: PUtf8Char; aPatternLen: integer; aSearch: TMatchSearchFunction); /// returns TRUE if the supplied content matches the prepared glob pattern // - this method is not thread-safe function Match(const aText: RawUtf8): boolean; overload; {$ifdef FPC} inline;{$endif} /// returns TRUE if the supplied content matches the prepared glob pattern // - this method is not thread-safe function Match(aText: PUtf8Char; aTextLen: PtrInt): boolean; overload; {$ifdef HASINLINE}inline;{$endif} /// returns TRUE if the supplied content matches the prepared glob pattern // - this method IS thread-safe, and won't lock function MatchThreadSafe(const aText: RawUtf8): boolean; /// returns TRUE if the supplied VCL/LCL content matches the prepared glob pattern // - this method IS thread-safe, will use on-stack UTF-8 temporary conversion // if possible, and won't lock function MatchString(const aText: string): boolean; /// returns TRUE if this search pattern matches another function Equals(const aAnother: TMatch): boolean; {$ifdef HASINLINE}inline;{$endif} /// access to the pattern length as stored in PMax + 1 function PatternLength: integer; {$ifdef HASINLINE}inline;{$endif} /// access to the pattern text as stored in Pattern function PatternText: PUtf8Char; {$ifdef HASINLINE}inline;{$endif} /// check if the pattern search was defined as case-insensitive function CaseInsensitive: boolean; {$ifdef HASINLINE}inline;{$endif} end; /// stores an array of GLOB search engines // - use SetMatchs() to initialize such an array from a CSV pattern text TMatchDynArray = array of TMatch; /// TMatch descendant owning a copy of the Pattern string to avoid GPF issues TMatchStore = record /// access to the research criteria // - defined as a nested record (and not an object) to circumvent Delphi bug Pattern: TMatch; /// Pattern.Pattern PUtf8Char will point to this instance PatternInstance: RawUtf8; end; TMatchStoreDynArray = array of TMatchStore; /// stores several TMatch instances, from a set of glob patterns TMatchs = class(TSynPersistent) protected fMatch: TMatchStoreDynArray; fMatchCount: integer; public /// add once some glob patterns to the internal TMach list // - aPatterns[] follows the IsMatch() syntax constructor Create(const aPatterns: TRawUtf8DynArray; CaseInsensitive: boolean); reintroduce; overload; /// add once some glob patterns to the internal TMach list // - aPatterns[] follows the IsMatch() syntax procedure Subscribe(const aPatterns: TRawUtf8DynArray; CaseInsensitive: boolean); overload; virtual; /// add once some glob patterns to the internal TMach list // - each CSV item in aPatterns follows the IsMatch() syntax procedure Subscribe(const aPatternsCsv: RawUtf8; CaseInsensitive: boolean); overload; /// search patterns in the supplied UTF-8 text // - returns -1 if no filter has been subscribed // - returns -2 if there is no match on any previous pattern subscription // - returns fMatch[] index, i.e. >= 0 number on first matching pattern // - this method is thread-safe function Match(const aText: RawUtf8): integer; overload; {$ifdef HASINLINE}inline;{$endif} /// search patterns in the supplied UTF-8 text buffer function Match(aText: PUtf8Char; aLen: integer): integer; overload; /// search patterns in the supplied RTL string text // - could be used on a TFileName for instance // - will avoid any memory allocation if aText is small enough function MatchString(const aText: string): integer; end; /// store a decoded URI as full path and file/resource name {$ifdef USERECORDWITHMETHODS} TUriMatchName = record {$else} TUriMatchName = object {$endif USERECORDWITHMETHODS} public Path, Name: TValuePUtf8Char; /// to be called once Path has been populated to compute Name procedure ParsePath; end; /// efficient GLOB path or resource name lockup for an URI // - using mORMot fast TMatch engine {$ifdef USERECORDWITHMETHODS} TUriMatch = record {$else} TUriMatch = object {$endif USERECORDWITHMETHODS} private Init: TLightLock; Names, Paths: TMatchDynArray; procedure DoInit(csv: PUtf8Char; caseinsensitive: boolean); public /// main entry point of the GLOB resource/path URI pattern matching // - will thread-safe initialize the internal TMatch instances if necessary function Check(const csv: RawUtf8; const uri: TUriMatchName; caseinsensitive: boolean): boolean; end; /// fill the Match[] dynamic array with all glob patterns supplied as CSV // - returns how many patterns have been set in Match[|] // - note that the CsvPattern instance should remain in memory, since it will // be pointed to by the Match[].Pattern private field function SetMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean; out Match: TMatchDynArray; CsvSep: AnsiChar = ','): integer; overload; /// fill the Match[0..MatchMax] static array with all glob patterns supplied as CSV // - note that the CsvPattern instance should remain in memory, since it will // be pointed to by the Match[].Pattern private field function SetMatchs(CsvPattern: PUtf8Char; CaseInsensitive: boolean; Match: PMatch; MatchMax: integer; CsvSep: AnsiChar = ','): integer; overload; /// fill a TMatch instance with the next glob pattern supplied as CSV function SetNextMatch(P: PUtf8Char; var Dest: TMatch; CaseInsensitive, Reuse: boolean; CsvSep: AnsiChar): PUtf8Char; /// search if one TMach is already registered in the Several[] dynamic array function MatchExists(const One: TMatch; const Several: TMatchDynArray): boolean; /// add one TMach if not already registered in the Several[] dynamic array function MatchAdd(const One: TMatch; var Several: TMatchDynArray): boolean; /// allocate one TMach in the Several[] dynamic array function MatchNew(var Several: TMatchDynArray): PMatch; /// returns TRUE if Match=nil or if any Match[].Match(Text) is TRUE function MatchAny(const Match: TMatchDynArray; const Text: RawUtf8): boolean; overload; {$ifdef HASINLINE} inline; {$endif} /// returns TRUE if Match=nil or if any Match[].Match(Text, TextLen) is TRUE function MatchAny(Match: PMatch; Text: PUtf8Char; TextLen: PtrInt): boolean; overload; /// apply the CSV-supplied glob patterns to an array of RawUtf8 // - any text not matching the pattern will be deleted from the array // - the patterns are specified as CSV, separated by ',' procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean; var Values: TRawUtf8DynArray; CsvSep: AnsiChar = ','); overload; /// apply the CSV-supplied glob patterns to an array of string // - any text not matching the pattern will be deleted from the array // - the patterns are specified as CSV, separated by ',' procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean; var Values: TStringDynArray; CsvSep: AnsiChar = ','); overload; /// return TRUE if the supplied content matches a glob pattern // - ? Matches any single character // - * Matches any contiguous characters // - [abc] Matches a or b or c at that position // - [^abc] Matches anything but a or b or c at that position // - [!abc] Matches anything but a or b or c at that position // - [a-e] Matches a through e at that position // - [abcx-z] Matches a or b or c or x or y or or z, as does [a-cx-z] // - 'ma?ch.*' would match match.exe, mavch.dat, march.on, etc.. // - 'this [e-n]s a [!zy]est' would match 'this is a test', but would not // match 'this as a test' nor 'this is a zest' // - consider using TMatch or TMatchs if you expect to reuse the pattern function IsMatch(const Pattern, Text: RawUtf8; CaseInsensitive: boolean = false): boolean; /// return TRUE if the supplied content matches a glob pattern, using RTL strings // - is a wrapper around IsMatch() with fast UTF-8 conversion function IsMatchString(const Pattern, Text: string; CaseInsensitive: boolean = false): boolean; /// return TRUE if the supplied content matches one or several glob patterns // - the patterns are specified as CSV, separated by ',' function IsMatchs(const CsvPattern, Text: RawUtf8; CaseInsensitive: boolean = false; CsvSep: AnsiChar = ','): boolean; overload; /// return TRUE if the supplied content matches one or several glob patterns // - the patterns are specified as CSV, separated by ',' function IsMatchs(CsvPattern, Text: PUtf8Char; TextLen: PtrInt; CaseInsensitive: boolean = false; CsvSep: AnsiChar = ','): boolean; overload; type /// available pronunciations for our fast Soundex implementation TSynSoundExPronunciation = ( sndxEnglish, sndxFrench, sndxSpanish, sndxNone); TSoundExValues = array[0..ord('Z') - ord('B')] of byte; PSoundExValues = ^TSoundExValues; PSynSoundEx = ^TSynSoundEx; /// fast search of a text value, using the Soundex approximation mechanism // - Soundex is a phonetic algorithm for indexing names by sound, // as pronounced in a given language. The goal is for homophones to be // encoded to the same representation so that they can be matched despite // minor differences in spelling // - this implementation is very fast and can be used e.g. to parse and search // in a huge text buffer // - this version also handles french and spanish pronunciations on request, // which differs from default Soundex, i.e. English {$ifdef USERECORDWITHMETHODS} TSynSoundEx = record {$else} TSynSoundEx = object {$endif USERECORDWITHMETHODS} private Search, FirstChar: cardinal; fValues: PSoundExValues; public /// prepare for a Soundex search // - you can specify another language pronunciation than default english function Prepare(UpperValue: PAnsiChar; Lang: TSynSoundExPronunciation = sndxEnglish): boolean; overload; /// prepare for a custom Soundex search // - you can specify any language pronunciation from raw TSoundExValues array function Prepare(UpperValue: PAnsiChar; Lang: PSoundExValues): boolean; overload; /// return true if prepared value is contained in a text buffer // (UTF-8 encoded), by using the SoundEx comparison algorithm // - search prepared value at every word beginning in U^ function Utf8(U: PUtf8Char): boolean; /// return true if prepared value is contained in a ANSI text buffer // by using the SoundEx comparison algorithm // - search prepared value at every word beginning in A^ function Ansi(A: PAnsiChar): boolean; end; /// Retrieve the Soundex value of a text word, from Ansi buffer // - Return the soundex value as an easy to use cardinal value, 0 if the // incoming string contains no valid word // - if next is defined, its value is set to the end of the encoded word // (so that you can call again this function to encode a full sentence) function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar = nil; Lang: TSynSoundExPronunciation = sndxEnglish): cardinal; overload; /// Retrieve the Soundex value of a text word, from Ansi buffer // - Return the soundex value as an easy to use cardinal value, 0 if the // incoming string contains no valid word // - if next is defined, its value is set to the end of the encoded word // (so that you can call again this function to encode a full sentence) function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar; Lang: PSoundExValues): cardinal; overload; /// Retrieve the Soundex value of a text word, from UTF-8 buffer // - Return the soundex value as an easy to use cardinal value, 0 if the // incoming string contains no valid word // - if next is defined, its value is set to the end of the encoded word // (so that you can call again this function to encode a full sentence) // - very fast: all UTF-8 decoding is handled on the fly function SoundExUtf8(U: PUtf8Char; next: PPUtf8Char = nil; Lang: TSynSoundExPronunciation = sndxEnglish): cardinal; const /// number of bits to use for each interesting soundex char // - default is to use 8-bit, i.e. 4 soundex chars, which is the // standard approach // - for a more detailed soundex, use 4 bits resolution, which will // compute up to 7 soundex chars in a cardinal (that's our choice) SOUNDEX_BITS = 4; { ****************** Efficient CSV Parsing using RTTI } /// parse a CSV buffer into a TDynArray of records using its RTTI fields // - TypeInfo should have proper fields description, e.g. from Delphi 2010 // extended RTTI or mormot.core.rtti.pas' Rtti.RegisterFromText() // - first CSV line has headers matching the needed case-insensitive field names // - following CSV lines will be read and parsed into the dynamic array records // - any unknown header name within the RTTI fields will be ignored // - you can optionally intern all RawUtf8 values to reduce memory consumption function TDynArrayLoadCsv(var Value: TDynArray; Csv: PUtf8Char; Intern: TRawUtf8Interning = nil): boolean; /// parse a CSV UTF-8 string into a dynamic array of records using its RTTI fields // - just a wrapper around DynArrayLoadCsv() with a temporary TDynArray function DynArrayLoadCsv(var Value; const Csv: RawUtf8; TypeInfo: PRttiInfo; Intern: TRawUtf8Interning = nil): boolean; { ****************** Versatile Expression Search Engine } type /// exception type used by TExprParser EExprParser = class(ESynException); /// identify an expression search engine node type, as used by TExprParser TExprNodeType = ( entWord, entNot, entOr, entAnd); /// results returned by TExprParserAbstract.Parse method TExprParserResult = ( eprSuccess, eprNoExpression, eprMissingParenthesis, eprTooManyParenthesis, eprMissingFinalWord, eprInvalidExpression, eprUnknownVariable, eprUnsupportedOperator, eprInvalidConstantOrVariable); TParserAbstract = class; /// stores an expression search engine node, as used by TExprParser TExprNode = class(TSynPersistent) protected fNext: TExprNode; fNodeType: TExprNodeType; function Append(node: TExprNode): boolean; public /// initialize a node for the search engine constructor Create(nodeType: TExprNodeType); reintroduce; /// recursively destroys the linked list of nodes (i.e. Next) destructor Destroy; override; /// browse all nodes until Next = nil function Last: TExprNode; /// points to the next node in the parsed tree property Next: TExprNode read fNext; /// what is actually stored in this node property NodeType: TExprNodeType read fNodeType; end; /// abstract class to handle word search, as used by TExprParser TExprNodeWordAbstract = class(TExprNode) protected fOwner: TParserAbstract; fWord: RawUtf8; /// should be set from actual data before TExprParser.Found is called fFound: boolean; function ParseWord: TExprParserResult; virtual; abstract; public /// you should override this virtual constructor for proper initialization constructor Create(aOwner: TParserAbstract; const aWord: RawUtf8); reintroduce; virtual; end; /// class-reference type (metaclass) for a TExprNode // - allow to customize the actual searching process for entWord TExprNodeWordClass = class of TExprNodeWordAbstract; /// parent class of TExprParserAbstract TParserAbstract = class(TSynPersistent) protected fExpression, fCurrentWord, fAndWord, fOrWord, fNotWord: RawUtf8; fCurrent: PUtf8Char; fCurrentError: TExprParserResult; fFirstNode: TExprNode; fWordClass: TExprNodeWordClass; fWords: array of TExprNodeWordAbstract; fWordCount: integer; fNoWordIsAnd: boolean; fFoundStack: array[byte] of boolean; // simple stack-based virtual machine procedure ParseNextCurrentWord; virtual; abstract; function ParseExpr: TExprNode; function ParseFactor: TExprNode; function ParseTerm: TExprNode; procedure Clear; virtual; // override this method to initialize fWordClass and fAnd/Or/NotWord procedure Initialize; virtual; abstract; /// perform the expression search over TExprNodeWord.fFound flags // - warning: caller should check that fFirstNode<>nil (e.g. WordCount>0) function Execute: boolean; {$ifdef HASINLINE}inline;{$endif} public /// initialize an expression parser constructor Create; override; /// finalize the expression parser destructor Destroy; override; /// initialize the parser from a given text expression function Parse(const aExpression: RawUtf8): TExprParserResult; /// try this parser class on a given text expression // - returns '' on success, or an explicit error message (e.g. // 'Missing parenthesis') class function ParseError(const aExpression: RawUtf8): RawUtf8; /// the associated text expression used to define the search property Expression: RawUtf8 read fExpression; /// how many words did appear in the search expression property WordCount: integer read fWordCount; end; /// abstract class to parse a text expression into nodes // - you should inherit this class to provide actual text search // - searched expressions can use parenthesis and &=AND -=WITHOUT +=OR operators, // e.g. '((w1 & w2) - w3) + w4' means ((w1 and w2) without w3) or w4 // - no operator is handled like a AND, e.g. 'w1 w2' = 'w1 & w2' TExprParserAbstract = class(TParserAbstract) protected procedure ParseNextCurrentWord; override; // may be overridden to provide custom words escaping (e.g. handle quotes) procedure ParseNextWord; virtual; procedure Initialize; override; end; /// search expression engine using TMatch for the actual word searches TExprParserMatch = class(TExprParserAbstract) protected fCaseSensitive: boolean; fMatchedLastSet: integer; procedure Initialize; override; public /// initialize the search engine constructor Create(aCaseSensitive: boolean = true); reintroduce; /// returns TRUE if the expression is within the text buffer function Search(aText: PUtf8Char; aTextLen: PtrInt): boolean; overload; /// returns TRUE if the expression is within the text buffer function Search(const aText: RawUtf8): boolean; overload; {$ifdef HASINLINE}inline;{$endif} end; const /// may be used when overriding TExprParserAbstract.ParseNextWord method PARSER_STOPCHAR = ['&', '+', '-', '(', ')']; function ToText(r: TExprParserResult): PShortString; overload; function ToUtf8(r: TExprParserResult): RawUtf8; overload; { ****************** Bloom Filter Probabilistic Index } type /// implements a thread-safe Bloom Filter storage // - a "Bloom Filter" is a space-efficient probabilistic data structure, // that is used to test whether an element is a member of a set. False positive // matches are possible, but false negatives are not. Elements can be added to // the set, but not removed. Typical use cases are to avoid unnecessary // slow disk or network access if possible, when a lot of items are involved. // - memory use is very low, when compared to storage of all values: fewer // than 10 bits per element are required for a 1% false positive probability, // independent of the size or number of elements in the set - for instance, // storing 10,000,000 items presence with 1% of false positive ratio // would consume only 11.5 MB of memory, using 7 hash functions // - use Insert() methods to add an item to the internal bits array, and // Reset() to clear all bits array, if needed // - MayExist() function would check if the supplied item was probably set // - SaveTo() and LoadFrom() methods allow transmission of the bits array, // for a disk/database storage or transmission over a network // - internally, several (hardware-accelerated) crc32c hash functions will be // used, with some random seed values, to simulate several hashing functions; // you can customize the hash function if needed // - all methods are thread-safe, and MayExist can be concurrent (via a TRWLock) TSynBloomFilter = class(TSynPersistent) private fSafe: TRWLock; // need an upgradable lock for TSynBloomFilterDiff fHasher: THasher; fSize: cardinal; fBits: cardinal; fHashFunctions: cardinal; fInserted: cardinal; fFalsePositivePercent: double; fStore: RawByteString; public /// don't call this raw constructor, but its overloads constructor Create; overload; override; /// initialize the internal bits storage for a given number of items // - by default, internal bits array size will be guess from a 1 % false // positive rate - but you may specify another value, to reduce memory use // - this constructor would compute and initialize Bits and HashFunctions // corresponding to the expected false positive ratio // - you can specify a custom hash function if you find that the default // crc32c() has too many collisions: but SaveTo/LoadFrom will be tied to it; // see e.g. CryptCrc32(caMd5/caSha1) from mormot.crypt.secure constructor Create(aSize: integer; aFalsePositivePercent: double = 1; aHasher: THasher = nil); reintroduce; overload; /// initialize the internal bits storage from a SaveTo() binary buffer // - this constructor will initialize the internal bits array calling LoadFrom() // - you can specify a custom hash function to match with the one used before constructor Create(const aSaved: RawByteString; aMagic: cardinal = $B1003F11; aHasher: THasher = nil); reintroduce; overload; /// add an item in the internal bits array storage // - this method is thread-safe procedure Insert(const aValue: RawByteString); overload; /// add an item in the internal bits array storage // - this method is thread-safe procedure Insert(aValue: pointer; aValueLen: integer); overload; virtual; /// clear the internal bits array storage // - you may call this method after some time, if some items may have // been removed, to reduce false positives // - this method is thread-safe procedure Reset; virtual; /// returns TRUE if the supplied items was probably set via Insert() // - some false positive may occur, but not much than FalsePositivePercent // - this method is thread-safe, and allow concurrent calls (via a TRWLock) function MayExist(const aValue: RawByteString): boolean; overload; {$ifdef HASINLINE} inline; {$endif} /// returns TRUE if the supplied items was probably set via Insert() // - some false positive may occur, but not much than FalsePositivePercent // - this method is thread-safe, and allow concurrent calls (via a TRWLock) function MayExist(aValue: pointer; aValueLen: integer): boolean; overload; /// store the internal bits array into a binary buffer // - may be used to transmit or store the state of a dataset, avoiding // to recompute all Insert() at program startup, or to synchronize // networks nodes information and reduce the number of remote requests // - this method is thread-safe, and won't block MayExist (via a TRWLock) function SaveTo(aMagic: cardinal = $B1003F11): RawByteString; overload; /// store the internal bits array into a binary buffer // - may be used to transmit or store the state of a dataset, avoiding // to recompute all Insert() at program startup, or to synchronize // networks nodes information and reduce the number of remote requests // - this method is thread-safe, and won't block MayExist (via a TRWLock) procedure SaveTo(aDest: TBufferWriter; aMagic: cardinal = $B1003F11); overload; /// read the internal bits array from a binary buffer // - as previously serialized by the SaveTo method // - may be used to transmit or store the state of a dataset function LoadFrom(const aSaved: RawByteString; aMagic: cardinal = $B1003F11): boolean; overload; /// read the internal bits array from a binary buffer // - as previously serialized by the SaveTo method // - may be used to transmit or store the state of a dataset function LoadFrom(P: PByte; PLen: integer; aMagic: cardinal = $B1003F11): boolean; overload; virtual; published /// maximum number of items which are expected to be inserted property Size: cardinal read fSize; /// expected percentage (1..100) of false positive results for MayExists() property FalsePositivePercent: double read fFalsePositivePercent; /// number of bits stored in the internal bits array property Bits: cardinal read fBits; /// how many hash functions would be applied for each Insert() property HashFunctions: cardinal read fHashFunctions; /// how many times the Insert() method has been called property Inserted: cardinal read fInserted; end; /// implements a thread-safe differential Bloom Filter storage // - this inherited class is able to compute incremental serialization of // its internal bits array, to reduce network use // - an obfuscated revision counter is used to identify storage history TSynBloomFilterDiff = class(TSynBloomFilter) protected fRevision: Int64; fSnapShotAfterMinutes: cardinal; fSnapshotAfterInsertCount: cardinal; fSnapshotTimestamp: Int64; fSnapshotInsertCount: cardinal; fKnownRevision: Int64; fKnownStore: RawByteString; public /// add an item in the internal bits array storage // - this overloaded thread-safe method would compute fRevision procedure Insert(aValue: pointer; aValueLen: integer); override; /// clear the internal bits array storage // - this overloaded thread-safe method would reset fRevision procedure Reset; override; /// store the internal bits array into an incremental binary buffer // - here the difference from a previous SaveToDiff revision will be computed // - if aKnownRevision is outdated (e.g. if equals 0), the whole bits array // would be returned, and around 10 bits per item would be transmitted // (for 1% false positive ratio) // - incremental retrieval would then return around 10 bytes per newly added // item since the last snapshot reference state (with 1% ratio, i.e. 7 hash // functions) function SaveToDiff(const aKnownRevision: Int64): RawByteString; /// use the current internal bits array state as known revision // - is done the first time SaveToDiff() is called, then after 1/32th of // the filter size has been inserted (see SnapshotAfterInsertCount property), // or after SnapShotAfterMinutes property timeout period procedure DiffSnapshot; /// retrieve the revision number from an incremental binary buffer // - returns 0 if the supplied binary buffer does not match this bloom filter function DiffKnownRevision(const aDiff: RawByteString): Int64; /// read the internal bits array from an incremental binary buffer // - as previously serialized by the SaveToDiff() method // - may be used to transmit or store the state of a dataset // - returns false if the supplied content is incorrect, e.g. if the known // revision is deprecated function LoadFromDiff(const aDiff: RawByteString): boolean; /// the opaque revision number of this internal storage // - is in fact the Unix timestamp shifted by 31 bits, and an incremental // counter: this pattern will allow consistent IDs over several ServPanels property Revision: Int64 read fRevision; /// after how many Insert() the internal bits array storage should be // promoted as known revision // - equals Size div 32 by default property SnapshotAfterInsertCount: cardinal read fSnapshotAfterInsertCount write fSnapshotAfterInsertCount; /// after how many time the internal bits array storage should be // promoted as known revision // - equals 30 minutes by default property SnapShotAfterMinutes: cardinal read fSnapShotAfterMinutes write fSnapShotAfterMinutes; end; /// RLE compression of a memory buffer containing mostly zeros // - will store the number of consecutive zeros instead of plain zero bytes // - used for spare bit sets, e.g. TSynBloomFilter serialization // - will also compute the crc32c of the supplied content // - use ZeroDecompress() to expand the compressed result // - resulting content would be at most 14 bytes bigger than the input // - you may use this function before SynLZ compression procedure ZeroCompress(P: PAnsiChar; Len: integer; Dest: TBufferWriter); /// RLE uncompression of a memory buffer containing mostly zeros // - returns Dest='' if P^ is not a valid ZeroCompress() function result // - used for spare bit sets, e.g. TSynBloomFilter serialization // - will also check the crc32c of the supplied content procedure ZeroDecompress(P: PByte; Len: integer; var Dest: RawByteString); /// RLE compression of XORed memory buffers resulting in mostly zeros // - will perform ZeroCompress(Dest^ := New^ xor Old^) without any temporary // memory allocation // - is used e.g. by TSynBloomFilterDiff.SaveToDiff() in incremental mode // - will also compute the crc32c of the supplied content procedure ZeroCompressXor(New, Old: PAnsiChar; Len: cardinal; Dest: TBufferWriter); /// RLE uncompression and ORing of a memory buffer containing mostly zeros // - will perform Dest^ := Dest^ or ZeroDecompress(P^) without any temporary // memory allocation // - is used e.g. by TSynBloomFilterDiff.LoadFromDiff() in incremental mode // - returns false if P^ is not a valid ZeroCompress/ZeroCompressXor() result // - will also check the crc32c of the supplied content function ZeroDecompressOr(P, Dest: PAnsiChar; Len, DestLen: integer): boolean; { ****************** Binary Buffers Delta Compression } const /// normal pattern search depth for DeltaCompress() // - gives good results on most content DELTA_LEVEL_FAST = 100; /// brutal pattern search depth for DeltaCompress() // - may become very slow, with minor benefit, on huge content DELTA_LEVEL_BEST = 500; /// 2MB as internal chunks/window default size for DeltaCompress() // - will use up to 9 MB of RAM during DeltaCompress() - none in DeltaExtract() DELTA_BUF_DEFAULT = 2 shl 20; /// compute difference of two binary buffers // - returns '=' for equal buffers, or an optimized binary delta // - DeltaExtract() could be used later on to compute New from Old + Delta function DeltaCompress(const New, Old: RawByteString; Level: integer = DELTA_LEVEL_FAST; BufSize: integer = DELTA_BUF_DEFAULT): RawByteString; overload; /// compute difference of two binary buffers // - returns '=' for equal buffers, or an optimized binary delta // - DeltaExtract() could be used later on to compute New from Old function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize: integer; Level: integer = DELTA_LEVEL_FAST; BufSize: integer = DELTA_BUF_DEFAULT): RawByteString; overload; /// compute difference of two binary buffers // - returns '=' for equal buffers, or an optimized binary delta // - DeltaExtract() could be used later on to compute New from Old + Delta // - caller should call Freemem(Delta) once finished with the output buffer function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize: integer; out Delta: PAnsiChar; Level: integer = DELTA_LEVEL_FAST; BufSize: integer = DELTA_BUF_DEFAULT): integer; overload; type /// result of function DeltaExtract() TDeltaError = ( dsSuccess, dsCrcCopy, dsCrcComp, dsCrcBegin, dsCrcEnd, dsCrcExtract, dsFlag, dsLen); /// returns how many bytes a DeltaCompress() result will expand to function DeltaExtractSize(const Delta: RawByteString): integer; overload; /// returns how many bytes a DeltaCompress() result will expand to function DeltaExtractSize(Delta: PAnsiChar): integer; overload; /// apply the delta binary as computed by DeltaCompress() // - decompression don't use any RAM, will perform crc32c check, and is very fast // - return dsSuccess if was uncompressed to aOutUpd as expected function DeltaExtract(const Delta, Old: RawByteString; out New: RawByteString): TDeltaError; overload; /// low-level apply the delta binary as computed by DeltaCompress() // - New should already be allocated with DeltaExtractSize(Delta) bytes // - as such, expect Delta, Old and New to be <> nil, and Delta <> '=' // - return dsSuccess if was uncompressed to aOutUpd as expected function DeltaExtract(Delta, Old, New: PAnsiChar): TDeltaError; overload; function ToText(err: TDeltaError): PShortString; overload; { ****************** TDynArray Low-Level Binary Search and Iteration } /// wrap a simple dynamic array BLOB content as stored by TDynArray.SaveTo // - a "simple" dynamic array contains data with no reference count, e.g. byte, // word, integer, cardinal, Int64, double or Currency // - same as TDynArray.LoadFrom() with no memory allocation nor memory copy: so // is much faster than creating a temporary dynamic array to load the data // - will return nil if no or invalid data, or a pointer to the data // array otherwise, with the items number stored in Count and the individual // element size in ElemSize (e.g. 2 for a TWordDynArray) // - note: mORMot 1.18 Hash32 is not stored any more function SimpleDynArrayLoadFrom(Source: PAnsiChar; aTypeInfo: PRttiInfo; out Count, ElemSize: PtrInt): pointer; /// wrap an integer dynamic array BLOB content as stored by TDynArray.SaveTo // - same as TDynArray.LoadFrom() with no memory allocation nor memory copy: so // is much faster than creating a temporary dynamic array to load the data // - will return nil if no or invalid data, or a pointer to the integer // array otherwise, with the items number stored in Count // - slightly faster than SimpleDynArrayLoadFrom(Source,TypeInfo(TIntegerDynArray),Count) function IntegerDynArrayLoadFrom(Source: PAnsiChar; var Count: integer): PIntegerArray; /// search in a RawUtf8 dynamic array BLOB content as stored by TDynArray.SaveTo // - same as search within TDynArray.LoadFrom() with no memory allocation nor // memory copy: so is much faster // - will return -1 if no match or invalid data, or the matched entry index function RawUtf8DynArrayLoadFromContains(Source: PAnsiChar; Value: PUtf8Char; ValueLen: PtrInt; CaseSensitive: boolean): PtrInt; type /// allows to iterate over a TDynArray.SaveTo binary buffer // - may be used as alternative to TDynArray.LoadFrom, if you don't want // to allocate all items at once, but retrieve items one by one {$ifdef USERECORDWITHMETHODS} TDynArrayLoadFrom = record {$else} TDynArrayLoadFrom = object {$endif USERECORDWITHMETHODS} private ArrayLoad: TRttiBinaryLoad; public /// how many items were saved in the TDynArray.SaveTo binary buffer // - equals -1 if Init() failed to deserialize its header Count: integer; /// the zero-based index of the current item pointed by next Step() call // - is in range 0..Count-1 until Step() returns false Current: integer; /// current read position in the TDynArray.SaveTo binary buffer // - after Step() returned false, points just after the binary buffer, // like a regular TDynArray.LoadFrom Reader: TFastReader; /// RTTI information of the deserialized dynamic array ArrayRtti: TRttiCustom; /// initialize iteration over a TDynArray.SaveTo binary buffer // - returns true on success, with Count and Position being set // - returns false if the supplied binary buffer is not correct // - you should specify SourceMaxLen to avoid any buffer overflow function Init(ArrayTypeInfo: PRttiInfo; Source: PAnsiChar; SourceMaxLen: PtrInt): boolean; overload; /// initialize iteration over a TDynArray.SaveTo binary buffer // - returns true on success, with Count and Position being set // - returns false if the supplied binary buffer is not correct function Init(ArrayTypeInfo: PRttiInfo; const Source: RawByteString): boolean; overload; /// iterate over the current stored item // - Item should point to a variable of the exact item type stored in this // dynamic array // - returns true if Item was filled with one value, or false if all // items were read, and Position contains the end of the binary buffer function Step(Item: pointer): boolean; /// extract the first field value of the current stored item // - this function won't increase the internal Current pointer // - returns true if Field was filled with one value, or false if all // items were read, and Position contains the end of the binary buffer // - Field is expected to be of ArrayRtti.ArrayFirstField type // - could be called before Step(), to pre-allocate a new item instance, // or update an existing instance function FirstField(Field: pointer): boolean; end; { ****************** TSynFilter and TSynValidate Processing Classes } type TSynFilterOrValidate = class; TSynFilterOrValidateObjArray = array of TSynFilterOrValidate; TSynFilterOrValidateObjArrayArray = array of TSynFilterOrValidateObjArray; /// will define a filter (transformation) or a validation process to be // applied to a database Record content (typically a TOrm) // - the optional associated parameters are to be supplied JSON-encoded TSynFilterOrValidate = class protected fParameters: RawUtf8; /// children must override this method in order to parse the JSON-encoded // parameters, and store it in protected field values procedure SetParameters(const Value: RawUtf8); virtual; public /// add the filter or validation process to a list, checking if not present // - if an instance with the same class type and parameters is already // registered, will call aInstance.Free and return the exising instance // - if there is no similar instance, will add it to the list and return it function AddOnce(var aObjArray: TSynFilterOrValidateObjArray; aFreeIfAlreadyThere: boolean = true): TSynFilterOrValidate; public /// initialize the filter (transformation) or validation instance // - most of the time, optional parameters may be specified as JSON, // possibly with the extended MongoDB syntax constructor Create(const aParameters: RawUtf8 = ''); overload; virtual; /// initialize the filter or validation instance /// - this overloaded constructor will allow to easily set the parameters constructor CreateUtf8(const Format: RawUtf8; const Args, Params: array of const); overload; /// the optional associated parameters, supplied as JSON-encoded property Parameters: RawUtf8 read fParameters write SetParameters; end; /// will define a validation to be applied to a Record (typically a TOrm) // field content // - a typical usage is to validate an email or IP address e.g. // - the optional associated parameters are to be supplied JSON-encoded TSynValidate = class(TSynFilterOrValidate) public /// perform the validation action to the specified value // - the value is expected by be UTF-8 text, as generated by // TPropInfo.GetValue e.g. // - if the validation failed, must return FALSE and put some message in // ErrorMsg (translated into the current language: you could e.g. use // a resourcestring and a SysUtils.Format() call for automatic translation // via the mORMoti18n unit - you can leave ErrorMsg='' to trigger a // generic error message from clas name ('"Validate email" rule failed' // for TSynValidateEmail class e.g.) // - if the validation passed, will return TRUE function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; virtual; abstract; end; /// points to a TSynValidate variable // - used e.g. as optional parameter to TOrm.Validate/FilterAndValidate PSynValidate = ^TSynValidate; /// IP v4 address validation to be applied to a Record field content // (typically a TOrm) // - this versions expect no parameter TSynValidateIPAddress = class(TSynValidate) protected public /// perform the IP Address validation action to the specified value function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; override; end; /// IP address validation to be applied to a Record field content // (typically a TOrm) // - optional JSON encoded parameters are "AllowedTLD" or "ForbiddenTLD", // expecting a CSV lis of Top-Level-Domain (TLD) names, e.g. // $ '{"AllowedTLD":"com,org,net","ForbiddenTLD":"fr"}' // $ '{AnyTLD:true,ForbiddenDomains:"mailinator.com,yopmail.com"}' // - this will process a validation according to RFC 822 (calling the // IsValidEmail() function) then will check for the TLD to be in one of // the Top-Level domains ('.com' and such) or a two-char country, and // then will check the TLD according to AllowedTLD and ForbiddenTLD TSynValidateEmail = class(TSynValidate) private fAllowedTLD: RawUtf8; fForbiddenTLD: RawUtf8; fForbiddenDomains: RawUtf8; fAnyTLD: boolean; protected /// decode all published properties from their JSON representation procedure SetParameters(const Value: RawUtf8); override; public /// perform the Email Address validation action to the specified value // - call IsValidEmail() function and check for the supplied TLD function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; override; /// allow any TLD to be allowed, even if not a generic TLD (.com,.net ...) // - this may be mandatory since already over 1,300 new gTLD names or // "strings" could become available in the next few years: there is a // growing list of new gTLDs available at // @http://newgtlds.icann.org/en/program-status/delegated-strings // - the only restriction is that it should be ascii characters property AnyTLD: boolean read fAnyTLD write fAnyTLD; /// a CSV list of allowed TLD // - if accessed directly, should be set as lower case values // - e.g. 'com,org,net' property AllowedTLD: RawUtf8 read fAllowedTLD write fAllowedTLD; /// a CSV list of forbidden TLD // - if accessed directly, should be set as lower case values // - e.g. 'fr' property ForbiddenTLD: RawUtf8 read fForbiddenTLD write fForbiddenTLD; /// a CSV list of forbidden domain names // - if accessed directly, should be set as lower case values // - not only the TLD, but whole domains like 'cracks.ru,hotmail.com' or such property ForbiddenDomains: RawUtf8 read fForbiddenDomains write fForbiddenDomains; end; /// glob case-sensitive pattern validation of a Record field content // - parameter is NOT JSON encoded, but is some basic TMatch glob pattern // - ? Matches any single character // - * Matches any contiguous characters // - [abc] Matches a or b or c at that position // - [^abc] Matches anything but a or b or c at that position // - [!abc] Matches anything but a or b or c at that position // - [a-e] Matches a through e at that position // - [abcx-z] Matches a or b or c or x or y or or z, as does [a-cx-z] // - 'ma?ch.*' would match match.exe, mavch.dat, march.on, etc.. // - 'this [e-n]s a [!zy]est' would match 'this is a test', but would not // match 'this as a test' nor 'this is a zest' // - pattern check IS case sensitive (TSynValidatePatternI is not) // - this class is not as complete as PCRE regex for example, // but code overhead is very small, and speed good enough in practice TSynValidatePattern = class(TSynValidate) protected fMatch: TMatch; procedure SetParameters(const Value: RawUtf8); override; public /// perform the pattern validation to the specified value // - pattern can be e.g. '[0-9][0-9]:[0-9][0-9]:[0-9][0-9]' // - this method will implement both TSynValidatePattern and // TSynValidatePatternI, checking the current class function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; override; end; /// glob case-insensitive pattern validation of a text field content // (typically a TOrm) // - parameter is NOT JSON encoded, but is some basic TMatch glob pattern // - same as TSynValidatePattern, but is NOT case sensitive TSynValidatePatternI = class(TSynValidatePattern); /// text validation to ensure that to any text field would not be '' TSynValidateNonVoidText = class(TSynValidate) public /// perform the non void text validation action to the specified value function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; override; end; TSynValidateTextProps = array[0..15] of cardinal; {$M+} // to have existing RTTI for published properties /// text validation to be applied to any Record field content // - default MinLength value is 1, MaxLength is maxInt: so a blank // TSynValidateText.Create('') is the same as TSynValidateNonVoidText // - MinAlphaCount, MinDigitCount, MinPunctCount, MinLowerCount and // MinUpperCount allow you to specify the minimal count of respectively // alphabetical [a-zA-Z], digit [0-9], punctuation [_!;.,/:?%$="#@(){}+-*], // lower case or upper case characters // - expects optional JSON parameters of the allowed text length range as // $ '{"MinLength":5,"MaxLength":10,"MinAlphaCount":1,"MinDigitCount":1, // $ "MinPunctCount":1,"MinLowerCount":1,"MinUpperCount":1} TSynValidateText = class(TSynValidate) private /// used to store all associated validation properties by index fProps: TSynValidateTextProps; fUtf8Length: boolean; protected /// use sInvalidTextChar resourcestring to create a translated error message procedure SetErrorMsg(fPropsIndex, InvalidTextIndex, MainIndex: integer; var result: string); /// decode "MinLength", "MaxLength", and other parameters into fProps[] procedure SetParameters(const Value: RawUtf8); override; public /// perform the text length validation action to the specified value function Process(aFieldIndex: integer; const Value: RawUtf8; var ErrorMsg: string): boolean; override; published /// Minimal length value allowed for the text content // - the length is calculated with UTF-16 Unicode codepoints, unless // Utf8Length has been set to TRUE so that the UTF-8 byte count is checked // - default is 1, i.e. a void text will not pass the validation property MinLength: cardinal read fProps[0] write fProps[0]; /// Maximal length value allowed for the text content // - the length is calculated with UTF-16 Unicode codepoints, unless // Utf8Length has been set to TRUE so that the UTF-8 byte count is checked // - default is maxInt, i.e. no maximum length is set property MaxLength: cardinal read fProps[1] write fProps[1]; /// Minimal alphabetical character [a-zA-Z] count // - default is 0, i.e. no minimum set property MinAlphaCount: cardinal read fProps[2] write fProps[2]; /// Maximal alphabetical character [a-zA-Z] count // - default is maxInt, i.e. no Maximum set property MaxAlphaCount: cardinal read fProps[10] write fProps[10]; /// Minimal digit character [0-9] count // - default is 0, i.e. no minimum set property MinDigitCount: cardinal read fProps[3] write fProps[3]; /// Maximal digit character [0-9] count // - default is maxInt, i.e. no Maximum set property MaxDigitCount: cardinal read fProps[11] write fProps[11]; /// Minimal punctuation sign [_!;.,/:?%$="#@(){}+-*] count // - default is 0, i.e. no minimum set property MinPunctCount: cardinal read fProps[4] write fProps[4]; /// Maximal punctuation sign [_!;.,/:?%$="#@(){}+-*] count // - default is maxInt, i.e. no Maximum set property MaxPunctCount: cardinal read fProps[12] write fProps[12]; /// Minimal alphabetical lower case character [a-z] count // - default is 0, i.e. no minimum set property MinLowerCount: cardinal read fProps[5] write fProps[5]; /// Maximal alphabetical lower case character [a-z] count // - default is maxInt, i.e. no Maximum set property MaxLowerCount: cardinal read fProps[13] write fProps[13]; /// Minimal alphabetical upper case character [A-Z] count // - default is 0, i.e. no minimum set property MinUpperCount: cardinal read fProps[6] write fProps[6]; /// Maximal alphabetical upper case character [A-Z] count // - default is maxInt, i.e. no Maximum set property MaxUpperCount: cardinal read fProps[14] write fProps[14]; /// Minimal space count inside the value text // - default is 0, i.e. any space number allowed property MinSpaceCount: cardinal read fProps[7] write fProps[7]; /// Maximal space count inside the value text // - default is maxInt, i.e. any space number allowed property MaxSpaceCount: cardinal read fProps[15] write fProps[15]; /// Maximal space count allowed on the Left side // - default is maxInt, i.e. any Left space allowed property MaxLeftTrimCount: cardinal read fProps[8] write fProps[8]; /// Maximal space count allowed on the Right side // - default is maxInt, i.e. any Right space allowed property MaxRightTrimCount: cardinal read fProps[9] write fProps[9]; /// defines if lengths parameters expects UTF-8 or UTF-16 codepoints number // - with default FALSE, the length is calculated with UTF-16 Unicode // codepoints - MaxLength may not match the UCS4 CodePoint, in case of // UTF-16 surrogates // - you can set this property to TRUE so that the UTF-8 byte count would // be used for truncation against the MaxLength parameter property Utf8Length: boolean read fUtf8Length write fUtf8Length; end; {$M-} /// strong password validation for a Record field content (typically a TOrm) // - the following parameters are set by default to // $ '{"MinLength":5,"MaxLength":20,"MinAlphaCount":1,"MinDigitCount":1, // $ "MinPunctCount":1,"MinLowerCount":1,"MinUpperCount":1,"MaxSpaceCount":0}' // - you can specify some JSON encoded parameters to change this default // values, which will validate the text field only if it contains from 5 to 10 // characters, with at least one digit, one upper case letter, one lower case // letter, and one punctuation sign, with no space allowed inside TSynValidatePassWord = class(TSynValidateText) protected /// set password specific parameters procedure SetParameters(const Value: RawUtf8); override; end; { C++Builder doesn't support array elements as properties (RSP-12595). For now, simply exclude the relevant classes from C++Builder. } {$NODEFINE TSynValidateTextProps} {$NODEFINE TSynValidateText } {$NODEFINE TSynValidatePassWord } /// will define a transformation to be applied to a Record field content // (typically a TOrm) // - here "filter" means that content would be transformed according to a // set of defined rules // - a typical usage is to convert to lower or upper case, or // trim any time or date value in a TDateTime field // - the optional associated parameters are to be supplied JSON-encoded TSynFilter = class(TSynFilterOrValidate) protected public /// perform the transformation to the specified value // - the value is converted into UTF-8 text, as expected by // TPropInfo.GetValue / TPropInfo.SetValue e.g. procedure Process(aFieldIndex: integer; var Value: RawUtf8); virtual; abstract; end; /// class-reference type (metaclass) for a TSynFilter or a TSynValidate TSynFilterOrValidateClass = class of TSynFilterOrValidate; /// class-reference type (metaclass) of a record filter (transformation) TSynFilterClass = class of TSynFilter; /// convert the value into ASCII Upper Case characters // - UpperCase conversion is made for ASCII-7 only, i.e. 'a'..'z' characters // - this version expects no parameter TSynFilterUpperCase = class(TSynFilter) public /// perform the case conversion to the specified value procedure Process(aFieldIndex: integer; var Value: RawUtf8); override; end; /// convert the value into WinAnsi Upper Case characters // - UpperCase conversion is made for all latin characters in the WinAnsi // code page only, e.g. 'e' acute will be converted to 'E' // - this version expects no parameter TSynFilterUpperCaseU = class(TSynFilter) public /// perform the case conversion to the specified value procedure Process(aFieldIndex: integer; var Value: RawUtf8); override; end; /// convert the value into ASCII Lower Case characters // - LowerCase conversion is made for ASCII-7 only, i.e. 'A'..'Z' characters // - this version expects no parameter TSynFilterLowerCase = class(TSynFilter) public /// perform the case conversion to the specified value procedure Process(aFieldIndex: integer; var Value: RawUtf8); override; end; /// convert the value into WinAnsi Lower Case characters // - LowerCase conversion is made for all latin characters in the WinAnsi // code page only, e.g. 'E' acute will be converted to 'e' // - this version expects no parameter TSynFilterLowerCaseU = class(TSynFilter) public /// perform the case conversion to the specified value procedure Process(aFieldIndex: integer; var Value: RawUtf8); override; end; /// trim any space character left or right to the value // - this versions expect no parameter TSynFilterTrim = class(TSynFilter) public /// perform the space trimming conversion to the specified value procedure Process(aFieldIndex: integer; var Value: RawUtf8); override; end; /// truncate a text above a given maximum length // - expects optional JSON parameters of the allowed text length range as // $ '{MaxLength":10} TSynFilterTruncate = class(TSynFilter) protected fMaxLength: cardinal; fUtf8Length: boolean; /// decode the MaxLength: and Utf8Length: parameters procedure SetParameters(const Value: RawUtf8); override; public /// perform the length truncation of the specified value procedure Process(aFieldIndex: integer; var Value: RawUtf8); override; /// Maximal length value allowed for the text content // - the length is calculated with UTF-16 Unicode codepoints, unless // Utf8Length has been set to TRUE so that the UTF-8 byte count is checked // - default is 0, i.e. no maximum length is forced property MaxLength: cardinal read fMaxLength write fMaxLength; /// defines if MaxLength is stored as UTF-8 or UTF-16 codepoints number // - with default FALSE, the length is calculated with UTF-16 Unicode // codepoints - MaxLength may not match the UCS4 CodePoint, in case of // UTF-16 surrogates // - you can set this property to TRUE so that the UTF-8 byte count would // be used for truncation against the MaxLength parameter property Utf8Length: boolean read fUtf8Length write fUtf8Length; end; {$ifdef ISDELPHI} resourcestring {$else} const {$endif ISDELPHI} sInvalidIPAddress = '"%s" is an invalid IP v4 address'; sInvalidEmailAddress = '"%s" is an invalid email address'; sInvalidPattern = '"%s" does not match the expected pattern'; sCharacter01n = 'character,character,characters'; sInvalidTextLengthMin = 'Expect at least %d %s'; sInvalidTextLengthMax = 'Expect up to %d %s'; sInvalidTextChar = 'Expect at least %d %s %s,Expect up to %d %s %s,'+ 'alphabetical,digital,punctuation,lowercase,uppercase,space,'+ 'Too much spaces on the left,Too much spaces on the right'; sValidationFailed = '"%s" rule failed'; sValidationFieldVoid = 'An unique key field must not be void'; sValidationFieldDuplicate = 'Value already used for this unique key field'; /// return TRUE if the supplied content is a valid IP v4 address function IsValidIP4Address(P: PUtf8Char): boolean; /// return TRUE if the supplied content is a valid email address // - follows RFC 822, to validate local-part@domain email format function IsValidEmail(P: PUtf8Char): boolean; { ***************** Cross-Platform TSynTimeZone Time Zones } type {$A-} { make all records packed for cross-platform binary serialization } /// used to store Time Zone bias in TSynTimeZone // - map how low-level information is stored in the Windows Registry TTimeZoneInfo = record Bias: integer; bias_std: integer; bias_dlt: integer; change_time_std: TSynSystemTime; change_time_dlt: TSynSystemTime; end; PTimeZoneInfo = ^TTimeZoneInfo; /// text identifier of a Time Zone, following Microsoft Windows naming TTimeZoneID = type RawUtf8; /// used to store Time Zone information for a single area in TSynTimeZone // - Delphi "object" is buggy on stack -> also defined as record with methods {$ifdef USERECORDWITHMETHODS} TTimeZoneData = record {$else} TTimeZoneData = object {$endif USERECORDWITHMETHODS} public id: TTimeZoneID; display: RawUtf8; tzi: TTimeZoneInfo; dyn: array of packed record year: integer; tzi: TTimeZoneInfo; end; /// search for the TTimeZoneInfo of a given year function GetTziFor(year: integer): PTimeZoneInfo; end; /// used to store the Time Zone information of a TSynTimeZone class TTimeZoneDataDynArray = array of TTimeZoneData; {$A+} /// handle cross-platform time conversions, following Microsoft time zones // - is able to retrieve accurate information from the Windows registry, // or from a binary compressed file on other platforms (which should have been // saved from a Windows system first) // - for Linux/POSIX our mORMot 2 repository supplies a ready-to-use // ! {$R mormot.tz.res} // - each time zone will be identified by its TzId string, as defined by // Microsoft for its Windows Operating system // - note that each instance is thread-safe TSynTimeZone = class protected fSafe: TRWLightLock; fZone: TTimeZoneDataDynArray; fZoneCount: integer; fZones: TDynArrayHashed; fLastZone: TTimeZoneID; fLastIndex: integer; fIds: TStringList; fDisplays: TStringList; function LockedFindZoneIndex(const TzId: TTimeZoneID): PtrInt; public /// initialize the internal storage // - but no data is available, until Load* methods are called constructor Create; /// retrieve the time zones from Windows registry, or from a local file // - under Linux, the file should be located with the executable, renamed // with a .tz extension - may have been created via SaveToFile(''), or // from a 'TSynTimeZone' bound resource // - "dummycpp" parameter exists only to disambiguate constructors for C++ constructor CreateDefault(dummycpp: integer = 0); /// finalize the instance destructor Destroy; override; /// will retrieve the default shared TSynTimeZone instance // - locally created via the CreateDefault constructor // - see also the NowToLocal/LocalToUtc/UtcToLocal global functions class function Default: TSynTimeZone; {$ifdef OSWINDOWS} /// read time zone information from the Windows registry procedure LoadFromRegistry; {$endif OSWINDOWS} /// read time zone information from a compressed file // - if no file name is supplied, a ExecutableName.tz file would be used procedure LoadFromFile(const FileName: TFileName = ''); /// read time zone information from a compressed memory buffer procedure LoadFromBuffer(const Buffer: RawByteString); /// read time zone information from a 'TSynTimeZone' resource // - the resource should contain the SaveToBuffer compressed binary content // - is no resource matching the TSynTimeZone class name and ResType=10 // do exist, nothing would be loaded // - the resource could be created as such, from a Windows system: // ! TSynTimeZone.Default.SaveToFile('TSynTimeZone.data'); // then compile the resource as expected, with a brcc32 .rc entry: // ! TSynTimeZone 10 "TSynTimeZone.data" // - you can specify a library (dll) resource instance handle, if needed // - for Linux/POSIX our mORMot 2 repository supplies a ready-to-use // ! {$R mormot.tz.res} procedure LoadFromResource(Instance: TLibHandle = 0); /// write then time zone information into a compressed file // - if no file name is supplied, a ExecutableName.tz file would be created procedure SaveToFile(const FileName: TFileName); /// write then time zone information into a compressed memory buffer function SaveToBuffer: RawByteString; /// retrieve the time bias (in minutes) for a given date/time on a TzId function GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID; out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean = false): boolean; /// retrieve the display text corresponding to a TzId // - returns '' if the supplied TzId is not recognized function GetDisplay(const TzId: TTimeZoneID): RawUtf8; /// compute the UTC date/time corrected for a given TzId function UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime; /// compute the current date/time corrected for a given TzId function NowToLocal(const TzId: TTimeZoneID): TDateTime; /// compute the UTC date/time for a given local TzId value // - by definition, a local time may correspond to two UTC times, during the // time bias period, so the returned value is informative only, and any // stored value should be following UTC function LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime; /// direct access to the low-level time zone information property Zone: TTimeZoneDataDynArray read fZone; /// direct access to the wrapper over the time zone information array property Zones: TDynArrayHashed read fZones; /// returns a TStringList of all TzID values // - could be used to fill any UI component to select the time zone // - order in Ids[] array follows the Zone[].id information function Ids: TStrings; /// returns a TStringList of all Display text values // - could be used to fill any UI component to select the time zone // - order in Displays[] array follows the Zone[].display information function Displays: TStrings; end; /// retrieve the time bias (in minutes) for a given date/time on a TzId // - will use a global shared thread-safe TSynTimeZone instance for the request function GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID; out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean = false): boolean; /// retrieve the display text corresponding to a TzId // - returns '' if the supplied TzId is not recognized // - will use a global shared thread-safe TSynTimeZone instance for the request function GetDisplay(const TzId: TTimeZoneID): RawUtf8; /// compute the UTC date/time corrected for a given TzId // - will use a global shared thread-safe TSynTimeZone instance for the request function UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime; {$ifdef HASINLINE} inline; {$endif} /// compute the current date/time corrected for a given TzId // - will use a global shared thread-safe TSynTimeZone instance for the request function NowToLocal(const TzId: TTimeZoneID): TDateTime; {$ifdef HASINLINE} inline; {$endif} /// compute the UTC date/time for a given local TzId value // - by definition, a local time may correspond to two UTC times, during the // time bias period, so the returned value is informative only, and any // stored value should be following UTC // - will use a global shared thread-safe TSynTimeZone instance for the request function LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime; {$ifdef HASINLINE} inline; {$endif} implementation { ****************** Files Search in Folders } procedure TFindFiles.FromSearchRec(const Directory: TFileName; const F: TSearchRec); begin Name := Directory + TFileName(F.Name); {$ifdef OSWINDOWS} {$ifdef HASINLINE} // FPC or Delphi 2006+ Size := F.Size; {$else} // F.Size was limited to 32-bit on older Delphi PInt64Rec(@Size)^.Lo := F.FindData.nFileSizeLow; PInt64Rec(@Size)^.Hi := F.FindData.nFileSizeHigh; {$endif HASINLINE} {$else} Size := F.Size; {$endif OSWINDOWS} Attr := F.Attr; Timestamp := SearchRecToDateTime(F); end; function TFindFiles.ToText: ShortString; begin FormatShort('% % %', [Name, KB(Size), DateTimeToFileShort(Timestamp)], result); end; function FindFiles(const Directory, Mask, IgnoreFileName: TFileName; Options: TFindFilesOptions): TFindFilesDynArray; var m, count: integer; dir: TFileName; da: TDynArray; masks: TRawUtf8DynArray; masked: TFindFilesDynArray; procedure SearchFolder(const folder: TFileName); var F: TSearchRec; ff: TFindFiles; fold, name: TFileName; // FPC requires these implicit local variables :( begin fold := dir + folder; name := fold + Mask; if FindFirst(name, faAnyfile - faDirectory, F) = 0 then begin repeat if SearchRecValidFile(F) and ((IgnoreFileName = '') or (AnsiCompareFileName(F.Name, IgnoreFileName) <> 0)) then begin if ffoExcludesDir in Options then ff.FromSearchRec(folder, F) else ff.FromSearchRec(fold, F); da.Add(ff); end; until FindNext(F) <> 0; FindClose(F); end; if (ffoSubFolder in Options) and (FindFirst(fold + '*', faDirectory, F) = 0) then begin // recursive SearchFolder() call for nested directories repeat if SearchRecValidFolder(F) and ((IgnoreFileName = '') or (AnsiCompareFileName(F.Name, IgnoreFileName) <> 0)) then SearchFolder(IncludeTrailingPathDelimiter(folder + F.Name)); until FindNext(F) <> 0; FindClose(F); end; end; begin Finalize(result); da.Init(TypeInfo(TFindFilesDynArray), result, @count); if Pos(';', Mask) > 0 then CsvToRawUtf8DynArray(pointer(StringToUtf8(Mask)), masks, ';'); if masks <> nil then begin // recursive calls for each masks[] if ffoSortByName in Options then QuickSortRawUtf8(masks, length(masks), nil, {$ifdef OSWINDOWS} @StrIComp {$else} @StrComp {$endif}); for m := 0 to length(masks) - 1 do begin masked := FindFiles( Directory, Utf8ToString(masks[m]), IgnoreFileName, Options); da.AddArray(masked); end; end else begin // single mask search if Directory <> '' then dir := IncludeTrailingPathDelimiter(Directory); SearchFolder(''); if (ffoSortByName in Options) and (da.Count > 1) then da.Sort(SortDynArrayFileName); end; if count <> 0 then DynArrayFakeLength(result, count); end; function FileNames(const Directory, Mask: TFileName; Options: TFindFilesOptions; const IgnoreFileName: TFileName): TFileNameDynArray; begin result := FindFilesDynArrayToFileNames( FindFiles(Directory, Mask, IgnoreFileName, Options)); end; function FileNames(const Path: array of const; const Mask: TFileName; Options: TFindFilesOptions): TFileNameDynArray; var dir: TFileName; begin dir := MakePath(Path, {endwithdelim=}true); result := FileNames(dir, Mask, Options); end; function FindFilesDynArrayToFileNames(const Files: TFindFilesDynArray): TFileNameDynArray; var i, n: PtrInt; begin Finalize(result); if Files = nil then exit; n := length(Files); SetLength(result, n); for i := 0 to n - 1 do result[i] := Files[i].Name; end; function SortFindFileTimestamp(const A, B): integer; begin result := CompareFloat(TFindFiles(A).Timestamp, TFindFiles(B).Timestamp); end; procedure FindFilesSortByTimestamp(var Files: TFindFilesDynArray); begin DynArray(TypeInfo(TFindFilesDynArray), Files).Sort(SortFindFileTimestamp); end; function SynchFolders(const Reference, Dest: TFileName; Options: TSynchFoldersOptions): integer; var ref, dst, reffn, dstfn: TFileName; fdst: TSearchRec; refsize: Int64; reftime: TUnixMSTime; s: RawByteString; begin result := 0; ref := IncludeTrailingPathDelimiter(Reference); dst := IncludeTrailingPathDelimiter(Dest); if DirectoryExists(ref) and (FindFirst(dst + FILES_ALL, faAnyFile, fdst) = 0) then begin repeat if SearchRecValidFile(fdst) then begin reffn := ref + fdst.Name; if not FileInfoByName(reffn, refsize, reftime) then continue; // only update existing files if not (sfoByContent in Options) then if (refsize = fdst.Size) and (reftime = SearchRecToUnixTimeUtc(fdst)) then continue; dstfn := dst + fdst.Name; s := StringFromFile(reffn); if (s = '') or ((sfoByContent in Options) and (length(s) = fdst.Size) and (DefaultHasher(0, pointer(s), fdst.Size) = HashFile(dstfn))) then continue; FileFromString(s, dstfn); FileSetDateFromUnixUtc(dstfn, reftime div MSecsPerSec); inc(result); if sfoWriteFileNameToConsole in Options then ConsoleWrite('synched %', [dstfn]); end else if (sfoSubFolder in Options) and SearchRecValidFolder(fdst) then inc(result, SynchFolders(ref + fdst.Name, dst + fdst.Name, Options)); until FindNext(fdst) <> 0; FindClose(fdst); end; end; function CopyFolder(const Source, Dest: TFileName; Options: TSynchFoldersOptions): integer; var src, dst, reffn, dstfn: TFileName; sr: TSearchRec; dsize: Int64; dtime: TUnixMSTime; nested: integer; begin result := 0; src := IncludeTrailingPathDelimiter(Source); if not DirectoryExists(src) then exit; dst := EnsureDirectoryExists(Dest); if (dst = '') or (FindFirst(src + FILES_ALL, faAnyFile, sr) <> 0) then exit; repeat reffn := src + sr.Name; dstfn := dst + sr.Name; if SearchRecValidFile(sr) then begin if FileInfoByName(dstfn, dsize, dtime) and // fast single syscall (sr.Size = dsize) then if sfoByContent in Options then begin if SameFileContent(reffn, dstfn) then continue; end else if abs(SearchRecToUnixTimeUtc(sr) * 1000 - dtime) < 1000 then continue; // allow error of 1 second timestamp resolution if not CopyFile(reffn, dstfn, {failsifexists=}false) then result := -1; end else if not SearchRecValidFolder(sr) then continue else if sfoSubFolder in Options then begin nested := CopyFolder(reffn, dstfn, Options); if nested < 0 then result := nested else inc(result, nested); end; if result < 0 then break; inc(result); if sfoWriteFileNameToConsole in Options then ConsoleWrite('copied %', [reffn]); until (FindNext(sr) <> 0); FindClose(sr); end; { ****************** ScanUtf8, GLOB and SOUNDEX Text Search } function ScanUtf8(P: PUtf8Char; PLen: PtrInt; const fmt: RawUtf8; const values: array of pointer; ident: PRawUtf8DynArray): integer; var v, w: PtrInt; F, FEnd, PEnd: PUtf8Char; tab: PTextCharSet; label next; begin result := 0; if (fmt = '') or (P = nil) or (PLen <= 0) or (high(values) < 0) then exit; if ident <> nil then SetLength(ident^, length(values)); F := pointer(fmt); FEnd := F + length(fmt); PEnd := P + PLen; for v := 0 to high(values) do repeat if (P^ <= ' ') and (P^ <> #0) then // ignore any whitespace char in text repeat inc(P); if P = PEnd then exit; until (P^ > ' ') or (P^ = #0); while (F^ <= ' ') and (F^ <> #0) do begin // ignore any whitespace char in fmt inc(F); if F = FEnd then exit; end; if F^ = '%' then begin // handle format specifier inc(F); if F = FEnd then exit; case F^ of 'd': PInteger(values[v])^ := GetNextItemInteger(P, #0); 'D': PInt64(values[v])^ := GetNextItemInt64(P, #0); 'u': PCardinal(values[v])^ := GetNextItemCardinal(P, #0); 'U': PQword(values[v])^ := GetNextItemQword(P, #0); 'f': unaligned(PDouble(values[v])^) := GetNextItemDouble(P, #0); 'F': GetNextItemCurrency(P, PCurrency(values[v])^, #0); 'x': if not GetNextItemHexDisplayToBin(P, values[v], 4, #0) then exit; 'X': if not GetNextItemHexDisplayToBin(P, values[v], 8, #0) then exit; 's', 'S': begin w := 0; while (P[w] > ' ') and (P + w <= PEnd) do inc(w); if F^ = 's' then SetString(PShortString(values[v])^, PAnsiChar(P), w) else FastSetString(PRawUtf8(values[v])^, P, w); inc(P, w); while (P^ <= ' ') and (P^ <> #0) and (P <= PEnd) do inc(P); end; 'L': begin w := 0; tab := @TEXT_CHARS; while (tcNot01013 in tab[P[w]]) and (P + w <= PEnd) do inc(w); FastSetString(PRawUtf8(values[v])^, P, w); inc(P, w); end; '%': goto next; else raise ESynException.CreateUtf8( 'ScanUtf8: unknown ''%'' specifier [%]', [F^, fmt]); end; inc(result); tab := @TEXT_CHARS; if (tcIdentifier in tab[F[1]]) or (ident <> nil) then begin w := 0; repeat inc(w) until not (tcIdentifier in tab[F[w]]) or (F + w = FEnd); if ident <> nil then FastSetString(ident^[v], F, w); inc(F, w); end else inc(F); if (F >= FEnd) or (P >= PEnd) then exit; break; end else begin next: while (P^ <> F^) and (P <= PEnd) do inc(P); inc(F); inc(P); if (F >= FEnd) or (P >= PEnd) then exit; end; until false; end; function ScanUtf8(const text, fmt: RawUtf8; const values: array of pointer; ident: PRawUtf8DynArray): integer; begin result := ScanUtf8(pointer(text), length(text), fmt, values, ident); end; // inspired by ZMatchPattern.pas - http://www.zeoslib.sourceforge.net procedure TMatch.MatchMain; var RangeStart, RangeEnd: PtrInt; c: AnsiChar; flags: set of (Invert, MemberMatch); begin while ((State = sNONE) and (P <= PMax)) do begin c := Upper[Pattern[P]]; if T > TMax then begin if (c = '*') and (P + 1 > PMax) then State := sVALID else State := sABORT; exit; end else case c of '?': ; '*': MatchAfterStar; '[': begin inc(P); byte(flags) := 0; if Pattern[P] in ['!', '^'] then begin include(flags, Invert); inc(P); end; if Pattern[P] = ']' then begin State := sPATTERN; exit; end; c := Upper[Text[T]]; while Pattern[P] <> ']' do begin RangeStart := P; RangeEnd := P; inc(P); if P > PMax then begin State := sPATTERN; exit; end; if Pattern[P] = '-' then begin inc(P); RangeEnd := P; if (P > PMax) or (Pattern[RangeEnd] = ']') then begin State := sPATTERN; exit; end; inc(P); end; if P > PMax then begin State := sPATTERN; exit; end; if RangeStart < RangeEnd then begin if (c >= Upper[Pattern[RangeStart]]) and (c <= Upper[Pattern[RangeEnd]]) then begin include(flags, MemberMatch); break; end; end else if (c >= Upper[Pattern[RangeEnd]]) and (c <= Upper[Pattern[RangeStart]]) then begin include(flags, MemberMatch); break; end; end; if ((Invert in flags) and (MemberMatch in flags)) or not ((Invert in flags) or (MemberMatch in flags)) then begin State := sRANGE; exit; end; if MemberMatch in flags then while (P <= PMax) and (Pattern[P] <> ']') do inc(P); if P > PMax then begin State := sPATTERN; exit; end; end; else if c <> Upper[Text[T]] then State := sLITERAL; end; inc(P); inc(T); end; if State = sNONE then if T <= TMax then State := sEND else State := sVALID; end; procedure TMatch.MatchAfterStar; var retryT, retryP: PtrInt; begin if (TMax = 1) or (P = PMax) then begin State := sVALID; exit; end else if (PMax = 0) or (TMax = 0) then begin State := sABORT; exit; end; while (T <= TMax) and (P < PMax) and (Pattern[P] in ['?', '*']) do begin if Pattern[P] = '?' then inc(T); inc(P); end; if T >= TMax then begin State := sABORT; exit; end else if P >= PMax then begin State := sVALID; exit; end; repeat if (Upper[Pattern[P]] = Upper[Text[T]]) or (Pattern[P] = '[') then begin retryT := T; retryP := P; MatchMain; if State = sVALID then break; State := sNONE; // retry until end of Text, (check below) or State valid T := retryT; P := retryP; end; inc(T); if (T > TMax) or (P > PMax) then begin State := sABORT; exit; end; until State <> sNONE; end; function SearchAny(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin aMatch.State := sNONE; aMatch.P := 0; aMatch.T := 0; aMatch.Text := aText; aMatch.TMax := aTextLen - 1; aMatch.MatchMain; result := aMatch.State = sVALID; end; // faster alternative (without recursion) for only * ? (but not [...]) {$ifdef CPU32} // less registers on this CPU - also circumvent ARM problems (Alf) function SearchNoRange(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; var c: AnsiChar; pat, txt: PtrInt; // use local registers begin aMatch.T := 0; // aMatch.P/T are used for retry positions after * aMatch.Text := aText; aMatch.TMax := aTextLen - 1; pat := 0; txt := 0; repeat if pat <= aMatch.PMax then begin c := aMatch.Pattern[pat]; case c of '?': if txt <= aMatch.TMax then begin inc(pat); inc(txt); continue; end; '*': begin aMatch.P := pat; aMatch.T := txt + 1; inc(pat); continue; end; else if (txt <= aMatch.TMax) and (c = aMatch.Text[txt]) then begin inc(pat); inc(txt); continue; end; end; end else if txt > aMatch.TMax then break; txt := aMatch.T; if (txt > 0) and (txt <= aMatch.TMax + 1) then begin inc(aMatch.T); pat := aMatch.P + 1; continue; end; result := false; exit; until false; result := true; end; {$else} // optimized for x86_64/ARM with more registers function SearchNoRange(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; var c: AnsiChar; pat, patend, txtend, txtretry, patretry: PUtf8Char; label fin; begin pat := pointer(aMatch.Pattern); if pat = nil then goto fin; patend := pat + aMatch.PMax; patretry := nil; txtend := aText + aTextLen - 1; txtretry := nil; repeat if pat <= patend then begin c := pat^; if c <> '*' then if c <> '?' then begin if (aText <= txtend) and (c = aText^) then begin inc(pat); inc(aText); continue; end; end else begin // '?' if aText <= txtend then begin inc(pat); inc(aText); continue; end; end else begin // '*' inc(pat); txtretry := aText + 1; patretry := pat; continue; end; end else if aText > txtend then break; if (PtrInt(PtrUInt(txtretry)) > 0) and (txtretry <= txtend + 1) then begin aText := txtretry; inc(txtretry); pat := patretry; continue; end; fin:result := false; exit; until false; result := true; end; {$endif CPUX86} function SearchNoRangeU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; var c: AnsiChar; pat, txt: PtrInt; begin aMatch.T := 0; aMatch.Text := aText; aMatch.TMax := aTextLen - 1; pat := 0; txt := 0; repeat if pat <= aMatch.PMax then begin c := aMatch.Pattern[pat]; case c of '?': if txt <= aMatch.TMax then begin inc(pat); inc(txt); continue; end; '*': begin aMatch.P := pat; aMatch.T := txt + 1; inc(pat); continue; end; else if (txt <= aMatch.TMax) and (aMatch.Upper[c] = aMatch.Upper[aMatch.Text[txt]]) then begin inc(pat); inc(txt); continue; end; end; end else if txt > aMatch.TMax then break; txt := aMatch.T; if (txt > 0) and (txt <= aMatch.TMax + 1) then begin inc(aMatch.T); pat := aMatch.P + 1; continue; end; result := false; exit; until false; result := true; end; function SimpleContainsU(t, tend, p: PUtf8Char; pmax: PtrInt; up: PNormTable): boolean; {$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label // brute force case-insensitive search p[0..pmax] in t..tend-1 var first: AnsiChar; i: PtrInt; label next; begin first := up[p^]; repeat if up[t^] <> first then begin next: inc(t); if t < tend then continue else break; end; for i := 1 to pmax do if up[t[i]] <> up[p[i]] then goto next; result := true; exit; until false; result := false; end; {$ifdef CPU64} // naive but very efficient code generation on FPC x86-64 function SimpleContains8(t, tend, p: PUtf8Char; pmax: PtrInt): boolean; {$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label label next; var i, first: PtrInt; begin first := PPtrInt(p)^; repeat if PPtrInt(t)^ <> first then begin next: inc(t); if t < tend then continue else break; end; for i := 8 to pmax do if t[i] <> p[i] then goto next; result := true; exit; until false; result := false; end; {$endif CPU64} {$ifdef CPUX86} function SimpleContains1(t, tend, p: PUtf8Char; pmax: PtrInt): boolean; {$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label label next; var i: PtrInt; begin repeat if t^ <> p^ then begin next: inc(t); if t < tend then continue else break; end; for i := 1 to pmax do if t[i] <> p[i] then goto next; result := true; exit; until false; result := false; end; function SimpleContains4(t, tend, p: PUtf8Char; pmax: PtrInt): boolean; {$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label label next; var i: PtrInt; begin repeat if PCardinal(t)^ <> PCardinal(p)^ then begin next: inc(t); if t < tend then continue else break; end; for i := 1 to pmax do if t[i] <> p[i] then goto next; result := true; exit; until false; result := false; end; {$else} function SimpleContains1(t, tend, p: PUtf8Char; pmax: PtrInt): boolean; {$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label label next; var i: PtrInt; first: AnsiChar; begin first := p^; repeat if t^ <> first then begin next: inc(t); if t < tend then continue else break; end; for i := 1 to pmax do if t[i] <> p[i] then goto next; result := true; exit; until false; result := false; end; function SimpleContains4(t, tend, p: PUtf8Char; pmax: PtrInt): boolean; {$ifdef FPC}inline;{$endif} // Delphi has troubles inlining goto/label label next; var i: PtrInt; first: cardinal; begin first := PCardinal(p)^; repeat if PCardinal(t)^ <> first then begin next: inc(t); if t < tend then continue else break; end; for i := 1 to pmax do if t[i] <> p[i] then goto next; result := true; exit; until false; result := false; end; {$endif CPUX86} function CompareMemU(P1, P2: PUtf8Char; len: PtrInt; U: PNormTable): boolean; {$ifdef FPC} inline;{$endif} begin // here we know that len>0 result := false; repeat dec(len); if U[P1[len]] <> U[P2[len]] then exit; until len = 0; result := true; end; function SearchVoid(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin result := aTextLen = 0; end; function SearchNoPattern(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin result := (aMatch.PMax + 1 = aTextLen) and mormot.core.base.CompareMem(aText, aMatch.Pattern, aTextLen); end; function SearchNoPatternU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin result := (aMatch.PMax + 1 = aTextLen) and CompareMemU(aText, aMatch.Pattern, aTextLen, aMatch.Upper); end; function SearchContainsValid(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin result := true; end; function SearchContainsU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin dec(aTextLen, aMatch.PMax); if aTextLen > 0 then result := SimpleContainsU(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax, aMatch.Upper) else result := false; end; function SearchContains1(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin dec(aTextLen, aMatch.PMax); if aTextLen > 0 then result := SimpleContains1(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax) else result := false; end; function SearchContains4(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin dec(aTextLen, aMatch.PMax); if aTextLen > 0 then result := SimpleContains4(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax) else result := false; end; {$ifdef CPU64} function SearchContains8(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin // optimized e.g. to search an IP address as '*12.34.56.78*' in logs dec(aTextLen, aMatch.PMax); if aTextLen > 0 then result := SimpleContains8(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax) else result := false; end; {$endif CPU64} function SearchStartWith(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin result := (aMatch.PMax < aTextLen) and mormot.core.base.CompareMem(aText, aMatch.Pattern, aMatch.PMax + 1); end; function SearchStartWithU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin result := (aMatch.PMax < aTextLen) and CompareMemU(aText, aMatch.Pattern, aMatch.PMax + 1, aMatch.Upper); end; function SearchEndWith(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin dec(aTextLen, aMatch.PMax); result := (aTextLen >= 0) and mormot.core.base.CompareMem(aText + aTextLen, aMatch.Pattern, aMatch.PMax); end; function SearchEndWithU(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; begin dec(aTextLen, aMatch.PMax); result := (aTextLen >= 0) and CompareMemU(aText + aTextLen, aMatch.Pattern, aMatch.PMax, aMatch.Upper); end; procedure TMatch.Prepare(const aPattern: RawUtf8; aCaseInsensitive, aReuse: boolean); begin Prepare(pointer(aPattern), length(aPattern), aCaseInsensitive, aReuse); end; procedure TMatch.Prepare(aPattern: PUtf8Char; aPatternLen: integer; aCaseInsensitive, aReuse: boolean); const SPECIALS: PUtf8Char = '*?['; begin Pattern := aPattern; pmax := aPatternLen - 1; // search in Pattern[0..PMax] if Pattern = nil then begin Search := SearchVoid; exit; end; if aCaseInsensitive and not IsCaseSensitive(aPattern, aPatternLen) then aCaseInsensitive := false; // don't slow down e.g. number or IP search if aCaseInsensitive then Upper := @NormToUpperAnsi7 else Upper := @NormToNorm; Search := nil; if aReuse then if strcspn(Pattern, SPECIALS) > pmax then if aCaseInsensitive then Search := SearchNoPatternU else Search := SearchNoPattern else if pmax > 0 then begin if Pattern[pmax] = '*' then begin if strcspn(Pattern + 1, SPECIALS) = pmax - 1 then case Pattern[0] of '*': begin // *something* inc(Pattern); dec(pmax, 2); // trim trailing and ending * if pmax < 0 then Search := SearchContainsValid else if aCaseInsensitive then Search := SearchContainsU {$ifdef CPU64} else if pmax >= 7 then Search := SearchContains8 {$endif CPU64} else if pmax >= 3 then Search := SearchContains4 else Search := SearchContains1; end; '?': // ?something* if aCaseInsensitive then Search := SearchNoRangeU else Search := SearchNoRange; '[': Search := SearchAny; else begin dec(pmax); // trim trailing * if aCaseInsensitive then Search := SearchStartWithU else Search := SearchStartWith; end; end; end else if (Pattern[0] = '*') and (strcspn(Pattern + 1, SPECIALS) >= pmax) then begin inc(Pattern); // jump leading * if aCaseInsensitive then Search := SearchEndWithU else Search := SearchEndWith; end; end else if Pattern[0] in ['*', '?'] then Search := SearchContainsValid; if not Assigned(Search) then begin aPattern := PosChar(Pattern, '['); if (aPattern = nil) or (aPattern - Pattern > pmax) then if aCaseInsensitive then Search := SearchNoRangeU else Search := SearchNoRange else Search := SearchAny; end; end; type // Holub and Durian (2005) SBNDM2 algorithm // see http://www.cri.haifa.ac.il/events/2005/string/presentations/Holub.pdf TSBNDMQ2Mask = array[AnsiChar] of cardinal; PSBNDMQ2Mask = ^TSBNDMQ2Mask; function SearchSBNDMQ2ComputeMask(const Pattern: RawUtf8; u: PNormTable): RawByteString; var i: PtrInt; p: PAnsiChar absolute Pattern; m: PSBNDMQ2Mask absolute result; c: PCardinal; begin FastNewRawByteString(result, SizeOf(m^)); FillCharFast(m^, SizeOf(m^), 0); for i := 0 to length(Pattern) - 1 do begin c := @m^[u[p[i]]]; // for FPC code generation c^ := c^ or cardinal(1 shl i); end; end; function SearchSBNDMQ2(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; var mask: PSBNDMQ2Mask; max, i, j: PtrInt; state: cardinal; begin mask := pointer(aMatch^.Pattern); // was filled by SearchSBNDMQ2ComputeMask() max := aMatch^.pmax; i := max - 1; dec(aTextLen); if i < aTextLen then begin repeat state := mask[aText[i + 1]] shr 1; // in two steps for better FPC codegen state := state and mask[aText[i]]; if state = 0 then begin inc(i, max); // fast skip if i >= aTextLen then break; continue; end; j := i - max; repeat dec(i); if i < 0 then break; state := (state shr 1) and mask[aText[i]]; until state = 0; if i = j then begin result := true; exit; end; inc(i, max); if i >= aTextLen then break; until false; end; result := false; end; function SearchSBNDMQ2U(aMatch: PMatch; aText: PUtf8Char; aTextLen: PtrInt): boolean; var u: PNormTable; mask: PSBNDMQ2Mask; max, i, j: PtrInt; state: cardinal; begin mask := pointer(aMatch^.Pattern); max := aMatch^.pmax; u := aMatch^.Upper; i := max - 1; dec(aTextLen); if i < aTextLen then begin repeat state := mask[u[aText[i + 1]]] shr 1; state := state and mask[u[aText[i]]]; if state = 0 then begin inc(i, max); if i >= aTextLen then break; continue; end; j := i - max; repeat dec(i); if i < 0 then break; state := (state shr 1) and mask[u[aText[i]]]; until state = 0; if i = j then begin result := true; exit; end; inc(i, max); if i >= aTextLen then break; until false; end; result := false; end; procedure TMatch.PrepareContains(var aPattern: RawUtf8; aCaseInsensitive: boolean); begin pmax := length(aPattern) - 1; if aCaseInsensitive and not IsCaseSensitive(pointer(aPattern), pmax + 1) then aCaseInsensitive := false; if aCaseInsensitive then Upper := @NormToUpperAnsi7 else Upper := @NormToNorm; if pmax < 0 then Search := SearchContainsValid else if pmax > 30 then if aCaseInsensitive then Search := SearchContainsU else {$ifdef CPU64} Search := SearchContains8 {$else} Search := SearchContains4 {$endif CPU64} else if pmax >= 1 then begin // PMax=[1..30] -> len=[2..31] -> aPattern becomes a SBNDMQ2 lookup table aPattern := SearchSBNDMQ2ComputeMask(aPattern, Upper); if aCaseInsensitive then Search := SearchSBNDMQ2U else Search := SearchSBNDMQ2; end else if aCaseInsensitive then Search := SearchContainsU else Search := SearchContains1; // todo: use ByteScanIndex() asm? Pattern := pointer(aPattern); end; procedure TMatch.PrepareRaw(aPattern: PUtf8Char; aPatternLen: integer; aSearch: TMatchSearchFunction); begin Pattern := aPattern; pmax := aPatternLen - 1; // search in Pattern[0..PMax] Search := aSearch; end; function TMatch.Match(const aText: RawUtf8): boolean; begin if pointer(aText) <> nil then result := Search(@self, pointer(aText), PStrLen(PAnsiChar(pointer(aText)) - _STRLEN)^) else result := pmax < 0; end; function TMatch.Match(aText: PUtf8Char; aTextLen: PtrInt): boolean; begin if (aText <> nil) and (aTextLen > 0) then result := Search(@self, aText, aTextLen) else result := pmax < 0; end; function TMatch.MatchThreadSafe(const aText: RawUtf8): boolean; var local: TMatch; // thread-safe with no lock! begin local := self; if aText <> '' then result := local.Search(@local, pointer(aText), length(aText)) else result := local.PMax < 0; end; function TMatch.MatchString(const aText: string): boolean; var local: TMatch; // thread-safe with no lock! temp: TSynTempBuffer; len: integer; begin if aText = '' then begin result := pmax < 0; exit; end; len := length(aText); temp.Init(len * 3); {$ifdef UNICODE} len := RawUnicodeToUtf8(temp.buf, temp.len + 1, pointer(aText), len, [ccfNoTrailingZero]); {$else} len := CurrentAnsiConvert.AnsiBufferToUtf8(temp.buf, pointer(aText), len) - temp.buf; {$endif UNICODE} local := self; result := local.Search(@local, temp.buf, len); temp.Done; end; function TMatch.Equals(const aAnother: TMatch): boolean; begin result := (pmax = TMatch(aAnother).pmax) and (Upper = TMatch(aAnother).Upper) and mormot.core.base.CompareMem(Pattern, TMatch(aAnother).Pattern, pmax + 1); end; function TMatch.PatternLength: integer; begin result := pmax + 1; end; function TMatch.PatternText: PUtf8Char; begin result := Pattern; end; function TMatch.CaseInsensitive: boolean; begin result := Upper = @NormToUpperAnsi7; end; { TUriMatchName } procedure TUriMatchName.ParsePath; var i: PtrInt; begin Name := Path; i := Name.Len; while i > 0 do // retrieve begin dec(i); if Name.Text[i] <> '/' then continue; inc(i); inc(Name.Text, i); dec(Name.Len, i); break; end; end; { TUriMatch } procedure TUriMatch.DoInit(csv: PUtf8Char; caseinsensitive: boolean); var s: PUtf8Char; m: ^TMatchDynArray; begin if csv <> nil then repeat m := @Names; // default 'file.ext' pattern csv := GotoNextNotSpace(csv); s := csv; repeat case csv^ of #0, ',': break; '/': m := @Paths; // is a 'path/to/file.ext' pattern end; inc(csv); until false; if csv <> s then MatchNew(m^)^.Prepare(s, csv - s, caseinsensitive, true); if csv^ = #0 then break; inc(csv); until false; end; function TUriMatch.Check(const csv: RawUtf8; const uri: TUriMatchName; caseinsensitive: boolean): boolean; begin if Init.TryLock then // thread-safe init once from supplied csv DoInit(pointer(csv), caseinsensitive); result := ((Names <> nil) and MatchAny(pointer(Names), uri.Name.Text, uri.Name.Len)) or ((Paths <> nil) and MatchAny(pointer(Paths), uri.Path.Text, uri.Path.Len)); end; function IsMatch(const Pattern, Text: RawUtf8; CaseInsensitive: boolean): boolean; var match: TMatch; begin match.Prepare(pointer(Pattern), length(Pattern), CaseInsensitive, {reuse=}false); result := match.Match(Text); end; function IsMatchString(const Pattern, Text: string; CaseInsensitive: boolean): boolean; var match: TMatch; pat, txt: RawUtf8; begin StringToUtf8(Pattern, pat); // local variable is mandatory for FPC StringToUtf8(Text, txt); match.Prepare(pat, CaseInsensitive, {reuse=}false); result := match.Match(txt); end; function SetNextMatch(P: PUtf8Char; var Dest: TMatch; CaseInsensitive, Reuse: boolean; CsvSep: AnsiChar): PUtf8Char; begin result := P; repeat while not (result^ in [#0, CsvSep]) do inc(result); if result <> P then begin Dest.Prepare(P, result - P, CaseInsensitive, Reuse); if result^ = CsvSep then inc(result); // go to next CSV exit; end; until result^ = #0; result := nil; // indicates Dest.Prepare() was not called end; function IsMatchs(CsvPattern, Text: PUtf8Char; TextLen: PtrInt; CaseInsensitive: boolean; CsvSep: AnsiChar): boolean; var match: TMatch; begin result := (CsvPattern <> nil) and (TextLen > 0); if not result then exit; repeat CsvPattern := SetNextMatch( CsvPattern, match, CaseInsensitive, {reuse=}false, CsvSep); if CsvPattern = nil then break; if match.Search(@match, Text, TextLen) then exit; until CsvPattern^ = #0; result := false; end; function IsMatchs(const CsvPattern, Text: RawUtf8; CaseInsensitive: boolean; CsvSep: AnsiChar): boolean; begin result := IsMatchs(pointer(CsvPattern), pointer(Text), length(Text), CaseInsensitive, CsvSep); end; function SetMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean; out Match: TMatchDynArray; CsvSep: AnsiChar): integer; var P, S: PUtf8Char; begin P := pointer(CsvPattern); if P <> nil then repeat S := P; while not (P^ in [#0, CsvSep]) do inc(P); if P <> S then MatchNew(Match)^.Prepare(S, P - S, CaseInsensitive, {reuse=}true); if P^ = #0 then break; inc(P); until false; result := length(Match); end; function SetMatchs(CsvPattern: PUtf8Char; CaseInsensitive: boolean; Match: PMatch; MatchMax: integer; CsvSep: AnsiChar): integer; var S: PUtf8Char; begin result := 0; if (CsvPattern <> nil) and (MatchMax >= 0) then repeat S := CsvPattern; while not (CsvPattern^ in [#0, CsvSep]) do inc(CsvPattern); if CsvPattern <> S then begin Match^.Prepare(S, CsvPattern - S, CaseInsensitive, {reuse=}true); inc(result); if result > MatchMax then break; inc(Match); end; if CsvPattern^ = #0 then break; inc(CsvPattern); until false; end; function MatchExists(const One: TMatch; const Several: TMatchDynArray): boolean; var i: PtrInt; begin result := true; for i := 0 to length(Several) - 1 do if Several[i].Equals(One) then exit; result := false; end; function MatchAdd(const One: TMatch; var Several: TMatchDynArray): boolean; begin result := not MatchExists(One, Several); if result then MatchNew(Several)^ := One; end; function MatchNew(var Several: TMatchDynArray): PMatch; var n: PtrInt; begin n := length(Several); SetLength(Several, n + 1); result := @Several[n]; end; function MatchAny(const Match: TMatchDynArray; const Text: RawUtf8): boolean; begin result := MatchAny(pointer(Match), pointer(Text), length(Text)); end; function MatchAny(Match: PMatch; Text: PUtf8Char; TextLen: PtrInt): boolean; var n: integer; begin result := true; if Match = nil then exit; if TextLen <= 0 then Text := nil; n := PDALen(PAnsiChar(pointer(Match)) - _DALEN)^ + (_DAOFF - 1); repeat // inlined Match^.Match() to avoid internal error on Delphi if Text <> nil then begin if Match^.Search(Match, Text, TextLen) then exit; end else if Match^.pmax < 0 then exit; inc(Match); dec(n); until n = 0; result := false; end; procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean; var Values: TRawUtf8DynArray; CsvSep: AnsiChar); var match: TMatchDynArray; m, n, i: PtrInt; begin if SetMatchs(CsvPattern, CaseInsensitive, match, CsvSep) = 0 then exit; n := 0; for i := 0 to high(Values) do for m := 0 to high(match) do if match[m].Match(Values[i]) then begin if i <> n then Values[n] := Values[i]; inc(n); break; end; if n <> length(Values) then SetLength(Values, n); end; procedure FilterMatchs(const CsvPattern: RawUtf8; CaseInsensitive: boolean; var Values: TStringDynArray; CsvSep: AnsiChar); var match: TMatchDynArray; m, n, i: PtrInt; begin if SetMatchs(CsvPattern, CaseInsensitive, match, CsvSep) = 0 then exit; n := 0; for i := 0 to high(Values) do for m := 0 to high(match) do if match[m].MatchString(Values[i]) then begin if i <> n then Values[n] := Values[i]; inc(n); break; end; if n <> length(Values) then SetLength(Values, n); end; { TMatchs } constructor TMatchs.Create(const aPatterns: TRawUtf8DynArray; CaseInsensitive: boolean); begin inherited Create; // may have been overriden Subscribe(aPatterns, CaseInsensitive); end; function TMatchs.Match(const aText: RawUtf8): integer; begin result := Match(pointer(aText), length(aText)); end; function TMatchs.Match(aText: PUtf8Char; aLen: integer): integer; var one: ^TMatchStore; local: TMatch; // thread-safe with no lock! begin if (self = nil) or (fMatch = nil) then result := -1 // no filter by name -> allow e.g. to process everything else begin one := pointer(fMatch); if aLen <> 0 then begin for result := 0 to fMatchCount - 1 do begin local := one^.Pattern; if local.Search(@local, aText, aLen) then exit; inc(one); end; end else for result := 0 to fMatchCount - 1 do if one^.Pattern.PMax < 0 then exit else inc(one); result := -2; end; end; function TMatchs.MatchString(const aText: string): integer; var temp: TSynTempBuffer; len: integer; begin len := StringToUtf8(aText, temp); result := Match(temp.buf, len); temp.Done; end; procedure TMatchs.Subscribe(const aPatternsCsv: RawUtf8; CaseInsensitive: boolean); var patterns: TRawUtf8DynArray; begin CsvToRawUtf8DynArray(pointer(aPatternsCsv), patterns); Subscribe(patterns, CaseInsensitive); end; procedure TMatchs.Subscribe(const aPatterns: TRawUtf8DynArray; CaseInsensitive: boolean); var i, j, m, n: integer; found: ^TMatchStore; pat: PRawUtf8; begin m := length(aPatterns); if m = 0 then exit; n := fMatchCount; SetLength(fMatch, n + m); pat := pointer(aPatterns); for i := 1 to m do begin found := pointer(fMatch); for j := 1 to n do if StrComp(pointer(found^.PatternInstance), pointer(pat^)) = 0 then begin found := nil; break; end else inc(found); if found <> nil then with fMatch[n] do begin PatternInstance := pat^; // avoid GPF if aPatterns[] is released Pattern.Prepare(PatternInstance, CaseInsensitive, {reuse=}true); inc(n); end; inc(pat); end; fMatchCount := n; if n <> length(fMatch) then SetLength(fMatch, n); end; procedure SoundExComputeAnsi(var p: PAnsiChar; var result: cardinal; Values: PSoundExValues); var n, v, old: PtrUInt; begin n := 0; old := 0; if Values <> nil then repeat v := NormToUpperByte[ord(p^)]; // also handle 8-bit WinAnsi (1252 accents) if not (tcWord in TEXT_BYTES[v]) then break; inc(p); dec(v, ord('B')); if v > high(TSoundExValues) then continue; v := Values[v]; // get soundex value if (v = 0) or (v = old) then continue; // invalid or dopple value old := v; result := result shl SOUNDEX_BITS; inc(result, v); inc(n); if n = ((32 - 8) div SOUNDEX_BITS) then // first char use up to 8-bit break; // result up to a cardinal size until false; end; function SoundExComputeFirstCharAnsi(var p: PAnsiChar): cardinal; label err; begin if p = nil then begin err:result := 0; exit; end; repeat result := NormToUpperByte[ord(p^)]; // also handle 8-bit WinAnsi (CP 1252) if result = 0 then goto err; // end of input text, without a word inc(p); // trim initial spaces or 'H' until AnsiChar(result) in ['A'..'G', 'I'..'Z']; end; procedure SoundExComputeUtf8(var U: PUtf8Char; var result: cardinal; Values: PSoundExValues); var n, v, old: cardinal; begin n := 0; old := 0; if Values <> nil then repeat v := GetNextUtf8Upper(U); if not (tcWord in TEXT_BYTES[v]) then break; dec(v, ord('B')); if v > high(TSoundExValues) then continue; v := Values[v]; // get soundex value if (v = 0) or (v = old) then continue; // invalid or dopple value old := v; result := result shl SOUNDEX_BITS; inc(result, v); inc(n); if n = ((32 - 8) div SOUNDEX_BITS) then // first char use up to 8-bit break; // result up to a cardinal size until false; end; function SoundExComputeFirstCharUtf8(var U: PUtf8Char): cardinal; label err; begin if U = nil then begin err:result := 0; exit; end; repeat result := GetNextUtf8Upper(U); if result = 0 then goto err; // end of input text, without a word // trim initial spaces or 'H' until AnsiChar(result) in ['A'..'G', 'I'..'Z']; end; { TSynSoundEx } const /// english Soundex pronunciation scores // - defines the default values used for the SoundEx() function below // (used if Values parameter is nil) ValueEnglish: TSoundExValues = // B C D E F G H I J K L M N O P Q R S T U V W X Y Z (1, 2, 3, 0, 1, 2, 0, 0, 2, 2, 4, 5, 5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2); /// french Soundex pronunciation scores // - can be used to override default values used for the SoundEx() // function below ValueFrench: TSoundExValues = // B C D E F G H I J K L M N O P Q R S T U V W X Y Z (1, 2, 3, 0, 9, 7, 0, 0, 7, 2, 4, 5, 5, 0, 1, 2, 6, 8, 3, 0, 9, 0, 8, 0, 8); /// spanish Soundex pronunciation scores // - can be used to override default values used for the SoundEx() // function below ValueSpanish: TSoundExValues = // B C D E F G H I J K L M N O P Q R S T U V W X Y Z (1, 2, 3, 0, 1, 2, 0, 0, 0, 2, 0, 5, 5, 0, 1, 2, 6, 2, 3, 0, 1, 0, 2, 0, 2); SOUNDEXVALUES: array[TSynSoundExPronunciation] of PSoundExValues = ( @ValueEnglish, @ValueFrench, @ValueSpanish, @ValueEnglish); function TSynSoundEx.Ansi(A: PAnsiChar): boolean; var Value, c: cardinal; begin result := false; if A = nil then exit; repeat // test beginning of word c := SoundExComputeFirstCharAnsi(A); if c = 0 then exit else if c = FirstChar then begin // here we had the first char match -> check if word match UpperValue Value := c - (ord('A') - 1); SoundExComputeAnsi(A, Value, fValues); if Value = search then begin result := true; // UpperValue found! exit; end; end else repeat if A^ = #0 then exit else if not (tcWord in TEXT_CHARS[NormToUpper[A^]]) then break else inc(A); until false; // find beginning of next word repeat if A^ = #0 then exit else if tcWord in TEXT_CHARS[NormToUpper[A^]] then break else inc(A); until false; until false; end; function TSynSoundEx.Utf8(U: PUtf8Char): boolean; var Value, c: cardinal; V: PUtf8Char; begin result := false; if U = nil then exit; repeat // find beginning of word c := SoundExComputeFirstCharUtf8(U); if c = 0 then exit else if c = FirstChar then begin // here we had the first char match -> check if word match UpperValue Value := c - (ord('A') - 1); SoundExComputeUtf8(U, Value, fValues); if Value = search then begin result := true; // UpperValue found! exit; end; end else repeat c := GetNextUtf8Upper(U); if c = 0 then exit; until not (tcWord in TEXT_BYTES[c]); // find beginning of next word repeat if U = nil then exit; V := U; c := GetNextUtf8Upper(U); if c = 0 then exit; until tcWord in TEXT_BYTES[c]; U := V; until U = nil; end; function TSynSoundEx.Prepare(UpperValue: PAnsiChar; Lang: PSoundExValues): boolean; begin fValues := Lang; Search := SoundExAnsi(UpperValue, nil, Lang); if Search = 0 then result := false else begin FirstChar := SoundExComputeFirstCharAnsi(UpperValue); result := true; end; end; function TSynSoundEx.Prepare(UpperValue: PAnsiChar; Lang: TSynSoundExPronunciation): boolean; begin result := Prepare(UpperValue, SOUNDEXVALUES[Lang]); end; function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar; Lang: PSoundExValues): cardinal; begin result := SoundExComputeFirstCharAnsi(A); if result <> 0 then begin dec(result, ord('A') - 1); // first Soundex char is first char SoundExComputeAnsi(A, result, Lang); end; if next <> nil then begin while tcWord in TEXT_CHARS[NormToUpper[A^]] do inc(A); // go to end of word next^ := A; end; end; function SoundExAnsi(A: PAnsiChar; next: PPAnsiChar; Lang: TSynSoundExPronunciation): cardinal; begin result := SoundExAnsi(A, next, SOUNDEXVALUES[Lang]); end; function SoundExUtf8(U: PUtf8Char; next: PPUtf8Char; Lang: TSynSoundExPronunciation): cardinal; begin result := SoundExComputeFirstCharUtf8(U); if result <> 0 then begin dec(result, ord('A') - 1); // first Soundex char is first char SoundExComputeUtf8(U, result, SOUNDEXVALUES[Lang]); end; if next <> nil then next^ := FindNextUtf8WordBegin(U); end; { ****************** Efficient CSV Parsing using RTTI } function TDynArrayLoadCsv(var Value: TDynArray; Csv: PUtf8Char; Intern: TRawUtf8Interning): boolean; var rt: TRttiCustom; pr: PRttiCustomProp; p, v: PUtf8Char; s: RawUtf8; mapcount, mapped: PtrInt; rec: PAnsiChar; map: PRttiCustomPropDynArray; m: ^PRttiCustomProp; extcount, mcount: integer; ext: PInteger; begin result := false; rt := Value.Info.ArrayRtti; if (rt = nil) or (rt.Parser <> ptRecord) or (rt.Props.Count = 0) then exit; // parse the CSV headers mapped := 0; mapcount := 0; SetLength(map, 32); p := pointer(GetNextLine(Csv, Csv)); if Csv = nil then exit; // no data while p <> nil do begin GetNextItem(p, ',', '"', s); if s = '' then exit; // we don't support void headers if mapcount = length(map) then SetLength(map, NextGrow(mapcount)); pr := rt.Props.Find(s); if pr <> nil then begin map[mapcount] := pr; // found a matching field inc(mapped); end; inc(mapcount); end; if mapped = 0 then exit; // no field matching any header // parse the value rows extcount := 0; ext := Value.CountExternal; if ext = nil then Value.UseExternalCount(@extcount); // faster Value.NewPtr v := Csv; while v^ in [#10, #13] do inc(v); while v^ <> #0 do begin rec := Value.NewPtr; m := pointer(map); mcount := mapcount; repeat // parse next value Csv := v; if v^ = '"' then v := GotoEndOfQuotedString(v); // special handling of double quotes while (v^ <> ',') and (v^ > #13) do inc(v); if mcount <> 0 then begin if m^ <> nil then // not matching fields are just ignored begin if Csv^ = '"' then begin UnQuoteSqlStringVar(Csv, s); if Intern <> nil then Intern.UniqueText(s); end else Intern.Unique(s, Csv, v - Csv); m^.Value.ValueSetText(rec + m^.OffsetSet, s); end; inc(m); dec(mcount); end; if v^ <> ',' then break; inc(v); until v^ in [#0, #10, #13]; // go to next row while v^ in [#10, #13] do inc(v); end; if Value.Count = 0 then Value.Capacity := 0 else DynArrayFakeLength(Value.Value^, Value.Count); Value.UseExternalCount(ext); // restore fCountP if local n count was used result := true; end; function DynArrayLoadCsv(var Value; const Csv: RawUtf8; TypeInfo: PRttiInfo; Intern: TRawUtf8Interning): boolean; var da: TDynArray; begin da.Init(TypeInfo, Value); result := TDynArrayLoadCsv(da, pointer(CSV), Intern); end; { ****************** Versatile Expression Search Engine } function ToText(r: TExprParserResult): PShortString; begin result := GetEnumName(TypeInfo(TExprParserResult), ord(r)); end; function ToUtf8(r: TExprParserResult): RawUtf8; begin result := UnCamelCase(TrimLeftLowerCaseShort(ToText(r))); end; { TExprNode } function TExprNode.Append(node: TExprNode): boolean; begin result := node <> nil; if result then Last.fNext := node; end; constructor TExprNode.Create(nodeType: TExprNodeType); begin inherited Create; // may have been overriden fNodeType := nodeType; end; destructor TExprNode.Destroy; begin fNext.Free; inherited Destroy; end; function TExprNode.Last: TExprNode; begin result := self; while result.Next <> nil do result := result.Next; end; { TParserAbstract } constructor TParserAbstract.Create; begin inherited Create; // may have been overriden Initialize; end; destructor TParserAbstract.Destroy; begin Clear; inherited Destroy; end; procedure TParserAbstract.Clear; begin fWordCount := 0; fWords := nil; fExpression := ''; FreeAndNil(fFirstNode); end; function TParserAbstract.ParseExpr: TExprNode; begin result := ParseFactor; ParseNextCurrentWord; if (fCurrentWord = '') or (fCurrentWord = ')') then exit; if PropNameEquals(fCurrentWord, fAndWord) then begin // w1 & w2 = w1 AND w2 ParseNextCurrentWord; if result.Append(ParseExpr) then result.Append(TExprNode.Create(entAnd)); exit; end else if PropNameEquals(fCurrentWord, fOrWord) then begin // w1 + w2 = w1 OR w2 ParseNextCurrentWord; if result.Append(ParseExpr) then result.Append(TExprNode.Create(entOr)); exit; end else if fNoWordIsAnd and result.Append(ParseExpr) then // 'w1 w2' = 'w1 & w2' result.Append(TExprNode.Create(entAnd)); end; function TParserAbstract.ParseFactor: TExprNode; begin if fCurrentError <> eprSuccess then result := nil else if PropNameEquals(fCurrentWord, fNotWord) then begin ParseNextCurrentWord; result := ParseFactor; if fCurrentError <> eprSuccess then exit; result.Append(TExprNode.Create(entNot)); end else result := ParseTerm; end; function TParserAbstract.ParseTerm: TExprNode; begin result := nil; if fCurrentError <> eprSuccess then exit; if fCurrentWord = '(' then begin ParseNextCurrentWord; result := ParseExpr; if fCurrentError <> eprSuccess then exit; if fCurrentWord <> ')' then begin FreeAndNil(result); fCurrentError := eprMissingParenthesis; end; end else if fCurrentWord = '' then begin result := nil; fCurrentError := eprMissingFinalWord; end else try // calls meta-class overriden constructor result := fWordClass.Create(self, fCurrentWord); fCurrentError := TExprNodeWordAbstract(result).ParseWord; if fCurrentError <> eprSuccess then begin FreeAndNil(result); exit; end; SetLength(fWords, fWordCount + 1); fWords[fWordCount] := TExprNodeWordAbstract(result); inc(fWordCount); except FreeAndNil(result); fCurrentError := eprInvalidExpression; end; end; function TParserAbstract.Parse(const aExpression: RawUtf8): TExprParserResult; var depth: integer; n: TExprNode; begin Clear; fCurrentError := eprSuccess; fCurrent := pointer(aExpression); ParseNextCurrentWord; if fCurrentWord = '' then begin result := eprNoExpression; exit; end; fFirstNode := ParseExpr; result := fCurrentError; if result = eprSuccess then begin depth := 0; n := fFirstNode; while n <> nil do begin case n.NodeType of entWord: begin inc(depth); if depth > high(fFoundStack) then begin result := eprTooManyParenthesis; break; end; end; entOr, entAnd: dec(depth); end; n := n.Next; end; end; if result = eprSuccess then fExpression := aExpression else Clear; fCurrent := nil; end; class function TParserAbstract.ParseError(const aExpression: RawUtf8): RawUtf8; var parser: TParserAbstract; res: TExprParserResult; begin parser := Create; try res := parser.Parse(aExpression); if res = eprSuccess then result := '' else result := ToUtf8(res); finally parser.Free; end; end; function TParserAbstract.Execute: boolean; var n: TExprNode; st: PBoolean; begin // code below compiles very efficiently on FPC/x86-64 st := @fFoundStack; n := fFirstNode; repeat case n.NodeType of entWord: begin st^ := TExprNodeWordAbstract(n).fFound; inc(st); // see eprTooManyParenthesis above to avoid buffer overflow end; entNot: PAnsiChar(st)[-1] := AnsiChar(ord(PAnsiChar(st)[-1]) xor 1); entOr: begin dec(st); PAnsiChar(st)[-1] := AnsiChar(st^ or boolean(PAnsiChar(st)[-1])); end; { TODO : optimize TExprParser OR when left member is already TRUE } entAnd: begin dec(st); PAnsiChar(st)[-1] := AnsiChar(st^ and boolean(PAnsiChar(st)[-1])); end; end; n := n.Next; until n = nil; result := boolean(PAnsiChar(st)[-1]); end; { TExprParserAbstract } procedure TExprParserAbstract.Initialize; begin fAndWord := '&'; fOrWord := '+'; fNotWord := '-'; fNoWordIsAnd := true; end; procedure TExprParserAbstract.ParseNextCurrentWord; var P: PUtf8Char; begin fCurrentWord := ''; P := fCurrent; if P = nil then exit; while P^ in [#1..' '] do inc(P); if P^ = #0 then exit; if P^ in PARSER_STOPCHAR then begin FastSetString(fCurrentWord, P, 1); fCurrent := P + 1; end else begin fCurrent := P; ParseNextWord; end; end; procedure TExprParserAbstract.ParseNextWord; const STOPCHAR = PARSER_STOPCHAR + [#0, ' ']; var P: PUtf8Char; begin P := fCurrent; while not (P^ in STOPCHAR) do inc(P); FastSetString(fCurrentWord, fCurrent, P - fCurrent); fCurrent := P; end; { TExprNodeWordAbstract } constructor TExprNodeWordAbstract.Create(aOwner: TParserAbstract; const aWord: RawUtf8); begin inherited Create(entWord); fWord := aWord; fOwner := aOwner; end; { TExprParserMatchNode } type TExprParserMatchNode = class(TExprNodeWordAbstract) protected fMatch: TMatch; function ParseWord: TExprParserResult; override; end; function TExprParserMatchNode.ParseWord: TExprParserResult; begin fMatch.Prepare(fWord, (fOwner as TExprParserMatch).fCaseSensitive, {reuse=}true); result := eprSuccess; end; { TExprParserMatch } var // equals 1 for ['0'..'9', 'A'..'Z', 'a'..'z', #$80..#$ff] ROUGH_UTF8: TAnsiCharToByte; constructor TExprParserMatch.Create(aCaseSensitive: boolean); var c: AnsiChar; begin inherited Create; if ROUGH_UTF8['0'] = 0 then // ensure is initialized for Search() for c := low(c) to high(c) do if c in ['0'..'9', 'A'..'Z', 'a'..'z', #$80..#$ff] then ROUGH_UTF8[c] := 1; fCaseSensitive := aCaseSensitive; end; procedure TExprParserMatch.Initialize; begin inherited Initialize; fWordClass := TExprParserMatchNode; end; function TExprParserMatch.Search(const aText: RawUtf8): boolean; begin result := Search(pointer(aText), length(aText)); end; function TExprParserMatch.Search(aText: PUtf8Char; aTextLen: PtrInt): boolean; var P, PEnd: PUtf8Char; n: PtrInt; tab: PAnsiCharToByte; begin P := aText; if (P = nil) or (fWords = nil) then begin result := false; exit; end; // reset any previous resultset if fMatchedLastSet > 0 then begin n := fWordCount; repeat dec(n); fWords[n].fFound := false; until n = 0; fMatchedLastSet := 0; end; PEnd := P + aTextLen; while (P < PEnd) and (fMatchedLastSet < fWordCount) do begin // recognize next word boudaries tab := @ROUGH_UTF8; while tab[P^] = 0 do begin inc(P); if P = PEnd then break; end; if P = PEnd then break; aText := P; repeat inc(P); until (P = PEnd) or (tab[P^] = 0); // apply the expression nodes to this word aTextLen := P - aText; // now aText/aTextLen point to a word n := fWordCount; repeat dec(n); with TExprParserMatchNode(fWords[n]) do if not fFound and fMatch.Match(aText, aTextLen) then begin fFound := true; inc(fMatchedLastSet); end; until n = 0; end; result := Execute; end; { ****************** Bloom Filter Probabilistic Index } { TSynBloomFilter } const BLOOM_VERSION = 0; BLOOM_MAXHASH = 32; // only 7 is needed for 1% false positive ratio constructor TSynBloomFilter.Create; begin fHasher := @crc32c; // default/standard/mORMot1 hash function end; constructor TSynBloomFilter.Create(aSize: integer; aFalsePositivePercent: double; aHasher: THasher); const LN2 = 0.69314718056; begin Create; // set fHasher := crc32c + may have been overriden if aSize < 0 then fSize := 1000 else fSize := aSize; if aFalsePositivePercent <= 0 then fFalsePositivePercent := 1 else if aFalsePositivePercent > 100 then fFalsePositivePercent := 100 else fFalsePositivePercent := aFalsePositivePercent; if @aHasher <> nil then fHasher := aHasher; // see http://stackoverflow.com/a/22467497 fBits := Round(-ln(fFalsePositivePercent / 100) * aSize / (LN2 * LN2)); fHashFunctions := Round(fBits / fSize * LN2); if fHashFunctions = 0 then fHashFunctions := 1 else if fHashFunctions > BLOOM_MAXHASH then fHashFunctions := BLOOM_MAXHASH; Reset; end; constructor TSynBloomFilter.Create(const aSaved: RawByteString; aMagic: cardinal; aHasher: THasher); begin Create; // set fHasher := crc32c + may have been overriden if @aHasher <> nil then fHasher := aHasher; if not LoadFrom(aSaved, aMagic) then // will load fSize+fBits+fHashFunctions raise ESynException.CreateUtf8('%.Create with invalid aSaved content', [self]); end; procedure TSynBloomFilter.Insert(const aValue: RawByteString); begin Insert(pointer(aValue), length(aValue)); end; procedure TSynBloomFilter.Insert(aValue: pointer; aValueLen: integer); var h: integer; h1, h2: cardinal; // https://goo.gl/Pls5wi begin if (self = nil) or (aValueLen <= 0) or (fBits = 0) then exit; h1 := fHasher(0, aValue, aValueLen); if fHashFunctions = 1 then h2 := 0 else h2 := fHasher(h1, aValue, aValueLen); fSafe.WriteLock; try for h := 0 to fHashFunctions - 1 do begin SetBitPtr(pointer(fStore), h1 mod fBits); inc(h1, h2); end; inc(fInserted); finally fSafe.WriteUnLock; end; end; function TSynBloomFilter.MayExist(const aValue: RawByteString): boolean; begin result := MayExist(pointer(aValue), length(aValue)); end; function TSynBloomFilter.MayExist(aValue: pointer; aValueLen: integer): boolean; var h: integer; h1, h2: cardinal; // https://goo.gl/Pls5wi begin result := false; if (self = nil) or (aValueLen <= 0) or (fBits = 0) then exit; h1 := fHasher(0, aValue, aValueLen); if fHashFunctions = 1 then h2 := 0 else h2 := fHasher(h1, aValue, aValueLen); fSafe.ReadOnlyLock; // allow concurrent reads try for h := 0 to fHashFunctions - 1 do if GetBitPtr(pointer(fStore), h1 mod fBits) then inc(h1, h2) else exit; finally fSafe.ReadOnlyUnLock; end; result := true; end; procedure TSynBloomFilter.Reset; begin fSafe.WriteLock; try if fStore = '' then SetLength(fStore, (fBits shr 3) + 1); FillcharFast(pointer(fStore)^, length(fStore), 0); fInserted := 0; finally fSafe.WriteUnLock; end; end; function TSynBloomFilter.SaveTo(aMagic: cardinal): RawByteString; var W: TBufferWriter; BufLen: integer; temp: array[word] of byte; begin BufLen := length(fStore) + 100; if BufLen <= SizeOf(temp) then W := TBufferWriter.Create(TRawByteStringStream, @temp, SizeOf(temp)) else W := TBufferWriter.Create(TRawByteStringStream, BufLen); try SaveTo(W, aMagic); W.Flush; result := TRawByteStringStream(W.Stream).DataString; finally W.Free; end; end; procedure TSynBloomFilter.SaveTo(aDest: TBufferWriter; aMagic: cardinal); begin aDest.Write4(aMagic); aDest.Write1(BLOOM_VERSION); fSafe.ReadOnlyLock; try aDest.Write8(@fFalsePositivePercent); aDest.Write4(fSize); aDest.Write4(fBits); aDest.Write1(fHashFunctions); aDest.Write4(fInserted); // warning: fHasher is NOT persisted yet ZeroCompress(pointer(fStore), Length(fStore), aDest); finally fSafe.ReadOnlyUnLock; end; end; function TSynBloomFilter.LoadFrom(const aSaved: RawByteString; aMagic: cardinal): boolean; begin result := LoadFrom(pointer(aSaved), length(aSaved)); end; function TSynBloomFilter.LoadFrom(P: PByte; PLen: integer; aMagic: cardinal): boolean; var start: PByte; version: integer; begin result := false; start := P; if (P = nil) or (PLen < 32) or (PCardinal(P)^ <> aMagic) then exit; inc(P, 4); version := P^; inc(P); if version > BLOOM_VERSION then exit; fSafe.WriteLock; try fFalsePositivePercent := unaligned(PDouble(P)^); inc(P, 8); if (fFalsePositivePercent <= 0) or (fFalsePositivePercent > 100) then exit; fSize := PCardinal(P)^; inc(P, 4); fBits := PCardinal(P)^; inc(P, 4); if fBits < fSize then exit; fHashFunctions := P^; inc(P); if fHashFunctions - 1 >= BLOOM_MAXHASH then exit; Reset; fInserted := PCardinal(P)^; inc(P, 4); ZeroDecompress(P, PLen - (PAnsiChar(P) - PAnsiChar(start)), fStore); result := length(fStore) = integer(fBits shr 3) + 1; finally fSafe.WriteUnLock; end; end; { TSynBloomFilterDiff } type TBloomDiffHeaderKind = (bdDiff, bdFull, bdUpToDate); TBloomDiffHeader = packed record kind: TBloomDiffHeaderKind; size: cardinal; inserted: cardinal; revision: Int64; crc: cardinal; end; procedure TSynBloomFilterDiff.Insert(aValue: pointer; aValueLen: integer); begin fSafe.WriteLock; try inherited Insert(aValue, aValueLen); inc(fRevision); inc(fSnapshotInsertCount); finally fSafe.WriteUnLock; end; end; procedure TSynBloomFilterDiff.Reset; begin fSafe.WriteLock; try inherited Reset; fSnapshotAfterInsertCount := fSize shr 5; fSnapShotAfterMinutes := 30; fSnapshotTimestamp := 0; fSnapshotInsertCount := 0; fRevision := UnixTimeUtc shl 31; fKnownRevision := 0; fKnownStore := ''; finally fSafe.WriteUnLock; end; end; procedure TSynBloomFilterDiff.DiffSnapshot; begin fSafe.WriteLock; try fKnownRevision := fRevision; fSnapshotInsertCount := 0; FastSetRawByteString(fKnownStore, pointer(fStore), length(fStore)); if fSnapShotAfterMinutes = 0 then fSnapshotTimestamp := 0 else fSnapshotTimestamp := GetTickCount64 + fSnapShotAfterMinutes * 60000; finally fSafe.WriteUnLock; end; end; function TSynBloomFilterDiff.SaveToDiff(const aKnownRevision: Int64): RawByteString; var head: TBloomDiffHeader; W: TBufferWriter; temp: array[word] of byte; begin fSafe.ReadWriteLock; // DiffSnapshot makes a WriteLock try if aKnownRevision = fRevision then head.kind := bdUpToDate else if (fKnownRevision = 0) or (fSnapshotInsertCount > fSnapshotAfterInsertCount) or ((fSnapshotInsertCount > 0) and (fSnapshotTimestamp <> 0) and (GetTickCount64 > fSnapshotTimestamp)) then begin DiffSnapshot; head.kind := bdFull; end else if (aKnownRevision < fKnownRevision) or (aKnownRevision > fRevision) then head.kind := bdFull else head.kind := bdDiff; head.size := length(fStore); head.inserted := fInserted; head.revision := fRevision; head.crc := fHasher(0, @head, SizeOf(head) - SizeOf(head.crc)); if head.kind = bdUpToDate then begin FastSetRawByteString(result, @head, SizeOf(head)); exit; end; if head.size + 100 <= SizeOf(temp) then W := TBufferWriter.Create(TRawByteStringStream, @temp, SizeOf(temp)) else W := TBufferWriter.Create(TRawByteStringStream, head.size + 100); try W.Write(@head, SizeOf(head)); case head.kind of bdFull: SaveTo(W); bdDiff: ZeroCompressXor(pointer(fStore), pointer(fKnownStore), head.size, W); end; result := W.FlushTo; finally W.Free; end; finally fSafe.ReadWriteUnLock; end; end; function TSynBloomFilterDiff.DiffKnownRevision(const aDiff: RawByteString): Int64; var head: ^TBloomDiffHeader absolute aDiff; begin if (length(aDiff) < SizeOf(head^)) or (head.kind > high(TBloomDiffHeaderKind)) or (head.size <> cardinal(length(fStore))) or (head.crc <> fHasher(0, pointer(head), SizeOf(head^) - SizeOf(head.crc))) then result := 0 else result := head.Revision; end; function TSynBloomFilterDiff.LoadFromDiff(const aDiff: RawByteString): boolean; var head: ^TBloomDiffHeader absolute aDiff; P: PByte; PLen: integer; begin result := false; P := pointer(aDiff); PLen := length(aDiff); if (PLen < SizeOf(head^)) or (head.kind > high(head.kind)) or (head.crc <> fHasher(0, pointer(head), SizeOf(head^) - SizeOf(head.crc))) then exit; if (fStore <> '') and (head.size <> cardinal(length(fStore))) then exit; inc(P, SizeOf(head^)); dec(PLen, SizeOf(head^)); fSafe.WriteLock; try case head.kind of bdFull: result := LoadFrom(P, PLen); bdDiff: if fStore <> '' then result := ZeroDecompressOr(pointer(P), Pointer(fStore), PLen, head.size); bdUpToDate: result := true; end; if result then begin fRevision := head.revision; fInserted := head.inserted; end; finally fSafe.WriteUnLock; end; end; procedure ZeroCompress(P: PAnsiChar; Len: integer; Dest: TBufferWriter); var PEnd, beg, zero: PAnsiChar; crc: cardinal; begin Dest.WriteVarUInt32(Len); PEnd := P + Len; beg := P; crc := 0; while P < PEnd do begin while (P^ <> #0) and (P < PEnd) do inc(P); zero := P; while (P^ = #0) and (P < PEnd) do inc(P); if P - zero > 3 then begin Len := zero - beg; crc := crc32c(crc, beg, Len); Dest.WriteVarUInt32(Len); Dest.Write(beg, Len); Len := P - zero; crc := crc32c(crc, @Len, SizeOf(Len)); Dest.WriteVarUInt32(Len - 3); beg := P; end; end; Len := P - beg; if Len > 0 then begin crc := crc32c(crc, beg, Len); Dest.WriteVarUInt32(Len); Dest.Write(beg, Len); end; Dest.Write4(crc); end; procedure ZeroCompressXor(New, Old: PAnsiChar; Len: cardinal; Dest: TBufferWriter); var beg, same, index, crc, L: cardinal; begin Dest.WriteVarUInt32(Len); beg := 0; index := 0; crc := 0; while index < Len do begin while (New[index] <> Old[index]) and (index < Len) do inc(index); same := index; while (New[index] = Old[index]) and (index < Len) do inc(index); L := index - same; if L > 3 then begin Dest.WriteVarUInt32(same - beg); Dest.WriteXor(New + beg, Old + beg, same - beg, @crc); crc := crc32c(crc, @L, SizeOf(L)); Dest.WriteVarUInt32(L - 3); beg := index; end; end; L := index - beg; if L > 0 then begin Dest.WriteVarUInt32(L); Dest.WriteXor(New + beg, Old + beg, L, @crc); end; Dest.Write4(crc); end; procedure ZeroDecompress(P: PByte; Len: integer; var Dest: RawByteString); var PEnd, D, DEnd: PAnsiChar; DestLen, crc: cardinal; begin PEnd := PAnsiChar(P) + Len - 4; DestLen := FromVarUInt32(P); FastNewRawByteString(Dest, DestLen); D := pointer(Dest); DEnd := D + DestLen; crc := 0; while PAnsiChar(P) < PEnd do begin Len := FromVarUInt32(P); if D + Len > DEnd then break; MoveFast(P^, D^, Len); crc := crc32c(crc, D, Len); inc(P, Len); inc(D, Len); if PAnsiChar(P) >= PEnd then break; Len := FromVarUInt32(P) + 3; if D + Len > DEnd then break; FillCharFast(D^, Len, 0); crc := crc32c(crc, @Len, SizeOf(Len)); inc(D, Len); end; if crc <> PCardinal(P)^ then Dest := ''; end; function ZeroDecompressOr(P, Dest: PAnsiChar; Len, DestLen: integer): boolean; var PEnd, DEnd: PAnsiChar; crc: cardinal; begin PEnd := P + Len - 4; if cardinal(DestLen) <> FromVarUInt32(PByte(P)) then begin result := false; exit; end; DEnd := Dest + DestLen; crc := 0; while (P < PEnd) and (Dest < DEnd) do begin Len := FromVarUInt32(PByte(P)); if Dest + Len > DEnd then break; crc := crc32c(crc, P, Len); OrMemory(pointer(Dest), pointer(P), Len); inc(P, Len); inc(Dest, Len); if P >= PEnd then break; Len := FromVarUInt32(PByte(P)) + 3; crc := crc32c(crc, @Len, SizeOf(Len)); inc(Dest, Len); end; result := crc = PCardinal(P)^; end; { ****************** Binary Buffers Delta Compression } function Max(a, b: PtrInt): PtrInt; {$ifdef HASINLINE}inline;{$endif} begin if a > b then result := a else result := b; end; function Min(a, b: PtrInt): PtrInt; {$ifdef HASINLINE}inline;{$endif} begin if a < b then result := a else result := b; end; {$ifdef HASINLINE} function Comp(a, b: PAnsiChar; len: PtrInt): PtrInt; inline; var lenptr: PtrInt; begin result := 0; lenptr := len - SizeOf(PtrInt); if lenptr >= 0 then repeat if PPtrInt(a + result)^ <> PPtrInt(b + result)^ then break; inc(result, SizeOf(PtrInt)); until result > lenptr; if result < len then repeat if a[result] <> b[result] then exit; inc(result); until result = len; end; {$else} // eax = a, edx = b, ecx = len function Comp(a, b: PAnsiChar; len: PtrInt): PtrInt; asm // the 'rep cmpsb' version is slower on Intel Core CPU (not AMD) or ecx, ecx push ebx push ecx jz @ok @1: mov bx, [eax] lea eax, [eax + 2] cmp bl, [edx] jne @ok dec ecx jz @ok cmp bh, [edx + 1] lea edx, [edx + 2] jne @ok dec ecx jnz @1 @ok: pop eax sub eax, ecx pop ebx end; {$endif HASINLINE} function CompReverse(a, b: pointer; len: PtrInt): PtrInt; begin result := 0; if len > 0 then repeat if PByteArray(a)[-result] <> PByteArray(b)[-result] then exit; inc(result); until result = len; end; function WriteCurOfs(curofs, curlen, curofssize: integer; sp: PAnsiChar): PAnsiChar; begin if curlen = 0 then begin sp^ := #0; inc(sp); end else begin sp := Pointer(ToVarUInt32(curlen, PByte(sp))); PInteger(sp)^ := curofs; inc(sp, curofssize); end; result := sp; end; {$ifdef CPUINTEL} // crc32c SSE4.2 hardware accellerated dword hash {$ifdef CPUX86} function crc32c32sse42(buf: pointer): cardinal; {$ifdef FPC} nostackframe; assembler; {$endif} asm mov edx, eax xor eax, eax {$ifdef HASAESNI} crc32 eax, dword ptr [edx] {$else} db $F2, $0F, $38, $F1, $02 {$endif HASAESNI} end; {$else} function crc32c32sse42(buf: pointer): cardinal; {$ifdef FPC}nostackframe; assembler; asm {$else} asm // ecx=buf (Linux: edi=buf) .noframe {$endif FPC} xor eax, eax crc32 eax, dword ptr [buf] end; {$endif CPUX86} {$endif CPUINTEL} function hash32prime(buf: pointer): cardinal; begin // inlined xxHash32Mixup - won't pollute L1 cache with crc lookup tables result := PCardinal(buf)^; result := result xor (result shr 15); result := result * 2246822519; result := result xor (result shr 13); result := result * 3266489917; result := result xor (result shr 16); end; const HTabBits = 18; // fits well with DeltaCompress(..,BufSize=2MB) HTabMask = (1 shl HTabBits) - 1; // =$3ffff HListMask = $ffffff; // HTab[]=($ff,$ff,$ff) type PHTab = ^THTab; // HTabBits=18 -> SizeOf=767KB THTab = packed array[0..HTabMask] of array[0..2] of byte; function DeltaCompute(NewBuf, OldBuf, OutBuf, WorkBuf: PAnsiChar; NewBufSize, OldBufSize, MaxLevel: PtrInt; HList, HTab: PHTab): PAnsiChar; var i, curofs, curlen, curlevel, match, curofssize, h, oldh: PtrInt; sp, pInBuf, pOut: PAnsiChar; ofs: cardinal; spb: PByte absolute sp; hash: function(buf: pointer): cardinal; begin // 1. fill HTab[] with hashes for all old data {$ifdef CPUINTEL} if cfSSE42 in CpuFeatures then hash := @crc32c32sse42 else {$endif CPUINTEL} hash := @hash32prime; FillCharFast(HTab^, SizeOf(HTab^), $ff); // HTab[]=HListMask by default pInBuf := OldBuf; oldh := -1; // force calculate first hash sp := pointer(HList); for i := 0 to OldBufSize - 3 do begin h := hash(pInBuf) and HTabMask; inc(pInBuf); if h = oldh then continue; oldh := h; h := PtrInt(@HTab^[h]); // fast 24-bit data process PCardinal(sp)^ := PCardinal(h)^; PCardinal(h)^ := cardinal(i) or (PCardinal(h)^ and $ff000000); inc(sp, 3); end; // 2. compression init if OldBufSize <= $ffff then curofssize := 2 else curofssize := 3; curlen := -1; curofs := 0; pOut := OutBuf + 7; sp := WorkBuf; // 3. handle identical leading bytes match := Comp(OldBuf, NewBuf, Min(OldBufSize, NewBufSize)); if match > 2 then begin sp := WriteCurOfs(0, match, curofssize, sp); sp^ := #0; inc(sp); inc(NewBuf, match); dec(NewBufSize, match); end; pInBuf := NewBuf; // 4. main loop: identify longest sequences using hash, and store reference if NewBufSize >= 8 then repeat // hash 4 next bytes from NewBuf, and find longest match in OldBuf ofs := PCardinal(@HTab^[hash(NewBuf) and HTabMask])^ and HListMask; if ofs <> HListMask then begin // brute force search loop of best hash match curlevel := MaxLevel; repeat with PHash128Rec(OldBuf + ofs)^ do // always test 8 bytes at once {$ifdef CPU64} if PHash128Rec(NewBuf)^.Lo = Lo then {$else} if (PHash128Rec(NewBuf)^.c0 = c0) and (PHash128Rec(NewBuf)^.c1 = c1) then {$endif CPU64} begin // test remaining bytes match := Comp(@PHash128Rec(NewBuf)^.c2, @c2, Min(PtrUInt(OldBufSize) - ofs, NewBufSize) - 8); if match > curlen then begin // found a longer sequence curlen := match; curofs := ofs; end; end; dec(curlevel); ofs := PCardinal(@HList^[ofs])^ and HListMask; until (ofs = HListMask) or (curlevel = 0); end; // curlen = longest sequence length if curlen < 0 then begin // no sequence found -> copy one byte dec(NewBufSize); pOut^ := NewBuf^; inc(NewBuf); inc(pOut); if NewBufSize > 8 then // >=8 may overflow continue else break; end; inc(curlen, 8); sp := WriteCurOfs(curofs, curlen, curofssize, sp); spb := ToVarUInt32(NewBuf - pInBuf, spb); inc(NewBuf, curlen); // continue to search after the sequence dec(NewBufSize, curlen); curlen := -1; pInBuf := NewBuf; if NewBufSize > 8 then // >=8 may overflow continue else break; until false; // 5. write remaining bytes if NewBufSize > 0 then begin MoveFast(NewBuf^, pOut^, NewBufSize); inc(pOut, NewBufSize); inc(NewBuf, NewBufSize); end; sp^ := #0; inc(sp); spb := ToVarUInt32(NewBuf - pInBuf, spb); // 6. write header PInteger(OutBuf)^ := pOut - OutBuf - 7; h := sp - WorkBuf; PInteger(OutBuf + 3)^ := h; OutBuf[6] := AnsiChar(curofssize); // 7. copy commands MoveFast(WorkBuf^, pOut^, h); result := pOut + h; end; function ExtractBuf(GoodCRC: cardinal; p: PAnsiChar; var aUpd, Delta: PAnsiChar; Old: PAnsiChar): TDeltaError; var pEnd, buf, upd, src: PAnsiChar; bufsize, datasize, leading, srclen: PtrUInt; curofssize: byte; begin // 1. decompression init upd := aUpd; bufsize := PCardinal(p)^ and $00ffffff; inc(p, 3); datasize := PCardinal(p)^ and $00ffffff; inc(p, 3); curofssize := ord(p^); inc(p); buf := p; inc(p, bufsize); pEnd := p + datasize; src := nil; // 2. main loop while p < pEnd do begin // src/srclen = sequence to be copied srclen := FromVarUInt32(PByte(p)); if srclen > 0 then if curofssize = 2 then begin src := Old + PWord(p)^; inc(p, 2); end else begin src := Old + PCardinal(p)^ and $00ffffff; inc(p, 3); end; // copy leading bytes leading := FromVarUInt32(PByte(p)); if leading <> 0 then begin MoveFast(buf^, upd^, leading); inc(buf, leading); inc(upd, leading); end; // copy sequence if srclen <> 0 then begin if PtrUInt(upd - src) < srclen then MoveByOne(src, upd, srclen) else MoveFast(src^, upd^, srclen); inc(upd, srclen); end; end; // 3. result check Delta := p; if (p = pEnd) and (crc32c(0, aUpd, upd - aUpd) = GoodCRC) then // whole CRC is faster than incremental result := dsSuccess else result := dsCrcExtract; aUpd := upd; end; procedure WriteByte(var P: PAnsiChar; V: byte); {$ifdef HASINLINE}inline;{$endif} begin PByte(P)^ := V; inc(P); end; procedure WriteInt(var P: PAnsiChar; V: cardinal); {$ifdef HASINLINE}inline;{$endif} begin PCardinal(P)^ := V; inc(P, 4); end; const FLAG_COPIED = 0; FLAG_COMPRESS = 1; FLAG_BEGIN = 2; FLAG_END = 3; function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize: integer; out Delta: PAnsiChar; Level, BufSize: integer): integer; var HTab, HList: PHTab; d, workbuf: PAnsiChar; db: PByte absolute d; BufRead, OldRead, Trailing, NewSizeSave: PtrInt; bigfile: boolean; procedure CreateCopied; begin Getmem(Delta, NewSizeSave + 17); // 17 = 4*integer + 1*byte d := Delta; db := ToVarUInt32(0, ToVarUInt32(NewSizeSave, db)); WriteByte(d, FLAG_COPIED); // block copied flag db := ToVarUInt32(NewSizeSave, db); WriteInt(d, crc32c(0, New, NewSizeSave)); MoveFast(New^, d^, NewSizeSave); inc(d, NewSizeSave); result := d - Delta; end; begin // 1. special cases if (NewSize = OldSize) and mormot.core.base.CompareMem(Old, New, NewSize) then begin Getmem(Delta, 1); Delta^ := '='; result := 1; exit; end; NewSizeSave := NewSize; if OldSize = 0 then begin // Delta from nothing -> direct copy of whole block CreateCopied; exit; end; // 2. compression init bigfile := OldSize > BufSize; if BufSize > NewSize then BufSize := NewSize; if BufSize > HListMask then BufSize := HListMask; // we store offsets with 2..3 bytes -> max 16MB chunk Trailing := 0; Getmem(workbuf, BufSize); // compression temporary buffers Getmem(HList, BufSize * SizeOf({%H-}HList[0])); Getmem(HTab, SizeOf({%H-}HTab^)); Getmem(Delta, Max(NewSize, OldSize) + 4096); // Delta size max evalulation try d := Delta; db := ToVarUInt32(NewSize, db); // Destination Size // 3. handle leading and trailing identical bytes (for biggest files) if bigfile then begin // test initial same chars BufRead := Comp(New, Old, Min(NewSize, OldSize)); if BufRead > 9 then begin // it happens very often: modification is usually in the middle/end db := ToVarUInt32(BufRead, db); // blockSize = Size BufIdem WriteByte(d, FLAG_BEGIN); WriteInt(d, crc32c(0, New, BufRead)); inc(New, BufRead); dec(NewSize, BufRead); inc(Old, BufRead); dec(OldSize, BufRead); end; // test trailing same chars BufRead := CompReverse(New + NewSize - 1, Old + OldSize - 1, Min(NewSize, OldSize)); if BufRead > 5 then begin if NewSize = BufRead then dec(BufRead); // avoid block overflow dec(OldSize, BufRead); dec(NewSize, BufRead); Trailing := BufRead; end; end; // 4. main loop repeat BufRead := Min(BufSize, NewSize); dec(NewSize, BufRead); if (BufRead = 0) and (Trailing > 0) then begin db := ToVarUInt32(Trailing, db); WriteByte(d, FLAG_END); // block idem end flag WriteInt(d, crc32c(0, New, Trailing)); break; end; OldRead := Min(BufSize, OldSize); dec(OldSize, OldRead); db := ToVarUInt32(OldRead, db); if (BufRead < 4) or (OldRead < 4) or (BufRead shr 2 > OldRead) then begin WriteByte(d, FLAG_COPIED); // block copied flag db := ToVarUInt32(BufRead, db); if BufRead = 0 then break; WriteInt(d, crc32c(0, New, BufRead)); MoveFast(New^, d^, BufRead); inc(New, BufRead); inc(d, BufRead); end else begin WriteByte(d, FLAG_COMPRESS); // block compressed flag WriteInt(d, crc32c(0, New, BufRead)); WriteInt(d, crc32c(0, Old, OldRead)); d := DeltaCompute(New, Old, d, workbuf, BufRead, OldRead, Level, HList, HTab); inc(New, BufRead); inc(Old, OldRead); end; until false; // 5. release temp memory finally result := d - Delta; Freemem(HTab); Freemem(HList); Freemem(workbuf); end; if result >= NewSizeSave + 17 then begin // Delta didn't compress well -> store it (with up to 17 bytes overhead) Freemem(Delta); CreateCopied; end; end; function DeltaCompress(const New, Old: RawByteString; Level, BufSize: integer): RawByteString; begin result := DeltaCompress(pointer(New), pointer(Old), length(New), length(Old), Level, BufSize); end; function DeltaCompress(New, Old: PAnsiChar; NewSize, OldSize, Level, BufSize: integer): RawByteString; var Delta: PAnsiChar; DeltaLen: integer; begin DeltaLen := DeltaCompress(New, Old, NewSize, OldSize, Delta, Level, BufSize); FastSetRawByteString(result, Delta, DeltaLen); Freemem(Delta); end; function DeltaExtract(Delta, Old, New: PAnsiChar): TDeltaError; var BufCRC: cardinal; Code: byte; Len, BufRead, OldRead: PtrInt; db: PByte absolute Delta; Upd: PAnsiChar; begin result := dsSuccess; Len := FromVarUInt32(db); Upd := New; repeat OldRead := FromVarUInt32(db); Code := db^; inc(db); case Code of FLAG_COPIED: begin // block copied flag - copy new from Delta BufRead := FromVarUInt32(db); if BufRead = 0 then break; if crc32c(0, Delta + 4, BufRead) <> PCardinal(Delta)^ then begin result := dsCrcCopy; exit; end; inc(Delta, 4); MoveFast(Delta^, New^, BufRead); if BufRead >= Len then exit; // if Old=nil -> only copy new inc(Delta, BufRead); inc(New, BufRead); end; FLAG_COMPRESS: begin // block compressed flag - extract Delta from Old BufCRC := PCardinal(Delta)^; inc(Delta, 4); if crc32c(0, Old, OldRead) <> PCardinal(Delta)^ then begin result := dsCrcComp; exit; end; inc(Delta, 4); result := ExtractBuf(BufCRC, Delta, New, Delta, Old); if result <> dsSuccess then exit; end; FLAG_BEGIN: begin // block idem begin flag if crc32c(0, Old, OldRead) <> PCardinal(Delta)^ then begin result := dsCrcBegin; exit; end; inc(Delta, 4); MoveFast(Old^, New^, OldRead); inc(New, OldRead); end; FLAG_END: begin // block idem end flag if crc32c(0, Old, OldRead) <> PCardinal(Delta)^ then result := dsCrcEnd; MoveFast(Old^, New^, OldRead); inc(New, OldRead); break; end; else begin result := dsFlag; exit; end; end; // Case Code of inc(Old, OldRead); until false; if New - Upd <> Len then result := dsLen; end; function DeltaExtract(const Delta, Old: RawByteString; out New: RawByteString): TDeltaError; begin if (Delta = '') or (Delta = '=') then begin New := Old; result := dsSuccess; end else begin SetLength(New, DeltaExtractSize(pointer(Delta))); result := DeltaExtract(pointer(Delta), pointer(Old), pointer(New)); end; end; function DeltaExtractSize(const Delta: RawByteString): integer; begin result := DeltaExtractSize(pointer(Delta)); end; function DeltaExtractSize(Delta: PAnsiChar): integer; begin if Delta = nil then result := 0 else result := FromVarUInt32(PByte(Delta)); end; function ToText(err: TDeltaError): PShortString; begin result := GetEnumName(TypeInfo(TDeltaError), ord(err)); end; { ****************** TDynArray Low-Level Binary Search } function SimpleDynArrayLoadFrom(Source: PAnsiChar; aTypeInfo: PRttiInfo; out Count, ElemSize: PtrInt): pointer; var Hash: PCardinalArray absolute Source; iteminfo: PRttiInfo; begin result := nil; if (aTypeInfo = nil) or (aTypeInfo^.Kind <> rkDynArray) then exit; iteminfo := aTypeInfo^.DynArrayItemType(ElemSize); if (iteminfo <> nil) or (Source = nil) or // (Source[0] <> AnsiChar(ElemSize)) or mORMot 2 stores elemsize=0 (Source[1] <> #0) then exit; // invalid type information or Source content inc(Source,2); Count := FromVarUInt32(PByte(Source)); // dynamic array count if Count <> 0 then result := @Hash[1]; // returns valid Source content end; function IntegerDynArrayLoadFrom(Source: PAnsiChar; var Count: integer): PIntegerArray; var Hash: PCardinalArray absolute Source; begin result := nil; if (Source = nil) or // (Source[0] <> #4) or mORMot 2 stores elemsize=0 (Source[1] <> #0) then exit; // invalid Source content inc(Source, 2); Count := FromVarUInt32(PByte(Source)); // dynamic array count if Count <> 0 then result := @Hash[1]; // returns valid Source content end; function RawUtf8DynArrayLoadFromContains(Source: PAnsiChar; Value: PUtf8Char; ValueLen: PtrInt; CaseSensitive: boolean): PtrInt; var Count, Len: PtrInt; begin if (Value = nil) or (ValueLen = 0) or (Source = nil) then // (Source[0] <> AnsiChar(SizeOf(PtrInt))) mORMot 2 stores elemsize=0 // {$ifdef ISDELPHI} or (Source[1] <> AnsiChar(rkLString)){$endif} begin result := -1; exit; // invalid Source or Value content end; inc(Source, 2); Count := FromVarUInt32(PByte(Source)); // dynamic array count inc(Source, SizeOf(cardinal)); // ignore Hash32 security checksum for result := 0 to Count - 1 do begin Len := FromVarUInt32(PByte(Source)); if CaseSensitive then begin if (Len = ValueLen) and CompareMemFixed(Value, Source, Len) then exit; end else if Utf8ILComp(Value, pointer(Source), ValueLen, Len) = 0 then exit; inc(Source, Len); end; result := -1; end; { TDynArrayLoadFrom } function TDynArrayLoadFrom.Init(ArrayTypeInfo: PRttiInfo; Source: PAnsiChar; SourceMaxLen: PtrInt): boolean; begin result := false; Count := 0; Current := 0; Reader.Init(Source, SourceMaxLen); ArrayRtti := Rtti.RegisterType(ArrayTypeInfo); if (ArrayRtti.Parser <> ptDynArray) or Reader.EOF then exit; if ArrayRtti.Cache.ItemInfo = nil then ArrayLoad := nil else ArrayLoad := RTTI_BINARYLOAD[ArrayRtti.Cache.ItemInfo^.Kind]; Count := DynArrayLoadHeader(Reader, ArrayRtti.Info, ArrayRtti.Cache.ItemInfo); result := true; end; function TDynArrayLoadFrom.Init(ArrayTypeInfo: PRttiInfo; const Source: RawByteString): boolean; begin result := Init(ArrayTypeInfo, pointer(Source), length(Source)); end; function TDynArrayLoadFrom.Step(Item: pointer): boolean; begin if (Current < Count) and not Reader.EOF then begin if Assigned(ArrayLoad) then ArrayLoad(Item, Reader, ArrayRtti.Cache.ItemInfo) else Reader.Copy(Item, ArrayRtti.Cache.ItemSize); inc(Current); result := true; end else result := false; end; function TDynArrayLoadFrom.FirstField(Field: pointer): boolean; var load: TRttiBinaryLoad; info: PRttiInfo; noiteration: TFastReader; begin if (Current < Count) and not Reader.EOF then begin info := PT_INFO[ArrayRtti.ArrayFirstField]; if info <> nil then begin load := RTTI_BINARYLOAD[info^.Kind]; if Assigned(load) then begin noiteration := Reader; load(Field, noiteration, info); result := true; exit; end; end; end; result := false; end; { ****************** TSynFilter and TSynValidate Processing Classes } function IsValidIP4Address(P: PUtf8Char): boolean; var ndot: PtrInt; V: PtrUInt; begin result := false; if (P = nil) or not (P^ in ['0'..'9']) then exit; V := 0; ndot := 0; repeat case P^ of #0: break; '.': if (P[-1] = '.') or (V > 255) then exit else begin inc(ndot); V := 0; end; '0'..'9': V := (V * 10) + ord(P^) - 48; else exit; end; inc(P); until false; if (ndot = 3) and (V <= 255) and (P[-1] <> '.') then result := true; end; function IsValidEmail(P: PUtf8Char): boolean; // Initial Author: Ernesto D'Spirito - UTF-8 version by AB // http://www.howtodothings.com/computers/a1169-validating-email-addresses-in-delphi.html const // Valid characters in an "atom" atom_chars: TSynAnsicharSet = [#33..#255] - ['(', ')', '<', '>', '@', ',', ';', ':', '\', '/', '"', '.', '[', ']', #127]; // Valid characters in a "quoted-string" quoted_string_chars: TSynAnsicharSet = [#0..#255] - ['"', #13, '\']; // Valid characters in a subdomain letters_digits: TSynAnsicharSet = ['0'..'9', 'A'..'Z', 'a'..'z']; type States = ( STATE_BEGIN, STATE_ATOM, STATE_QTEXT, STATE_QCHAR, STATE_QUOTE, STATE_LOCAL_PERIOD, STATE_EXPECTING_SUBDOMAIN, STATE_SUBDOMAIN, STATE_HYPHEN); var State: States; subdomains: integer; c: AnsiChar; ch: PtrInt; begin State := STATE_BEGIN; subdomains := 1; if P <> nil then repeat ch := ord(P^); if ch and $80 = 0 then inc(P) else ch := UTF8_TABLE.GetHighUtf8Ucs4(P); if (ch <= 255) and (WinAnsiConvert.AnsiToWide[ch] <= 255) then // convert into WinAnsi char c := AnsiChar(ch) else // invalid char c := #127; case State of STATE_BEGIN: if c in atom_chars then State := STATE_ATOM else if c = '"' then State := STATE_QTEXT else break; STATE_ATOM: if c = '@' then State := STATE_EXPECTING_SUBDOMAIN else if c = '.' then State := STATE_LOCAL_PERIOD else if not (c in atom_chars) then break; STATE_QTEXT: if c = '\' then State := STATE_QCHAR else if c = '"' then State := STATE_QUOTE else if not (c in quoted_string_chars) then break; STATE_QCHAR: State := STATE_QTEXT; STATE_QUOTE: if c = '@' then State := STATE_EXPECTING_SUBDOMAIN else if c = '.' then State := STATE_LOCAL_PERIOD else break; STATE_LOCAL_PERIOD: if c in atom_chars then State := STATE_ATOM else if c = '"' then State := STATE_QTEXT else break; STATE_EXPECTING_SUBDOMAIN: if c in letters_digits then State := STATE_SUBDOMAIN else break; STATE_SUBDOMAIN: if c = '.' then begin inc(subdomains); State := STATE_EXPECTING_SUBDOMAIN end else if c = '-' then State := STATE_HYPHEN else if not (c in letters_digits) then break; STATE_HYPHEN: if c in letters_digits then State := STATE_SUBDOMAIN else if c <> '-' then break; end; if P^ = #0 then begin P := nil; break; end; until false; result := (State = STATE_SUBDOMAIN) and (subdomains >= 2); end; { TSynFilterOrValidate } constructor TSynFilterOrValidate.Create(const aParameters: RawUtf8); begin inherited Create; SetParameters(aParameters); // should parse the JSON-encoded parameters end; constructor TSynFilterOrValidate.CreateUtf8(const Format: RawUtf8; const Args, Params: array of const); begin Create(FormatJson(Format, Args, Params)); end; procedure TSynFilterOrValidate.SetParameters(const value: RawUtf8); begin fParameters := value; end; function TSynFilterOrValidate.AddOnce(var aObjArray: TSynFilterOrValidateObjArray; aFreeIfAlreadyThere: boolean): TSynFilterOrValidate; var i: integer; begin if self <> nil then begin for i := 0 to length(aObjArray) - 1 do if (PPointer(aObjArray[i])^ = PPointer(self)^) and (aObjArray[i].fParameters = fParameters) then begin if aFreeIfAlreadyThere then Free; result := aObjArray[i]; exit; end; ObjArrayAdd(aObjArray, self); end; result := self; end; { TSynFilterUpperCase } procedure TSynFilterUpperCase.Process(aFieldIndex: integer; var value: RawUtf8); begin value := mormot.core.unicode.UpperCase(value); end; { TSynFilterUpperCaseU } procedure TSynFilterUpperCaseU.Process(aFieldIndex: integer; var value: RawUtf8); begin value := UpperCaseU(value); end; { TSynFilterLowerCase } procedure TSynFilterLowerCase.Process(aFieldIndex: integer; var value: RawUtf8); begin value := LowerCase(value); end; { TSynFilterLowerCaseU } procedure TSynFilterLowerCaseU.Process(aFieldIndex: integer; var value: RawUtf8); begin value := LowerCaseU(value); end; { TSynFilterTrim } procedure TSynFilterTrim.Process(aFieldIndex: integer; var value: RawUtf8); begin TrimSelf(value); end; { TSynFilterTruncate} procedure TSynFilterTruncate.SetParameters(const value: RawUtf8); var V: array[0..1] of TValuePUtf8Char; tmp: TSynTempBuffer; begin tmp.Init(value); JsonDecode(tmp.buf, [ 'MaxLength', // 0 'Utf8Length' // 1 ], @V); fMaxLength := V[0].ToCardinal(0); fUtf8Length := V[1].ToBoolean; tmp.Done; end; procedure TSynFilterTruncate.Process(aFieldIndex: integer; var value: RawUtf8); begin if fMaxLength - 1 < cardinal(maxInt) then if fUtf8Length then Utf8TruncateToLength(value, fMaxLength) else Utf8TruncateToUnicodeLength(value, fMaxLength); end; { TSynValidateIPAddress } function TSynValidateIPAddress.Process(aFieldIndex: integer; const value: RawUtf8; var ErrorMsg: string): boolean; begin result := IsValidIP4Address(pointer(value)); if not result then ErrorMsg := Format(sInvalidIPAddress, [Utf8ToString(value)]); end; { TSynValidateEmail } function TSynValidateEmail.Process(aFieldIndex: integer; const value: RawUtf8; var ErrorMsg: string): boolean; var TLD, DOM: RawUtf8; i: integer; const TopLevelTLD: array[0..20] of PUtf8Char = ( // see http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains 'aero', 'asia', 'biz', 'cat', 'com', 'coop', 'edu', 'gov', 'info', 'int', 'jobs', 'mil', 'mobi', 'museum', 'name', 'net', 'org', 'pro', 'site', 'tel', 'travel'); // no xxx ! begin if IsValidEmail(pointer(value)) then repeat DOM := lowercase(copy(value, PosExChar('@', value) + 1, 100)); if length(DOM) > 63 then break; // exceeded 63-character limit of a DNS name if (ForbiddenDomains <> '') and CsvContains(ForbiddenDomains, DOM) then break; i := length(value); while (i > 0) and (value[i] <> '.') do dec(i); TLD := lowercase(copy(value, i + 1, 100)); if (AllowedTLD <> '') and not CsvContains(AllowedTLD, TLD) then break; if (ForbiddenTLD <> '') and CsvContains(ForbiddenTLD, TLD) then break; if not fAnyTLD then if FastFindPUtf8CharSorted(@TopLevelTLD, high(TopLevelTLD), pointer(TLD)) < 0 then if length(TLD) <> 2 then break; // assume a two chars string is a ISO 3166-1 alpha-2 code result := true; exit; until true; ErrorMsg := Format(sInvalidEmailAddress, [Utf8ToString(value)]); result := false; end; procedure TSynValidateEmail.SetParameters(const value: RawUtf8); var V: array[0..3] of TValuePUtf8Char; tmp: TSynTempBuffer; begin inherited; tmp.Init(value); JsonDecode(tmp.buf, [ 'AllowedTLD', // 0 'ForbiddenTLD', // 1 'ForbiddenDomains', // 2 'AnyTLD' // 3 ], @V); LowerCaseCopy(V[0].Text, V[0].Len, fAllowedTLD); LowerCaseCopy(V[1].Text, V[1].Len, fForbiddenTLD); LowerCaseCopy(V[2].Text, V[2].Len, fForbiddenDomains); AnyTLD := V[3].ToBoolean; tmp.Done; end; { TSynValidatePattern } procedure TSynValidatePattern.SetParameters(const Value: RawUtf8); begin inherited SetParameters(Value); fMatch.Prepare(Value, ClassType = TSynValidatePatternI, {reuse=}true); end; function TSynValidatePattern.Process(aFieldIndex: integer; const value: RawUtf8; var ErrorMsg: string): boolean; procedure SetErrorMsg; begin ErrorMsg := Format(sInvalidPattern, [Utf8ToString(value)]); end; begin result := fMatch.Match(value); if not result then SetErrorMsg; end; { TSynValidateNonVoidText } function Character01n(n: integer): string; begin if n < 0 then n := 0 else if n > 1 then n := 2; result := GetCsvItemString(pointer(string(sCharacter01n)), n); end; procedure InvalidTextLengthMin(min: integer; var result: string); begin result := Format(sInvalidTextLengthMin, [min, Character01n(min)]); end; function TSynValidateNonVoidText.Process(aFieldIndex: integer; const value: RawUtf8; var ErrorMsg: string): boolean; begin if value = '' then begin InvalidTextLengthMin(1, ErrorMsg); result := false; end else result := true; end; { TSynValidateText } procedure TSynValidateText.SetErrorMsg(fPropsIndex, InvalidTextIndex, MainIndex: integer; var result: string); var P: PChar; begin P := pointer(string(sInvalidTextChar)); result := GetCsvItemString(P, MainIndex); if fPropsIndex > 0 then result := Format(result, [fProps[fPropsIndex], GetCsvItemString(P, InvalidTextIndex), Character01n(fProps[fPropsIndex])]); end; function TSynValidateText.Process(aFieldIndex: integer; const value: RawUtf8; var ErrorMsg: string): boolean; var i, L: cardinal; Min: array[2..7] of cardinal; begin result := false; if fUtf8Length then L := length(value) else L := Utf8ToUnicodeLength(pointer(value)); if L < MinLength then InvalidTextLengthMin(MinLength, ErrorMsg) else if L > MaxLength then ErrorMsg := Format(sInvalidTextLengthMax, [MaxLength, Character01n(MaxLength)]) else begin FillCharFast(Min, SizeOf(Min), 0); L := length(value); for i := 1 to L do case value[i] of ' ': inc(Min[7]); 'a'..'z': begin inc(Min[2]); inc(Min[5]); end; 'A'..'Z': begin inc(Min[2]); inc(Min[6]); end; '0'..'9': inc(Min[3]); '_', '!', ';', '.', ',', '/', ':', '?', '%', '$', '=', '"', '#', '@', '(', ')', '{', '}', '+', '''', '-', '*': inc(Min[4]); end; for i := 2 to 7 do if Min[i] < fProps[i] then begin SetErrorMsg(i, i, 0, ErrorMsg); exit; end else if Min[i] > fProps[i + 8] then begin SetErrorMsg(i + 8, i, 1, ErrorMsg); exit; end; if value <> '' then begin if MaxLeftTrimCount < cardinal(maxInt) then begin // if MaxLeftTrimCount is set, check against Value i := 0; while (i < L) and (value[i + 1] = ' ') do inc(i); if i > MaxLeftTrimCount then begin SetErrorMsg(0, 0, 8, ErrorMsg); exit; end; end; if MaxRightTrimCount < cardinal(maxInt) then begin // if MaxRightTrimCount is set, check against Value i := 0; while (i < L) and (value[L - i] = ' ') do dec(i); if i > MaxRightTrimCount then begin SetErrorMsg(0, 0, 9, ErrorMsg); exit; end; end; end; result := true; end; end; procedure TSynValidateText.SetParameters(const value: RawUtf8); var V: array[0..high(TSynValidateTextProps) + {Utf8Length} 1] of TValuePUtf8Char; i: PtrInt; tmp: TSynTempBuffer; const DEFAULT: TSynValidateTextProps = ( 1, // MinLength maxInt, // MaxLength 0, // MinAlphaCount 0, // MinDigitCount 0, // MinPunctCount 0, // MinLowerCount 0, // MinUpperCount 0, // MinSpaceCount maxInt, // MaxLeftTrimCount maxInt, // MaxRightTrimCount maxInt, // MaxAlphaCount maxInt, // MaxDigitCount maxInt, // MaxPunctCount maxInt, // MaxLowerCount maxInt, // MaxUpperCount maxInt); // MaxSpaceCount begin if (MinLength = 0) and (MaxLength = 0) then // if not previously set fProps := DEFAULT; inherited SetParameters(value); if value = '' then exit; tmp.Init(value); try JsonDecode(tmp.buf, [ 'MinLength', 'MaxLength', 'MinAlphaCount', 'MinDigitCount', 'MinPunctCount', 'MinLowerCount', 'MinUpperCount', 'MinSpaceCount', 'MaxLeftTrimCount', 'MaxRightTrimCount', 'MaxAlphaCount', 'MaxDigitCount', 'MaxPunctCount', 'MaxLowerCount', 'MaxUpperCount', 'MaxSpaceCount', 'Utf8Length'], @V); for i := 0 to high(fProps) do fProps[i] := V[i].ToCardinal(fProps[i]); with V[high(V)] do fUtf8Length := ToBoolean; finally tmp.Done; end; end; { TSynValidatePassWord } procedure TSynValidatePassWord.SetParameters(const value: RawUtf8); const DEFAULT: TSynValidateTextProps = ( 5, // MinLength 20, // MaxLength 1, // MinAlphaCount 1, // MinDigitCount 1, // MinPunctCount 1, // MinLowerCount 1, // MinUpperCount 0, // MinSpaceCount maxInt, // MaxLeftTrimCount maxInt, // MaxRightTrimCount maxInt, // MaxAlphaCount maxInt, // MaxDigitCount maxInt, // MaxPunctCount maxInt, // MaxLowerCount maxInt, // MaxUpperCount 0); // MaxSpaceCount begin // set default values for validating a strong password fProps := DEFAULT; fUtf8Length := false; // read custom parameters inherited; end; { ***************** Cross-Platform TSynTimeZone Time Zones } { TTimeZoneData } function TTimeZoneData.GetTziFor(year: integer): PTimeZoneInfo; var i, last: PtrInt; begin if dyn = nil then result := @tzi else if year <= dyn[0].year then result := @dyn[0].tzi else begin last := high(dyn); if year >= dyn[last].year then result := @dyn[last].tzi else begin for i := 1 to last do if year < dyn[i].year then begin result := @dyn[i - 1].tzi; exit; end; result := @tzi; // should never happen, but makes compiler happy end; end; end; { TTimeZoneInformation } constructor TSynTimeZone.Create; begin fZones.InitSpecific(TypeInfo(TTimeZoneDataDynArray), fZone, ptRawUtf8, @fZoneCount); end; constructor TSynTimeZone.CreateDefault(dummycpp: integer); begin Create; {$ifdef OSWINDOWS} LoadFromRegistry; {$else} LoadFromFile; if fZoneCount = 0 then LoadFromResource; // if no .tz file is available, try if bound to executable {$endif OSWINDOWS} end; destructor TSynTimeZone.Destroy; begin inherited Destroy; fIds.Free; fDisplays.Free; end; var SharedSynTimeZone: TSynTimeZone; class function TSynTimeZone.Default: TSynTimeZone; begin if SharedSynTimeZone = nil then begin GlobalLock; // RegisterGlobalShutdownRelease() will use it anyway try if SharedSynTimeZone = nil then SharedSynTimeZone := RegisterGlobalShutdownRelease(TSynTimeZone.CreateDefault); finally GlobalUnLock; end; end; result := SharedSynTimeZone; end; function TSynTimeZone.SaveToBuffer: RawByteString; begin fSafe.ReadLock; try result := AlgoSynLZ.Compress(fZones.SaveTo); finally fSafe.ReadUnLock; end; end; procedure TSynTimeZone.SaveToFile(const FileName: TFileName); var FN: TFileName; begin if FileName = '' then FN := ChangeFileExt(Executable.ProgramFileName, '.tz') else FN := FileName; FileFromString(SaveToBuffer, FN); end; procedure TSynTimeZone.LoadFromBuffer(const Buffer: RawByteString); begin if Buffer = '' then exit; fSafe.WriteLock; try fZones.LoadFromBinary(AlgoSynLZ.Decompress(Buffer)); fZones.ForceReHash; FreeAndNil(fIds); FreeAndNil(fDisplays); finally fSafe.WriteUnLock; end; end; procedure TSynTimeZone.LoadFromFile(const FileName: TFileName); var FN: TFileName; begin if FileName = '' then FN := ChangeFileExt(Executable.ProgramFileName, '.tz') else FN := FileName; LoadFromBuffer(StringFromFile(FN)); end; procedure TSynTimeZone.LoadFromResource(Instance: TLibHandle); var buf: RawByteString; begin ResourceToRawByteString(ClassName, PChar(10), buf, Instance); if buf <> '' then LoadFromBuffer(buf); end; {$ifdef OSWINDOWS} procedure TSynTimeZone.LoadFromRegistry; const REGKEY = 'Software\Microsoft\Windows NT\CurrentVersion\Time Zones\'; var reg: TWinRegistry; keys: TRawUtf8DynArray; i, first, last, year, n: integer; item: TTimeZoneData; begin fSafe.WriteLock; try fZones.Clear; if reg.ReadOpen(wrLocalMachine, REGKEY) then keys := reg.ReadEnumEntries else keys := nil; // make Delphi 6 happy n := length(keys); fZones.Capacity := n; for i := 0 to n - 1 do begin Finalize(item); FillcharFast(item.tzi, SizeOf(item.tzi), 0); if reg.ReadOpen(wrLocalMachine, REGKEY + keys[i], {reopen=}true) then begin item.id := keys[i]; // registry keys are genuine by definition item.display := reg.ReadString('Display'); reg.ReadBuffer('TZI', @item.tzi, SizeOf(item.tzi)); if reg.ReadOpen(wrLocalMachine, REGKEY + keys[i] + '\Dynamic DST', true) then begin // warning: never defined on XP/2003, and not for all entries first := reg.ReadDword('FirstEntry'); last := reg.ReadDword('LastEntry'); if (first > 0) and (last >= first) then begin n := 0; SetLength(item.dyn, last - first + 1); for year := first to last do if reg.ReadBuffer(Utf8ToSynUnicode(UInt32ToUtf8(year)), @item.dyn[n].tzi, SizeOf(TTimeZoneInfo)) then begin item.dyn[n].year := year; inc(n); end; SetLength(item.dyn, n); end; end; fZones.Add(item); end; end; reg.Close; fZones.ForceReHash; FreeAndNil(fIds); FreeAndNil(fDisplays); finally fSafe.WriteUnLock; end; end; {$endif OSWINDOWS} function TSynTimeZone.LockedFindZoneIndex(const TzId: TTimeZoneID): PtrInt; begin if TzId = '' then result := -1 else begin if TzId = fLastZone then result := fLastIndex else begin result := fZones.FindHashed(TzId); fLastZone := TzId; flastIndex := result; end; end; end; function TSynTimeZone.GetDisplay(const TzId: TTimeZoneID): RawUtf8; var ndx: PtrInt; begin fSafe.ReadLock; ndx := LockedFindZoneIndex(TzId); if ndx < 0 then if TzId = 'UTC' then // e.g. on XP result := TzId else result := '' else result := fZone[ndx].display; fSafe.ReadUnLock; end; function TSynTimeZone.GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID; out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean): boolean; var ndx: PtrInt; d: TSynSystemTime; tzi: PTimeZoneInfo; std, dlt: TDateTime; begin fSafe.ReadLock; try ndx := LockedFindZoneIndex(TzId); if ndx < 0 then begin Bias := 0; HaveDaylight := false; result := TzId = 'UTC'; // e.g. on XP exit; end; d.FromDate(Value); // faster than DecodeDate tzi := fZone[ndx].GetTziFor(d.Year); if tzi.change_time_std.IsZero then begin HaveDaylight := false; Bias := tzi.Bias + tzi.bias_std; end else begin HaveDaylight := true; std := tzi.change_time_std.EncodeForTimeChange(d.Year); dlt := tzi.change_time_dlt.EncodeForTimeChange(d.Year); if ValueIsUtc then begin // STD shifts by the DLT bias to convert to UTC std := ((std * MinsPerDay) + tzi.Bias + tzi.bias_dlt) / MinsPerDay; // DLT shifts by the STD bias dlt := ((dlt * MinsPerDay) + tzi.Bias + tzi.bias_std) / MinsPerDay; end; if std < dlt then if (std <= Value) and (Value < dlt) then Bias := tzi.Bias + tzi.bias_std else Bias := tzi.Bias + tzi.bias_dlt else if (dlt <= Value) and (Value < std) then Bias := tzi.Bias + tzi.bias_dlt else Bias := tzi.Bias + tzi.bias_std; end; result := true; finally fSafe.ReadUnLock; end; end; function TSynTimeZone.UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime; var Bias: integer; HaveDaylight: boolean; begin if (self = nil) or (TzId = '') then result := UtcDateTime else begin GetBiasForDateTime(UtcDateTime, TzId, Bias, HaveDaylight, {fromutc=}true); result := ((UtcDateTime * MinsPerDay) - Bias) / MinsPerDay; end; end; function TSynTimeZone.NowToLocal(const TzId: TTimeZoneID): TDateTime; begin result := UtcToLocal(NowUtc, TzId); end; function TSynTimeZone.LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime; var Bias: integer; HaveDaylight: boolean; begin if (self = nil) or (TzID = '') then result := LocalDateTime else begin GetBiasForDateTime(LocalDateTime, TzID, Bias, HaveDaylight); result := ((LocalDateTime * MinsPerDay) + Bias) / MinsPerDay; end; end; function TSynTimeZone.Ids: TStrings; var i: PtrInt; begin if fIDs = nil then begin fIDs := TStringList.Create; fSafe.ReadLock; for i := 0 to length(fZone) - 1 do fIDs.Add(Utf8ToString(RawUtf8(fZone[i].id))); fSafe.ReadUnLock; end; result := fIDs; end; function TSynTimeZone.Displays: TStrings; var i: PtrInt; begin if fDisplays = nil then begin fDisplays := TStringList.Create; fSafe.ReadLock; for i := 0 to length(fZone) - 1 do fDisplays.Add(Utf8ToString(fZone[i].display)); fSafe.ReadUnLock; end; result := fDisplays; end; function GetBiasForDateTime(const Value: TDateTime; const TzId: TTimeZoneID; out Bias: integer; out HaveDaylight: boolean; ValueIsUtc: boolean): boolean; begin result := TSynTimeZone.Default. GetBiasForDateTime(Value, TzId, Bias, HaveDaylight, ValueIsUtc); end; function GetDisplay(const TzId: TTimeZoneID): RawUtf8; begin result := TSynTimeZone.Default.GetDisplay(TzId); end; function UtcToLocal(const UtcDateTime: TDateTime; const TzId: TTimeZoneID): TDateTime; begin result := TSynTimeZone.Default.UtcToLocal(UtcDateTime, TzId); end; function NowToLocal(const TzId: TTimeZoneID): TDateTime; begin result := TSynTimeZone.Default.NowToLocal(TzId); end; function LocalToUtc(const LocalDateTime: TDateTime; const TzID: TTimeZoneID): TDateTime; begin result := TSynTimeZone.Default.LocalToUtc(LocalDateTime, TzId); end; end.