FastReport_2022_VCL/Source/frxHTMLBuffConv.pas
2024-01-01 16:13:08 +01:00

2038 lines
51 KiB
ObjectPascal

{
HtmlViewer Version 11.9
Copyright (c) 2010-2018 by Bernd Gabriel
This source module is based on code of CodeChangerDecode.pas written by SchwarzKopf-M (SchwarzKopf-M@yandex.ru)
and the source code library libiconv, which is published under the GNU Library General Public License.
Thanks to SchwarzKopf-M for his extensive work.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Note that the source modules HTMLGIF1.PAS and DITHERUNIT.PAS
are covered by separate copyright notices located in those modules.
}
//This is the original copyright notice in CodeChangerDecode.pas
//This is charset Windows-1251:
//Ìîäóëü ïåðåêîäèðîâêè òåêñòà ÿâëÿåòñÿ ñâîáîäíî ðàñïðîñòðàíÿåìûì è ðàñïðîñòðàíÿåòñÿ "Êàê åñòü".
//Ïðè ìîäèôèêàöèè èëè ÷àñòè÷íîì èñïîëüçîâàíèè èñõîäíîãî êîäà ññûëêè
// íà îðèãèíàëüíûé ìîäóëü îáÿçàòåëüíû.
//Àâòîðñêîå ïðàâî îñòàâëÿþ çà ñîáîé - SchwarzKopf-M (SchwarzKopf-M@yandex.ru).
//Â äàííîì ìîäóëå èñïîëüçóåòñÿ èíôîðìàöèÿ íàéäåííàÿ â Èíòåðíåòå
// áåç íàðóøåíèÿ èñõîäíûõ àâòîðñêèõ ïðàâ.
//Äëÿ ïåðåêîäèðîâêè íåêîòîðûõ àçèàòñêèõ ÿçûêîâ èñïîëüçîâàëàñü âîëüíàÿ
// èíòåðïðåòàöèÿ èñõîäíûõ êîäîâ áèáëèîòåêè libiconv.
//
//Ñïåöèàëüíî äëÿ HTMLViewer (http://code.google.com/p/thtmlviewer)
// Translated by "http://translate.google.de/#ru|en|" it is:
// Text conversion module is freeware and is distributed "as is".
// If you modify or partial use of source code links
// To the original module is required.
// Copyright reserve the - SchwarzKopf-M (SchwarzKopf-M@yandex.ru).
// This module uses the information found on the Internet
// Without affecting the original copyright holder.
// For the conversion of some Asian languages ??used in freestyle
// Interpretation of the source code library libiconv.
//
// Especially for HTMLViewer (http://code.google.com/p/thtmlviewer)
{$I frxHTMLCons.inc}
unit frxHTMLBuffConv;
interface
uses
{$ifdef MSWindows}
Windows,
{$endif}
{$ifdef LCL}
LclIntf, LclType,
{$endif}
SysUtils,
frxHTMLBuffConvArrays,
frxHTMLGlobals,
frxHTMLBuffer;
type
// BG, 06.10.2012: converts UTF7 to unicode.
TBuffConvUTF7 = class(TBuffConverter)
private
State: Byte;
protected
procedure Assign(Source: TBuffConverter); override;
public
function NextChar: TBuffChar; override;
end;
// BG, 26.09.2012: converts UTF8 to unicode.
TBuffConvUTF8 = class(TBuffConverter)
public
function NextChar: TBuffChar; override;
end;
// BG, 28.01.2013: converts several single byte code pages using an AnsiCharMap to unicode.
TBuffConvSingleByteMap = class(TBuffConverter)
private
FMap: ThtAnsiCharMap;
procedure UpdateMap;
protected
procedure Assign(Source: TBuffConverter); override;
public
procedure AfterConstruction; override;
function NextChar: TBuffChar; override;
end;
// BG, 02.10.2012: converts several single byte code pages using an AnsiCharHighMap to unicode.
TBuffConvHighMap = class(TBuffConverter)
private
FMap: ThtAnsiCharHighMap;
procedure UpdateMap;
protected
procedure Assign(Source: TBuffConverter); override;
public
procedure AfterConstruction; override;
function NextChar: TBuffChar; override;
end;
// BG, 02.10.2012: converts the single byte code of ISO 8859 using an AnsiCharIsoMap to unicode.
TBuffConvIsoMap = class(TBuffConverter)
private
FMap: ThtAnsiCharISOMap;
procedure UpdateMap;
protected
procedure Assign(Source: TBuffConverter); override;
public
procedure AfterConstruction; override;
function NextChar: TBuffChar; override;
end;
TBuffBaseConverter = class(TBuffConverter)
protected
function ASCIIDecodeChar(c1: Byte): TBuffChar;
function CP922EstonianDecodeChar(c1: Byte): TBuffChar;
function GB2312DecodeChar(c1, c2: Byte): TBuffChar;
function GBKDecodeChar(c1, c2: Byte): TBuffChar;
function ISO8859_11DecodeChar(c1: Byte): TBuffChar;
function ISO8859_15DecodeChar(c1: Byte): TBuffChar;
function ISO8859_5DecodeChar(c1: Byte): TBuffChar;
function ISO8859_6DecodeChar(c1: Byte): TBuffChar;
function ISO8859_7DecodeChar(c1: Byte): TBuffChar;
function ISO8859_8DecodeChar(c1: Byte): TBuffChar;
function ISO8859_9DecodeChar(c1: Byte): TBuffChar;
function JIS_X0201DecodeChar(c1: Byte): TBuffChar;
function JIS_X0208DecodeChar(c1, c2: Byte): TBuffChar;
function JIS_X0212DecodeChar(c1, c2: Byte): TBuffChar;
function KSC5601DecodeChar(c1, c2: Byte): TBuffChar;
function Latin1EuroDecodeChar(c1: Byte): TBuffChar;
function MultiByteToWideCharDecodeChar(c1: Byte): TBuffChar;
end;
// BG, 02.10.2012: converts several single byte code pages using one of the *DecoderChar functions to unicode.
TBuffSingleByteDecoder = function(c1: Byte): TBuffChar of object;
TBuffConvSingleByte = class(TBuffBaseConverter)
private
FDecoder: TBuffSingleByteDecoder;
procedure UpdateDecoder;
protected
procedure Assign(Source: TBuffConverter); override;
public
procedure AfterConstruction; override;
function NextChar: TBuffChar; override;
end;
// BG, 02.10.2012: converts several single byte code pages using one of the *DecoderChar functions to unicode.
TBuffDoubleByteDecoder = function(c1, c2: Byte): TBuffChar of object;
TBuffConvDoubleByte = class(TBuffBaseConverter)
private
FDecoder: TBuffDoubleByteDecoder;
procedure UpdateDecoder;
protected
procedure Assign(Source: TBuffConverter); override;
public
procedure AfterConstruction; override;
function NextChar: TBuffChar; override;
end;
// BG, 26.09.2012: converts shift-jis (codepages 932, 943) to unicode.
TBuffConvShiftJis932 = class(TBuffConverter)
public
function NextChar: TBuffChar; override;
end;
// BG, 03.10.2012: converts simplified Chinese (PRC, Singapore) / Chinese simplified (GB2312) to unicode.
TBuffConvIBM936 = class(TBuffBaseConverter)
public
function NextChar: TBuffChar; override;
end;
// BG, 04.10.2012: converts hangeul (codepage 949 to unicode.
TBuffConvHangeul949 = class(TBuffBaseConverter)
public
function NextChar: TBuffChar; override;
end;
// BG, 04.10.2012: converts Traditional Chinese (code page 950) to unicode.
TBuffConvBig5CP950 = class(TBuffConverter)
public
function NextChar: TBuffChar; override;
end;
TBuffConvGB18030 = class(TBuffBaseConverter)
public
function NextChar: TBuffChar; override;
end;
TBuffConvEUC_JP = class(TBuffBaseConverter)
public
function NextChar: TBuffChar; override;
end;
TBuffConvISO2022CN = class(TBuffBaseConverter)
private
CNState1: (CNSTATE_ASCII, CNSTATE_TWOBYTE);
CNState2: (CNSTATE2_NONE, STATE2_DESIGNATED_GB2312, STATE2_DESIGNATED_CNS11643_1);
CNState3: (CNSTATE3_NONE, STATE3_DESIGNATED_CNS11643_2);
function CNS11643_1DecodeChar(c1, c2: Word): TBuffChar;
function CNS11643_2DecodeChar(c1, c2: Word): TBuffChar;
protected
procedure Assign(Source: TBuffConverter); override;
public
function NextChar: TBuffChar; override;
end;
// BG, 27.09.2012: converts stateful ISO-2022-JP, -1, -2 (codepage 50220) to unicode.
TBuffConvISO2022JP = class(TBuffBaseConverter)
private
JPState: (
JPS_ASCII,
JPS_JISX0201ROMAN,
JPS_JISX0208,
JPS_JISX0212,
JPS_GB2312,
JPS_KSC5601,
JPS_8859_1,
JPS_8859_7);
protected
procedure Assign(Source: TBuffConverter); override;
public
function NextChar: TBuffChar; override;
end;
TBuffConvISO2022KR = class(TBuffBaseConverter)
private
KRState1: (KRSTATE_ASCII, KRSTATE_TWOBYTE);
KRState2: (KRSTATE2_NONE, STATE2_DESIGNATED_KSC5601);
protected
procedure Assign(Source: TBuffConverter); override;
public
function NextChar: TBuffChar; override;
end;
implementation
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.MultiByteToWideCharDecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00: Result := TBuffChar(c1);
else
{$ifdef MSWindows}
MultiByteToWideChar(FCodePage, 0, PAnsiChar(@c1), 1, @Result, 1)
{$else}
raise Exception.Create('function MultiByteToWideCharDecodeChar() not implemented.');
{$endif MSWindows}
end;
end;
//-- BG ---------------------------------------------------------- 02.10.2012 --
function TBuffBaseConverter.ASCIIDecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$7F: Result := TBuffChar(c1);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 02.10.2012 --
function TBuffBaseConverter.GB2312DecodeChar(c1, c2: Byte): TBuffChar;
var
i: Integer;
begin
case c1 of
$21..$29, $30..$77:
case c2 of
$21..$7E:
begin
i := 94 * (c1 - $21) + (c2 - $21);
case i of
0.. 830: Result := GB2312_1Map[i];
1410..8177: Result := GB2312_2Map[i - 1410];
else
Result := TBuffChar($FFFD);
end;
end;
else
Result := TBuffChar($FFFD);
end;
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 06.10.2012 --
function TBuffBaseConverter.GBKDecodeChar(c1, c2: Byte): TBuffChar;
function Offset(cmul, c1, c2: Word): Integer;
begin
case c2 of
$40..$7E: Result := cmul * (c1 - $81) + (c2 - $40);
$80..$FE: Result := cmul * (c1 - $81) + (c2 - $41);
else
Result := MaxInt;
end;
end;
var
i: Integer;
begin
case c1 of
$81..$A0:
begin
Result := GBKE_1Map[Offset(190, c1, c2)];
Exit;
end;
$A1:
case c2 of
$A4: Result := TBuffChar($00B7);
$AA: Result := TBuffChar($2014);
else
Result := GB2312DecodeChar(c1 - $80, c2 - $80);
end;
$A2:
case c2 of
$A1..$AA: Result := TBuffChar($2170 + (c2 - $A1));
else
Result := GB2312DecodeChar(c1 - $80, c2 - $80);
end;
else
Result := GB2312DecodeChar(c1 - $80, c2 - $80);
end;
if Result <> TBuffChar(#$FFFD) then
Exit;
case c1 of
$A6, $A8:
begin
i := Offset(190, c1, c2);
case i of
7189..7210: Result := CP936E_1Map[i - 7189];
7532..7537: Result := CP936E_2Map[i - 7532];
end;
if Result <> TBuffChar(#$FFFD) then
exit;
end;
end;
case c1 of
$A8..$FE:
begin
i := Offset(96, c1, c2);
case i of
3744..12015: Result := GBKE_2Map[i - 3744];
end;
if Result <> TBuffChar(#$FFFD) then
exit;
end;
end;
case c1 of
$A1..$A2:
case c2 of
$40..$7E: Result := TBuffChar($E4C6 + 96 * (c1 - $81) + (c2 - $40));
$80..$A0: Result := TBuffChar($E4C6 + 96 * (c1 - $A1) + (c2 - $41));
end;
$AA..$AF, $F8..$FE:
case c2 of
$A1..$F7: Result := TBuffChar($E000 + 94 * (c1 - $AA) + (c2 - $A1));
$F8..$FE: Result := TBuffChar($E000 + 94 * (c1 - $F2) + (c2 - $A1));
end;
end;
end;
//-- BG ---------------------------------------------------------- 04.10.2012 --
function TBuffBaseConverter.JIS_X0201DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$5C: Result := #$00A5;
$7E: Result := #$203E;
$A1..$DF: Result := TBuffChar(c1 + $FEC0);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 04.10.2012 --
function TBuffBaseConverter.JIS_X0208DecodeChar(c1, c2: Byte): TBuffChar;
var
i: Integer;
begin
case c1 of
$21..$27, $30..$73:
case c2 of
$21..$7E:
begin
i := 94 * (c1 - $21) + (c2 - $21);
case i of
0.. 689: Result := JIS_X0208_1Map[i];
1410..7807: Result := JIS_X0208_2Map[i - 1410];
else
Result := TBuffChar($FFFD);
end;
end;
else
Result := TBuffChar($FFFD);
end;
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 04.10.2012 --
function TBuffBaseConverter.JIS_X0212DecodeChar(c1, c2: Byte): TBuffChar;
var
i: Integer;
begin
case c1 of
$22, $26..$27, $29..$2B, $30..$6D:
case c2 of
$21..$7E:
begin
i := 94 * (c1 - $21) + (c2 - $21);
case i of
0.. 174: Result := JIS_X0212_1Map[i - 94];
470.. 657: Result := JIS_X0212_2Map[i - 470];
752..1026: Result := JIS_X0212_3Map[i - 752];
1410..7210: Result := JIS_X0212_4Map[i - 1410];
else
Result := TBuffChar($FFFD);
end;
end;
else
Result := TBuffChar($FFFD);
end;
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 02.10.2012 --
function TBuffBaseConverter.KSC5601DecodeChar(c1, c2: Byte): TBuffChar;
var
i: Integer;
begin
case c1 of
$21..$2C, $30..$48, $4A..$7D:
case c2 of
$21..$7E:
begin
i := 94 * (c1 - $21) + (c2 - $21);
case i of
0..1114: Result := KSC5601_1Map[i];
1410..3759: Result := KSC5601_2Map[i - 1410];
3854..8741: Result := KSC5601_3Map[i - 3854];
else
Result := TBuffChar($FFFD);
end;
end;
else
Result := TBuffChar($FFFD);
end;
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_5DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$A0, $AD: Result := TBuffChar(c1);
$F0 : Result := #$2116;
$FD : Result := #$00A7;
else
Result := TBuffChar(c1 + $0360);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_6DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$A0, $A4, $AD : Result := TBuffChar(c1);
$AC, $BB, $BF, $C1..$DA, $E0..$F2: Result := TBuffChar(c1 + $0580);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_7DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$A0, $A6..$A9, $AB..$AD,
$B0..$B3, $B7, $BB, $BD: Result := TBuffChar(c1);
$A1 : Result := #$2018;
$A2 : Result := #$2019;
$AF : Result := #$2015;
$D2, $FF: Result := #$FFFF;
else
Result := TBuffChar(c1 + $02D0);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_8DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$A0, $A2..$A9, $AB..$AE,
$B0..$B9, $BB..$BE: Result := TBuffChar(c1);
$AA : Result := #$00D7;
$AF : Result := #$203E;
$BA : Result := #$00F7;
$DF : Result := #$2017;
$E0..$FA: Result := TBuffChar(c1 + $04E0);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_9DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$D0: Result := #$011E;
$DD: Result := #$0130;
$DE: Result := #$015E;
$F0: Result := #$011F;
$FD: Result := #$0131;
$FE: Result := #$015F;
else
Result := TBuffChar(c1);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_11DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$A0: Result := TBuffChar(c1);
$A1..$DA,
$DF..$FB: Result := TBuffChar(c1 + $0D60);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffBaseConverter.ISO8859_15DecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$A4: Result := #$20AC;
$A6: Result := #$00A6;
$A8: Result := #$0161;
$B4: Result := #$017D;
$B8: Result := #$017E;
$BC: Result := #$0152;
$BD: Result := #$0153;
$BE: Result := #$0178;
else
Result := TBuffChar(c1);
end;
end;
//-- BG ---------------------------------------------------------- 03.10.2012 --
function TBuffBaseConverter.Latin1EuroDecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$00..$7F: Result := TBuffChar(c1);
$D5: Result := #$20AC;
else
Result := CP850Map[c1];
end;
end;
//-- BG ---------------------------------------------------------- 03.10.2012 --
function TBuffBaseConverter.CP922EstonianDecodeChar(c1: Byte): TBuffChar;
begin
case c1 of
$A0..$AF: Result := CP922_1Map[c1 - $A0];
$D0..$DF: Result := CP922_2Map[c1 - $D0];
$F0..$FF: Result := CP922_3Map[c1 - $F0];
else
Result := TBuffChar(c1);
end;
end;
{ TBuffConvSingleByte }
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvSingleByte.AfterConstruction;
begin
inherited;
UpdateDecoder;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvSingleByte.Assign(Source: TBuffConverter);
begin
inherited;
UpdateDecoder;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvSingleByte.NextChar: TBuffChar;
begin
Result := FDecoder(GetNext);
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvSingleByte.UpdateDecoder;
begin
case FCodePage of
858: FDecoder := Latin1EuroDecodeChar;
922: FDecoder := CP922EstonianDecodeChar;
28595: FDecoder := ISO8859_5DecodeChar;
28596: FDecoder := ISO8859_6DecodeChar;
28597: FDecoder := ISO8859_7DecodeChar;
28598: FDecoder := ISO8859_8DecodeChar;
28599: FDecoder := ISO8859_9DecodeChar;
28601: FDecoder := ISO8859_11DecodeChar;
28605: FDecoder := ISO8859_15DecodeChar;
else
FDecoder := MultiByteToWideCharDecodeChar;
end;
end;
{ TBuffConvDoubleByte }
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvDoubleByte.AfterConstruction;
begin
inherited;
UpdateDecoder;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvDoubleByte.Assign(Source: TBuffConverter);
begin
inherited;
UpdateDecoder;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvDoubleByte.NextChar: TBuffChar;
var
c1: Word;
begin
c1 := GetNext;
case c1 of
$00..$80: Result := TBuffChar(c1);
$A1..$FE: Result := FDecoder(c1 - $80, GetNext - $80);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvDoubleByte.UpdateDecoder;
begin
case FCodePage of
51936: FDecoder := GB2312DecodeChar;
51949: FDecoder := KSC5601DecodeChar;
end;
end;
{ TBuffConvUTF8 }
//-- BG ---------------------------------------------------------- 26.09.2012 --
function TBuffConvUTF8.NextChar: TBuffChar;
const
MaxChar: LongWord = 65535; // "Not A Character"
var
Buffer: LongWord;
Chr, Cnt: Word;
function NextByte: Boolean;
begin
Chr := GetNext;
Inc(Cnt);
Result := (Chr and $C0) = $80;
Buffer := (Buffer shl 6) + (Chr and $3F);
end;
procedure Failed;
begin
if Chr = 0 then
Buffer := 0
else
Buffer := MaxChar;
Dec(FPos.BytePtr, Cnt);
end;
begin
Buffer := GetNext;
case Buffer of
$00..$7F: // 1 byte, 7 bits
;
$C0..$DF: // 2 bytes, 11 bits
begin
Buffer := Buffer and $1F;
Cnt := 0;
if not NextByte then
Failed; // invalid
end;
$E0..$EF: // 3 bytes, 16 bits
begin
Buffer := Buffer and $0F;
Cnt := 0;
if not (NextByte and NextByte) then
Failed; // invalid
end;
$F0..$F7: // 4 bytes, 21 bits
begin
Buffer := Buffer and $07;
Cnt := 0;
if not (NextByte and NextByte and NextByte) then
Failed // invalid
else if Buffer > MaxChar then
Buffer := MaxChar;
end;
$F8..$FB: // 5 bytes, 26 bits
begin
Buffer := Buffer and $03;
Cnt := 0;
if not (NextByte and NextByte and NextByte and NextByte) then
Failed // invalid
else if Buffer > MaxChar then
Buffer := MaxChar;
end;
$FC..$FD: // 6 bytes, 31 bits
begin
Buffer := Buffer and $01;
Cnt := 0;
if not (NextByte and NextByte and NextByte and NextByte and NextByte) then
Failed // invalid
else if Buffer > MaxChar then
Buffer := MaxChar;
end;
else
// $80..$BF, // invalid
// $FE..$FF:
Buffer := MaxChar;
end;
Result := TBuffChar(Buffer);
end;
{ TBuffConvSingleByteMap }
//-- BG ---------------------------------------------------------- 28.01.2013 --
procedure TBuffConvSingleByteMap.AfterConstruction;
begin
inherited;
UpdateMap;
end;
//-- BG ---------------------------------------------------------- 28.01.2013 --
procedure TBuffConvSingleByteMap.Assign(Source: TBuffConverter);
begin
inherited;
UpdateMap;
end;
//-- BG ---------------------------------------------------------- 28.01.2013 --
function TBuffConvSingleByteMap.NextChar: TBuffChar;
begin
Result := FMap[GetNext];
end;
//-- BG ---------------------------------------------------------- 28.01.2013 --
procedure TBuffConvSingleByteMap.UpdateMap;
begin
case CodePage of
29001: FMap := CP29001;
end;
end;
{ TBuffConvHighMap }
//-- BG ---------------------------------------------------------- 02.10.2012 --
procedure TBuffConvHighMap.AfterConstruction;
begin
inherited;
UpdateMap;
end;
//-- BG ---------------------------------------------------------- 02.10.2012 --
procedure TBuffConvHighMap.Assign(Source: TBuffConverter);
begin
inherited;
UpdateMap;
end;
//-- BG ---------------------------------------------------------- 02.10.2012 --
function TBuffConvHighMap.NextChar: TBuffChar;
begin
Result := TBuffChar(GetNext);
case Ord(Result) of
$80..$FF: Result := FMap[Ord(Result)];
end;
end;
//-- BG ---------------------------------------------------------- 02.10.2012 --
procedure TBuffConvHighMap.UpdateMap;
begin
case CodePage of
437: FMap := CP437Map;
708: FMap := Windows708Map;
737: FMap := Windows737Map;
775: FMap := Windows775Map;
850: FMap := CP850Map;
852: FMap := CP852Map;
855: FMap := CP855Map;
857: FMap := CP857Map;
860: FMap := IBM860Map;
861: FMap := CP861Map;
862: FMap := CP862Map;
863: FMap := CP863Map;
864: FMap := Windows864Map;
865: FMap := CP865Map;
866: FMap := CP866Map;
869: FMap := CP869Map;
874: FMap := Windows874Map;
1250: FMap := CP1250Map;
1251: FMap := CP1251Map;
1252: FMap := CP1252Map;
1253: FMap := CP1253Map;
1254: FMap := CP1254Map;
1255: FMap := CP1255Map;
1256: FMap := CP1256Map;
1257: FMap := CP1257Map;
1258: FMap := CP1258Map;
10000: FMap := MacRomanMap;
10004: FMap := MacArabicMap;
10005: FMap := MacHebrewMap;
10006: FMap := MacGreekMap;
10007: FMap := MacCyrillicMap;
10010: FMap := MacRomaniaMap;
10017: FMap := MacUkraineMap;
10021: FMap := MacThaiMap;
10029: FMap := MacCentralEuropeMap;
10079: FMap := MacIcelandicMap;
10081: FMap := MacTurkishMap;
10082: FMap := MacCroatianMap;
20866: FMap := KOI8_RMap;
end;
end;
{ TBuffConvIsoMap }
//-- BG ---------------------------------------------------------- 04.10.2012 --
procedure TBuffConvIsoMap.AfterConstruction;
begin
inherited;
UpdateMap;
end;
//-- BG ---------------------------------------------------------- 04.10.2012 --
procedure TBuffConvIsoMap.Assign(Source: TBuffConverter);
begin
inherited;
UpdateMap;
end;
//-- BG ---------------------------------------------------------- 04.10.2012 --
function TBuffConvIsoMap.NextChar: TBuffChar;
begin
Result := TBuffChar(GetNext);
case Ord(Result) of
$A0..$FF: Result := FMap[Ord(Result)];
end;
end;
//-- BG ---------------------------------------------------------- 04.10.2012 --
procedure TBuffConvIsoMap.UpdateMap;
begin
case CodePage of
28592: FMap := ISO8859_2Map;
28593: FMap := ISO8859_3Map;
28594: FMap := ISO8859_4Map;
28600: FMap := ISO8859_10Map;
28603: FMap := ISO8859_13Map;
28604: FMap := ISO8859_14Map;
28606: FMap := ISO8859_16Map;
end;
end;
{ TBuffConvShiftJis932 }
//-- BG ---------------------------------------------------------- 02.10.2012 --
function TBuffConvShiftJis932.NextChar: TBuffChar;
var c1, c2, t1, t2, i, a, b: Word;
WC: TBuffChar;
begin
c1 := GetNext;
Result := TBuffChar(c1);
case c1 of
$00..$80: ;
// done above: Result := TBuffChar(c1);
$A1..$DF:
Result := TBuffChar(c1 + $FEC0);
$81..$86, $88..$9F, $E0..$EA:
begin
c2 := GetNext;
case c2 of
$40..$7E, $80..$FC:
begin
if c1 < $E0 then
t1 := c1 - $81
else
t1 := c1 - $C1;
if c2 < $80 then
t2 := c2 - $40
else
t2 := c2 - $41;
if t2 < $5E then
begin
c1 := 2 * t1 + 0 + $21;
c2 := t2 + $21;
end
else
begin
c1 := 2 * t1 + 1 + $21;
c2 := t2 - $5E + $21;
end;
case c1 of
$21..$28, $30..$74:
case c2 of
$21..$7E:
begin
i := 94 * (c1 - $21) + (c2 - $21);
WC := #$FFFD;
if i < 1410 then
begin
if i < 690 then
WC := JIS_X0208_1Map[i];
end
else
begin
if i < 7808 then
WC := JIS_X0208_2Map[i - 1410];
end;
if WC <> #$FFFD then
Result := WC;
end;
end;
end;
end;
end;
end;
$87, $ED..$EE, $FA..$FC:
begin
c2 := GetNext;
case c2 of
$40..$7E, $80..$FC:
begin
if c1 >= $E0 then
a := $C1
else
a := $81;
if c2 >= $80 then
b := $41
else
b := $40;
i := 188 * (c1 - a) + (c2 - b);
WC := #$FFFD;
if i < 8272 then
begin
if i < 1220 then
WC := CP932E_1Map[i - 1128];
end
else if i < 10716 then
begin
if i < 8648 then
WC := CP932E_2Map[i - 8272]
end
else
begin
if i < 11104 then
WC := CP932E_3Map[i - 10716];
end;
if WC <> #$FFFD then
Result := WC;
end;
end;
end;
$F0..$F9:
begin
c2 := GetNext;
case c2 of
$40..$7E: Result := TBuffChar($E000 + 188 * (c1 - $F0) + (c2 - $40));
$80..$FC: Result := TBuffChar($E000 + 188 * (c1 - $F0) + (c2 - $41));
end;
end;
end;
end;
{ TBuffConvIBM936 }
//-- BG ---------------------------------------------------------- 03.10.2012 --
function TBuffConvIBM936.NextChar: TBuffChar;
var
c1: Word;
begin
c1 := GetNext;
case c1 of
$00..$7F: Result := TBuffChar(c1);
$80: Result := TBuffChar($20AC);
else
// two byte code
Result := GBKDecodeChar(c1, GetNext);
end;
end;
{ TBuffConvHangeul949 }
//-- BG ---------------------------------------------------------- 04.10.2012 --
function TBuffConvHangeul949.NextChar: TBuffChar;
function UHC1Decode(row, col: Integer): TBuffChar;
var
I: Integer;
begin
i := 178 * row + col;
if i < 5696 then
begin
if col >= 89 then
Result := TBuffChar(UHC_1_1Map[2 * row + 1] + UHC_1_2Map[i])
else
Result := TBuffChar(UHC_1_1Map[2 * row] + UHC_1_2Map[i])
end
else
Result := #$FFFD;
end;
function UHC2Decode(row, col: Integer): TBuffChar;
var
I: Integer;
begin
i := 84 * row + col;
if i < 3126 then
begin
if col >= 42 then
Result := TBuffChar(UHC_2_1Map[2 * row + 1] + UHC_2_2Map[i])
else
Result := TBuffChar(UHC_2_1Map[2 * row] + UHC_2_2Map[i])
end
else
Result := #$FFFD;
end;
function KSC5601Decode(c1, c2: Integer): TBuffChar;
begin
Result := KSC5601DecodeChar(c1 - $80, c2 - $80);
end;
var
c1, c2: Word;
begin
c1 := GetNext;
case c1 of
$00..$7F: Result := TBuffChar(c1);
$81..$A0:
begin
c2 := GetNext;
case c2 of
$40..$5A: Result := UHC1Decode(c1 - $81, c2 - $41);
$61..$7A: Result := UHC1Decode(c1 - $81, c2 - $47);
$81..$FE: Result := UHC1Decode(c1 - $81, c2 - $4D);
else
Result := TBuffChar($FFFD);
end;
end;
$A1..$C5:
begin
c2 := GetNext;
case c2 of
$40..$5A: Result := UHC2Decode(c1 - $A1, c2 - $41);
$61..$7A: Result := UHC2Decode(c1 - $A1, c2 - $47);
$81..$A0: Result := UHC2Decode(c1 - $A1, c2 - $4D);
$A1..$E7,
$E9..$FE: Result := KSC5601Decode(c1, c2);
$E8:
if c1 <> $A2 then
Result := KSC5601Decode(c1, c2)
else
Result := TBuffChar($FFFD);
else
Result := TBuffChar($FFFD);
end;
end;
$C6..$FD:
begin
c2 := GetNext;
Result := KSC5601Decode(c1, c2);
end;
$FE:
begin
c2 := GetNext;
Result := TBuffChar($E000 + (c2 - $A1));
end;
else
Result := TBuffChar($FFFD);
end;
end;
{ TBuffConvBig5CP950 }
//-- BG ---------------------------------------------------------- 04.10.2012 --
function TBuffConvBig5CP950.NextChar: TBuffChar;
function CP950Decode1(row, col: Integer): TBuffChar;
var
i: Integer;
begin
i := 157 * row + col;
Result := CP950Map[i];
end;
function BIG5Decode(row, col: Integer): TBuffChar;
var
i: Integer;
begin
i := 157 * row + col;
case i of
0.. 6120: Result := BIG5_1Map[i];
6280..13931: Result := BIG5_2Map[i - 6280];
else
Result := TBuffChar($FFFD);
end;
end;
var
c1, c2, cc: Word;
begin
c1 := GetNext;
case c1 of
$00..$7F: Result := TBuffChar(c1); // ANSI/ASCII
$80, $FF: Result := TBuffChar($FFFD); // undefined
$81..$A0: Result := TBuffChar($FFFD); // private
$A1..$A2:
begin
c2 := GetNext;
case c2 of
$40..$7E: Result := CP950Decode1(c1 - $A1, c2 - $40);
$A1..$FE: Result := CP950Decode1(c1 - $A1, c2 - $62);
else
Result := TBuffChar($FFFD);
end;
end;
$A3..$F9:
begin
c2 := GetNext;
cc := c1 shl 8 + c2;
case cc of
$A3E1: Result := TBuffChar($20AC); // Euro sign
$C6A1..$C6FE,
$C800..$C8FE: Result := TBuffChar($FFFD); // private
$F9D6..$F9FE: Result := CP950EMap[c2 - $D6];
$F9FF..$FEFE: Result := TBuffChar($FFFD); // private
else
case c2 of
$40..$7E: Result := BIG5Decode(c1 - $A1, c2 - $40);
$A1..$FE: Result := BIG5Decode(c1 - $A1, c2 - $62);
else
Result := TBuffChar($FFFD);
end;
end;
end;
else
Result := TBuffChar($FFFD);
end;
end;
{ TBuffConvEUC_JP }
function TBuffConvEUC_JP.NextChar: TBuffChar;
var
c1, c2, c3: Word;
begin
c1 := GetNext;
case c1 of
$00..$7F: Result := TBuffChar(c1); // ANSI/ASCII
$8E:
begin
// two byte code
c2 := GetNext;
case c2 of
$A1..$DF: Result := JIS_X0201DecodeChar(c2);
else
Result := TBuffChar($FFFD);
end;
end;
$8F:
begin
// three byte code
c2 := GetNext;
c3 := GetNext;
case c2 of
$A1..$F4:
case c3 of
$A1..$FE: Result := JIS_X0212DecodeChar(c2 - $80, c3 - $80);
else
Result := TBuffChar($FFFD);
end;
$F5..$FE:
case c3 of
$A1..$FE: Result := TBuffChar($E3AC + 94 * (c2 - $F5) + (c3 - $A1));
else
Result := TBuffChar($FFFD);
end;
else
Result := TBuffChar($FFFD);
end;
end;
$A1..$F4:
begin
// two byte code
c2 := GetNext;
Result := JIS_X0208DecodeChar(c1 - $80, c2 - $80);
end;
$F5..$FE:
begin
// two byte code
c2 := GetNext;
case c2 of
$A1..$FE: Result := TBuffChar($E000 + 94 * (c1 - $F5) + (c2 - $A1));
else
Result := TBuffChar($FFFD);
end;
end;
else
// $80..$A0, $FF:
Result := TBuffChar($FFFD);
end;
end;
{ TBuffConvISO2022CN }
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvISO2022CN.Assign(Source: TBuffConverter);
var
Src: TBuffConvISO2022CN absolute Source;
begin
inherited;
if Source is TBuffConvISO2022CN then
begin
CNState1 := Src.CNState1;
CNState2 := Src.CNState2;
CNState3 := Src.CNState3;
end
else
begin
CNState1 := CNSTATE_ASCII;
CNState2 := CNSTATE2_NONE;
CNState3 := CNSTATE3_NONE;
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvISO2022CN.CNS11643_1DecodeChar(c1, c2: Word): TBuffChar;
var
i: Integer;
begin
case c1 of
$21..$27, $42, $44..$7D:
begin
case c2 of
$21..$7E:
begin
i := 94 * (Ord(c1) - $21) + (Ord(c2) - $21);
case i of
0.. 499: Result := CNS11643_1_1Map[i];
571: Result := TBuffChar($4EA0);
578: Result := TBuffChar($51AB);
583: Result := TBuffChar($52F9);
3102..3134: Result := CNS11643_1_2Map[i - 3102];
3290..8690: Result := CNS11643_1_3Map[i - 3290];
else
Result := TBuffChar(#$FFFD);
end;
end;
else
Result := TBuffChar(#$FFFD);
end;
end;
else
Result := TBuffChar(#$FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvISO2022CN.CNS11643_2DecodeChar(c1, c2: Word): TBuffChar;
var
i: Integer;
begin
case c1 of
$21..$72:
case c2 of
$21..$7E:
begin
i := 94 * (c1 - $21) + (c2 - $21);
if i < 7650 then
Result := CNS11643_2Map[i]
else
Result := TBuffChar(#$FFFD);
end;
else
Result := TBuffChar(#$FFFD);
end;
else
Result := TBuffChar(#$FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvISO2022CN.NextChar: TBuffChar;
var
c1, c2: Word;
Pos: PAnsiChar;
begin
repeat
c1 := GetNext;
case c1 of
$1B:
if FEnd.AnsiChr - FPos.AnsiChr >= 2 then
begin
Pos := FPos.AnsiChr;
case TBuffChar(GetNext) of
'$':
case TBuffChar(GetNext) of
')':
case TBuffChar(GetNext) of
'A':
begin
CNState2 := STATE2_DESIGNATED_GB2312;
Continue;
end;
'G':
begin
CNState2 := STATE2_DESIGNATED_CNS11643_1;
Continue;
end;
end;
'*':
case TBuffChar(GetNext) of
'H':
begin
CNState3 := STATE3_DESIGNATED_CNS11643_2;
Continue;
end;
end;
end;
'N':
case CNState3 of
CNSTATE3_NONE:
Break;
STATE3_DESIGNATED_CNS11643_2:
begin
// do not use both GetNexts as parameters. You don't know which parameter is processed first!
c2 := GetNext;
Result := CNS11643_2DecodeChar(c2, GetNext);
Exit;
end;
end;
end;
FPos.AnsiChr := Pos;
end;
$0E:
begin
if (CNState2 <> STATE2_DESIGNATED_GB2312) and (CNState2 <> STATE2_DESIGNATED_CNS11643_1) then
Break;
CNState1 := CNSTATE_TWOBYTE;
Continue;
end;
$0F:
begin
CNState1 := CNSTATE_ASCII;
Continue;
end;
end;
break;
until false;
case CNState1 of
CNSTATE_ASCII:
begin
Result := ASCIIDecodeChar(c1);
case Result of
#$000A, #$000D:
begin
CNState2 := CNSTATE2_NONE;
CNState3 := CNSTATE3_NONE;
end;
end;
end;
CNSTATE_TWOBYTE:
begin
c2 := GetNext;
case CNState2 of
STATE2_DESIGNATED_GB2312: Result := GB2312DecodeChar(c1, c2);
STATE2_DESIGNATED_CNS11643_1: Result := CNS11643_1DecodeChar(c1, c2);
else
Result := TBuffChar(#$FFFD);
end;
end;
else
Result := TBuffChar(#$FFFD);
end;
end;
{ TBuffConvISO2022KR }
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvISO2022KR.Assign(Source: TBuffConverter);
var
Src: TBuffConvISO2022KR absolute Source;
begin
inherited;
if Source is TBuffConvISO2022KR then
begin
KRState1 := Src.KRState1;
KRState2 := Src.KRState2;
end
else
begin
KRState1 := KRSTATE_ASCII;
KRState2 := KRSTATE2_NONE;
end;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvISO2022KR.NextChar: TBuffChar;
var
c1: Word;
Pos: PAnsiChar;
begin
repeat
c1 := GetNext;
case c1 of
$1B:
if FEnd.AnsiChr - FPos.AnsiChr >= 2 then
begin
Pos := FPos.AnsiChr;
case TBuffChar(GetNext) of
'$':
case TBuffChar(GetNext) of
')':
case TBuffChar(GetNext) of
'C':
begin
KRState2 := STATE2_DESIGNATED_KSC5601;
Continue;
end;
end;
end;
end;
FPos.AnsiChr := Pos;
end;
$0E:
begin
if KRState2 <> STATE2_DESIGNATED_KSC5601 then
Break;
KRState1 := KRSTATE_TWOBYTE;
Continue;
end;
$0F:
begin
KRState1 := KRSTATE_ASCII;
Continue;
end;
end;
Break;
until False;
case KRState1 of
KRSTATE_ASCII:
begin
Result := ASCIIDecodeChar(c1);
case Result of
#$000A, #$000D:
KRState2 := KRSTATE2_NONE;
end;
end;
KRSTATE_TWOBYTE:
begin
if KRState2 <> STATE2_DESIGNATED_KSC5601 then
begin
Result := TBuffChar(#$FFFD);
Exit;
end;
Result := KSC5601DecodeChar(c1, GetNext);
end;
else
Result := TBuffChar(#$FFFD);
end;
end;
{ TBuffConvISO2022JP }
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvISO2022JP.Assign(Source: TBuffConverter);
var
Src: TBuffConvISO2022JP absolute Source;
begin
inherited;
if Source is TBuffConvISO2022JP then
JPState := Src.JPState;
end;
//-- BG ---------------------------------------------------------- 05.10.2012 --
function TBuffConvISO2022JP.NextChar: TBuffChar;
var
c1: Word;
Pos: PAnsiChar;
begin
repeat
c1 := GetNext;
case c1 of
$1B:
if FEnd.AnsiChr - FPos.AnsiChr >= 2 then
begin
Pos := FPos.AnsiChr;
case TBuffChar(GetNext) of
'(':
case TBuffChar(GetNext) of
'B':
begin
JPState := JPS_ASCII;
Continue;
end;
'J':
begin
JPState := JPS_JISX0201ROMAN;
Continue;
end;
end;
'$':
case TBuffChar(GetNext) of
'@', 'B':
begin
JPState := JPS_JISX0208;
Continue;
end;
'A':
begin
JPState := JPS_GB2312;
Continue;
end;
'(':
case TBuffChar(GetNext) of
'C':
begin
JPState := JPS_KSC5601;
Continue;
end;
'D':
begin
JPState := JPS_JISX0212;
Continue;
end;
end;
end;
'.':
case TBuffChar(GetNext) of
'A':
begin
JPState := JPS_8859_1;
Continue;
end;
'F':
begin
JPState := JPS_8859_7;
Continue;
end;
end;
end;
FPos.AnsiChr := Pos;
end;
end;
Break;
until False;
case JPState of
JPS_ASCII: Result := ASCIIDecodeChar(c1);
JPS_8859_1: Result := TBuffChar(c1);
JPS_8859_7: Result := ISO8859_7DecodeChar(c1);
JPS_JISX0201ROMAN: Result := JIS_X0201DecodeChar(c1);
JPS_JISX0208: Result := JIS_X0208DecodeChar(c1, GetNext);
JPS_JISX0212: Result := JIS_X0212DecodeChar(c1, GetNext);
JPS_KSC5601: Result := KSC5601DecodeChar(c1, GetNext);
JPS_GB2312: Result := GB2312DecodeChar(c1, GetNext);
else
Result := TBuffChar($FFFD);
end;
end;
//-- BG ---------------------------------------------------------- 06.10.2012 --
function TBuffConvGB18030.NextChar: TBuffChar;
function GB18030ExtDecode(row, col: Integer): TBuffChar;
var
i: Integer;
begin
i := 190 * row + col;
case i of
6432: Result := #$20AC;
7536: Result := #$01F9;
7672..7684: Result := GB18030Ext_2uni_pagea9[i-7672];
23750..23844: Result := GB18030Ext_2uni_pagefe[i-23750];
else
Result := TBuffChar($FFFD);
end;
end;
function GB18030Uni1DecodeChar(c1, c2, c3, c4: Byte): TBuffChar;
var
i: Integer;
k, k1, k2: Byte;
begin
case c4 of
$30..$39:
begin
i := (((c1 - $81) * 10 + (c2 - $30)) * 126 + (c3 - $81)) * 10 + (c4 - $30);
case i of
0..39419:
begin
k1 := 0;
k2 := 193;
while k1 < k2 do
begin
k := (k1 + k2) div 2;
if i <= GB18030Uni_charset2uni_ranges[k shl 1 + 1] then
k2 := k
else if i >= GB18030Uni_charset2uni_ranges[k shl 1 + 2] then
k1 := k + 1
else
begin
Result := TBuffChar($FFFD);
exit;
end;
end;
Result := TBuffChar(i + gb18030uni_ranges[k1]);
end;
else
Result := TBuffChar($FFFD);
end;
end;
else
Result := TBuffChar($FFFD);
end;
end;
{$ifdef TBuffChar_Has_At_Least_3_Bytes}
function GB18030Uni2DecodeChar(c1, c2, c3, c4: Byte): TBuffChar;
var
i: Integer;
begin
case c4 of
$30..$39:
begin
i := (((c1 - $90) * 10 + (c2 - $30)) * 126 + (c3 - $81)) * 10 + (c4 - $30);
if (i >= 0) and (i < $100000) then
Result := TBuffChar($10000 + i)
else
Result := TBuffChar($FFFD);
end;
else
Result := TBuffChar($FFFD);
end;
end;
{$endif TBuffChar_Has_At_Least_3_Bytes}
var
c1, c2, c3, c4: Word;
begin
c1 := GetNext;
{$ifdef TBuffChar_Has_At_Least_3_Bytes}
c2 := 0;
c3 := 0;
c4 := 0;
{$endif TBuffChar_Has_At_Least_3_Bytes}
case c1 of
$00..$7F: Result := ASCIIDecodeChar(c1);
$81..$84:
begin
// Code set 2 (remainder of Unicode U+0000..U+FFFF)
c2 := GetNext;
case c2 of
$30..$39:
begin
c3 := GetNext;
case c3 of
$81..$FE:
begin
c4 := GetNext;
Result := GB18030Uni1DecodeChar(c1, c2, c3, c4);
end;
else
Result := TBuffChar($FFFD);
end;
end;
else
Result := TBuffChar($FFFD);
end;
end;
$A2, $A8..$A9, $FE:
begin
// Code set 1 (GBK extended)
c2 := GetNext;
case c2 of
$40..$7E: Result := GB18030ExtDecode(c1 - $81, c2 - $40);
$80..$FE: Result := GB18030ExtDecode(c1 - $81, c2 - $41);
else
Result := TBuffChar($FFFD);
end;
end;
else
// Code set 1 (GBK extended)
Result := GBKDecodeChar(c1, GetNext);
end;
{$ifdef TBuffChar_Has_At_Least_3_Bytes}
if Result <> TBuffChar($FFFD) then
exit;
{ Code set 3 (Unicode U+10000..U+10FFFF) }
case c1 of
$90..$E3:
begin
if c2 = 0 then
c2 := GetNext;
case c2 of
$30..$39:
begin
if c3 = 0 then
c3 := GetNext;
case c3 of
$81..$FE:
begin
if c4 = 0 then
c4 := GetNext;
Result := GB18030Uni2DecodeChar(c1, c2, c3, c4);
end;
else
Result := TBuffChar($FFFD);
end;
end;
end;
end;
end;
{$endif TBuffChar_Has_At_Least_3_Bytes}
end;
{ TBuffConvUTF7 }
//-- BG ---------------------------------------------------------- 05.10.2012 --
procedure TBuffConvUTF7.Assign(Source: TBuffConverter);
var
Src: TBuffConvUTF7 absolute Source;
begin
inherited;
if Source is TBuffConvUTF7 then
begin
State := Src.State;
end
else
begin
State := 0;
end;
end;
//-- BG ---------------------------------------------------------- 06.10.2012 --
function TBuffConvUTF7.NextChar: TBuffChar;
// Specification: RFC 2152 (and old RFC 1641, RFC 1642)
// The original Base64 encoding is defined in RFC 2045.
// Set of direct characters:
// A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
function IsDirect(ch: Byte): Boolean; {$ifdef UseInline} inline; {$endif}
const
Tab: array [0..15] of Byte = (
$00, $26, $00, $00, $81, $f3, $ff, $87,
$fe, $ff, $ff, $07, $fe, $ff, $ff, $07);
begin
Result := (ch < 128) and (((Tab[ch shr 3] shr (ch and 7)) and 1) <> 0);
end;
// Set of direct and optional direct characters:
// A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
// ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
function IsXDirect(ch: Byte): Boolean; {$ifdef UseInline} inline; {$endif}
const
Tab: array [0..15] of Byte = (
$00, $26, $00, $00, $ff, $f7, $ff, $ff,
$ff, $ff, $ff, $ef, $ff, $ff, $ff, $3f);
begin
Result := (ch < 128) and (((Tab[ch shr 3] shr (ch and 7)) and 1) <> 0);
end;
// Set of base64 characters, extended:
// A-Z a-z 0-9 + / -
function IsXBase64(ch: Byte): Boolean; {$ifdef UseInline} inline; {$endif}
const
Tab: array [0..15] of Byte = (
$00, $00, $00, $00, $00, $a8, $ff, $03,
$fe, $ff, $ff, $07, $fe, $ff, $ff, $07);
begin
Result := (ch < 128) and (((Tab[ch shr 3] shr (ch and 7)) and 1) <> 0);
end;
label
active, inactive, none;
var
c1, c2: Byte;
wc: Cardinal;
base64state: Byte;
kmax: Cardinal; // number of payload bytes to read
k: Cardinal; // number of payload bytes already read
base64count: Cardinal; // number of base64 bytes already read
i: Cardinal;
{$ifdef TBuffChar_Has_At_Least_3_Bytes}
wc1, wc2: Cardinal;
{$endif TBuffChar_Has_At_Least_3_Bytes}
begin
c1 := GetNext;
if (state and 3) <> 0 then
goto active
else
goto inactive;
inactive:
// Here (state & 3) == 0
if c1 = 0 then
goto none;
if IsXDirect(c1) then
begin
Result := TBuffChar(c1);
exit;
end;
if c1 = Ord('+') then
begin
c2 := GetNext;
if c2 = Ord('-') then
begin
Result := TBuffChar(c1);
exit;
end;
c1 := c2;
state := 1;
goto active;
end;
Result := TBuffChar($FFFD);
exit;
active:
// base64 encoding active
wc := 0;
base64state := State;
kmax := 2;
k := 0;
base64count := 0;
{$IFNDEF Compiler32_Plus}i := 0;{$ENDIF} // valium for the compiler
while c1 <> 0 do
begin
case c1 of
Ord('A')..Ord('Z'): i := c1 - Ord('A');
Ord('a')..Ord('z'): i := c1 - Ord('a') + 26;
Ord('0')..Ord('9'): i := c1 - Ord('0') + 52;
Ord('+'): i := 62;
Ord('/'): i := 63;
else
// c terminates base64 encoding
if (base64state and Cardinal(-4)) <> 0 then
begin
Result := TBuffChar($FFFD); // data must be 0, otherwise illegal
State := 0;
exit;
end;
if base64count <> 0 then
begin
Result := TBuffChar($FFFD); // partial UTF-16 characters are invalid
State := 0;
exit;
end;
if c1 = Ord('-') then
c1 := GetNext;
state := 0;
goto inactive;
end;
//s++;
Inc(base64count);
//* read 6 bits: 0 <= i < 64 */
case base64state and 3 of
1: // inside base64, no pending bits
begin
base64state := i shl 2;
end;
0: // inside base64, 6 bits remain from 1st byte
begin
wc := (wc shl 8) or (base64state and Cardinal(-4)) or (i shr 4);
Inc(k);
base64state := ((i and 15) shl 4) or 2;
end;
2: // inside base64, 4 bits remain from 2nd byte
begin
wc := (wc shl 8) or (base64state and Cardinal(-4)) or (i shr 2);
Inc(k);
base64state := ((i and 3) shl 6) or 3;
end;
3: // inside base64, 2 bits remain from 3rd byte
begin
wc := (wc shl 8) or (base64state and Cardinal(-4)) or i;
Inc(k);
base64state := 1;
end;
end;
if k = kmax then
begin
// UTF-16: When we see a High Surrogate, we must also decode the following Low Surrogate.
if (kmax = 2) and (wc >= $D800) and (wc <= $DBFF) then
kmax := 4
else
break;
end;
if base64count <= 0 then
goto none;
c1 := GetNext;
if c1 = 0 then
goto none;
end;
// Here k = kmax > 0, hence base64count > 0.
if (base64state and 3) = 0 then
Result := TBuffChar($FFFD)
else if kmax = 4 then
{$ifdef TBuffChar_Has_At_Least_3_Bytes}
begin
wc1 := wc shr 16;
wc2 := wc and 0xffff;
if not ((wc1 >= $D800) and (wc1 < $DC00)) then
Result := TBuffChar($FFFD)
else if not ((wc2 >= $DC00) and (wc2 < $E000)) then
Result := TBuffChar($FFFD)
else
Result := $10000 + ((wc1 - $D800) shl 10) + (wc2 - $DC00);
end
{$else TBuffChar_Has_At_Least_3_Bytes}
Result := TBuffChar($FFFD)
{$endif TBuffChar_Has_At_Least_3_Bytes}
else
Result := TBuffChar(wc);
State := base64state;
exit;
none:
if c1 = 0 then
Result := TBuffChar(0)
else
Result := TBuffChar($FFFD);
end;
end.