184 lines
4.8 KiB
C
184 lines
4.8 KiB
C
|
/*****************************************************************************
|
||
|
*
|
||
|
* token.c
|
||
|
*
|
||
|
* Tokenization.
|
||
|
*
|
||
|
* The tokenizer always returns unsnapped tokens.
|
||
|
*
|
||
|
* We avoid the traditional tokenizer problems of ``giant comment'' and
|
||
|
* ``giant string'' by using a dynamic token buffer.
|
||
|
*
|
||
|
* All tokens are stacked into the token buffer. If you need the token
|
||
|
* to be persistent, you have to save it somewhere else.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
#include "m4.h"
|
||
|
|
||
|
/*****************************************************************************
|
||
|
*
|
||
|
* typGetComTch
|
||
|
*
|
||
|
* Scan and consume a comment token, returning typQuo
|
||
|
* because comments and quotes are essentially the same thing.
|
||
|
* tch contains the open-comment.
|
||
|
*
|
||
|
* Comments do not nest.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
TYP STDCALL
|
||
|
typGetComTch(TCH tch)
|
||
|
{
|
||
|
AddArgTch(tch); /* Save the comment start */
|
||
|
do {
|
||
|
tch = tchGet();
|
||
|
AddArgTch(tch);
|
||
|
if (tch == tchMagic) {
|
||
|
/* Ooh, regurgitating a magic token - these consist of two bytes */
|
||
|
tch = tchGet();
|
||
|
if (tch == tchEof) {
|
||
|
Die("EOF in comment");
|
||
|
}
|
||
|
AddArgTch(tch);
|
||
|
}
|
||
|
} while (!fRcomTch(tch));
|
||
|
return typQuo;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
*
|
||
|
* typGetQuoTch
|
||
|
*
|
||
|
* Scan and consume a quote token, returning typQuo.
|
||
|
* tch contains the open-quote.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
TYP STDCALL
|
||
|
typGetQuoTch(TCH tch)
|
||
|
{
|
||
|
int iDepth = 1;
|
||
|
for (;;) {
|
||
|
tch = tchGet();
|
||
|
if (tch == tchMagic) {
|
||
|
/* SOMEDAY -- Should unget so that Die won't see past EOF */
|
||
|
|
||
|
/* Ooh, regurgitating a magic token - these consist of two bytes */
|
||
|
tch = tchGet();
|
||
|
if (tch == tchEof) {
|
||
|
Die("EOF in quote");
|
||
|
}
|
||
|
AddArgTch(tchMagic); /* Add the magic prefix */
|
||
|
/* Fallthrough will add tch */
|
||
|
} else if (fLquoTch(tch)) {
|
||
|
++iDepth;
|
||
|
} else if (fRquoTch(tch)) {
|
||
|
if (--iDepth == 0) {
|
||
|
break; /* Final Rquo found */
|
||
|
}
|
||
|
}
|
||
|
AddArgTch(tch);
|
||
|
}
|
||
|
return typQuo;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
*
|
||
|
* typGetIdentTch
|
||
|
*
|
||
|
* Scan and consume an identifier token, returning typId.
|
||
|
* tch contains the first character of the identifier.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
TYP STDCALL
|
||
|
typGetIdentTch(TCH tch)
|
||
|
{
|
||
|
do {
|
||
|
AddArgTch(tch);
|
||
|
tch = tchGet();
|
||
|
} while (fIdentTch(tch));
|
||
|
UngetTch(tch);
|
||
|
return typId;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
*
|
||
|
* typGetMagicTch
|
||
|
*
|
||
|
* Scan and consume a magic token, returning the token type.
|
||
|
* Magics are out-of-band gizmos that get inserted into the
|
||
|
* input stream via the tchMagic escape.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
TYP STDCALL
|
||
|
typGetMagicTch(TCH tch)
|
||
|
{
|
||
|
AddArgTch(tch);
|
||
|
tch = tchGet();
|
||
|
Assert(fValidMagicTch(tch));
|
||
|
AddArgTch(tch);
|
||
|
return typMagic;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
*
|
||
|
* typGetPuncTch
|
||
|
*
|
||
|
* Scan and consume a punctuation token, returning the token type.
|
||
|
*
|
||
|
* (Comments are recognized by the caller, typGetPtok, not here.)
|
||
|
*
|
||
|
*
|
||
|
* LATER - It is here where consecutive typPunc's are coalesced.
|
||
|
* This would speed up top-level scanning.
|
||
|
* Be careful not to coalesce a comma!
|
||
|
* Lparen is okay because xtok handles that one.
|
||
|
* Whitespace is also okay because xtok handles those too.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
TYP STDCALL
|
||
|
typGetPuncTch(TCH tch)
|
||
|
{
|
||
|
AddArgTch(tch);
|
||
|
return typPunc;
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
*
|
||
|
* typGetPtok
|
||
|
*
|
||
|
* Scan and consume a snapped token, returning the token type.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
TYP STDCALL
|
||
|
typGetPtok(PTOK ptok)
|
||
|
{
|
||
|
TCH tch;
|
||
|
TYP typ;
|
||
|
|
||
|
OpenArgPtok(ptok);
|
||
|
|
||
|
tch = tchGet();
|
||
|
|
||
|
if (fInitialIdentTch(tch)) {
|
||
|
typ = typGetIdentTch(tch);
|
||
|
} else if (fLcomTch(tch)) {
|
||
|
typ = typGetComTch(tch);
|
||
|
} else if (fLquoTch(tch)) {
|
||
|
typ = typGetQuoTch(tch);
|
||
|
} else if (fMagicTch(tch)) {
|
||
|
typ = typGetMagicTch(tch);
|
||
|
} else {
|
||
|
typ = typGetPuncTch(tch);
|
||
|
}
|
||
|
CloseArgPtok(ptok);
|
||
|
SnapArgPtok(ptok);
|
||
|
return typ;
|
||
|
}
|