Windows-Server-2003/sdktools/m4/token.c

/*****************************************************************************
 *
 * token.c
 *
 *  Tokenization.
 *
 *  The tokenizer always returns unsnapped tokens.
 *
 *  We avoid the traditional tokenizer problems of ``giant comment'' and
 *  ``giant string'' by using a dynamic token buffer.
 *
 *  All tokens are stacked into the token buffer.  If you need the token
 *  to be persistent, you have to save it somewhere else.
 *
 *****************************************************************************/

#include "m4.h"

/*****************************************************************************
 *
 *  typGetComTch
 *
 *      Collect a comment into the current argument and return typQuo;
 *      a comment is reported with the same token type as a quote.
 *
 *      On entry, tch is the character that opened the comment.  It is
 *      stored as part of the token, and so is every character read
 *      afterwards, up to and including the first one for which
 *      fRcomTch() is true.  Comments therefore do not nest.
 *
 *      A tchMagic prefix is stored together with the byte that follows
 *      it (magic tokens consist of two bytes).  If that following byte
 *      is tchEof, we Die with "EOF in comment".
 *
 *      NOTE(review): after a magic pair has been copied, the
 *      end-of-comment test is applied to the second byte of the pair.
 *
 *****************************************************************************/

TYP STDCALL
typGetComTch(TCH tch)
{
    AddArgTch(tch);                     /* The opener belongs to the token */

    for (;;) {
        tch = tchGet();
        AddArgTch(tch);

        if (tch == tchMagic) {
            /* The prefix is already stored; fetch and store its partner */
            tch = tchGet();
            if (tch == tchEof) {
                Die("EOF in comment");
            }
            AddArgTch(tch);
        }

        if (fRcomTch(tch)) {
            break;                      /* Terminator is already stored */
        }
    }

    return typQuo;
}

/*****************************************************************************
 *
 *  typGetQuoTch
 *
 *      Collect a quoted string into the current argument and return
 *      typQuo.
 *
 *      On entry, tch is the open-quote; it is not stored.  Quotes nest:
 *      each further open-quote raises the nesting level and each
 *      close-quote lowers it.  Nested quote characters are stored, but
 *      the close-quote that brings the level back to zero ends the scan
 *      and is not stored.
 *
 *      A character is tested as an open-quote before it is tested as a
 *      close-quote.
 *
 *      A tchMagic prefix and the byte after it are stored verbatim; the
 *      byte after the prefix is never examined as a quote character.
 *      If that byte is tchEof, we Die with "EOF in quote".
 *
 *****************************************************************************/

TYP STDCALL
typGetQuoTch(TCH tch)
{
    int cNest = 1;                      /* The caller saw the first Lquo */

    for (;;) {
        tch = tchGet();

        if (tch == tchMagic) {
            /* SOMEDAY -- Should unget so that Die won't see past EOF */

            /* Magic tokens are two bytes long; pick up the second one */
            tch = tchGet();
            if (tch == tchEof) {
                Die("EOF in quote");
            }
            AddArgTch(tchMagic);        /* Prefix first... */
            AddArgTch(tch);             /* ...then the byte it introduces */
            continue;
        }

        if (fLquoTch(tch)) {
            cNest++;
        } else if (fRquoTch(tch)) {
            --cNest;
            if (cNest == 0) {
                break;                  /* Outermost Rquo; do not store it */
            }
        }

        AddArgTch(tch);
    }

    return typQuo;
}

/*****************************************************************************
 *
 *  typGetIdentTch
 *
 *      Collect an identifier into the current argument and return typId.
 *
 *      On entry, tch is the first character of the identifier; it is
 *      stored unconditionally.  Further characters are stored for as
 *      long as fIdentTch() accepts them.  The first character that is
 *      rejected is handed back with UngetTch() so that it starts the
 *      next token.
 *
 *****************************************************************************/

TYP STDCALL
typGetIdentTch(TCH tch)
{
    AddArgTch(tch);                     /* Leading character, always kept */

    for (tch = tchGet(); fIdentTch(tch); tch = tchGet()) {
        AddArgTch(tch);
    }

    UngetTch(tch);                      /* Not ours; return it to the input */
    return typId;
}

/*****************************************************************************
 *
 *  typGetMagicTch
 *
 *      Collect a magic token into the current argument and return
 *      typMagic.  Magics are out-of-band gizmos that get inserted into
 *      the input stream via the tchMagic escape.
 *
 *      On entry, tch is the character that introduced the magic (the
 *      caller in this file gets here when fMagicTch(tch) is true).
 *      Both that character and the one following it are stored.  The
 *      following character is checked with Assert(fValidMagicTch()).
 *
 *****************************************************************************/

TYP STDCALL
typGetMagicTch(TCH tch)
{
    TCH tchCode;                        /* Byte that follows the prefix */

    AddArgTch(tch);

    tchCode = tchGet();
    Assert(fValidMagicTch(tchCode));
    AddArgTch(tchCode);

    return typMagic;
}

/*****************************************************************************
 *
 *  typGetPuncTch
 *
 *      Consume a punctuation token, returning typPunc.
 *
 *      The token consists of the single character tch, which is stored
 *      in the current argument.  No further input is read.
 *
 *      Comments are not recognized here: in this file, typGetPtok
 *      tests fLcomTch() and routes comment openers to typGetComTch
 *      before this routine is ever considered.
 *
 *  LATER - This is the place where consecutive typPunc's could be
 *  coalesced (not implemented).  This would speed up top-level scanning.
 *  Be careful not to coalesce a comma!
 *  Lparen is okay because xtok handles that one.
 *  Whitespace is also okay because xtok handles those too.
 *
 *****************************************************************************/

TYP STDCALL
typGetPuncTch(TCH tch)
{
    AddArgTch(tch);
    return typPunc;
}

/*****************************************************************************
 *
 *  typGetPtok
 *
 *      Scan and consume a snapped token, returning the token type.
 *
 *****************************************************************************/

TYP STDCALL
typGetPtok(PTOK ptok)
{
    TCH tch;
    TYP typ;

    OpenArgPtok(ptok);

    tch = tchGet();

    if (fInitialIdentTch(tch)) {
        typ = typGetIdentTch(tch);
    } else if (fLcomTch(tch)) {
        typ = typGetComTch(tch);
    } else if (fLquoTch(tch)) {
        typ = typGetQuoTch(tch);
    } else if (fMagicTch(tch)) {
        typ = typGetMagicTch(tch);
    } else {
        typ = typGetPuncTch(tch);
    }
    CloseArgPtok(ptok);
    SnapArgPtok(ptok);
    return typ;
}
Initiall commit 2024-08-04 01:28:15 +02:00			`/*****************************************************************************`
			`*`
			`* token.c`
			`*`
			`* Tokenization.`
			`*`
			`* The tokenizer always returns unsnapped tokens.`
			`*`
			* We avoid the traditional tokenizer problems of ``giant comment'' and
			* ``giant string'' by using a dynamic token buffer.
			`*`
			`* All tokens are stacked into the token buffer. If you need the token`
			`* to be persistent, you have to save it somewhere else.`
			`*`
			`*****************************************************************************/`

			`#include "m4.h"`

			`/*****************************************************************************`
			`*`
			`* typGetComTch`
			`*`
			`* Scan and consume a comment token, returning typQuo`
			`* because comments and quotes are essentially the same thing.`
			`* tch contains the open-comment.`
			`*`
			`* Comments do not nest.`
			`*`
			`*****************************************************************************/`

			`TYP STDCALL`
			`typGetComTch(TCH tch)`
			`{`
			`AddArgTch(tch); /* Save the comment start */`
			`do {`
			`tch = tchGet();`
			`AddArgTch(tch);`
			`if (tch == tchMagic) {`
			`/* Ooh, regurgitating a magic token - these consist of two bytes */`
			`tch = tchGet();`
			`if (tch == tchEof) {`
			`Die("EOF in comment");`
			`}`
			`AddArgTch(tch);`
			`}`
			`} while (!fRcomTch(tch));`
			`return typQuo;`
			`}`

			`/*****************************************************************************`
			`*`
			`* typGetQuoTch`
			`*`
			`* Scan and consume a quote token, returning typQuo.`
			`* tch contains the open-quote.`
			`*`
			`*****************************************************************************/`

			`TYP STDCALL`
			`typGetQuoTch(TCH tch)`
			`{`
			`int iDepth = 1;`
			`for (;;) {`
			`tch = tchGet();`
			`if (tch == tchMagic) {`
			`/* SOMEDAY -- Should unget so that Die won't see past EOF */`

			`/* Ooh, regurgitating a magic token - these consist of two bytes */`
			`tch = tchGet();`
			`if (tch == tchEof) {`
			`Die("EOF in quote");`
			`}`
			`AddArgTch(tchMagic); /* Add the magic prefix */`
			`/* Fallthrough will add tch */`
			`} else if (fLquoTch(tch)) {`
			`++iDepth;`
			`} else if (fRquoTch(tch)) {`
			`if (--iDepth == 0) {`
			`break; /* Final Rquo found */`
			`}`
			`}`
			`AddArgTch(tch);`
			`}`
			`return typQuo;`
			`}`

			`/*****************************************************************************`
			`*`
			`* typGetIdentTch`
			`*`
			`* Scan and consume an identifier token, returning typId.`
			`* tch contains the first character of the identifier.`
			`*`
			`*****************************************************************************/`

			`TYP STDCALL`
			`typGetIdentTch(TCH tch)`
			`{`
			`do {`
			`AddArgTch(tch);`
			`tch = tchGet();`
			`} while (fIdentTch(tch));`
			`UngetTch(tch);`
			`return typId;`
			`}`

			`/*****************************************************************************`
			`*`
			`* typGetMagicTch`
			`*`
			`* Scan and consume a magic token, returning the token type.`
			`* Magics are out-of-band gizmos that get inserted into the`
			`* input stream via the tchMagic escape.`
			`*`
			`*****************************************************************************/`

			`TYP STDCALL`
			`typGetMagicTch(TCH tch)`
			`{`
			`AddArgTch(tch);`
			`tch = tchGet();`
			`Assert(fValidMagicTch(tch));`
			`AddArgTch(tch);`
			`return typMagic;`
			`}`

			`/*****************************************************************************`
			`*`
			`* typGetPuncTch`
			`*`
			`* Scan and consume a punctuation token, returning the token type.`
			`*`
			`* It is here that comments are recognized.`
			`*`
			`*`
			`* LATER - It is here where consecutive typPunc's are coalesced.`
			`* This would speed up top-level scanning.`
			`* Be careful not to coalesce a comma!`
			`* Lparen is okay because xtok handles that one.`
			`* Whitespace is also okay because xtok handles those too.`
			`*`
			`*****************************************************************************/`

			`TYP STDCALL`
			`typGetPuncTch(TCH tch)`
			`{`
			`AddArgTch(tch);`
			`return typPunc;`
			`}`

			`/*****************************************************************************`
			`*`
			`* typGetPtok`
			`*`
			`* Scan and consume a snapped token, returning the token type.`
			`*`
			`*****************************************************************************/`

			`TYP STDCALL`
			`typGetPtok(PTOK ptok)`
			`{`
			`TCH tch;`
			`TYP typ;`

			`OpenArgPtok(ptok);`

			`tch = tchGet();`

			`if (fInitialIdentTch(tch)) {`
			`typ = typGetIdentTch(tch);`
			`} else if (fLcomTch(tch)) {`
			`typ = typGetComTch(tch);`
			`} else if (fLquoTch(tch)) {`
			`typ = typGetQuoTch(tch);`
			`} else if (fMagicTch(tch)) {`
			`typ = typGetMagicTch(tch);`
			`} else {`
			`typ = typGetPuncTch(tch);`
			`}`
			`CloseArgPtok(ptok);`
			`SnapArgPtok(ptok);`
			`return typ;`
			`}`