8801 lines
229 KiB
C++
8801 lines
229 KiB
C++
/*++
|
||
|
||
Copyright (c) 1994 Microsoft Corporation
|
||
|
||
Module Name:
|
||
|
||
urlpars.cpp
|
||
|
||
Abstract:
|
||
|
||
Contains all the worker routines for Combine and Canonicalize
|
||
|
||
Contents:
|
||
(ConvertChar)
|
||
|
||
Author:
|
||
|
||
Zeke Lucas (zekel) 16-Dez-96
|
||
|
||
Ahsan Kabir (akabir): UrlCombine parser rewritten in July-Sept98
|
||
|
||
Environment:
|
||
|
||
Win32(s) user-mode DLL
|
||
|
||
Revision History:
|
||
|
||
there is about one percent of this derived
|
||
from the Spyglass or MSHTML/WININET codebase
|
||
|
||
--*/
|
||
|
||
#include "priv.h"
|
||
#include <shstr.h>
|
||
|
||
#ifdef UNIX
|
||
#include <shlobj.h>
|
||
#endif
|
||
|
||
#include <intshcut.h>
|
||
|
||
#include <shlwapip.h>
|
||
|
||
#ifdef UNIX
|
||
#include "unixstuff.h"
|
||
#endif
|
||
#include <wininet.h>
|
||
|
||
#define DM_PERF 0 // perf stats
|
||
|
||
#define PF_LOGSCHEMEHITS 0x00000001
|
||
|
||
#ifndef CPP_FUNCTIONS
|
||
#define CPP_FUNCTIONS
|
||
#include <crtfree.h>
|
||
#endif
|
||
|
||
#define USE_FAST_PARSER
|
||
#ifdef DEBUG
|
||
//#define PROOFREAD_PARSES
|
||
#endif
|
||
|
||
// Same as in wininet; however, this is only theoretical, since urls aren't necessarily so
|
||
// constrained. However, this is true throughout the product, so we'll have to do this.
|
||
|
||
#define INTERNET_MAX_PATH_LENGTH 2048
|
||
#define INTERNET_MAX_SCHEME_LENGTH 32
|
||
|
||
#define HEX_ESCAPE L'%'
|
||
#define HEX_ESCAPE_A '%'
|
||
|
||
#define TERMSTR(pch) *(pch) = L'\0'
|
||
|
||
// (WCHAR) 8 is backspace
|
||
#define DEADSEGCHAR ((WCHAR) 8)
|
||
#define KILLSEG(pch) *(pch) = DEADSEGCHAR
|
||
|
||
#define CR L'\r'
|
||
#define LF L'\n'
|
||
#define TAB L'\t'
|
||
#define SPC L' '
|
||
#define SLASH L'/'
|
||
#define WHACK L'\\'
|
||
#define QUERY L'?'
|
||
#define POUND L'#'
|
||
#define SEMICOLON L';'
|
||
#define COLON L':'
|
||
#define BAR L'|'
|
||
#define DOT L'.'
|
||
#define AT L'@'
|
||
|
||
#define UPF_SCHEME_OPAQUE 0x00000001 // should not be treated as heriarchical
|
||
#define UPF_SCHEME_INTERNET 0x00000002
|
||
#define UPF_SCHEME_NOHISTORY 0x00000004
|
||
#define UPF_SCHEME_CONVERT 0x00000008 // treat slashes and whacks as equiv
|
||
#define UPF_SCHEME_DONTCORRECT 0x00000010 // Don't try to autocorrect to this scheme
|
||
|
||
|
||
#define UPF_SEG_ABSOLUTE 0x00000100 // the initial segment is the root
|
||
#define UPF_SEG_LOCKFIRST 0x00000200 // this is for file parsing
|
||
#define UPF_SEG_EMPTYSEG 0x00000400 // this was an empty string, but is still important
|
||
#define UPF_EXSEG_DIRECTORY 0x00001000 // the final segment is a "directory" (trailing slash)
|
||
|
||
#define UPF_FILEISPATHURL 0x10000000 // this is for file paths, dont unescape because they are actually dos paths
|
||
//
|
||
// the masks are for inheritance purposes during BlendParts
|
||
// if you inherit that part you inherit that mask
|
||
//
|
||
#define UPF_SCHEME_MASK 0x000000FF
|
||
#define UPF_SEG_MASK 0x00000F00
|
||
#define UPF_EXSEG_MASK 0x0000F000
|
||
|
||
|
||
// right now these masks are unused, and can be recycled
|
||
#define UPF_SERVER_MASK 0x000F0000
|
||
#define UPF_QUERY_MASK 0x0F000000
|
||
|
||
extern "C" int _StrCmpNA(LPCSTR lpStr1, LPCSTR lpStr2, int nChar, BOOL fMBCS);
|
||
extern "C" LPSTR _StrChrA(LPCSTR lpStart, WORD wMatch, BOOL fMBCS);
|
||
typedef struct _UrlParts
|
||
{
|
||
DWORD dwFlags;
|
||
LPWSTR pszScheme;
|
||
URL_SCHEME eScheme;
|
||
LPWSTR pszServer;
|
||
LPWSTR pszSegments;
|
||
DWORD cSegments;
|
||
LPWSTR pszExtraSegs;
|
||
DWORD cExtraSegs;
|
||
LPWSTR pszQuery;
|
||
LPWSTR pszFragment;
|
||
} URLPARTS, *PURLPARTS;
|
||
|
||
|
||
HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags);
|
||
HRESULT SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags);
|
||
|
||
// Ansi wrappers might overwrite the unicode core's return value
|
||
// We should try to prevent that
|
||
HRESULT ReconcileHresults(HRESULT hr1, HRESULT hr2)
|
||
{
|
||
return (hr2==S_OK) ? hr1 : hr2;
|
||
}
|
||
|
||
|
||
|
||
PRIVATE CONST WORD isSafe[96] =
|
||
|
||
/* Bit 0 alphadigit -- 'a' to 'z', '0' to '9', 'A' to 'Z'
|
||
** Bit 1 Hex -- '0' to '9', 'a' to 'f', 'A' to 'F'
|
||
** Bit 2 valid scheme -- alphadigit | "-" | "." | "+"
|
||
** Bit 3 mark -- "%" | "$"| "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" | ","
|
||
*/
|
||
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
||
// {0, 8, 0, 0, 8, 8, 0, 8, 8, 8, 8, 4, 8,12,12, 0, /* 2x !"#$%&'()*+,-./ */
|
||
// IE4 BETA1: allow + through unmolested. Should consider other options
|
||
// post beta1. 12feb97 tonyci
|
||
{0, 8, 0, 0, 8, 8, 0, 8, 8, 8, 8, 12, 8,12,12, 0, /* 2x !"#$%&'()*+,-./ */
|
||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 8, 0, 8, 0, 0, /* 3x 0123456789:;<=>? */
|
||
8, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x @ABCDEFGHIJKLMNO */
|
||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 8, /* 5X PQRSTUVWXYZ[\]^_ */
|
||
0, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x `abcdefghijklmno */
|
||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 8, 0}; /* 7X pqrstuvwxyz{|}~ DEL */
|
||
|
||
PRIVATE const WCHAR hex[] = L"0123456789ABCDEF";
|
||
|
||
PRIVATE inline BOOL IsSafe(WCHAR ch, WORD mask)
|
||
{
|
||
if(((ch > 31 ) && (ch < 128) && (isSafe[ch - 32] & mask)))
|
||
return TRUE;
|
||
|
||
return FALSE;
|
||
}
|
||
|
||
#define IsAlphaDigit(c) IsSafe(c, 1)
|
||
#define IsHex(c) IsSafe(c, 2)
|
||
#define IsValidSchemeCharA(c) IsSafe(c, 5)
|
||
#define IsSafePathChar(c) ((c > 0xff) || IsSafe(c, 9))
|
||
#define IsUpper(c) ((c) >= 'A' && (c) <= 'Z')
|
||
|
||
PRIVATE inline BOOL IsAsciiCharW(WCHAR ch)
|
||
{
|
||
return (!(ch >> 8) && ((CHAR) ch));
|
||
}
|
||
|
||
PRIVATE inline WCHAR Ascii_ToLowerW(WCHAR ch)
|
||
{
|
||
return (ch >= L'A' && ch <= L'Z') ? (ch - L'A' + L'a') : ch;
|
||
}
|
||
|
||
BOOL IsValidSchemeCharW(WCHAR ch)
|
||
{
|
||
if(IsAsciiCharW(ch))
|
||
return IsSafe( (CHAR) ch, 5);
|
||
return FALSE;
|
||
}
|
||
|
||
|
||
|
||
WCHAR const c_szHttpScheme[] = L"http";
|
||
WCHAR const c_szFileScheme[] = L"file";
|
||
WCHAR const c_szFTPScheme[] = L"ftp";
|
||
WCHAR const c_szGopherScheme[] = L"gopher";
|
||
WCHAR const c_szMailToScheme[] = L"mailto";
|
||
WCHAR const c_szNewsScheme[] = L"news";
|
||
WCHAR const c_szNNTPScheme[] = L"nntp";
|
||
WCHAR const c_szTelnetScheme[] = L"telnet";
|
||
WCHAR const c_szWAISScheme[] = L"wais";
|
||
WCHAR const c_szMkScheme[] = L"mk";
|
||
WCHAR const c_szHttpsScheme[] = L"https";
|
||
WCHAR const c_szLocalScheme[] = L"local";
|
||
WCHAR const c_szShellScheme[] = L"shell";
|
||
WCHAR const c_szJSScheme[] = L"javascript";
|
||
WCHAR const c_szVSScheme[] = L"vbscript";
|
||
WCHAR const c_szAboutScheme[] = L"about";
|
||
WCHAR const c_szSnewsScheme[] = L"snews";
|
||
WCHAR const c_szResScheme[] = L"res";
|
||
WCHAR const c_szRootedScheme[] = L"ms-shell-rooted";
|
||
WCHAR const c_szIDListScheme[] = L"ms-shell-idlist";
|
||
WCHAR const c_szMsHelpScheme[] = L"hcp";
|
||
|
||
const struct
|
||
{
|
||
LPCWSTR pszScheme;
|
||
URL_SCHEME eScheme;
|
||
DWORD cchScheme;
|
||
DWORD dwFlags;
|
||
} g_mpUrlSchemeTypes[] =
|
||
{
|
||
// Because we use a linear search, sort this in the order of
|
||
// most common usage.
|
||
{ c_szHttpScheme, URL_SCHEME_HTTP, SIZECHARS(c_szHttpScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szFileScheme, URL_SCHEME_FILE, SIZECHARS(c_szFileScheme) - 1, UPF_SCHEME_CONVERT},
|
||
{ c_szFTPScheme, URL_SCHEME_FTP, SIZECHARS(c_szFTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szHttpsScheme, URL_SCHEME_HTTPS, SIZECHARS(c_szHttpsScheme) -1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT|UPF_SCHEME_DONTCORRECT},
|
||
{ c_szNewsScheme, URL_SCHEME_NEWS, SIZECHARS(c_szNewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szMailToScheme, URL_SCHEME_MAILTO, SIZECHARS(c_szMailToScheme) - 1, UPF_SCHEME_OPAQUE},
|
||
{ c_szGopherScheme, URL_SCHEME_GOPHER, SIZECHARS(c_szGopherScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szNNTPScheme, URL_SCHEME_NNTP, SIZECHARS(c_szNNTPScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szTelnetScheme, URL_SCHEME_TELNET, SIZECHARS(c_szTelnetScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szWAISScheme, URL_SCHEME_WAIS, SIZECHARS(c_szWAISScheme) - 1, 0},
|
||
{ c_szMkScheme, URL_SCHEME_MK, SIZECHARS(c_szMkScheme) - 1, UPF_SCHEME_NOHISTORY},
|
||
{ c_szShellScheme, URL_SCHEME_SHELL, SIZECHARS(c_szShellScheme) - 1, UPF_SCHEME_OPAQUE},
|
||
{ c_szLocalScheme, URL_SCHEME_LOCAL, SIZECHARS(c_szLocalScheme) - 1, 0},
|
||
{ c_szJSScheme, URL_SCHEME_JAVASCRIPT,SIZECHARS(c_szJSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
|
||
{ c_szVSScheme, URL_SCHEME_VBSCRIPT, SIZECHARS(c_szVSScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
|
||
{ c_szSnewsScheme, URL_SCHEME_SNEWS, SIZECHARS(c_szSnewsScheme) - 1, UPF_SCHEME_INTERNET|UPF_SCHEME_CONVERT},
|
||
{ c_szAboutScheme, URL_SCHEME_ABOUT, SIZECHARS(c_szAboutScheme) - 1, UPF_SCHEME_OPAQUE|UPF_SCHEME_NOHISTORY},
|
||
{ c_szResScheme, URL_SCHEME_RES, SIZECHARS(c_szResScheme) - 1, UPF_SCHEME_NOHISTORY},
|
||
{ c_szRootedScheme, URL_SCHEME_MSSHELLROOTED, SIZECHARS(c_szRootedScheme) - 1, 0},
|
||
{ c_szIDListScheme, URL_SCHEME_MSSHELLIDLIST, SIZECHARS(c_szIDListScheme) - 1, 0},
|
||
{ c_szMsHelpScheme, URL_SCHEME_MSHELP, SIZECHARS(c_szMsHelpScheme) - 1, 0},
|
||
};
|
||
|
||
PRIVATE int _StrCmpNMixed(LPCSTR psz, LPCWSTR pwz, DWORD cch)
|
||
{
|
||
int iRet = 0;
|
||
|
||
//
|
||
// we dont have to real mbcs conversion here because we are
|
||
// guaranteed to have only ascii chars here
|
||
//
|
||
|
||
for (;cch; psz++, pwz++, cch--)
|
||
{
|
||
WCHAR ch = *psz;
|
||
if (ch != *pwz)
|
||
{
|
||
//
|
||
// this makes it case insensitive
|
||
if (IsUpper(ch) && (ch + 32) == *pwz)
|
||
continue;
|
||
|
||
if(ch > *pwz)
|
||
iRet = 1;
|
||
else
|
||
iRet = -1;
|
||
break;
|
||
}
|
||
}
|
||
|
||
return iRet;
|
||
}
|
||
|
||
//*** g_iScheme -- cache for g_mpUrlSchemeTypes
|
||
// DESCRIPTION
|
||
// we call GetSchemeTypeAndFlags many times for the same scheme. if
|
||
// it's the 0th table entry, no biggee. if it's a later entry linear
|
||
// search isnt very good. add a 1-element MRU cache. even for the most common
|
||
// (by far) case of "http" (0th entry), we *still* win due to the cheaper
|
||
// StrCmpC and skipped loop.
|
||
// NOTES
|
||
// g_iScheme refs/sets are atomic so no need for lock
|
||
int g_iScheme; // last guy we hit
|
||
|
||
#ifdef DEBUG
|
||
int g_cSTTot, g_cSTHit, g_cSTHit0;
|
||
#endif
|
||
|
||
//
|
||
// all of the pszScheme to nScheme functions are necessary at this point
|
||
// because some parsing is vioent, and some is necessarily soft
|
||
//
|
||
PRIVATE URL_SCHEME
|
||
GetSchemeTypeAndFlagsW(LPCWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
|
||
{
|
||
DWORD i;
|
||
|
||
ASSERT(pszScheme);
|
||
|
||
|
||
#ifdef DEBUG
|
||
if ((g_cSTTot % 10) == 0)
|
||
TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0);
|
||
#endif
|
||
DBEXEC(TRUE, g_cSTTot++);
|
||
// check cache 1st
|
||
i = g_iScheme;
|
||
if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme
|
||
&& StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0)
|
||
{
|
||
DBEXEC(TRUE, i == 0 ? g_cSTHit0++ : g_cSTHit++);
|
||
Lhit:
|
||
if (pdwFlags)
|
||
*pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
|
||
|
||
// update cache (unconditionally)
|
||
g_iScheme = i;
|
||
|
||
return g_mpUrlSchemeTypes[i].eScheme;
|
||
}
|
||
|
||
for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
|
||
{
|
||
if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme
|
||
&& 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
|
||
goto Lhit;
|
||
}
|
||
|
||
if (pdwFlags)
|
||
{
|
||
*pdwFlags = 0;
|
||
}
|
||
return URL_SCHEME_UNKNOWN;
|
||
}
|
||
|
||
PRIVATE URL_SCHEME
|
||
GetSchemeTypeAndFlagsA(LPCSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
|
||
{
|
||
DWORD i;
|
||
|
||
ASSERT(pszScheme);
|
||
|
||
|
||
for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
|
||
{
|
||
if(0 == _StrCmpNMixed(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
|
||
{
|
||
if (pdwFlags)
|
||
*pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
|
||
return g_mpUrlSchemeTypes[i].eScheme;
|
||
}
|
||
}
|
||
|
||
if (pdwFlags)
|
||
{
|
||
*pdwFlags = 0;
|
||
}
|
||
return URL_SCHEME_UNKNOWN;
|
||
}
|
||
|
||
/*----------------------------------------------------------
|
||
Purpose: Return the scheme ordinal type (URL_SCHEME_*) based on the
|
||
URL string.
|
||
|
||
|
||
Returns: URL_SCHEME_ ordinal
|
||
Cond: --
|
||
*/
|
||
|
||
PRIVATE inline BOOL IsSameSchemeW(LPCWSTR pszLocal, LPCWSTR pszGlobal, DWORD cch)
|
||
{
|
||
ASSERT(pszLocal);
|
||
ASSERT(pszGlobal);
|
||
ASSERT(cch);
|
||
|
||
return !StrCmpNIW(pszLocal, pszGlobal, cch);
|
||
}
|
||
|
||
PRIVATE BOOL IsSameSchemeA(LPCSTR pszLocal, LPCWSTR pszGlobal, DWORD cch)
|
||
{
|
||
ASSERT(pszLocal);
|
||
ASSERT(pszGlobal);
|
||
ASSERT(cch);
|
||
|
||
return !_StrCmpNMixed(pszLocal, pszGlobal, cch);
|
||
}
|
||
|
||
|
||
PRIVATE URL_SCHEME
|
||
SchemeTypeFromStringA(
|
||
LPCSTR psz,
|
||
DWORD cch)
|
||
{
|
||
DWORD i;
|
||
|
||
// psz is a counted string (by cch), not a null-terminated string,
|
||
// so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRA.
|
||
ASSERT(IS_VALID_READ_BUFFER(psz, CHAR, cch));
|
||
ASSERT(cch);
|
||
// We use a linear search. A binary search wouldn't pay off
|
||
// because the list isn't big enough, and we can sort the list
|
||
// according to the most popular protocol schemes and pay off
|
||
// bigger.
|
||
|
||
for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
|
||
{
|
||
if(cch == g_mpUrlSchemeTypes[i].cchScheme &&
|
||
IsSameSchemeA(psz, g_mpUrlSchemeTypes[i].pszScheme, cch))
|
||
return g_mpUrlSchemeTypes[i].eScheme;
|
||
}
|
||
|
||
return URL_SCHEME_UNKNOWN;
|
||
}
|
||
|
||
|
||
PRIVATE URL_SCHEME
|
||
SchemeTypeFromStringW(
|
||
LPCWSTR psz,
|
||
DWORD cch)
|
||
{
|
||
DWORD i;
|
||
|
||
// psz is a counted string (by cch), not a null-terminated string,
|
||
// so use IS_VALID_READ_BUFFER instead of IS_VALID_STRING_PTRW.
|
||
ASSERT(IS_VALID_READ_BUFFER(psz, WCHAR, cch));
|
||
ASSERT(cch);
|
||
|
||
// We use a linear search. A binary search wouldn't pay off
|
||
// because the list isn't big enough, and we can sort the list
|
||
// according to the most popular protocol schemes and pay off
|
||
// bigger.
|
||
|
||
for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
|
||
{
|
||
if(cch == g_mpUrlSchemeTypes[i].cchScheme &&
|
||
IsSameSchemeW(psz, g_mpUrlSchemeTypes[i].pszScheme, cch))
|
||
return g_mpUrlSchemeTypes[i].eScheme;
|
||
}
|
||
|
||
return URL_SCHEME_UNKNOWN;
|
||
}
|
||
|
||
//
|
||
// these are used during path fumbling that i do
|
||
// each string between a path delimiter ( '/' or '\')
|
||
// is a segment. we dont ever really care about
|
||
// empty ("") segments, so it is best to use
|
||
// NextLiveSegment().
|
||
//
|
||
inline PRIVATE LPWSTR
|
||
NextSegment(LPWSTR psz)
|
||
{
|
||
ASSERT (psz);
|
||
return psz + lstrlenW(psz) + 1;
|
||
}
|
||
|
||
#define IsLiveSegment(p) ((p) && (*p) != DEADSEGCHAR)
|
||
|
||
PRIVATE LPWSTR
|
||
NextLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs)
|
||
{
|
||
if(pszSeg) do
|
||
{
|
||
//
|
||
// count the number of dead segments that we skip.
|
||
// if the segment isnt dead, then we can just skip one,
|
||
// the current one.
|
||
//
|
||
DWORD cSkip;
|
||
for (cSkip = 0; (*pszSeg) == DEADSEGCHAR; pszSeg++, cSkip++);
|
||
cSkip = cSkip ? cSkip : 1;
|
||
|
||
if((*piSeg) + cSkip < cSegs)
|
||
{
|
||
|
||
pszSeg = NextSegment(pszSeg);
|
||
(*piSeg) += cSkip;
|
||
}
|
||
else
|
||
pszSeg = NULL;
|
||
|
||
} while (pszSeg && (*pszSeg == DEADSEGCHAR));
|
||
|
||
return pszSeg;
|
||
}
|
||
|
||
PRIVATE LPWSTR
|
||
LastLiveSegment(LPWSTR pszSeg, DWORD cSegs, BOOL fFailIfFirst)
|
||
{
|
||
DWORD iSeg = 0;
|
||
LPWSTR pszLast = NULL;
|
||
BOOL fLastIsFirst = FALSE;
|
||
|
||
if(cSegs)
|
||
{
|
||
if(IsLiveSegment(pszSeg))
|
||
{
|
||
pszLast = pszSeg;
|
||
fLastIsFirst = TRUE;
|
||
}
|
||
|
||
while(pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs))
|
||
{
|
||
if(!pszLast)
|
||
fLastIsFirst = TRUE;
|
||
else
|
||
fLastIsFirst = FALSE;
|
||
|
||
pszLast = pszSeg;
|
||
}
|
||
|
||
if(fFailIfFirst && fLastIsFirst)
|
||
pszLast = NULL;
|
||
}
|
||
|
||
return pszLast;
|
||
}
|
||
|
||
PRIVATE LPWSTR
|
||
FirstLiveSegment(LPWSTR pszSeg, DWORD *piSeg, DWORD cSegs)
|
||
{
|
||
ASSERT(piSeg);
|
||
|
||
*piSeg = 0;
|
||
|
||
if(!pszSeg || !cSegs)
|
||
return NULL;
|
||
|
||
if(!IsLiveSegment(pszSeg))
|
||
pszSeg = NextLiveSegment(pszSeg, piSeg, cSegs);
|
||
|
||
return pszSeg;
|
||
}
|
||
|
||
inline BOOL IsDosDrive(LPCWSTR p)
|
||
{
|
||
return (*p && p[1] == COLON);
|
||
}
|
||
|
||
inline BOOL IsDosPath(LPCWSTR p)
|
||
{
|
||
return (*p == WHACK || IsDosDrive(p));
|
||
}
|
||
|
||
inline BOOL IsDriveUrl(const WCHAR *p)
|
||
{
|
||
return (*p && p[1] == BAR);
|
||
}
|
||
|
||
inline BOOL IsDrive(LPCWSTR p)
|
||
{
|
||
return (IsDosDrive(p) || IsDriveUrl(p));
|
||
}
|
||
|
||
inline BOOL IsSeparator(const WCHAR *p)
|
||
{
|
||
return (*p == SLASH || *p == WHACK );
|
||
}
|
||
|
||
inline BOOL IsAbsolute(const WCHAR *p)
|
||
{
|
||
#ifndef UNIX
|
||
return (IsSeparator(p) || IsDrive(p));
|
||
#else
|
||
return (IsSeparator(p)) ;
|
||
#endif
|
||
}
|
||
|
||
#define IsUNC(pathW) PathIsUNCW(pathW)
|
||
|
||
inline BOOL IsDot(LPCWSTR p) // if p == "." return TRUE
|
||
{
|
||
return (*p == DOT && !p[1]);
|
||
}
|
||
|
||
inline BOOL IsDotDot(LPCWSTR p) // if p == ".." return TRUE
|
||
{
|
||
return (*p == DOT && p[1] == DOT && !p[2]);
|
||
}
|
||
|
||
//+---------------------------------------------------------------------------
|
||
//
|
||
// Method: ConvertChar
|
||
//
|
||
// Synopsis:
|
||
//
|
||
// Arguments: [szStr] --
|
||
// [cIn] --
|
||
// [cOut] --
|
||
//
|
||
// Returns:
|
||
//
|
||
// History: 03-20-96 JoeS (Joe Souza) Created
|
||
//
|
||
// Notes:
|
||
//
|
||
//----------------------------------------------------------------------------
|
||
static void ConvertChar(LPWSTR ptr, WCHAR cIn, WCHAR cOut, BOOL fProtectExtra)
|
||
{
|
||
while (*ptr)
|
||
{
|
||
if (fProtectExtra && (*ptr == QUERY || *ptr == POUND ))
|
||
{
|
||
break;
|
||
}
|
||
|
||
if (*ptr == cIn)
|
||
{
|
||
*ptr = cOut;
|
||
}
|
||
|
||
ptr++;
|
||
}
|
||
}
|
||
|
||
PUBLIC void WininetFixFileSlashes(WCHAR *p)
|
||
{
|
||
// NB: This function assumes that p points to a file URL.
|
||
// The file URL *MUST* be of the form "file://...".
|
||
// HTParse() guarantees that this will be so.
|
||
|
||
int schemelen = 0;
|
||
|
||
schemelen = SIZECHARS(L"file://") - 1;
|
||
|
||
/* In UNIX system, we don't need to convert the SLASH to WHACK */
|
||
if (p && lstrlenW(p) > schemelen)
|
||
{
|
||
#ifdef UNIX
|
||
ConvertChar(p + schemelen, WHACK, SLASH, TRUE);
|
||
#else
|
||
ConvertChar(p + schemelen, SLASH, WHACK, TRUE);
|
||
#endif
|
||
}
|
||
}
|
||
|
||
//
|
||
// in the URL spec, it says that all whitespace should be ignored
|
||
// due to the fact that it is possible to introduce
|
||
// new whitespace and eliminate other whitespace
|
||
// however, we are only going to strip out TAB CR LF
|
||
// because we consider SPACE's to be significant.
|
||
//
|
||
|
||
PRIVATE inline BOOL IsInsignificantWhite(WCHAR ch)
|
||
{
|
||
return (ch == TAB ||
|
||
ch == CR ||
|
||
ch == LF);
|
||
}
|
||
|
||
#define IsWhite(c) ((DWORD) (c) > 32 ? FALSE : TRUE)
|
||
|
||
PRIVATE void TrimAndStripInsignificantWhite(WCHAR *psz)
|
||
{
|
||
ASSERT(psz);
|
||
|
||
if(*psz)
|
||
{
|
||
|
||
LPCWSTR pszSrc = psz;
|
||
LPWSTR pszDest = psz;
|
||
LPWSTR pszLastSpace = NULL;
|
||
|
||
// first trim the front side by just moving the source pointer.
|
||
while(*pszSrc && IsWhite(*pszSrc)) {
|
||
pszSrc++;
|
||
}
|
||
|
||
//
|
||
// Copy the body stripping "insignificant" white spaces.
|
||
// Remember the last white space to trim trailing space later.
|
||
//
|
||
while (*pszSrc)
|
||
{
|
||
if(IsInsignificantWhite(*pszSrc)) {
|
||
pszSrc++;
|
||
} else {
|
||
if (IsWhite(*pszSrc)) {
|
||
if (pszLastSpace==NULL) {
|
||
pszLastSpace = pszDest;
|
||
}
|
||
} else {
|
||
pszLastSpace = NULL;
|
||
}
|
||
|
||
*pszDest++ = *pszSrc++;
|
||
}
|
||
}
|
||
|
||
// Trim the trailing space
|
||
if (pszLastSpace) {
|
||
*pszLastSpace = L'\0';
|
||
} else {
|
||
*pszDest = L'\0';
|
||
}
|
||
|
||
}
|
||
}
|
||
|
||
|
||
struct EXTKEY
|
||
{
|
||
PCSTR szExt;
|
||
PCWSTR wszExt;
|
||
DWORD cchExt;
|
||
};
|
||
|
||
const EXTKEY ExtTable[] = {
|
||
{ ".html", L".html", ARRAYSIZE(".html") - 1 },
|
||
{ ".htm", L".htm", ARRAYSIZE(".htm") - 1 },
|
||
{ ".xml", L".xml", ARRAYSIZE(".xml") - 1 },
|
||
{ ".doc", L".doc", ARRAYSIZE(".doc") - 1 },
|
||
{ ".xls", L".xls", ARRAYSIZE(".xls") - 1 },
|
||
{ ".ppt", L".ppt", ARRAYSIZE(".ppt") - 1 },
|
||
{ ".rtf", L".rtf", ARRAYSIZE(".rtf") - 1 },
|
||
{ ".dot", L".dot", ARRAYSIZE(".dot") - 1 },
|
||
{ ".xlw", L".xlw", ARRAYSIZE(".xlw") - 1 },
|
||
{ ".pps", L".pps", ARRAYSIZE(".pps") - 1 },
|
||
{ ".xlt", L".xlt", ARRAYSIZE(".xlt") - 1 },
|
||
{ ".hta", L".hta", ARRAYSIZE(".hta") - 1 },
|
||
{ ".pot", L".pot", ARRAYSIZE(".pot") - 1 },
|
||
{ ".pdf", L".pdf", ARRAYSIZE(".pdf") - 1 }
|
||
};
|
||
|
||
inline BOOL CompareExtA(PCSTR psz, DWORD_PTR cch)
|
||
{
|
||
for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
|
||
{
|
||
if (ExtTable[i].cchExt>cch)
|
||
continue;
|
||
|
||
if (!StrCmpNIA(psz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].szExt, ExtTable[i].cchExt))
|
||
return TRUE;
|
||
}
|
||
return FALSE;
|
||
}
|
||
|
||
inline BOOL CompareExtW(PCWSTR pwsz, DWORD_PTR cch)
|
||
{
|
||
for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
|
||
{
|
||
if (ExtTable[i].cchExt>cch)
|
||
continue;
|
||
|
||
if (!StrCmpNIW(pwsz - (LONG_PTR)ExtTable[i].cchExt, ExtTable[i].wszExt, ExtTable[i].cchExt))
|
||
return TRUE;
|
||
}
|
||
return FALSE;
|
||
}
|
||
|
||
|
||
PRIVATE LPCSTR FindFragmentA(LPCSTR psz, BOOL fMBCS, BOOL fIsFile)
|
||
{
|
||
CHAR *pch = _StrChrA(psz, POUND, fMBCS);
|
||
if(pch && fIsFile)
|
||
{
|
||
CHAR *pchQuery = _StrChrA(psz, QUERY, fMBCS);
|
||
if (pchQuery && (pchQuery < pch))
|
||
goto exit;
|
||
|
||
do
|
||
{
|
||
LONG_PTR cch = pch - psz;
|
||
|
||
// REARCHITECT: we shouldn't hardcode ".htm".
|
||
// #s are significant in dospaths - zekel 9-JUL-97
|
||
// so we want to check the path in front and make sure
|
||
// that it is an html file. we believe this heuristic should work
|
||
// in about 99% of all cases.
|
||
//
|
||
// if it is not an html file it is not a hash
|
||
if (CompareExtA(pch, cch))
|
||
{
|
||
break;
|
||
}
|
||
} while (pch = _StrChrA(++pch, POUND, fMBCS));
|
||
}
|
||
exit:
|
||
return pch;
|
||
}
|
||
|
||
PRIVATE LPCWSTR FindFragmentW(LPCWSTR psz, BOOL fIsFile)
|
||
{
|
||
WCHAR *pch = StrChrW(psz, POUND);
|
||
if(pch && fIsFile)
|
||
{
|
||
WCHAR *pchQuery = StrChrW(psz, QUERY);
|
||
if (pchQuery && (pchQuery < pch))
|
||
goto exit;
|
||
|
||
do
|
||
{
|
||
LONG_PTR cch = pch - psz;
|
||
|
||
// REARCHITECT: we shouldn't hardcode ".htm".
|
||
// #s are significant in dospaths - zekel 9-JUL-97
|
||
// so we want to check the path in front and make sure
|
||
// that it is an html file. we believe this heuristic should work
|
||
// in about 99% of all cases.
|
||
//
|
||
// if it is not an html file it is not a hash
|
||
if (CompareExtW(pch, cch))
|
||
{
|
||
break;
|
||
}
|
||
|
||
} while (pch = StrChrW(++pch, POUND));
|
||
}
|
||
exit:
|
||
return pch;
|
||
}
|
||
|
||
PRIVATE VOID BreakFragment(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
ASSERT(ppsz);
|
||
ASSERT(*ppsz);
|
||
|
||
//
|
||
// Opaque URLs are not allowed to use fragments - zekel 27-feb-97
|
||
// Is it possible for an opaque URL to use a fragment?
|
||
// right now we assume not. i suspect so but will leave it this way for now
|
||
// this is especially important to javascript and vbscript
|
||
// FEATURE: this might be worth investigation, but probably can't change this code
|
||
//
|
||
if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE)
|
||
return;
|
||
|
||
WCHAR *pch = (LPWSTR) FindFragmentW(*ppsz, parts->eScheme == URL_SCHEME_FILE);
|
||
|
||
if (pch)
|
||
{
|
||
TERMSTR(pch);
|
||
parts->pszFragment = pch +1;
|
||
}
|
||
}
|
||
|
||
PRIVATE inline BOOL IsUrlPrefixA(LPCSTR psz)
|
||
{
|
||
//
|
||
// Optimized for this particular case. Notice that most of it
|
||
// will be lego-ized out anyway.
|
||
//
|
||
if (psz[0]=='u' || psz[0]=='U') {
|
||
if (psz[1]=='r' || psz[1]=='R') {
|
||
if (psz[2]=='l' || psz[2]=='L') {
|
||
return TRUE;
|
||
}
|
||
}
|
||
}
|
||
return FALSE;
|
||
// return !StrCmpNIA(psz, c_szURLPrefixA, c_cchURLPrefix);
|
||
}
|
||
|
||
PRIVATE inline BOOL IsUrlPrefixW(LPCWSTR psz)
|
||
{
|
||
//
|
||
// Optimized for this particular case. Notice that most of it
|
||
// will be lego-ized out anyway.
|
||
//
|
||
if (psz[0]==L'u' || psz[0]==L'U') {
|
||
if (psz[1]==L'r' || psz[1]==L'R') {
|
||
if (psz[2]==L'l' || psz[2]==L'L') {
|
||
return TRUE;
|
||
}
|
||
}
|
||
}
|
||
return FALSE;
|
||
// return !StrCmpNIW(psz, c_szURLPrefixW, c_cchURLPrefix);
|
||
}
|
||
|
||
//
|
||
// if FindScheme() succeeds, it returns a pointer to the scheme,
|
||
// and the cch holds the count of chars for the scheme
|
||
// if it fails, and cch is non-zero then cch is how much should be skipped.
|
||
// this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
|
||
//
|
||
LPCSTR FindSchemeA(LPCSTR psz, LPDWORD pcchScheme)
|
||
{
|
||
LPCSTR pch;
|
||
DWORD cch;
|
||
|
||
ASSERT(pcchScheme);
|
||
ASSERT(psz);
|
||
|
||
*pcchScheme = 0;
|
||
|
||
for (pch = psz, cch = 0; *pch; pch++, cch++)
|
||
{
|
||
if (*pch == ':')
|
||
{
|
||
if (IsUrlPrefixA(psz))
|
||
{
|
||
psz = pch +1;
|
||
|
||
// set pcchScheme to skip past "URL:"
|
||
*pcchScheme = cch + 1;
|
||
|
||
// reset cch for the scheme len
|
||
cch = -1;
|
||
continue;
|
||
}
|
||
else
|
||
{
|
||
//
|
||
// Scheme found if it is at least two characters
|
||
if(cch > 1)
|
||
{
|
||
*pcchScheme = cch;
|
||
return psz;
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
if(!IsValidSchemeCharA(*pch))
|
||
break;
|
||
}
|
||
|
||
return NULL;
|
||
}
|
||
|
||
//
|
||
// FindSchemeW() around for Perf reasons for ParseURL()
|
||
// Any changes in either FindScheme() needs to reflected in the other
|
||
//
|
||
LPCWSTR FindSchemeW(LPCWSTR psz, LPDWORD pcchScheme, BOOL fAllowSemicolon = FALSE)
|
||
{
|
||
LPCWSTR pch;
|
||
DWORD cch;
|
||
|
||
ASSERT(pcchScheme);
|
||
ASSERT(psz);
|
||
|
||
*pcchScheme = 0;
|
||
|
||
for (pch = psz, cch = 0; *pch; pch++, cch++)
|
||
{
|
||
|
||
if (*pch == L':' ||
|
||
|
||
// Autocorrect permits a semicolon typo
|
||
(fAllowSemicolon && *pch == L';'))
|
||
{
|
||
if (IsUrlPrefixW(psz))
|
||
{
|
||
psz = pch +1;
|
||
|
||
// set pcchScheme to skip past "URL:"
|
||
*pcchScheme = cch + 1;
|
||
|
||
// reset cch for the scheme len
|
||
cch = -1;
|
||
continue;
|
||
}
|
||
else
|
||
{
|
||
//
|
||
// Scheme found if it is at least two characters
|
||
if(cch > 1)
|
||
{
|
||
*pcchScheme = cch;
|
||
return psz;
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
if(!IsValidSchemeCharW(*pch))
|
||
break;
|
||
}
|
||
|
||
return NULL;
|
||
}
|
||
|
||
PRIVATE DWORD
|
||
CountSlashes(LPCWSTR *ppsz)
|
||
{
|
||
DWORD cSlashes = 0;
|
||
LPCWSTR pch = *ppsz;
|
||
|
||
while (IsSeparator(pch))
|
||
{
|
||
*ppsz = pch;
|
||
pch++;
|
||
cSlashes++;
|
||
}
|
||
|
||
return cSlashes;
|
||
}
|
||
|
||
|
||
PRIVATE LPCWSTR
|
||
FindDosPath(LPCWSTR psz)
|
||
{
|
||
if (IsDosDrive(psz) || IsUNC(psz))
|
||
{
|
||
return psz;
|
||
}
|
||
else
|
||
{
|
||
DWORD cch;
|
||
LPCWSTR pszScheme = FindSchemeW(psz, &cch);
|
||
|
||
if (pszScheme && URL_SCHEME_FILE == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL))
|
||
{
|
||
LPCWSTR pch = psz + cch + 1;
|
||
DWORD c = CountSlashes(&pch);
|
||
|
||
switch (c)
|
||
{
|
||
case 2:
|
||
if(IsDosDrive(++pch))
|
||
return pch;
|
||
break;
|
||
|
||
case 4:
|
||
return --pch;
|
||
}
|
||
}
|
||
}
|
||
return NULL;
|
||
}
|
||
|
||
|
||
/*+++
|
||
|
||
WininetCopyUrlForParse()
|
||
this copies the url and prepends a "file://" if necessary
|
||
This should never be called except from wininet
|
||
everyone else should be calling UrlCreateFromPath()
|
||
|
||
Parameters
|
||
IN -
|
||
pszDst the destination buffer
|
||
pszSrc source buffer
|
||
|
||
OUT -
|
||
pszDst is filled with a Live URL
|
||
|
||
Returns
|
||
VOID
|
||
|
||
NOTE - Assume "file:" if no scheme and it looks like fully-qualified file path.
|
||
---*/
|
||
static const WCHAR c_szFileSchemeString[] = L"file://";
|
||
|
||
PRIVATE HRESULT
|
||
WininetCopyUrlForParse(PSHSTRW pstrDst, LPCWSTR pszSrc)
|
||
{
|
||
#ifndef UNIX
|
||
if (IsDrive(pszSrc) || IsUNC(pszSrc))
|
||
{
|
||
//
|
||
// NOTE: the first SetStr will always succeed
|
||
// because the default buffer is more than "file://"
|
||
pstrDst->SetStr(c_szFileSchemeString);
|
||
return pstrDst->Append(pszSrc);
|
||
}
|
||
else
|
||
#endif /* !UNIX */
|
||
return pstrDst->SetStr(pszSrc);
|
||
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
CopyUrlForParse(LPCWSTR pszUrl, PSHSTRW pstrUrl, DWORD dwFlags)
|
||
{
|
||
LPCWSTR pch;
|
||
HRESULT hr;
|
||
//
|
||
// now we will make copies of the URLs so that we can rip them apart
|
||
// WininetCopyUrlForParse() will prepend a file: if it wants...
|
||
//
|
||
|
||
if(dwFlags & URL_WININET_COMPATIBILITY)
|
||
{
|
||
hr = WininetCopyUrlForParse(pstrUrl, pszUrl);
|
||
}
|
||
else if(pch = FindDosPath(pszUrl))
|
||
{
|
||
hr = SHUrlCreateFromPath(pch, pstrUrl, dwFlags);
|
||
}
|
||
else
|
||
{
|
||
hr = pstrUrl->SetStr(pszUrl);
|
||
}
|
||
|
||
// Trim leading and trailing whitespace
|
||
// Remove tab and CRLF characters. Netscape does this.
|
||
if(SUCCEEDED(hr))
|
||
TrimAndStripInsignificantWhite(pstrUrl->GetInplaceStr());
|
||
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
PRIVATE VOID BreakScheme(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
if(!**ppsz || IsDrive(*ppsz))
|
||
return;
|
||
|
||
DWORD cch;
|
||
|
||
//
|
||
// if FindScheme() succeeds, it returns a pointer to the scheme,
|
||
// and the cch holds the count of chars for the scheme
|
||
// if it fails, and cch is none zero then cch is how much should be skipped.
|
||
// this is to allow "URL:/foo/bar", a relative URL with the "URL:" prefix.
|
||
//
|
||
if(NULL != (parts->pszScheme = (LPWSTR) FindSchemeW(*ppsz, &cch)))
|
||
{
|
||
parts->pszScheme[cch] = '\0';
|
||
CharLowerW(parts->pszScheme);
|
||
|
||
// put the pointer past the scheme for next Break()
|
||
*ppsz = parts->pszScheme + cch + 1;
|
||
|
||
|
||
#ifdef DEBUG
|
||
if (g_dwPrototype & PF_LOGSCHEMEHITS)
|
||
{
|
||
// this is for logging of url schemes, to make sure that we have the right order
|
||
int c = GetPrivateProfileIntW(L"SchemeHits", parts->pszScheme, 0, L"UrlPars.ini");
|
||
WCHAR szc[25];
|
||
StringCchPrintfW(szc, ARRAYSIZE(szc), L"%d", ++c);
|
||
WritePrivateProfileStringW(L"SchemeHits", parts->pszScheme, szc, L"UrlPars.ini");
|
||
}
|
||
#endif //DEBUG
|
||
|
||
|
||
parts->eScheme = GetSchemeTypeAndFlagsW(parts->pszScheme, cch, &parts->dwFlags);
|
||
}
|
||
else if (cch)
|
||
*ppsz += cch + 1;
|
||
}
|
||
|
||
|
||
PRIVATE VOID BreakQuery(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
WCHAR *pch;
|
||
|
||
if(!**ppsz)
|
||
return;
|
||
|
||
if(parts->dwFlags & UPF_SCHEME_OPAQUE)
|
||
return;
|
||
|
||
pch = StrChrW(*ppsz, QUERY);
|
||
|
||
//
|
||
// APPCOMPAT NETSCAPE COMPATBILITY - zekel - 27-JAN-97
|
||
// we will also get http://foo#frag?query
|
||
// even tho legally it should be http://foo?query#frag
|
||
// of course we will put it back together the right way.
|
||
//
|
||
if(!pch && parts->pszFragment)
|
||
pch = StrChrW(parts->pszFragment, QUERY);
|
||
|
||
// found our query string...
|
||
if (pch)
|
||
{
|
||
TERMSTR(pch);
|
||
parts->pszQuery = pch + 1;
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID MkBreakServer(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
//
|
||
// NOTE: we dont convert WHACKs to SLASHs because mk can be of the
|
||
// form <mk:@class:\\Server\Share\file.itl/path/in/the/file.gif
|
||
// and we want to preserve the DOS/UNC path as it is
|
||
//
|
||
|
||
if (**ppsz == TEXT('@'))
|
||
{
|
||
WCHAR *pch;
|
||
// treat everything to separator as host
|
||
//
|
||
parts->pszServer = *ppsz;
|
||
|
||
pch = StrChrW(*ppsz ,SLASH);
|
||
if (pch)
|
||
{
|
||
parts->dwFlags |= UPF_SEG_ABSOLUTE;
|
||
TERMSTR(pch);
|
||
*ppsz = pch + 1;
|
||
}
|
||
else
|
||
*ppsz += lstrlenW(*ppsz);
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID DefaultBreakServer(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
if (**ppsz == SLASH)
|
||
{
|
||
parts->dwFlags |= UPF_SEG_ABSOLUTE;
|
||
|
||
(*ppsz)++;
|
||
|
||
if (**ppsz == SLASH)
|
||
{
|
||
// we have a winner!
|
||
WCHAR * pch;
|
||
|
||
parts->pszServer = (*ppsz) + 1;
|
||
|
||
pch = StrChrW(parts->pszServer, SLASH);
|
||
|
||
if(pch)
|
||
{
|
||
TERMSTR(pch);
|
||
*ppsz = pch + 1;
|
||
}
|
||
else
|
||
*ppsz = *ppsz + lstrlenW(*ppsz);
|
||
}
|
||
}
|
||
else if(parts->pszScheme)
|
||
parts->dwFlags |= UPF_SCHEME_OPAQUE;
|
||
}
|
||
|
||
PRIVATE VOID FileBreakServer(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
LPWSTR pch;
|
||
|
||
// CountSlashes() will set *ppsz to the last slash
|
||
DWORD cSlashes = CountSlashes((LPCWSTR *)ppsz);
|
||
|
||
if(cSlashes || IsDrive(*ppsz))
|
||
parts->dwFlags |= UPF_SEG_ABSOLUTE;
|
||
|
||
switch (cSlashes)
|
||
{
|
||
case 0:
|
||
break;
|
||
|
||
case 4:
|
||
// we identify file://\\UNC as a true DOS path with no escaped characters
|
||
parts->dwFlags |= UPF_FILEISPATHURL;
|
||
|
||
// fall through
|
||
|
||
case 2:
|
||
if(IsDrive((*ppsz) + 1))
|
||
{
|
||
// this is a root drive
|
||
TERMSTR(*ppsz);
|
||
parts->pszServer = *ppsz;
|
||
(*ppsz)++;
|
||
// we identify file://C:\PATH as a true DOS path with no escaped characters
|
||
parts->dwFlags |= UPF_FILEISPATHURL;
|
||
break;
|
||
} //else fallthru to UNC handling
|
||
// fall through
|
||
|
||
case 5:
|
||
case 6:
|
||
//
|
||
// cases like "file:////..." or "file://///..."
|
||
// we see this as a UNC path
|
||
// lets set the server
|
||
//
|
||
parts->pszServer = ++(*ppsz);
|
||
for(pch = *ppsz; *pch && !IsSeparator(pch); pch++);
|
||
|
||
if(pch && *pch)
|
||
{
|
||
TERMSTR(pch);
|
||
*ppsz = pch + 1;
|
||
}
|
||
else
|
||
*ppsz = pch + lstrlenW(pch);
|
||
break;
|
||
|
||
case 1:
|
||
//
|
||
//we think of "file:/..." as on the local machine
|
||
// so we have zero length pszServer
|
||
//
|
||
case 3:
|
||
//
|
||
//we think of file:///... as properly normalized on the local machine
|
||
// so we have zero length pszServer
|
||
//
|
||
default:
|
||
// there is just too many, we pretend that there is just one and ignore
|
||
// the rest
|
||
TERMSTR(*ppsz);
|
||
parts->pszServer = *ppsz;
|
||
(*ppsz)++;
|
||
break;
|
||
}
|
||
|
||
// detect file://localserver/c:/path
|
||
if(parts->pszServer && !StrCmpIW(parts->pszServer, L"localhost"))
|
||
parts->pszServer = NULL;
|
||
}
|
||
|
||
PRIVATE VOID BreakServer(LPWSTR *ppsz, PURLPARTS parts, BOOL fConvert)
|
||
{
|
||
if(!**ppsz || parts->dwFlags & UPF_SCHEME_OPAQUE)
|
||
return;
|
||
|
||
//
|
||
// APPCOMPAT - we pretend that whacks are the equiv of slashes - zekel 17-MAR-97
|
||
// this is because the internet uses slashes and DOS
|
||
// uses whacks. so for useability's sake we allow both.
|
||
// but not in all cases. in particular, the "mk:" stream
|
||
// protocol depends upon the buggy behavior of one of IE30's
|
||
// many URL parsers treating relative URLs with whacks as one
|
||
// segment.
|
||
// NOTE: IE30 had inconsistent behavior WRT URLs. so we handled
|
||
// this case differently depending on when we saw, looked, touched, or
|
||
// played with these URLs. wininet would always convert, but mshtml
|
||
// sometimes would other times not.
|
||
//
|
||
// with MK: we cannot convert the base, or the relative
|
||
// but in breakpath we have to allow for the use of WHACK
|
||
// to indicate a root path
|
||
//
|
||
// we dont have to fProtectExtra because query and fragments
|
||
// are already broken off if necessary.
|
||
if (fConvert)
|
||
ConvertChar(*ppsz, WHACK, SLASH, FALSE);
|
||
|
||
switch(parts->eScheme)
|
||
{
|
||
case URL_SCHEME_FILE:
|
||
FileBreakServer(ppsz, parts);
|
||
break;
|
||
|
||
case URL_SCHEME_MK:
|
||
MkBreakServer(ppsz, parts);
|
||
break;
|
||
|
||
default:
|
||
DefaultBreakServer(ppsz, parts);
|
||
break;
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID DefaultBreakSegments(LPWSTR psz, PURLPARTS parts)
|
||
{
|
||
WCHAR *pch;
|
||
|
||
while (pch = StrChrW(psz, SLASH))
|
||
{
|
||
parts->cSegments++;
|
||
TERMSTR(pch);
|
||
psz = pch + 1;
|
||
}
|
||
|
||
if(!*psz || IsDot(psz) || IsDotDot(psz))
|
||
{
|
||
if (!*psz && parts->cSegments > 1)
|
||
parts->cSegments--;
|
||
|
||
parts->dwFlags |= UPF_EXSEG_DIRECTORY;
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID DefaultBreakPath(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
if(!**ppsz)
|
||
return;
|
||
|
||
//
|
||
// this will keep the drive letter from being backed up over
|
||
// during canonicalization. if we want keep the UNC share
|
||
// from being backed up we should do it here
|
||
// or in FileBreakServer() similarly
|
||
//
|
||
if(IsDrive(*ppsz))
|
||
{
|
||
parts->dwFlags |= UPF_SEG_LOCKFIRST;
|
||
// also convert "c|" to "c:"
|
||
}
|
||
|
||
parts->pszSegments = *ppsz;
|
||
parts->cSegments = 1;
|
||
|
||
if(!(parts->dwFlags & UPF_SCHEME_OPAQUE))
|
||
DefaultBreakSegments(parts->pszSegments, parts);
|
||
|
||
}
|
||
|
||
PRIVATE VOID BreakPath(LPWSTR *ppsz, PURLPARTS parts)
|
||
{
|
||
if(!**ppsz)
|
||
return;
|
||
|
||
if (parts->dwFlags & UPF_SCHEME_OPAQUE)
|
||
{
|
||
parts->pszSegments = *ppsz;
|
||
parts->cSegments = 1;
|
||
}
|
||
else
|
||
{
|
||
//
|
||
// we only need to check for absolute when there was
|
||
// no server segment. if there was a server segment,
|
||
// then absolute has already been set, and we need
|
||
// to preserve any separators that exist in the path
|
||
//
|
||
if(!parts->pszServer && IsSeparator(*ppsz))
|
||
{
|
||
parts->dwFlags |= UPF_SEG_ABSOLUTE;
|
||
(*ppsz)++;
|
||
}
|
||
|
||
DefaultBreakPath(ppsz, parts);
|
||
}
|
||
}
|
||
|
||
|
||
BOOL _ShouldBreakBase(PURLPARTS parts, LPCWSTR pszBase)
|
||
{
|
||
if (pszBase)
|
||
{
|
||
if (!parts->pszScheme)
|
||
return TRUE;
|
||
|
||
DWORD cch;
|
||
LPCWSTR pszScheme = FindSchemeW(pszBase, &cch);
|
||
|
||
// this means that this will only optimize on known schemes
|
||
// if both urls use URL_SCHEME_UNKNOWN...then we parse both.
|
||
if (pszScheme && parts->eScheme == GetSchemeTypeAndFlagsW(pszScheme, cch, NULL))
|
||
return TRUE;
|
||
|
||
}
|
||
|
||
return FALSE;
|
||
}
|
||
|
||
/*+++
|
||
|
||
BreakUrl()
|
||
Break a URL for its consituent parts
|
||
|
||
Parameters
|
||
IN -
|
||
the URL to crack open, need not be fully qualified
|
||
|
||
OUT -
|
||
parts absolute or relative may be nonzero (but not both).
|
||
host, anchor and access may be nonzero if they were specified.
|
||
Any which are nonzero point to zero terminated strings.
|
||
|
||
Returns
|
||
VOID
|
||
|
||
Details -
|
||
|
||
WARNING !! function munges the incoming buffer
|
||
|
||
---*/
|
||
|
||
#define BreakUrl(s, p) BreakUrls(s, p, NULL, NULL, NULL, 0)
|
||
|
||
//
|
||
// **BreakUrls()**
|
||
// RETURNS
|
||
// S_OK if the two urls need to be blended
|
||
// S_FALSE if pszUrl is absolute, or there is no pszBase
|
||
// failure some sort of memory allocation error
|
||
//
|
||
PRIVATE HRESULT
|
||
BreakUrls(LPWSTR pszUrl, PURLPARTS parts, LPCWSTR pszBase, PSHSTRW pstrBase, PURLPARTS partsBase, DWORD dwFlags)
|
||
{
|
||
HRESULT hr = S_FALSE;
|
||
ASSERT(pszUrl && parts);
|
||
|
||
ZeroMemory(parts, SIZEOF(URLPARTS));
|
||
|
||
if(!*pszUrl)
|
||
parts->dwFlags |= UPF_SEG_EMPTYSEG;
|
||
|
||
//
|
||
// WARNING: this order is specific, according to the proposed standard
|
||
//
|
||
if(*pszUrl || pszBase)
|
||
{
|
||
BOOL fConvert;
|
||
|
||
BreakScheme(&pszUrl, parts);
|
||
BreakFragment(&pszUrl, parts);
|
||
BreakQuery(&pszUrl, parts);
|
||
|
||
//
|
||
// this is the first time that we need to access
|
||
// pszBase if it exists, so this is when we copy and parse
|
||
//
|
||
if (_ShouldBreakBase(parts, pszBase))
|
||
{
|
||
hr = CopyUrlForParse(pszBase, pstrBase, dwFlags);
|
||
|
||
// this will be some kind of memory error
|
||
if(FAILED(hr))
|
||
return hr;
|
||
|
||
// ASSERT(hr != S_FALSE);
|
||
|
||
BreakUrl(pstrBase->GetInplaceStr(), partsBase);
|
||
fConvert = (partsBase->dwFlags & UPF_SCHEME_CONVERT);
|
||
}
|
||
else
|
||
fConvert = (parts->dwFlags & UPF_SCHEME_CONVERT);
|
||
|
||
BreakServer(&pszUrl, parts, fConvert);
|
||
BreakPath(&pszUrl, parts);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
/*+++
|
||
BlendParts() & all dependant Blend* functions
|
||
Blends the parts structures into one, taking the relavent
|
||
bits from each one and dumping the unused data.
|
||
|
||
Parameters
|
||
IN -
|
||
partsUrl the primary or relative parts - Takes precedence
|
||
partsBase the base or referrers parts
|
||
|
||
OUT -
|
||
partsOut the combined result
|
||
|
||
Returns
|
||
VOID -
|
||
|
||
NOTE: this will frequently NULL out the entire partsBase.
|
||
---*/
|
||
|
||
PRIVATE VOID
|
||
BlendScheme(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
|
||
{
|
||
if(partsUrl->pszScheme)
|
||
{
|
||
LPCWSTR pszScheme = partsOut->pszScheme = partsUrl->pszScheme;
|
||
URL_SCHEME eScheme = partsOut->eScheme = partsUrl->eScheme;
|
||
|
||
partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SCHEME_MASK);
|
||
|
||
//
|
||
// this checks to make sure that these are the same scheme, and
|
||
// that the scheme is allowed to be used in relative URLs
|
||
// file: is not allowed to because of weirdness with drive letters
|
||
// and \\UNC\shares
|
||
//
|
||
if ((eScheme && (eScheme != partsBase->eScheme) || eScheme == URL_SCHEME_FILE) ||
|
||
(!partsBase->pszScheme) ||
|
||
(partsBase->pszScheme && StrCmpW(pszScheme, partsBase->pszScheme)))
|
||
{
|
||
// they are different schemes. DUMP partsBase.
|
||
|
||
ZeroMemory(partsBase, SIZEOF(URLPARTS));
|
||
}
|
||
}
|
||
else
|
||
{
|
||
partsOut->pszScheme = partsBase->pszScheme;
|
||
partsOut->eScheme = partsBase->eScheme;
|
||
partsOut->dwFlags |= (partsBase->dwFlags & UPF_SCHEME_MASK);
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID
|
||
BlendServer(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
|
||
{
|
||
ASSERT(partsUrl && partsBase && partsOut);
|
||
|
||
//
|
||
// if we have different hosts then everything but the pszAccess is DUMPED
|
||
//
|
||
if(partsUrl->pszServer)
|
||
{
|
||
partsOut->pszServer = partsUrl->pszServer;
|
||
// NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_SERVER_MASK);
|
||
|
||
if ((partsBase->pszServer && StrCmpW(partsUrl->pszServer, partsBase->pszServer)))
|
||
{
|
||
// they are different Servers. DUMP partsBase.
|
||
|
||
ZeroMemory(partsBase, SIZEOF(URLPARTS));
|
||
}
|
||
}
|
||
else
|
||
{
|
||
partsOut->pszServer = partsBase->pszServer;
|
||
// NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_SERVER_MASK);
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID
|
||
BlendPath(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
|
||
{
|
||
ASSERT(partsUrl && partsBase && partsOut);
|
||
|
||
if (partsUrl->dwFlags & UPF_SEG_ABSOLUTE)
|
||
{
|
||
if((partsBase->dwFlags & UPF_SEG_LOCKFIRST) &&
|
||
!(partsUrl->dwFlags & UPF_SEG_LOCKFIRST))
|
||
{
|
||
// this keeps the drive letters when necessary
|
||
partsOut->pszSegments = partsBase->pszSegments;
|
||
partsOut->cSegments = 1; // only keep the first segment
|
||
partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK) ;
|
||
|
||
partsOut->pszExtraSegs = partsUrl->pszSegments;
|
||
partsOut->cExtraSegs = partsUrl->cSegments;
|
||
partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK);
|
||
}
|
||
else
|
||
{
|
||
|
||
|
||
// just use the absolute path
|
||
|
||
partsOut->pszSegments = partsUrl->pszSegments;
|
||
partsOut->cSegments = partsUrl->cSegments;
|
||
partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
|
||
}
|
||
|
||
ZeroMemory(partsBase, SIZEOF(URLPARTS));
|
||
|
||
}
|
||
else if ((partsBase->dwFlags & UPF_SEG_ABSOLUTE))
|
||
{
|
||
// Adopt path not name
|
||
partsOut->pszSegments = partsBase->pszSegments;
|
||
partsOut->cSegments = partsBase->cSegments;
|
||
partsOut->dwFlags |= (partsBase->dwFlags & UPF_SEG_MASK );
|
||
|
||
if(partsUrl->cSegments || partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
|
||
{
|
||
//
|
||
// this a relative path that needs to be combined
|
||
//
|
||
|
||
partsOut->pszExtraSegs = partsUrl->pszSegments;
|
||
partsOut->cExtraSegs = partsUrl->cSegments;
|
||
partsOut->dwFlags |= (partsUrl->dwFlags & UPF_EXSEG_MASK );
|
||
|
||
if (!(partsBase->dwFlags & UPF_EXSEG_DIRECTORY))
|
||
{
|
||
//
|
||
// knock off the file name segment
|
||
// as long as the it isnt the first or the first is not locked
|
||
// or it isnt a dotdot. in the case of http://site/dir/, dir/ is
|
||
// not actually killed, only the NULL terminator following it is.
|
||
//
|
||
LPWSTR pszLast = LastLiveSegment(partsOut->pszSegments, partsOut->cSegments, partsOut->dwFlags & UPF_SEG_LOCKFIRST);
|
||
|
||
if(pszLast && !IsDotDot(pszLast))
|
||
{
|
||
if(partsUrl->dwFlags & UPF_SEG_EMPTYSEG)
|
||
partsOut->dwFlags |= UPF_EXSEG_DIRECTORY;
|
||
|
||
KILLSEG(pszLast);
|
||
}
|
||
}
|
||
}
|
||
else
|
||
partsOut->dwFlags |= (partsBase->dwFlags & UPF_EXSEG_MASK);
|
||
}
|
||
else if (partsUrl->cSegments)
|
||
{
|
||
partsOut->pszSegments = partsUrl->pszSegments;
|
||
partsOut->cSegments = partsUrl->cSegments;
|
||
partsOut->dwFlags |= (partsUrl->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
|
||
}
|
||
else if (partsBase->cSegments)
|
||
{
|
||
partsOut->pszSegments = partsBase->pszSegments;
|
||
partsOut->cSegments = partsBase->cSegments;
|
||
partsOut->dwFlags |= (partsBase->dwFlags & (UPF_SEG_MASK |UPF_EXSEG_MASK) );
|
||
|
||
}
|
||
|
||
// regardless, we want to zero if we have relative segs
|
||
if (partsUrl->cSegments)
|
||
ZeroMemory(partsBase, SIZEOF(URLPARTS));
|
||
|
||
}
|
||
|
||
PRIVATE VOID
|
||
BlendQuery(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
|
||
{
|
||
if(partsUrl->pszQuery)
|
||
{
|
||
LPCWSTR pszQuery = partsOut->pszQuery = partsUrl->pszQuery;
|
||
|
||
// NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Query_MASK);
|
||
|
||
if ((partsBase->pszQuery && StrCmpW(pszQuery, partsBase->pszQuery)))
|
||
{
|
||
// they are different Querys. DUMP partsBase.
|
||
|
||
ZeroMemory(partsBase, SIZEOF(URLPARTS));
|
||
}
|
||
}
|
||
else
|
||
{
|
||
partsOut->pszQuery = partsBase->pszQuery;
|
||
// NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Query_MASK);
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID
|
||
BlendFragment(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
|
||
{
|
||
if(partsUrl->pszFragment || partsUrl->cSegments)
|
||
{
|
||
LPCWSTR pszFragment = partsOut->pszFragment = partsUrl->pszFragment;
|
||
|
||
// NOTUSED partsOut->dwFlags |= (partsUrl->dwFlags & UPF_Fragment_MASK);
|
||
|
||
if ((partsBase->pszFragment && StrCmpW(pszFragment, partsBase->pszFragment)))
|
||
{
|
||
// they are different Fragments. DUMP partsBase.
|
||
|
||
ZeroMemory(partsBase, SIZEOF(URLPARTS));
|
||
}
|
||
}
|
||
else
|
||
{
|
||
partsOut->pszFragment = partsBase->pszFragment;
|
||
// NOTUSED partsOut->dwFlags |= (partsBase->dwFlags & UPF_Fragment_MASK);
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID
|
||
BlendParts(PURLPARTS partsUrl, PURLPARTS partsBase, PURLPARTS partsOut)
|
||
{
|
||
//
|
||
// partsUrl always takes priority over partsBase
|
||
//
|
||
|
||
ASSERT(partsUrl && partsBase && partsOut);
|
||
|
||
ZeroMemory(partsOut, SIZEOF(URLPARTS));
|
||
|
||
BlendScheme( partsUrl, partsBase, partsOut);
|
||
BlendServer( partsUrl, partsBase, partsOut);
|
||
BlendPath( partsUrl, partsBase, partsOut);
|
||
BlendQuery( partsUrl, partsBase, partsOut);
|
||
BlendFragment( partsUrl, partsBase, partsOut);
|
||
|
||
}
|
||
|
||
PRIVATE VOID
|
||
CanonServer(PURLPARTS parts)
|
||
{
|
||
//
|
||
// we only do stuff if this server is an internet style
|
||
// server. that way it uses FQDNs and IP port numbers
|
||
//
|
||
if (parts->pszServer && (parts->dwFlags & UPF_SCHEME_INTERNET))
|
||
{
|
||
|
||
LPWSTR pszName = StrRChrW(parts->pszServer, NULL, L'@');
|
||
|
||
if(!pszName)
|
||
pszName = parts->pszServer;
|
||
|
||
// this should just point to the FQDN:Port
|
||
CharLowerW(pszName);
|
||
|
||
//
|
||
// Ignore default port numbers, and trailing dots on FQDNs
|
||
// which will only cause identical adresses to look different
|
||
//
|
||
{
|
||
WCHAR *pch = StrChrW(pszName, COLON);
|
||
|
||
if (pch && parts->eScheme)
|
||
{
|
||
BOOL fIgnorePort = FALSE;
|
||
|
||
//
|
||
// FEATURE we should actually be getting this from
|
||
// the services file to find out the default protocol port
|
||
// but we dont think that most people will change them - zekel 17-Dec-96
|
||
//
|
||
switch(parts->eScheme)
|
||
{
|
||
case URL_SCHEME_HTTP:
|
||
if(StrCmpW(pch, L":80") == 0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
case URL_SCHEME_FTP:
|
||
if(StrCmpW(pch, L":21") == 0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
case URL_SCHEME_GOPHER:
|
||
if(StrCmpW(pch, L":70") == 0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
case URL_SCHEME_HTTPS:
|
||
if(StrCmpW(pch, L":443") == 0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
if(fIgnorePort)
|
||
TERMSTR(pch); // It is the default: ignore it
|
||
}
|
||
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
PRIVATE VOID
|
||
CanonCombineSegs(PURLPARTS parts)
|
||
{
|
||
ASSERT(parts);
|
||
ASSERT(parts->pszExtraSegs && parts->cExtraSegs);
|
||
|
||
LPWSTR pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
|
||
|
||
LPWSTR pszExtra = parts->pszExtraSegs;
|
||
DWORD iExtra = 0;
|
||
DWORD cExtras = parts->cExtraSegs;
|
||
|
||
if(!IsLiveSegment(pszExtra))
|
||
pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
|
||
|
||
while(pszExtra && IsDotDot(pszExtra))
|
||
{
|
||
if (pszLast)
|
||
KILLSEG(pszLast);
|
||
|
||
KILLSEG(pszExtra);
|
||
|
||
pszLast = LastLiveSegment(parts->pszSegments, parts->cSegments, parts->dwFlags & UPF_SEG_LOCKFIRST);
|
||
pszExtra = NextLiveSegment(pszExtra, &iExtra, cExtras);
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID
|
||
CanonSegments(LPWSTR pszSeg,
|
||
DWORD cSegs,
|
||
BOOL fLockFirst)
|
||
|
||
{
|
||
DWORD iSeg = 0;
|
||
LPWSTR pszLastSeg = NULL;
|
||
LPWSTR pszFirstSeg = pszSeg;
|
||
BOOL fLastIsFirst = TRUE;
|
||
BOOL fFirstSeg = TRUE;
|
||
|
||
ASSERT (pszSeg && cSegs);
|
||
|
||
pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);
|
||
|
||
while (pszSeg)
|
||
{
|
||
if(IsDot(pszSeg))
|
||
{
|
||
// if it is just a "." we can discard the segment
|
||
KILLSEG(pszSeg);
|
||
}
|
||
|
||
else if(IsDotDot(pszSeg))
|
||
{
|
||
// if it is ".." then we discard it and the last seg
|
||
|
||
//
|
||
// if we are at the first (root) or
|
||
// the last is the root and it is locked
|
||
// then we dont want to do anything
|
||
//
|
||
if(pszLastSeg && !IsDotDot(pszLastSeg) && !(fLastIsFirst && fLockFirst))
|
||
{
|
||
KILLSEG(pszLastSeg);
|
||
pszLastSeg = NULL;
|
||
KILLSEG(pszSeg);
|
||
}
|
||
}
|
||
|
||
if(IsLiveSegment(pszSeg))
|
||
{
|
||
if(!pszLastSeg && fFirstSeg)
|
||
fLastIsFirst = TRUE;
|
||
else
|
||
fLastIsFirst = FALSE;
|
||
|
||
pszLastSeg = pszSeg;
|
||
fFirstSeg = FALSE;
|
||
}
|
||
else
|
||
{
|
||
pszLastSeg = LastLiveSegment(pszFirstSeg, iSeg, fLockFirst);
|
||
}
|
||
|
||
pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
|
||
|
||
}
|
||
}
|
||
|
||
PRIVATE VOID
|
||
CanonPath(PURLPARTS parts)
|
||
{
|
||
|
||
ASSERT(parts);
|
||
|
||
if(parts->cSegments)
|
||
CanonSegments(parts->pszSegments, parts->cSegments, (parts->dwFlags & UPF_SEG_LOCKFIRST));
|
||
|
||
if(parts->cExtraSegs)
|
||
CanonSegments(parts->pszExtraSegs, parts->cExtraSegs, FALSE);
|
||
|
||
if(parts->cExtraSegs)
|
||
CanonCombineSegs(parts);
|
||
}
|
||
|
||
|
||
PRIVATE VOID
|
||
CanonParts(PURLPARTS parts)
|
||
{
|
||
ASSERT(parts);
|
||
|
||
//CanonScheme(parts);
|
||
CanonServer(parts);
|
||
CanonPath(parts);
|
||
//CanonQuery(parts);
|
||
//CanonFragment(parts);
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
BuildScheme(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
ASSERT(parts && pstr);
|
||
|
||
if(parts->pszScheme)
|
||
{
|
||
hr = pstr->Append(parts->pszScheme);
|
||
if(SUCCEEDED(hr))
|
||
hr = pstr->Append(COLON);
|
||
}
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
BuildServer(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
ASSERT(parts && pstr);
|
||
|
||
switch(parts->eScheme)
|
||
{
|
||
case URL_SCHEME_MK:
|
||
// CraigC's "mk:" has no // but acts like it does
|
||
break;
|
||
|
||
case URL_SCHEME_FILE:
|
||
if ((dwFlags & URL_WININET_COMPATIBILITY) || (dwFlags & URL_FILE_USE_PATHURL))
|
||
{
|
||
if(parts->pszServer && *parts->pszServer)
|
||
hr = pstr->Append(L"////");
|
||
else if (parts->pszSegments && IsDrive(parts->pszSegments))
|
||
hr = pstr->Append(SLASH);
|
||
else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
|
||
hr = pstr->Append(L"//");
|
||
}
|
||
else if (parts->dwFlags & UPF_SEG_ABSOLUTE)
|
||
hr = pstr->Append(L"//");
|
||
break;
|
||
|
||
default:
|
||
if(parts->pszServer && SUCCEEDED(hr))
|
||
hr = pstr->Append(L"//");
|
||
break;
|
||
}
|
||
|
||
if(parts->pszServer && SUCCEEDED(hr))
|
||
hr = pstr->Append(parts->pszServer);
|
||
|
||
return hr;
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
BuildSegments(LPWSTR pszSeg, DWORD cSegs, PSHSTRW pstr, BOOL fRoot, BOOL *pfSlashLast)
|
||
{
|
||
DWORD iSeg = 0;
|
||
HRESULT hr = S_FALSE;
|
||
|
||
*pfSlashLast = FALSE;
|
||
|
||
ASSERT(pszSeg && pstr);
|
||
|
||
pszSeg = FirstLiveSegment(pszSeg, &iSeg, cSegs);
|
||
|
||
if(!fRoot && pszSeg)
|
||
{
|
||
hr = pstr->Append(pszSeg);
|
||
|
||
if(SUCCEEDED(hr))
|
||
pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
|
||
else
|
||
pszSeg = NULL;
|
||
}
|
||
|
||
while (pszSeg)
|
||
{
|
||
hr = pstr->Append(SLASH);
|
||
if(SUCCEEDED(hr) && *pszSeg)
|
||
{
|
||
hr = pstr->Append(pszSeg);
|
||
*pfSlashLast = FALSE;
|
||
}
|
||
else
|
||
*pfSlashLast = TRUE;
|
||
|
||
if(SUCCEEDED(hr))
|
||
pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
|
||
else
|
||
break;
|
||
}
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
|
||
PRIVATE HRESULT
|
||
BuildPath(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
BOOL fSlashLast = FALSE;
|
||
DWORD iSeg;
|
||
LPWSTR pszSegFirst = NULL;
|
||
|
||
ASSERT(parts && pstr);
|
||
|
||
if(parts->cSegments)
|
||
{
|
||
hr = BuildSegments(parts->pszSegments, parts->cSegments, pstr, parts->dwFlags & UPF_SEG_ABSOLUTE, &fSlashLast);
|
||
|
||
if (fSlashLast)
|
||
pstr->Append(SLASH);
|
||
|
||
|
||
}
|
||
|
||
if(SUCCEEDED(hr) && parts->cExtraSegs)
|
||
{
|
||
BOOL f = fSlashLast;
|
||
|
||
hr = BuildSegments(parts->pszExtraSegs, parts->cExtraSegs, pstr, !fSlashLast, &fSlashLast);
|
||
|
||
if (fSlashLast)
|
||
pstr->Append(SLASH);
|
||
|
||
if (hr == S_FALSE)
|
||
fSlashLast = f;
|
||
|
||
}
|
||
|
||
// trailing slash on a server name for IIS
|
||
if( !fSlashLast &&
|
||
(
|
||
(parts->dwFlags & UPF_EXSEG_DIRECTORY) ||
|
||
// if this is just a server name by itself
|
||
(!FirstLiveSegment(parts->pszSegments, &iSeg, parts->cSegments) &&
|
||
!FirstLiveSegment(parts->pszExtraSegs, &iSeg, parts->cExtraSegs) &&
|
||
parts->dwFlags & UPF_SEG_ABSOLUTE)
|
||
)
|
||
)
|
||
{
|
||
hr = pstr->Append(SLASH);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
|
||
PRIVATE HRESULT
|
||
BuildQuery(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
ASSERT(parts && pstr);
|
||
|
||
if(parts->pszQuery)
|
||
{
|
||
hr = pstr->Append(QUERY);
|
||
if(SUCCEEDED(hr))
|
||
hr = pstr->Append(parts->pszQuery);
|
||
}
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
BuildFragment(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
ASSERT(parts && pstr);
|
||
|
||
if(parts->pszFragment)
|
||
{
|
||
hr = pstr->Append(POUND);
|
||
if(SUCCEEDED(hr))
|
||
hr = pstr->Append(parts->pszFragment);
|
||
}
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
BuildUrl(PURLPARTS parts, DWORD dwFlags, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr;
|
||
|
||
ASSERT(parts && pstr);
|
||
|
||
if(
|
||
(SUCCEEDED(hr = BuildScheme(parts, dwFlags, pstr))) &&
|
||
(SUCCEEDED(hr = BuildServer(parts, dwFlags, pstr))) &&
|
||
(SUCCEEDED(hr = BuildPath(parts, dwFlags, pstr))) &&
|
||
(SUCCEEDED(hr = BuildQuery(parts, dwFlags, pstr)))
|
||
)
|
||
hr = BuildFragment(parts, dwFlags, pstr);
|
||
|
||
return hr;
|
||
}
|
||
|
||
/*+++
|
||
|
||
SHUrlEscape()
|
||
Escapes an URL
|
||
right now, i am only escaping stuff in the Path part of the URL
|
||
|
||
Parameters
|
||
IN -
|
||
pszUrl URL to examine
|
||
pstrOut SHSTR destination
|
||
dwFlags the relevant URL_* flags,
|
||
|
||
Returns
|
||
HRESULT -
|
||
SUCCESS S_OK
|
||
ERROR only E_OUTOFMEMORY
|
||
|
||
|
||
Helper Routines
|
||
Escape*(part) each part gets its own escape routine (ie EscapeScheme)
|
||
EscapeSpaces will only escape spaces (WININET compatibility mostly)
|
||
EscapeSegmentsGetNeededSize gets the required size of destination buffer for all path segments
|
||
EscapeLiveSegment does the work of escaping each path segment
|
||
---*/
|
||
|
||
PRIVATE HRESULT
|
||
EscapeSpaces(LPCWSTR psz, PSHSTRW pstr, DWORD dwFlags)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
LPCWSTR pch;
|
||
DWORD cSpaces = 0;
|
||
|
||
|
||
ASSERT(psz && pstr);
|
||
|
||
|
||
pstr->Reset();
|
||
|
||
for (pch = psz; *pch; pch++)
|
||
{
|
||
if (*pch == SPC)
|
||
cSpaces++;
|
||
}
|
||
|
||
if(cSpaces)
|
||
{
|
||
hr = pstr->SetSize(lstrlenW(psz) + cSpaces * 2 + 1);
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
int cchRemaing = pstr->GetSize();
|
||
LPWSTR pchOut = pstr->GetInplaceStr();
|
||
|
||
for (pch = psz; *pch; pch++)
|
||
{
|
||
if ((*pch == POUND || *pch == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
|
||
{
|
||
int cchCopied;
|
||
|
||
StringCchCopyW(pchOut, cchRemaing, pch);
|
||
cchCopied = lstrlenW(pchOut);
|
||
pchOut += cchCopied;
|
||
cchRemaing -= cchCopied;
|
||
break;
|
||
}
|
||
|
||
if (*pch == SPC)
|
||
{
|
||
*pchOut++ = HEX_ESCAPE;
|
||
*pchOut++ = L'2';
|
||
*pchOut++ = L'0';
|
||
|
||
cchRemaing -= 3;
|
||
}
|
||
else
|
||
{
|
||
*pchOut++ = *pch;
|
||
cchRemaing--;
|
||
}
|
||
|
||
ASSERT(cchRemaing >= 0);
|
||
}
|
||
|
||
TERMSTR(pchOut);
|
||
}
|
||
|
||
}
|
||
else
|
||
{
|
||
hr = pstr->SetStr(psz);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
inline PRIVATE HRESULT
|
||
EscapeScheme(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
|
||
{
|
||
ASSERT(partsUrl && partsOut);
|
||
|
||
partsOut->pszScheme = partsUrl->pszScheme;
|
||
partsOut->eScheme = partsUrl->eScheme;
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
inline PRIVATE HRESULT
|
||
EscapeServer(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
|
||
{
|
||
ASSERT(partsUrl && partsOut);
|
||
|
||
partsOut->pszServer = partsUrl->pszServer;
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
inline PRIVATE HRESULT
|
||
EscapeQuery(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
|
||
{
|
||
ASSERT(partsUrl && partsOut);
|
||
|
||
partsOut->pszQuery = partsUrl->pszQuery;
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
inline PRIVATE HRESULT
|
||
EscapeFragment(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
|
||
{
|
||
ASSERT(partsUrl && partsOut);
|
||
|
||
partsOut->pszFragment = partsUrl->pszFragment;
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
PRIVATE BOOL
|
||
GetEscapeStringSize(LPWSTR psz, DWORD dwFlags, LPDWORD pcch)
|
||
|
||
{
|
||
BOOL fResize = FALSE;
|
||
ASSERT(psz);
|
||
ASSERT(pcch);
|
||
|
||
|
||
for (*pcch = 0; *psz; psz++)
|
||
{
|
||
(*pcch)++;
|
||
|
||
if(!IsSafePathChar(*psz) ||
|
||
((dwFlags & URL_ESCAPE_PERCENT) && (*psz == HEX_ESCAPE)))
|
||
{
|
||
fResize = TRUE;
|
||
*pcch += 2;
|
||
}
|
||
|
||
}
|
||
|
||
// for the NULL term
|
||
(*pcch)++;
|
||
|
||
return fResize;
|
||
}
|
||
|
||
PRIVATE DWORD
|
||
EscapeSegmentsGetNeededSize(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags)
|
||
{
|
||
DWORD cchNeeded = 0;
|
||
BOOL fResize = FALSE;
|
||
LPWSTR pszSeg;
|
||
DWORD iSeg;
|
||
|
||
ASSERT(pszSegments && cSegs);
|
||
|
||
pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs);
|
||
|
||
while (IsLiveSegment(pszSeg))
|
||
{
|
||
DWORD cch;
|
||
|
||
if(GetEscapeStringSize(pszSeg, dwFlags, &cch))
|
||
fResize = TRUE;
|
||
cchNeeded += cch;
|
||
|
||
pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
|
||
}
|
||
|
||
return fResize ? cchNeeded : 0;
|
||
}
|
||
|
||
PRIVATE VOID
|
||
EscapeString(LPCWSTR pszSeg, DWORD dwFlags, LPWSTR *ppchOut)
|
||
{
|
||
LPWSTR pchIn; // This pointer has been trusted to not modify it's contents, just iterate.
|
||
LPWSTR pchOut = *ppchOut;
|
||
WCHAR ch;
|
||
|
||
for (pchIn = (LPWSTR)pszSeg; *pchIn; pchIn++)
|
||
{
|
||
ch = *pchIn;
|
||
|
||
if (!IsSafePathChar(ch) ||
|
||
((dwFlags & URL_ESCAPE_PERCENT) && (ch == HEX_ESCAPE)))
|
||
{
|
||
*pchOut++ = HEX_ESCAPE;
|
||
*pchOut++ = hex[(ch >> 4) & 15];
|
||
*pchOut++ = hex[ch & 15];
|
||
|
||
}
|
||
else
|
||
*pchOut++ = *pchIn;
|
||
}
|
||
|
||
TERMSTR(pchOut);
|
||
|
||
// move past the terminator
|
||
pchOut++;
|
||
|
||
*ppchOut = pchOut;
|
||
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
EscapeSegments(LPWSTR pszSegments, DWORD cSegs, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
|
||
{
|
||
DWORD cchNeeded;
|
||
|
||
HRESULT hr = S_OK;
|
||
|
||
ASSERT(pszSegments && cSegs && partsOut && pstr);
|
||
|
||
cchNeeded = EscapeSegmentsGetNeededSize(pszSegments, cSegs, dwFlags);
|
||
|
||
if(cchNeeded)
|
||
{
|
||
ASSERT(pstr);
|
||
|
||
hr = pstr->SetSize(cchNeeded);
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
LPWSTR pchOut = pstr->GetInplaceStr();
|
||
LPWSTR pszSeg;
|
||
DWORD iSeg;
|
||
|
||
partsOut->pszSegments = pchOut;
|
||
partsOut->cSegments = 0;
|
||
|
||
pszSeg = FirstLiveSegment(pszSegments, &iSeg, cSegs);
|
||
|
||
while (IsLiveSegment(pszSeg))
|
||
{
|
||
EscapeString(pszSeg, dwFlags, &pchOut);
|
||
partsOut->cSegments++;
|
||
|
||
pszSeg = NextLiveSegment(pszSeg, &iSeg, cSegs);
|
||
}
|
||
|
||
|
||
}
|
||
|
||
}
|
||
else
|
||
{
|
||
partsOut->cSegments = cSegs;
|
||
partsOut->pszSegments = pszSegments;
|
||
}
|
||
|
||
|
||
return hr;
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
EscapePath(PURLPARTS partsUrl, DWORD dwFlags, PURLPARTS partsOut, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
ASSERT(partsUrl && partsOut && pstr);
|
||
|
||
if(partsUrl->cSegments)
|
||
{
|
||
hr = EscapeSegments(partsUrl->pszSegments, partsUrl->cSegments, dwFlags, partsOut, pstr);
|
||
|
||
}
|
||
else
|
||
{
|
||
partsOut->cSegments = 0;
|
||
partsOut->pszSegments = NULL;
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
HRESULT
|
||
SHUrlEscape (LPCWSTR pszUrl,
|
||
PSHSTRW pstrOut,
|
||
DWORD dwFlags)
|
||
{
|
||
#ifdef TESTING_SPACES_ONLY
|
||
return EscapeSpaces(pszUrl, pstrOut, dwFlags);
|
||
#else //TESTING_SPACES_ONLY
|
||
|
||
SHSTRW strUrl;
|
||
HRESULT hr;
|
||
|
||
ASSERT(pszUrl && pstrOut);
|
||
if(!pszUrl || !pstrOut)
|
||
return E_INVALIDARG;
|
||
|
||
//
|
||
// EscapeSpaces is remarkably poor,
|
||
// but so is this kind of functionality...
|
||
// it doesnt do any kind of real parsing, it
|
||
// only looks for spaces and escapes them...
|
||
//
|
||
if(dwFlags & URL_ESCAPE_SPACES_ONLY)
|
||
return EscapeSpaces(pszUrl, pstrOut, dwFlags);
|
||
|
||
// We are just passed a segment so we only want to
|
||
// escape that and nothing else. Don't look for
|
||
// URL pieces.
|
||
if(dwFlags & URL_ESCAPE_SEGMENT_ONLY)
|
||
{
|
||
URLPARTS partsOut;
|
||
SHSTRW strTemp;
|
||
|
||
EscapeSegments((LPWSTR)pszUrl, 1, dwFlags, &partsOut, &strTemp);
|
||
pstrOut->SetStr(partsOut.pszSegments);
|
||
return S_OK;
|
||
}
|
||
|
||
pstrOut->Reset();
|
||
|
||
hr = strUrl.SetStr(pszUrl);
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
URLPARTS partsUrl, partsOut;
|
||
SHSTRW strPath;
|
||
|
||
BreakUrl(strUrl.GetInplaceStr(), &partsUrl);
|
||
|
||
ZeroMemory(&partsOut, SIZEOF(URLPARTS));
|
||
//
|
||
// NOTE the only function here that is really active right now is the EscapePath
|
||
// if some other part needs to be escaped, then add a new SHSTR in the 4th param
|
||
// and change the appropriate subroutine
|
||
//
|
||
|
||
if(
|
||
(SUCCEEDED(hr = EscapeScheme(&partsUrl, dwFlags, &partsOut, NULL)))
|
||
&& (SUCCEEDED(hr = EscapeServer(&partsUrl, dwFlags, &partsOut, NULL)))
|
||
&& (SUCCEEDED(hr = EscapePath(&partsUrl, dwFlags, &partsOut, &strPath)))
|
||
&& (SUCCEEDED(hr = EscapeQuery(&partsUrl, dwFlags, &partsOut, NULL)))
|
||
&& (SUCCEEDED(hr = EscapeFragment(&partsUrl, dwFlags, &partsOut, NULL)))
|
||
)
|
||
{
|
||
partsOut.dwFlags = partsUrl.dwFlags;
|
||
|
||
hr = BuildUrl(&partsOut, dwFlags, pstrOut);
|
||
}
|
||
}
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
return hr;
|
||
#endif //TESTING_SPACES_ONLY
|
||
}
|
||
|
||
|
||
/*+++
|
||
|
||
SHUrlUnescape()
|
||
Unescapes a string in place. this is ok because
|
||
it should never grow
|
||
|
||
Parameters
|
||
IN -
|
||
psz string to unescape inplace
|
||
dwFlags the relevant URL_* flags,
|
||
|
||
Returns
|
||
HRESULT -
|
||
SUCCESS S_OK
|
||
ERROR DOESNT error right now
|
||
|
||
|
||
Helper Routines
|
||
HexToWord takes a hexdigit and returns WORD with the right number or -1
|
||
IsEscapedChar looks at a ptr for "%XX" where X is a hexdigit
|
||
TranslateEscapedChar translates "%XX" to an 8 bit char
|
||
---*/
|
||
|
||
PRIVATE WORD
|
||
HexToWord(WCHAR ch)
|
||
{
|
||
if(ch >= TEXT('0') && ch <= TEXT('9'))
|
||
return (WORD) ch - TEXT('0');
|
||
if(ch >= TEXT('A') && ch <= TEXT('F'))
|
||
return (WORD) ch - TEXT('A') + 10;
|
||
if(ch >= TEXT('a') && ch <= TEXT('f'))
|
||
return (WORD) ch - TEXT('a') + 10;
|
||
|
||
ASSERT(FALSE); //we have tried to use a non-hex number
|
||
return (WORD) -1;
|
||
}
|
||
|
||
PRIVATE BOOL inline
|
||
IsEscapedOctetW(LPCWSTR pch)
|
||
{
|
||
return (pch[0] == HEX_ESCAPE && IsHex(pch[1]) && IsHex(pch[2])) ? TRUE : FALSE;
|
||
}
|
||
|
||
PRIVATE BOOL inline
|
||
IsEscapedOctetA(LPCSTR pch)
|
||
{
|
||
return (pch[0] == HEX_ESCAPE_A && IsHex((WCHAR)pch[1]) && IsHex((WCHAR)pch[2])) ? TRUE : FALSE;
|
||
}
|
||
|
||
PRIVATE WCHAR
|
||
TranslateEscapedOctetW(LPCWSTR pch)
|
||
{
|
||
WCHAR ch;
|
||
ASSERT(IsEscapedOctetW(pch));
|
||
|
||
pch++;
|
||
ch = (WCHAR) HexToWord(*pch++) * 16; // hi nibble
|
||
ch += HexToWord(*pch); // lo nibble
|
||
|
||
return ch;
|
||
}
|
||
|
||
PRIVATE CHAR
|
||
TranslateEscapedOctetA(LPCSTR pch)
|
||
{
|
||
CHAR ch;
|
||
ASSERT(IsEscapedOctetA(pch));
|
||
|
||
pch++;
|
||
ch = (CHAR) HexToWord(*pch++) * 16; // hi nibble
|
||
ch += HexToWord(*pch); // lo nibble
|
||
|
||
return ch;
|
||
}
|
||
|
||
|
||
HRESULT SHUrlUnescapeA(LPSTR psz, DWORD dwFlags)
|
||
{
|
||
CHAR *pchSrc = psz;
|
||
CHAR *pchDst = psz;
|
||
|
||
while (*pchSrc)
|
||
{
|
||
if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
|
||
{
|
||
while (*pchDst++ = *pchSrc++) {};
|
||
break;
|
||
}
|
||
|
||
if (IsEscapedOctetA(pchSrc))
|
||
{
|
||
CHAR ch = TranslateEscapedOctetA(pchSrc);
|
||
|
||
*pchDst++ = ch;
|
||
|
||
pchSrc += 3; // enuff for "%XX"
|
||
}
|
||
else
|
||
{
|
||
*pchDst++ = *pchSrc++;
|
||
}
|
||
}
|
||
|
||
TERMSTR(pchDst);
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
|
||
HRESULT SHUrlUnescapeW(LPWSTR psz, DWORD dwFlags)
|
||
{
|
||
WCHAR *pchSrc = psz;
|
||
WCHAR *pchDst = psz;
|
||
|
||
while (*pchSrc)
|
||
{
|
||
if ((*pchSrc == POUND || *pchSrc == QUERY) && (dwFlags & URL_DONT_ESCAPE_EXTRA_INFO))
|
||
{
|
||
while (*pchDst++ = *pchSrc++) {};
|
||
break;
|
||
}
|
||
|
||
if (IsEscapedOctetW(pchSrc))
|
||
{
|
||
WCHAR ch = TranslateEscapedOctetW(pchSrc);
|
||
|
||
*pchDst++ = ch;
|
||
|
||
pchSrc += 3; // enuff for "%XX"
|
||
}
|
||
else
|
||
{
|
||
*pchDst++ = *pchSrc++;
|
||
}
|
||
}
|
||
|
||
TERMSTR(pchDst);
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
BuildDosPath(PURLPARTS parts, PSHSTRW pstrOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
// this will disable a preceding slash when there is a drive
|
||
if(parts->pszSegments && IsDrive(parts->pszSegments))
|
||
parts->dwFlags = (parts->dwFlags & ~UPF_SEG_ABSOLUTE);
|
||
|
||
|
||
// if there is a zero length server then
|
||
// we skip building it
|
||
if(parts->pszServer && !*parts->pszServer)
|
||
parts->pszServer = NULL;
|
||
|
||
|
||
// this prevents all the special file goo checking
|
||
parts->eScheme = URL_SCHEME_UNKNOWN;
|
||
|
||
//
|
||
// then go ahead and put the path together
|
||
if( (SUCCEEDED(hr = BuildServer(parts, dwFlags, pstrOut))) &&
|
||
(!parts->cSegments || SUCCEEDED(hr = BuildPath(parts, dwFlags, pstrOut)))
|
||
)
|
||
{
|
||
// then decode it cuz paths arent escaped
|
||
if (IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
|
||
WininetFixFileSlashes(pstrOut->GetInplaceStr());
|
||
else
|
||
#ifndef UNIX
|
||
ConvertChar(pstrOut->GetInplaceStr(), SLASH, WHACK, TRUE);
|
||
#else
|
||
ConvertChar(pstrOut->GetInplaceStr(), WHACK, SLASH, TRUE);
|
||
#endif
|
||
|
||
if(IsFlagClear(parts->dwFlags, UPF_FILEISPATHURL))
|
||
SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags);
|
||
|
||
if(IsDriveUrl(*pstrOut))
|
||
{
|
||
LPWSTR pszTemp = pstrOut->GetInplaceStr();
|
||
|
||
pszTemp[1] = COLON;
|
||
}
|
||
}
|
||
|
||
return hr;
|
||
|
||
}
|
||
HRESULT
|
||
SHPathCreateFromUrl(LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strUrl;
|
||
|
||
ASSERT(pszUrl && pstrOut);
|
||
|
||
pstrOut->Reset();
|
||
hr = strUrl.SetStr(pszUrl);
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
URLPARTS partsUrl;
|
||
|
||
// first we need to break it open
|
||
BreakUrl(strUrl.GetInplaceStr(), &partsUrl);
|
||
|
||
// then we make sure it is a file:
|
||
if(partsUrl.eScheme == URL_SCHEME_FILE)
|
||
{
|
||
hr = BuildDosPath(&partsUrl, pstrOut, dwFlags);
|
||
}
|
||
else
|
||
hr = E_INVALIDARG;
|
||
}
|
||
return hr;
|
||
}
|
||
|
||
|
||
HRESULT
|
||
SHUrlCreateFromPath(LPCWSTR pszPath, PSHSTRW pstrOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strPath;
|
||
ASSERT(pszPath && pstrOut);
|
||
|
||
if(PathIsURLW(pszPath))
|
||
{
|
||
if(SUCCEEDED(hr = pstrOut->SetStr(pszPath)))
|
||
return S_FALSE;
|
||
else
|
||
return hr;
|
||
}
|
||
|
||
|
||
pstrOut->Reset();
|
||
hr = strPath.SetStr(pszPath);
|
||
|
||
TrimAndStripInsignificantWhite(strPath.GetInplaceStr());
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
URLPARTS partsIn, partsOut;
|
||
SHSTRW strEscapedPath, strEscapedServer;
|
||
LPWSTR pch = strPath.GetInplaceStr();
|
||
|
||
ZeroMemory(&partsIn, SIZEOF(URLPARTS));
|
||
|
||
partsIn.pszScheme = (LPWSTR)c_szFileScheme;
|
||
partsIn.eScheme = URL_SCHEME_FILE;
|
||
partsIn.dwFlags = UPF_SCHEME_CONVERT;
|
||
|
||
// first break the path
|
||
BreakFragment(&pch, &partsIn);
|
||
BreakServer(&pch, &partsIn, TRUE);
|
||
BreakPath(&pch, &partsIn);
|
||
|
||
partsOut = partsIn;
|
||
|
||
// then escape the path if we arent using path URLs
|
||
if (IsFlagClear(dwFlags, URL_FILE_USE_PATHURL))
|
||
{
|
||
hr = EscapePath(&partsIn, dwFlags | URL_ESCAPE_PERCENT, &partsOut, &strEscapedPath);
|
||
|
||
if(SUCCEEDED(hr) && partsOut.pszServer)
|
||
{
|
||
//
|
||
// i am treating the pszServer exactly like a path segment
|
||
//
|
||
|
||
DWORD cchNeeded;
|
||
|
||
if(GetEscapeStringSize(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &cchNeeded) &&
|
||
SUCCEEDED(hr = strEscapedServer.SetSize(cchNeeded)))
|
||
{
|
||
pch = strEscapedServer.GetInplaceStr();
|
||
|
||
EscapeString(partsOut.pszServer, dwFlags | URL_ESCAPE_PERCENT, &pch);
|
||
partsOut.pszServer = strEscapedServer.GetInplaceStr();
|
||
}
|
||
}
|
||
}
|
||
|
||
if(!partsOut.pszServer && IsFlagSet(partsOut.dwFlags, UPF_SEG_ABSOLUTE))
|
||
partsOut.pszServer = L"";
|
||
|
||
// then build the URL
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
|
||
{
|
||
if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString)))
|
||
hr = BuildDosPath(&partsOut, pstrOut, dwFlags);
|
||
}
|
||
else
|
||
hr = BuildUrl(&partsOut, dwFlags, pstrOut);
|
||
}
|
||
|
||
if (SUCCEEDED(hr) && (IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)))
|
||
WininetFixFileSlashes(pstrOut->GetInplaceStr());
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
/*+++
|
||
|
||
SHUrlParse()
|
||
Canonicalize an URL
|
||
or Combine and Canonicalize two URLs
|
||
|
||
Parameters
|
||
IN -
|
||
pszBase the base or referring URL, may be NULL
|
||
pszUrl the relative URL
|
||
dwFlags the relevant URL_* flags,
|
||
|
||
Returns
|
||
HRESULT -
|
||
SUCCESS S_OK
|
||
ERROR appropriate error, usually just E_OUTOFMEMORY;
|
||
|
||
NOTE: pszUrl will always take precedence over pszBase.
|
||
|
||
---*/
|
||
HRESULT SHUrlParse(LPCWSTR pszBase, LPCWSTR pszUrl, PSHSTRW pstrOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
URLPARTS partsUrl, partsOut, partsBase;
|
||
|
||
SHSTRW strBase;
|
||
SHSTRW strUrl;
|
||
ASSERT(pszUrl);
|
||
ASSERT(pstrOut);
|
||
|
||
TraceMsgW(TF_URL | TF_FUNC, "entering SHUrlParse(%s, %s, 0x%X", pszBase,pszUrl ? pszUrl : L"NULL", dwFlags);
|
||
|
||
pstrOut->Reset();
|
||
|
||
//
|
||
// Don't bother parsing if all we have in an inter-page link as the
|
||
// pszUrl and no pszBase to parse
|
||
//
|
||
|
||
if (pszUrl[0] == POUND && (!pszBase || !*pszBase))
|
||
{
|
||
hr = pstrOut->SetStr(pszUrl);
|
||
|
||
goto quit;
|
||
}
|
||
|
||
|
||
//
|
||
// for Perf reasons we want to parse the relative url first.
|
||
// if it is an absolute URL, we need never look at the base.
|
||
//
|
||
|
||
hr = CopyUrlForParse(pszUrl, &strUrl, dwFlags);
|
||
|
||
if(FAILED(hr))
|
||
goto quit;
|
||
|
||
// -- Cybersitter compat ----
|
||
// Some bug fix broke the original parser. No time to go back and
|
||
// fix it, but since we know what to expect, we'll return this straight instead.
|
||
// Basically, when we canonicalize ://, we produce :///
|
||
if (!StrCmpW(strUrl, L"://"))
|
||
{
|
||
hr = pstrOut->SetStr(L":///");
|
||
goto quit;
|
||
}
|
||
|
||
//
|
||
// BreakUrls will decide if it is necessary to look at the relative
|
||
//
|
||
hr = BreakUrls(strUrl.GetInplaceStr(), &partsUrl, pszBase, &strBase, &partsBase, dwFlags);
|
||
|
||
if(FAILED(hr))
|
||
goto quit;
|
||
|
||
if(S_OK == hr) {
|
||
//
|
||
// this is where the real combination logic happens
|
||
// this first parts is the one that takes precedence
|
||
//
|
||
BlendParts(&partsUrl, &partsBase, &partsOut);
|
||
}
|
||
else
|
||
partsOut = partsUrl;
|
||
|
||
|
||
//
|
||
// we will now do the work of putting it together
|
||
// if these fail, it is because we are out of memory.
|
||
//
|
||
|
||
if (!(dwFlags & URL_DONT_SIMPLIFY))
|
||
CanonParts(&partsOut);
|
||
|
||
if(URL_SCHEME_FILE == partsOut.eScheme && IsFlagSet(dwFlags, URL_FILE_USE_PATHURL))
|
||
{
|
||
if (SUCCEEDED(hr = pstrOut->SetStr(c_szFileSchemeString)))
|
||
hr = BuildDosPath(&partsOut, pstrOut, dwFlags);
|
||
}
|
||
else
|
||
hr = BuildUrl(&partsOut, dwFlags, pstrOut);
|
||
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
if (dwFlags & URL_UNESCAPE)
|
||
SHUrlUnescapeW(pstrOut->GetInplaceStr(), dwFlags);
|
||
|
||
if (dwFlags & URL_ESCAPE_SPACES_ONLY || dwFlags & URL_ESCAPE_UNSAFE)
|
||
{
|
||
//
|
||
// we are going to reuse strUrl here
|
||
//
|
||
hr = strUrl.SetStr(*pstrOut);
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = SHUrlEscape(strUrl, pstrOut, dwFlags);
|
||
}
|
||
}
|
||
|
||
if (SUCCEEDED(hr) &&
|
||
(IsFlagSet(dwFlags, URL_WININET_COMPATIBILITY)) &&
|
||
(partsOut.eScheme == URL_SCHEME_FILE))
|
||
WininetFixFileSlashes(pstrOut->GetInplaceStr());
|
||
|
||
|
||
quit:
|
||
|
||
|
||
if(FAILED(hr))
|
||
{
|
||
pstrOut->Reset();
|
||
TraceMsg(TF_URL | TF_FUNC, TEXT("FAILED SHUrlParse() hr = 0x%X\n"), hr);
|
||
}
|
||
else
|
||
TraceMsgW(TF_URL | TF_FUNC, "SUCCEEDED SHUrlParse() %s\n", (LPCWSTR)*pstrOut);
|
||
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
typedef struct _LOGON {
|
||
LPWSTR pszUser;
|
||
LPWSTR pszPass;
|
||
LPWSTR pszHost;
|
||
LPWSTR pszPort;
|
||
} LOGON, *PLOGON;
|
||
|
||
PRIVATE void
|
||
BreakLogon(LPWSTR psz, PLOGON plo)
|
||
{
|
||
ASSERT(psz);
|
||
ASSERT(plo);
|
||
|
||
WCHAR *pch = StrChrW(psz, L'@');
|
||
if(pch)
|
||
{
|
||
TERMSTR(pch);
|
||
plo->pszHost = pch + 1;
|
||
|
||
plo->pszUser = psz;
|
||
pch = StrChrW(psz, COLON);
|
||
if (pch)
|
||
{
|
||
TERMSTR(pch);
|
||
plo->pszPass = pch + 1;
|
||
}
|
||
}
|
||
else
|
||
plo->pszHost = psz;
|
||
|
||
pch = StrChrW(plo->pszHost, COLON);
|
||
if (pch)
|
||
{
|
||
TERMSTR(pch);
|
||
plo->pszPort = pch + 1;
|
||
}
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
InternetGetPart(DWORD dwPart, PURLPARTS parts, PSHSTRW pstr, DWORD dwFlags)
|
||
{
|
||
HRESULT hr = E_FAIL;
|
||
|
||
if(parts->pszServer)
|
||
{
|
||
LOGON lo = {0};
|
||
|
||
BreakLogon(parts->pszServer, &lo);
|
||
|
||
switch (dwPart)
|
||
{
|
||
case URL_PART_HOSTNAME:
|
||
hr = pstr->Append(lo.pszHost);
|
||
break;
|
||
|
||
case URL_PART_USERNAME:
|
||
hr = pstr->Append(lo.pszUser);
|
||
break;
|
||
|
||
case URL_PART_PASSWORD:
|
||
hr = pstr->Append(lo.pszPass);
|
||
break;
|
||
|
||
case URL_PART_PORT:
|
||
hr = pstr->Append(lo.pszPort);
|
||
break;
|
||
|
||
default:
|
||
ASSERT(FALSE);
|
||
}
|
||
}
|
||
return hr;
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
SHUrlGetPart(PSHSTRW pstrIn, PSHSTRW pstrOut, DWORD dwPart, DWORD dwFlags)
|
||
{
|
||
ASSERT(pstrIn);
|
||
ASSERT(pstrOut);
|
||
ASSERT(dwPart);
|
||
|
||
HRESULT hr = S_OK;
|
||
|
||
URLPARTS parts;
|
||
|
||
BreakUrl(pstrIn->GetInplaceStr(), &parts);
|
||
|
||
if(dwFlags & URL_PARTFLAG_KEEPSCHEME)
|
||
{
|
||
hr = pstrOut->SetStr(parts.pszScheme);
|
||
if(SUCCEEDED(hr))
|
||
hr = pstrOut->Append(COLON);
|
||
}
|
||
else
|
||
pstrOut->Reset();
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
switch (dwPart)
|
||
{
|
||
case URL_PART_SCHEME:
|
||
hr = pstrOut->SetStr(parts.pszScheme);
|
||
break;
|
||
|
||
case URL_PART_HOSTNAME:
|
||
if (parts.eScheme == URL_SCHEME_FILE)
|
||
{
|
||
hr = pstrOut->SetStr(parts.pszServer);
|
||
break;
|
||
}
|
||
// else fall through
|
||
case URL_PART_USERNAME:
|
||
case URL_PART_PASSWORD:
|
||
case URL_PART_PORT:
|
||
if(parts.dwFlags & UPF_SCHEME_INTERNET)
|
||
{
|
||
hr = InternetGetPart(dwPart, &parts, pstrOut, dwFlags);
|
||
}
|
||
else
|
||
hr = E_FAIL;
|
||
break;
|
||
|
||
case URL_PART_QUERY:
|
||
hr = pstrOut->SetStr(parts.pszQuery);
|
||
break;
|
||
|
||
default:
|
||
ASSERT(FALSE);
|
||
hr = E_UNEXPECTED;
|
||
}
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
#define c_szURLPrefixesKey "Software\\Microsoft\\Windows\\CurrentVersion\\URL\\Prefixes"
|
||
const WCHAR c_szDefaultURLPrefixKey[] = L"Software\\Microsoft\\Windows\\CurrentVersion\\URL\\DefaultPrefix";
|
||
|
||
PRIVATE inline LPCWSTR SkipLeadingSlashes(LPCWSTR psz)
|
||
{
|
||
// Skip two leading slashes.
|
||
|
||
if (psz[0] == SLASH && psz[1] == SLASH)
|
||
psz += 2;
|
||
|
||
return psz;
|
||
}
|
||
|
||
PRIVATE HRESULT
|
||
UrlGuessScheme(LPCWSTR pszUrl, PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_FALSE;
|
||
|
||
ASSERT(pszUrl && pstr);
|
||
|
||
HKEY hkeyPrefixes;
|
||
|
||
if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, c_szURLPrefixesKey, 0, KEY_QUERY_VALUE, &hkeyPrefixes)
|
||
== ERROR_SUCCESS)
|
||
{
|
||
DWORD dwiValue;
|
||
CHAR rgchValueName[MAX_PATH];
|
||
DWORD cchValueName = SIZECHARS(rgchValueName);
|
||
DWORD dwType;
|
||
CHAR rgchPrefix[MAX_PATH];
|
||
DWORD cbPrefix = SIZEOF(rgchPrefix);
|
||
|
||
// need to get past the initial two slashes if applicable
|
||
pszUrl = SkipLeadingSlashes(pszUrl);
|
||
|
||
for (dwiValue = 0;
|
||
RegEnumValueA(hkeyPrefixes, dwiValue, rgchValueName,
|
||
&cchValueName, NULL, &dwType, (PBYTE)rgchPrefix,
|
||
&cbPrefix) == ERROR_SUCCESS;
|
||
dwiValue++)
|
||
{
|
||
WCHAR wszValue[MAX_PATH];
|
||
|
||
MultiByteToWideChar(CP_ACP, 0, rgchValueName, -1, wszValue, ARRAYSIZE(wszValue));
|
||
|
||
// we check to make sure that we match and there is something more
|
||
if (!StrCmpNIW(pszUrl, wszValue, cchValueName) && pszUrl[cchValueName])
|
||
{
|
||
MultiByteToWideChar(CP_ACP, 0, rgchPrefix, -1, wszValue, ARRAYSIZE(wszValue));
|
||
if(SUCCEEDED(hr = pstr->SetStr(wszValue)))
|
||
hr = pstr->Append(pszUrl);
|
||
break;
|
||
}
|
||
|
||
cchValueName = SIZECHARS(rgchValueName);
|
||
cbPrefix = SIZEOF(rgchPrefix);
|
||
}
|
||
|
||
RegCloseKey(hkeyPrefixes);
|
||
}
|
||
|
||
return(hr);
|
||
}
|
||
|
||
/*----------------------------------------------------------
|
||
Purpose: Grabs the default URL prefix in the registry and applies
|
||
it to the given URL.
|
||
|
||
Returns: S_OK
|
||
S_FALSE if there is no default prefix
|
||
|
||
*/
|
||
const WCHAR c_szDefaultScheme[] = L"http://";
|
||
|
||
HRESULT
|
||
UrlApplyDefaultScheme(
|
||
LPCWSTR pszUrl,
|
||
PSHSTRW pstr)
|
||
{
|
||
HRESULT hr = S_FALSE;
|
||
WCHAR szDef[MAX_PATH];
|
||
DWORD cbSize = SIZEOF(szDef);
|
||
|
||
ASSERT(pszUrl && pstr);
|
||
ASSERT(!PathIsURLW(pszUrl));
|
||
|
||
DWORD dwType;
|
||
if (NO_ERROR == SHRegGetUSValueW(c_szDefaultURLPrefixKey, NULL, &dwType, (LPVOID)szDef, &cbSize, TRUE, (LPVOID)c_szDefaultScheme, SIZEOF(c_szDefaultScheme)))
|
||
{
|
||
pszUrl = SkipLeadingSlashes(pszUrl);
|
||
|
||
if(SUCCEEDED(hr = pstr->SetStr(szDef)))
|
||
hr = pstr->Append(pszUrl);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
/*----------------------------------------------------------
|
||
Purpose: Guesses a URL protocol based upon a list in the registry,
|
||
compared to the first few characters of the given
|
||
URL suffix.
|
||
|
||
Returns: S_OK if a URL protocol is determined
|
||
S_FALSE if there were no problems but no prefix was prepended
|
||
|
||
*/
|
||
HRESULT
|
||
SHUrlApplyScheme(
|
||
LPCWSTR pszUrl,
|
||
PSHSTRW pstrOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr = S_FALSE;
|
||
|
||
ASSERT(IS_VALID_STRING_PTRW(pszUrl, -1));
|
||
|
||
//
|
||
// if there is already scheme there, we do nothing
|
||
// unless the caller insists. this is to support
|
||
// a string that looks like www.foo.com:8001.
|
||
// this is a site that needs to be guessed at but
|
||
// it also could be a valid scheme since '.' and '-'
|
||
// are both valid scheme chars.
|
||
//
|
||
DWORD cch;
|
||
if((dwFlags & URL_APPLY_FORCEAPPLY) || !FindSchemeW(pszUrl, &cch))
|
||
{
|
||
if(dwFlags & URL_APPLY_GUESSSCHEME)
|
||
hr = UrlGuessScheme(pszUrl, pstrOut);
|
||
|
||
if (hr != S_OK && (dwFlags & URL_APPLY_GUESSFILE))
|
||
{
|
||
LPCWSTR psz = FindDosPath(pszUrl);
|
||
|
||
// only change hr if we actually converted.
|
||
if(psz && SUCCEEDED(SHUrlCreateFromPath(psz, pstrOut, 0)))
|
||
hr = S_OK;
|
||
}
|
||
|
||
if (hr != S_OK && (dwFlags & URL_APPLY_DEFAULT || !dwFlags))
|
||
hr = UrlApplyDefaultScheme(pszUrl, pstrOut);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
|
||
PRIVATE HRESULT
|
||
CopyOutA(PSHSTRA pstr, LPSTR psz, LPDWORD pcch)
|
||
{
|
||
HRESULT hr;
|
||
DWORD cch;
|
||
ASSERT(pstr);
|
||
ASSERT(psz);
|
||
ASSERT(pcch);
|
||
|
||
cch = pstr->GetLen();
|
||
if ((*pcch > cch) && psz)
|
||
{
|
||
hr = StringCchCopyA(psz, *pcch, pstr->GetStr());
|
||
}
|
||
else
|
||
{
|
||
hr = E_POINTER;
|
||
}
|
||
|
||
*pcch = cch + (FAILED(hr) ? 1 : 0);
|
||
|
||
return hr;
|
||
}
|
||
|
||
//*** StrCopyOutW --
|
||
// NOTES
|
||
// WARNING: must match semantics of CopyOutW! (esp. the *pcchOut part)
|
||
PRIVATE HRESULT
|
||
StrCopyOutW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut)
|
||
{
|
||
HRESULT hr;
|
||
DWORD cch;
|
||
|
||
cch = lstrlenW(pszIn);
|
||
if ((cch < *pcchOut) && pszOut)
|
||
{
|
||
hr = StringCchCopyW(pszOut, *pcchOut, pszIn);
|
||
}
|
||
else
|
||
{
|
||
hr = E_POINTER;
|
||
}
|
||
|
||
*pcchOut = cch + (FAILED(hr) ? 1 : 0);
|
||
|
||
return hr;
|
||
}
|
||
|
||
//***
|
||
// NOTES
|
||
// WARNING: StrCopyOutW must match this func, so if you change this change
|
||
// it too
|
||
PRIVATE HRESULT
|
||
CopyOutW(PSHSTRW pstr, LPWSTR psz, LPDWORD pcch)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
DWORD cch;
|
||
ASSERT(pstr);
|
||
ASSERT(psz);
|
||
ASSERT(pcch);
|
||
|
||
cch = pstr->GetLen();
|
||
if((*pcch > cch) && psz)
|
||
{
|
||
StringCchCopyW(psz, *pcch, pstr->GetStr());
|
||
}
|
||
else
|
||
{
|
||
hr = E_POINTER;
|
||
}
|
||
|
||
*pcch = cch + (FAILED(hr) ? 1 : 0);
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
LWSTDAPI
|
||
UrlCanonicalizeA(LPCSTR pszIn,
|
||
LPSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCanonicalizeA: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCanonicalizeA: Caller passed invalid pcchOut");
|
||
RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCanonicalizeA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut == pszIn)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn
|
||
|| !pszOut
|
||
|| !pcchOut
|
||
|| !*pcchOut)
|
||
{
|
||
hr = E_INVALIDARG;
|
||
}
|
||
else
|
||
{
|
||
hr = UrlCombineA("", pszIn, pszOut, pcchOut, dwFlags);
|
||
}
|
||
return hr;
|
||
|
||
}
|
||
|
||
|
||
LWSTDAPI
|
||
UrlEscapeA(LPCSTR pszIn,
|
||
LPSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlEscapeA: Caller passed invalid pszin");
|
||
RIPMSG(NULL!=pcchOut, "UrlEscapeA: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlEscapeA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszOut)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut)
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
SHSTRW strwOut;
|
||
SHSTRW strUrl;
|
||
|
||
if(SUCCEEDED(strUrl.SetStr(pszIn)))
|
||
hr = SHUrlEscape(strUrl, &strwOut, dwFlags);
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
|
||
}
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
|
||
|
||
return hr;
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlGetPartA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlGetPartA: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartA: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlGetPartA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut || dwPart == URL_PART_NONE)
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
SHSTRW strwOut;
|
||
SHSTRW strwIn;
|
||
|
||
if(SUCCEEDED(strwIn.SetStr(pszIn)))
|
||
hr = SHUrlGetPart(&strwIn, &strwOut, dwPart, dwFlags);
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
|
||
}
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
LWSTDAPI_(BOOL) UrlIsA(LPCSTR pszURL, URLIS UrlIs)
|
||
{
|
||
BOOL fRet = FALSE;
|
||
|
||
RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlIsA: Caller passed invalid pszURL");
|
||
if(pszURL)
|
||
{
|
||
DWORD cchScheme, dwFlags;
|
||
LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme);
|
||
|
||
if(pszScheme)
|
||
{
|
||
URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags);
|
||
|
||
switch (UrlIs)
|
||
{
|
||
case URLIS_URL:
|
||
fRet = TRUE;
|
||
break;
|
||
|
||
case URLIS_OPAQUE:
|
||
fRet = (dwFlags & UPF_SCHEME_OPAQUE);
|
||
break;
|
||
|
||
case URLIS_NOHISTORY:
|
||
fRet = (dwFlags & UPF_SCHEME_NOHISTORY);
|
||
break;
|
||
|
||
case URLIS_FILEURL:
|
||
fRet = (eScheme == URL_SCHEME_FILE);
|
||
break;
|
||
|
||
default:
|
||
// if it cant be done quck and dirty
|
||
// then we need to thunk to the wide version
|
||
SHSTRW strUrl;
|
||
if (SUCCEEDED(strUrl.SetStr(pszURL)))
|
||
{
|
||
fRet = UrlIsW(strUrl, UrlIs);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return fRet;
|
||
}
|
||
|
||
LWSTDAPI_(BOOL) UrlIsW(LPCWSTR pszURL, URLIS UrlIs)
|
||
{
|
||
BOOL fRet = FALSE;
|
||
|
||
RIPMSG(NULL!=pszURL && IS_VALID_STRING_PTRW(pszURL, -1), "UrlIsW: Caller passed invalid pszURL");
|
||
if(pszURL)
|
||
{
|
||
DWORD cchScheme, dwFlags;
|
||
LPCWSTR pszScheme = FindSchemeW(pszURL, &cchScheme);
|
||
|
||
if(pszScheme)
|
||
{
|
||
SHSTRW str;
|
||
URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags);
|
||
|
||
switch (UrlIs)
|
||
{
|
||
case URLIS_URL:
|
||
fRet = TRUE;
|
||
break;
|
||
|
||
case URLIS_OPAQUE:
|
||
fRet = (dwFlags & UPF_SCHEME_OPAQUE);
|
||
break;
|
||
|
||
case URLIS_NOHISTORY:
|
||
fRet = (dwFlags & UPF_SCHEME_NOHISTORY);
|
||
break;
|
||
|
||
case URLIS_FILEURL:
|
||
fRet = (eScheme == URL_SCHEME_FILE);
|
||
break;
|
||
|
||
case URLIS_APPLIABLE:
|
||
if (eScheme == URL_SCHEME_UNKNOWN)
|
||
{
|
||
if (S_OK == UrlGuessScheme(pszURL, &str))
|
||
fRet = TRUE;
|
||
}
|
||
break;
|
||
|
||
// these cases need a broken URL
|
||
case URLIS_DIRECTORY:
|
||
case URLIS_HASQUERY:
|
||
{
|
||
URLPARTS parts;
|
||
|
||
if (SUCCEEDED(str.SetStr(pszURL))
|
||
&& SUCCEEDED(BreakUrl(str.GetInplaceStr(), &parts)))
|
||
{
|
||
switch(UrlIs)
|
||
{
|
||
case URLIS_DIRECTORY:
|
||
// if the last seg has a trailing slash, or
|
||
// if there are no path segments at all...
|
||
fRet = (!parts.cSegments || (parts.dwFlags & UPF_EXSEG_DIRECTORY));
|
||
break;
|
||
|
||
case URLIS_HASQUERY:
|
||
fRet = (parts.pszQuery && *parts.pszQuery);
|
||
break;
|
||
|
||
default:
|
||
ASSERT(FALSE);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
|
||
default:
|
||
AssertMsg(FALSE, "UrlIs() called with invalid flag");
|
||
|
||
}
|
||
}
|
||
}
|
||
return fRet;
|
||
}
|
||
|
||
|
||
LWSTDAPI_(BOOL) UrlIsOpaqueA(LPCSTR pszURL)
|
||
{
|
||
return UrlIsA(pszURL, URLIS_OPAQUE);
|
||
}
|
||
|
||
LWSTDAPI_(BOOL) UrlIsOpaqueW(LPCWSTR pszURL)
|
||
{
|
||
return UrlIsW(pszURL, URLIS_OPAQUE);
|
||
}
|
||
|
||
|
||
LWSTDAPI_(BOOL) UrlIsNoHistoryA(LPCSTR pszURL)
|
||
{
|
||
return UrlIsA(pszURL, URLIS_NOHISTORY);
|
||
}
|
||
|
||
LWSTDAPI_(BOOL) UrlIsNoHistoryW(LPCWSTR pszURL)
|
||
{
|
||
return UrlIsW(pszURL, URLIS_NOHISTORY);
|
||
}
|
||
|
||
LWSTDAPI_(LPCSTR) UrlGetLocationA(LPCSTR pszURL)
|
||
{
|
||
CPINFO cpinfo;
|
||
BOOL fMBCS = (GetCPInfo(CP_ACP, &cpinfo) && cpinfo.LeadByte[0]);
|
||
|
||
RIPMSG(pszURL && IS_VALID_STRING_PTRA(pszURL, -1), "UrlGetLocationA: Caller passed invalid pszURL");
|
||
if(pszURL)
|
||
{
|
||
DWORD cchScheme, dwFlags;
|
||
LPCSTR pszScheme = FindSchemeA(pszURL, &cchScheme);
|
||
if(pszScheme)
|
||
{
|
||
URL_SCHEME eScheme = GetSchemeTypeAndFlagsA(pszScheme, cchScheme, &dwFlags);
|
||
|
||
return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentA(pszURL, fMBCS, (eScheme == URL_SCHEME_FILE));
|
||
}
|
||
}
|
||
return NULL;
|
||
}
|
||
|
||
LWSTDAPI_(LPCWSTR) UrlGetLocationW(LPCWSTR wzURL)
|
||
{
|
||
RIPMSG(wzURL && IS_VALID_STRING_PTRW(wzURL, -1), "UrlGetLocationW: Caller passed invalid wzURL");
|
||
if(wzURL)
|
||
{
|
||
DWORD cchScheme, dwFlags;
|
||
LPCWSTR pszScheme = FindSchemeW(wzURL, &cchScheme);
|
||
if(pszScheme)
|
||
{
|
||
URL_SCHEME eScheme = GetSchemeTypeAndFlagsW(pszScheme, cchScheme, &dwFlags);
|
||
|
||
return (dwFlags & UPF_SCHEME_OPAQUE) ? NULL : FindFragmentW(wzURL, (eScheme == URL_SCHEME_FILE));
|
||
}
|
||
}
|
||
return NULL;
|
||
}
|
||
|
||
|
||
LWSTDAPI_(int) UrlCompareA(LPCSTR psz1, LPCSTR psz2, BOOL fIgnoreSlash)
|
||
{
|
||
RIPMSG(psz1 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz1");
|
||
RIPMSG(psz2 && IS_VALID_STRING_PTRA(psz1, -1), "UrlCompareA: Caller passed invalid psz2");
|
||
if (psz1 && psz2)
|
||
{
|
||
SHSTRW str1, str2;
|
||
|
||
if(SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) &&
|
||
SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)) )
|
||
{
|
||
if(fIgnoreSlash)
|
||
{
|
||
LPWSTR pch;
|
||
|
||
pch = str1.GetInplaceStr() + str1.GetLen() - 1;
|
||
if(*pch == SLASH)
|
||
TERMSTR(pch);
|
||
|
||
pch = str2.GetInplaceStr() + str2.GetLen() - 1;
|
||
if(*pch == SLASH)
|
||
TERMSTR(pch);
|
||
}
|
||
|
||
return StrCmpW(str1, str2);
|
||
}
|
||
}
|
||
|
||
return lstrcmpA(psz1, psz2);
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlUnescapeA(LPSTR pszUrl, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
|
||
{
|
||
RIPMSG(pszUrl && IS_VALID_STRING_PTRA(pszUrl, -1), "UrlUnescapeA: Caller passed invalid pszUrl");
|
||
|
||
if(dwFlags & URL_UNESCAPE_INPLACE)
|
||
{
|
||
return SHUrlUnescapeA(pszUrl, dwFlags);
|
||
}
|
||
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeA: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlUnescapeA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszUrl)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszUrl
|
||
|| !pcchOut
|
||
|| !*pcchOut
|
||
|| !pszOut)
|
||
{
|
||
return E_INVALIDARG;
|
||
}
|
||
|
||
SHSTRA str;
|
||
HRESULT hr = str.SetStr(pszUrl);
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
SHUrlUnescapeA(str.GetInplaceStr(), dwFlags);
|
||
hr = CopyOutA(&str, pszOut, pcchOut);
|
||
}
|
||
return hr;
|
||
}
|
||
|
||
|
||
|
||
|
||
LWSTDAPI
|
||
PathCreateFromUrlA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "PathCreateFromUrlA: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlA: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "PathCreateFromUrlA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut )
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
SHSTRW strwOut;
|
||
SHSTRW strwIn;
|
||
|
||
if(SUCCEEDED(strwIn.SetStr(pszIn)))
|
||
hr = SHPathCreateFromUrl(strwIn, &strwOut, dwFlags);
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = straOut.SetStr(strwOut);
|
||
}
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = CopyOutA(&straOut, pszOut, pcchOut);
|
||
|
||
return hr;
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlCreateFromPathA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlCreateFromPathA: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathA: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCreateFromPathA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut )
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
SHSTRW strwOut;
|
||
SHSTRW strwIn;
|
||
|
||
if(SUCCEEDED(strwIn.SetStr(pszIn)))
|
||
hr = SHUrlCreateFromPath(strwIn, &strwOut, dwFlags);
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
|
||
}
|
||
}
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = ReconcileHresults(hr, CopyOutA(&straOut, pszOut, pcchOut));
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlApplySchemeA(LPCSTR pszIn, LPSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRA(pszIn, -1), "UrlApplySchemeA: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeA: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlApplySchemeA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut )
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
SHSTRW strwOut;
|
||
SHSTRW strwIn;
|
||
|
||
if(SUCCEEDED(strwIn.SetStr(pszIn)))
|
||
hr = SHUrlApplyScheme(strwIn, &strwOut, dwFlags);
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
if(S_OK == (hr))
|
||
hr = straOut.SetStr(strwOut);
|
||
}
|
||
|
||
if(S_OK == (hr))
|
||
hr = CopyOutA(&straOut, pszOut, pcchOut);
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
// PERF_CACHE
|
||
//*** g_szUCCanon -- 1-element cache for UrlCanonicalizeW
|
||
// DESCRIPTION
|
||
// it turns out a large # of our calls a) are for the same thing,
|
||
// and b) have pszOut(canon)=pszIn(raw). so cache the most recent guy.
|
||
LONG g_lockUC;
|
||
WCHAR g_szUCCanon[64]; // post-canon guy (also used for pre-canon check)
|
||
DWORD g_dwUCFlags;
|
||
|
||
#ifdef DEBUG
|
||
int g_cUCTot, g_cUCHit;
|
||
#endif
|
||
|
||
LWSTDAPI
|
||
UrlCanonicalizeW(LPCWSTR pszUrl,
|
||
LPWSTR pszCanonicalized,
|
||
LPDWORD pcchCanonicalized,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strwOut;
|
||
|
||
RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlCanonicalizeW: Caller passed invalid pszUrl");
|
||
RIPMSG(NULL!=pcchCanonicalized && IS_VALID_WRITE_PTR(pcchCanonicalized, DWORD), "UrlCanonicalizeW: Caller passed invalid pcchCanonicalized");
|
||
RIPMSG(NULL==pcchCanonicalized || (pszCanonicalized && IS_VALID_WRITE_BUFFER(pszCanonicalized, char, *pcchCanonicalized)), "UrlCanonicalizeW: Caller passed invalid pszCanonicalized");
|
||
#ifdef DEBUG
|
||
if (pcchCanonicalized)
|
||
{
|
||
if (pszCanonicalized == pszUrl)
|
||
DEBUGWhackPathStringW(pszCanonicalized, *pcchCanonicalized);
|
||
else
|
||
DEBUGWhackPathBufferW(pszCanonicalized, *pcchCanonicalized);
|
||
}
|
||
#endif
|
||
|
||
if (!pszUrl
|
||
|| !pszCanonicalized
|
||
|| !pcchCanonicalized
|
||
|| !*pcchCanonicalized)
|
||
{
|
||
hr = E_INVALIDARG;
|
||
}
|
||
else
|
||
{
|
||
#ifdef DEBUG
|
||
if ((g_cUCTot % 10) == 0)
|
||
TraceMsg(DM_PERF, "uc: tot=%d hit=%d", g_cUCTot, g_cUCHit);
|
||
#endif
|
||
|
||
DBEXEC(TRUE, g_cUCTot++);
|
||
// try the cache 1st
|
||
if (InterlockedExchange(&g_lockUC, 1) == 0) {
|
||
hr = E_FAIL;
|
||
if ((g_dwUCFlags==dwFlags)
|
||
&&
|
||
(!(dwFlags & URL_ESCAPE_PERCENT))
|
||
&&
|
||
StrCmpCW(pszUrl, g_szUCCanon) == 0)
|
||
{
|
||
DBEXEC(TRUE, g_cUCHit++);
|
||
DWORD cchTmp = *pcchCanonicalized;
|
||
hr = StrCopyOutW(g_szUCCanon, pszCanonicalized, pcchCanonicalized);
|
||
if (FAILED(hr))
|
||
*pcchCanonicalized = cchTmp; // restore!
|
||
}
|
||
InterlockedExchange(&g_lockUC, 0);
|
||
if (SUCCEEDED(hr))
|
||
return hr;
|
||
}
|
||
|
||
hr = UrlCombineW(L"", pszUrl, pszCanonicalized, pcchCanonicalized, dwFlags);
|
||
if (SUCCEEDED(hr) && *pcchCanonicalized < ARRAYSIZE(g_szUCCanon)) {
|
||
if (InterlockedExchange(&g_lockUC, 1) == 0)
|
||
{
|
||
StringCchCopyW(g_szUCCanon, ARRAYSIZE(g_szUCCanon), pszCanonicalized);
|
||
g_dwUCFlags = dwFlags;
|
||
InterlockedExchange(&g_lockUC, 0);
|
||
}
|
||
}
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlEscapeW(LPCWSTR pszUrl,
|
||
LPWSTR pszEscaped,
|
||
LPDWORD pcchEscaped,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strwOut;
|
||
|
||
RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlEscapeW: Caller passed invalid pszUrl");
|
||
RIPMSG(NULL!=pcchEscaped && IS_VALID_WRITE_PTR(pcchEscaped, DWORD), "UrlEscapeW: Caller passed invalid pcchEscaped");
|
||
RIPMSG(pszEscaped && (NULL==pcchEscaped || IS_VALID_WRITE_BUFFER(pszEscaped, WCHAR, *pcchEscaped)), "UrlEscapeW: Caller passed invalid pszEscaped");
|
||
#ifdef DEBUG
|
||
if (pcchEscaped)
|
||
{
|
||
if (pszEscaped==pszUrl)
|
||
DEBUGWhackPathStringW(pszEscaped, *pcchEscaped);
|
||
else
|
||
DEBUGWhackPathBufferW(pszEscaped, *pcchEscaped);
|
||
}
|
||
#endif
|
||
|
||
if (!pszUrl || !pszEscaped ||
|
||
!pcchEscaped || !*pcchEscaped)
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
hr = SHUrlEscape(pszUrl, &strwOut, dwFlags);
|
||
}
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = CopyOutW(&strwOut, pszEscaped, pcchEscaped);
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
LWSTDAPI_(int) UrlCompareW(LPCWSTR psz1, LPCWSTR psz2, BOOL fIgnoreSlash)
|
||
{
|
||
RIPMSG(psz1 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz1");
|
||
RIPMSG(psz2 && IS_VALID_STRING_PTRW(psz1, -1), "UrlCompareW: Caller passed invalid psz2");
|
||
if (psz1 && psz2)
|
||
{
|
||
SHSTRW str1, str2;
|
||
|
||
if( SUCCEEDED(str1.SetStr(psz1)) && SUCCEEDED(str2.SetStr(psz2)) &&
|
||
SUCCEEDED(SHUrlUnescapeW(str1.GetInplaceStr(), 0)) && SUCCEEDED(SHUrlUnescapeW(str2.GetInplaceStr(), 0)))
|
||
{
|
||
if(fIgnoreSlash)
|
||
{
|
||
LPWSTR pch;
|
||
|
||
pch = str1.GetInplaceStr() + str1.GetLen() - 1;
|
||
if(*pch == SLASH)
|
||
TERMSTR(pch);
|
||
|
||
pch = str2.GetInplaceStr() + str2.GetLen() - 1;
|
||
if(*pch == SLASH)
|
||
TERMSTR(pch);
|
||
}
|
||
|
||
return StrCmpW(str1, str2);
|
||
}
|
||
}
|
||
|
||
return StrCmpW(psz1, psz2);
|
||
}
|
||
|
||
|
||
|
||
LWSTDAPI
|
||
UrlUnescapeW(LPWSTR pszUrl, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwFlags)
|
||
{
|
||
RIPMSG(pszUrl && IS_VALID_STRING_PTRW(pszUrl, -1), "UrlUnescapeW: Caller passed invalid pszUrl");
|
||
|
||
if(dwFlags & URL_UNESCAPE_INPLACE)
|
||
{
|
||
return SHUrlUnescapeW(pszUrl, dwFlags);
|
||
}
|
||
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlUnescapeW: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlUnescapeW: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszUrl)
|
||
DEBUGWhackPathStringW(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferW(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszUrl
|
||
|| !pcchOut
|
||
|| !*pcchOut
|
||
|| !pszOut)
|
||
{
|
||
return E_INVALIDARG;
|
||
}
|
||
|
||
SHSTRW str;
|
||
HRESULT hr = str.SetStr(pszUrl);
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
SHUrlUnescapeW(str.GetInplaceStr(), dwFlags);
|
||
hr = CopyOutW(&str, pszOut, pcchOut);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
LWSTDAPI
|
||
PathCreateFromUrlW
|
||
(LPCWSTR pszIn,
|
||
LPWSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "PathCreateFromUrlW: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "PathCreateFromUrlW: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "PathCreateFromUrlW: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringW(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferW(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut )
|
||
hr = E_INVALIDARG;
|
||
else
|
||
hr = SHPathCreateFromUrl(pszIn, &strOut, dwFlags);
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = CopyOutW(&strOut, pszOut, pcchOut);
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlCreateFromPathW
|
||
(LPCWSTR pszIn,
|
||
LPWSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlCreateFromPathW: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlCreateFromPathW: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlCreateFromPathW: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringW(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferW(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut )
|
||
hr = E_INVALIDARG;
|
||
else
|
||
hr = SHUrlCreateFromPath(pszIn, &strOut, dwFlags);
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = ReconcileHresults(hr, CopyOutW(&strOut, pszOut, pcchOut));
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlGetPartW(LPCWSTR pszIn, LPWSTR pszOut, LPDWORD pcchOut, DWORD dwPart, DWORD dwFlags)
|
||
{
|
||
SHSTRW strIn, strOut;
|
||
HRESULT hr;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlGetPartW: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlGetPartW: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlGetPartW: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringW(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferW(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut || !dwPart)
|
||
hr = E_INVALIDARG;
|
||
else if (SUCCEEDED(hr = strIn.SetStr(pszIn)))
|
||
hr = SHUrlGetPart(&strIn, &strOut, dwPart, dwFlags);
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = CopyOutW(&strOut, pszOut, pcchOut);
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
LWSTDAPI
|
||
UrlApplySchemeW
|
||
(LPCWSTR pszIn,
|
||
LPWSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRW strOut;
|
||
|
||
RIPMSG(pszIn && IS_VALID_STRING_PTRW(pszIn, -1), "UrlApplySchemeW: Caller passed invalid pszIn");
|
||
RIPMSG(NULL!=pcchOut && IS_VALID_WRITE_PTR(pcchOut, DWORD), "UrlApplySchemeW: Caller passed invalid pcchOut");
|
||
RIPMSG(pszOut && (NULL==pcchOut || IS_VALID_WRITE_BUFFER(pszOut, WCHAR, *pcchOut)), "UrlApplySchemeW: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut==pszIn)
|
||
DEBUGWhackPathStringW(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferW(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszIn || !pszOut ||
|
||
!pcchOut || !*pcchOut )
|
||
hr = E_INVALIDARG;
|
||
else
|
||
hr = SHUrlApplyScheme(pszIn, &strOut, dwFlags);
|
||
|
||
if(S_OK == (hr))
|
||
hr = CopyOutW(&strOut, pszOut, pcchOut);
|
||
|
||
return hr;
|
||
|
||
}
|
||
|
||
//
|
||
// this is the same table used by both URLMON and WININET's cache
|
||
//
|
||
const static BYTE Translate[256] =
|
||
{
|
||
1, 14,110, 25, 97,174,132,119,138,170,125,118, 27,233,140, 51,
|
||
87,197,177,107,234,169, 56, 68, 30, 7,173, 73,188, 40, 36, 65,
|
||
49,213,104,190, 57,211,148,223, 48,115, 15, 2, 67,186,210, 28,
|
||
12,181,103, 70, 22, 58, 75, 78,183,167,238,157,124,147,172,144,
|
||
176,161,141, 86, 60, 66,128, 83,156,241, 79, 46,168,198, 41,254,
|
||
178, 85,253,237,250,154,133, 88, 35,206, 95,116,252,192, 54,221,
|
||
102,218,255,240, 82,106,158,201, 61, 3, 89, 9, 42,155,159, 93,
|
||
166, 80, 50, 34,175,195,100, 99, 26,150, 16,145, 4, 33, 8,189,
|
||
121, 64, 77, 72,208,245,130,122,143, 55,105,134, 29,164,185,194,
|
||
193,239,101,242, 5,171,126, 11, 74, 59,137,228,108,191,232,139,
|
||
6, 24, 81, 20,127, 17, 91, 92,251,151,225,207, 21, 98,113,112,
|
||
84,226, 18,214,199,187, 13, 32, 94,220,224,212,247,204,196, 43,
|
||
249,236, 45,244,111,182,153,136,129, 90,217,202, 19,165,231, 71,
|
||
230,142, 96,227, 62,179,246,114,162, 53,160,215,205,180, 47,109,
|
||
44, 38, 31,149,135, 0,216, 52, 63, 23, 37, 69, 39,117,146,184,
|
||
163,200,222,235,248,243,219, 10,152,131,123,229,203, 76,120,209
|
||
};
|
||
|
||
PRIVATE void _HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash)
|
||
{
|
||
DWORD i, j;
|
||
// seed the hash
|
||
for (i = cbHash; i-- > 0;)
|
||
pbHash[i] = (BYTE) i;
|
||
|
||
// do the hash
|
||
for (j = cbData; j-- > 0;)
|
||
{
|
||
for (i = cbHash; i-- > 0;)
|
||
pbHash[i] = Translate[pbHash[i] ^ pbData[j]];
|
||
}
|
||
}
|
||
|
||
LWSTDAPI
|
||
HashData(LPBYTE pbData, DWORD cbData, LPBYTE pbHash, DWORD cbHash)
|
||
{
|
||
RIPMSG(pbData && IS_VALID_READ_BUFFER(pbData, BYTE, cbData), "HashData: Caller passed invalid pbData");
|
||
RIPMSG(pbHash && IS_VALID_WRITE_BUFFER(pbHash, BYTE, cbHash), "HashData: Caller passed invalid pbHash");
|
||
if (pbData && pbHash)
|
||
{
|
||
_HashData(pbData, cbData, pbHash, cbHash);
|
||
return S_OK;
|
||
}
|
||
return E_INVALIDARG;
|
||
}
|
||
|
||
|
||
LWSTDAPI
|
||
UrlHashA(LPCSTR psz, LPBYTE pb, DWORD cb)
|
||
{
|
||
HRESULT hr = E_INVALIDARG;
|
||
|
||
RIPMSG(psz && IS_VALID_STRING_PTRA(psz, -1), "UrlHashA: Caller passed invalid psz");
|
||
RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashA: Caller passed invalid pb");
|
||
if (psz && pb)
|
||
{
|
||
_HashData((LPBYTE) psz, lstrlenA(psz), pb, cb);
|
||
return S_OK;
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
LWSTDAPI
|
||
UrlHashW(LPCWSTR psz, LPBYTE pb, DWORD cb)
|
||
{
|
||
HRESULT hr;
|
||
|
||
RIPMSG(psz && IS_VALID_STRING_PTRW(psz, -1), "UrlHashW: Caller passed invalid psz");
|
||
RIPMSG(pb && IS_VALID_WRITE_BUFFER(pb, BYTE, cb), "UrlHashW: Caller passed invalid pb");
|
||
if (psz && pb)
|
||
{
|
||
SHSTRA str;
|
||
if (SUCCEEDED( hr = str.SetStr(psz)))
|
||
hr = UrlHashA(str, pb, cb);
|
||
}
|
||
else
|
||
{
|
||
hr = E_INVALIDARG;
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
|
||
/***************************** ParseURL Functions *****************************/
|
||
// these were originally in URL.DLL and then moved to shlwapi.
|
||
// i just added them from url.c for reuse of code.
|
||
// ParseURL now does no MBCS thunks, to keep it fast.
|
||
//
|
||
// declarations for ParseURL() APIs
|
||
//
|
||
|
||
typedef const PARSEDURLA CPARSEDURLA;
|
||
typedef const PARSEDURLA * PCPARSEDURLA;
|
||
|
||
typedef const PARSEDURLW CPARSEDURLW;
|
||
typedef const PARSEDURLW * PCPARSEDURLW;
|
||
|
||
|
||
#ifdef DEBUG
|
||
|
||
BOOL
|
||
IsValidPCPARSEDURLA(
|
||
LPCSTR pcszURL,
|
||
PCPARSEDURLA pcpu)
|
||
{
|
||
return(IS_VALID_READ_PTR(pcpu, CPARSEDURLA) &&
|
||
(IS_VALID_STRING_PTRA(pcpu->pszProtocol, -1) &&
|
||
EVAL(IsStringContainedA(pcszURL, pcpu->pszProtocol)) &&
|
||
EVAL(pcpu->cchProtocol < (UINT)lstrlenA(pcpu->pszProtocol))) &&
|
||
(IS_VALID_STRING_PTRA(pcpu->pszSuffix, -1) &&
|
||
EVAL(IsStringContainedA(pcszURL, pcpu->pszSuffix)) &&
|
||
EVAL(pcpu->cchSuffix <= (UINT)lstrlenA(pcpu->pszSuffix))) &&
|
||
EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenA(pcszURL)));
|
||
}
|
||
|
||
BOOL
|
||
IsValidPCPARSEDURLW(
|
||
LPCWSTR pcszURL,
|
||
PCPARSEDURLW pcpu)
|
||
{
|
||
return(IS_VALID_READ_PTR(pcpu, CPARSEDURLW) &&
|
||
(IS_VALID_STRING_PTRW(pcpu->pszProtocol, -1) &&
|
||
EVAL(IsStringContainedW(pcszURL, pcpu->pszProtocol)) &&
|
||
EVAL(pcpu->cchProtocol < (UINT)lstrlenW(pcpu->pszProtocol))) &&
|
||
(IS_VALID_STRING_PTRW(pcpu->pszSuffix, -1) &&
|
||
EVAL(IsStringContainedW(pcszURL, pcpu->pszSuffix)) &&
|
||
EVAL(pcpu->cchSuffix <= (UINT)lstrlenW(pcpu->pszSuffix))) &&
|
||
EVAL(pcpu->cchProtocol + pcpu->cchSuffix < (UINT)lstrlenW(pcszURL)));
|
||
}
|
||
|
||
#endif
|
||
|
||
|
||
/*----------------------------------------------------------
|
||
Purpose: Parse the given path into the PARSEDURL structure.
|
||
|
||
******
|
||
****** This function must not do any extraneous
|
||
****** things. It must be small and fast.
|
||
******
|
||
|
||
Returns: NOERROR if a valid URL format
|
||
URL_E_INVALID_SYNTAX if not
|
||
|
||
Cond: --
|
||
*/
|
||
STDMETHODIMP
|
||
ParseURLA(
|
||
LPCSTR pcszURL,
|
||
PPARSEDURLA ppu)
|
||
{
|
||
HRESULT hr = E_INVALIDARG;
|
||
|
||
RIP(IS_VALID_STRING_PTRA(pcszURL, -1));
|
||
RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLA));
|
||
|
||
if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize)
|
||
{
|
||
DWORD cch;
|
||
hr = URL_E_INVALID_SYNTAX; // assume error
|
||
|
||
ppu->pszProtocol = FindSchemeA(pcszURL, &cch);
|
||
|
||
if(ppu->pszProtocol)
|
||
{
|
||
ppu->cchProtocol = cch;
|
||
|
||
// Determine protocol scheme number
|
||
ppu->nScheme = SchemeTypeFromStringA(ppu->pszProtocol, cch);
|
||
|
||
ppu->pszSuffix = ppu->pszProtocol + cch + 1;
|
||
|
||
//
|
||
// APPCOMPAT - Backwards compatibility - zekel 28-feb-97
|
||
// ParseURL() believes in file: urls like "file://C:\foo\bar"
|
||
// and some pieces of code will use it to get the Dos Path.
|
||
// new code should always call PathCreateFromUrl() to
|
||
// get the dos path of a file: URL.
|
||
//
|
||
// i am leaving this behavior in case some compat stuff is out there.
|
||
//
|
||
if (URL_SCHEME_FILE == ppu->nScheme &&
|
||
'/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1])
|
||
{
|
||
// Yes; skip the "//"
|
||
ppu->pszSuffix += 2;
|
||
|
||
#ifndef UNIX
|
||
// FOR UNIX: If we have /vobs/build, we don't want to make
|
||
// There might be a third slash. Skip it.
|
||
if ('/' == *ppu->pszSuffix)
|
||
ppu->pszSuffix++;
|
||
#endif
|
||
|
||
}
|
||
|
||
ppu->cchSuffix = lstrlenA(ppu->pszSuffix);
|
||
|
||
hr = S_OK;
|
||
}
|
||
}
|
||
|
||
#ifdef DEBUG
|
||
if (hr == S_OK)
|
||
{
|
||
CHAR rgchDebugProtocol[MAX_PATH];
|
||
CHAR rgchDebugSuffix[MAX_PATH];
|
||
|
||
// (+ 1) for null terminator.
|
||
|
||
lstrcpynA(rgchDebugProtocol, ppu->pszProtocol,
|
||
min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol)));
|
||
|
||
// (+ 1) for null terminator.
|
||
|
||
lstrcpynA(rgchDebugSuffix, ppu->pszSuffix,
|
||
min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix)));
|
||
|
||
TraceMsgA(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".",
|
||
rgchDebugProtocol,
|
||
rgchDebugSuffix,
|
||
pcszURL);
|
||
}
|
||
else
|
||
{
|
||
TraceMsgA(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL);
|
||
}
|
||
#endif
|
||
|
||
|
||
ASSERT(FAILED(hr) ||
|
||
EVAL(IsValidPCPARSEDURLA(pcszURL, ppu)));
|
||
|
||
return(hr);
|
||
}
|
||
|
||
|
||
/*----------------------------------------------------------
|
||
Purpose: Parse the given path into the PARSEDURL structure.
|
||
|
||
******
|
||
****** This function must not do any extraneous
|
||
****** things. It must be small and fast.
|
||
******
|
||
|
||
Returns: NOERROR if a valid URL format
|
||
URL_E_INVALID_SYNTAX if not
|
||
|
||
Cond: --
|
||
*/
|
||
STDMETHODIMP
|
||
ParseURLW(
|
||
LPCWSTR pcszURL,
|
||
PPARSEDURLW ppu)
|
||
{
|
||
HRESULT hr = E_INVALIDARG;
|
||
|
||
RIP(IS_VALID_STRING_PTRW(pcszURL, -1));
|
||
RIP(IS_VALID_WRITE_PTR(ppu, PARSEDURLW));
|
||
|
||
if (pcszURL && ppu && SIZEOF(*ppu) == ppu->cbSize)
|
||
{
|
||
DWORD cch;
|
||
hr = URL_E_INVALID_SYNTAX; // assume error
|
||
|
||
ppu->pszProtocol = FindSchemeW(pcszURL, &cch);
|
||
|
||
if(ppu->pszProtocol)
|
||
{
|
||
ppu->cchProtocol = cch;
|
||
|
||
// Determine protocol scheme number
|
||
ppu->nScheme = SchemeTypeFromStringW(ppu->pszProtocol, cch);
|
||
|
||
ppu->pszSuffix = ppu->pszProtocol + cch + 1;
|
||
|
||
//
|
||
// APPCOMPAT - Backwards compatibility - zekel 28-feb-97
|
||
// ParseURL() believes in file: urls like "file://C:\foo\bar"
|
||
// and some pieces of code will use it to get the Dos Path.
|
||
// new code should always call PathCreateFromUrl() to
|
||
// get the dos path of a file: URL.
|
||
//
|
||
// i am leaving this behavior in case some compat stuff is out there.
|
||
//
|
||
if (URL_SCHEME_FILE == ppu->nScheme &&
|
||
'/' == ppu->pszSuffix[0] && '/' == ppu->pszSuffix[1])
|
||
{
|
||
// Yes; skip the "//"
|
||
ppu->pszSuffix += 2;
|
||
|
||
#ifndef UNIX
|
||
// There might be a third slash. Skip it.
|
||
// IEUNIX - On UNIX, it's a root directory, so don't skip it!
|
||
if ('/' == *ppu->pszSuffix)
|
||
ppu->pszSuffix++;
|
||
#endif
|
||
}
|
||
|
||
ppu->cchSuffix = lstrlenW(ppu->pszSuffix);
|
||
|
||
hr = S_OK;
|
||
}
|
||
}
|
||
|
||
|
||
#ifdef DEBUG
|
||
if (hr==S_OK)
|
||
{
|
||
WCHAR rgchDebugProtocol[MAX_PATH];
|
||
WCHAR rgchDebugSuffix[MAX_PATH];
|
||
|
||
// (+ 1) for null terminator.
|
||
|
||
StrCpyNW(rgchDebugProtocol, ppu->pszProtocol,
|
||
min(ppu->cchProtocol + 1, SIZECHARS(rgchDebugProtocol)));
|
||
|
||
// (+ 1) for null terminator.
|
||
|
||
StrCpyNW(rgchDebugSuffix, ppu->pszSuffix,
|
||
min(ppu->cchSuffix + 1, SIZECHARS(rgchDebugSuffix)));
|
||
|
||
TraceMsg(TF_URL, "ParseURL(): Parsed protocol \"%s\" and suffix \"%s\" from URL \"%s\".",
|
||
rgchDebugProtocol,
|
||
rgchDebugSuffix,
|
||
pcszURL);
|
||
}
|
||
else
|
||
{
|
||
TraceMsg(TF_URL, "ParseURL(): Failed to parse \"%s\"", pcszURL);
|
||
}
|
||
#endif
|
||
|
||
ASSERT(FAILED(hr) ||
|
||
EVAL(IsValidPCPARSEDURLW(pcszURL, ppu)));
|
||
|
||
return(hr);
|
||
}
|
||
|
||
#ifdef USE_FAST_PARSER
|
||
|
||
// GetSchemeTypeAndFlagsSpecialW
|
||
// performs the same behavior as GetSchemeTypeAndFlagsW plus, when successful
|
||
// copies the canonicalised form of the scheme back.
|
||
|
||
PRIVATE URL_SCHEME
|
||
GetSchemeTypeAndFlagsSpecialW(LPWSTR pszScheme, DWORD cchScheme, LPDWORD pdwFlags)
|
||
{
|
||
DWORD i;
|
||
|
||
ASSERT(pszScheme);
|
||
|
||
|
||
#ifdef DEBUG
|
||
if ((g_cSTTot % 10) == 0)
|
||
TraceMsg(DM_PERF, "gstaf: tot=%d hit=%d hit0=%d", g_cSTTot, g_cSTHit, g_cSTHit0);
|
||
#endif
|
||
DBEXEC(TRUE, g_cSTTot++);
|
||
// check cache 1st
|
||
i = g_iScheme;
|
||
if (cchScheme == g_mpUrlSchemeTypes[i].cchScheme
|
||
&& StrCmpNCW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme) == 0)
|
||
{
|
||
DBEXEC(TRUE, i == 0 ? g_cSTHit0++ : g_cSTHit++);
|
||
Lhit:
|
||
if (pdwFlags)
|
||
*pdwFlags = g_mpUrlSchemeTypes[i].dwFlags;
|
||
|
||
// update cache (unconditionally)
|
||
g_iScheme = i;
|
||
|
||
// We need to do this because the scheme might not be canonicalised
|
||
memcpy(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme*sizeof(WCHAR));
|
||
return g_mpUrlSchemeTypes[i].eScheme;
|
||
}
|
||
|
||
for (i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); i++)
|
||
{
|
||
if(cchScheme == g_mpUrlSchemeTypes[i].cchScheme
|
||
&& 0 == StrCmpNIW(pszScheme, g_mpUrlSchemeTypes[i].pszScheme, cchScheme))
|
||
goto Lhit;
|
||
}
|
||
|
||
if (pdwFlags)
|
||
{
|
||
*pdwFlags = 0;
|
||
}
|
||
return URL_SCHEME_UNKNOWN;
|
||
}
|
||
|
||
|
||
|
||
// URL_STRING --------------------------------------------------------------------------------------
|
||
|
||
// is a container for the combined URL. It attempts to construct a string from the information
|
||
// fed into it. If there is not enough buffer space available, it will measure how much additional
|
||
// space will be required to hold the string.
|
||
|
||
WCHAR wszBogus[] = L"";
|
||
|
||
|
||
// US_* are the various modes of transforming characters fed into the container.
|
||
// US_NOTHING do nothing to the character.
|
||
// US_UNESCAPE turn entries of the form %xx into the unescaped form
|
||
// US_ESCAPE_UNSAFE transform invalid path characters into %xx sequences
|
||
// US_ESCAPE_SPACES transform only spaces in to %20 sequences
|
||
|
||
enum
|
||
{
|
||
US_NOTHING,
|
||
US_UNESCAPE,
|
||
US_ESCAPE_UNSAFE,
|
||
US_ESCAPE_SPACES
|
||
};
|
||
|
||
class URL_STRING
|
||
{
|
||
protected:
|
||
URL_SCHEME _eScheme;
|
||
DWORD _ccWork, _ccMark, _ccLastWhite, _ccQuery, _ccFragment, _ccBuffer, _dwSchemeInfo;
|
||
DWORD _dwOldFlags, _dwFlags, _dwMode;
|
||
BOOL _fFixSlashes, _fExpecting, _fError;
|
||
WCHAR _wchLast, _wszInternalString[256];
|
||
PWSTR _pszWork;
|
||
|
||
VOID baseAccept(WCHAR wch);
|
||
VOID TrackWhiteSpace(WCHAR wch);
|
||
|
||
public:
|
||
URL_STRING(DWORD dwFlags);
|
||
~URL_STRING();
|
||
|
||
VOID CleanAccept(WCHAR wch);
|
||
VOID Accept(WCHAR wch);
|
||
VOID Accept(PWSTR a_psz);
|
||
VOID Contract(BOOL fContractLevel = TRUE);
|
||
VOID TrimEndWhiteSpace();
|
||
|
||
PWSTR GetStart();
|
||
LONG GetTotalLength();
|
||
BOOL AnyProblems();
|
||
|
||
VOID NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo);
|
||
VOID AddSchemeNote(DWORD a_dwSchemeInfo);
|
||
DWORD GetSchemeNotes();
|
||
URL_SCHEME QueryScheme();
|
||
|
||
VOID Mark();
|
||
VOID ClearMark();
|
||
VOID EraseMarkedText();
|
||
DWORD CompareMarkWith(PWSTR psz);
|
||
DWORD CompareLast(PCWSTR psz, DWORD cc);
|
||
|
||
VOID EnableMunging();
|
||
VOID DisableMunging();
|
||
VOID DisableSlashFixing();
|
||
VOID RestoreFlags();
|
||
VOID AddFlagNote(DWORD dwFlag);
|
||
|
||
VOID NotifyQuery();
|
||
VOID NotifyFragment();
|
||
VOID DropQuery();
|
||
VOID DropFragment();
|
||
};
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
URL_STRING::URL_STRING(DWORD dwFlags)
|
||
{
|
||
_ccBuffer = ARRAYSIZE(_wszInternalString);
|
||
_ccWork = 1;
|
||
_pszWork = _wszInternalString;
|
||
_ccQuery = _ccFragment = _ccMark = 0;
|
||
|
||
_eScheme = URL_SCHEME_UNKNOWN;
|
||
_dwOldFlags = _dwFlags = dwFlags;
|
||
_dwMode = US_NOTHING;
|
||
|
||
_fFixSlashes = TRUE;
|
||
_fError = _fExpecting = FALSE;
|
||
}
|
||
|
||
URL_STRING::~URL_STRING()
|
||
{
|
||
if (_ccBuffer > ARRAYSIZE(_wszInternalString))
|
||
{
|
||
LocalFree(_pszWork);
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
// These are the standard functions used for adding characters to an url.
|
||
|
||
VOID URL_STRING::baseAccept(WCHAR wch)
|
||
{
|
||
_pszWork[_ccWork-1] = (_fFixSlashes
|
||
? ((wch!=WHACK) ? wch : SLASH)
|
||
: wch);
|
||
_ccWork++;
|
||
if (_ccWork>_ccBuffer)
|
||
{
|
||
if (!_fError)
|
||
{
|
||
PWSTR psz = (PWSTR)LocalAlloc(LPTR, 2*_ccBuffer*sizeof(WCHAR));
|
||
if (!psz)
|
||
{
|
||
_ccWork--;
|
||
_fError = TRUE;
|
||
return;
|
||
}
|
||
memcpy(psz, _pszWork, (_ccWork-1)*sizeof(WCHAR));
|
||
if (_ccBuffer>ARRAYSIZE(_wszInternalString))
|
||
{
|
||
LocalFree(_pszWork);
|
||
}
|
||
_ccBuffer *= 2;
|
||
_pszWork = psz;
|
||
}
|
||
else
|
||
{
|
||
_ccWork--;
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
VOID URL_STRING::TrackWhiteSpace(WCHAR wch)
|
||
{
|
||
if (IsWhite(wch))
|
||
{
|
||
if (!_ccLastWhite)
|
||
{
|
||
_ccLastWhite = _ccWork;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
_ccLastWhite = 0;
|
||
}
|
||
}
|
||
|
||
|
||
// -- URL_STRING::Accept ----------------------------
|
||
// Based on the current munging mode, transform the character into the
|
||
// desired form and add it to the string.
|
||
|
||
VOID URL_STRING::Accept(WCHAR wch)
|
||
{
|
||
TrackWhiteSpace(wch);
|
||
|
||
switch (_dwMode)
|
||
{
|
||
case US_NOTHING:
|
||
break;
|
||
|
||
case US_UNESCAPE:
|
||
if (_fExpecting)
|
||
{
|
||
if (!IsHex(wch))
|
||
{
|
||
baseAccept(HEX_ESCAPE);
|
||
if (_wchLast!=L'\0')
|
||
{
|
||
baseAccept(_wchLast);
|
||
}
|
||
_fExpecting = FALSE;
|
||
break;
|
||
}
|
||
else if (_wchLast!=L'\0')
|
||
{
|
||
wch = (HexToWord(_wchLast)*16) + HexToWord(wch);
|
||
TrackWhiteSpace(wch);
|
||
_fExpecting = FALSE;
|
||
if ((wch==WHACK) && _fFixSlashes)
|
||
{
|
||
_fFixSlashes = FALSE;
|
||
baseAccept(wch);
|
||
_fFixSlashes = TRUE;
|
||
return;
|
||
}
|
||
break;
|
||
}
|
||
else
|
||
{
|
||
_wchLast = wch;
|
||
}
|
||
return;
|
||
}
|
||
if (wch==HEX_ESCAPE)
|
||
{
|
||
_fExpecting = TRUE;
|
||
_wchLast = L'\0';
|
||
return;
|
||
}
|
||
break;
|
||
|
||
case US_ESCAPE_UNSAFE:
|
||
if ((wch==SLASH)
|
||
||
|
||
(wch==WHACK && _fFixSlashes)
|
||
||
|
||
(IsSafePathChar(wch) && (wch!=HEX_ESCAPE || !(_dwFlags & URL_ESCAPE_PERCENT))))
|
||
{
|
||
break;
|
||
}
|
||
|
||
baseAccept(L'%');
|
||
baseAccept(hex[(wch >> 4) & 15]);
|
||
baseAccept(hex[wch & 15]);
|
||
return;
|
||
|
||
case US_ESCAPE_SPACES:
|
||
if (wch==SPC)
|
||
{
|
||
baseAccept(L'%');
|
||
baseAccept(L'2');
|
||
baseAccept(L'0');
|
||
return;
|
||
}
|
||
break;
|
||
default:
|
||
ASSERT(FALSE);
|
||
}
|
||
baseAccept(wch);
|
||
}
|
||
|
||
// -- Accept --------------------------------
|
||
// Accept only a string
|
||
VOID URL_STRING::Accept(PWSTR psz)
|
||
{
|
||
while (*psz)
|
||
{
|
||
Accept(*psz);
|
||
psz++;
|
||
}
|
||
}
|
||
|
||
// -- Contract
|
||
// Whenever we call Contract, we're pointing past the last separator. We want to
|
||
// omit the segment between this separator and the one before it.
|
||
// This should be used ONLY when we're examining the path segment of the urls.
|
||
|
||
VOID URL_STRING::Contract(BOOL fContractLevel)
|
||
{
|
||
ASSERT(_ccWork && _ccMark);
|
||
|
||
// _ccWork is 1 after wherever the next character will be placed
|
||
// subtract +1 to derive what the last character in the url is
|
||
DWORD _ccEnd = _ccWork-1 - 1;
|
||
if (_eScheme!=URL_SCHEME_MK)
|
||
{
|
||
if (!fContractLevel && (_pszWork[_ccEnd]==SLASH || _pszWork[_ccEnd]==WHACK))
|
||
{
|
||
return;
|
||
}
|
||
do
|
||
{
|
||
_ccEnd--;
|
||
}
|
||
while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH && _pszWork[_ccEnd]!=WHACK);
|
||
}
|
||
else
|
||
{
|
||
if (!fContractLevel && (_pszWork[_ccEnd]==SLASH))
|
||
{
|
||
return;
|
||
}
|
||
do
|
||
{
|
||
_ccEnd--;
|
||
}
|
||
while ((_ccEnd>=_ccMark-1) && _pszWork[_ccEnd]!=SLASH);
|
||
}
|
||
if (_ccEnd<_ccMark-1)
|
||
{
|
||
_ccEnd = _ccMark-1;
|
||
}
|
||
else
|
||
{
|
||
_ccEnd++;
|
||
}
|
||
_ccWork = _ccEnd + 1;
|
||
}
|
||
|
||
VOID URL_STRING::TrimEndWhiteSpace()
|
||
{
|
||
if (_ccLastWhite)
|
||
{
|
||
_ccWork = _ccLastWhite;
|
||
_ccLastWhite = 0;
|
||
}
|
||
}
|
||
|
||
|
||
VOID URL_STRING::CleanAccept(WCHAR wch)
|
||
{
|
||
baseAccept(wch);
|
||
}
|
||
|
||
|
||
// -------------------------------------------------------------------------------
|
||
// These member functions return information about the url that is being formed
|
||
|
||
PWSTR URL_STRING::GetStart()
|
||
{
|
||
return _pszWork;
|
||
}
|
||
|
||
LONG URL_STRING::GetTotalLength()
|
||
{
|
||
return _ccWork - 1;
|
||
}
|
||
|
||
BOOL URL_STRING::AnyProblems()
|
||
{
|
||
return _fError;
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
VOID URL_STRING::NoteScheme(URL_SCHEME a_eScheme, DWORD a_dwSchemeInfo)
|
||
{
|
||
_eScheme = a_eScheme;
|
||
_dwSchemeInfo = a_dwSchemeInfo;
|
||
_fFixSlashes = a_dwSchemeInfo & UPF_SCHEME_CONVERT;
|
||
}
|
||
|
||
VOID URL_STRING::AddSchemeNote(DWORD a_dwSchemeInfo)
|
||
{
|
||
_dwSchemeInfo |= a_dwSchemeInfo;
|
||
_fFixSlashes = _dwSchemeInfo & UPF_SCHEME_CONVERT;
|
||
}
|
||
|
||
DWORD URL_STRING::GetSchemeNotes()
|
||
{
|
||
return _dwSchemeInfo;
|
||
}
|
||
|
||
URL_SCHEME URL_STRING::QueryScheme()
|
||
{
|
||
return _eScheme;
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
VOID URL_STRING::Mark()
|
||
{
|
||
_ccMark = _ccWork;
|
||
}
|
||
|
||
VOID URL_STRING::ClearMark()
|
||
{
|
||
_ccMark = 0;
|
||
}
|
||
|
||
VOID URL_STRING::EraseMarkedText()
|
||
{
|
||
if (_ccMark)
|
||
{
|
||
_ccWork = _ccMark;
|
||
_ccMark = 0;
|
||
}
|
||
}
|
||
|
||
DWORD URL_STRING::CompareMarkWith(PWSTR psz)
|
||
{
|
||
if (_ccMark)
|
||
{
|
||
*(_pszWork + _ccWork - 1) = L'\0';
|
||
return (StrCmpW(_pszWork + _ccMark - 1, psz));
|
||
}
|
||
// In other words, return that the string isn't present.
|
||
return 1;
|
||
}
|
||
|
||
DWORD URL_STRING::CompareLast(PCWSTR psz, DWORD cc)
|
||
{
|
||
if (_ccWork > cc)
|
||
{
|
||
return StrCmpNIW(_pszWork + _ccWork - 1 - cc, psz, cc);
|
||
}
|
||
return 1;
|
||
}
|
||
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
VOID URL_STRING::NotifyQuery()
|
||
{
|
||
if (!_ccQuery)
|
||
{
|
||
_ccQuery = _ccWork;
|
||
}
|
||
}
|
||
|
||
VOID URL_STRING::NotifyFragment()
|
||
{
|
||
if (!_ccFragment)
|
||
{
|
||
_ccFragment = _ccWork;
|
||
CleanAccept(POUND);
|
||
}
|
||
}
|
||
|
||
VOID URL_STRING::DropQuery()
|
||
{
|
||
if (_ccQuery)
|
||
{
|
||
_ccWork = _ccQuery;
|
||
_ccQuery = _ccFragment = 0;
|
||
}
|
||
}
|
||
|
||
VOID URL_STRING::DropFragment()
|
||
{
|
||
if (_ccFragment)
|
||
{
|
||
_ccWork = _ccFragment;
|
||
_ccFragment = 0;
|
||
}
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
// These member functions are for determining how the url's characters are going
|
||
// to be represented
|
||
|
||
VOID URL_STRING::EnableMunging()
|
||
{
|
||
_dwMode = US_NOTHING;
|
||
|
||
// For opaque urls, munge ONLY if we're explicitly asked to URL_ESCAPE or URL_UNESCAPE,
|
||
// but NOT URL_ESCAPE_SPACES_ONLY
|
||
|
||
// For query and fragment, never allow for URL_ESCAPE_UNSAFE and for
|
||
// others ONLY when URL_DONT_ESCAPE_EXTRA_INFO is specified
|
||
|
||
if ((_dwSchemeInfo & UPF_SCHEME_OPAQUE)
|
||
&& (_dwFlags & URL_ESCAPE_SPACES_ONLY))
|
||
return;
|
||
|
||
if ((_ccQuery || _ccFragment)
|
||
&& ((_dwFlags & (URL_DONT_ESCAPE_EXTRA_INFO | URL_ESCAPE_UNSAFE))))
|
||
return;
|
||
|
||
if (_dwFlags & URL_UNESCAPE)
|
||
{
|
||
_dwMode = US_UNESCAPE;
|
||
}
|
||
else if (_dwFlags & URL_ESCAPE_UNSAFE)
|
||
{
|
||
_dwMode = US_ESCAPE_UNSAFE;
|
||
}
|
||
else if (_dwFlags & URL_ESCAPE_SPACES_ONLY)
|
||
{
|
||
_dwMode = US_ESCAPE_SPACES;
|
||
}
|
||
}
|
||
|
||
VOID URL_STRING::DisableMunging()
|
||
{
|
||
_dwMode = US_NOTHING;
|
||
}
|
||
|
||
VOID URL_STRING::DisableSlashFixing()
|
||
{
|
||
_fFixSlashes = FALSE;
|
||
}
|
||
|
||
VOID URL_STRING::AddFlagNote(DWORD dwFlag)
|
||
{
|
||
_dwFlags |= dwFlag;
|
||
}
|
||
|
||
VOID URL_STRING::RestoreFlags()
|
||
{
|
||
ASSERT((_eScheme==URL_SCHEME_FILE) || (_dwFlags==_dwOldFlags));
|
||
_dwFlags = _dwOldFlags;
|
||
EnableMunging();
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
|
||
// URL ------------------------------------------------------------------------------------
|
||
// The URL class is used to examine the base and relative URLs to determine what
|
||
// will go into the URL_STRING container. The difference should be clear:
|
||
// URL instances look, but don't touch. URL_STRINGs are used solely to build urls.
|
||
|
||
|
||
class URL
|
||
{
|
||
private:
|
||
PCWSTR _pszUrl, _pszWork;
|
||
URL_SCHEME _eScheme;
|
||
DWORD _dwSchemeNotes, _dwFlags;
|
||
BOOL _fPathCompressionOn;
|
||
BOOL _fIgnoreQuery;
|
||
|
||
WCHAR SmallForm(WCHAR wch);
|
||
BOOL IsAlpha(WCHAR ch);
|
||
PCWSTR IsUrlPrefix(PCWSTR psz);
|
||
BOOL IsLocalDrive(PCWSTR psz);
|
||
BOOL IsQualifiedDrive(PCWSTR psz);
|
||
BOOL DetectSymbols(WCHAR wch1, WCHAR wch2 = '\0', WCHAR wch3 = '\0');
|
||
|
||
PCWSTR NextChar(PCWSTR psz);
|
||
PCWSTR FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1 = '\0', WCHAR wchDelim2 = '\0', WCHAR wchDelim3 = '\0', WCHAR wchDelim4 = '\0');
|
||
|
||
BOOL DetectFileServer();
|
||
BOOL DetectMkServer();
|
||
BOOL DefaultDetectServer();
|
||
VOID FeedDefaultServer(URL_STRING* pus);
|
||
VOID FeedFileServer(URL_STRING* pus);
|
||
VOID FeedFtpServer(URL_STRING* pus);
|
||
VOID FeedHttpServer(URL_STRING* pus);
|
||
VOID FeedMkServer(URL_STRING* pus);
|
||
PCWSTR FeedPort(PCWSTR psz, URL_STRING* pus);
|
||
|
||
public:
|
||
VOID Setup(PCWSTR pszInUrl, DWORD a_dwFlags = 0);
|
||
VOID Reset();
|
||
BOOL IsReset();
|
||
|
||
BOOL DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes = FALSE);
|
||
VOID SetScheme(URL_SCHEME eScheme, DWORD dwFlag);
|
||
URL_SCHEME GetScheme();
|
||
VOID AddSchemeNote(DWORD dwFlag);
|
||
DWORD GetSchemeNotes();
|
||
|
||
BOOL DetectServer();
|
||
BOOL DetectAbsolutePath();
|
||
BOOL DetectPath();
|
||
BOOL DetectQueryOrFragment();
|
||
BOOL DetectQuery();
|
||
BOOL DetectLocalDrive();
|
||
BOOL DetectSlash();
|
||
BOOL DetectAnything();
|
||
WCHAR PeekNext();
|
||
|
||
VOID FeedPath(URL_STRING* pus, BOOL fMarkServer = TRUE);
|
||
PCWSTR CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue);
|
||
DWORD DetectDots(PCWSTR* ppsz);
|
||
VOID StopPathCompression();
|
||
|
||
VOID FeedServer(URL_STRING* pus);
|
||
VOID FeedLocalDrive(URL_STRING* pus);
|
||
VOID FeedQueryAndFragment(URL_STRING* pus);
|
||
VOID IgnoreQuery();
|
||
};
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
VOID URL::Setup(PCWSTR pszInUrl, DWORD a_dwFlags)
|
||
{
|
||
while (*pszInUrl && IsWhite(*pszInUrl))
|
||
{
|
||
pszInUrl++;
|
||
}
|
||
_pszWork = _pszUrl = pszInUrl;
|
||
_eScheme = URL_SCHEME_UNKNOWN;
|
||
_dwSchemeNotes = 0;
|
||
_dwFlags = a_dwFlags;
|
||
_fPathCompressionOn = TRUE;
|
||
_fIgnoreQuery = FALSE;
|
||
}
|
||
|
||
VOID URL::Reset()
|
||
{
|
||
_pszWork = wszBogus;
|
||
}
|
||
|
||
BOOL URL::IsReset()
|
||
{
|
||
return (_pszWork==wszBogus);
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
inline WCHAR URL::SmallForm(WCHAR wch)
|
||
{
|
||
return (wch < L'A' || wch > L'Z') ? wch : (wch - L'A' + L'a');
|
||
}
|
||
|
||
inline BOOL URL::IsAlpha(WCHAR ch)
|
||
{
|
||
return ((ch >= 'a') && (ch <= 'z'))
|
||
||
|
||
((ch >= 'A') && (ch <= 'Z'));
|
||
}
|
||
|
||
|
||
inline PCWSTR URL::IsUrlPrefix(PCWSTR psz)
|
||
{
|
||
// We want to skip instances of "URL:"
|
||
psz = NextChar(psz);
|
||
if (*psz==L'u' || *psz==L'U')
|
||
{
|
||
psz = NextChar(psz+1);
|
||
if (*psz==L'r' || *psz==L'R')
|
||
{
|
||
psz = NextChar(psz+1);
|
||
if (*psz==L'l' || *psz==L'L')
|
||
{
|
||
psz = NextChar(psz+1);
|
||
if (*psz==COLON)
|
||
{
|
||
return NextChar(psz+1);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return NULL;
|
||
}
|
||
|
||
inline BOOL URL::IsLocalDrive(PCWSTR psz)
|
||
{
|
||
psz = NextChar(psz);
|
||
return (IsAlpha(*psz)
|
||
&&
|
||
((*NextChar(psz+1)==COLON) || (*NextChar(psz+1)==BAR)));
|
||
}
|
||
|
||
// -- IsQualifiedDrive --------
|
||
// On Win32 systems, a qualified drive is either
|
||
// i. <letter>: or ii. \\UNC\
|
||
// Under unix, it's only /.
|
||
|
||
inline BOOL URL::IsQualifiedDrive(PCWSTR psz)
|
||
{
|
||
psz = NextChar(psz);
|
||
BOOL fResult = IsLocalDrive(psz);
|
||
if (!fResult && *psz==WHACK)
|
||
{
|
||
psz = NextChar(psz+1);
|
||
fResult = *psz==WHACK;
|
||
}
|
||
return fResult;
|
||
}
|
||
|
||
// -- DetectSymbols -------------
|
||
// This is used to help determine what part of the URL we have reached.
|
||
inline BOOL URL::DetectSymbols(WCHAR wch1, WCHAR wch2, WCHAR wch3)
|
||
{
|
||
ASSERT(_pszWork);
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
return (*psz && (*psz==wch1 || *psz==wch2 || *psz==wch3));
|
||
}
|
||
|
||
BOOL URL::DetectSlash()
|
||
{
|
||
return DetectSymbols(SLASH, WHACK);
|
||
}
|
||
|
||
BOOL URL::DetectAnything()
|
||
{
|
||
return (*NextChar(_pszWork)!=L'\0');
|
||
}
|
||
|
||
// -- NextChar -------------------------------------
|
||
// We use NextChar instead of *psz because we want to
|
||
// ignore characters such as TAB, CR, etc.
|
||
inline PCWSTR URL::NextChar(PCWSTR psz)
|
||
{
|
||
while (IsInsignificantWhite(*psz))
|
||
{
|
||
psz++;
|
||
}
|
||
return psz;
|
||
}
|
||
|
||
WCHAR URL::PeekNext()
|
||
{
|
||
return (*NextChar(NextChar(_pszWork)+1));
|
||
}
|
||
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
inline PCWSTR URL::FeedUntil(PCWSTR psz, URL_STRING* pus, WCHAR wchDelim1, WCHAR wchDelim2, WCHAR wchDelim3, WCHAR wchDelim4)
|
||
{
|
||
psz = NextChar(psz);
|
||
while (*psz && *psz!=wchDelim1 && *psz!=wchDelim2 && *psz!=wchDelim3 && *psz!=wchDelim4)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
return psz;
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
VOID URL::SetScheme(URL_SCHEME eScheme, DWORD dwFlag)
|
||
{
|
||
_eScheme = eScheme;
|
||
_dwSchemeNotes = dwFlag;
|
||
}
|
||
|
||
URL_SCHEME URL::GetScheme()
|
||
{
|
||
return _eScheme;
|
||
}
|
||
|
||
VOID URL::AddSchemeNote(DWORD dwFlag)
|
||
{
|
||
_dwSchemeNotes |= dwFlag;
|
||
}
|
||
|
||
DWORD URL::GetSchemeNotes()
|
||
{
|
||
return _dwSchemeNotes;
|
||
}
|
||
|
||
BOOL URL::DetectAndFeedScheme(URL_STRING* pus, BOOL fReconcileSchemes)
|
||
{
|
||
ASSERT(_pszWork);
|
||
ASSERT(!fReconcileSchemes || (fReconcileSchemes && pus->QueryScheme()!=URL_SCHEME_FILE));
|
||
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
BOOL fResult = (IsQualifiedDrive(_pszWork));
|
||
if (fResult)
|
||
{
|
||
//
|
||
// Detected a File URL that isn't explicitly marked as such, ie C:\foo,
|
||
// in this case, we need to confirm that we're not overwriting
|
||
// a fully qualified relative URL with an Accept("file:"), although
|
||
// if the relative URL is the same scheme as the base, we now
|
||
// need to make the BASE-file URL take precedence.
|
||
//
|
||
|
||
_eScheme = URL_SCHEME_FILE;
|
||
|
||
if (!fReconcileSchemes)
|
||
{
|
||
pus->Accept((PWSTR)c_szFileScheme);
|
||
pus->Accept(COLON);
|
||
_dwSchemeNotes = g_mpUrlSchemeTypes[1].dwFlags;
|
||
pus->NoteScheme(_eScheme, _dwSchemeNotes);
|
||
pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
|
||
}
|
||
else if (pus->QueryScheme() != URL_SCHEME_FILE)
|
||
{
|
||
Reset();
|
||
}
|
||
|
||
goto exit;
|
||
}
|
||
|
||
for (;;)
|
||
{
|
||
while (IsValidSchemeCharW(*psz))
|
||
{
|
||
psz = NextChar(psz + 1);
|
||
}
|
||
if (*psz!=COLON)
|
||
{
|
||
break;
|
||
}
|
||
if (IsUrlPrefix(_pszWork))
|
||
{
|
||
// However, we want to skip instances of URL:
|
||
_pszWork = psz = NextChar(psz+1);
|
||
continue;
|
||
}
|
||
|
||
DWORD ccScheme = 0;
|
||
PCWSTR pszClone = NextChar(_pszWork);
|
||
|
||
if (!fReconcileSchemes)
|
||
{
|
||
while (pszClone<=psz)
|
||
{
|
||
pus->Accept(SmallForm(*pszClone));
|
||
ccScheme++;
|
||
pszClone = NextChar(pszClone+1);
|
||
}
|
||
_pszWork = pszClone;
|
||
// Subtract one for the colon
|
||
ccScheme--;
|
||
// BUG BUG Since we're smallifying the scheme above, we might be able to
|
||
// avoid calling this func, call GetSchemeTypeAndFlags instead.
|
||
_eScheme = GetSchemeTypeAndFlagsSpecialW(pus->GetStart(), ccScheme, &_dwSchemeNotes);
|
||
pus->NoteScheme(_eScheme, _dwSchemeNotes);
|
||
}
|
||
else
|
||
{
|
||
PWSTR pszKnownScheme = pus->GetStart();
|
||
while (pszClone<=psz && SmallForm(*pszClone)==*pszKnownScheme)
|
||
{
|
||
pszClone = NextChar(pszClone+1);
|
||
pszKnownScheme++;
|
||
}
|
||
if (pszClone<=psz)
|
||
{
|
||
Reset();
|
||
}
|
||
else
|
||
{
|
||
_pszWork = pszClone;
|
||
}
|
||
}
|
||
fResult = TRUE;
|
||
break;
|
||
}
|
||
exit:
|
||
return fResult;
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
BOOL URL::DetectServer()
|
||
{
|
||
ASSERT(_pszWork);
|
||
BOOL fRet;
|
||
|
||
switch (_eScheme)
|
||
{
|
||
case URL_SCHEME_FILE:
|
||
fRet = DetectFileServer();
|
||
break;
|
||
|
||
case URL_SCHEME_MK:
|
||
fRet = DetectMkServer();
|
||
break;
|
||
|
||
default:
|
||
fRet = DefaultDetectServer();
|
||
break;
|
||
}
|
||
return fRet;
|
||
}
|
||
|
||
BOOL URL::DetectLocalDrive()
|
||
{
|
||
return IsLocalDrive(_pszWork);
|
||
}
|
||
|
||
BOOL URL::DetectFileServer()
|
||
{
|
||
ASSERT(_pszWork);
|
||
PCWSTR psz = _pszWork;
|
||
|
||
BOOL fResult = IsLocalDrive(_pszWork);
|
||
if (fResult)
|
||
{
|
||
_dwSchemeNotes |= UPF_FILEISPATHURL;
|
||
}
|
||
else
|
||
{
|
||
fResult = DetectSymbols(SLASH, WHACK);
|
||
}
|
||
return fResult;
|
||
}
|
||
|
||
BOOL URL::DetectMkServer()
|
||
{
|
||
ASSERT(_pszWork);
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
BOOL fResult = (*psz==L'@');
|
||
if (fResult)
|
||
{
|
||
_pszWork = NextChar(psz + 1);
|
||
}
|
||
return fResult;
|
||
}
|
||
|
||
BOOL URL::DefaultDetectServer()
|
||
{
|
||
BOOL fResult = FALSE;
|
||
if (DetectSymbols(SLASH, WHACK))
|
||
{
|
||
PCWSTR psz = NextChar(_pszWork + 1);
|
||
fResult = ((*psz==SLASH) || (*psz==WHACK));
|
||
}
|
||
return fResult;
|
||
}
|
||
|
||
VOID URL::FeedServer(URL_STRING* pus)
|
||
{
|
||
ASSERT(_pszWork);
|
||
switch (_eScheme)
|
||
{
|
||
case URL_SCHEME_FILE:
|
||
FeedFileServer(pus);
|
||
break;
|
||
|
||
case URL_SCHEME_MK:
|
||
FeedMkServer(pus);
|
||
break;
|
||
|
||
case URL_SCHEME_FTP:
|
||
FeedFtpServer(pus);
|
||
break;
|
||
|
||
case URL_SCHEME_HTTP:
|
||
case URL_SCHEME_HTTPS:
|
||
FeedHttpServer(pus);
|
||
break;
|
||
|
||
default:
|
||
FeedDefaultServer(pus);
|
||
break;
|
||
}
|
||
}
|
||
|
||
VOID URL::FeedMkServer(URL_STRING* pus)
|
||
{
|
||
ASSERT(_pszWork);
|
||
pus->EnableMunging();
|
||
pus->Accept(L'@');
|
||
_pszWork = FeedUntil(_pszWork, pus, SLASH);
|
||
if (!*_pszWork)
|
||
{
|
||
pus->TrimEndWhiteSpace();
|
||
}
|
||
else
|
||
{
|
||
_pszWork = NextChar(_pszWork+1);
|
||
}
|
||
pus->Accept(SLASH);
|
||
}
|
||
|
||
VOID URL::FeedLocalDrive(URL_STRING* pus)
|
||
{
|
||
pus->Accept(*NextChar(_pszWork));
|
||
_pszWork = NextChar(_pszWork+1);
|
||
pus->Accept(*_pszWork);
|
||
_pszWork = NextChar(_pszWork+1);
|
||
pus->DisableMunging();
|
||
}
|
||
|
||
VOID URL::FeedFileServer(URL_STRING* pus)
|
||
{
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
|
||
while (*psz==SLASH || *psz==WHACK)
|
||
{
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
DWORD dwSlashes = (DWORD)(psz - _pszWork);
|
||
switch (dwSlashes)
|
||
{
|
||
case 4:
|
||
pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
|
||
_dwSchemeNotes |= UPF_FILEISPATHURL;
|
||
// 4 to 6 slashes == 1 UNC
|
||
case 2:
|
||
if (IsLocalDrive(psz))
|
||
{
|
||
pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
|
||
}
|
||
|
||
case 5:
|
||
case 6:
|
||
pus->Accept(SLASH);
|
||
pus->Accept(SLASH);
|
||
if (!IsLocalDrive(psz))
|
||
{
|
||
pus->EnableMunging();
|
||
psz = FeedUntil(psz, pus, SLASH, WHACK);
|
||
if (!*psz)
|
||
{
|
||
pus->TrimEndWhiteSpace();
|
||
Reset();
|
||
}
|
||
else
|
||
{
|
||
_pszWork = NextChar(psz+1);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
_pszWork = psz;
|
||
}
|
||
pus->Accept(SLASH);
|
||
break;
|
||
|
||
// If there are no slashes, then it can't be a UNC.
|
||
case 0:
|
||
if (IsLocalDrive(psz))
|
||
{
|
||
pus->AddFlagNote(URL_ESCAPE_PERCENT | URL_ESCAPE_UNSAFE);
|
||
}
|
||
|
||
|
||
// We think of "file:/" and "file:///" to be on the local machine
|
||
// And if there are more slashes than we typically handle, we'll treat them as 1.
|
||
case 1:
|
||
case 3:
|
||
// This is a not-good-case
|
||
default:
|
||
pus->Accept(SLASH);
|
||
pus->Accept(SLASH);
|
||
pus->Accept(SLASH);
|
||
_pszWork = NextChar(psz);
|
||
break;
|
||
}
|
||
}
|
||
|
||
|
||
VOID URL::FeedFtpServer(URL_STRING* pus)
|
||
{
|
||
ASSERT(_pszWork);
|
||
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
|
||
if (*psz==WHACK || *psz==SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
if (*psz==WHACK || *psz==SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
pus->EnableMunging();
|
||
|
||
// The following is a grotesque and gruesome hack. We need to preserve case for
|
||
// embedded username/password
|
||
|
||
_pszWork = psz;
|
||
|
||
BOOL fPossibleUserPasswordCombo = FALSE;
|
||
while (*psz && *psz!=SLASH && *psz!=POUND && *psz!=QUERY)
|
||
{
|
||
if (*psz==L'@')
|
||
{
|
||
fPossibleUserPasswordCombo = TRUE;
|
||
break;
|
||
}
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
psz = _pszWork;
|
||
if (fPossibleUserPasswordCombo)
|
||
{
|
||
while (*psz!=L'@')
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
}
|
||
|
||
// This still leaves the issue of slashes, colons, ?s, @s, and #s in passwords; I guess they
|
||
// ought to be escaped. (You just can't win, sometimes.)
|
||
|
||
while (*psz && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
|
||
{
|
||
pus->Accept(SmallForm(*psz));
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
if (*psz==COLON)
|
||
{
|
||
psz = FeedPort(psz, pus);
|
||
}
|
||
pus->DisableMunging();
|
||
|
||
_pszWork = psz;
|
||
if (!*psz)
|
||
{
|
||
pus->TrimEndWhiteSpace();
|
||
pus->Accept(SLASH);
|
||
}
|
||
else
|
||
{
|
||
if (*psz==QUERY || *psz==POUND)
|
||
{
|
||
pus->Accept(SLASH);
|
||
}
|
||
else
|
||
{
|
||
pus->Accept(*psz);
|
||
_pszWork = NextChar(psz+1);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
VOID URL::FeedHttpServer(URL_STRING* pus)
|
||
{
|
||
// This is a version of FeedDefaultServer, stripped of non-essentials.
|
||
// This includes a hack to enable username/password combos in http urls.
|
||
|
||
ASSERT(_pszWork);
|
||
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
|
||
if (*psz==WHACK || *psz==SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
if (*psz==WHACK || *psz==SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
pus->EnableMunging();
|
||
|
||
// WARNING! FeedPort also calls Mark(). Must be careful that they don't overlap.
|
||
pus->Mark();
|
||
PCWSTR pszRestart = psz;
|
||
|
||
while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT)
|
||
{
|
||
pus->Accept(SmallForm(*psz));
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
if (*psz==COLON)
|
||
{
|
||
// We either have a port or a password.
|
||
PCWSTR pszPort = psz;
|
||
do
|
||
{
|
||
psz = NextChar(psz+1);
|
||
}
|
||
while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND && *psz!=AT);
|
||
if (*psz!=AT)
|
||
{
|
||
psz = FeedPort(pszPort, pus);
|
||
}
|
||
}
|
||
|
||
if (*psz==AT)
|
||
{
|
||
// We've hit a username/password combo. So we have to undo our case-changing
|
||
psz = pszRestart;
|
||
pus->EraseMarkedText();
|
||
while (*psz!=AT)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
// Now we carry on as before
|
||
while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
|
||
{
|
||
pus->Accept(SmallForm(*psz));
|
||
psz = NextChar(psz+1);
|
||
}
|
||
if (*psz==COLON)
|
||
{
|
||
psz = FeedPort(psz, pus);
|
||
}
|
||
}
|
||
|
||
pus->ClearMark();
|
||
pus->DisableMunging();
|
||
|
||
_pszWork = psz;
|
||
if (!*psz)
|
||
{
|
||
pus->TrimEndWhiteSpace();
|
||
if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE))
|
||
{
|
||
pus->Accept(SLASH);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if (*psz==QUERY || *psz==POUND)
|
||
{
|
||
pus->Accept(SLASH);
|
||
}
|
||
else
|
||
{
|
||
pus->Accept(*psz);
|
||
_pszWork = NextChar(psz+1);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
VOID URL::FeedDefaultServer(URL_STRING* pus)
|
||
{
|
||
ASSERT(_pszWork);
|
||
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
if (!(_dwSchemeNotes & UPF_SCHEME_INTERNET))
|
||
{
|
||
pus->DisableSlashFixing();
|
||
}
|
||
|
||
if (*psz==WHACK || *psz==SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
if (*psz==WHACK || *psz==SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
if (_dwSchemeNotes & UPF_SCHEME_INTERNET)
|
||
{
|
||
pus->EnableMunging();
|
||
|
||
while (*psz && *psz!=WHACK && *psz!=SLASH && *psz!=COLON && *psz!=QUERY && *psz!=POUND)
|
||
{
|
||
pus->Accept(SmallForm(*psz));
|
||
psz = NextChar(psz+1);
|
||
}
|
||
if (*psz==COLON)
|
||
{
|
||
psz = FeedPort(psz, pus);
|
||
}
|
||
pus->DisableMunging();
|
||
}
|
||
else
|
||
{
|
||
while (*psz && *psz!=SLASH)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
}
|
||
_pszWork = psz;
|
||
if (!*psz)
|
||
{
|
||
pus->TrimEndWhiteSpace();
|
||
if ((_eScheme!=URL_SCHEME_UNKNOWN) && !(_dwSchemeNotes & UPF_SCHEME_OPAQUE))
|
||
{
|
||
pus->Accept(SLASH);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if (*psz==QUERY || *psz==POUND)
|
||
{
|
||
pus->Accept(SLASH);
|
||
}
|
||
else
|
||
{
|
||
pus->Accept(*psz);
|
||
_pszWork = NextChar(psz+1);
|
||
}
|
||
}
|
||
}
|
||
|
||
PCWSTR URL::FeedPort(PCWSTR psz, URL_STRING* pus)
|
||
{
|
||
BOOL fIgnorePort = FALSE;
|
||
pus->Mark();
|
||
psz = FeedUntil(psz, pus, SLASH, WHACK, POUND, QUERY);
|
||
|
||
if (!(_dwFlags & URL_DONT_SIMPLIFY))
|
||
{
|
||
// Here, decide whether or not to ignore the port
|
||
// FEATURE we should actually be getting this from
|
||
// the services file to find out the default protocol port
|
||
// but we dont think that most people will change them - zekel 17-Dec-96
|
||
switch(_eScheme)
|
||
{
|
||
case URL_SCHEME_HTTP:
|
||
if (pus->CompareMarkWith(L":80")==0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
case URL_SCHEME_HTTPS:
|
||
if (pus->CompareMarkWith(L":443")==0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
case URL_SCHEME_FTP:
|
||
if (pus->CompareMarkWith(L":21")==0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
|
||
case URL_SCHEME_GOPHER:
|
||
if (pus->CompareMarkWith(L":70")==0)
|
||
fIgnorePort = TRUE;
|
||
break;
|
||
}
|
||
}
|
||
if (fIgnorePort)
|
||
{
|
||
pus->EraseMarkedText();
|
||
}
|
||
else
|
||
{
|
||
pus->ClearMark();
|
||
}
|
||
return psz;
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
BOOL URL::DetectAbsolutePath()
|
||
{
|
||
BOOL fResult = FALSE;
|
||
if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
|
||
{
|
||
fResult = TRUE;
|
||
}
|
||
else if (DetectSymbols(SLASH, WHACK))
|
||
{
|
||
fResult = TRUE;
|
||
_pszWork = NextChar(_pszWork+1);
|
||
}
|
||
return fResult;
|
||
}
|
||
|
||
BOOL URL::DetectPath()
|
||
{
|
||
return (*NextChar(_pszWork) && !DetectSymbols(QUERY, POUND));
|
||
}
|
||
|
||
VOID URL::FeedPath(URL_STRING* pus, BOOL fMarkServer)
|
||
{
|
||
ASSERT(_pszWork);
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
if (fMarkServer)
|
||
{
|
||
pus->Mark();
|
||
}
|
||
if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
|
||
{
|
||
_pszWork = FeedUntil(psz, pus);
|
||
pus->TrimEndWhiteSpace();
|
||
}
|
||
else
|
||
{
|
||
DWORD cDots;
|
||
BOOL fContinue = TRUE;
|
||
do
|
||
{
|
||
cDots = 0;
|
||
PCWSTR pszTmp = psz;
|
||
if (_fPathCompressionOn)
|
||
{
|
||
cDots = DetectDots(&psz);
|
||
}
|
||
|
||
if (cDots)
|
||
{
|
||
if (cDots==2)
|
||
{
|
||
pus->Contract();
|
||
}
|
||
continue;
|
||
}
|
||
psz = CopySegment(pszTmp, pus, &fContinue);
|
||
}
|
||
while (fContinue);
|
||
_pszWork = psz;
|
||
if (!*_pszWork)
|
||
{
|
||
pus->TrimEndWhiteSpace();
|
||
}
|
||
}
|
||
}
|
||
|
||
// pfContinue indicates whether there's anything following that would
|
||
// be of relevance to a path
|
||
PCWSTR URL::CopySegment(PCWSTR psz, URL_STRING* pus, BOOL* pfContinue)
|
||
{
|
||
ASSERT(pfContinue);
|
||
BOOL fStop = FALSE;
|
||
psz = NextChar(psz);
|
||
while (!fStop)
|
||
{
|
||
switch (*psz)
|
||
{
|
||
case POUND:
|
||
if (_eScheme==URL_SCHEME_FILE)
|
||
{
|
||
// Since #s are valid for dos paths, we have to accept them except
|
||
// for when they follow a .htm/.html file (See FindFragmentA/W)
|
||
// However, some inconsistencies may still arise...
|
||
for (DWORD i=0; i < ARRAYSIZE(ExtTable); i++)
|
||
{
|
||
if (!pus->CompareLast(ExtTable[i].wszExt, ExtTable[i].cchExt))
|
||
break;
|
||
}
|
||
// If we haven't found a matching file extension, we'll treat as a filename character.
|
||
if (i==ARRAYSIZE(ExtTable))
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
break;
|
||
}
|
||
}
|
||
goto next;
|
||
|
||
case QUERY:
|
||
// We're going to support query as a legitimate character in file urls.
|
||
// *sigh*
|
||
if (_eScheme==URL_SCHEME_FILE)
|
||
{
|
||
if (_fIgnoreQuery)
|
||
{
|
||
psz = wszBogus;
|
||
}
|
||
else
|
||
{
|
||
pus->CleanAccept(*psz);
|
||
psz = NextChar(psz+1);
|
||
break;
|
||
}
|
||
}
|
||
case L'\0':
|
||
next:
|
||
*pfContinue = FALSE;
|
||
fStop = TRUE;
|
||
break;
|
||
|
||
case SLASH:
|
||
case WHACK:
|
||
fStop = TRUE;
|
||
// fall through
|
||
|
||
default:
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
break;
|
||
}
|
||
}
|
||
return psz;
|
||
}
|
||
|
||
DWORD URL::DetectDots(PCWSTR* ppsz)
|
||
{
|
||
PCWSTR psz;
|
||
if (ppsz)
|
||
{
|
||
psz = *ppsz;
|
||
}
|
||
else
|
||
{
|
||
psz = NextChar(_pszWork);
|
||
}
|
||
|
||
DWORD cDots = 0;
|
||
if (*psz==DOT)
|
||
{
|
||
psz = NextChar(psz+1);
|
||
cDots++;
|
||
if (*psz==DOT)
|
||
{
|
||
psz = NextChar(psz+1);
|
||
cDots++;
|
||
}
|
||
switch (*psz)
|
||
{
|
||
case WHACK:
|
||
if (_eScheme==URL_SCHEME_MK)
|
||
{
|
||
cDots = 0;
|
||
}
|
||
|
||
case SLASH:
|
||
psz = NextChar(psz+1);
|
||
break;
|
||
|
||
case QUERY:
|
||
case POUND:
|
||
case L'\0':
|
||
break;
|
||
default:
|
||
cDots = 0;
|
||
break;
|
||
}
|
||
}
|
||
if (ppsz)
|
||
{
|
||
*ppsz = psz;
|
||
}
|
||
return cDots;
|
||
}
|
||
|
||
VOID URL::StopPathCompression()
|
||
{
|
||
_fPathCompressionOn = FALSE;
|
||
}
|
||
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
BOOL URL::DetectQueryOrFragment()
|
||
{
|
||
return (DetectSymbols(QUERY, POUND));
|
||
}
|
||
|
||
BOOL URL::DetectQuery()
|
||
{
|
||
return (DetectSymbols(QUERY));
|
||
}
|
||
|
||
VOID URL::IgnoreQuery()
|
||
{
|
||
ASSERT(_eScheme==URL_SCHEME_FILE);
|
||
_fIgnoreQuery = TRUE;
|
||
}
|
||
|
||
VOID URL::FeedQueryAndFragment(URL_STRING* pus)
|
||
{
|
||
ASSERT(_pszWork);
|
||
if (_dwSchemeNotes & UPF_SCHEME_OPAQUE)
|
||
{
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
while (*psz)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
_pszWork = psz;
|
||
return;
|
||
}
|
||
|
||
PCWSTR psz = NextChar(_pszWork);
|
||
|
||
// This is okay since *psz must equal { ? | # }
|
||
if (*psz==QUERY)
|
||
{
|
||
pus->CleanAccept(QUERY);
|
||
}
|
||
|
||
// By munging, I mean taking an URL of form http://a/b#c?d and producing http://a/b?d#c
|
||
// We do this by default; however, we won't do this when we've been passed a fragment only
|
||
// as a relative url
|
||
|
||
// Query's always override.
|
||
|
||
if (*psz==QUERY)
|
||
{
|
||
pus->DropQuery();
|
||
pus->NotifyQuery();
|
||
pus->EnableMunging();
|
||
|
||
psz = NextChar(psz+1);
|
||
while (*psz)
|
||
{
|
||
if (*psz==POUND)
|
||
{
|
||
pus->NotifyFragment();
|
||
}
|
||
else
|
||
{
|
||
pus->Accept(*psz);
|
||
}
|
||
psz = NextChar(psz+1);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// This line of code will determine whether we've been passed a fragment for a relative url
|
||
// For properly formed base urls, this won't matter.
|
||
BOOL fMunge = psz!=NextChar(_pszUrl);
|
||
|
||
pus->DropFragment();
|
||
pus->NotifyFragment();
|
||
pus->EnableMunging();
|
||
|
||
psz = NextChar(psz+1);
|
||
|
||
// The following line is bogus. It just keeps going until the end. Not good.
|
||
// We MAY or MAY NOT fix this, depending on how much people scream at me.
|
||
// This may be an issue for Netscape compatibility.
|
||
|
||
// What we could do is: when either query or fragment would be blank, preserve as is.
|
||
// This would minimise breaking compatibility across the board.
|
||
// -- AKABIR, 09/28/98
|
||
while ((*psz==QUERY && !fMunge) || *psz)
|
||
{
|
||
if (*psz==QUERY)
|
||
{
|
||
pus->CleanAccept(QUERY);
|
||
}
|
||
else
|
||
{
|
||
pus->Accept(*psz);
|
||
}
|
||
psz = NextChar(psz+1);
|
||
}
|
||
|
||
if (*psz==QUERY)
|
||
{
|
||
pus->DropFragment();
|
||
pus->NotifyQuery();
|
||
pus->CleanAccept(*psz);
|
||
psz = NextChar(psz+1);
|
||
while (*psz)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
pus->TrimEndWhiteSpace();
|
||
|
||
pus->NotifyFragment();
|
||
psz = NextChar(_pszWork);
|
||
pus->CleanAccept(*psz);
|
||
psz = NextChar(psz+1);
|
||
while (*psz!=QUERY)
|
||
{
|
||
pus->Accept(*psz);
|
||
psz = NextChar(psz+1);
|
||
}
|
||
}
|
||
}
|
||
pus->TrimEndWhiteSpace();
|
||
pus->ClearMark();
|
||
}
|
||
|
||
// -------------------------------------------------------------------------------
|
||
|
||
HRESULT
|
||
BlendUrls(URL& urlBase, URL& urlRelative, URL_STRING* pusOut, DWORD dwFlags)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
// -- SCHEME --------------------------------------------------------------------------
|
||
// Examine each url's scheme.
|
||
// We won't continue to use urlBase IF
|
||
// 1. their tokenized schemes are not identical
|
||
// 2. the scheme is a file
|
||
// 3. the actual string schemes are not identical
|
||
|
||
// this checks to make sure that these are the same scheme, and
|
||
// that the scheme is allowed to be used in relative URLs
|
||
// file: is not allowed to because of weirdness with drive letters
|
||
// and \\UNC\shares
|
||
|
||
BOOL fBaseServerDetected = FALSE, fRelativeServerDetected = FALSE;
|
||
BOOL fDetectAbsoluteRelPath = FALSE;
|
||
BOOL fDetectedRelScheme = urlRelative.DetectAndFeedScheme(pusOut);
|
||
BOOL fDetectedBaseScheme = FALSE;
|
||
if (fDetectedRelScheme
|
||
&& ((pusOut->QueryScheme()==URL_SCHEME_FILE)
|
||
|| (urlRelative.GetSchemeNotes() & UPF_SCHEME_OPAQUE)))
|
||
{
|
||
urlBase.Reset();
|
||
}
|
||
else if ((fDetectedBaseScheme = urlBase.DetectAndFeedScheme(pusOut, fDetectedRelScheme)))
|
||
{
|
||
if (!fDetectedRelScheme)
|
||
{
|
||
urlRelative.SetScheme(urlBase.GetScheme(), urlBase.GetSchemeNotes());
|
||
}
|
||
}
|
||
|
||
// We fall back on the original parser for those cases we don't handle yet.
|
||
// (dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)
|
||
if (((pusOut->QueryScheme()==URL_SCHEME_FILE)
|
||
|| (!(fDetectedRelScheme || fDetectedBaseScheme)))
|
||
&& ((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)))
|
||
{
|
||
hr = E_FAIL;
|
||
goto exit;
|
||
}
|
||
|
||
|
||
if ((pusOut->QueryScheme()==URL_SCHEME_UNKNOWN))
|
||
{
|
||
// BUG BUG For IE4 compat, we need to use the old parser. However
|
||
// if we're passed URL_PLUGGABLE_PROTOCOL, we'll use this parser.
|
||
|
||
if (!(dwFlags & URL_PLUGGABLE_PROTOCOL))
|
||
{
|
||
hr = E_FAIL;
|
||
goto exit;
|
||
}
|
||
|
||
urlRelative.StopPathCompression();
|
||
|
||
// Same schemes, so now we look at the base url to divine the opacity
|
||
if (urlBase.DetectAnything() && !urlBase.IsReset())
|
||
{
|
||
if (!urlBase.DetectSlash())
|
||
{
|
||
if (!urlRelative.DetectQueryOrFragment())
|
||
{
|
||
urlBase.Reset();
|
||
}
|
||
urlBase.AddSchemeNote(UPF_SCHEME_OPAQUE);
|
||
urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE);
|
||
pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE);
|
||
}
|
||
}
|
||
else if (!urlRelative.DetectSlash())
|
||
{
|
||
// If urlBase is reset, that means the schemes are different,
|
||
// so we only have urlRelative to figure out opacity.
|
||
|
||
urlRelative.AddSchemeNote(UPF_SCHEME_OPAQUE);
|
||
pusOut->AddSchemeNote(UPF_SCHEME_OPAQUE);
|
||
}
|
||
}
|
||
else if (pusOut->QueryScheme()==URL_SCHEME_FTP)
|
||
{
|
||
// For ftp urls, we'll assume that we're being passed properly formed urls.
|
||
// Some ftp sites allow backslashes in their object filenames, so we should
|
||
// allow access to these. Also, domain passwords would otherwise need escaping.
|
||
pusOut->DisableSlashFixing();
|
||
}
|
||
|
||
if (dwFlags & URL_DONT_SIMPLIFY)
|
||
{
|
||
urlBase.StopPathCompression();
|
||
urlRelative.StopPathCompression();
|
||
}
|
||
|
||
// -- SERVER --------------------------------------------------------------------------
|
||
// Decide on the server to use.
|
||
// Question: if urlBase and UrlRelative have the same explicit server, isn't it pointless
|
||
// to continue looking at url base anyway?
|
||
|
||
pusOut->EnableMunging();
|
||
if (!(pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE))
|
||
{
|
||
if (urlRelative.DetectServer()
|
||
&& !(urlBase.DetectServer() && (urlRelative.PeekNext()!=SLASH) && (urlRelative.PeekNext()!=WHACK)))
|
||
{
|
||
fRelativeServerDetected = TRUE;
|
||
urlRelative.FeedServer(pusOut);
|
||
urlBase.Reset();
|
||
}
|
||
else if (urlBase.DetectServer())
|
||
{
|
||
fBaseServerDetected = TRUE;
|
||
urlBase.FeedServer(pusOut);
|
||
}
|
||
}
|
||
|
||
// -- PATH ----------------------------------------------------------------------------
|
||
// Figure out the path
|
||
// If the relative url has a path, and it starts with a slash/whack, forget about the
|
||
// base's path and stuff. Otherwise, inherit the base and attach the relative
|
||
// Potential problem: when rel path is empty, we expect to knock of the last base segment
|
||
|
||
if (pusOut->QueryScheme()==URL_SCHEME_FILE)
|
||
{
|
||
// Hack for back compat
|
||
// If the relative url consists of a query string, we'll append that to
|
||
// our resultant url, rather than the base's query string
|
||
if (urlRelative.DetectQuery())
|
||
{
|
||
urlBase.IgnoreQuery();
|
||
}
|
||
else
|
||
{
|
||
|
||
BOOL fResult1 = urlRelative.DetectAbsolutePath();
|
||
BOOL fResult2 = urlRelative.DetectLocalDrive();
|
||
|
||
if (fResult2)
|
||
{
|
||
urlBase.Reset();
|
||
urlRelative.FeedLocalDrive(pusOut);
|
||
if (urlRelative.DetectAbsolutePath())
|
||
{
|
||
pusOut->Accept(SLASH);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if (urlBase.DetectLocalDrive())
|
||
{
|
||
urlBase.FeedLocalDrive(pusOut);
|
||
if (fResult1)
|
||
{
|
||
pusOut->Accept(SLASH);
|
||
urlBase.Reset();
|
||
}
|
||
else if (urlBase.DetectAbsolutePath())
|
||
{
|
||
pusOut->Accept(SLASH);
|
||
}
|
||
}
|
||
else if (fResult1)
|
||
{
|
||
if (fRelativeServerDetected)
|
||
{
|
||
pusOut->Accept(SLASH);
|
||
}
|
||
urlBase.Reset();
|
||
}
|
||
}
|
||
}
|
||
}
|
||
else if (pusOut->QueryScheme()==URL_SCHEME_UNKNOWN)
|
||
{
|
||
if (pusOut->GetSchemeNotes() & UPF_SCHEME_OPAQUE)
|
||
{
|
||
if (!urlRelative.DetectAnything())
|
||
{
|
||
urlRelative.Reset();
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// This code fragment is for urls with unknown schemes, that are to be
|
||
// treated hierarchically. Note that the authority (which has been passed in
|
||
// already) is terminated with /, ?, or \0. The / is *optional*, and should be
|
||
// appended if and only if the urls being combined call for it.
|
||
if (urlBase.IsReset())
|
||
{
|
||
// At this point, we're examining only the relative url. We've been brought to
|
||
// a stop by the presence of /, ? or \0. So
|
||
if (urlRelative.DetectSlash() && !fDetectedRelScheme)
|
||
{
|
||
pusOut->Accept(SLASH);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
// In this case, we have both the relative and base urls to look at.
|
||
// What's the terminator for the base url
|
||
if ((urlRelative.DetectSlash()
|
||
|| (!urlBase.DetectAnything()
|
||
&& urlRelative.DetectAnything()
|
||
&& !urlRelative.DetectQuery()))
|
||
&& !fDetectedRelScheme)
|
||
{
|
||
pusOut->Accept(SLASH);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
pusOut->EnableMunging();
|
||
|
||
if ((fBaseServerDetected && (fDetectAbsoluteRelPath = urlRelative.DetectAbsolutePath())))
|
||
{
|
||
if (!fRelativeServerDetected)
|
||
{
|
||
pusOut->RestoreFlags();
|
||
}
|
||
if (fDetectAbsoluteRelPath && urlRelative.DetectDots(NULL))
|
||
{
|
||
urlRelative.StopPathCompression();
|
||
}
|
||
urlRelative.FeedPath(pusOut);
|
||
urlBase.Reset();
|
||
}
|
||
else if (urlBase.DetectPath())
|
||
{
|
||
urlBase.FeedPath(pusOut);
|
||
// We don't want to contract the base path's free segment if
|
||
// a. the scheme is opaque
|
||
// b. the relative url has a path
|
||
// c. the relative url has no path, just a fragment/query
|
||
if (!(urlBase.GetSchemeNotes() & UPF_SCHEME_OPAQUE))
|
||
{
|
||
pusOut->RestoreFlags();
|
||
|
||
if (urlRelative.DetectPath()
|
||
|| !urlRelative.DetectQueryOrFragment())
|
||
{
|
||
if (urlRelative.DetectPath() || !fDetectedRelScheme)
|
||
{
|
||
pusOut->Contract(FALSE);
|
||
}
|
||
if (fDetectedRelScheme)
|
||
{
|
||
urlRelative.StopPathCompression();
|
||
}
|
||
urlRelative.FeedPath(pusOut, FALSE);
|
||
urlBase.Reset();
|
||
}
|
||
else
|
||
{
|
||
urlRelative.FeedPath(pusOut, FALSE);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
urlRelative.StopPathCompression();
|
||
urlRelative.FeedPath(pusOut, FALSE);
|
||
}
|
||
}
|
||
else if (urlRelative.DetectPath())
|
||
{
|
||
if (!fRelativeServerDetected)
|
||
{
|
||
pusOut->RestoreFlags();
|
||
}
|
||
else if (urlRelative.DetectDots(NULL))
|
||
{
|
||
urlRelative.StopPathCompression();
|
||
}
|
||
urlRelative.FeedPath(pusOut);
|
||
urlBase.Reset();
|
||
}
|
||
pusOut->ClearMark();
|
||
|
||
pusOut->DisableSlashFixing();
|
||
// -- QUERY AND FRAGMENT -----------------------------------------------------------
|
||
// Figure out the query
|
||
if (urlBase.DetectQueryOrFragment())
|
||
{
|
||
urlBase.FeedQueryAndFragment(pusOut);
|
||
}
|
||
if (urlRelative.DetectQueryOrFragment())
|
||
{
|
||
urlRelative.FeedQueryAndFragment(pusOut);
|
||
}
|
||
pusOut->CleanAccept(L'\0');
|
||
|
||
if (pusOut->AnyProblems())
|
||
{
|
||
hr = E_OUTOFMEMORY;
|
||
}
|
||
exit:
|
||
return hr;
|
||
}
|
||
|
||
|
||
HRESULT
|
||
FormUrlCombineResultW(LPCWSTR pszBase,
|
||
LPCWSTR pszRelative,
|
||
LPWSTR pszCombined,
|
||
LPDWORD pcchCombined,
|
||
DWORD dwFlags)
|
||
{
|
||
if ((dwFlags & URL_ESCAPE_UNSAFE)
|
||
&& (dwFlags & URL_ESCAPE_SPACES_ONLY))
|
||
{
|
||
// In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE
|
||
// Deactivate UNSAFE
|
||
dwFlags ^= URL_ESCAPE_UNSAFE;
|
||
}
|
||
|
||
DWORD dwTempFlags = dwFlags;
|
||
if (dwFlags & URL_UNESCAPE)
|
||
{
|
||
if (dwFlags & URL_ESCAPE_UNSAFE)
|
||
{
|
||
dwTempFlags ^= URL_ESCAPE_UNSAFE;
|
||
}
|
||
if (dwFlags & URL_ESCAPE_SPACES_ONLY)
|
||
{
|
||
dwTempFlags ^= URL_ESCAPE_SPACES_ONLY;
|
||
}
|
||
}
|
||
|
||
// Make a copy of the relative url if the client wants to either
|
||
// a. unescape and escape the URL (since roundtripping is not guaranteed), or
|
||
// b. use the same location for relative URL's buffer for the combined url
|
||
HRESULT hr;
|
||
URL curlBase, curlRelative;
|
||
curlBase.Setup((PWSTR)pszBase);
|
||
curlRelative.Setup((PWSTR)pszRelative);
|
||
URL_STRING us(dwTempFlags);
|
||
|
||
hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags);
|
||
|
||
if (SUCCEEDED(hr))
|
||
{
|
||
DWORD ccBuffer = us.GetTotalLength();
|
||
if ((dwFlags & URL_UNESCAPE)
|
||
&& (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY)))
|
||
{
|
||
// No need to strip out URL_UNESCAPE
|
||
hr = UrlEscapeW(us.GetStart(), pszCombined, pcchCombined, dwFlags);
|
||
goto exit;
|
||
}
|
||
if (ccBuffer > *pcchCombined)
|
||
{
|
||
hr = E_POINTER;
|
||
}
|
||
else if (pszCombined)
|
||
{
|
||
memcpy(pszCombined, us.GetStart(), ccBuffer*sizeof(WCHAR));
|
||
// We return only the number of characters, not buffer size required.
|
||
ccBuffer--;
|
||
}
|
||
*pcchCombined = ccBuffer;
|
||
}
|
||
else if (hr==E_FAIL)
|
||
{
|
||
// ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)));
|
||
|
||
// We fall back on the original parser for those cases we don't handle yet.
|
||
// We should do this if and only if the new parser
|
||
// doesn't handle the flags cited above
|
||
// or we're passed a pluggable protocol without the forcing flag.
|
||
SHSTRW strwOut;
|
||
hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
hr = ReconcileHresults(hr, CopyOutW(&strwOut, pszCombined, pcchCombined));
|
||
}
|
||
}
|
||
|
||
exit:
|
||
return hr;
|
||
}
|
||
|
||
HRESULT
|
||
FormUrlCombineResultA(LPCSTR pszBase,
|
||
LPCSTR pszRelative,
|
||
LPSTR pszCombined,
|
||
LPDWORD pcchCombined,
|
||
DWORD dwFlags)
|
||
{
|
||
if ((dwFlags & URL_ESCAPE_UNSAFE)
|
||
&&
|
||
(dwFlags & URL_ESCAPE_SPACES_ONLY))
|
||
{
|
||
// In the original parser, ESCAPE_SPACES_ONLY takes precedence over ESCAPE_UNSAFE
|
||
// Deactivate UNSAFE
|
||
dwFlags ^= URL_ESCAPE_UNSAFE;
|
||
}
|
||
|
||
// Make a copy of the relative url if the client wants to either
|
||
// a. unescape and escape the URL (since roundtripping is not guaranteed), or
|
||
// b. use the same location for relative URL's buffer for the combined url
|
||
SHSTRW strwBase;
|
||
SHSTRW strwRelative;
|
||
HRESULT hr;
|
||
|
||
if (!(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative))))
|
||
{
|
||
return E_OUTOFMEMORY;
|
||
}
|
||
|
||
DWORD dwTempFlags = dwFlags;
|
||
if (dwFlags & URL_UNESCAPE)
|
||
{
|
||
if (dwFlags & URL_ESCAPE_UNSAFE)
|
||
{
|
||
dwTempFlags ^= URL_ESCAPE_UNSAFE;
|
||
}
|
||
if (dwFlags & URL_ESCAPE_SPACES_ONLY)
|
||
{
|
||
dwTempFlags ^= URL_ESCAPE_SPACES_ONLY;
|
||
}
|
||
}
|
||
|
||
URL curlBase, curlRelative;
|
||
curlBase.Setup(strwBase);
|
||
curlRelative.Setup(strwRelative);
|
||
URL_STRING us(dwTempFlags);
|
||
|
||
hr = BlendUrls(curlBase, curlRelative, &us, dwTempFlags);
|
||
|
||
if (SUCCEEDED(hr))
|
||
{
|
||
SHSTRA straOut;
|
||
if ((dwFlags & URL_UNESCAPE)
|
||
&& (dwFlags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY)))
|
||
{
|
||
SHSTRW strwTemp;
|
||
// No need to strip out URL_UNESCAPE
|
||
hr = SHUrlEscape(us.GetStart(), &strwTemp, dwFlags);
|
||
hr = ReconcileHresults(hr, straOut.SetStr(strwTemp));
|
||
}
|
||
else
|
||
{
|
||
hr = straOut.SetStr(us.GetStart());
|
||
}
|
||
if (SUCCEEDED(hr))
|
||
{
|
||
hr = CopyOutA(&straOut, pszCombined, pcchCombined);
|
||
}
|
||
}
|
||
else if (hr==E_FAIL)
|
||
{
|
||
// ASSERT(((dwFlags & URL_FILE_USE_PATHURL) || (dwFlags & URL_WININET_COMPATIBILITY)));
|
||
|
||
// We fall back on the original parser for those cases we don't handle yet.
|
||
// We should do this if and only if the new parser
|
||
// doesn't handle the flags cited above
|
||
SHSTRW strwOut;
|
||
|
||
hr = SHUrlParse(strwBase, strwRelative, &strwOut, dwFlags);
|
||
if (SUCCEEDED(hr))
|
||
{
|
||
SHSTRA straOut;
|
||
hr = ReconcileHresults(hr, straOut.SetStr(strwOut));
|
||
if(SUCCEEDED(hr))
|
||
hr = ReconcileHresults(hr, CopyOutA(&straOut, pszCombined, pcchCombined));
|
||
}
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
|
||
#ifdef PROOFREAD_PARSES
|
||
|
||
EXTERN_C DWORD g_dwProofMode;
|
||
|
||
enum
|
||
{
|
||
PP_COMPARE,
|
||
PP_ORIGINAL_ONLY,
|
||
PP_NEW_ONLY
|
||
};
|
||
|
||
//#define SHOW_MESSAGEBOX
|
||
|
||
VOID LogData(PWSTR pszMsg)
|
||
{
|
||
SHSTRA str;
|
||
str.SetStr(pszMsg);
|
||
CHAR szFileName[MAX_PATH];
|
||
DWORD dwSize = MAX_PATH;
|
||
CHAR szComputerName[MAX_PATH];
|
||
HANDLE hResultsFile = NULL;
|
||
strcpy(szFileName, "\\\\BANYAN\\IPTD\\AKABIR\\1315\\");
|
||
if (!GetComputerNameA(szComputerName, &dwSize))
|
||
{
|
||
goto exit;
|
||
}
|
||
lstrcatA(szFileName, szComputerName);
|
||
hResultsFile = CreateFileA( szFileName,
|
||
GENERIC_WRITE,
|
||
FILE_SHARE_WRITE | FILE_SHARE_READ,
|
||
NULL,
|
||
OPEN_ALWAYS,
|
||
0,
|
||
NULL);
|
||
if (INVALID_HANDLE_VALUE == hResultsFile)
|
||
hResultsFile = NULL;
|
||
if (hResultsFile)
|
||
{
|
||
if (SetFilePointer(hResultsFile, 0, NULL, FILE_END)==0xFFFFFFFF)
|
||
{
|
||
goto exit;
|
||
}
|
||
DWORD dwFoo;
|
||
if (0==WriteFile(hResultsFile, (PVOID)(PSTR)str, lstrlenW(pszMsg), &dwFoo, NULL))
|
||
{
|
||
DWORD dwE = GetLastError();
|
||
}
|
||
}
|
||
exit:
|
||
if (hResultsFile)
|
||
{
|
||
CloseHandle(hResultsFile);
|
||
}
|
||
}
|
||
|
||
HRESULT ProofreadParses(HRESULT hr,
|
||
LPCWSTR pszBase,
|
||
LPCWSTR pszRelative,
|
||
LPWSTR pszCombined,
|
||
PDWORD pcchCombined,
|
||
DWORD dwFlags,
|
||
DWORD dwSize
|
||
)
|
||
{
|
||
static WCHAR szLast[2084];
|
||
SHSTRW strwOut;
|
||
|
||
switch(g_dwProofMode)
|
||
{
|
||
case PP_COMPARE:
|
||
{
|
||
HRESULT hr2 = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
|
||
WCHAR wstr[2084];
|
||
DWORD ccLen = min(2084, dwSize), ccUrl = SUCCEEDED(hr) ? *pcchCombined : 0;
|
||
if(SUCCEEDED(hr2))
|
||
{
|
||
hr2 = CopyOutW(&strwOut, wstr, &ccLen);
|
||
if (hr2 == E_POINTER && hr == E_POINTER)
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
// Check if cached combine equals the new parser's result
|
||
if (!StrCmpW(pszCombined, szLast))
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
// Check if cached combine equals the old parser's result
|
||
if (!StrCmpW(wstr, szLast))
|
||
{
|
||
*pcchCombined = ccLen;
|
||
StrCpyNW(pszCombined, wstr, ccLen + 1);
|
||
hr = hr2;
|
||
goto exitpoint;
|
||
}
|
||
if (SUCCEEDED(hr))
|
||
{
|
||
StrCpyNW(szLast, wstr, ccLen);
|
||
|
||
if (!StrCmpW(wstr, pszCombined))
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
|
||
DWORD dwBogus;
|
||
if ((dwFlags & URL_ESCAPE_SPACES_ONLY) && !(dwFlags & URL_UNESCAPE))
|
||
{
|
||
PCWSTR psz = FindSchemeW(pszCombined, &dwBogus);
|
||
DWORD dw;
|
||
if (psz
|
||
&&
|
||
(URL_SCHEME_UNKNOWN
|
||
!=GetSchemeTypeAndFlagsW(psz, dwBogus, &dw))
|
||
&&
|
||
(dw & UPF_SCHEME_OPAQUE))
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
}
|
||
|
||
// Filter
|
||
// base: "http://foo/bar/"
|
||
// rel: ""
|
||
// old: "http://foo/bar"
|
||
// new: "http://foo/bar/"
|
||
if ((*pszRelative==L'\0')
|
||
&&
|
||
(!StrCmpNW(pszCombined, wstr, ccLen))
|
||
&&
|
||
(ccUrl==(ccLen+1))
|
||
&&
|
||
(pszCombined[ccLen]==L'/'))
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
|
||
// Filter
|
||
// base: "http://foo/bar/what?ho"
|
||
// rel: ""
|
||
// old: "http://foo/bar/?ho"
|
||
// new: "http://foo/bar/"
|
||
if ((*pszRelative==L'\0')
|
||
&&
|
||
(!StrCmpNW(pszCombined, wstr, ccUrl))
|
||
&&
|
||
(wstr[ccUrl]==QUERY))
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
|
||
// Filter
|
||
// base: "http://foo/bar/what?ho"
|
||
// rel: "/"
|
||
// old: "http://foo"
|
||
// new: "http://foo/"
|
||
if ((*pszRelative==L'/')
|
||
&&
|
||
(!StrCmpNW(pszCombined, wstr, ccLen))
|
||
&&
|
||
(ccUrl==(ccLen+1))
|
||
&&
|
||
(pszCombined[ccLen]==L'/'))
|
||
{
|
||
goto exitpoint;
|
||
}
|
||
|
||
WCHAR wmsg[8192];
|
||
wnsprintfW(wmsg,
|
||
ARRAYSIZE(wmsg),
|
||
L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nOriginal result:\"%s\"\nNew result:\"%s\"\nUse original, not new, result?\n",
|
||
dwFlags,
|
||
pszBase,
|
||
pszRelative,
|
||
wstr,
|
||
pszCombined
|
||
);
|
||
#ifdef SHOW_MESSAGEBOX
|
||
if (IDYES==MessageBoxW(
|
||
NULL,
|
||
wmsg,
|
||
L"CONTACT AKABIR: URLCOMBINE FAILURE",
|
||
MB_YESNO | MB_ICONERROR | MB_TASKMODAL))
|
||
{
|
||
StrCpyNW(pszCombined, wstr, dwSize);
|
||
*pcchCombined = ccLen;
|
||
}
|
||
else
|
||
{
|
||
StrCpyNW(szLast, pszCombined, *pcchCombined);
|
||
}
|
||
#endif
|
||
LogData(wmsg);
|
||
}
|
||
else
|
||
{
|
||
WCHAR wmsg[8192];
|
||
wnsprintfW(wmsg,
|
||
ARRAYSIZE(wmsg),
|
||
L"Flags:%#x\nBase:\"%s\"\nRelative:\"%s\"\nFAILED:%#x\nExpected:\"%s\"\n",
|
||
dwFlags,
|
||
pszBase,
|
||
pszRelative,
|
||
hr,
|
||
wstr);
|
||
#ifdef SHOW_MESSAGEBOX
|
||
MessageBoxW(
|
||
NULL,
|
||
wmsg,
|
||
L"CONTACT AKABIR: URLCOMBINE FAILURE",
|
||
MB_OK | MB_ICONERROR | MB_TASKMODAL);
|
||
#endif
|
||
StrCpyNW(pszCombined, wstr, dwSize);
|
||
*pcchCombined = ccLen;
|
||
LogData(wmsg);
|
||
}
|
||
hr = hr2;
|
||
}
|
||
}
|
||
break;
|
||
|
||
case PP_NEW_ONLY:
|
||
break;
|
||
|
||
case PP_ORIGINAL_ONLY:
|
||
{
|
||
hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
hr = CopyOutW(&strwOut, pszCombined, pcchCombined);
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
|
||
exitpoint:
|
||
return hr;
|
||
}
|
||
|
||
#endif //PROOFREAD_PARSES
|
||
|
||
LWSTDAPI
|
||
UrlCombineW(LPCWSTR pszBase,
|
||
LPCWSTR pszRelative,
|
||
LPWSTR pszCombined,
|
||
LPDWORD pcchCombined,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr = E_INVALIDARG;
|
||
|
||
if (pszBase && pszRelative && pcchCombined)
|
||
{
|
||
RIP(IS_VALID_STRING_PTRW(pszBase, INTERNET_MAX_PATH_LENGTH));
|
||
RIP(IS_VALID_STRING_PTRW(pszRelative, INTERNET_MAX_PATH_LENGTH));
|
||
RIP(IS_VALID_WRITE_PTR(pcchCombined, DWORD));
|
||
RIP((!pszCombined || IS_VALID_WRITE_BUFFER(pszCombined, WCHAR, *pcchCombined)));
|
||
|
||
#ifdef PROOFREAD_PARSES
|
||
DWORD dwSize = *pcchCombined;
|
||
#endif
|
||
|
||
hr = FormUrlCombineResultW(pszBase, pszRelative, pszCombined, pcchCombined, dwFlags);
|
||
|
||
#ifdef PROOFREAD_PARSES
|
||
hr = ProofreadParses(hr, pszBase, pszRelative, pszCombined, pcchCombined, dwFlags, dwSize);
|
||
#endif
|
||
}
|
||
return hr;
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlCombineA(LPCSTR pszBase,
|
||
LPCSTR pszRelative,
|
||
LPSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
|
||
if (!pszBase
|
||
|| !pszRelative
|
||
|| !pcchOut)
|
||
{
|
||
hr = E_INVALIDARG;
|
||
}
|
||
else
|
||
{
|
||
RIP(IS_VALID_STRING_PTRA(pszBase, INTERNET_MAX_PATH_LENGTH));
|
||
RIP(IS_VALID_STRING_PTRA(pszRelative, INTERNET_MAX_PATH_LENGTH));
|
||
RIP(IS_VALID_WRITE_PTR(pcchOut, DWORD));
|
||
RIP((!pszOut || IS_VALID_WRITE_BUFFER(pszOut, CHAR, *pcchOut)));
|
||
|
||
hr = FormUrlCombineResultA(pszBase, pszRelative, pszOut, pcchOut, dwFlags);
|
||
}
|
||
|
||
return hr;
|
||
}
|
||
|
||
#else // end USE_FAST_PARSER
|
||
|
||
LWSTDAPI
|
||
UrlCombineW(LPCWSTR pszBase,
|
||
LPCWSTR pszRelative,
|
||
LPWSTR pszCombined,
|
||
LPDWORD pcchCombined,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr = E_INVALIDARG;
|
||
|
||
RIPMSG(pszBase && IS_VALID_STRING_PTRW(pszBase, -1), "UrlCombineW: Caller passed invalid pszBase");
|
||
RIPMSG(pszRelative && IS_VALID_STRING_PTRW(pszRelative, -1), "UrlCombineW: Caller passed invalid pszRelative");
|
||
RIPMSG(NULL!=pcchOut, "UrlCombineW: Caller passed invalid pcchOut");
|
||
RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineW: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut == pszBase || pszOut == pszRelative)
|
||
DEBUGWhackPathStringW(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferW(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (pszBase && pszRelative && pcchCombined)
|
||
{
|
||
SHSTRW strwOut;
|
||
hr = SHUrlParse(pszBase, pszRelative, &strwOut, dwFlags);
|
||
if(SUCCEEDED(hr))
|
||
{
|
||
hr = CopyOutW(&strwOut, pszCombined, pcchCombined);
|
||
}
|
||
}
|
||
return hr;
|
||
}
|
||
|
||
LWSTDAPI
|
||
UrlCombineA(LPCSTR pszBase,
|
||
LPCSTR pszRelative,
|
||
LPSTR pszOut,
|
||
LPDWORD pcchOut,
|
||
DWORD dwFlags)
|
||
{
|
||
HRESULT hr;
|
||
SHSTRA straOut;
|
||
|
||
RIPMSG(pszBase && IS_VALID_STRING_PTRA(pszBase, -1), "UrlCombineA: Caller passed invalid pszBase");
|
||
RIPMSG(pszRelative && IS_VALID_STRING_PTRA(pszRelative, -1), "UrlCombineA: Caller passed invalid pszRelative");
|
||
RIPMSG(NULL!=pcchOut, "UrlCombineA: Caller passed invalid pcchOut");
|
||
RIPMSG(NULL==pcchOut || (pszOut && IS_VALID_WRITE_BUFFER(pszOut, char, *pcchOut)), "UrlCombineA: Caller passed invalid pszOut");
|
||
#ifdef DEBUG
|
||
if (pcchOut)
|
||
{
|
||
if (pszOut == pszBase || pszOut == pszRelative)
|
||
DEBUGWhackPathStringA(pszOut, *pcchOut);
|
||
else
|
||
DEBUGWhackPathBufferA(pszOut, *pcchOut);
|
||
}
|
||
#endif
|
||
|
||
if (!pszBase || !pszRelative || !pcchOut)
|
||
hr = E_INVALIDARG;
|
||
else
|
||
{
|
||
SHSTRW strwOut;
|
||
SHSTRW strwBase;
|
||
SHSTRW strwRelative;
|
||
|
||
if(SUCCEEDED(strwBase.SetStr(pszBase)) && SUCCEEDED(strwRelative.SetStr(pszRelative)))
|
||
hr = SHUrlParse((LPWSTR) strwBase, (LPWSTR)strwRelative, &strwOut, dwFlags);
|
||
else
|
||
hr = E_OUTOFMEMORY;
|
||
|
||
if(SUCCEEDED(hr))
|
||
hr = straOut.SetStr(strwOut);
|
||
}
|
||
|
||
if(SUCCEEDED(hr) )
|
||
hr = CopyOutA(&straOut, pszOut, pcchOut);
|
||
|
||
return hr;
|
||
}
|
||
|
||
#endif // !USE_FAST_PARSER
|
||
|
||
|
||
//
|
||
// Combines the desired scheme with the string after the scheme with a : in between. For
|
||
// some protocols, a // is placed after the colon.
|
||
//
|
||
|
||
PRIVATE HRESULT ColonSlashSlashW
|
||
(
|
||
LPCWSTR pszScheme, // url protocol (lower-case)
|
||
LPCWSTR pszAfterScheme, // string to append after the protocol
|
||
LPWSTR pszTranslatedUrl, // output buffer
|
||
int cchMax // size of output buffer
|
||
)
|
||
{
|
||
StrCpyNW(pszTranslatedUrl, pszScheme, cchMax);
|
||
|
||
// Append : after scheme and possibly a // as well.
|
||
int cchScheme = lstrlenW(pszScheme);
|
||
if (cchMax > cchScheme + 3)
|
||
{
|
||
pszTranslatedUrl[cchScheme] = L':';
|
||
|
||
// Number of characters to skip over in the buffer (how many non alphanums originally
|
||
// followed the protocol)
|
||
int cchSkip = 0;
|
||
|
||
// Number of characters past the protocol: to skip over in the URL (Do we insert slashes?)
|
||
int cchSlashes = 0;
|
||
|
||
|
||
// Modify this conditional to include any other protocols to always follow with ://
|
||
// Right now, http, https and ftp are automatic
|
||
if (!StrCmpW(pszScheme, L"http") || !StrCmpW(pszScheme, L"ftp") || !StrCmpW(pszScheme, L"https") )
|
||
{
|
||
//
|
||
// When preparing to copy the contents of pszAfterScheme into pszUrl, we can
|
||
// skip over as many as 3 non alpha numeric characters, since we are adding ://
|
||
// to the protocol directly
|
||
//
|
||
while ((cchSkip < 3) && pszAfterScheme[cchSkip] && !IsCharAlphaNumericW(pszAfterScheme[cchSkip]))
|
||
{
|
||
cchSkip++;
|
||
}
|
||
pszTranslatedUrl[cchScheme+1] = L'/';
|
||
pszTranslatedUrl[cchScheme+2] = L'/';
|
||
pszTranslatedUrl[cchScheme+3] = L'\0';
|
||
cchSlashes = 2;
|
||
}
|
||
else
|
||
// some other protocol
|
||
{
|
||
// just skip over colon
|
||
cchSkip = 1;
|
||
pszTranslatedUrl[cchScheme+1] = L'\0';
|
||
|
||
}
|
||
|
||
// Copy the rest of the Url from the UrlBuffer into the Url
|
||
StrCatBuffW(pszTranslatedUrl, pszAfterScheme + cchSkip, cchMax);
|
||
}
|
||
|
||
return S_OK;
|
||
}
|
||
|
||
//
|
||
// Scans the url for a scheme and if it does not match the known schemes, then
|
||
// a closest match is found.
|
||
//
|
||
LWSTDAPI
|
||
UrlFixupW
|
||
(
|
||
LPCWSTR pcszUrl, // URL to correct
|
||
LPWSTR pszTranslatedUrl, // buffer for corrected url (can be same as pcszUrl)
|
||
DWORD cchMax // size of pszTranslatedUrl
|
||
)
|
||
{
|
||
HRESULT hr = S_OK;
|
||
|
||
//
|
||
// Find the scheme
|
||
//
|
||
WCHAR szScheme[INTERNET_MAX_SCHEME_LENGTH];
|
||
ULONG cchScheme = 0;
|
||
LPCWSTR pszScheme = FindSchemeW(pcszUrl, &cchScheme, TRUE);
|
||
if (NULL == pszScheme || cchScheme > (ARRAYSIZE(szScheme)-1))
|
||
{
|
||
// No scheme found
|
||
return S_FALSE;
|
||
}
|
||
|
||
for (ULONG cch=0; cch < cchScheme; ++cch, ++pszScheme)
|
||
{
|
||
szScheme[cch] = Ascii_ToLowerW(*pszScheme);
|
||
}
|
||
szScheme[cch] = L'\0';
|
||
LPCWSTR pszAfterScheme = pszScheme;
|
||
|
||
//
|
||
// If input and output buffers are the same, copy the stuff after the scheme
|
||
// to another buffer so it doesn't get clobbered when we recombine.
|
||
//
|
||
WCHAR szBuf[INTERNET_MAX_PATH_LENGTH];
|
||
if (pcszUrl == pszTranslatedUrl)
|
||
{
|
||
StrCpyNW(szBuf, pszAfterScheme, ARRAYSIZE(szBuf));
|
||
pszAfterScheme = szBuf;
|
||
}
|
||
|
||
//
|
||
// See if it matches any of our known schemes
|
||
//
|
||
BOOL fKnownScheme = FALSE;
|
||
for (ULONG i = 0; i < ARRAYSIZE(g_mpUrlSchemeTypes); ++i)
|
||
{
|
||
if (StrCmpW(szScheme, g_mpUrlSchemeTypes[i].pszScheme) == 0)
|
||
{
|
||
fKnownScheme = TRUE;
|
||
break;
|
||
}
|
||
}
|
||
|
||
//
|
||
// If it matches a known scheme, then just fix :// if it's ftp or http
|
||
//
|
||
if (fKnownScheme ||
|
||
|
||
// Check for pluggable protocols too
|
||
NO_ERROR == SHGetValueW(HKEY_CLASSES_ROOT, szScheme, L"URL Protocol",
|
||
NULL, NULL, NULL))
|
||
{
|
||
ColonSlashSlashW(szScheme, pszAfterScheme, pszTranslatedUrl, cchMax);
|
||
return S_OK;
|
||
}
|
||
|
||
//
|
||
// Try to find a good match for the mispelled scheme
|
||
//
|
||
|
||
// These are weights used in the heuristic for the protocol matching
|
||
// iFloor is roughly the minimum percentage of characters that must match in
|
||
// order to make a change
|
||
const int cFloor = 60;
|
||
|
||
// A match in the first character has the greatest weight
|
||
const int cCorrectFirstChar = 150;
|
||
|
||
// Any other matched character
|
||
const int cCorrectChar = 100;
|
||
|
||
// The weight given to a character that only matches the preceding
|
||
// or subsequent character in the protocol
|
||
const int cOffByOneChar = 80;
|
||
|
||
// We penalize characters that are off by one, but if we have already
|
||
// observed the offset and subsequent characters continue the offset, we add this
|
||
const int cOffsetBonus = 20;
|
||
|
||
// The value of the best "match" found so far. Higher is a better match.
|
||
int iBestEval = 0;
|
||
|
||
// The protocol that's the best fit for the misspelled one
|
||
LPCWSTR pszBestMatch = NULL;
|
||
|
||
ULONG cchProt;
|
||
for (ULONG j = 0; j < ARRAYSIZE(g_mpUrlSchemeTypes); ++j)
|
||
{
|
||
// Is this one we don't correct to?
|
||
//
|
||
// Note: https is removed from this list. The potential for an intended "http" to
|
||
// be corrected to "https" is too high, and "http" is far more common. All this
|
||
// means is that if someone wants to get to an https site, they have to have it right.
|
||
//
|
||
if (IsFlagSet(g_mpUrlSchemeTypes[j].dwFlags, UPF_SCHEME_DONTCORRECT))
|
||
continue;
|
||
|
||
LPCWSTR pszProtocol = g_mpUrlSchemeTypes[j].pszScheme;
|
||
cchProt = g_mpUrlSchemeTypes[j].cchScheme;
|
||
|
||
// Evaluation of the fit of the currently tested protocol
|
||
int iEval = 0;
|
||
|
||
//
|
||
// Keep track of the positive or negative offset in the protocol
|
||
// such as "qhttp" instead of "http" or "elnet" instead of "telnet'
|
||
//
|
||
int iPosOffset = 0;
|
||
int iNegOffset = 0;
|
||
|
||
//
|
||
// The first character has the most weight. "htp" corrects
|
||
// to "http" and not "ftp" "ftt" corrects to "ftp"
|
||
//
|
||
if (*szScheme == *pszProtocol)
|
||
{
|
||
iEval += cCorrectFirstChar;
|
||
}
|
||
|
||
// Check for a negative offset
|
||
else if(*szScheme == pszProtocol[1])
|
||
{
|
||
iEval += cOffByOneChar;
|
||
iNegOffset = 1;
|
||
}
|
||
|
||
//
|
||
// We go through the characters in the protocol, even to the
|
||
// terminating null if iPosOffset == 1 (it is never more than 1)
|
||
// This is so the final "p" in "qhttp" gets a chance to be compared
|
||
//
|
||
for (i=1; i < cchProt + iPosOffset; i++)
|
||
{
|
||
// No points for null terminations matching
|
||
if (szScheme[i] == L'\0')
|
||
break;
|
||
|
||
//
|
||
// Check for adjacent character match
|
||
//
|
||
if (szScheme[i] == pszProtocol[i])
|
||
{
|
||
iEval += cCorrectChar;
|
||
}
|
||
else
|
||
{
|
||
if (szScheme[i] == pszProtocol[i - 1])
|
||
{
|
||
iEval += cOffByOneChar;
|
||
if (iPosOffset)
|
||
iEval += cOffsetBonus;
|
||
else
|
||
iPosOffset = 1;
|
||
}
|
||
else
|
||
{
|
||
if(szScheme[i] == pszProtocol[i + 1])
|
||
{
|
||
iEval += cOffByOneChar;
|
||
if (iNegOffset)
|
||
iEval += cOffsetBonus;
|
||
else
|
||
iNegOffset = 1;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Divide the Evaluated value by the MAX(cchScheme, cchProt)
|
||
iEval = iEval / (cchScheme > cchProt ? cchScheme : cchProt);
|
||
|
||
// A new best match?
|
||
if (iEval > iBestEval)
|
||
{
|
||
iBestEval = iEval;
|
||
pszBestMatch = pszProtocol;
|
||
|
||
//
|
||
// If we found an unquestionably good match (only 1 non-firstchar typo),
|
||
// break out of the loop
|
||
//
|
||
if (iEval >= 100)
|
||
break;
|
||
}
|
||
}
|
||
|
||
// If a good enough match was found, then correct url
|
||
if (iBestEval >= cFloor)
|
||
{
|
||
ColonSlashSlashW(pszBestMatch, pszAfterScheme, pszTranslatedUrl,cchMax);
|
||
}
|
||
else
|
||
{
|
||
hr = S_FALSE;
|
||
}
|
||
return hr;
|
||
}
|
||
|
||
|
||
|
||
// This is a port of InternetCrackUrl from wininet.
|
||
// NTRAID:108139 akabir We REALLY NEED TO CLEAN THIS CODE UP.
|
||
// RAID 109209
|
||
|
||
// A lot of the stuff is redundant with the other code available, but we
|
||
// need to be careful not to cause any regressions. Thus, I'm leaving it in for now.
|
||
|
||
//
|
||
// UrlSchemeList - the list of schemes that we support
|
||
//
|
||
|
||
typedef struct {
|
||
LPWSTR SchemeName;
|
||
DWORD SchemeLength;
|
||
SHINTERNET_SCHEME SchemeType;
|
||
BOOL NeedSlashes;
|
||
} URL_SCHEME_INFO;
|
||
|
||
#define UrlUnescapeInPlaceW(pszUrl, dwFlags) UrlUnescapeW(pszUrl, NULL, NULL, dwFlags | URL_UNESCAPE_INPLACE)
|
||
|
||
|
||
// NOTE MEGA REDUNDANCY. We could use the similar table above and check for opaque. However
|
||
// we'd have to modify that table
|
||
PRIVATE
|
||
URL_SCHEME_INFO
|
||
UrlSchemeList[] = {
|
||
NULL, 0, SHINTERNET_SCHEME_DEFAULT, FALSE,
|
||
L"ftp", 3, SHINTERNET_SCHEME_FTP, TRUE,
|
||
L"gopher", 6, SHINTERNET_SCHEME_GOPHER, TRUE,
|
||
L"http", 4, SHINTERNET_SCHEME_HTTP, TRUE,
|
||
L"https", 5, SHINTERNET_SCHEME_HTTPS, TRUE,
|
||
L"file", 4, SHINTERNET_SCHEME_FILE, TRUE,
|
||
L"news", 4, SHINTERNET_SCHEME_NEWS, FALSE,
|
||
L"mailto", 6, SHINTERNET_SCHEME_MAILTO, FALSE,
|
||
L"socks", 5, SHINTERNET_SCHEME_SOCKS, FALSE,
|
||
L"javascript", 10, SHINTERNET_SCHEME_JAVASCRIPT, FALSE,
|
||
L"vbscript", 8, SHINTERNET_SCHEME_VBSCRIPT, FALSE,
|
||
L"res", 3, SHINTERNET_SCHEME_RES, TRUE
|
||
};
|
||
|
||
#define NUMBER_OF_URL_SCHEMES ARRAYSIZE(UrlSchemeList)
|
||
|
||
// swiped from wininet\macros.h
|
||
#define IsDigit(c) (((c) >= L'0') && ((c) <= L'9'))
|
||
#define ARGUMENT_PRESENT(ArgumentPointer) (\
|
||
(CHAR *)(ArgumentPointer) != (CHAR *)(NULL) )
|
||
|
||
BOOL ScanSchemes(LPWSTR pszToCheck, DWORD ccStr, PDWORD pwResult)
|
||
{
|
||
for (DWORD i=0; i<NUMBER_OF_URL_SCHEMES; i++)
|
||
{
|
||
if ((UrlSchemeList[i].SchemeLength == ccStr)
|
||
&& (StrCmpNIW(UrlSchemeList[i].SchemeName, pszToCheck, ccStr)==0))
|
||
{
|
||
*pwResult = i;
|
||
return TRUE;
|
||
}
|
||
}
|
||
return FALSE;
|
||
}
|
||
|
||
#define ProbeWriteStringBufferW(a, b) ProbeWriteBuffer((LPVOID)a, (b*sizeof(WCHAR)));
|
||
#define PAGE_SIZE 4096
|
||
|
||
|
||
DWORD
|
||
ProbeWriteBuffer(
|
||
IN LPVOID lpBuffer,
|
||
IN DWORD dwBufferLength
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Probes a buffer for writeability. Used as part of API parameter validation,
|
||
this function tests the first and last locations in a buffer. This is not
|
||
as strict as the IsBadXPtr() Windows APIs, but it means we don't have to
|
||
test every location in the buffer
|
||
|
||
Arguments:
|
||
|
||
lpBuffer - pointer to buffer to test
|
||
|
||
dwBufferLength - length of buffer
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INVALID_PARAMETER
|
||
|
||
--*/
|
||
|
||
{
|
||
DWORD error;
|
||
|
||
//
|
||
// the buffer can be NULL if the probe length is 0. Otherwise, its an error
|
||
//
|
||
|
||
if (lpBuffer == NULL) {
|
||
error = (dwBufferLength == 0) ? ERROR_SUCCESS : ERROR_INVALID_PARAMETER;
|
||
} else if (dwBufferLength != 0) {
|
||
__try {
|
||
|
||
LPBYTE p;
|
||
LPBYTE end;
|
||
volatile BYTE b;
|
||
|
||
p = (LPBYTE)lpBuffer;
|
||
end = p + dwBufferLength - 1;
|
||
b = *end;
|
||
*end = b;
|
||
|
||
//
|
||
// visit every page in the buffer - it doesn't matter that we may
|
||
// test a character in the middle of a page
|
||
//
|
||
|
||
for (; p < end; p += PAGE_SIZE) {
|
||
b = *p;
|
||
*p = b;
|
||
}
|
||
error = ERROR_SUCCESS;
|
||
} __except(EXCEPTION_EXECUTE_HANDLER) {
|
||
error = ERROR_INVALID_PARAMETER;
|
||
}
|
||
ENDEXCEPT
|
||
} else {
|
||
//
|
||
// zero-length buffer
|
||
//
|
||
|
||
error = ERROR_SUCCESS;
|
||
}
|
||
|
||
return error;
|
||
}
|
||
|
||
DWORD
|
||
ProbeStringW(
|
||
IN LPWSTR lpString,
|
||
OUT LPDWORD lpdwStringLength
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Probes a wide string buffer for readability, and returns the length of the string
|
||
|
||
Arguments:
|
||
|
||
lpString - pointer to string to check
|
||
|
||
lpdwStringLength - returned length of string
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INVALID_PARAMETER
|
||
|
||
--*/
|
||
|
||
{
|
||
DWORD error;
|
||
DWORD length;
|
||
|
||
//
|
||
// initialize string length and return code
|
||
//
|
||
|
||
length = 0;
|
||
error = ERROR_SUCCESS;
|
||
|
||
//
|
||
// the buffer can be NULL
|
||
//
|
||
|
||
if (lpString != NULL) {
|
||
__try {
|
||
|
||
//
|
||
// unfortunately, for a string, we have to visit every location in
|
||
// the buffer to find the terminator
|
||
//
|
||
|
||
while (*lpString != '\0') {
|
||
++length;
|
||
++lpString;
|
||
}
|
||
} __except(EXCEPTION_EXECUTE_HANDLER) {
|
||
error = ERROR_INVALID_PARAMETER;
|
||
}
|
||
ENDEXCEPT
|
||
}
|
||
|
||
*lpdwStringLength = length;
|
||
|
||
return error;
|
||
}
|
||
|
||
|
||
DWORD
|
||
DecodeUrl(
|
||
IN LPWSTR Url,
|
||
IN DWORD UrlLength,
|
||
OUT LPWSTR DecodedString,
|
||
IN OUT LPDWORD DecodedLength
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Converts an URL string with embedded escape sequences (%xx) to a counted
|
||
string
|
||
|
||
It is safe to pass the same pointer for the string to convert, and the
|
||
buffer for the converted results: if the current character is not escaped,
|
||
it just gets overwritten, else the input pointer is moved ahead 2 characters
|
||
further than the output pointer, which is benign
|
||
|
||
Arguments:
|
||
|
||
Url - pointer to URL string to convert
|
||
|
||
UrlLength - number of characters in UrlString
|
||
|
||
DecodedString - pointer to buffer that receives converted string
|
||
|
||
DecodedLength - IN: number of characters in buffer
|
||
OUT: number of characters converted
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INTERNET_INVALID_URL
|
||
UrlString couldn't be converted
|
||
|
||
ERROR_INSUFFICIENT_BUFFER
|
||
ConvertedString isn't large enough to hold all the converted
|
||
UrlString
|
||
|
||
--*/
|
||
|
||
{
|
||
// NOTE We can replace this function with UrlUnescapeInPlace
|
||
|
||
DWORD bufferRemaining;
|
||
|
||
bufferRemaining = *DecodedLength;
|
||
while (UrlLength && bufferRemaining) {
|
||
WCHAR ch;
|
||
|
||
if (*Url == L'%') {
|
||
|
||
//
|
||
// REVIEW - would %00 ever appear in an URL?
|
||
//
|
||
if (IsHex(*(Url+1)) && IsHex(*(Url+2)))
|
||
{
|
||
ch = TranslateEscapedOctetW(Url);
|
||
Url += 3;
|
||
} else {
|
||
return ERROR_INTERNET_INVALID_URL;
|
||
}
|
||
UrlLength -= 3;
|
||
} else {
|
||
ch = *Url++;
|
||
--UrlLength;
|
||
}
|
||
*DecodedString++ = ch;
|
||
--bufferRemaining;
|
||
}
|
||
if (UrlLength == 0) {
|
||
*DecodedLength -= bufferRemaining;
|
||
return ERROR_SUCCESS;
|
||
} else {
|
||
return ERROR_INSUFFICIENT_BUFFER;
|
||
}
|
||
}
|
||
|
||
|
||
|
||
|
||
DWORD
|
||
DecodeUrlInSitu(
|
||
IN LPWSTR BufferAddress,
|
||
IN OUT LPDWORD BufferLength
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Decodes an URL string, if it contains escape sequences. The conversion is
|
||
done in place, since we know that a string containing escapes is longer than
|
||
the string with escape sequences (3 bytes) converted to characters (1 byte)
|
||
|
||
Arguments:
|
||
|
||
BufferAddress - pointer to the string to convert
|
||
|
||
BufferLength - IN: number of characters to convert
|
||
OUT: length of converted string
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INTERNET_INVALID_URL
|
||
ERROR_INSUFFICIENT_BUFFER
|
||
|
||
--*/
|
||
|
||
{
|
||
// NOTE We can replace this function with UrlUnescapeInPlace
|
||
DWORD stringLength = *BufferLength;
|
||
return DecodeUrl(BufferAddress,
|
||
stringLength,
|
||
BufferAddress,
|
||
BufferLength);
|
||
}
|
||
|
||
|
||
DWORD
|
||
GetUrlAddressInfo(
|
||
IN OUT LPWSTR* Url,
|
||
IN OUT LPDWORD UrlLength,
|
||
OUT LPWSTR* PartOne,
|
||
OUT LPDWORD PartOneLength,
|
||
OUT LPBOOL PartOneEscape,
|
||
OUT LPWSTR* PartTwo,
|
||
OUT LPDWORD PartTwoLength,
|
||
OUT LPBOOL PartTwoEscape
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Given a string of the form foo:bar, splits them into 2 counted strings about
|
||
the ':' character. The address string may or may not contain a ':'.
|
||
|
||
This function is intended to split into substrings the host:port and
|
||
username:password strings commonly used in Internet address specifications
|
||
and by association, in URLs
|
||
|
||
Arguments:
|
||
|
||
Url - pointer to pointer to string containing URL. On output
|
||
this is advanced past the address parts
|
||
|
||
UrlLength - pointer to length of URL in UrlString. On output this is
|
||
reduced by the number of characters parsed
|
||
|
||
PartOne - pointer which will receive first part of address string
|
||
|
||
PartOneLength - pointer which will receive length of first part of address
|
||
string
|
||
|
||
PartOneEscape - TRUE on output if PartOne contains escape sequences
|
||
|
||
PartTwo - pointer which will receive second part of address string
|
||
|
||
PartTwoLength - pointer which will receive length of second part of address
|
||
string
|
||
|
||
PartOneEscape - TRUE on output if PartTwo contains escape sequences
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INTERNET_INVALID_URL
|
||
|
||
--*/
|
||
|
||
{
|
||
LPWSTR pString;
|
||
LPWSTR pColon;
|
||
DWORD partLength;
|
||
LPBOOL partEscape;
|
||
DWORD length;
|
||
|
||
//
|
||
// parse out <host>[:<port>] or <name>[:<password>] (i.e. <part1>[:<part2>]
|
||
//
|
||
|
||
pString = *Url;
|
||
pColon = NULL;
|
||
partLength = 0;
|
||
*PartOne = pString;
|
||
*PartOneLength = 0;
|
||
*PartOneEscape = FALSE;
|
||
*PartTwoEscape = FALSE;
|
||
partEscape = PartOneEscape;
|
||
length = *UrlLength;
|
||
while ((*pString!=SLASH) && (*pString != L'\0') && (length != 0)) {
|
||
if (*pString==HEX_ESCAPE) {
|
||
// if there is a % in the string then it *must* (RFC 1738) be the
|
||
// start of an escape sequence. This function just reports the
|
||
// address of the substrings and their lengths; calling functions
|
||
// must handle the escape sequences (i.e. it is their responsibility
|
||
// to decide where to put the results)
|
||
//
|
||
*partEscape = TRUE;
|
||
}
|
||
if (*pString==COLON) {
|
||
if (pColon != NULL) {
|
||
|
||
//
|
||
// we don't expect more than 1 ':'
|
||
//
|
||
|
||
// ISSUE Note that passwords might contain colons, and thus not work in this
|
||
// case
|
||
return ERROR_INTERNET_INVALID_URL;
|
||
}
|
||
pColon = pString;
|
||
*PartOneLength = partLength;
|
||
if (partLength == 0) {
|
||
*PartOne = NULL;
|
||
}
|
||
partLength = 0;
|
||
partEscape = PartTwoEscape;
|
||
} else {
|
||
++partLength;
|
||
}
|
||
++pString;
|
||
--length;
|
||
}
|
||
|
||
//
|
||
// we either ended on the host (or user) name or the port number (or
|
||
// password), one of which we don't know the length of
|
||
//
|
||
|
||
if (pColon == NULL) {
|
||
*PartOneLength = partLength;
|
||
*PartTwo = NULL;
|
||
*PartTwoLength = 0;
|
||
*PartTwoEscape = FALSE;
|
||
} else {
|
||
*PartTwoLength = partLength;
|
||
*PartTwo = pColon + 1;
|
||
|
||
//
|
||
// in both the <user>:<password> and <host>:<port> cases, we cannot have
|
||
// the second part without the first, although both parts being zero
|
||
// length is OK (host name will be sorted out elsewhere, but (for now,
|
||
// at least) I am allowing <>:<> for username:password, since I don't
|
||
// see it expressly disallowed in the RFC. I may be revisiting this code
|
||
// later...)
|
||
//
|
||
// N.B.: ftp://ftp.microsoft.com uses http://:0/-http-gw-internal-/menu.gif
|
||
|
||
// if ((*PartOneLength == 0) && (partLength != 0)) {
|
||
// return ERROR_INTERNET_INVALID_URL;
|
||
// }
|
||
}
|
||
|
||
//
|
||
// update the URL pointer and length remaining
|
||
//
|
||
|
||
*Url = pString;
|
||
*UrlLength = length;
|
||
|
||
return ERROR_SUCCESS;
|
||
}
|
||
|
||
|
||
DWORD
|
||
GetUrlAddress(
|
||
IN OUT LPWSTR* lpszUrl,
|
||
OUT LPDWORD lpdwUrlLength,
|
||
OUT LPWSTR* lpszUserName OPTIONAL,
|
||
OUT LPDWORD lpdwUserNameLength OPTIONAL,
|
||
OUT LPWSTR* lpszPassword OPTIONAL,
|
||
OUT LPDWORD lpdwPasswordLength OPTIONAL,
|
||
OUT LPWSTR* lpszHostName OPTIONAL,
|
||
OUT LPDWORD lpdwHostNameLength OPTIONAL,
|
||
OUT LPSHINTERNET_PORT lpPort OPTIONAL,
|
||
OUT LPBOOL pHavePort
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
This function extracts any and all parts of the address information for a
|
||
generic URL. If any of the address parts contain escaped characters (%nn)
|
||
then they are converted in situ
|
||
|
||
The generic addressing format (RFC 1738) is:
|
||
|
||
<user>:<password>@<host>:<port>
|
||
|
||
The addressing information cannot contain a password without a user name,
|
||
or a port without a host name
|
||
NB: ftp://ftp.microsoft.com uses URL's that have a port without a host name!
|
||
(e.g. http://:0/-http-gw-internal-/menu.gif)
|
||
|
||
Although only the lpszUrl and lpdwUrlLength fields are required, the address
|
||
parts will be checked for presence and completeness
|
||
|
||
Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
|
||
then the accompanying lpdw field must also be supplied
|
||
|
||
Arguments:
|
||
|
||
lpszUrl - IN: pointer to the URL to parse
|
||
OUT: URL remaining after address information
|
||
|
||
N.B. The url-path is NOT canonicalized (unescaped)
|
||
because it may contain protocol-specific information
|
||
which must be parsed out by the protocol-specific
|
||
parser
|
||
|
||
lpdwUrlLength - returned length of the remainder of the URL after the
|
||
address information
|
||
|
||
lpszUserName - returned pointer to the user name
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require or expect user names in the URL
|
||
|
||
lpdwUserNameLength - returned length of the user name part
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require or expect user names in the URL
|
||
|
||
lpszPassword - returned pointer to the password
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require or expect user passwords in the URL
|
||
|
||
lpdwPasswordLength - returned length of the password
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require or expect user passwords in the URL
|
||
|
||
lpszHostName - returned pointer to the host name
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require the host name info
|
||
|
||
lpdwHostNameLength - returned length of the host name
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require the host name info
|
||
|
||
lpPort - returned value of the port field
|
||
This parameter can be omitted by those protocol parsers
|
||
that do not require or expect user port number
|
||
|
||
pHavePort - returned boolean indicating whether a port was specified
|
||
in the URL or not. This value is not returned if the
|
||
lpPort parameter is omitted.
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INTERNET_INVALID_URL
|
||
We could not parse some part of the address info, or we
|
||
found address info where the protocol parser didn't expect
|
||
any
|
||
|
||
ERROR_INSUFFICIENT_BUFFER
|
||
We could not convert an escaped string
|
||
|
||
--*/
|
||
|
||
{
|
||
LPWSTR pAt;
|
||
DWORD urlLength;
|
||
LPWSTR pUrl;
|
||
BOOL part1Escape;
|
||
BOOL part2Escape;
|
||
WCHAR portNumber[INTERNET_MAX_PORT_NUMBER_LENGTH + 1];
|
||
DWORD portNumberLength;
|
||
LPWSTR pPortNumber;
|
||
DWORD error;
|
||
LPWSTR hostName;
|
||
DWORD hostNameLength;
|
||
|
||
pUrl = *lpszUrl;
|
||
urlLength = lstrlenW(pUrl);
|
||
|
||
//
|
||
// check to see if there is an '@' separating user name & password. If we
|
||
// see a '/' or get to the end of the string before we see the '@' then
|
||
// there is no username:password part
|
||
//
|
||
|
||
pAt = NULL;
|
||
for (DWORD i = 0; i < urlLength; ++i) {
|
||
if (pUrl[i]==SLASH) {
|
||
break;
|
||
} else if (pUrl[i]==AT) {
|
||
pAt = &pUrl[i];
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (pAt != NULL) {
|
||
DWORD addressPartLength;
|
||
LPWSTR userName;
|
||
DWORD userNameLength;
|
||
LPWSTR password;
|
||
DWORD passwordLength;
|
||
|
||
addressPartLength = (DWORD) (pAt - pUrl);
|
||
urlLength -= addressPartLength;
|
||
error = GetUrlAddressInfo(&pUrl,
|
||
&addressPartLength,
|
||
&userName,
|
||
&userNameLength,
|
||
&part1Escape,
|
||
&password,
|
||
&passwordLength,
|
||
&part2Escape
|
||
);
|
||
if (error != ERROR_SUCCESS) {
|
||
return error;
|
||
}
|
||
|
||
//
|
||
// ensure there is no address information unparsed before the '@'
|
||
//
|
||
|
||
ASSERT(addressPartLength == 0);
|
||
ASSERT(pUrl == pAt);
|
||
|
||
if (ARGUMENT_PRESENT(lpszUserName)) {
|
||
|
||
ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
|
||
|
||
//
|
||
// convert the user name in situ
|
||
//
|
||
|
||
if (part1Escape) {
|
||
ASSERT(userName != NULL);
|
||
ASSERT(userNameLength != 0);
|
||
|
||
error = DecodeUrlInSitu(userName, &userNameLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
return error;
|
||
}
|
||
}
|
||
*lpszUserName = userName;
|
||
*lpdwUserNameLength = userNameLength;
|
||
}
|
||
|
||
if (ARGUMENT_PRESENT(lpszPassword)) {
|
||
// convert the password in situ
|
||
if (part2Escape) {
|
||
ASSERT(userName != NULL);
|
||
ASSERT(userNameLength != 0);
|
||
ASSERT(password != NULL);
|
||
ASSERT(passwordLength != 0);
|
||
|
||
error = DecodeUrlInSitu(password, &passwordLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
return error;
|
||
}
|
||
}
|
||
*lpszPassword = password;
|
||
*lpdwPasswordLength = passwordLength;
|
||
}
|
||
|
||
//
|
||
// the URL pointer now points at the host:port fields (remember that
|
||
// ExtractAddressParts() must have bumped pUrl up to the end of the
|
||
// password field (if present) which ends at pAt)
|
||
//
|
||
|
||
++pUrl;
|
||
|
||
//
|
||
// similarly, bump urlLength to account for the '@'
|
||
//
|
||
|
||
--urlLength;
|
||
} else {
|
||
//
|
||
// no '@' therefore no username or password
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(lpszUserName)) {
|
||
ASSERT(ARGUMENT_PRESENT(lpdwUserNameLength));
|
||
|
||
*lpszUserName = NULL;
|
||
*lpdwUserNameLength = 0;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpszPassword)) {
|
||
ASSERT(ARGUMENT_PRESENT(lpdwPasswordLength));
|
||
|
||
*lpszPassword = NULL;
|
||
*lpdwPasswordLength = 0;
|
||
}
|
||
}
|
||
|
||
//
|
||
// now get the host name and the optional port
|
||
//
|
||
|
||
pPortNumber = portNumber;
|
||
portNumberLength = sizeof(portNumber);
|
||
error = GetUrlAddressInfo(&pUrl,
|
||
&urlLength,
|
||
&hostName,
|
||
&hostNameLength,
|
||
&part1Escape,
|
||
&pPortNumber,
|
||
&portNumberLength,
|
||
&part2Escape
|
||
);
|
||
if (error != ERROR_SUCCESS) {
|
||
return error;
|
||
}
|
||
|
||
//
|
||
// the URL address information MUST contain the host name
|
||
//
|
||
|
||
// if ((hostName == NULL) || (hostNameLength == 0)) {
|
||
// return ERROR_INTERNET_INVALID_URL;
|
||
// }
|
||
|
||
if (ARGUMENT_PRESENT(lpszHostName)) {
|
||
ASSERT(ARGUMENT_PRESENT(lpdwHostNameLength));
|
||
|
||
//
|
||
// if the host name contains escaped characters, convert them in situ
|
||
//
|
||
|
||
if (part1Escape) {
|
||
error = DecodeUrlInSitu(hostName, &hostNameLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
return error;
|
||
}
|
||
}
|
||
*lpszHostName = hostName;
|
||
*lpdwHostNameLength = hostNameLength;
|
||
}
|
||
|
||
//
|
||
// if there is a port field, convert it if there are escaped characters,
|
||
// check it for valid numeric characters, and convert it to a number
|
||
//
|
||
|
||
if (ARGUMENT_PRESENT(lpPort)) {
|
||
if (portNumberLength != 0) {
|
||
DWORD i;
|
||
DWORD port;
|
||
|
||
ASSERT(pPortNumber != NULL);
|
||
|
||
if (part2Escape) {
|
||
error = DecodeUrlInSitu(pPortNumber, &portNumberLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
return error;
|
||
}
|
||
}
|
||
|
||
//
|
||
// ensure all characters in the port number buffer are numeric, and
|
||
// calculate the port number at the same time
|
||
//
|
||
|
||
for (i = 0, port = 0; i < portNumberLength; ++i) {
|
||
if (!IsDigit(*pPortNumber)) {
|
||
return ERROR_INTERNET_INVALID_URL;
|
||
}
|
||
port = port * 10 + (int)(*pPortNumber++ - L'0');
|
||
// We won't allow ports larger than 65535 ((2^16)-1)
|
||
// We have to check this every time to make sure that someone
|
||
// doesn't try to overflow a DWORD.
|
||
if (port > 65535)
|
||
{
|
||
return ERROR_INTERNET_INVALID_URL;
|
||
}
|
||
}
|
||
*lpPort = (SHINTERNET_PORT)port;
|
||
if (ARGUMENT_PRESENT(pHavePort)) {
|
||
*pHavePort = TRUE;
|
||
}
|
||
} else {
|
||
*lpPort = INTERNET_INVALID_PORT_NUMBER;
|
||
if (ARGUMENT_PRESENT(pHavePort)) {
|
||
*pHavePort = FALSE;
|
||
}
|
||
}
|
||
}
|
||
|
||
//
|
||
// update the URL pointer and the length of the url-path
|
||
//
|
||
|
||
*lpszUrl = pUrl;
|
||
*lpdwUrlLength = urlLength;
|
||
|
||
return ERROR_SUCCESS;
|
||
}
|
||
|
||
|
||
DWORD
|
||
CrackUrl(
|
||
IN OUT LPWSTR lpszUrl,
|
||
IN DWORD dwUrlLength,
|
||
IN BOOL bEscape,
|
||
OUT LPSHINTERNET_SCHEME lpSchemeType OPTIONAL,
|
||
OUT LPWSTR* lpszSchemeName OPTIONAL,
|
||
OUT LPDWORD lpdwSchemeNameLength OPTIONAL,
|
||
OUT LPWSTR* lpszHostName OPTIONAL,
|
||
OUT LPDWORD lpdwHostNameLength OPTIONAL,
|
||
OUT LPSHINTERNET_PORT lpServerPort OPTIONAL,
|
||
OUT LPWSTR* lpszUserName OPTIONAL,
|
||
OUT LPDWORD lpdwUserNameLength OPTIONAL,
|
||
OUT LPWSTR* lpszPassword OPTIONAL,
|
||
OUT LPDWORD lpdwPasswordLength OPTIONAL,
|
||
OUT LPWSTR* lpszUrlPath OPTIONAL,
|
||
OUT LPDWORD lpdwUrlPathLength OPTIONAL,
|
||
OUT LPWSTR* lpszExtraInfo OPTIONAL,
|
||
OUT LPDWORD lpdwExtraInfoLength OPTIONAL,
|
||
OUT LPBOOL pHavePort
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Cracks an URL into its constituent parts
|
||
|
||
Assumes: 1. If one of the optional lpsz fields is present (e.g. lpszUserName)
|
||
then the accompanying lpdw field must also be supplied
|
||
|
||
Arguments:
|
||
|
||
lpszUrl - pointer to URL to crack. This buffer WILL BE
|
||
OVERWRITTEN if it contains escape sequences that
|
||
we will convert back to ANSI characters
|
||
|
||
dwUrlLength - if not 0, string length of lpszUrl
|
||
|
||
bEscape - TRUE if we are to escape the url-path
|
||
|
||
lpSchemeType - returned scheme type - e.g. INTERNET_SCHEME_HTTP
|
||
|
||
lpszSchemeName - returned scheme name
|
||
|
||
lpdwSchemeNameLength - length of scheme name
|
||
|
||
lpszHostName - returned host name
|
||
|
||
lpdwHostNameLength - length of host name buffer
|
||
|
||
lpServerPort - returned server port if present in the URL, else 0
|
||
|
||
lpszUserName - returned user name if present
|
||
|
||
lpdwUserNameLength - length of user name buffer
|
||
|
||
lpszPassword - returned password if present
|
||
|
||
lpdwPasswordLength - length of password buffer
|
||
|
||
lpszUrlPath - returned, canonicalized URL path
|
||
|
||
lpdwUrlPathLength - length of url-path buffer
|
||
|
||
lpszExtraInfo - returned search string or intra-page link if present
|
||
|
||
lpdwExtraInfoLength - length of extra info buffer
|
||
|
||
pHavePort - returned boolean indicating whether port was specified
|
||
|
||
Return Value:
|
||
|
||
DWORD
|
||
Success - ERROR_SUCCESS
|
||
|
||
Failure - ERROR_INTERNET_UNRECOGNIZED_SCHEME
|
||
|
||
--*/
|
||
|
||
{
|
||
DWORD error;
|
||
DWORD schemeLength;
|
||
SHINTERNET_SCHEME schemeType;
|
||
|
||
//
|
||
// if dwUrlLength is 0 then lpszUrl is ASCIIZ. Find its length
|
||
//
|
||
|
||
if (dwUrlLength == 0) {
|
||
dwUrlLength = lstrlenW(lpszUrl);
|
||
}
|
||
|
||
//
|
||
// get parser based on the protocol name
|
||
//
|
||
|
||
for (schemeLength = 0; lpszUrl[schemeLength]!=COLON; ++schemeLength) {
|
||
if ((dwUrlLength == 0) || (lpszUrl[schemeLength] == '\0')) {
|
||
//
|
||
// no ':' in URL? Bogus (dude)
|
||
//
|
||
error = ERROR_INTERNET_UNRECOGNIZED_SCHEME;
|
||
goto quit;
|
||
}
|
||
--dwUrlLength;
|
||
}
|
||
|
||
DWORD i;
|
||
int skip;
|
||
BOOL isGeneric;
|
||
BOOL needSlashes;
|
||
BOOL haveSlashes;
|
||
|
||
isGeneric = FALSE;
|
||
needSlashes = FALSE;
|
||
haveSlashes = FALSE;
|
||
|
||
schemeType = SHINTERNET_SCHEME_UNKNOWN;
|
||
|
||
if (ScanSchemes(lpszUrl, schemeLength, &i))
|
||
{
|
||
schemeType = UrlSchemeList[i].SchemeType;
|
||
needSlashes = UrlSchemeList[i].NeedSlashes;
|
||
}
|
||
|
||
skip = 1; // skip ':'
|
||
|
||
if ((dwUrlLength > 3) && (StrCmpNIW(&lpszUrl[schemeLength], L"://", 3) == 0)) {
|
||
skip = 3; // skip "://"
|
||
haveSlashes = TRUE;
|
||
}
|
||
|
||
if (schemeType == SHINTERNET_SCHEME_FILE)
|
||
isGeneric = TRUE;
|
||
|
||
if (schemeType == SHINTERNET_SCHEME_NEWS ||
|
||
schemeType == SHINTERNET_SCHEME_UNKNOWN) {
|
||
//
|
||
// urls can be hierarchical or opaque. if the slashes
|
||
// exist, then we should assume hierarchical
|
||
// when we dont know the scheme or it is news:.
|
||
// otherwise it is opaque (isGeneric)
|
||
//
|
||
|
||
needSlashes = haveSlashes;
|
||
isGeneric = !haveSlashes;
|
||
}
|
||
|
||
//
|
||
// If we don't have slashes, make sure we don't need them.
|
||
// If we have slashes, make sure they are required.
|
||
//
|
||
|
||
if ((!haveSlashes && !needSlashes) || (haveSlashes && needSlashes)) {
|
||
if (ARGUMENT_PRESENT(lpSchemeType)) {
|
||
*lpSchemeType = schemeType;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpszSchemeName)) {
|
||
*lpszSchemeName = lpszUrl;
|
||
*lpdwSchemeNameLength = schemeLength;
|
||
}
|
||
lpszUrl += schemeLength + skip;
|
||
dwUrlLength -= skip;
|
||
|
||
if (SHINTERNET_SCHEME_RES == schemeType) {
|
||
if (ARGUMENT_PRESENT(lpszUserName)) {
|
||
*lpszUserName = NULL;
|
||
*lpdwUserNameLength = 0;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpszPassword)) {
|
||
*lpszPassword = NULL;
|
||
*lpdwPasswordLength = 0;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpServerPort)) {
|
||
*lpServerPort = 0;
|
||
}
|
||
PWSTR psz = lpszUrl;
|
||
while (*lpszUrl && *lpszUrl!=SLASH)
|
||
lpszUrl++;
|
||
|
||
if (ARGUMENT_PRESENT(lpszHostName)) {
|
||
*lpszHostName = psz;
|
||
*lpdwHostNameLength = (DWORD)(lpszUrl - psz);
|
||
dwUrlLength -= *lpdwHostNameLength;
|
||
error = DecodeUrlInSitu(*lpszHostName, lpdwHostNameLength);
|
||
}
|
||
} else if (isGeneric) {
|
||
if (ARGUMENT_PRESENT(lpszUserName)) {
|
||
*lpszUserName = NULL;
|
||
*lpdwUserNameLength = 0;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpszPassword)) {
|
||
*lpszPassword = NULL;
|
||
*lpdwPasswordLength = 0;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpszHostName)) {
|
||
*lpszHostName = NULL;
|
||
*lpdwHostNameLength = 0;
|
||
}
|
||
if (ARGUMENT_PRESENT(lpServerPort)) {
|
||
*lpServerPort = 0;
|
||
}
|
||
error = ERROR_SUCCESS;
|
||
} else {
|
||
error = GetUrlAddress(&lpszUrl,
|
||
&dwUrlLength,
|
||
lpszUserName,
|
||
lpdwUserNameLength,
|
||
lpszPassword,
|
||
lpdwPasswordLength,
|
||
lpszHostName,
|
||
lpdwHostNameLength,
|
||
lpServerPort,
|
||
pHavePort
|
||
);
|
||
}
|
||
if (bEscape && (error == ERROR_SUCCESS)) {
|
||
error = DecodeUrlInSitu(lpszUrl, &dwUrlLength);
|
||
}
|
||
if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszExtraInfo)) {
|
||
*lpdwExtraInfoLength = 0;
|
||
for (i = 0; i < (int)dwUrlLength; i++) {
|
||
if (lpszUrl[i] == '?' || lpszUrl[i] == '#') {
|
||
*lpszExtraInfo = &lpszUrl[i];
|
||
*lpdwExtraInfoLength = dwUrlLength - i;
|
||
dwUrlLength -= *lpdwExtraInfoLength;
|
||
}
|
||
}
|
||
}
|
||
if ((error == ERROR_SUCCESS) && ARGUMENT_PRESENT(lpszUrlPath)) {
|
||
*lpszUrlPath = lpszUrl;
|
||
*lpdwUrlPathLength = dwUrlLength;
|
||
}
|
||
} else {
|
||
error = ERROR_INTERNET_UNRECOGNIZED_SCHEME;
|
||
}
|
||
|
||
quit:
|
||
|
||
return error;
|
||
}
|
||
|
||
|
||
|
||
BOOL
|
||
WINAPI
|
||
UrlCrackW(
|
||
IN LPCWSTR lpszUrl,
|
||
IN DWORD dwUrlLength,
|
||
IN DWORD dwFlags,
|
||
IN LPSHURL_COMPONENTSW lpUrlComponents
|
||
)
|
||
|
||
/*++
|
||
|
||
Routine Description:
|
||
|
||
Cracks an URL into its constituent parts. Optionally escapes the url-path.
|
||
We assume that the user has supplied large enough buffers for the various
|
||
URL parts
|
||
|
||
Arguments:
|
||
|
||
lpszUrl - pointer to URL to crack
|
||
|
||
dwUrlLength - 0 if lpszUrl is ASCIIZ string, else length of lpszUrl
|
||
|
||
dwFlags - flags controlling operation
|
||
|
||
lpUrlComponents - pointer to URL_COMPONENTS
|
||
|
||
Return Value:
|
||
|
||
BOOL
|
||
Success - TRUE
|
||
|
||
Failure - FALSE. Call GetLastError() for more info
|
||
|
||
--*/
|
||
|
||
{
|
||
DWORD error = ERROR_SUCCESS;
|
||
|
||
// validate parameters
|
||
if (ARGUMENT_PRESENT(lpszUrl)) {
|
||
if (!dwUrlLength) {
|
||
error = ProbeStringW((LPWSTR)lpszUrl, &dwUrlLength);
|
||
} else if (IsBadReadPtr((LPVOID)lpszUrl, dwUrlLength*sizeof(WCHAR))) {
|
||
error = ERROR_INVALID_PARAMETER;
|
||
}
|
||
} else {
|
||
error = ERROR_INVALID_PARAMETER;
|
||
}
|
||
if (error != ERROR_SUCCESS)
|
||
{
|
||
goto quit;
|
||
}
|
||
|
||
if (IsBadWritePtr(lpUrlComponents, sizeof(*lpUrlComponents))
|
||
|| (lpUrlComponents->dwStructSize != sizeof(*lpUrlComponents)))
|
||
{
|
||
error = ERROR_INVALID_PARAMETER;
|
||
goto quit;
|
||
}
|
||
|
||
//
|
||
// we only allow two flags for this API
|
||
//
|
||
|
||
if (dwFlags & ~(ICU_ESCAPE | ICU_DECODE)) {
|
||
error = ERROR_INVALID_PARAMETER;
|
||
goto quit;
|
||
}
|
||
|
||
//
|
||
// get the individual components to return. If they reference a buffer then
|
||
// check it for writeability
|
||
//
|
||
|
||
LPWSTR lpUrl;
|
||
LPWSTR urlCopy;
|
||
SHINTERNET_SCHEME schemeType;
|
||
LPWSTR schemeName;
|
||
DWORD schemeNameLength;
|
||
LPWSTR hostName;
|
||
DWORD hostNameLength;
|
||
SHINTERNET_PORT nPort;
|
||
LPWSTR userName;
|
||
DWORD userNameLength;
|
||
LPWSTR password;
|
||
DWORD passwordLength;
|
||
LPWSTR urlPath;
|
||
DWORD urlPathLength;
|
||
LPWSTR extraInfo;
|
||
DWORD extraInfoLength;
|
||
BOOL copyComponent;
|
||
BOOL havePort;
|
||
|
||
copyComponent = FALSE;
|
||
|
||
schemeName = lpUrlComponents->lpszScheme;
|
||
schemeNameLength = lpUrlComponents->dwSchemeLength;
|
||
if ((schemeName != NULL) && (schemeNameLength != 0)) {
|
||
error = ProbeWriteStringBufferW((LPVOID)schemeName, schemeNameLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto quit;
|
||
}
|
||
*schemeName = '\0';
|
||
copyComponent = TRUE;
|
||
}
|
||
|
||
hostName = lpUrlComponents->lpszHostName;
|
||
hostNameLength = lpUrlComponents->dwHostNameLength;
|
||
if ((hostName != NULL) && (hostNameLength != 0)) {
|
||
error = ProbeWriteStringBufferW((LPVOID)hostName, hostNameLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto quit;
|
||
}
|
||
*hostName = '\0';
|
||
copyComponent = TRUE;
|
||
}
|
||
|
||
userName = lpUrlComponents->lpszUserName;
|
||
userNameLength = lpUrlComponents->dwUserNameLength;
|
||
if ((userName != NULL) && (userNameLength != 0)) {
|
||
error = ProbeWriteStringBufferW((LPVOID)userName, userNameLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto quit;
|
||
}
|
||
*userName = '\0';
|
||
copyComponent = TRUE;
|
||
}
|
||
|
||
password = lpUrlComponents->lpszPassword;
|
||
passwordLength = lpUrlComponents->dwPasswordLength;
|
||
if ((password != NULL) && (passwordLength != 0)) {
|
||
error = ProbeWriteStringBufferW((LPVOID)password, passwordLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto quit;
|
||
}
|
||
*password = '\0';
|
||
copyComponent = TRUE;
|
||
}
|
||
|
||
urlPath = lpUrlComponents->lpszUrlPath;
|
||
urlPathLength = lpUrlComponents->dwUrlPathLength;
|
||
if ((urlPath != NULL) && (urlPathLength != 0)) {
|
||
error = ProbeWriteStringBufferW((LPVOID)urlPath, urlPathLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto quit;
|
||
}
|
||
*urlPath = '\0';
|
||
copyComponent = TRUE;
|
||
}
|
||
|
||
extraInfo = lpUrlComponents->lpszExtraInfo;
|
||
extraInfoLength = lpUrlComponents->dwExtraInfoLength;
|
||
if ((extraInfo != NULL) && (extraInfoLength != 0)) {
|
||
error = ProbeWriteStringBufferW((LPVOID)extraInfo, extraInfoLength);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto quit;
|
||
}
|
||
*extraInfo = '\0';
|
||
copyComponent = TRUE;
|
||
}
|
||
|
||
//
|
||
// we can only escape or decode the URL if the caller has provided us with
|
||
// buffers to write the escaped strings into
|
||
//
|
||
|
||
if (dwFlags & (ICU_ESCAPE | ICU_DECODE)) {
|
||
if (!copyComponent) {
|
||
error = ERROR_INVALID_PARAMETER;
|
||
goto quit;
|
||
}
|
||
|
||
//
|
||
// create a copy of the URL. CrackUrl() will modify this in situ. We
|
||
// need to copy the results back to the user's buffer(s)
|
||
//
|
||
|
||
DWORD dw = dwUrlLength;
|
||
if (!dw)
|
||
{
|
||
dw = lstrlenW(lpszUrl);
|
||
}
|
||
urlCopy = new WCHAR[dw+1];
|
||
if (urlCopy == NULL) {
|
||
error = ERROR_NOT_ENOUGH_MEMORY;
|
||
goto quit;
|
||
}
|
||
memcpy(urlCopy, lpszUrl, (dw+1)*sizeof(WCHAR));
|
||
lpUrl = urlCopy;
|
||
} else {
|
||
lpUrl = (LPWSTR)lpszUrl;
|
||
urlCopy = NULL;
|
||
}
|
||
|
||
//
|
||
// crack the URL into its constituent parts
|
||
//
|
||
|
||
error = CrackUrl(lpUrl,
|
||
dwUrlLength,
|
||
(dwFlags & ICU_ESCAPE) ? TRUE : FALSE,
|
||
&schemeType,
|
||
&schemeName,
|
||
&schemeNameLength,
|
||
&hostName,
|
||
&hostNameLength,
|
||
&nPort,
|
||
&userName,
|
||
&userNameLength,
|
||
&password,
|
||
&passwordLength,
|
||
&urlPath,
|
||
&urlPathLength,
|
||
extraInfoLength ? &extraInfo : NULL,
|
||
extraInfoLength ? &extraInfoLength : 0,
|
||
&havePort
|
||
);
|
||
if (error != ERROR_SUCCESS) {
|
||
goto crack_error;
|
||
}
|
||
|
||
BOOL copyFailure;
|
||
|
||
copyFailure = FALSE;
|
||
|
||
//
|
||
// update the URL_COMPONENTS structure based on the results, and what was
|
||
// asked for
|
||
//
|
||
|
||
if (lpUrlComponents->lpszScheme != NULL) {
|
||
if (lpUrlComponents->dwSchemeLength > schemeNameLength) {
|
||
memcpy(lpUrlComponents->lpszScheme, schemeName, schemeNameLength*sizeof(WCHAR));
|
||
lpUrlComponents->lpszScheme[schemeNameLength] = '\0';
|
||
if (dwFlags & ICU_DECODE) {
|
||
UrlUnescapeInPlaceW(lpUrlComponents->lpszScheme, 0);
|
||
}
|
||
} else {
|
||
++schemeNameLength;
|
||
copyFailure = TRUE;
|
||
}
|
||
lpUrlComponents->dwSchemeLength = schemeNameLength;
|
||
} else if (lpUrlComponents->dwSchemeLength != 0) {
|
||
lpUrlComponents->lpszScheme = schemeName;
|
||
lpUrlComponents->dwSchemeLength = schemeNameLength;
|
||
}
|
||
|
||
if (lpUrlComponents->lpszHostName != NULL) {
|
||
if (lpUrlComponents->dwHostNameLength > hostNameLength) {
|
||
memcpy(lpUrlComponents->lpszHostName, hostName, hostNameLength*sizeof(WCHAR));
|
||
lpUrlComponents->lpszHostName[hostNameLength] = '\0';
|
||
if (dwFlags & ICU_DECODE) {
|
||
UrlUnescapeInPlaceW(lpUrlComponents->lpszHostName, 0);
|
||
}
|
||
} else {
|
||
++hostNameLength;
|
||
copyFailure = TRUE;
|
||
}
|
||
lpUrlComponents->dwHostNameLength = hostNameLength;
|
||
} else if (lpUrlComponents->dwHostNameLength != 0) {
|
||
lpUrlComponents->lpszHostName = hostName;
|
||
lpUrlComponents->dwHostNameLength = hostNameLength;
|
||
}
|
||
|
||
if (lpUrlComponents->lpszUserName != NULL) {
|
||
if (lpUrlComponents->dwUserNameLength > userNameLength) {
|
||
memcpy(lpUrlComponents->lpszUserName, userName, userNameLength*sizeof(WCHAR));
|
||
lpUrlComponents->lpszUserName[userNameLength] = '\0';
|
||
if (dwFlags & ICU_DECODE) {
|
||
UrlUnescapeInPlaceW(lpUrlComponents->lpszUserName, 0);
|
||
}
|
||
} else {
|
||
++userNameLength;
|
||
copyFailure = TRUE;
|
||
}
|
||
lpUrlComponents->dwUserNameLength = userNameLength;
|
||
} else if (lpUrlComponents->dwUserNameLength != 0) {
|
||
lpUrlComponents->lpszUserName = userName;
|
||
lpUrlComponents->dwUserNameLength = userNameLength;
|
||
}
|
||
|
||
if (lpUrlComponents->lpszPassword != NULL) {
|
||
if (lpUrlComponents->dwPasswordLength > passwordLength) {
|
||
memcpy(lpUrlComponents->lpszPassword, password, passwordLength*sizeof(WCHAR));
|
||
lpUrlComponents->lpszPassword[passwordLength] = '\0';
|
||
if (dwFlags & ICU_DECODE) {
|
||
UrlUnescapeInPlaceW(lpUrlComponents->lpszPassword, 0);
|
||
}
|
||
} else {
|
||
++passwordLength;
|
||
copyFailure = TRUE;
|
||
}
|
||
lpUrlComponents->dwPasswordLength = passwordLength;
|
||
} else if (lpUrlComponents->dwPasswordLength != 0) {
|
||
lpUrlComponents->lpszPassword = password;
|
||
lpUrlComponents->dwPasswordLength = passwordLength;
|
||
}
|
||
|
||
if (lpUrlComponents->lpszUrlPath != NULL) {
|
||
if(schemeType == SHINTERNET_SCHEME_FILE)
|
||
{
|
||
//
|
||
// for file: urls we return the path component
|
||
// as a valid dos path.
|
||
//
|
||
|
||
copyFailure = FAILED(PathCreateFromUrlW(lpUrl, lpUrlComponents->lpszUrlPath, &(lpUrlComponents->dwUrlPathLength), 0));
|
||
}
|
||
else if (lpUrlComponents->dwUrlPathLength > urlPathLength) {
|
||
memcpy(lpUrlComponents->lpszUrlPath, urlPath, urlPathLength*sizeof(WCHAR));
|
||
lpUrlComponents->lpszUrlPath[urlPathLength] = '\0';
|
||
if (dwFlags & ICU_DECODE) {
|
||
UrlUnescapeInPlaceW(lpUrlComponents->lpszUrlPath, 0);
|
||
}
|
||
lpUrlComponents->dwUrlPathLength = urlPathLength;
|
||
} else {
|
||
++urlPathLength;
|
||
copyFailure = TRUE;
|
||
lpUrlComponents->dwUrlPathLength = urlPathLength;
|
||
}
|
||
} else if (lpUrlComponents->dwUrlPathLength != 0) {
|
||
lpUrlComponents->lpszUrlPath = urlPath;
|
||
lpUrlComponents->dwUrlPathLength = urlPathLength;
|
||
}
|
||
|
||
if (lpUrlComponents->lpszExtraInfo != NULL) {
|
||
if (lpUrlComponents->dwExtraInfoLength > extraInfoLength) {
|
||
memcpy(lpUrlComponents->lpszExtraInfo, extraInfo, extraInfoLength*sizeof(WCHAR));
|
||
lpUrlComponents->lpszExtraInfo[extraInfoLength] = '\0';
|
||
if (dwFlags & ICU_DECODE) {
|
||
UrlUnescapeInPlaceW(lpUrlComponents->lpszExtraInfo, 0);
|
||
}
|
||
} else {
|
||
++extraInfoLength;
|
||
copyFailure = TRUE;
|
||
}
|
||
lpUrlComponents->dwExtraInfoLength = extraInfoLength;
|
||
} else if (lpUrlComponents->dwExtraInfoLength != 0) {
|
||
lpUrlComponents->lpszExtraInfo = extraInfo;
|
||
lpUrlComponents->dwExtraInfoLength = extraInfoLength;
|
||
}
|
||
|
||
//
|
||
// we may have failed to copy one or more components because we didn't have
|
||
// enough buffer space.
|
||
//
|
||
// N.B. Don't change error below here. If need be, move this test lower
|
||
//
|
||
|
||
if (copyFailure) {
|
||
error = ERROR_INSUFFICIENT_BUFFER;
|
||
}
|
||
|
||
//
|
||
// copy the scheme type
|
||
//
|
||
|
||
lpUrlComponents->nScheme = schemeType;
|
||
|
||
//
|
||
// convert 0 port (not in URL) to default value for scheme
|
||
//
|
||
|
||
if (nPort == INTERNET_INVALID_PORT_NUMBER && !havePort) {
|
||
switch (schemeType) {
|
||
case SHINTERNET_SCHEME_FTP:
|
||
nPort = INTERNET_DEFAULT_FTP_PORT;
|
||
break;
|
||
|
||
case SHINTERNET_SCHEME_GOPHER:
|
||
nPort = INTERNET_DEFAULT_GOPHER_PORT;
|
||
break;
|
||
|
||
case SHINTERNET_SCHEME_HTTP:
|
||
nPort = INTERNET_DEFAULT_HTTP_PORT;
|
||
break;
|
||
|
||
case SHINTERNET_SCHEME_HTTPS:
|
||
nPort = INTERNET_DEFAULT_HTTPS_PORT;
|
||
break;
|
||
}
|
||
}
|
||
lpUrlComponents->nPort = nPort;
|
||
|
||
crack_error:
|
||
|
||
if (urlCopy != NULL) {
|
||
delete [] urlCopy;
|
||
}
|
||
|
||
quit:
|
||
// return HRESULT_FROM_WIN32(error);
|
||
if (error!=ERROR_SUCCESS)
|
||
{
|
||
SetLastError(error);
|
||
}
|
||
return error==ERROR_SUCCESS;
|
||
}
|