RandomSec/main/webapp/WEB-INF/lib/jython/urlparse.py

"""Parse (absolute and relative) URLs.

See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
"""

# Public names of this module.
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit"]

# A classification of schemes ('' means apply by default)
# Schemes for which urljoin() resolves a reference against the base; a
# reference with any other scheme is returned unchanged.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp']
# Schemes for which a leading "//" introduces a network location
# (consulted by urlsplit(), urlunsplit() and urljoin()).
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp']
# NOTE(review): not referenced by any function visible in this module.
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
# Schemes for which urlparse() splits ";params" off the path.
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp']
# Schemes for which urlsplit() splits off a "?query" part.
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
# Schemes for which urlsplit() splits off a "#fragment" part (only when
# the caller allows fragments).
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters valid in scheme names
# urlsplit() only treats the text before the first ':' as a scheme when
# every character of it appears in this string.
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# urlsplit() memoizes its results in _parse_cache; once the cache holds
# MAX_CACHE_SIZE entries it is thrown away completely via clear_cache().
MAX_CACHE_SIZE = 20
_parse_cache = {}

def clear_cache():
    """Discard every memoized urlsplit() result.

    The module-level cache name is rebound to a brand-new empty
    dictionary; the previous dictionary object is not modified.
    """
    global _parse_cache
    _parse_cache = dict()


class BaseResult(tuple):
    """Base class for the parsed result objects.

    This provides the attributes shared by the two derived result
    objects as read-only properties.  The derived classes are
    responsible for checking the right number of arguments were
    supplied to the constructor.

    The netloc-derived attributes (username, password, hostname, port)
    are computed on each access from element 1 of the tuple.
    """

    __slots__ = ()

    # Attributes that access the basic components of the URL:

    @property
    def scheme(self):
        """Element 0 of the tuple."""
        return self[0]

    @property
    def netloc(self):
        """Element 1 of the tuple."""
        return self[1]

    @property
    def path(self):
        """Element 2 of the tuple."""
        return self[2]

    @property
    def query(self):
        """Second-to-last element (indexed from the end so that it works
        for both the 5- and the 6-element subclasses)."""
        return self[-2]

    @property
    def fragment(self):
        """Last element of the tuple."""
        return self[-1]

    # Additional attributes that provide access to parsed-out portions
    # of the netloc:

    def _hostinfo(self):
        """Return (host, port_text) taken from the netloc.

        port_text is None when the netloc has no ':port' suffix and may
        be the empty string for a netloc such as 'host:'.
        """
        # Everything after the *last* '@' is the host part, so that a
        # password containing '@' does not leak into the host name.
        hostport = self.netloc.rsplit("@", 1)[-1]
        if hostport[:1] == "[" and "]" in hostport:
            # Bracketed IPv6 literal: the colons inside the brackets
            # belong to the address, not to a port separator.
            end = hostport.index("]")
            host = hostport[1:end]
            rest = hostport[end + 1:]
            if rest[:1] == ":":
                return host, rest[1:]
            return host, None
        if ":" in hostport:
            host, port = hostport.split(":", 1)
            return host, port
        return hostport, None

    @property
    def username(self):
        """User name from 'user[:password]@host', or None without '@'."""
        netloc = self.netloc
        if "@" not in netloc:
            return None
        userinfo = netloc.rsplit("@", 1)[0]
        # The first ':' separates the user name from the password.
        return userinfo.split(":", 1)[0]

    @property
    def password(self):
        """Password from 'user:password@host', or None if not present."""
        netloc = self.netloc
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                return userinfo.split(":", 1)[1]
        return None

    @property
    def hostname(self):
        """Lower-cased host name (IPv6 brackets removed), or None if empty."""
        host = self._hostinfo()[0]
        return host.lower() or None

    @property
    def port(self):
        """Port as an int, or None when absent or empty.

        Raises ValueError when the port text is not a decimal number.
        """
        port = self._hostinfo()[1]
        if port:
            return int(port, 10)
        return None


class SplitResult(BaseResult):
    """Result tuple with the five components
    (scheme, netloc, path, query, fragment)."""

    __slots__ = ()

    def __new__(cls, scheme, netloc, path, query, fragment):
        # Spelling out the five arguments enforces the component count.
        components = (scheme, netloc, path, query, fragment)
        return BaseResult.__new__(cls, components)

    def geturl(self):
        """Put the components back together with urlunsplit()."""
        return urlunsplit(self)


class ParseResult(BaseResult):
    """Result tuple with the six components
    (scheme, netloc, path, params, query, fragment)."""

    __slots__ = ()

    def __new__(cls, scheme, netloc, path, params, query, fragment):
        # Spelling out the six arguments enforces the component count.
        components = (scheme, netloc, path, params, query, fragment)
        return BaseResult.__new__(cls, components)

    @property
    def params(self):
        """Element 3 of the tuple."""
        return self[3]

    def geturl(self):
        """Put the components back together with urlunparse()."""
        return urlunparse(self)


def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).

    The work is delegated to urlsplit(); the only extra step is to
    separate ";params" from the path for schemes in uses_params.
    Components are not broken up any further (e.g. netloc stays a
    single string) and % escapes are not expanded."""
    split = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, path, query, fragment = split
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    return ParseResult(scheme, netloc, path, params, query, fragment)

def _splitparams(url):
    """Split ';params' off the last segment of the path *url*.

    Returns (path, params).  Only a ';' located at or after the last
    '/' counts; a ';' in an earlier segment is left in the path.  When
    no such ';' exists the path is returned unchanged with empty params.
    """
    # Start searching at the last '/'.  rfind() gives -1 when there is
    # no '/', and a negative start would make find() search from the end
    # of the string, so clamp it to the beginning instead.
    start = max(url.rfind('/'), 0)
    i = url.find(';', start)
    if i < 0:
        return url, ''
    return url[:i], url[i+1:]

def _splitnetloc(url, start=0):
    """Return (netloc, rest) for *url*, reading the netloc from *start*.

    The netloc runs up to the earliest '/', '?' or '#' found at or after
    *start*, or to the end of the string when none of them occurs; the
    rest begins with that delimiter.
    """
    positions = [url.find(ch, start) for ch in '/?#']
    found = [pos for pos in positions if pos >= 0]
    # The end of the string is the fallback when no delimiter was found.
    end = min(found + [len(url)])
    return url[start:end], url[end:]

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).

    Results are memoized in the module-level parse cache.  Components
    are not broken up any further (e.g. netloc stays a single string)
    and % escapes are not expanded."""
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key, so equal-comparing inputs
    # of different types get separate cache entries.
    cache_key = (url, scheme, allow_fragments, type(url), type(scheme))
    hit = _parse_cache.get(cache_key)
    if hit:
        return hit
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        # avoid runaway growth
        clear_cache()

    netloc = ''
    query = ''
    fragment = ''
    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        if prefix == 'http':
            # Fast path for the common case: no scheme-table lookups.
            scheme = prefix.lower()
            url = url[colon+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            result = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[cache_key] = result
            return result
        # Anything else before the ':' is a scheme only if it consists
        # entirely of scheme characters; otherwise the default is kept.
        if all(ch in scheme_chars for ch in prefix):
            scheme = prefix.lower()
            url = url[colon+1:]
    if scheme in uses_netloc and url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
    if allow_fragments and scheme in uses_fragment and '#' in url:
        url, fragment = url.split('#', 1)
    if scheme in uses_query and '?' in url:
        url, query = url.split('?', 1)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[cache_key] = result
    return result

def urlunparse(data):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent).

    *data* is any iterable of the six components
    (scheme, netloc, url, params, query, fragment), e.g. the result of
    urlparse()."""
    # Unpack in the body rather than in the signature: tuple parameters
    # are not valid syntax in later Python versions.
    scheme, netloc, url, params, query, fragment = data
    if params:
        url = "%s;%s" % (url, params)
    return urlunsplit((scheme, netloc, url, query, fragment))

def urlunsplit(data):
    """Combine the five components of a split URL into a string.

    *data* is any iterable of (scheme, netloc, url, query, fragment),
    e.g. the result of urlsplit().  Empty query and fragment components
    are omitted together with their '?' / '#' delimiters.
    """
    # Unpack in the body rather than in the signature: tuple parameters
    # are not valid syntax in later Python versions.
    scheme, netloc, url, query, fragment = data
    # Emit "//netloc" when there is a netloc, or when the scheme is one
    # that uses a netloc and the path does not already start with "//".
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/':
            # A non-empty path following a netloc must be rooted.
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    An empty *base* yields *url* unchanged and an empty *url* yields
    *base* unchanged.  *allow_fragments* is passed on to urlparse() for
    both arguments."""
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The base's scheme is the default for a reference without one.
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # A reference with a different scheme, or with a scheme that does
    # not support relative forms, is returned exactly as given.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            # The reference names its own network location: nothing from
            # the base is used.
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        # Absolute path: only the netloc comes from the base.
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not (path or params or query):
        # Reference with at most a fragment: keep the base's path,
        # params and query, but use the reference's fragment.
        return urlunparse((scheme, netloc, bpath,
                           bparams, bquery, fragment))
    # Replace the last segment of the base path by the reference path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    # A trailing '.' becomes an empty segment, which keeps the trailing
    # '/' in the joined result; every other '.' segment is dropped.
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly delete a "<name>/.." pair until none is left.  The scan
    # runs over indices 1 .. len-2, so a '..' in the last position is
    # not handled here; a '..' preceded by '' or '..' is left in place.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    # Handle a '..' in the last position: directly after the root it is
    # replaced by '', otherwise it and the segment before it are
    # replaced by a single '' (leaving a trailing '/').
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))

def urldefrag(url):
    """Strip the fragment, if any, from *url*.

    Returns the pair (url_without_fragment, fragment).  A URL that
    contains no '#' is returned untouched together with an empty
    fragment string.
    """
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, frag = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return stripped, frag


# Built-in data for test().  The first non-blank line is the base URL;
# each following line has the form "<reference> = <expected>", where
# <expected> is the join of base and reference wrapped as <URL:...>.
test_input = """
      http://a/b/c/d

      g:h        = <URL:g:h>
      http:g     = <URL:http://a/b/c/g>
      http:      = <URL:http://a/b/c/d>
      g          = <URL:http://a/b/c/g>
      ./g        = <URL:http://a/b/c/g>
      g/         = <URL:http://a/b/c/g/>
      /g         = <URL:http://a/g>
      //g        = <URL:http://g>
      ?y         = <URL:http://a/b/c/d?y>
      g?y        = <URL:http://a/b/c/g?y>
      g?y/./x    = <URL:http://a/b/c/g?y/./x>
      .          = <URL:http://a/b/c/>
      ./         = <URL:http://a/b/c/>
      ..         = <URL:http://a/b/>
      ../        = <URL:http://a/b/>
      ../g       = <URL:http://a/b/g>
      ../..      = <URL:http://a/>
      ../../g    = <URL:http://a/g>
      ../../../g = <URL:http://a/../g>
      ./../g     = <URL:http://a/b/g>
      ./g/.      = <URL:http://a/b/c/g/>
      /./g       = <URL:http://a/./g>
      g/./h      = <URL:http://a/b/c/g/h>
      g/../h     = <URL:http://a/b/c/h>
      http:g     = <URL:http://a/b/c/g>
      http:      = <URL:http://a/b/c/d>
      http:?y         = <URL:http://a/b/c/d?y>
      http:g?y        = <URL:http://a/b/c/g?y>
      http:g?y/./x    = <URL:http://a/b/c/g?y/./x>
"""

def test():
    """Run the urlparse()/urljoin() self-test and print the results.

    Input is read from the file named by the first command-line
    argument ('-' means standard input), or from the built-in
    test_input when no argument is given.  The first word of the first
    non-blank line becomes the base URL; the first word of every line
    is parsed and joined against the base.  A line of the form
    "<url> = <expected>" is additionally checked, and a mismatch is
    reported with an 'EXPECTED' line.
    """
    import sys
    base = ''
    opened = None  # file opened here (and therefore closed here)
    if sys.argv[1:]:
        fn = sys.argv[1]
        if fn == '-':
            fp = sys.stdin
        else:
            fp = opened = open(fn)
    else:
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        fp = StringIO(test_input)
    try:
        # readline() rather than iteration, so that input typed on
        # standard input is processed line by line.
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            if not words:
                continue
            url = words[0]
            parts = urlparse(url)
            # Single-argument print in parentheses produces the same
            # output as the print statement.
            print('%-10s : %s' % (url, parts))
            joined = urljoin(base, url)
            if not base:
                # With an empty base, urljoin() returned the URL itself;
                # it serves as the base for all following lines.
                base = joined
            wrapped = '<URL:%s>' % joined
            print('%-10s = %s' % (url, wrapped))
            if len(words) == 3 and words[1] == '=':
                if wrapped != words[2]:
                    print('EXPECTED %s !!!!!!!!!!' % words[2])
    finally:
        # Never close sys.stdin; only release what was opened above.
        if opened is not None:
            opened.close()

# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()
added python code part of jython distribution in lib/jython-2.5.1 added python.path vm arg to startup script fixed infinite loop in unwrap() when displaying sequences of sequences git-svn-id: http://google-refine.googlecode.com/svn/trunk@509 7d457c2a-affb-35e4-300a-418c747d4874 2010-04-20 20:50:24 +02:00			`"""Parse (absolute and relative) URLs.`

			`See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,`
			`UC Irvine, June 1995.`
			`"""`

			`__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",`
			`"urlsplit", "urlunsplit"]`

			`# A classification of schemes ('' means apply by default)`
			`uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',`
			`'wais', 'file', 'https', 'shttp', 'mms',`
			`'prospero', 'rtsp', 'rtspu', '', 'sftp']`
			`uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',`
			`'imap', 'wais', 'file', 'mms', 'https', 'shttp',`
			`'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',`
			`'svn', 'svn+ssh', 'sftp']`
			`non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',`
			`'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']`
			`uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',`
			`'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',`
			`'mms', '', 'sftp']`
			`uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',`
			`'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']`
			`uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',`
			`'nntp', 'wais', 'https', 'shttp', 'snews',`
			`'file', 'prospero', '']`

			`# Characters valid in scheme names`
			`scheme_chars = ('abcdefghijklmnopqrstuvwxyz'`
			`'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`
			`'0123456789'`
			`'+-.')`

			`MAX_CACHE_SIZE = 20`
			`_parse_cache = {}`

			`def clear_cache():`
			`"""Clear the parse cache."""`
			`global _parse_cache`
			`_parse_cache = {}`


			`class BaseResult(tuple):`
			`"""Base class for the parsed result objects.`

			`This provides the attributes shared by the two derived result`
			`objects as read-only properties. The derived classes are`
			`responsible for checking the right number of arguments were`
			`supplied to the constructor.`

			`"""`

			`__slots__ = ()`

			`# Attributes that access the basic components of the URL:`

			`@property`
			`def scheme(self):`
			`return self[0]`

			`@property`
			`def netloc(self):`
			`return self[1]`

			`@property`
			`def path(self):`
			`return self[2]`

			`@property`
			`def query(self):`
			`return self[-2]`

			`@property`
			`def fragment(self):`
			`return self[-1]`

			`# Additional attributes that provide access to parsed-out portions`
			`# of the netloc:`

			`@property`
			`def username(self):`
			`netloc = self.netloc`
			`if "@" in netloc:`
			`userinfo = netloc.split("@", 1)[0]`
			`if ":" in userinfo:`
			`userinfo = userinfo.split(":", 1)[0]`
			`return userinfo`
			`return None`

			`@property`
			`def password(self):`
			`netloc = self.netloc`
			`if "@" in netloc:`
			`userinfo = netloc.split("@", 1)[0]`
			`if ":" in userinfo:`
			`return userinfo.split(":", 1)[1]`
			`return None`

			`@property`
			`def hostname(self):`
			`netloc = self.netloc`
			`if "@" in netloc:`
			`netloc = netloc.split("@", 1)[1]`
			`if ":" in netloc:`
			`netloc = netloc.split(":", 1)[0]`
			`return netloc.lower() or None`

			`@property`
			`def port(self):`
			`netloc = self.netloc`
			`if "@" in netloc:`
			`netloc = netloc.split("@", 1)[1]`
			`if ":" in netloc:`
			`port = netloc.split(":", 1)[1]`
			`return int(port, 10)`
			`return None`


			`class SplitResult(BaseResult):`

			`__slots__ = ()`

			`def __new__(cls, scheme, netloc, path, query, fragment):`
			`return BaseResult.__new__(`
			`cls, (scheme, netloc, path, query, fragment))`

			`def geturl(self):`
			`return urlunsplit(self)`


			`class ParseResult(BaseResult):`

			`__slots__ = ()`

			`def __new__(cls, scheme, netloc, path, params, query, fragment):`
			`return BaseResult.__new__(`
			`cls, (scheme, netloc, path, params, query, fragment))`

			`@property`
			`def params(self):`
			`return self[3]`

			`def geturl(self):`
			`return urlunparse(self)`


			`def urlparse(url, scheme='', allow_fragments=True):`
			`"""Parse a URL into 6 components:`
			`<scheme>://<netloc>/<path>;<params>?<query>#<fragment>`
			`Return a 6-tuple: (scheme, netloc, path, params, query, fragment).`
			`Note that we don't break the components up in smaller bits`
			`(e.g. netloc is a single string) and we don't expand % escapes."""`
			`tuple = urlsplit(url, scheme, allow_fragments)`
			`scheme, netloc, url, query, fragment = tuple`
			`if scheme in uses_params and ';' in url:`
			`url, params = _splitparams(url)`
			`else:`
			`params = ''`
			`return ParseResult(scheme, netloc, url, params, query, fragment)`

			`def _splitparams(url):`
			`if '/' in url:`
			`i = url.find(';', url.rfind('/'))`
			`if i < 0:`
			`return url, ''`
			`else:`
			`i = url.find(';')`
			`return url[:i], url[i+1:]`

			`def _splitnetloc(url, start=0):`
			`delim = len(url) # position of end of domain part of url, default is end`
			`for c in '/?#': # look for delimiters; the order is NOT important`
			`wdelim = url.find(c, start) # find first of this delim`
			`if wdelim >= 0: # if found`
			`delim = min(delim, wdelim) # use earliest delim position`
			`return url[start:delim], url[delim:] # return (domain, rest)`

			`def urlsplit(url, scheme='', allow_fragments=True):`
			`"""Parse a URL into 5 components:`
			`<scheme>://<netloc>/<path>?<query>#<fragment>`
			`Return a 5-tuple: (scheme, netloc, path, query, fragment).`
			`Note that we don't break the components up in smaller bits`
			`(e.g. netloc is a single string) and we don't expand % escapes."""`
			`allow_fragments = bool(allow_fragments)`
			`key = url, scheme, allow_fragments, type(url), type(scheme)`
			`cached = _parse_cache.get(key, None)`
			`if cached:`
			`return cached`
			`if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth`
			`clear_cache()`
			`netloc = query = fragment = ''`
			`i = url.find(':')`
			`if i > 0:`
			`if url[:i] == 'http': # optimize the common case`
			`scheme = url[:i].lower()`
			`url = url[i+1:]`
			`if url[:2] == '//':`
			`netloc, url = _splitnetloc(url, 2)`
			`if allow_fragments and '#' in url:`
			`url, fragment = url.split('#', 1)`
			`if '?' in url:`
			`url, query = url.split('?', 1)`
			`v = SplitResult(scheme, netloc, url, query, fragment)`
			`_parse_cache[key] = v`
			`return v`
			`for c in url[:i]:`
			`if c not in scheme_chars:`
			`break`
			`else:`
			`scheme, url = url[:i].lower(), url[i+1:]`
			`if scheme in uses_netloc and url[:2] == '//':`
			`netloc, url = _splitnetloc(url, 2)`
			`if allow_fragments and scheme in uses_fragment and '#' in url:`
			`url, fragment = url.split('#', 1)`
			`if scheme in uses_query and '?' in url:`
			`url, query = url.split('?', 1)`
			`v = SplitResult(scheme, netloc, url, query, fragment)`
			`_parse_cache[key] = v`
			`return v`

			`def urlunparse((scheme, netloc, url, params, query, fragment)):`
			`"""Put a parsed URL back together again. This may result in a`
			`slightly different, but equivalent URL, if the URL that was parsed`
			`originally had redundant delimiters, e.g. a ? with an empty query`
			`(the draft states that these are equivalent)."""`
			`if params:`
			`url = "%s;%s" % (url, params)`
			`return urlunsplit((scheme, netloc, url, query, fragment))`

			`def urlunsplit((scheme, netloc, url, query, fragment)):`
			`if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):`
			`if url and url[:1] != '/': url = '/' + url`
			`url = '//' + (netloc or '') + url`
			`if scheme:`
			`url = scheme + ':' + url`
			`if query:`
			`url = url + '?' + query`
			`if fragment:`
			`url = url + '#' + fragment`
			`return url`

			`def urljoin(base, url, allow_fragments=True):`
			`"""Join a base URL and a possibly relative URL to form an absolute`
			`interpretation of the latter."""`
			`if not base:`
			`return url`
			`if not url:`
			`return base`
			`bscheme, bnetloc, bpath, bparams, bquery, bfragment = \`
			`urlparse(base, '', allow_fragments)`
			`scheme, netloc, path, params, query, fragment = \`
			`urlparse(url, bscheme, allow_fragments)`
			`if scheme != bscheme or scheme not in uses_relative:`
			`return url`
			`if scheme in uses_netloc:`
			`if netloc:`
			`return urlunparse((scheme, netloc, path,`
			`params, query, fragment))`
			`netloc = bnetloc`
			`if path[:1] == '/':`
			`return urlunparse((scheme, netloc, path,`
			`params, query, fragment))`
			`if not (path or params or query):`
			`return urlunparse((scheme, netloc, bpath,`
			`bparams, bquery, fragment))`
			`segments = bpath.split('/')[:-1] + path.split('/')`
			`# XXX The stuff below is bogus in various ways...`
			`if segments[-1] == '.':`
			`segments[-1] = ''`
			`while '.' in segments:`
			`segments.remove('.')`
			`while 1:`
			`i = 1`
			`n = len(segments) - 1`
			`while i < n:`
			`if (segments[i] == '..'`
			`and segments[i-1] not in ('', '..')):`
			`del segments[i-1:i+1]`
			`break`
			`i = i+1`
			`else:`
			`break`
			`if segments == ['', '..']:`
			`segments[-1] = ''`
			`elif len(segments) >= 2 and segments[-1] == '..':`
			`segments[-2:] = ['']`
			`return urlunparse((scheme, netloc, '/'.join(segments),`
			`params, query, fragment))`

			`def urldefrag(url):`
			`"""Removes any existing fragment from URL.`

			`Returns a tuple of the defragmented URL and the fragment. If`
			`the URL contained no fragments, the second element is the`
			`empty string.`
			`"""`
			`if '#' in url:`
			`s, n, p, a, q, frag = urlparse(url)`
			`defrag = urlunparse((s, n, p, a, q, ''))`
			`return defrag, frag`
			`else:`
			`return url, ''`


			`test_input = """`
			`http://a/b/c/d`

			`g:h = <URL:g:h>`
			`http:g = <URL:http://a/b/c/g>`
			`http: = <URL:http://a/b/c/d>`
			`g = <URL:http://a/b/c/g>`
			`./g = <URL:http://a/b/c/g>`
			`g/ = <URL:http://a/b/c/g/>`
			`/g = <URL:http://a/g>`
			`//g = <URL:http://g>`
			`?y = <URL:http://a/b/c/d?y>`
			`g?y = <URL:http://a/b/c/g?y>`
			`g?y/./x = <URL:http://a/b/c/g?y/./x>`
			`. = <URL:http://a/b/c/>`
			`./ = <URL:http://a/b/c/>`
			`.. = <URL:http://a/b/>`
			`../ = <URL:http://a/b/>`
			`../g = <URL:http://a/b/g>`
			`../.. = <URL:http://a/>`
			`../../g = <URL:http://a/g>`
			`../../../g = <URL:http://a/../g>`
			`./../g = <URL:http://a/b/g>`
			`./g/. = <URL:http://a/b/c/g/>`
			`/./g = <URL:http://a/./g>`
			`g/./h = <URL:http://a/b/c/g/h>`
			`g/../h = <URL:http://a/b/c/h>`
			`http:g = <URL:http://a/b/c/g>`
			`http: = <URL:http://a/b/c/d>`
			`http:?y = <URL:http://a/b/c/d?y>`
			`http:g?y = <URL:http://a/b/c/g?y>`
			`http:g?y/./x = <URL:http://a/b/c/g?y/./x>`
			`"""`

			`def test():`
			`import sys`
			`base = ''`
			`if sys.argv[1:]:`
			`fn = sys.argv[1]`
			`if fn == '-':`
			`fp = sys.stdin`
			`else:`
			`fp = open(fn)`
			`else:`
			`try:`
			`from cStringIO import StringIO`
			`except ImportError:`
			`from StringIO import StringIO`
			`fp = StringIO(test_input)`
			`while 1:`
			`line = fp.readline()`
			`if not line: break`
			`words = line.split()`
			`if not words:`
			`continue`
			`url = words[0]`
			`parts = urlparse(url)`
			`print '%-10s : %s' % (url, parts)`
			`abs = urljoin(base, url)`
			`if not base:`
			`base = abs`
			`wrapped = '<URL:%s>' % abs`
			`print '%-10s = %s' % (url, wrapped)`
			`if len(words) == 3 and words[1] == '=':`
			`if wrapped != words[2]:`
			`print 'EXPECTED', words[2], '!!!!!!!!!!'`

			`if __name__ == '__main__':`
			`test()`