2363d6de43
git-svn-id: http://google-refine.googlecode.com/svn/trunk@517 7d457c2a-affb-35e4-300a-418c747d4874
520 lines
17 KiB
Python
520 lines
17 KiB
Python
# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
|
|
"""Common pathname manipulations, WindowsNT/95 version.
|
|
|
|
Instead of importing this module directly, import os and refer to this
|
|
module as os.path.
|
|
"""
|
|
|
|
import java.io.File
|
|
import os
|
|
import stat
|
|
import sys
|
|
from org.python.core.Py import newString
|
|
|
|
# Public names exported by ``from ntpath import *``.
__all__ = [
    "normcase",
    "isabs",
    "join",
    "splitdrive",
    "split",
    "splitext",
    "basename",
    "dirname",
    "commonprefix",
    "getsize",
    "getmtime",
    "getatime",
    "getctime",
    "islink",
    "exists",
    "lexists",
    "isdir",
    "isfile",
    "ismount",
    "walk",
    "expanduser",
    "expandvars",
    "normpath",
    "abspath",
    "splitunc",
    "curdir",
    "pardir",
    "sep",
    "pathsep",
    "defpath",
    "altsep",
    "extsep",
    "devnull",
    "realpath",
    "supports_unicode_filenames",
]
|
|
|
|
# strings representing various path-related bits and pieces
|
|
curdir = '.'            # name of the current directory
pardir = '..'           # name of the parent directory
extsep = '.'            # separates a file name from its extension
sep = '\\'              # primary path component separator
pathsep = ';'           # separates entries in a search path
altsep = '/'            # alternative path component separator
defpath = '.;C:\\bin'   # default search path

# Platform-specific overrides, keyed on the interpreter's built-in modules.
if 'ce' in sys.builtin_module_names:
    defpath = '\\Windows'
elif 'os2' in sys.builtin_module_names:
    # OS/2 w/ VACPP
    altsep = '/'

devnull = 'nul'         # name of the null device
|
|
|
|
# Normalize the case of a pathname and map slashes to backslashes.
|
|
# Other normalizations (such as optimizing '../' away) are not done
|
|
# (this is done by normpath).
|
|
|
|
def normcase(s):
    """Return *s* with slashes turned into backslashes and all letters lowercased.

    Nothing else is normalized here; collapsing of '.' / '..' components is
    the job of normpath().
    """
    backslashed = s.replace("/", "\\")
    return backslashed.lower()
|
|
|
|
|
|
# Return whether a path is absolute.
|
|
# Trivial in Posix, harder on the Mac or MS-DOS.
|
|
# For DOS it is absolute if it starts with a slash or backslash (current
|
|
# volume), or if a pathname after the volume letter and colon / UNC resource
|
|
# starts with a slash or backslash.
|
|
|
|
def isabs(s):
    """Return True when the path, after its drive specifier, starts with a slash.

    The drive part (if any) is removed with splitdrive(); the remainder is
    absolute when its first character is '/' or '\\'.
    """
    remainder = splitdrive(s)[1]
    # An empty remainder yields '' here, which matches neither separator.
    return remainder[:1] in ('/', '\\')
|
|
|
|
|
|
# Join two (or more) paths.
|
|
|
|
def join(a, *p):
    """Join pathname components, inserting a backslash where one is needed.

    Components are folded left to right. A component replaces everything
    accumulated so far when the accumulated path is empty, or when the
    component is absolute and one of the following holds:

    * the accumulated path has no drive letter,
    * the component carries its own drive letter
      (join('c:', 'd:/') == 'd:/', join('c:/', 'd:/') == 'd:/'),
    * the accumulated path goes beyond a bare drive root
      (join('c:/a', '/b') == '/b').

    Otherwise the component is appended, e.g. join('c:', '/a') == 'c:/a'
    and join('c:/', '/a') == 'c:/a'.
    """
    result = a
    for part in p:
        if result == "":
            part_replaces = True
        elif isabs(part):
            result_lacks_drive = result[1:2] != ":"
            part_has_drive = part[1:2] == ":"
            # True when result is more than just "X:" or "X:\".
            result_beyond_root = len(result) > 3 or (
                len(result) == 3 and result[-1] not in "/\\")
            part_replaces = (result_lacks_drive or part_has_drive
                             or result_beyond_root)
        else:
            part_replaces = False

        if part_replaces:
            result = part
            continue

        # Append, making sure exactly one separator sits at the seam.
        assert len(result) > 0
        last = result[-1]
        if last in "/\\":
            if part and part[0] in "/\\":
                # Avoid doubling the separator.
                result += part[1:]
            else:
                result += part
        elif last == ":":
            # Bare drive specifier: append as-is (drive-relative path).
            result += part
        elif not part:
            # result is not empty and does not end with a backslash, but the
            # component is empty; since, e.g., split('a/') produces
            # ('a', ''), join() adds a backslash in this case.
            result += '\\'
        elif part[0] in "/\\":
            result += part
        else:
            result += "\\" + part

    return result
|
|
|
|
|
|
# Split a path into a drive specification (a drive letter followed by a
|
|
# colon) and the path specification.
|
|
# It is always true that drivespec + pathspec == p
|
|
def splitdrive(p):
    """Split a pathname into a (drive, path) 2-tuple.

    The drive is the first two characters when the second one is a colon,
    otherwise it is empty. drive + path always equals the input, and either
    part may be empty.
    """
    has_drive = p[1:2] == ':'
    if not has_drive:
        return '', p
    return p[:2], p[2:]
|
|
|
|
|
|
# Parse UNC paths
|
|
def splitunc(p):
    """Split a pathname into a (unc, rest) 2-tuple.

    unc is the UNC mount point, of the form '//host/mount' (or the same with
    backslashes), and rest is whatever follows it. unc + rest always equals
    the input; either part may be empty. A path with a drive letter never
    has a UNC part, and a path naming only '//host' yields an empty unc.
    """
    if p[1:2] == ':':
        # Drive letter present
        return '', p
    if p[0:2] not in ('//', '\\\\'):
        return '', p

    # UNC layout:
    #   \\machine\mountpoint\directories...
    #   ^^^^^^^^^^^^^^^^^^^^ plays the role of the drive letter
    # Only separator positions matter, so unify the slashes before searching.
    unified = p.replace('/', '\\')
    after_host = unified.find('\\', 2)
    if after_host == -1:
        # No mount point follows the host name: not treated as UNC.
        return ("", p)
    after_mount = unified.find('\\', after_host + 1)
    if after_mount == -1:
        after_mount = len(p)
    return p[:after_mount], p[after_mount:]
|
|
|
|
|
|
# Split a path in head (everything up to the last '/') and tail (the
|
|
# rest). After the trailing '/' is stripped, the invariant
|
|
# join(head, tail) == p holds.
|
|
# The resulting head won't end in '/' unless it is the root.
|
|
|
|
def split(p):
    """Split a pathname into a (head, tail) 2-tuple.

    tail is everything after the final slash or backslash and therefore
    contains no separators. head is everything before it, with trailing
    separators removed unless head consists of separators only. Any drive
    specifier stays at the front of head. Either part may be empty.
    """
    drive, rest = splitdrive(p)
    # Index just past the last separator (0 when there is none).
    cut = max(rest.rfind('/'), rest.rfind('\\')) + 1
    head, tail = rest[:cut], rest[cut:]
    # Drop trailing separators from head, but keep it if it is all separators.
    trimmed = head.rstrip('/\\')
    if trimmed:
        head = trimmed
    return drive + head, tail
|
|
|
|
|
|
# Split a path in root and extension.
|
|
# The extension is everything starting at the last dot in the last
|
|
# pathname component; the root is everything before that.
|
|
# It is always true that root + ext == p.
|
|
|
|
def splitext(p):
    """Split a pathname into a (root, ext) 2-tuple.

    ext starts at the last dot of the pathname, provided that dot comes
    after the last slash or backslash; otherwise ext is empty.
    root + ext always equals the input.
    """
    dot = p.rfind('.')
    last_sep = max(p.rfind('/'), p.rfind('\\'))
    if dot > last_sep:
        return p[:dot], p[dot:]
    return p, ''
|
|
|
|
|
|
# Return the tail (basename) part of a path.
|
|
|
|
def basename(p):
    """Return the final component of a pathname (the tail from split())."""
    _head, tail = split(p)
    return tail
|
|
|
|
|
|
# Return the head (dirname) part of a path.
|
|
|
|
def dirname(p):
    """Return the directory component of a pathname (the head from split())."""
    head, _tail = split(p)
    return head
|
|
|
|
|
|
# Return the longest prefix of all list elements.
|
|
|
|
def commonprefix(m):
    """Return the longest leading string shared by every pathname in *m*.

    The comparison is character by character, not component by component.
    An empty list yields ''.
    """
    if not m:
        return ''
    # The smallest and largest items differ at least as early as any other
    # pair, so comparing just those two is sufficient.
    lowest = min(m)
    highest = max(m)
    for pos, (c_low, c_high) in enumerate(zip(lowest, highest)):
        if c_low != c_high:
            return lowest[:pos]
    return lowest[:min(len(lowest), len(highest))]
|
|
|
|
|
|
# Get size, mtime, atime of files.
|
|
|
|
def getsize(filename):
    """Return the st_size field that os.stat() reports for *filename*."""
    info = os.stat(filename)
    return info.st_size
|
|
|
|
def getmtime(filename):
    """Return the st_mtime (last modification time) os.stat() reports for *filename*."""
    info = os.stat(filename)
    return info.st_mtime
|
|
|
|
def getatime(filename):
    """Return the st_atime (last access time) os.stat() reports for *filename*."""
    info = os.stat(filename)
    return info.st_atime
|
|
|
|
def getctime(filename):
    """Return the st_ctime (creation time) os.stat() reports for *filename*."""
    info = os.stat(filename)
    return info.st_ctime
|
|
|
|
# Is a path a symbolic link?
|
|
# This will always return false on systems where posix.lstat doesn't exist.
|
|
|
|
def islink(path):
    """Test whether *path* is a symbolic link.

    This implementation never inspects the path and always answers False.
    """
    return False
|
|
|
|
|
|
# Does a path exist?
|
|
|
|
def exists(path):
    """Return True if os.stat() succeeds on *path*, False if it raises os.error."""
    try:
        os.stat(path)
    except os.error:
        return False
    else:
        return True


# islink() always returns False in this module, so lexists is a plain alias.
lexists = exists
|
|
|
|
|
|
# Is a path a dos directory?
|
|
# This follows symbolic links, so both islink() and isdir() can be true
|
|
# for the same path.
|
|
|
|
def isdir(path):
    """Return True if *path* can be stat'ed and its mode marks a directory.

    A path for which os.stat() raises os.error yields False.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return stat.S_ISDIR(mode)
|
|
|
|
|
|
# Is a path a regular file?
|
|
# This follows symbolic links, so both islink() and isfile() can be true
|
|
# for the same path.
|
|
|
|
def isfile(path):
    """Return True if *path* can be stat'ed and its mode marks a regular file.

    A path for which os.stat() raises os.error yields False.
    """
    try:
        mode = os.stat(path).st_mode
    except os.error:
        return False
    return stat.S_ISREG(mode)
|
|
|
|
|
|
# Is a path a mount point? Either a root (with or without drive letter)
|
|
# or an UNC path with at most a / or \ after the mount point.
|
|
|
|
def ismount(path):
    """Test whether a path is a mount point.

    A UNC path is a mount point when nothing, or just one slash or
    backslash, follows the '//host/mount' part. Any other path is a mount
    point when what remains after the drive specifier is exactly one slash
    or backslash (the root of a drive).
    """
    unc, rest = splitunc(path)
    if unc:
        return rest in ("", "/", "\\")
    after_drive = splitdrive(path)[1]
    return after_drive in ('/', '\\')
|
|
|
|
|
|
# Directory tree walk.
|
|
# For each directory under top (including top itself, but excluding
|
|
# '.' and '..'), func(arg, dirname, filenames) is called, where
|
|
# dirname is the name of the directory and filenames is the list
|
|
# of files (and subdirectories etc.) in the directory.
|
|
# The func may modify the filenames list, to implement a filter,
|
|
# or to impose a different order of visiting.
|
|
|
|
def walk(top, func, arg):
    """Walk a directory tree, invoking a callback for every directory.

    For *top* and each directory below it, func(arg, dirname, fnames) is
    called, where dirname is the directory's path and fnames is the list
    returned by os.listdir() for it. func may edit fnames in place (del,
    slice assignment); only names still in the list afterwards are
    considered for recursion, which allows filtering or reordering the
    traversal. Entries named '.' and '..' are never descended into.

    arg is handed to func untouched on every call; it can carry a pattern,
    an accumulator, or simply None. A directory that cannot be listed
    (os.error) is silently skipped.
    """
    try:
        names = os.listdir(top)
    except os.error:
        return
    func(arg, top, names)
    for name in names:
        if name in ('.', '..'):
            continue
        child = join(top, name)
        if isdir(child):
            walk(child, func, arg)
|
|
|
|
|
|
# Expand paths beginning with '~' or '~user'.
|
|
# '~' means $HOME; '~user' means that user's home directory.
|
|
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
|
|
# the path is returned unchanged (leaving error reporting to whatever
|
|
# function is called with the expanded path as argument).
|
|
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
|
|
# (A function should also be defined to do full *sh-style environment
|
|
# variable expansion.)
|
|
|
|
def expanduser(path):
    """Expand a leading '~' to the user's home directory.

    The home directory is $HOME if set, otherwise $HOMEDRIVE joined with
    $HOMEPATH ($HOMEDRIVE defaults to ''). The path is returned unchanged
    when it does not start with '~', when it uses the '~user' form, or when
    neither HOME nor HOMEPATH is set.
    """
    if path[:1] != '~':
        return path

    # Locate the end of the '~...' component.
    end, length = 1, len(path)
    while end < length and path[end] not in '/\\':
        end += 1

    if end != 1:
        # '~user' form: other users' home directories are not looked up.
        return path

    environ = os.environ
    if 'HOME' in environ:
        userhome = environ['HOME']
    elif 'HOMEPATH' not in environ:
        return path
    else:
        userhome = join(environ.get('HOMEDRIVE', ''), environ['HOMEPATH'])
    return userhome + path[end:]
|
|
|
|
|
|
# Expand paths containing shell variable substitutions.
|
|
# The following rules apply:
|
|
# - no expansion within single quotes
|
|
# - no escape character, except for '$$' which is translated into '$'
|
|
# - ${varname} is accepted.
|
|
# - varnames can be made out of letters, digits and the character '_'
|
|
# XXX With COMMAND.COM you can use any characters in a variable name,
|
|
# XXX except '^|<>='.
|
|
|
|
def expandvars(path):
    """Expand shell variables of form $var and ${var}.

    Rules:

    * text between single quotes is copied verbatim (no expansion);
    * '$$' is translated into a single '$';
    * $var names consist of ASCII letters, digits, '_' and '-';
    * ${var} takes everything up to the closing brace as the name;
    * variables not present in os.environ are left unchanged, as are an
      unterminated '${...' or quote and a lone trailing '$'.

    Returns the expanded string; a path without any '$' is returned as-is.
    """
    if '$' not in path:
        return path
    import string
    varchars = string.ascii_letters + string.digits + '_-'
    res = ''
    index = 0
    pathlen = len(path)
    while index < pathlen:
        c = path[index]
        if c == '\'':   # no expansion within single quotes
            # Re-base path on the text after the opening quote.
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index('\'')
                res = res + '\'' + path[:index + 1]
            except ValueError:
                # Unterminated quote: keep the quote and the rest verbatim.
                res = res + '\'' + path
                index = pathlen - 1
        elif c == '$':  # variable or '$$'
            if path[index + 1:index + 2] == '$':
                res = res + c
                index = index + 1
            elif path[index + 1:index + 2] == '{':
                # Re-base path on the text after '${'.
                path = path[index + 2:]
                pathlen = len(path)
                try:
                    index = path.index('}')
                except ValueError:
                    # No closing brace: emit the remainder untouched.
                    res = res + '${' + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    if var in os.environ:
                        res = res + os.environ[var]
                    else:
                        # Unknown variable: leave the reference as written.
                        res = res + '${' + var + '}'
            else:
                var = ''
                index = index + 1
                c = path[index:index + 1]
                while c != '' and c in varchars:
                    var = var + c
                    index = index + 1
                    c = path[index:index + 1]
                if var in os.environ:
                    res = res + os.environ[var]
                else:
                    # Unknown variable (or a bare '$'): leave it unchanged.
                    res = res + '$' + var
                if c != '':
                    # Step back so the terminating character is processed by
                    # the main loop (it may itself be '$' or a quote).
                    index = index - 1
        else:
            res = res + c
        index = index + 1
    return res
|
|
|
|
|
|
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
|
|
# Previously, this function also truncated pathnames to 8+3 format,
|
|
# but as this module is called "ntpath", that's obviously wrong!
|
|
|
|
def normpath(path):
    """Normalize a path: unify slashes and remove redundant components.

    Forward slashes become backslashes, empty and '.' components are
    dropped, and 'name\\..' pairs cancel out. A '..' directly under a root
    is discarded; leading '..' components of a relative path are kept.
    An entirely empty result becomes '.'.

    Leading backslashes need care. Without a drive letter, a path starting
    with a backslash may be an absolute path on the current drive
    (\\dir1\\dir2\\file) or a UNC name (\\\\server\\mount\\dir1\\file), so
    the leading run of backslashes is preserved exactly. This means the
    invalid name \\\\\\a\\b is kept as is, while a\\\\\\b becomes a\\b.
    With a drive letter the leading run collapses to a single backslash.
    """
    drive, rest = splitdrive(path.replace("/", "\\"))
    body = rest.lstrip("\\")
    leading = len(rest) - len(body)
    if drive == '':
        # No drive letter - preserve initial backslashes
        prefix = "\\" * leading
    elif leading:
        # We have a drive letter - collapse initial backslashes
        prefix = drive + "\\"
    else:
        prefix = drive

    rooted = prefix.endswith("\\")
    kept = []
    for comp in body.split("\\"):
        if comp in ('.', ''):
            continue
        if comp != '..':
            kept.append(comp)
        elif kept and kept[-1] != '..':
            # 'name\..' cancels out.
            kept.pop()
        elif kept or not rooted:
            # Cannot be resolved lexically: keep the '..'.
            kept.append(comp)
        # else: '..' directly under the root is dropped.

    # If the path is now empty, substitute '.'
    if not prefix and not kept:
        kept.append('.')
    return prefix + "\\".join(kept)
|
|
|
|
|
|
# Return an absolute path.
|
|
try:
    from nt import _getfullpathname

except ImportError:  # not running on Windows - mock up something sensible
    def abspath(path):
        """Return the absolute, normalized version of a path.

        A relative path is joined onto os.getcwd(). If the result has
        neither a UNC mount point nor a drive letter, the drive is taken
        from the canonical path computed by java.io.File.
        """
        if not isabs(path):
            path = join(os.getcwd(), path)
        if not (splitunc(path)[0] or splitdrive(path)[0]):
            # cwd lacks a UNC mount point, so it should have a drive
            # letter (but lacks one): determine it
            canonical = newString(java.io.File(path).getCanonicalPath())
            path = join(splitdrive(canonical)[0], path)
        return normpath(path)

else:  # use native Windows method on Windows
    def abspath(path):
        """Return the absolute, normalized version of a path.

        An empty path stands for the current working directory. A path that
        _getfullpathname() rejects with WindowsError is only normalized.
        """
        if not path:
            # Empty path must return current working directory.
            path = os.getcwd()
        else:
            try:
                path = _getfullpathname(path)
            except WindowsError:
                pass  # Bad path - return unchanged.
        return normpath(path)
|
|
|
|
# realpath is a no-op on systems without islink support
|
|
# Alias: islink() in this module always returns False, so there are no links
# to resolve and realpath is the same function object as abspath.
realpath = abspath
|
|
# Win9x family and earlier have no Unicode filename support.
|
|
# True only when the interpreter exposes sys.getwindowsversion() and the
# fourth field of its result (the platform id) is at least 2.
if hasattr(sys, "getwindowsversion"):
    supports_unicode_filenames = sys.getwindowsversion()[3] >= 2
else:
    supports_unicode_filenames = False
|