d0df704d8a
added python.path vm arg to startup script fixed infinite loop in unwrap() when displaying sequences of sequences git-svn-id: http://google-refine.googlecode.com/svn/trunk@509 7d457c2a-affb-35e4-300a-418c747d4874
170 lines
4.6 KiB
Python
170 lines
4.6 KiB
Python
"""Sort performance test.
|
|
|
|
See main() for command line syntax.
|
|
See tabulate() for output format.
|
|
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
import random
|
|
import marshal
|
|
import tempfile
|
|
import os
|
|
|
|
td = tempfile.gettempdir()
|
|
|
|
def randfloats(n):
|
|
"""Return a list of n random floats in [0, 1)."""
|
|
# Generating floats is expensive, so this writes them out to a file in
|
|
# a temp directory. If the file already exists, it just reads them
|
|
# back in and shuffles them a bit.
|
|
fn = os.path.join(td, "rr%06d" % n)
|
|
try:
|
|
fp = open(fn, "rb")
|
|
except IOError:
|
|
r = random.random
|
|
result = [r() for i in xrange(n)]
|
|
try:
|
|
try:
|
|
fp = open(fn, "wb")
|
|
marshal.dump(result, fp)
|
|
fp.close()
|
|
fp = None
|
|
finally:
|
|
if fp:
|
|
try:
|
|
os.unlink(fn)
|
|
except os.error:
|
|
pass
|
|
except IOError, msg:
|
|
print "can't write", fn, ":", msg
|
|
else:
|
|
result = marshal.load(fp)
|
|
fp.close()
|
|
# Shuffle it a bit...
|
|
for i in range(10):
|
|
i = random.randrange(n)
|
|
temp = result[:i]
|
|
del result[:i]
|
|
temp.reverse()
|
|
result.extend(temp)
|
|
del temp
|
|
assert len(result) == n
|
|
return result
|
|
|
|
def flush():
|
|
sys.stdout.flush()
|
|
|
|
def doit(L):
|
|
t0 = time.clock()
|
|
L.sort()
|
|
t1 = time.clock()
|
|
print "%6.2f" % (t1-t0),
|
|
flush()
|
|
|
|
def tabulate(r):
|
|
"""Tabulate sort speed for lists of various sizes.
|
|
|
|
The sizes are 2**i for i in r (the argument, a list).
|
|
|
|
The output displays i, 2**i, and the time to sort arrays of 2**i
|
|
floating point numbers with the following properties:
|
|
|
|
*sort: random data
|
|
\sort: descending data
|
|
/sort: ascending data
|
|
3sort: ascending, then 3 random exchanges
|
|
+sort: ascending, then 10 random at the end
|
|
%sort: ascending, then randomly replace 1% of the elements w/ random values
|
|
~sort: many duplicates
|
|
=sort: all equal
|
|
!sort: worst case scenario
|
|
|
|
"""
|
|
cases = tuple([ch + "sort" for ch in r"*\/3+%~=!"])
|
|
fmt = ("%2s %7s" + " %6s"*len(cases))
|
|
print fmt % (("i", "2**i") + cases)
|
|
for i in r:
|
|
n = 1 << i
|
|
L = randfloats(n)
|
|
print "%2d %7d" % (i, n),
|
|
flush()
|
|
doit(L) # *sort
|
|
L.reverse()
|
|
doit(L) # \sort
|
|
doit(L) # /sort
|
|
|
|
# Do 3 random exchanges.
|
|
for dummy in range(3):
|
|
i1 = random.randrange(n)
|
|
i2 = random.randrange(n)
|
|
L[i1], L[i2] = L[i2], L[i1]
|
|
doit(L) # 3sort
|
|
|
|
# Replace the last 10 with random floats.
|
|
if n >= 10:
|
|
L[-10:] = [random.random() for dummy in range(10)]
|
|
doit(L) # +sort
|
|
|
|
# Replace 1% of the elements at random.
|
|
for dummy in xrange(n // 100):
|
|
L[random.randrange(n)] = random.random()
|
|
doit(L) # %sort
|
|
|
|
# Arrange for lots of duplicates.
|
|
if n > 4:
|
|
del L[4:]
|
|
L = L * (n // 4)
|
|
# Force the elements to be distinct objects, else timings can be
|
|
# artificially low.
|
|
L = map(lambda x: --x, L)
|
|
doit(L) # ~sort
|
|
del L
|
|
|
|
# All equal. Again, force the elements to be distinct objects.
|
|
L = map(abs, [-0.5] * n)
|
|
doit(L) # =sort
|
|
del L
|
|
|
|
# This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case
|
|
# for an older implementation of quicksort, which used the median
|
|
# of the first, last and middle elements as the pivot.
|
|
half = n // 2
|
|
L = range(half - 1, -1, -1)
|
|
L.extend(range(half))
|
|
# Force to float, so that the timings are comparable. This is
|
|
# significantly faster if we leave tham as ints.
|
|
L = map(float, L)
|
|
doit(L) # !sort
|
|
print
|
|
|
|
def main():
|
|
"""Main program when invoked as a script.
|
|
|
|
One argument: tabulate a single row.
|
|
Two arguments: tabulate a range (inclusive).
|
|
Extra arguments are used to seed the random generator.
|
|
|
|
"""
|
|
# default range (inclusive)
|
|
k1 = 15
|
|
k2 = 20
|
|
if sys.argv[1:]:
|
|
# one argument: single point
|
|
k1 = k2 = int(sys.argv[1])
|
|
if sys.argv[2:]:
|
|
# two arguments: specify range
|
|
k2 = int(sys.argv[2])
|
|
if sys.argv[3:]:
|
|
# derive random seed from remaining arguments
|
|
x = 1
|
|
for a in sys.argv[3:]:
|
|
x = 69069 * x + hash(a)
|
|
random.seed(x)
|
|
r = range(k1, k2+1) # include the end point
|
|
tabulate(r)
|
|
|
|
if __name__ == '__main__':
|
|
main()
|