2363d6de43
git-svn-id: http://google-refine.googlecode.com/svn/trunk@517 7d457c2a-affb-35e4-300a-418c747d4874
103 lines
2.9 KiB
Python
103 lines
2.9 KiB
Python
""" Python 'bz2_codec' Codec - bz2 compression encoding
|
|
|
|
Unlike most of the other codecs which target Unicode, this codec
|
|
will return Python string objects for both encode and decode.
|
|
|
|
Adapted by Raymond Hettinger from zlib_codec.py which was written
|
|
by Marc-Andre Lemburg (mal@lemburg.com).
|
|
|
|
"""
|
|
import codecs
|
|
import bz2 # this codec needs the optional bz2 module !
|
|
|
|
### Codec APIs
|
|
|
|
def bz2_encode(input,errors='strict'):
|
|
|
|
""" Encodes the object input and returns a tuple (output
|
|
object, length consumed).
|
|
|
|
errors defines the error handling to apply. It defaults to
|
|
'strict' handling which is the only currently supported
|
|
error handling for this codec.
|
|
|
|
"""
|
|
assert errors == 'strict'
|
|
output = bz2.compress(input)
|
|
return (output, len(input))
|
|
|
|
def bz2_decode(input,errors='strict'):
|
|
|
|
""" Decodes the object input and returns a tuple (output
|
|
object, length consumed).
|
|
|
|
input must be an object which provides the bf_getreadbuf
|
|
buffer slot. Python strings, buffer objects and memory
|
|
mapped files are examples of objects providing this slot.
|
|
|
|
errors defines the error handling to apply. It defaults to
|
|
'strict' handling which is the only currently supported
|
|
error handling for this codec.
|
|
|
|
"""
|
|
assert errors == 'strict'
|
|
output = bz2.decompress(input)
|
|
return (output, len(input))
|
|
|
|
class Codec(codecs.Codec):
|
|
|
|
def encode(self, input, errors='strict'):
|
|
return bz2_encode(input, errors)
|
|
def decode(self, input, errors='strict'):
|
|
return bz2_decode(input, errors)
|
|
|
|
class IncrementalEncoder(codecs.IncrementalEncoder):
|
|
def __init__(self, errors='strict'):
|
|
assert errors == 'strict'
|
|
self.errors = errors
|
|
self.compressobj = bz2.BZ2Compressor()
|
|
|
|
def encode(self, input, final=False):
|
|
if final:
|
|
c = self.compressobj.compress(input)
|
|
return c + self.compressobj.flush()
|
|
else:
|
|
return self.compressobj.compress(input)
|
|
|
|
def reset(self):
|
|
self.compressobj = bz2.BZ2Compressor()
|
|
|
|
class IncrementalDecoder(codecs.IncrementalDecoder):
|
|
def __init__(self, errors='strict'):
|
|
assert errors == 'strict'
|
|
self.errors = errors
|
|
self.decompressobj = bz2.BZ2Decompressor()
|
|
|
|
def decode(self, input, final=False):
|
|
try:
|
|
return self.decompressobj.decompress(input)
|
|
except EOFError:
|
|
return ''
|
|
|
|
def reset(self):
|
|
self.decompressobj = bz2.BZ2Decompressor()
|
|
|
|
class StreamWriter(Codec,codecs.StreamWriter):
|
|
pass
|
|
|
|
class StreamReader(Codec,codecs.StreamReader):
|
|
pass
|
|
|
|
### encodings module API
|
|
|
|
def getregentry():
|
|
return codecs.CodecInfo(
|
|
name="bz2",
|
|
encode=bz2_encode,
|
|
decode=bz2_decode,
|
|
incrementalencoder=IncrementalEncoder,
|
|
incrementaldecoder=IncrementalDecoder,
|
|
streamwriter=StreamWriter,
|
|
streamreader=StreamReader,
|
|
)
|