726d90d871
git-svn-id: http://google-refine.googlecode.com/svn/branches/split-refactor@908 7d457c2a-affb-35e4-300a-418c747d4874
103 lines
2.9 KiB
Python
103 lines
2.9 KiB
Python
""" Python 'zlib_codec' Codec - zlib compression encoding
|
|
|
|
Unlike most of the other codecs which target Unicode, this codec
|
|
will return Python string objects for both encode and decode.
|
|
|
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
|
|
|
"""
|
|
import codecs
|
|
import zlib # this codec needs the optional zlib module !
|
|
|
|
### Codec APIs
|
|
|
|
def zlib_encode(input, errors='strict'):
    """Compress *input* with zlib.

    Returns a 2-tuple ``(compressed, consumed)``: the output of
    ``zlib.compress(input)`` and ``len(input)``, i.e. the whole input
    is always reported as consumed.

    *errors* selects the error handling scheme.  Only ``'strict'`` (the
    default) is supported by this codec; any other value trips the
    assertion below.
    """
    assert errors == 'strict'
    compressed = zlib.compress(input)
    consumed = len(input)
    return compressed, consumed
|
|
|
|
def zlib_decode(input, errors='strict'):
    """Decompress zlib-compressed *input*.

    Returns a 2-tuple ``(decompressed, consumed)``: the output of
    ``zlib.decompress(input)`` and ``len(input)``, i.e. the whole input
    is always reported as consumed.

    *input* is handed straight to ``zlib.decompress``, so it must be an
    object that function accepts (a string / buffer-like object holding
    a complete zlib stream).

    *errors* selects the error handling scheme.  Only ``'strict'`` (the
    default) is supported by this codec; any other value trips the
    assertion below.
    """
    assert errors == 'strict'
    decompressed = zlib.decompress(input)
    consumed = len(input)
    return decompressed, consumed
|
|
|
|
class Codec(codecs.Codec):
    """Stateless codec whose methods forward to the module-level
    ``zlib_encode`` and ``zlib_decode`` functions.
    """

    def encode(self, input, errors='strict'):
        """Return the ``(output, length consumed)`` tuple from ``zlib_encode``."""
        encoded = zlib_encode(input, errors)
        return encoded

    def decode(self, input, errors='strict'):
        """Return the ``(output, length consumed)`` tuple from ``zlib_decode``."""
        decoded = zlib_decode(input, errors)
        return decoded
|
|
|
|
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental zlib compressor backed by a ``zlib.compressobj()``."""

    def __init__(self, errors='strict'):
        """Create the encoder; only ``errors='strict'`` is supported."""
        assert errors == 'strict'
        self.errors = errors
        self.compressobj = zlib.compressobj()

    def encode(self, input, final=False):
        """Feed *input* to the compressor and return the bytes produced.

        When *final* is true, the compressor is flushed as well so the
        returned data completes the stream.
        """
        chunk = self.compressobj.compress(input)
        if not final:
            return chunk
        # Last call: append whatever the compressor is still holding.
        return chunk + self.compressobj.flush()

    def reset(self):
        """Discard the current compressor state and start a new stream."""
        self.compressobj = zlib.compressobj()
|
|
|
|
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental zlib decompressor backed by a ``zlib.decompressobj()``."""

    def __init__(self, errors='strict'):
        """Create the decoder; only ``errors='strict'`` is supported."""
        assert errors == 'strict'
        self.errors = errors
        self.decompressobj = zlib.decompressobj()

    def decode(self, input, final=False):
        """Feed *input* to the decompressor and return the bytes produced.

        When *final* is true, the decompressor is flushed as well so any
        data it is still holding is included in the result.
        """
        chunk = self.decompressobj.decompress(input)
        if not final:
            return chunk
        # Last call: append whatever the decompressor is still holding.
        return chunk + self.decompressobj.flush()

    def reset(self):
        """Discard the current decompressor state and start a new stream."""
        self.decompressobj = zlib.decompressobj()
|
|
|
|
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's ``Codec`` with
    ``codecs.StreamWriter``; it adds no members of its own.
    """
|
|
|
|
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's ``Codec`` with
    ``codecs.StreamReader``; it adds no members of its own.
    """
|
|
|
|
### encodings module API
|
|
|
|
def getregentry():
    """Return the ``codecs.CodecInfo`` record for this codec.

    The record is named ``'zlib'`` and wires up the stateless functions,
    the incremental encoder/decoder classes and the stream reader/writer
    classes defined in this module.
    """
    info = codecs.CodecInfo(
        name='zlib',
        encode=zlib_encode,
        decode=zlib_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return info
|