919 lines
26 KiB
Python
919 lines
26 KiB
Python
|
# IDLSave - a python module to read IDL 'save' files
|
||
|
# Copyright (c) 2010 Thomas P. Robitaille
|
||
|
|
||
|
# Many thanks to Craig Markwardt for publishing the Unofficial Format
|
||
|
# Specification for IDL .sav files, without which this Python module would not
|
||
|
# exist (http://cow.physics.wisc.edu/~craigm/idl/savefmt).
|
||
|
|
||
|
# This code was developed by with permission from ITT Visual Information
|
||
|
# Systems. IDL(r) is a registered trademark of ITT Visual Information Systems,
|
||
|
# Inc. for their Interactive Data Language software.
|
||
|
|
||
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
||
|
# copy of this software and associated documentation files (the "Software"),
|
||
|
# to deal in the Software without restriction, including without limitation
|
||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
|
# and/or sell copies of the Software, and to permit persons to whom the
|
||
|
# Software is furnished to do so, subject to the following conditions:
|
||
|
|
||
|
# The above copyright notice and this permission notice shall be included in
|
||
|
# all copies or substantial portions of the Software.
|
||
|
|
||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||
|
# DEALINGS IN THE SOFTWARE.
|
||
|
|
||
|
__all__ = ['readsav']
|
||
|
|
||
|
import struct
|
||
|
import numpy as np
|
||
|
import tempfile
|
||
|
import zlib
|
||
|
import warnings
|
||
|
|
||
|
# Define the different data types that can be found in an IDL save file
|
||
|
DTYPE_DICT = {1: '>u1',
|
||
|
2: '>i2',
|
||
|
3: '>i4',
|
||
|
4: '>f4',
|
||
|
5: '>f8',
|
||
|
6: '>c8',
|
||
|
7: '|O',
|
||
|
8: '|O',
|
||
|
9: '>c16',
|
||
|
10: '|O',
|
||
|
11: '|O',
|
||
|
12: '>u2',
|
||
|
13: '>u4',
|
||
|
14: '>i8',
|
||
|
15: '>u8'}
|
||
|
|
||
|
# Define the different record types that can be found in an IDL save file
|
||
|
RECTYPE_DICT = {0: "START_MARKER",
|
||
|
1: "COMMON_VARIABLE",
|
||
|
2: "VARIABLE",
|
||
|
3: "SYSTEM_VARIABLE",
|
||
|
6: "END_MARKER",
|
||
|
10: "TIMESTAMP",
|
||
|
12: "COMPILED",
|
||
|
13: "IDENTIFICATION",
|
||
|
14: "VERSION",
|
||
|
15: "HEAP_HEADER",
|
||
|
16: "HEAP_DATA",
|
||
|
17: "PROMOTE64",
|
||
|
19: "NOTICE",
|
||
|
20: "DESCRIPTION"}
|
||
|
|
||
|
# Define a dictionary to contain structure definitions
|
||
|
STRUCT_DICT = {}
|
||
|
|
||
|
|
||
|
def _align_32(f):
|
||
|
'''Align to the next 32-bit position in a file'''
|
||
|
|
||
|
pos = f.tell()
|
||
|
if pos % 4 != 0:
|
||
|
f.seek(pos + 4 - pos % 4)
|
||
|
return
|
||
|
|
||
|
|
||
|
def _skip_bytes(f, n):
|
||
|
'''Skip `n` bytes'''
|
||
|
f.read(n)
|
||
|
return
|
||
|
|
||
|
|
||
|
def _read_bytes(f, n):
|
||
|
'''Read the next `n` bytes'''
|
||
|
return f.read(n)
|
||
|
|
||
|
|
||
|
def _read_byte(f):
|
||
|
'''Read a single byte'''
|
||
|
return np.uint8(struct.unpack('>B', f.read(4)[:1])[0])
|
||
|
|
||
|
|
||
|
def _read_long(f):
|
||
|
'''Read a signed 32-bit integer'''
|
||
|
return np.int32(struct.unpack('>l', f.read(4))[0])
|
||
|
|
||
|
|
||
|
def _read_int16(f):
|
||
|
'''Read a signed 16-bit integer'''
|
||
|
return np.int16(struct.unpack('>h', f.read(4)[2:4])[0])
|
||
|
|
||
|
|
||
|
def _read_int32(f):
|
||
|
'''Read a signed 32-bit integer'''
|
||
|
return np.int32(struct.unpack('>i', f.read(4))[0])
|
||
|
|
||
|
|
||
|
def _read_int64(f):
|
||
|
'''Read a signed 64-bit integer'''
|
||
|
return np.int64(struct.unpack('>q', f.read(8))[0])
|
||
|
|
||
|
|
||
|
def _read_uint16(f):
|
||
|
'''Read an unsigned 16-bit integer'''
|
||
|
return np.uint16(struct.unpack('>H', f.read(4)[2:4])[0])
|
||
|
|
||
|
|
||
|
def _read_uint32(f):
|
||
|
'''Read an unsigned 32-bit integer'''
|
||
|
return np.uint32(struct.unpack('>I', f.read(4))[0])
|
||
|
|
||
|
|
||
|
def _read_uint64(f):
|
||
|
'''Read an unsigned 64-bit integer'''
|
||
|
return np.uint64(struct.unpack('>Q', f.read(8))[0])
|
||
|
|
||
|
|
||
|
def _read_float32(f):
|
||
|
'''Read a 32-bit float'''
|
||
|
return np.float32(struct.unpack('>f', f.read(4))[0])
|
||
|
|
||
|
|
||
|
def _read_float64(f):
|
||
|
'''Read a 64-bit float'''
|
||
|
return np.float64(struct.unpack('>d', f.read(8))[0])
|
||
|
|
||
|
|
||
|
class Pointer:
|
||
|
'''Class used to define pointers'''
|
||
|
|
||
|
def __init__(self, index):
|
||
|
self.index = index
|
||
|
return
|
||
|
|
||
|
|
||
|
class ObjectPointer(Pointer):
|
||
|
'''Class used to define object pointers'''
|
||
|
pass
|
||
|
|
||
|
|
||
|
def _read_string(f):
|
||
|
'''Read a string'''
|
||
|
length = _read_long(f)
|
||
|
if length > 0:
|
||
|
chars = _read_bytes(f, length).decode('latin1')
|
||
|
_align_32(f)
|
||
|
else:
|
||
|
chars = ''
|
||
|
return chars
|
||
|
|
||
|
|
||
|
def _read_string_data(f):
|
||
|
'''Read a data string (length is specified twice)'''
|
||
|
length = _read_long(f)
|
||
|
if length > 0:
|
||
|
length = _read_long(f)
|
||
|
string_data = _read_bytes(f, length)
|
||
|
_align_32(f)
|
||
|
else:
|
||
|
string_data = ''
|
||
|
return string_data
|
||
|
|
||
|
|
||
|
def _read_data(f, dtype):
|
||
|
'''Read a variable with a specified data type'''
|
||
|
if dtype == 1:
|
||
|
if _read_int32(f) != 1:
|
||
|
raise Exception("Error occurred while reading byte variable")
|
||
|
return _read_byte(f)
|
||
|
elif dtype == 2:
|
||
|
return _read_int16(f)
|
||
|
elif dtype == 3:
|
||
|
return _read_int32(f)
|
||
|
elif dtype == 4:
|
||
|
return _read_float32(f)
|
||
|
elif dtype == 5:
|
||
|
return _read_float64(f)
|
||
|
elif dtype == 6:
|
||
|
real = _read_float32(f)
|
||
|
imag = _read_float32(f)
|
||
|
return np.complex64(real + imag * 1j)
|
||
|
elif dtype == 7:
|
||
|
return _read_string_data(f)
|
||
|
elif dtype == 8:
|
||
|
raise Exception("Should not be here - please report this")
|
||
|
elif dtype == 9:
|
||
|
real = _read_float64(f)
|
||
|
imag = _read_float64(f)
|
||
|
return np.complex128(real + imag * 1j)
|
||
|
elif dtype == 10:
|
||
|
return Pointer(_read_int32(f))
|
||
|
elif dtype == 11:
|
||
|
return ObjectPointer(_read_int32(f))
|
||
|
elif dtype == 12:
|
||
|
return _read_uint16(f)
|
||
|
elif dtype == 13:
|
||
|
return _read_uint32(f)
|
||
|
elif dtype == 14:
|
||
|
return _read_int64(f)
|
||
|
elif dtype == 15:
|
||
|
return _read_uint64(f)
|
||
|
else:
|
||
|
raise Exception("Unknown IDL type: %i - please report this" % dtype)
|
||
|
|
||
|
|
||
|
def _read_structure(f, array_desc, struct_desc):
|
||
|
'''
|
||
|
Read a structure, with the array and structure descriptors given as
|
||
|
`array_desc` and `structure_desc` respectively.
|
||
|
'''
|
||
|
|
||
|
nrows = array_desc['nelements']
|
||
|
columns = struct_desc['tagtable']
|
||
|
|
||
|
dtype = []
|
||
|
for col in columns:
|
||
|
if col['structure'] or col['array']:
|
||
|
dtype.append(((col['name'].lower(), col['name']), np.object_))
|
||
|
else:
|
||
|
if col['typecode'] in DTYPE_DICT:
|
||
|
dtype.append(((col['name'].lower(), col['name']),
|
||
|
DTYPE_DICT[col['typecode']]))
|
||
|
else:
|
||
|
raise Exception("Variable type %i not implemented" %
|
||
|
col['typecode'])
|
||
|
|
||
|
structure = np.rec.recarray((nrows, ), dtype=dtype)
|
||
|
|
||
|
for i in range(nrows):
|
||
|
for col in columns:
|
||
|
dtype = col['typecode']
|
||
|
if col['structure']:
|
||
|
structure[col['name']][i] = _read_structure(f,
|
||
|
struct_desc['arrtable'][col['name']],
|
||
|
struct_desc['structtable'][col['name']])
|
||
|
elif col['array']:
|
||
|
structure[col['name']][i] = _read_array(f, dtype,
|
||
|
struct_desc['arrtable'][col['name']])
|
||
|
else:
|
||
|
structure[col['name']][i] = _read_data(f, dtype)
|
||
|
|
||
|
# Reshape structure if needed
|
||
|
if array_desc['ndims'] > 1:
|
||
|
dims = array_desc['dims'][:int(array_desc['ndims'])]
|
||
|
dims.reverse()
|
||
|
structure = structure.reshape(dims)
|
||
|
|
||
|
return structure
|
||
|
|
||
|
|
||
|
def _read_array(f, typecode, array_desc):
|
||
|
'''
|
||
|
Read an array of type `typecode`, with the array descriptor given as
|
||
|
`array_desc`.
|
||
|
'''
|
||
|
|
||
|
if typecode in [1, 3, 4, 5, 6, 9, 13, 14, 15]:
|
||
|
|
||
|
if typecode == 1:
|
||
|
nbytes = _read_int32(f)
|
||
|
if nbytes != array_desc['nbytes']:
|
||
|
warnings.warn("Not able to verify number of bytes from header",
|
||
|
stacklevel=3)
|
||
|
|
||
|
# Read bytes as numpy array
|
||
|
array = np.frombuffer(f.read(array_desc['nbytes']),
|
||
|
dtype=DTYPE_DICT[typecode])
|
||
|
|
||
|
elif typecode in [2, 12]:
|
||
|
|
||
|
# These are 2 byte types, need to skip every two as they are not packed
|
||
|
|
||
|
array = np.frombuffer(f.read(array_desc['nbytes']*2),
|
||
|
dtype=DTYPE_DICT[typecode])[1::2]
|
||
|
|
||
|
else:
|
||
|
|
||
|
# Read bytes into list
|
||
|
array = []
|
||
|
for i in range(array_desc['nelements']):
|
||
|
dtype = typecode
|
||
|
data = _read_data(f, dtype)
|
||
|
array.append(data)
|
||
|
|
||
|
array = np.array(array, dtype=np.object_)
|
||
|
|
||
|
# Reshape array if needed
|
||
|
if array_desc['ndims'] > 1:
|
||
|
dims = array_desc['dims'][:int(array_desc['ndims'])]
|
||
|
dims.reverse()
|
||
|
array = array.reshape(dims)
|
||
|
|
||
|
# Go to next alignment position
|
||
|
_align_32(f)
|
||
|
|
||
|
return array
|
||
|
|
||
|
|
||
|
def _read_record(f):
|
||
|
'''Function to read in a full record'''
|
||
|
|
||
|
record = {'rectype': _read_long(f)}
|
||
|
|
||
|
nextrec = _read_uint32(f)
|
||
|
nextrec += _read_uint32(f).astype(np.int64) * 2**32
|
||
|
|
||
|
_skip_bytes(f, 4)
|
||
|
|
||
|
if record['rectype'] not in RECTYPE_DICT:
|
||
|
raise Exception("Unknown RECTYPE: %i" % record['rectype'])
|
||
|
|
||
|
record['rectype'] = RECTYPE_DICT[record['rectype']]
|
||
|
|
||
|
if record['rectype'] in ["VARIABLE", "HEAP_DATA"]:
|
||
|
|
||
|
if record['rectype'] == "VARIABLE":
|
||
|
record['varname'] = _read_string(f)
|
||
|
else:
|
||
|
record['heap_index'] = _read_long(f)
|
||
|
_skip_bytes(f, 4)
|
||
|
|
||
|
rectypedesc = _read_typedesc(f)
|
||
|
|
||
|
if rectypedesc['typecode'] == 0:
|
||
|
|
||
|
if nextrec == f.tell():
|
||
|
record['data'] = None # Indicates NULL value
|
||
|
else:
|
||
|
raise ValueError("Unexpected type code: 0")
|
||
|
|
||
|
else:
|
||
|
|
||
|
varstart = _read_long(f)
|
||
|
if varstart != 7:
|
||
|
raise Exception("VARSTART is not 7")
|
||
|
|
||
|
if rectypedesc['structure']:
|
||
|
record['data'] = _read_structure(f, rectypedesc['array_desc'],
|
||
|
rectypedesc['struct_desc'])
|
||
|
elif rectypedesc['array']:
|
||
|
record['data'] = _read_array(f, rectypedesc['typecode'],
|
||
|
rectypedesc['array_desc'])
|
||
|
else:
|
||
|
dtype = rectypedesc['typecode']
|
||
|
record['data'] = _read_data(f, dtype)
|
||
|
|
||
|
elif record['rectype'] == "TIMESTAMP":
|
||
|
|
||
|
_skip_bytes(f, 4*256)
|
||
|
record['date'] = _read_string(f)
|
||
|
record['user'] = _read_string(f)
|
||
|
record['host'] = _read_string(f)
|
||
|
|
||
|
elif record['rectype'] == "VERSION":
|
||
|
|
||
|
record['format'] = _read_long(f)
|
||
|
record['arch'] = _read_string(f)
|
||
|
record['os'] = _read_string(f)
|
||
|
record['release'] = _read_string(f)
|
||
|
|
||
|
elif record['rectype'] == "IDENTIFICATON":
|
||
|
|
||
|
record['author'] = _read_string(f)
|
||
|
record['title'] = _read_string(f)
|
||
|
record['idcode'] = _read_string(f)
|
||
|
|
||
|
elif record['rectype'] == "NOTICE":
|
||
|
|
||
|
record['notice'] = _read_string(f)
|
||
|
|
||
|
elif record['rectype'] == "DESCRIPTION":
|
||
|
|
||
|
record['description'] = _read_string_data(f)
|
||
|
|
||
|
elif record['rectype'] == "HEAP_HEADER":
|
||
|
|
||
|
record['nvalues'] = _read_long(f)
|
||
|
record['indices'] = [_read_long(f) for _ in range(record['nvalues'])]
|
||
|
|
||
|
elif record['rectype'] == "COMMONBLOCK":
|
||
|
|
||
|
record['nvars'] = _read_long(f)
|
||
|
record['name'] = _read_string(f)
|
||
|
record['varnames'] = [_read_string(f) for _ in range(record['nvars'])]
|
||
|
|
||
|
elif record['rectype'] == "END_MARKER":
|
||
|
|
||
|
record['end'] = True
|
||
|
|
||
|
elif record['rectype'] == "UNKNOWN":
|
||
|
|
||
|
warnings.warn("Skipping UNKNOWN record", stacklevel=3)
|
||
|
|
||
|
elif record['rectype'] == "SYSTEM_VARIABLE":
|
||
|
|
||
|
warnings.warn("Skipping SYSTEM_VARIABLE record", stacklevel=3)
|
||
|
|
||
|
else:
|
||
|
|
||
|
raise Exception(f"record['rectype']={record['rectype']} not implemented")
|
||
|
|
||
|
f.seek(nextrec)
|
||
|
|
||
|
return record
|
||
|
|
||
|
|
||
|
def _read_typedesc(f):
|
||
|
'''Function to read in a type descriptor'''
|
||
|
|
||
|
typedesc = {'typecode': _read_long(f), 'varflags': _read_long(f)}
|
||
|
|
||
|
if typedesc['varflags'] & 2 == 2:
|
||
|
raise Exception("System variables not implemented")
|
||
|
|
||
|
typedesc['array'] = typedesc['varflags'] & 4 == 4
|
||
|
typedesc['structure'] = typedesc['varflags'] & 32 == 32
|
||
|
|
||
|
if typedesc['structure']:
|
||
|
typedesc['array_desc'] = _read_arraydesc(f)
|
||
|
typedesc['struct_desc'] = _read_structdesc(f)
|
||
|
elif typedesc['array']:
|
||
|
typedesc['array_desc'] = _read_arraydesc(f)
|
||
|
|
||
|
return typedesc
|
||
|
|
||
|
|
||
|
def _read_arraydesc(f):
|
||
|
'''Function to read in an array descriptor'''
|
||
|
|
||
|
arraydesc = {'arrstart': _read_long(f)}
|
||
|
|
||
|
if arraydesc['arrstart'] == 8:
|
||
|
|
||
|
_skip_bytes(f, 4)
|
||
|
|
||
|
arraydesc['nbytes'] = _read_long(f)
|
||
|
arraydesc['nelements'] = _read_long(f)
|
||
|
arraydesc['ndims'] = _read_long(f)
|
||
|
|
||
|
_skip_bytes(f, 8)
|
||
|
|
||
|
arraydesc['nmax'] = _read_long(f)
|
||
|
|
||
|
arraydesc['dims'] = [_read_long(f) for _ in range(arraydesc['nmax'])]
|
||
|
|
||
|
elif arraydesc['arrstart'] == 18:
|
||
|
|
||
|
warnings.warn("Using experimental 64-bit array read", stacklevel=3)
|
||
|
|
||
|
_skip_bytes(f, 8)
|
||
|
|
||
|
arraydesc['nbytes'] = _read_uint64(f)
|
||
|
arraydesc['nelements'] = _read_uint64(f)
|
||
|
arraydesc['ndims'] = _read_long(f)
|
||
|
|
||
|
_skip_bytes(f, 8)
|
||
|
|
||
|
arraydesc['nmax'] = 8
|
||
|
|
||
|
arraydesc['dims'] = []
|
||
|
for d in range(arraydesc['nmax']):
|
||
|
v = _read_long(f)
|
||
|
if v != 0:
|
||
|
raise Exception("Expected a zero in ARRAY_DESC")
|
||
|
arraydesc['dims'].append(_read_long(f))
|
||
|
|
||
|
else:
|
||
|
|
||
|
raise Exception("Unknown ARRSTART: %i" % arraydesc['arrstart'])
|
||
|
|
||
|
return arraydesc
|
||
|
|
||
|
|
||
|
def _read_structdesc(f):
|
||
|
'''Function to read in a structure descriptor'''
|
||
|
|
||
|
structdesc = {}
|
||
|
|
||
|
structstart = _read_long(f)
|
||
|
if structstart != 9:
|
||
|
raise Exception("STRUCTSTART should be 9")
|
||
|
|
||
|
structdesc['name'] = _read_string(f)
|
||
|
predef = _read_long(f)
|
||
|
structdesc['ntags'] = _read_long(f)
|
||
|
structdesc['nbytes'] = _read_long(f)
|
||
|
|
||
|
structdesc['predef'] = predef & 1
|
||
|
structdesc['inherits'] = predef & 2
|
||
|
structdesc['is_super'] = predef & 4
|
||
|
|
||
|
if not structdesc['predef']:
|
||
|
|
||
|
structdesc['tagtable'] = [_read_tagdesc(f)
|
||
|
for _ in range(structdesc['ntags'])]
|
||
|
|
||
|
for tag in structdesc['tagtable']:
|
||
|
tag['name'] = _read_string(f)
|
||
|
|
||
|
structdesc['arrtable'] = {tag['name']: _read_arraydesc(f)
|
||
|
for tag in structdesc['tagtable']
|
||
|
if tag['array']}
|
||
|
|
||
|
structdesc['structtable'] = {tag['name']: _read_structdesc(f)
|
||
|
for tag in structdesc['tagtable']
|
||
|
if tag['structure']}
|
||
|
|
||
|
if structdesc['inherits'] or structdesc['is_super']:
|
||
|
structdesc['classname'] = _read_string(f)
|
||
|
structdesc['nsupclasses'] = _read_long(f)
|
||
|
structdesc['supclassnames'] = [
|
||
|
_read_string(f) for _ in range(structdesc['nsupclasses'])]
|
||
|
structdesc['supclasstable'] = [
|
||
|
_read_structdesc(f) for _ in range(structdesc['nsupclasses'])]
|
||
|
|
||
|
STRUCT_DICT[structdesc['name']] = structdesc
|
||
|
|
||
|
else:
|
||
|
|
||
|
if structdesc['name'] not in STRUCT_DICT:
|
||
|
raise Exception("PREDEF=1 but can't find definition")
|
||
|
|
||
|
structdesc = STRUCT_DICT[structdesc['name']]
|
||
|
|
||
|
return structdesc
|
||
|
|
||
|
|
||
|
def _read_tagdesc(f):
|
||
|
'''Function to read in a tag descriptor'''
|
||
|
|
||
|
tagdesc = {'offset': _read_long(f)}
|
||
|
|
||
|
if tagdesc['offset'] == -1:
|
||
|
tagdesc['offset'] = _read_uint64(f)
|
||
|
|
||
|
tagdesc['typecode'] = _read_long(f)
|
||
|
tagflags = _read_long(f)
|
||
|
|
||
|
tagdesc['array'] = tagflags & 4 == 4
|
||
|
tagdesc['structure'] = tagflags & 32 == 32
|
||
|
tagdesc['scalar'] = tagdesc['typecode'] in DTYPE_DICT
|
||
|
# Assume '10'x is scalar
|
||
|
|
||
|
return tagdesc
|
||
|
|
||
|
|
||
|
def _replace_heap(variable, heap):
|
||
|
|
||
|
if isinstance(variable, Pointer):
|
||
|
|
||
|
while isinstance(variable, Pointer):
|
||
|
|
||
|
if variable.index == 0:
|
||
|
variable = None
|
||
|
else:
|
||
|
if variable.index in heap:
|
||
|
variable = heap[variable.index]
|
||
|
else:
|
||
|
warnings.warn("Variable referenced by pointer not found "
|
||
|
"in heap: variable will be set to None",
|
||
|
stacklevel=3)
|
||
|
variable = None
|
||
|
|
||
|
replace, new = _replace_heap(variable, heap)
|
||
|
|
||
|
if replace:
|
||
|
variable = new
|
||
|
|
||
|
return True, variable
|
||
|
|
||
|
elif isinstance(variable, np.rec.recarray):
|
||
|
|
||
|
# Loop over records
|
||
|
for ir, record in enumerate(variable):
|
||
|
|
||
|
replace, new = _replace_heap(record, heap)
|
||
|
|
||
|
if replace:
|
||
|
variable[ir] = new
|
||
|
|
||
|
return False, variable
|
||
|
|
||
|
elif isinstance(variable, np.record):
|
||
|
|
||
|
# Loop over values
|
||
|
for iv, value in enumerate(variable):
|
||
|
|
||
|
replace, new = _replace_heap(value, heap)
|
||
|
|
||
|
if replace:
|
||
|
variable[iv] = new
|
||
|
|
||
|
return False, variable
|
||
|
|
||
|
elif isinstance(variable, np.ndarray):
|
||
|
|
||
|
# Loop over values if type is np.object_
|
||
|
if variable.dtype.type is np.object_:
|
||
|
|
||
|
for iv in range(variable.size):
|
||
|
|
||
|
replace, new = _replace_heap(variable.item(iv), heap)
|
||
|
|
||
|
if replace:
|
||
|
variable.reshape(-1)[iv] = new
|
||
|
|
||
|
return False, variable
|
||
|
|
||
|
else:
|
||
|
|
||
|
return False, variable
|
||
|
|
||
|
|
||
|
class AttrDict(dict):
|
||
|
'''
|
||
|
A case-insensitive dictionary with access via item, attribute, and call
|
||
|
notations:
|
||
|
|
||
|
>>> from scipy.io._idl import AttrDict
|
||
|
>>> d = AttrDict()
|
||
|
>>> d['Variable'] = 123
|
||
|
>>> d['Variable']
|
||
|
123
|
||
|
>>> d.Variable
|
||
|
123
|
||
|
>>> d.variable
|
||
|
123
|
||
|
>>> d('VARIABLE')
|
||
|
123
|
||
|
>>> d['missing']
|
||
|
Traceback (most recent error last):
|
||
|
...
|
||
|
KeyError: 'missing'
|
||
|
>>> d.missing
|
||
|
Traceback (most recent error last):
|
||
|
...
|
||
|
AttributeError: 'AttrDict' object has no attribute 'missing'
|
||
|
'''
|
||
|
|
||
|
def __init__(self, init={}):
|
||
|
dict.__init__(self, init)
|
||
|
|
||
|
def __getitem__(self, name):
|
||
|
return super().__getitem__(name.lower())
|
||
|
|
||
|
def __setitem__(self, key, value):
|
||
|
return super().__setitem__(key.lower(), value)
|
||
|
|
||
|
def __getattr__(self, name):
|
||
|
try:
|
||
|
return self.__getitem__(name)
|
||
|
except KeyError:
|
||
|
raise AttributeError(
|
||
|
f"'{type(self)}' object has no attribute '{name}'") from None
|
||
|
|
||
|
__setattr__ = __setitem__
|
||
|
__call__ = __getitem__
|
||
|
|
||
|
|
||
|
def readsav(file_name, idict=None, python_dict=False,
|
||
|
uncompressed_file_name=None, verbose=False):
|
||
|
"""
|
||
|
Read an IDL .sav file.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
file_name : str
|
||
|
Name of the IDL save file.
|
||
|
idict : dict, optional
|
||
|
Dictionary in which to insert .sav file variables.
|
||
|
python_dict : bool, optional
|
||
|
By default, the object return is not a Python dictionary, but a
|
||
|
case-insensitive dictionary with item, attribute, and call access
|
||
|
to variables. To get a standard Python dictionary, set this option
|
||
|
to True.
|
||
|
uncompressed_file_name : str, optional
|
||
|
This option only has an effect for .sav files written with the
|
||
|
/compress option. If a file name is specified, compressed .sav
|
||
|
files are uncompressed to this file. Otherwise, readsav will use
|
||
|
the `tempfile` module to determine a temporary filename
|
||
|
automatically, and will remove the temporary file upon successfully
|
||
|
reading it in.
|
||
|
verbose : bool, optional
|
||
|
Whether to print out information about the save file, including
|
||
|
the records read, and available variables.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
idl_dict : AttrDict or dict
|
||
|
If `python_dict` is set to False (default), this function returns a
|
||
|
case-insensitive dictionary with item, attribute, and call access
|
||
|
to variables. If `python_dict` is set to True, this function
|
||
|
returns a Python dictionary with all variable names in lowercase.
|
||
|
If `idict` was specified, then variables are written to the
|
||
|
dictionary specified, and the updated dictionary is returned.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> from os.path import dirname, join as pjoin
|
||
|
>>> import scipy.io as sio
|
||
|
>>> from scipy.io import readsav
|
||
|
|
||
|
Get the filename for an example .sav file from the tests/data directory.
|
||
|
|
||
|
>>> data_dir = pjoin(dirname(sio.__file__), 'tests', 'data')
|
||
|
>>> sav_fname = pjoin(data_dir, 'array_float32_1d.sav')
|
||
|
|
||
|
Load the .sav file contents.
|
||
|
|
||
|
>>> sav_data = readsav(sav_fname)
|
||
|
|
||
|
Get keys of the .sav file contents.
|
||
|
|
||
|
>>> print(sav_data.keys())
|
||
|
dict_keys(['array1d'])
|
||
|
|
||
|
Access a content with a key.
|
||
|
|
||
|
>>> print(sav_data['array1d'])
|
||
|
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
|
||
|
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
|
||
|
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
|
||
|
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
|
||
|
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
|
||
|
0. 0. 0.]
|
||
|
|
||
|
"""
|
||
|
|
||
|
# Initialize record and variable holders
|
||
|
records = []
|
||
|
if python_dict or idict:
|
||
|
variables = {}
|
||
|
else:
|
||
|
variables = AttrDict()
|
||
|
|
||
|
# Open the IDL file
|
||
|
f = open(file_name, 'rb')
|
||
|
|
||
|
# Read the signature, which should be 'SR'
|
||
|
signature = _read_bytes(f, 2)
|
||
|
if signature != b'SR':
|
||
|
raise Exception("Invalid SIGNATURE: %s" % signature)
|
||
|
|
||
|
# Next, the record format, which is '\x00\x04' for normal .sav
|
||
|
# files, and '\x00\x06' for compressed .sav files.
|
||
|
recfmt = _read_bytes(f, 2)
|
||
|
|
||
|
if recfmt == b'\x00\x04':
|
||
|
pass
|
||
|
|
||
|
elif recfmt == b'\x00\x06':
|
||
|
|
||
|
if verbose:
|
||
|
print("IDL Save file is compressed")
|
||
|
|
||
|
if uncompressed_file_name:
|
||
|
fout = open(uncompressed_file_name, 'w+b')
|
||
|
else:
|
||
|
fout = tempfile.NamedTemporaryFile(suffix='.sav')
|
||
|
|
||
|
if verbose:
|
||
|
print(" -> expanding to %s" % fout.name)
|
||
|
|
||
|
# Write header
|
||
|
fout.write(b'SR\x00\x04')
|
||
|
|
||
|
# Cycle through records
|
||
|
while True:
|
||
|
|
||
|
# Read record type
|
||
|
rectype = _read_long(f)
|
||
|
fout.write(struct.pack('>l', int(rectype)))
|
||
|
|
||
|
# Read position of next record and return as int
|
||
|
nextrec = _read_uint32(f)
|
||
|
nextrec += _read_uint32(f).astype(np.int64) * 2**32
|
||
|
|
||
|
# Read the unknown 4 bytes
|
||
|
unknown = f.read(4)
|
||
|
|
||
|
# Check if the end of the file has been reached
|
||
|
if RECTYPE_DICT[rectype] == 'END_MARKER':
|
||
|
modval = np.int64(2**32)
|
||
|
fout.write(struct.pack('>I', int(nextrec) % modval))
|
||
|
fout.write(
|
||
|
struct.pack('>I', int((nextrec - (nextrec % modval)) / modval))
|
||
|
)
|
||
|
fout.write(unknown)
|
||
|
break
|
||
|
|
||
|
# Find current position
|
||
|
pos = f.tell()
|
||
|
|
||
|
# Decompress record
|
||
|
rec_string = zlib.decompress(f.read(nextrec-pos))
|
||
|
|
||
|
# Find new position of next record
|
||
|
nextrec = fout.tell() + len(rec_string) + 12
|
||
|
|
||
|
# Write out record
|
||
|
fout.write(struct.pack('>I', int(nextrec % 2**32)))
|
||
|
fout.write(struct.pack('>I', int((nextrec - (nextrec % 2**32)) / 2**32)))
|
||
|
fout.write(unknown)
|
||
|
fout.write(rec_string)
|
||
|
|
||
|
# Close the original compressed file
|
||
|
f.close()
|
||
|
|
||
|
# Set f to be the decompressed file, and skip the first four bytes
|
||
|
f = fout
|
||
|
f.seek(4)
|
||
|
|
||
|
else:
|
||
|
raise Exception("Invalid RECFMT: %s" % recfmt)
|
||
|
|
||
|
# Loop through records, and add them to the list
|
||
|
while True:
|
||
|
r = _read_record(f)
|
||
|
records.append(r)
|
||
|
if 'end' in r:
|
||
|
if r['end']:
|
||
|
break
|
||
|
|
||
|
# Close the file
|
||
|
f.close()
|
||
|
|
||
|
# Find heap data variables
|
||
|
heap = {}
|
||
|
for r in records:
|
||
|
if r['rectype'] == "HEAP_DATA":
|
||
|
heap[r['heap_index']] = r['data']
|
||
|
|
||
|
# Find all variables
|
||
|
for r in records:
|
||
|
if r['rectype'] == "VARIABLE":
|
||
|
replace, new = _replace_heap(r['data'], heap)
|
||
|
if replace:
|
||
|
r['data'] = new
|
||
|
variables[r['varname'].lower()] = r['data']
|
||
|
|
||
|
if verbose:
|
||
|
|
||
|
# Print out timestamp info about the file
|
||
|
for record in records:
|
||
|
if record['rectype'] == "TIMESTAMP":
|
||
|
print("-"*50)
|
||
|
print("Date: %s" % record['date'])
|
||
|
print("User: %s" % record['user'])
|
||
|
print("Host: %s" % record['host'])
|
||
|
break
|
||
|
|
||
|
# Print out version info about the file
|
||
|
for record in records:
|
||
|
if record['rectype'] == "VERSION":
|
||
|
print("-"*50)
|
||
|
print("Format: %s" % record['format'])
|
||
|
print("Architecture: %s" % record['arch'])
|
||
|
print("Operating System: %s" % record['os'])
|
||
|
print("IDL Version: %s" % record['release'])
|
||
|
break
|
||
|
|
||
|
# Print out identification info about the file
|
||
|
for record in records:
|
||
|
if record['rectype'] == "IDENTIFICATON":
|
||
|
print("-"*50)
|
||
|
print("Author: %s" % record['author'])
|
||
|
print("Title: %s" % record['title'])
|
||
|
print("ID Code: %s" % record['idcode'])
|
||
|
break
|
||
|
|
||
|
# Print out descriptions saved with the file
|
||
|
for record in records:
|
||
|
if record['rectype'] == "DESCRIPTION":
|
||
|
print("-"*50)
|
||
|
print("Description: %s" % record['description'])
|
||
|
break
|
||
|
|
||
|
print("-"*50)
|
||
|
print("Successfully read %i records of which:" %
|
||
|
(len(records)))
|
||
|
|
||
|
# Create convenience list of record types
|
||
|
rectypes = [r['rectype'] for r in records]
|
||
|
|
||
|
for rt in set(rectypes):
|
||
|
if rt != 'END_MARKER':
|
||
|
print(" - %i are of type %s" % (rectypes.count(rt), rt))
|
||
|
print("-"*50)
|
||
|
|
||
|
if 'VARIABLE' in rectypes:
|
||
|
print("Available variables:")
|
||
|
for var in variables:
|
||
|
print(f" - {var} [{type(variables[var])}]")
|
||
|
print("-"*50)
|
||
|
|
||
|
if idict:
|
||
|
for var in variables:
|
||
|
idict[var] = variables[var]
|
||
|
return idict
|
||
|
else:
|
||
|
return variables
|