You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
377 lines
12 KiB
377 lines
12 KiB
# $Id: fortbin.py,v 1.4 2011-08-31 20:27:31 wirawan Exp $
|
|
#
|
|
# wpylib.iofmt.fortbin module
|
|
# Created: 20100208
|
|
# Wirawan Purwanto
|
|
#
|
|
"""
|
|
Fortran binary format.
|
|
|
|
"""
|
|
|
|
import numpy
|
|
import sys
|
|
|
|
from wpylib.sugar import ifelse
|
|
|
|
class fortran_types(object):
  """A description of Fortran data types.

  Useful for estimating memory use, file sizes, etc."""
  # Maps a numpy (or native Python) scalar type to its on-disk byte size.
  desc = {
    numpy.int32: dict(
      size=4,
    ),
    numpy.int64: dict(
      size=8,
    ),
    numpy.float32: dict(  # IEEE single precision
      size=4,
    ),
    numpy.float64: dict(  # IEEE double precision
      size=8,
    ),
    numpy.complex64: dict(  # Fortran 90 complex(4)
      size=8,
    ),
    numpy.complex128: dict(  # Fortran 90 complex(8)
      size=16,
    ),
  }
  # defaults: ok for "LP64" systems or typical 32-bit systems
  desc[int] = desc[numpy.int32]
  desc[float] = desc[numpy.float64]
  desc[complex] = desc[numpy.complex128]
  record_marker_size = 4  # size for a record marker (2x this for a record)

  # special data types
  class fortran_dtype(object):
    """Base class for all special Fortran data types."""
    pass

  # FIX: `character` must derive from fortran_dtype, otherwise the
  # isinstance() dispatch in size() below never matches it and the
  # desc[] lookup raises KeyError instead.
  class character(fortran_dtype):
    """Fortran fixed-width character string."""
    def __init__(self, len):
      # len: the declared CHARACTER length, i.e. the byte size on disk.
      self.len = len
    @property
    def size(self):
      return self.len

  def size(self, dtype):
    """Computes the size (in bytes) of a single datatype.

    `dtype` is either a key of the `desc` table or an instance of a
    fortran_dtype subclass (e.g. `character`)."""
    if isinstance(dtype, self.fortran_dtype):
      # FIX: `size` is a property on the special types, not a method;
      # the original `dtype.size()` tried to call the resulting int.
      return dtype.size
    else:
      return self.desc[dtype]['size']

  def file_rec_size(self, dtypes):
    """Computes the size of a record on disk: the data payload plus the
    leading and trailing record-length markers."""
    return sum([ self.size(d) for d in dtypes ]) + 2 * self.record_marker_size

  def file_data_size(self, recs):
    """Computes the size of a sequence of records on disk.
    `recs` is an iterable of records, each a sequence of datatypes."""
    return sum([ self.file_rec_size(r) for r in recs ])
|
|
|
|
|
|
class fortran_bin_file(object):
  """A tool for reading and writing Fortran binary (unformatted,
  sequential-access) files.

  Caveat: On 64-bit systems, typical Fortran implementations still have int==int32
  (i.e. the LP64 programming model), unless "-i8" kind of option is enabled.
  To use 64-bit default integer, set the default_int attribute to numpy.int64 .
  """
  # Type of the 4-byte record-length markers bracketing each record.
  record_marker_type = numpy.uint32
  # Default numpy types used when converting native Python values on write.
  default_int = numpy.int32
  default_float = numpy.float64
  default_complex = numpy.complex128
  default_str = numpy.str_

  def __init__(self, filename=None, mode="r"):
    """Creates the object; opens `filename` right away if one is given."""
    self.debug = 0
    if filename:
      self.open(filename, mode)

  def open(self, filename, mode="r"):
    """Opens the file in binary mode ("b" is appended to `mode`)."""
    self.F = open(filename, mode+"b")

  def close(self):
    """Closes the file if it is open; safe to call more than once."""
    if getattr(self, "F", None):
      self.F.close()
      self.F = None

  def _dbg_printer(self, enable):
    """Returns a message printer to stderr when `enable` is true,
    otherwise a no-op."""
    if enable:
      return lambda msg: sys.stderr.write(msg)
    else:
      return lambda msg: None

  def _read_marker(self):
    """Reads one record-length marker at the current position and returns
    it as a plain int.

    Robustness fix: raises IOError on premature end-of-file instead of
    failing obscurely on an empty array."""
    marker = numpy.fromfile(self.F, self.record_marker_type, 1)
    if len(marker) == 0:
      raise IOError("Unexpected end of file while reading a record marker")
    return int(marker[0])

  @staticmethod
  def fld_count(f):
    """Determines how many items are in a Fortran data field.
    The `f' argument is a field descriptor, which can be given as
    either (name, dtype) or (name, dtype, length) tuple.
    If length is not specified, then a scalar value is read.
    Length is a scalar for 1-D array, or a tuple or list for multidimensional
    array.
    """
    if len(f) > 2:
      if isinstance(f[2], (list, tuple)):
        # numpy.prod: the numpy.product alias was removed in NumPy 2.0.
        return numpy.prod(f[2])
      else:
        return f[2]
    else:
      return 1

  def byte_length(self, *fields):
    """Given a list of field descriptors, determine how many bytes this
    set of fields would occupy (payload only, markers excluded).
    """
    expected_len = sum([ self.fld_count(f) * numpy.dtype(f[1]).itemsize
                         for f in fields ])
    return expected_len

  def read(self, *fields, **opts):
    """Reads a Fortran record.
    This corresponds to a single READ statement in a Fortran program.
    The description of the fields are given as
    either (name, dtype) or (name, dtype, length) tuples.
    If length is not specified, then a scalar value is read.
    Length is a scalar for 1-D array, or a tuple or list for multidimensional
    array.

    Optional argument:
    * dest = a structure to contain the result.
    """
    dbg = self._dbg_printer(self.debug or opts.get("debug"))
    fld_count = self.fld_count

    reclen = self._read_marker()
    dbg("Record length = %d\n" % reclen)
    expected_len = self.byte_length(*fields)
    dbg("Expected length = %d\n" % expected_len)
    if expected_len > reclen:
      # FIX: `raise E, msg` Python-2-only syntax -> call form (same behavior).
      raise IOError(
        "Attempting to read %d bytes from a record of length %d bytes"
        % (expected_len, reclen))

    if "dest" in opts:
      rslt = opts["dest"]
    else:
      rslt = {}

    # Destinations that are exactly a dict, or support item assignment,
    # are filled via rslt[key]; everything else via setattr.
    if (issubclass(rslt.__class__, dict) and issubclass(dict, rslt.__class__)) \
       or "__setitem__" in dir(rslt):
      def setval(d, k, v):
        d[k] = v
    else:
      # Assume we can use setattr method:
      setval = setattr

    for f in fields:
      if len(f) > 2:
        (name, Dtyp, Len) = f
        dtyp = numpy.dtype(Dtyp)
        Len2 = fld_count(f)
        if isinstance(f[2], (list, tuple)):
          # Special handling for shaped arrays: read flat, then reshape
          # in Fortran (column-major) order.
          arr = numpy.fromfile(self.F, dtyp, Len2)
          setval(rslt, name, arr.reshape(tuple(Len), order='F'))
        else:
          setval(rslt, name, numpy.fromfile(self.F, dtyp, Len2))
      else:
        # Special handling for scalars
        name = f[0]
        dtyp = numpy.dtype(f[1])
        setval(rslt, name, numpy.fromfile(self.F, dtyp, 1)[0])

    # Skip over any bytes in the record that the caller did not ask for.
    if expected_len < reclen:
      self.F.seek(reclen - expected_len, 1)

    reclen2 = self._read_marker()
    dbg("Record length tail = %d\n" % reclen2)

    if reclen2 != reclen:
      raise IOError(
        "Inconsistency in record: end-marker length = %d; was expecting %d"
        % (reclen2, reclen))

    return rslt

  def bulk_read_array1(self, dtype, shape):
    """Reads data that is regularly stored as an array of Fortran records
    (all of the same type and length).
    Each record must be 'read' individually and validated if the record lengths
    are indeed correct.
    But this routine will bulk-read all the records at once, and shape it
    into an array with that format.

    Warning: because we load all the leading and trailing reclen markers, the array
    will be larger than the actual size of the data, and the memory will not be
    contiguous.
    Use copy_subarray below to create the contiguous representation of the data
    (per field name).
    """
    from numpy import fromfile, all
    # One compound element per record: marker + payload + marker.
    dtype1 = numpy.dtype([('reclen', self.record_marker_type),
                          ('content', dtype),
                          ('reclen2', self.record_marker_type)])

    dtype_itemsize = dtype1['content'].itemsize

    size = numpy.prod(shape)  # total number of elements to read in bulk
    # reads in *ALL* the records in a linear fashion, in one read stmt
    arr = fromfile(self.F, dtype1, size)

    if not all(arr['reclen'] == dtype_itemsize) \
       or not all(arr['reclen2'] == dtype_itemsize):
      raise IOError(
        ("Inconsistency detected in record array: "
         "one or more records do not have the expected record length=%d")
        % (dtype_itemsize,))

    # Returns only the content--this WILL NOT be contiguous in memory.
    return arr['content'].reshape(shape, order='F')

  def write_vals(self, *vals, **opts):
    """Writes a Fortran record.
    Only values need to be given, because the types are known.
    This is a direct converse of read subroutine."""
    dbg = self._dbg_printer(self.debug or opts.get("debug"))

    # Convert native Python values to fixed-size numpy scalars first.
    vals0 = vals
    vals = []
    for v in vals0:
      if isinstance(v, int):
        v2 = self.default_int(v)
        if v2 != v:
          raise OverflowError(
            "Integer too large to represent by default int: %d" % v)
        vals.append(v2)
      elif isinstance(v, float):
        v2 = self.default_float(v)
        # FIXME: check for overflow error like in integer conversion above
        vals.append(v2)
      elif isinstance(v, basestring):  # NOTE(review): Python 2 name; on
        # Python 3 this branch raises NameError when reached -- confirm
        # whether str/bytes handling is wanted before porting.
        v2 = self.default_str(v)
        vals.append(v2)
      elif "itemsize" in dir(v):
        # Assume a numpy scalar/array: it already has a binary layout.
        vals.append(v)
      else:
        # FIX: the original string was missing the `%' operator, so it
        # attempted to *call* the format string (TypeError) instead of
        # raising the intended NotImplementedError.
        raise NotImplementedError(
          "Unsupported object of type %s of value %s"
          % (str(type(v)), str(v)))

    reclen = numpy.sum([ v.size * v.itemsize for v in vals ],
                       dtype=self.record_marker_type)

    dbg("Record length = %d\n" % reclen)
    dbg("Item count = %d\n" % len(vals))
    reclen.tofile(self.F)

    for v in vals:
      if isinstance(v, numpy.ndarray):
        # Always store in "Fortran" format, i.e. column major
        # Since tofile() always write in the row major format,
        # we will transpose it before writing:
        v.T.tofile(self.F)
      else:
        v.tofile(self.F)

    reclen.tofile(self.F)

  def write_fields(self, src, *fields, **opts):
    """Writes a Fortran record whose values are fetched from `src`
    (a dict-like object or an object with attributes), as named by the
    field descriptors."""
    if (issubclass(src.__class__, dict) and issubclass(dict, src.__class__)) \
       or "__getitem__" in dir(src):
      def getval(d, k):
        return d[k]
    else:
      # Assume we can use getattr method:
      getval = getattr

    vals = []
    for f in fields:
      if isinstance(f, basestring):  # NOTE(review): Python 2 name, see above.
        vals.append(getval(src, f))
      elif isinstance(f, (list, tuple)):
        v = getval(src, f[0])
        # FIXME: check datatype and do necessary conversion if f[1] exists
        # Exception: if a string spec is found, we will retrofit the string
        # to that kind of object. Strings that are too long are silently
        # truncated and those that are too short will have whitespaces
        # (ASCII 32) appended.
        if len(f) > 1:
          dtyp = numpy.dtype(f[1])
          if dtyp.char == 'S':
            strlen = dtyp.itemsize
            v = self.default_str("%-*s" % (strlen, v[:strlen]))
        # FIXME: check dimensionality if f[2] exists
        vals.append(v)
      else:
        raise ValueError("Invalid field type: %s" % str(type(f)))

    self.write_vals(*vals, **opts)

  def peek_next_rec_len(self):
    """Fetches the length of the next record, while preserving
    the position of the file read pointer.
    """
    filepos = self.F.tell()
    try:
      return self._read_marker()
    finally:
      # Always rewind, even if the marker read failed.
      self.F.seek(filepos)
|
|
|
|
|
|
def array_major_dim(arr):
  """Tests whether a numpy array is column or row major.
  It will return the following:
    -1 : row major
    +1 : column major
     0 : unknown (e.g. no indication one way or the other)
  In the case of inconsistent order, we will raise an exception."""
  if len(arr.shape) <= 1:
    # 0-D / 1-D: no majority to speak of.
    return 0
  elif arr.flags['C_CONTIGUOUS']:
    return -1
  elif arr.flags['F_CONTIGUOUS']:
    return +1
  # In case of noncontiguous array, we will have to test it
  # based on the strides
  else:
    # FIX: the original sliced arr.shape here even though the variable
    # names (and the comment above) clearly intend arr.strides; shapes
    # carry no information about storage order.  Row-major storage has
    # monotonically nonincreasing strides left-to-right; column-major
    # has nondecreasing strides.
    Lstrides = numpy.array(arr.strides[:-1])
    Rstrides = numpy.array(arr.strides[1:])
    if numpy.all(Lstrides >= Rstrides):
      # Categorizes equal consecutive strides to "row major" as well
      return -1
    elif numpy.all(Lstrides <= Rstrides):
      return +1
    else:
      raise RuntimeError(
        "Unable to determine whether this is a row or column major object.")
|
|
|
|
|
|
def copy_subarray(arr, key, order='F'):
  """Given a numpy array of structured datatype, copy out a subarray field
  into a new array with contiguous format.
  The field accessed by arr[key] must be a fixed-size array.
  The order argument can be either 'F' or 'C':
  - For 'F' ordering, then the subarray index will become the *first* index.
  - For 'C' ordering, then the subarray index will become the *last* index.
  """
  subarr = arr[key]
  dim = len(arr.shape)
  # Number of trailing axes contributed by the fixed-size field itself.
  subdim = len(subarr.shape) - dim
  if order == 'F':
    # Move the subarray axes in front of the main-array axes.
    # FIX: the original wrote list(range(...) + range(...)), which relies
    # on Python 2's range() returning a list; concatenating two explicit
    # lists is equivalent on Python 2 and also valid on Python 3.
    rslt = numpy.transpose(subarr,
                           axes=list(range(dim, subdim+dim)) + list(range(dim)))
  elif order == 'C':
    rslt = subarr
  else:
    raise ValueError('Invalid order argument')
  # Always return a copy!
  if numpy.may_share_memory(rslt, arr):
    return rslt.copy(order=order)
  else:
    return rslt
|
|
|
|
|
|
|