parent
5f367c8fc8
commit
cae9cabdea
1 changed files with 211 additions and 0 deletions
@ -0,0 +1,211 @@ |
|||||||
|
#!/usr/bin/python |
||||||
|
# $Id: text_input.py,v 1.1 2010-09-27 19:54:05 wirawan Exp $ |
||||||
|
# |
||||||
|
# wpylib.iofmt.text_input module |
||||||
|
# Quick-n-dirty text input utilities |
||||||
|
# |
||||||
|
# Wirawan Purwanto |
||||||
|
# Created: 20090601 |
||||||
|
# |
||||||
|
# Routines put here are commonly used in my own scripts. |
||||||
|
# They are not necessarily suitable for general-purpose uses; evaluate |
||||||
|
# your needs and see if they can them as well. |
||||||
|
# |
||||||
|
# 20090601: Created as pyqmc.utils.text_input . |
||||||
|
# 20100927: Moved to wpylib.iofmt.text_input . |
||||||
|
# |
||||||
|
# TODO |
||||||
|
# - book-keep the line number. Also note superfile must have its own line |
||||||
|
# number keeping. |
||||||
|
# |
||||||
|
""" |
||||||
|
Simple text-based input reader. |
||||||
|
|
||||||
|
This module is part of wpylib project. |
||||||
|
""" |
||||||
|
|
||||||
|
import re |
||||||
|
import numpy |
||||||
|
|
||||||
|
from wpylib.file.file_utils import open_input_file |
||||||
|
|
||||||
|
|
||||||
|
class text_input(object): |
||||||
|
'''Text input reader with support for UNIX-style comment marker (#) and |
||||||
|
standard field separation (tabs and whitespaces). |
||||||
|
Used for quick and dirty data reading (iterating only once in forward |
||||||
|
direction without the need of rewinding or skipping). |
||||||
|
This object can be treated like an input file, e.g. used as an iterator, |
||||||
|
etc. |
||||||
|
|
||||||
|
To support more fancy options (e.g., rewinding), use "superize=1" when |
||||||
|
creating the instance.''' |
||||||
|
|
||||||
|
def __init__(self, fname, **opts): |
||||||
|
if opts.get("superize", 0): |
||||||
|
open_opts = { "superize" : opts["superize"] } |
||||||
|
del opts["superize"] |
||||||
|
else: |
||||||
|
open_opts = {} |
||||||
|
self.file = open_input_file(fname, **open_opts) |
||||||
|
# field_filtering_proc field can be used to filter unwanted fields, or do |
||||||
|
# some additional transformations before final feed to the main iteration. |
||||||
|
self.field_filtering_proc = lambda flds : flds |
||||||
|
# Default fancy options: |
||||||
|
self.skip_blank_lines = True |
||||||
|
if len(opts) > 0: |
||||||
|
self.set_options(**opts) |
||||||
|
|
||||||
|
def __del__(self): |
||||||
|
if getattr(self, "file", None): |
||||||
|
self.file.close() |
||||||
|
|
||||||
|
def __iter__(self): |
||||||
|
return self |
||||||
|
|
||||||
|
""" |
||||||
|
def next(self): |
||||||
|
while True: |
||||||
|
L = self.file.next() |
||||||
|
F = self.field_filtering_proc(L.split("#")[0].split()) |
||||||
|
if len(F) > 0: |
||||||
|
return F |
||||||
|
""" |
||||||
|
|
||||||
|
def next_rec(self): |
||||||
|
'''Yields the next record, which is already separated into fields.''' |
||||||
|
while True: |
||||||
|
L = self.file.next() |
||||||
|
F = self.field_filtering_proc(L.split("#")[0].split()) |
||||||
|
if len(F) > 0 or not self.skip_blank_lines: |
||||||
|
return F |
||||||
|
|
||||||
|
def next_line(self): |
||||||
|
'''Yields the next line, which is already separated into fields.''' |
||||||
|
while True: |
||||||
|
L = self.file.next() |
||||||
|
F = self.field_filtering_proc(L.split("#")[0].rstrip()) |
||||||
|
if len(F) > 0 or not self.skip_blank_lines: |
||||||
|
return F |
||||||
|
|
||||||
|
# Do NOT touch the "next" field below unless you know what you're doing: |
||||||
|
next = next_line |
||||||
|
|
||||||
|
def seek_text(self, regex=None, match=None): |
||||||
|
'''Seeks the file until a particular piece text is encountered. |
||||||
|
We ignore all comments. |
||||||
|
The `regex' argument can be either a regex string or a standard python |
||||||
|
regular expression object.''' |
||||||
|
|
||||||
|
if regex: |
||||||
|
if isinstance(regex, str): |
||||||
|
Regexp = re.compile(regex) |
||||||
|
else: |
||||||
|
Regexp = regex |
||||||
|
match_proc = lambda x: Regexp.search(x) |
||||||
|
else: |
||||||
|
match_proc = match |
||||||
|
|
||||||
|
while True: |
||||||
|
L = self.next_line() |
||||||
|
if match_proc(L): |
||||||
|
return L |
||||||
|
|
||||||
|
|
||||||
|
def read_floats(self, *cols, **kwd): |
||||||
|
"""Quickly reads a set of floats from a text file. |
||||||
|
Returns a numpy array of the values in double precision. |
||||||
|
|
||||||
|
Example usage: |
||||||
|
>>> arr = text_input("/tmp/file.txt").read_floats(0, 2, 3) |
||||||
|
to read columns 1, 3, and 4 of the text file /tmp/file.txt, while disregarding |
||||||
|
comments. |
||||||
|
""" |
||||||
|
# float_fields extracts the desired columns and converts them to floats |
||||||
|
float_fields = lambda vals : [ float(vals[col]) for col in cols ] |
||||||
|
if "maxcount" in kwd: |
||||||
|
rslt = [ float_fields(vals.split()) for (c,vals) in zip(xrange(kwd['maxcount']),self) ] |
||||||
|
else: |
||||||
|
rslt = [ float_fields(vals.split()) for vals in self ] |
||||||
|
# finally convert them to a numpy ndarray: |
||||||
|
return numpy.array(rslt) |
||||||
|
|
||||||
|
def read_items(self, *col_desc, **kwd): |
||||||
|
"""Quickly reads a set of items from records of whitespace-separated fields |
||||||
|
in a text file. |
||||||
|
Returns a structured numpy array of the values read. |
||||||
|
|
||||||
|
Example usage: |
||||||
|
|
||||||
|
>>> arr = text_input("/tmp/file.txt").read_items(0, (2, int), (3, "S10", "Atom")) |
||||||
|
|
||||||
|
reads columns 1 (as floats, by default), 3 (as integers), and 4 (as strings of |
||||||
|
max length of 10, which field is named "Atom") from the text file /tmp/file.txt, |
||||||
|
while disregarding comments. |
||||||
|
|
||||||
|
If the tuple contains the third field, it is used as the name of the field; |
||||||
|
otherwise the fields are named f0, f1, f2, .... |
||||||
|
|
||||||
|
Additional keyword options: |
||||||
|
* deftype: default datatype |
||||||
|
* maxcount: maximum number of records to be read |
||||||
|
|
||||||
|
TODO: Needs ability to read in complex data. |
||||||
|
""" |
||||||
|
deftype = kwd.get("deftype", float) |
||||||
|
|
||||||
|
# float_fields extracts the desired columns and converts them to floats |
||||||
|
flds = [] |
||||||
|
cols = [] |
||||||
|
for (i,c) in zip(xrange(len(col_desc)), col_desc): |
||||||
|
if type(c) == int: |
||||||
|
cols.append(c) |
||||||
|
flds.append(('f' + str(i), deftype)) |
||||||
|
elif len(c) == 1: |
||||||
|
cols.append(c[0]) |
||||||
|
flds.append(('f' + str(i), deftype)) |
||||||
|
elif len(c) == 2: |
||||||
|
cols.append(c[0]) |
||||||
|
flds.append(('f' + str(i), c[1])) |
||||||
|
elif len(c) == 3: |
||||||
|
cols.append(c[0]) |
||||||
|
flds.append((c[2], c[1])) |
||||||
|
|
||||||
|
#print cols |
||||||
|
#print flds |
||||||
|
get_fields = lambda vals : tuple([ vals[col] for col in cols ]) |
||||||
|
if "maxcount" in kwd: |
||||||
|
#print "hello" |
||||||
|
rslt = [ get_fields(vals.split()) for (c,vals) in zip(xrange(kwd['maxcount']),self) ] |
||||||
|
else: |
||||||
|
rslt = [ get_fields(vals.split()) for vals in self ] |
||||||
|
#print rslt |
||||||
|
# finally convert them to a numpy ndarray: |
||||||
|
return numpy.array(rslt, dtype=flds) |
||||||
|
|
||||||
|
# Sets fancy options |
||||||
|
def set_options(self, **opts): |
||||||
|
for (o,v) in opts.iteritems(): |
||||||
|
if o == "expand_errorbar": |
||||||
|
self.expand_errorbar(v) |
||||||
|
if o == "skip_blank_lines": |
||||||
|
self.skip_blank_lines = v |
||||||
|
else: |
||||||
|
raise "ValueError", "Invalid option: %s" % (o,) |
||||||
|
return self |
||||||
|
|
||||||
|
# Option for errorbar expansion: |
||||||
|
def expand_errorbar(self, v=True): |
||||||
|
'''Enables or disables errorbar expansion.''' |
||||||
|
if v: |
||||||
|
self.opt_expand_errorbar = True |
||||||
|
self.field_filtering_proc = self.expand_errorbar_hook |
||||||
|
else: |
||||||
|
self.opt_expand_errorbar = False |
||||||
|
self.field_filtering_proc = lambda flds : flds |
||||||
|
return self |
||||||
|
|
||||||
|
def expand_errorbar_hook(self, F): |
||||||
|
# A hook for field_filtering_proc for expanding errorbars: |
||||||
|
from pyqmc.stats.errorbar import expand |
||||||
|
return expand(F, flatten=True) |
Loading…
Reference in new issue