|
|
|
@ -128,10 +128,10 @@ class text_input(object): |
|
|
|
|
return self.next_(self) |
|
|
|
|
|
|
|
|
|
def seek_text(self, regex=None, match=None): |
|
|
|
|
'''Seeks the file until a particular piece text is encountered. |
|
|
|
|
"""Seeks the file until a particular piece text is encountered. |
|
|
|
|
We ignore all comments. |
|
|
|
|
The `regex' argument can be either a regex string or a standard python |
|
|
|
|
regular expression object.''' |
|
|
|
|
regular expression object.""" |
|
|
|
|
|
|
|
|
|
if regex: |
|
|
|
|
if isinstance(regex, basestring): |
|
|
|
@ -349,3 +349,93 @@ def tail(filename, maxlines): |
|
|
|
|
pass |
|
|
|
|
return out[-maxlines:] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# More tools for extracting data from table-like text stream/string. |
|
|
|
|
|
|
|
|
|
tbl_filter_num1_rx = re.compile('^\s*[-+]?(?:[0-9]+|[0-9]+\.|\.[0-9]+|[0-9]+\.[0-9]+)(?:[EeDd][-+]?[0-9]+)?') |
|
|
|
|
def tbl_filter_num1(flds, col=0, **args): |
|
|
|
|
"""Simple filter function: given a list of splitted text in `flds`, |
|
|
|
|
if the col-th field of the row is a numerical |
|
|
|
|
string, then it is a valid row; otherwise we will ignore this row. |
|
|
|
|
""" |
|
|
|
|
return tbl_filter_num1_rx.match(flds[col]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def filter_table_text(T, filter=tbl_filter_num1, filter_args={}): |
|
|
|
|
"""Filters out irrelevant text (junk) from the table by commenting them out. |
|
|
|
|
Using the default filter, we assume that the target column (default==0) |
|
|
|
|
is a numerical value (usually a geometry value or a similar parameter). |
|
|
|
|
|
|
|
|
|
Input: |
|
|
|
|
* T = a text table (a multi-line string, with the linebreaks) |
|
|
|
|
* filter = a filter function |
|
|
|
|
* filter_args = dict-style arguments for the filter function.""" |
|
|
|
|
Lines = T.splitlines() |
|
|
|
|
for (i,L) in enumerate(Lines): |
|
|
|
|
F = L.split() |
|
|
|
|
if len(F) == 0: |
|
|
|
|
pass |
|
|
|
|
elif not F[0].startswith("#") and not filter(F, **filter_args): |
|
|
|
|
Lines[i] = "#" + L |
|
|
|
|
return "\n".join(Lines) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class tbl_filter_num1_limited_range(object): |
|
|
|
|
"""Fancy filtering: Assume that the first column is numerical |
|
|
|
|
(e.g., rbond); and only include rows where this `rbond` fall |
|
|
|
|
within a given range. |
|
|
|
|
""" |
|
|
|
|
def __init__(self, rmin, rmax, col=0): |
|
|
|
|
self.rmin, self.rmax = rmin, rmax |
|
|
|
|
self.col = col |
|
|
|
|
def __call__(self, flds, **args): |
|
|
|
|
if tbl_filter_num1_rx.match(flds[self.col]): |
|
|
|
|
r = float(flds[0]) |
|
|
|
|
return self.rmin <= r <= self.rmax |
|
|
|
|
else: |
|
|
|
|
return False |
|
|
|
|
def mk_table_filter(self): |
|
|
|
|
return lambda T: filter_table_text(T,filter=self) |
|
|
|
|
@classmethod |
|
|
|
|
def create(cls, rmin, rmax, col=0): |
|
|
|
|
o = cls(rmin, rmax, col=col) |
|
|
|
|
func = o.mk_table_filter() |
|
|
|
|
func.__name__ = "%s.create(%.4f,%.4f,%d)" \ |
|
|
|
|
% (cls.__name__, rmin, rmax, col) |
|
|
|
|
return func |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_table(F, maps={}): |
|
|
|
|
"""Reads in a 2-D table from a text stream. |
|
|
|
|
Returns a list of lists containing the table content, in each cell by |
|
|
|
|
default as a string, unless a mapping function is provided (for simple |
|
|
|
|
data conversion only). |
|
|
|
|
|
|
|
|
|
This is a legacy tool. It appears that numpy.genfromtxt can do what |
|
|
|
|
this tool can do, and better. |
|
|
|
|
You should probably check if numpy.genfromtxt can do the required job |
|
|
|
|
before using read_table/read_table_text provided in this module. |
|
|
|
|
""" |
|
|
|
|
rows = [] |
|
|
|
|
comment_char = "#" |
|
|
|
|
for L in F: |
|
|
|
|
L = L.split(comment_char,1)[0] |
|
|
|
|
flds = L.split() |
|
|
|
|
if len(flds) == 0: |
|
|
|
|
continue |
|
|
|
|
if maps: |
|
|
|
|
for i in xrange(len(flds)): |
|
|
|
|
if i in maps: |
|
|
|
|
flds[i] = maps[i](flds[i]) |
|
|
|
|
rows.append(flds) |
|
|
|
|
return rows |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_table_text(txt, maps={}): |
|
|
|
|
"""Reads in a 2-D table from a text stream. |
|
|
|
|
The text (as a whole string) is given in the txt argument. |
|
|
|
|
""" |
|
|
|
|
from StringIO import StringIO |
|
|
|
|
return read_table(StringIO(txt), maps) |
|
|
|
|
|
|
|
|
|