@ -1,4 +1,4 @@
# $Id: file_db.py,v 1.1 2010-02-06 23:21:09 wirawan Exp $
# $Id: file_db.py,v 1.2 2010-02-08 01:46:12 wirawan Exp $
#
#
# wpylib.db.filedb module
# wpylib.db.filedb module
# Created: 20100205
# Created: 20100205
@ -20,7 +20,7 @@ except:
class file_rec ( tuple ) :
class file_rec ( tuple ) :
pass
pass
class file_db ( object ) :
class file_table ( object ) :
# dtype for numpy (if wanted)
# dtype for numpy (if wanted)
dtype = numpy . dtype ( [
dtype = numpy . dtype ( [
( ' filename ' , ' S256 ' ) ,
( ' filename ' , ' S256 ' ) ,
@ -39,12 +39,14 @@ class file_db(object):
def __init__ ( self , src_name , table_name = ' filedb ' , extra_fields = [ ] ) :
def __init__ ( self , src_name , table_name = ' filedb ' , extra_fields = [ ] ) :
self . src_name = src_name
self . src_name = src_name
self . table_name = table_name
self . table_name = table_name
if os . path . isfile ( src_name ) :
if isinstance ( src_name , str ) : # os.path.isfile(src_name) :
self . db = sqlite3 . connect ( src_name )
self . db = sqlite3 . connect ( src_name )
self . dbc = self . db . cursor ( )
self . dbc = self . db . cursor ( )
els e :
elif isinstanc e ( src_name , sqlite3 . Connection ) :
self . db = sqlite3 . connect ( s rc_name )
self . db = src_name
self . dbc = self . db . cursor ( )
self . dbc = self . db . cursor ( )
else :
raise ValueError , " Invalid src_name data type "
self . db . text_factory = str
self . db . text_factory = str
self . sql_params = {
self . sql_params = {
' table_name ' : table_name ,
' table_name ' : table_name ,
@ -117,12 +119,12 @@ class file_db(object):
# Replaceable insert is not intended for tables with duplicate entries
# Replaceable insert is not intended for tables with duplicate entries
# of the same filename.
# of the same filename.
insert_sql = " UPDATE ' %(table_name)s ' SET " \
insert_sql = " UPDATE ' %(table_name)s ' SET " \
+ ' , ' . join ( [ " ' %s ' = ? " % dname for dname in dnames ] ) \
+ ' , ' . join ( [ " ' %s ' = ? " % d for d in dnames ] ) \
+ " WHERE filename = ?; "
+ " WHERE filename = ?; "
vals = tuple ( dvals + [ filename ] )
vals = tuple ( dvals + [ filename ] )
else :
else :
insert_sql = " INSERT INTO ' %(table_name)s ' (filename, " \
insert_sql = " INSERT INTO ' %(table_name)s ' (filename, " \
+ " , " . join ( [ " ' %s ' " % dname for dname in dnames ] ) \
+ " , " . join ( [ " ' %s ' " % d for d in dnames ] ) \
+ " ) VALUES (? " + ' ,? ' * ( len ( fields ) ) + " ); "
+ " ) VALUES (? " + ' ,? ' * ( len ( fields ) ) + " ); "
vals = tuple ( [ filename ] + dvals )
vals = tuple ( [ filename ] + dvals )
self . exec_sql ( insert_sql , vals )
self . exec_sql ( insert_sql , vals )
@ -145,6 +147,22 @@ class file_db(object):
sql_stmt = " SELECT * FROM ' %(table_name)s ' WHERE filename = ?; "
sql_stmt = " SELECT * FROM ' %(table_name)s ' WHERE filename = ?; "
return [ rslt for rslt in self . exec_sql ( sql_stmt , ( filename , ) ) ]
return [ rslt for rslt in self . exec_sql ( sql_stmt , ( filename , ) ) ]
def __setitem__ ( self , filename , newdata ) :
""" Updates the metadata on the filename. Any other field than the filename
can be updated . The filename serves as a unique key here . """
if isinstance ( newdata , dict ) or " keys " in dir ( newdata ) :
dnames = newdata . keys ( )
dvals = [ newdata [ k ] for k in dnames ]
else :
# Assuming an iterable with ('field', 'value') tuples.
dnames = [ dname for ( dname , dval ) in newdata ]
dvals = [ dval for ( dname , dval ) in newdata ]
update_sql = " UPDATE ' %(table_name)s ' SET " \
+ ' , ' . join ( [ " ' %s ' = ? " % d for d in dnames ] ) \
+ " WHERE filename = ?; "
vals = tuple ( dvals + [ filename ] )
self . exec_sql ( update_sql , vals )
def __contains__ ( self , filename ) :
def __contains__ ( self , filename ) :
""" Counts the number of record entries matching in the `filename ' field. """
""" Counts the number of record entries matching in the `filename ' field. """
if filename . find ( " % " ) > = 0 :
if filename . find ( " % " ) > = 0 :
@ -155,6 +173,10 @@ class file_db(object):
count = __contains__
count = __contains__
def fields ( self ) :
""" Returns the field names of the table of the latest query. """
return [ z [ 0 ] for z in self . dbc . description ]
def md5_digest_file ( filename ) :
def md5_digest_file ( filename ) :
""" Digests the content of a file. """
""" Digests the content of a file. """
@ -174,13 +196,16 @@ def str2hexstr(md5sum):
return " " . join ( [ " %02x " % ord ( c ) for c in md5sum ] )
return " " . join ( [ " %02x " % ord ( c ) for c in md5sum ] )
def get_file_stats ( filename ) :
def get_file_stats ( filename , get_md5sum = True ) :
stats = os . stat ( filename )
stats = os . stat ( filename )
mtime = time . localtime ( stats . st_mtime )
mtime = time . localtime ( stats . st_mtime )
Mdate = mtime . tm_year * 10000 + mtime . tm_mon * 100 + mtime . tm_mday
Mdate = mtime . tm_year * 10000 + mtime . tm_mon * 100 + mtime . tm_mday
Mtime = mtime . tm_hour * 10000 + mtime . tm_min * 100 + mtime . tm_sec
Mtime = mtime . tm_hour * 10000 + mtime . tm_min * 100 + mtime . tm_sec
size = stats . st_size
size = stats . st_size
if get_md5sum :
md5sum = str2hexstr ( md5_digest_file ( filename ) ) # this step is EXPEN$IVE
md5sum = str2hexstr ( md5_digest_file ( filename ) ) # this step is EXPEN$IVE
else :
md5sum = None
return {
return {
' filename ' : filename ,
' filename ' : filename ,
' mdate ' : Mdate ,
' mdate ' : Mdate ,