1-
21# MIT License
32
43# Copyright (c) 2023 Markus Iser, Karlsruhe Institute of Technology (KIT)
@@ -39,7 +38,6 @@ class FeatureInfo:
3938
4039
4140class Schema :
42-
4341 def __init__ (self , dbcon , dbname , path , features , context , csv = False ):
4442 self .dbname = dbname
4543 self .path = path
@@ -52,11 +50,14 @@ def __init__(self, dbcon, dbname, path, features, context, csv=False):
5250 def is_database (cls , path ):
5351 if os .path .isfile (path ):
5452 sz = os .path .getsize (path )
55- if sz == 0 : return True # new sqlite3 files can be empty
56- if sz < 100 : return False # sqlite header is 100 bytes
57- with open (path , 'rb' ) as fd : header = fd .read (100 ) # validate header
58- return (header [:16 ] == b'SQLite format 3\x00 ' )
59- elif confirm ("Database '{}' does not exist. Create new database?" .format (path )):
53+ if sz == 0 :
54+ return True # new sqlite3 files can be empty
55+ if sz < 100 :
56+ return False # sqlite header is 100 bytes
57+ with open (path , "rb" ) as fd :
58+ header = fd .read (100 ) # validate header
59+ return header [:16 ] == b"SQLite format 3\x00 "
60+ elif confirm ("Database '{}' does not exist. Create new database?" .format (path )):
6061 sqlite3 .connect (path ).close ()
6162 return True
6263 else :
@@ -93,15 +94,15 @@ def from_csv(cls, path):
9394 def features_from_csv (cls , dbname , path , con ) -> typing .Dict [str , FeatureInfo ]:
9495 features = dict ()
9596 with open (path ) as csvfile :
96- temp_lines = csvfile .readline () + ' \n ' + csvfile .readline ()
97+ temp_lines = csvfile .readline () + " \n " + csvfile .readline ()
9798 dialect = csv .Sniffer ().sniff (temp_lines , delimiters = ";, \t " )
9899 csvfile .seek (0 )
99100 csvreader = csv .DictReader (csvfile , dialect = dialect )
100101 if "hash" in csvreader .fieldnames :
101- cols = [ re .sub (' [^0-9a-zA-Z]+' , '_' , n ) for n in csvreader .fieldnames ]
102+ cols = [re .sub (" [^0-9a-zA-Z]+" , "_" , n ) for n in csvreader .fieldnames ]
102103 for colname in cols :
103104 features [colname ] = FeatureInfo (colname , dbname , "features" , colname , None )
104- con .execute (' CREATE TABLE IF NOT EXISTS {} ({})' .format ("features" , ", " .join (cols )))
105+ con .execute (" CREATE TABLE IF NOT EXISTS {} ({})" .format ("features" , ", " .join (cols )))
105106 for row in csvreader :
106107 con .execute ("INSERT INTO {} VALUES ('{}')" .format ("features" , "', '" .join (row .values ())))
107108 con .commit ()
@@ -113,11 +114,11 @@ def features_from_csv(cls, dbname, path, con) -> typing.Dict[str, FeatureInfo]:
113114 @classmethod
114115 def features_from_database (cls , dbname , path , con ) -> typing .Dict [str , FeatureInfo ]:
115116 features = dict ()
116- sql_tables = "SELECT tbl_name FROM sqlite_master WHERE type = 'table'"
117- tables = [ tab for (tab , ) in con .execute (sql_tables ).fetchall () if not tab .startswith ("_" ) ]
117+ sql_tables = "SELECT tbl_name FROM sqlite_master WHERE type = 'table'"
118+ tables = [tab for (tab ,) in con .execute (sql_tables ).fetchall () if not tab .startswith ("_" )]
118119 for table in tables :
119120 columns = con .execute ("PRAGMA table_info({})" .format (table )).fetchall ()
120- for ( index , colname , coltype , notnull , default_value , pk ) in columns :
121+ for index , colname , coltype , notnull , default_value , pk in columns :
121122 is_fk_column = table == "features" and colname in tables
122123 is_fk_hash = table != "features" and colname == "hash"
123124 if not is_fk_column and not is_fk_hash :
@@ -137,13 +138,12 @@ def context_from_database(cls, path):
137138
@classmethod
def context_from_name(cls, name):
    """Derive the context from a name of the form ``<context>_<rest>``.

    The text before the first underscore is returned when the name contains
    an underscore and that prefix is one of ``contexts.contexts()``;
    otherwise ``contexts.default_context()`` is returned.
    """
    prefix, underscore, _rest = name.partition("_")
    # An empty separator means the name has no underscore at all.
    if underscore and prefix in contexts.contexts():
        return prefix
    return contexts.default_context()
145146
146-
147147 @classmethod
148148 def dbname_from_path (cls , path ):
149149 filename = os .path .splitext (os .path .basename (path ))[0 ]
@@ -155,20 +155,159 @@ def dbname_from_path(cls, path):
def valid_feature_or_raise(cls, name):
    """Validate a prospective feature name; return None when it is acceptable.

    A name is accepted only if all of the following hold:

    * it consists solely of ASCII letters, digits and underscores and starts
      with a letter,
    * it is not one of the names reserved by this schema
      (``hash``, ``value``, ``features``; compared case-insensitively),
    * it is not an SQLite keyword and does not start with ``sqlite_``
      (both compared case-insensitively).

    Raises:
        SchemaException: if any of the rules above is violated.
    """
    # fullmatch (not match): the *entire* name has to fit the pattern. With a
    # prefix match, names such as "a-b" or "x; DROP TABLE t" slipped through
    # although feature names are later interpolated into SQL statements.
    if not re.fullmatch(r"[a-zA-Z][a-zA-Z0-9_]*", name):
        raise SchemaException("Feature name '{}' must be alphanumeric (incl. underline) and start with a letter.".format(name))

    lowered = name.lower()

    gbd_keywords = {"hash", "value", "features"}
    if lowered in gbd_keywords:
        raise SchemaException("Feature name '{}' is reserved.".format(name))

    # fmt: off
    sqlite_keywords = {
        "abort", "action", "add", "after", "all", "alter", "always", "analyze", "and", "as", "asc", "attach", "autoincrement",
        "before", "begin", "between", "by", "cascade", "case", "cast", "check", "collate", "column", "commit", "conflict", "constraint",
        "create", "cross", "current", "current_date", "current_time", "current_timestamp", "database", "default", "deferrable", "deferred",
        "delete", "desc", "detach", "distinct", "do", "drop", "each", "else", "end", "escape", "except", "exclude", "exclusive", "exists",
        "explain", "fail", "filter", "first", "following", "for", "foreign", "from", "full", "generated", "glob", "group", "groups",
        "having", "if", "ignore", "immediate", "in", "index", "indexed", "initially", "inner", "insert", "instead", "intersect", "into", "is", "isnull",
        "join", "key", "last", "left", "like", "limit", "match", "materialized", "natural", "no", "not", "nothing", "notnull", "null", "nulls",
        "of", "offset", "on", "or", "order", "others", "outer", "over", "partition", "plan", "pragma", "preceding", "primary", "query",
        "raise", "range", "recursive", "references", "regexp", "reindex", "release", "rename", "replace", "restrict", "returning", "right", "rollback",
        "row", "rows", "savepoint", "select", "set", "table", "temp", "temporary", "then", "ties", "to", "transaction", "trigger", "unbounded", "union",
        "unique", "update", "using", "vacuum", "values", "view", "virtual", "when", "where", "window", "with", "without",
    }
    # fmt: on

    # The "sqlite_" prefix is checked on the lower-cased name so that the
    # prefix test is case-insensitive like the keyword test next to it.
    if lowered in sqlite_keywords or lowered.startswith("sqlite_"):
        raise SchemaException("Feature name '{}' is reserved by sqlite.".format(name))
174313
@@ -188,9 +327,8 @@ def execute(self, sql):
188327 con .commit ()
189328 con .close ()
190329
191-
def get_tables(self):
    """Return the distinct table names referenced by this schema's features.

    Duplicates are removed by going through a set, so the order of the
    returned list is unspecified.
    """
    distinct_tables = {feature.table for feature in self.get_features()}
    return list(distinct_tables)
194332
def get_features(self):
    """Return the values view of ``self.features`` (the registered feature entries)."""
    registry = self.features
    return registry.values()
@@ -204,44 +342,46 @@ def absorb(self, schema):
204342 else :
205343 raise SchemaException ("Internal Error: Attempt to merge non-virtual schemata" )
206344
207-
def create_main_table_if_not_exists(self):
    """Create the main ``features`` table unless the schema already lists it.

    When ``features`` is not among ``self.get_tables()``, the table is
    created, the distinct hashes of every other known table are copied into
    it, and an AFTER INSERT trigger is installed on each of those tables that
    inserts new hashes into the main table as well. The ``hash`` feature is
    then registered in ``self.features``.

    Returns:
        A one-element list holding the newly registered ``hash`` feature, or
        an empty list when the main table was already known.
    """
    main_table = "features"
    if main_table in self.get_tables():
        return []

    self.execute("CREATE TABLE IF NOT EXISTS {} (hash UNIQUE NOT NULL)".format(main_table))

    copy_hashes = "INSERT OR IGNORE INTO {} (hash) SELECT DISTINCT(hash) FROM {}"
    mirror_trigger = """CREATE TRIGGER IF NOT EXISTS {}_dval AFTER INSERT ON {}
                            BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END"""

    # insert all known hashes into main table and create triggers
    for table in self.get_tables():
        if table == main_table:
            continue
        self.execute(copy_hashes.format(main_table, table))
        self.execute(mirror_trigger.format(table, table, main_table))

    hash_feature = FeatureInfo("hash", self.dbname, main_table, "hash", None)
    self.features["hash"] = hash_feature
    return [self.features["hash"]]
222360
223361 def create_feature (self , name , default_value = None , permissive = False ):
224362 if not permissive : # internal use can be unchecked, e.g., to create the reserved features during initialization
225363 Schema .valid_feature_or_raise (name )
226364
227- created = [ ]
228-
365+ created = []
366+
229367 if not self .has_feature (name ):
230368 # ensure existence of main table:
231369 created .extend (self .create_main_table_if_not_exists ())
232370
233371 # create new feature:
234372 main_table = "features"
235- self .execute (' ALTER TABLE {} ADD {} TEXT NOT NULL DEFAULT {}' .format (main_table , name , default_value or "None" ))
373+ self .execute (" ALTER TABLE {} ADD {} TEXT NOT NULL DEFAULT {}" .format (main_table , name , default_value or "None" ))
236374 if default_value is not None :
237375 # feature is unique and resides in main features-table:
238376 self .features [name ] = FeatureInfo (name , self .dbname , main_table , name , default_value )
239377 else :
240378 # feature is not unique and resides in a separate table (column in main features-table is a foreign key):
241379 self .execute ("CREATE TABLE IF NOT EXISTS {} (hash TEXT NOT NULL, value TEXT NOT NULL, CONSTRAINT all_unique UNIQUE(hash, value))" .format (name ))
242380 self .execute ("INSERT INTO {} (hash, value) VALUES ('None', 'None')" .format (name ))
243- self .execute ("""CREATE TRIGGER IF NOT EXISTS {}_hash AFTER INSERT ON {}
244- BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END""" .format (name , name , main_table ))
381+ self .execute (
382+ """CREATE TRIGGER IF NOT EXISTS {}_hash AFTER INSERT ON {}
383+ BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END""" .format (name , name , main_table )
384+ )
245385 self .features [name ] = FeatureInfo (name , self .dbname , name , "value" , None )
246386
247387 # update schema:
@@ -252,18 +392,20 @@ def create_feature(self, name, default_value=None, permissive=False):
252392
253393 return created
254394
255-
def set_values(self, feature, value, hashes):
    """Assign ``value`` to ``feature`` for every hash in ``hashes``.

    For a feature whose ``default`` is None, one ``(hash, value)`` row per
    hash is inserted (duplicates ignored) into the feature's table, and the
    column named after that table in ``features`` is set to the hash for the
    given hashes. For a feature with a default, the rows are upserted into
    the feature's table, overwriting the column on a hash conflict.

    Args:
        feature: name of an existing feature.
        value: the value to store; it is embedded as an SQL string literal.
        hashes: iterable of hash strings to which the value is assigned.

    Raises:
        SchemaException: if the feature does not exist or no hashes are given.
    """
    if not self.has_feature(feature):
        raise SchemaException("Feature '{}' does not exist".format(feature))
    hash_list = list(hashes)  # iterated twice below, so materialize once
    if not hash_list:
        raise SchemaException("No hashes given")

    info = self.features[feature]
    table = info.table
    column = info.column

    def quote(text):
        # self.execute() only accepts a finished SQL string, so literals are
        # embedded directly; doubling single quotes keeps a value such as
        # "it's" from terminating the literal early (or injecting SQL).
        return "'{}'".format(str(text).replace("'", "''"))

    sql_value = quote(value)
    sql_hashes = ", ".join(quote(h) for h in hash_list)
    rows = ", ".join("({}, {})".format(quote(h), sql_value) for h in hash_list)

    if info.default is None:
        self.execute("INSERT OR IGNORE INTO {tab} (hash, {col}) VALUES {vals}".format(tab=table, col=column, vals=rows))
        self.execute("UPDATE features SET {col}=hash WHERE hash in ({h})".format(col=table, h=sql_hashes))
    else:
        self.execute(
            "INSERT INTO {tab} (hash, {col}) VALUES {vals} ON CONFLICT (hash) DO UPDATE SET {col}={val} WHERE hash in ({h})".format(
                tab=table, col=column, val=sql_value, vals=rows, h=sql_hashes
            )
        )
0 commit comments