Skip to content

Commit 32a8a8d

Browse files
committed
Remove 'local' and 'filename' from the reserved GBD feature names, since reserving them prevented their use in create-feature from the API
1 parent c7670a3 commit 32a8a8d

2 files changed

Lines changed: 191 additions & 60 deletions

File tree

gbd_core/schema.py

Lines changed: 187 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
# MIT License
32

43
# Copyright (c) 2023 Markus Iser, Karlsruhe Institute of Technology (KIT)
@@ -39,7 +38,6 @@ class FeatureInfo:
3938

4039

4140
class Schema:
42-
4341
def __init__(self, dbcon, dbname, path, features, context, csv=False):
4442
self.dbname = dbname
4543
self.path = path
@@ -52,11 +50,14 @@ def __init__(self, dbcon, dbname, path, features, context, csv=False):
5250
def is_database(cls, path):
5351
if os.path.isfile(path):
5452
sz = os.path.getsize(path)
55-
if sz == 0: return True # new sqlite3 files can be empty
56-
if sz < 100: return False # sqlite header is 100 bytes
57-
with open(path, 'rb') as fd: header = fd.read(100) # validate header
58-
return (header[:16] == b'SQLite format 3\x00')
59-
elif confirm("Database '{}' does not exist. Create new database?".format(path)):
53+
if sz == 0:
54+
return True # new sqlite3 files can be empty
55+
if sz < 100:
56+
return False # sqlite header is 100 bytes
57+
with open(path, "rb") as fd:
58+
header = fd.read(100) # validate header
59+
return header[:16] == b"SQLite format 3\x00"
60+
elif confirm("Database '{}' does not exist. Create new database?".format(path)):
6061
sqlite3.connect(path).close()
6162
return True
6263
else:
@@ -93,15 +94,15 @@ def from_csv(cls, path):
9394
def features_from_csv(cls, dbname, path, con) -> typing.Dict[str, FeatureInfo]:
9495
features = dict()
9596
with open(path) as csvfile:
96-
temp_lines = csvfile.readline() + '\n' + csvfile.readline()
97+
temp_lines = csvfile.readline() + "\n" + csvfile.readline()
9798
dialect = csv.Sniffer().sniff(temp_lines, delimiters=";, \t")
9899
csvfile.seek(0)
99100
csvreader = csv.DictReader(csvfile, dialect=dialect)
100101
if "hash" in csvreader.fieldnames:
101-
cols = [ re.sub('[^0-9a-zA-Z]+', '_', n) for n in csvreader.fieldnames ]
102+
cols = [re.sub("[^0-9a-zA-Z]+", "_", n) for n in csvreader.fieldnames]
102103
for colname in cols:
103104
features[colname] = FeatureInfo(colname, dbname, "features", colname, None)
104-
con.execute('CREATE TABLE IF NOT EXISTS {} ({})'.format("features", ", ".join(cols)))
105+
con.execute("CREATE TABLE IF NOT EXISTS {} ({})".format("features", ", ".join(cols)))
105106
for row in csvreader:
106107
con.execute("INSERT INTO {} VALUES ('{}')".format("features", "', '".join(row.values())))
107108
con.commit()
@@ -113,11 +114,11 @@ def features_from_csv(cls, dbname, path, con) -> typing.Dict[str, FeatureInfo]:
113114
@classmethod
114115
def features_from_database(cls, dbname, path, con) -> typing.Dict[str, FeatureInfo]:
115116
features = dict()
116-
sql_tables="SELECT tbl_name FROM sqlite_master WHERE type = 'table'"
117-
tables = [ tab for (tab, ) in con.execute(sql_tables).fetchall() if not tab.startswith("_") ]
117+
sql_tables = "SELECT tbl_name FROM sqlite_master WHERE type = 'table'"
118+
tables = [tab for (tab,) in con.execute(sql_tables).fetchall() if not tab.startswith("_")]
118119
for table in tables:
119120
columns = con.execute("PRAGMA table_info({})".format(table)).fetchall()
120-
for (index, colname, coltype, notnull, default_value, pk) in columns:
121+
for index, colname, coltype, notnull, default_value, pk in columns:
121122
is_fk_column = table == "features" and colname in tables
122123
is_fk_hash = table != "features" and colname == "hash"
123124
if not is_fk_column and not is_fk_hash:
@@ -137,13 +138,12 @@ def context_from_database(cls, path):
137138

138139
@classmethod
139140
def context_from_name(cls, name):
140-
pair = name.split('_')
141+
pair = name.split("_")
141142
if len(pair) > 1 and pair[0] in contexts.contexts():
142143
return pair[0]
143144
else:
144145
return contexts.default_context()
145146

146-
147147
@classmethod
148148
def dbname_from_path(cls, path):
149149
filename = os.path.splitext(os.path.basename(path))[0]
@@ -155,20 +155,159 @@ def dbname_from_path(cls, path):
155155
def valid_feature_or_raise(cls, name):
156156
if not re.match("[a-zA-Z][a-zA-Z0-9_]*", name):
157157
raise SchemaException("Feature name '{}' must be alphanumeric (incl. underline) and start with a letter.".format(name))
158-
gbd_keywords = [ 'hash', 'value', 'local', 'filename', 'features' ]
158+
# gbd_keywords = [ 'hash', 'value', 'local', 'filename', 'features' ]
159+
gbd_keywords = ["hash", "value", "features"]
159160
if name.lower() in gbd_keywords:
160161
raise SchemaException("Feature name '{}' is reserved.".format(name))
161-
sqlite_keywords = ['abort', 'action', 'add', 'after', 'all', 'alter', 'always', 'analyze', 'and', 'as', 'asc', 'attach', 'autoincrement',
162-
'before', 'begin', 'between', 'by', 'cascade', 'case', 'cast', 'check', 'collate', 'column', 'commit', 'conflict', 'constraint',
163-
'create', 'cross', 'current', 'current_date', 'current_time', 'current_timestamp', 'database', 'default', 'deferrable', 'deferred',
164-
'delete', 'desc', 'detach', 'distinct', 'do', 'drop', 'each', 'else', 'end', 'escape', 'except', 'exclude', 'exclusive', 'exists',
165-
'explain', 'fail', 'filter', 'first', 'following', 'for', 'foreign', 'from', 'full', 'generated', 'glob', 'group', 'groups',
166-
'having', 'if', 'ignore', 'immediate', 'in', 'index', 'indexed', 'initially', 'inner', 'insert', 'instead', 'intersect', 'into', 'is', 'isnull',
167-
'join', 'key', 'last', 'left', 'like', 'limit', 'match', 'materialized', 'natural', 'no', 'not', 'nothing', 'notnull', 'null', 'nulls',
168-
'of', 'offset', 'on', 'or', 'order', 'others', 'outer', 'over', 'partition', 'plan', 'pragma', 'preceding', 'primary', 'query',
169-
'raise', 'range', 'recursive', 'references', 'regexp', 'reindex', 'release', 'rename', 'replace', 'restrict', 'returning', 'right', 'rollback',
170-
'row', 'rows', 'savepoint', 'select', 'set', 'table', 'temp', 'temporary', 'then', 'ties', 'to', 'transaction', 'trigger', 'unbounded', 'union',
171-
'unique', 'update', 'using', 'vacuum', 'values', 'view', 'virtual', 'when', 'where', 'window', 'with', 'without']
162+
sqlite_keywords = [
163+
"abort",
164+
"action",
165+
"add",
166+
"after",
167+
"all",
168+
"alter",
169+
"always",
170+
"analyze",
171+
"and",
172+
"as",
173+
"asc",
174+
"attach",
175+
"autoincrement",
176+
"before",
177+
"begin",
178+
"between",
179+
"by",
180+
"cascade",
181+
"case",
182+
"cast",
183+
"check",
184+
"collate",
185+
"column",
186+
"commit",
187+
"conflict",
188+
"constraint",
189+
"create",
190+
"cross",
191+
"current",
192+
"current_date",
193+
"current_time",
194+
"current_timestamp",
195+
"database",
196+
"default",
197+
"deferrable",
198+
"deferred",
199+
"delete",
200+
"desc",
201+
"detach",
202+
"distinct",
203+
"do",
204+
"drop",
205+
"each",
206+
"else",
207+
"end",
208+
"escape",
209+
"except",
210+
"exclude",
211+
"exclusive",
212+
"exists",
213+
"explain",
214+
"fail",
215+
"filter",
216+
"first",
217+
"following",
218+
"for",
219+
"foreign",
220+
"from",
221+
"full",
222+
"generated",
223+
"glob",
224+
"group",
225+
"groups",
226+
"having",
227+
"if",
228+
"ignore",
229+
"immediate",
230+
"in",
231+
"index",
232+
"indexed",
233+
"initially",
234+
"inner",
235+
"insert",
236+
"instead",
237+
"intersect",
238+
"into",
239+
"is",
240+
"isnull",
241+
"join",
242+
"key",
243+
"last",
244+
"left",
245+
"like",
246+
"limit",
247+
"match",
248+
"materialized",
249+
"natural",
250+
"no",
251+
"not",
252+
"nothing",
253+
"notnull",
254+
"null",
255+
"nulls",
256+
"of",
257+
"offset",
258+
"on",
259+
"or",
260+
"order",
261+
"others",
262+
"outer",
263+
"over",
264+
"partition",
265+
"plan",
266+
"pragma",
267+
"preceding",
268+
"primary",
269+
"query",
270+
"raise",
271+
"range",
272+
"recursive",
273+
"references",
274+
"regexp",
275+
"reindex",
276+
"release",
277+
"rename",
278+
"replace",
279+
"restrict",
280+
"returning",
281+
"right",
282+
"rollback",
283+
"row",
284+
"rows",
285+
"savepoint",
286+
"select",
287+
"set",
288+
"table",
289+
"temp",
290+
"temporary",
291+
"then",
292+
"ties",
293+
"to",
294+
"transaction",
295+
"trigger",
296+
"unbounded",
297+
"union",
298+
"unique",
299+
"update",
300+
"using",
301+
"vacuum",
302+
"values",
303+
"view",
304+
"virtual",
305+
"when",
306+
"where",
307+
"window",
308+
"with",
309+
"without",
310+
]
172311
if name.lower() in sqlite_keywords or name.startswith("sqlite_"):
173312
raise SchemaException("Feature name '{}' is reserved by sqlite.".format(name))
174313

@@ -188,9 +327,8 @@ def execute(self, sql):
188327
con.commit()
189328
con.close()
190329

191-
192330
def get_tables(self):
193-
return list(set([ f.table for f in self.get_features() ]))
331+
return list(set([f.table for f in self.get_features()]))
194332

195333
def get_features(self):
196334
return self.features.values()
@@ -204,44 +342,46 @@ def absorb(self, schema):
204342
else:
205343
raise SchemaException("Internal Error: Attempt to merge non-virtual schemata")
206344

207-
208345
def create_main_table_if_not_exists(self):
209346
main_table = "features"
210347
if not main_table in self.get_tables():
211348
self.execute("CREATE TABLE IF NOT EXISTS {} (hash UNIQUE NOT NULL)".format(main_table))
212349
# insert all known hashes into main table and create triggers
213-
for table in [ t for t in self.get_tables() if t != main_table ]:
350+
for table in [t for t in self.get_tables() if t != main_table]:
214351
self.execute("INSERT OR IGNORE INTO {} (hash) SELECT DISTINCT(hash) FROM {}".format(main_table, table))
215-
self.execute("""CREATE TRIGGER IF NOT EXISTS {}_dval AFTER INSERT ON {}
216-
BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END""".format(table, table, main_table))
352+
self.execute(
353+
"""CREATE TRIGGER IF NOT EXISTS {}_dval AFTER INSERT ON {}
354+
BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END""".format(table, table, main_table)
355+
)
217356
self.features["hash"] = FeatureInfo("hash", self.dbname, main_table, "hash", None)
218-
return [ self.features["hash"] ]
357+
return [self.features["hash"]]
219358
else:
220-
return [ ]
221-
359+
return []
222360

223361
def create_feature(self, name, default_value=None, permissive=False):
224362
if not permissive: # internal use can be unchecked, e.g., to create the reserved features during initialization
225363
Schema.valid_feature_or_raise(name)
226364

227-
created = [ ]
228-
365+
created = []
366+
229367
if not self.has_feature(name):
230368
# ensure existence of main table:
231369
created.extend(self.create_main_table_if_not_exists())
232370

233371
# create new feature:
234372
main_table = "features"
235-
self.execute('ALTER TABLE {} ADD {} TEXT NOT NULL DEFAULT {}'.format(main_table, name, default_value or "None"))
373+
self.execute("ALTER TABLE {} ADD {} TEXT NOT NULL DEFAULT {}".format(main_table, name, default_value or "None"))
236374
if default_value is not None:
237375
# feature is unique and resides in main features-table:
238376
self.features[name] = FeatureInfo(name, self.dbname, main_table, name, default_value)
239377
else:
240378
# feature is not unique and resides in a separate table (column in main features-table is a foreign key):
241379
self.execute("CREATE TABLE IF NOT EXISTS {} (hash TEXT NOT NULL, value TEXT NOT NULL, CONSTRAINT all_unique UNIQUE(hash, value))".format(name))
242380
self.execute("INSERT INTO {} (hash, value) VALUES ('None', 'None')".format(name))
243-
self.execute("""CREATE TRIGGER IF NOT EXISTS {}_hash AFTER INSERT ON {}
244-
BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END""".format(name, name, main_table))
381+
self.execute(
382+
"""CREATE TRIGGER IF NOT EXISTS {}_hash AFTER INSERT ON {}
383+
BEGIN INSERT OR IGNORE INTO {} (hash) VALUES (NEW.hash); END""".format(name, name, main_table)
384+
)
245385
self.features[name] = FeatureInfo(name, self.dbname, name, "value", None)
246386

247387
# update schema:
@@ -252,18 +392,20 @@ def create_feature(self, name, default_value=None, permissive=False):
252392

253393
return created
254394

255-
256395
def set_values(self, feature, value, hashes):
257396
if not self.has_feature(feature):
258397
raise SchemaException("Feature '{}' does not exist".format(feature))
259398
if not len(hashes):
260399
raise SchemaException("No hashes given")
261400
table = self.features[feature].table
262401
column = self.features[feature].column
263-
values = ', '.join(["('{}', '{}')".format(hash, value) for hash in hashes])
402+
values = ", ".join(["('{}', '{}')".format(hash, value) for hash in hashes])
264403
if self.features[feature].default is None:
265404
self.execute("INSERT OR IGNORE INTO {tab} (hash, {col}) VALUES {vals}".format(tab=table, col=column, vals=values))
266405
self.execute("UPDATE features SET {col}=hash WHERE hash in ('{h}')".format(col=table, h="', '".join(hashes)))
267406
else:
268-
self.execute("INSERT INTO {tab} (hash, {col}) VALUES {vals} ON CONFLICT (hash) DO UPDATE SET {col}='{val}' WHERE hash in ('{h}')".format(tab=table, col=column, val=value, vals=values, h="', '".join(hashes)))
269-
407+
self.execute(
408+
"INSERT INTO {tab} (hash, {col}) VALUES {vals} ON CONFLICT (hash) DO UPDATE SET {col}='{val}' WHERE hash in ('{h}')".format(
409+
tab=table, col=column, val=value, vals=values, h="', '".join(hashes)
410+
)
411+
)

pyproject.toml

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,15 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "gbd_tools"
7-
version = "4.9.11"
7+
version = "4.9.12"
88
description = "GBD Tools: Maintenance and Distribution of Benchmark Instances and their Attributes"
99
readme = "README.md"
1010
license-files = ["LICENSE"]
1111
requires-python = ">=3.6"
12-
authors = [
13-
{ name = "Markus Iser", email = "markus.iser@kit.edu" }
14-
]
12+
authors = [{ name = "Markus Iser", email = "markus.iser@kit.edu" }]
1513
urls = { Homepage = "https://github.com/Udopia/gbd" }
16-
classifiers = [
17-
"Programming Language :: Python :: 3"
18-
]
19-
dependencies = [
20-
"flask",
21-
"tatsu",
22-
"pandas",
23-
"waitress",
24-
"pebble",
25-
"gbdc"
26-
]
14+
classifiers = ["Programming Language :: Python :: 3"]
15+
dependencies = ["flask", "tatsu", "pandas", "waitress", "pebble", "gbdc"]
2716
scripts = { gbd = "gbd:main" }
2817

2918
[tool.setuptools]

0 commit comments

Comments
 (0)