-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathrtxcomplete.py
More file actions
161 lines (127 loc) · 5.64 KB
/
rtxcomplete.py
File metadata and controls
161 lines (127 loc) · 5.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import sqlite3
import re
import timeit
import sys
import os
def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs)
pathlist = os.path.realpath(__file__).split(os.path.sep)
RTXindex = pathlist.index("RTX")
sys.path.append(os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code']))
from RTXConfiguration import RTXConfiguration
RTXConfig = RTXConfiguration()
autocomplete_filepath = os.path.sep.join([*pathlist[:(RTXindex + 1)], 'code', 'autocomplete'])
conn = None
cursor = None
cache_conn = None
cache_cursor = None
def load():
global conn
global cursor
global cache_conn
global cache_cursor
database_name = f"{autocomplete_filepath}{os.path.sep}{RTXConfig.autocomplete_path.split('/')[-1]}"
conn = sqlite3.connect(database_name)
cursor = conn.cursor()
try:
conn.execute(f"SELECT term FROM terms LIMIT 1")
print(f"INFO: Connected to {database_name}",file=sys.stderr)
except:
print(f"WARN: Could NOT connect to {database_name}. Please check that file and database exist!",file=sys.stderr)
cache_database_name = os.path.dirname(os.path.abspath(__file__)) + '/rtxcomplete_cache.sqlite'
cache_conn = sqlite3.connect(cache_database_name)
cache_cursor = cache_conn.cursor()
print(f"INFO: Connected to {cache_database_name}",file=sys.stderr)
cache_cursor.execute("CREATE TABLE IF NOT EXISTS cached_fragments(fragment VARCHAR(1024))")
cache_cursor.execute("CREATE TABLE IF NOT EXISTS cached_fragment_terms(fragment_id VARCHAR(1024), term VARCHAR(1024))")
return True
def get_nodes_like(word,requested_limit):
debug = False
t0 = timeit.default_timer()
requested_limit = int(requested_limit)
values = []
n_values = 0
if len(word) < 2:
return values
#### Try to avoid SQL injection exploits by sanitizing input #1823
word = word.replace('"','')
floor = word[:-1]
ceiling = floor + 'zz'
#### Get a list of matching node names that begin with these letters
if debug:
eprint(f"INFO: Query 1")
#cursor.execute("SELECT term FROM term WHERE term LIKE \"%s%%\" ORDER BY length(term),term LIMIT %s" % (word,1000))
cursor.execute(f"SELECT term FROM terms WHERE term > \"{floor}\" AND term < \"{ceiling}\" AND term LIKE \"{word}%%\" ORDER BY length(term),term LIMIT {requested_limit}")
rows = cursor.fetchall()
values_dict = {}
for row in rows:
term = row[0]
if term.upper() not in values_dict:
if debug:
eprint(f" - {term}")
properties = { "curie": '??', "name": term, "type": '??' }
values.append(properties)
values_dict[term.upper()] = 1
n_values += 1
if n_values >= requested_limit:
break
t1 = timeit.default_timer()
if debug:
eprint(f"INFO: Query 1 in {t1-t0} sec")
#### If we haven't reached the limit yet, add a list of matching terms that contain this string
if n_values < requested_limit:
if debug:
eprint(f"INFO: Query 2")
#### See if there is a cached entry already
word_part = word
found_fragment = None
while len(word_part) > 2:
cache_cursor.execute(f"SELECT rowid, fragment FROM cached_fragments WHERE fragment == \"{word_part}\"")
rows = cache_cursor.fetchall()
if len(rows) > 0:
fragment_id = rows[0][0]
found_fragment = rows[0][1]
break
word_part = word_part[:-1]
if found_fragment:
if debug:
eprint(f"Found matching fragment {found_fragment} as fragment_id {fragment_id}")
cache_cursor.execute(f"SELECT term FROM cached_fragment_terms WHERE fragment_id = {fragment_id} AND term LIKE \"%%{word}%%\"")
rows = cache_cursor.fetchall()
for row in rows:
term = row[0]
if term.upper() not in values_dict:
if n_values < requested_limit:
if debug:
print(f" - {term}")
properties = { "curie": '??', "name": term, "type": '??' }
values.append(properties)
n_values += 1
if found_fragment is None:
#### Cache this fragment in the database
try:
cache_cursor.execute("INSERT INTO cached_fragments(fragment) VALUES(?)", (word,))
fragment_id = cache_cursor.lastrowid
except:
eprint(f"ERROR: Unable to INSERT into cached_fragments(fragment)",file=sys.stderr)
fragment_id = 0
if debug:
eprint(f"fragment_id = {fragment_id}")
#### Execute an expensive LIKE query
cursor.execute("SELECT term FROM terms WHERE term LIKE \"%%%s%%\" ORDER BY length(term),term LIMIT %s" % (word,10000))
rows = cursor.fetchall()
for row in rows:
term = row[0]
if term.upper() not in values_dict:
if n_values < requested_limit:
if debug:
eprint(f" - {term}")
properties = { "curie": '??', "name": term, "type": '??' }
values.append(properties)
n_values += 1
values_dict[term.upper()] = 1
cache_cursor.execute("INSERT INTO cached_fragment_terms(fragment_id, term) VALUES(?,?)", (fragment_id, term,))
cache_conn.commit()
t2 = timeit.default_timer()
if debug:
eprint(f"INFO: Query 2 in {t2-t1} sec")
return(values)