Skip to content

Commit 029beda

Browse files
committed
Small tool for RPQ generation
1 parent 902340c commit 029beda

3 files changed

Lines changed: 56 additions & 0 deletions

File tree

tools/gen_RPQ/RDF_edge_stat.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import rdflib, sys
2+
3+
def get_labels_count(rdf):
    """Count how often each predicate occurs among the triples of ``rdf``.

    ``rdf`` is iterated once and every item is unpacked as a
    ``(subject, predicate, object)`` triple; only the predicate is used.

    Returns a list of ``(predicate, count)`` tuples ordered from the most
    frequent predicate to the least frequent one.  Predicates with equal
    counts come out in reverse order of their first occurrence, because a
    stable ascending sort is reversed as a whole.
    """
    counts = {}
    for _subject, predicate, _obj in rdf:
        if predicate in counts:
            counts[predicate] += 1
        else:
            counts[predicate] = 1

    ascending = sorted(counts.items(), key=lambda pair: pair[1])
    return ascending[::-1]
12+
13+
def print_config(lst, path_to_config):
    """Write a label-to-index mapping to the file ``path_to_config``.

    For every entry of ``lst`` one line ``<entry[0]> <index>`` is written,
    where ``index`` is the zero-based position of the entry in ``lst``.
    The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    with open(path_to_config, 'w') as config:
        for index, entry in enumerate(lst):
            config.write(entry[0] + ' ' + str(index) + '\n')
19+
20+
# Read the RDF file named by the first command-line argument.
# Graph.load() was removed in rdflib 6; Graph.parse() with format="xml"
# is the call load() made with its default arguments.
g = rdflib.Graph()
g.parse(sys.argv[1], format="xml")

# (predicate, count) pairs, most frequent predicate first.
r = get_labels_count(g)

# Report the number of edges per predicate on standard output.
for x in r:
    print(x[0], ': ', x[1])

# Save the predicate -> integer mapping to the file named by the second
# command-line argument.
print_config(r, sys.argv[2])

tools/gen_RPQ/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
First step is edges investigation
2+
* Run ```python3 RDF_edge_stat.py my_rdf.rdf my_config``` . The mapping from URIs to integers will be saved to my_config. You can use this config for RDF to triples conversion with the RDF_to_triple tool. The number of edges labelled with each URI will be printed to the console.
3+
* Run ```python3 gen.py my_config n_URIs q_for_each_tpl```, where ```my_config``` is the config generated at the previous step. The first ```n_URIs``` labels from the config will be used to generate queries, and ```q_for_each_tpl``` queries will be generated for each template.

tools/gen_RPQ/gen.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import numpy,sys
2+
3+
# Query templates as (number of labels, %-format string) pairs.  The first
# element of each pair equals the number of '%s' placeholders in the second.
templates = [
    # Kleene-star forms
    (1, '%s*'),
    (2, '%s %s*'),
    (3, '%s %s* %s*'),
    (2, '(%s | %s)*'),
    (3, '(%s | %s | %s)*'),
    (4, '(%s | %s | %s | %s)*'),
    (5, '(%s | %s | %s | %s | %s)*'),
    (3, '%s %s* %s'),
    (2, '%s* %s*'),
    (3, '%s %s %s*'),
    (2, '%s? %s*'),
    # one-or-more over an alternation
    (2, '(%s | %s)+'),
    (3, '(%s | %s | %s)+'),
    (4, '(%s | %s | %s | %s)+'),
    (5, '(%s | %s | %s | %s | %s)+'),
    # alternation followed by a starred label
    (3, '(%s | %s) %s*'),
    (4, '(%s | %s | %s) %s*'),
    (5, '(%s | %s | %s | %s) %s*'),
    (6, '(%s | %s | %s | %s | %s) %s*'),
    # plain concatenations
    (2, '%s %s'),
    (3, '%s %s %s'),
    (4, '%s %s %s %s'),
    (5, '%s %s %s %s %s'),
]
8+
9+
def gen(tpl, n, lst, k):
    """Generate up to ``n`` distinct queries from the template ``tpl``.

    Each query is built by substituting ``k`` labels, drawn without
    repetition from ``lst``, into the ``%s`` placeholders of ``tpl``.

    Args:
        tpl: ``%``-style format string with ``k`` ``%s`` placeholders.
        n: number of distinct queries requested.
        lst: list of labels to draw from.
        k: number of labels substituted into each query.

    Returns:
        A set of query strings.  It holds ``n`` queries when more than
        ``n`` ordered selections of ``k`` labels exist; otherwise it holds
        every query that can be built, which may be fewer than ``n``.

    Raises:
        ValueError: if at least one query is requested but ``lst`` holds
            fewer than ``k`` labels.
    """
    import itertools

    if n <= 0:
        return set()

    pool = list(lst)
    if k > len(pool):
        raise ValueError(
            'template %r needs %d labels, but only %d are available'
            % (tpl, k, len(pool))
        )

    # Number of ordered selections of k labels out of the pool.
    arrangements = 1
    for i in range(k):
        arrangements *= len(pool) - i

    # Random sampling can never collect more distinct queries than there
    # are arrangements; rejection sampling would then loop forever, so
    # enumerate every arrangement instead (at most n of them).
    if n >= arrangements:
        return {tpl % combo for combo in itertools.permutations(pool, k)}

    # NOTE: assumes the labels are distinct, as in a generated config; with
    # duplicate labels ``arrangements`` overestimates the distinct queries.
    res = set()
    while len(res) < n:
        perm = numpy.random.permutation(pool)
        res.add(tpl % tuple(perm[0:k]))
    return res
15+
16+
def gen_from_config(config, num_of_lalbels, num_of_queries):
    """Generate queries for every template from the labels in a config file.

    Args:
        config: path to a text file whose lines consist of fields separated
            by a single space; the second field of each line is used as the
            label.
        num_of_lalbels: only the first ``num_of_lalbels`` labels of the
            config are used.
        num_of_queries: number of queries requested per template.

    Returns:
        A list with one entry per item of ``templates``, each entry being
        the result of ``gen`` for that template.
    """
    # Close the file deterministically and ignore blank lines, which have
    # no second field to read.
    with open(config, 'r') as config_file:
        lbls = [
            line.split(' ')[1].rstrip()
            for line in config_file
            if line.strip()
        ]
    selected = lbls[0:num_of_lalbels]
    return [gen(tpl[1], num_of_queries, selected, tpl[0]) for tpl in templates]
19+
20+
# Command line: <config path> <number of labels> <queries per template>.
r = gen_from_config(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))

# Print every generated query on its own line, one template after another.
for query in (q for batch in r for q in batch):
    print(query)

0 commit comments

Comments
 (0)