Skip to content

Commit 796f29a

Browse files
authored
Merge pull request #22 from EgorOrachyov/master
LUBM: enhance converter, add new labels mappings
2 parents b39a22b + f355657 commit 796f29a

2 files changed

Lines changed: 33 additions & 24 deletions

File tree

tools/LUBM/config.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,9 @@ http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#takesCourse TC
88
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#publicationAuthor PA
99
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#undergraduateDegreeFrom UG
1010
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#mastersDegreeFrom MD
11-
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#doctoralDegreeFrom DD
11+
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#doctoralDegreeFrom DD
12+
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#emailAddress EA
13+
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#name NM
14+
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#researchInterest RI
15+
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#telephone TP
16+
http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#teachingAssistantOf TA

tools/LUBM/converter.py

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
88
Usage:
99
- Create a conversion configuration file. Each line must contain an IRI,
10-
a whitespace character and a string to replace the IRI by.
10+
a whitespace character and a string to replace the IRI by.
1111
- Run converter.py <prefix> <count> <config>
1212
- Result will have name <prefix><count><vertices count><indices count>.xml
1313
1414
The graph will contain explicit inverted edges, labeled with an appended 'R'.
15-
"""
15+
"""
1616

1717
import rdflib, sys, os
1818

@@ -37,51 +37,55 @@ def add_rdf_edge(subj, pred, obj, graph):
3737
replace = {} # map for replacing predicates
3838
config = sys.argv[3]
3939
for l in open(config,'r').readlines():
40-
pair = l.split(' ')
41-
old = rdflib.URIRef(pair[0].strip(' '))
42-
new = pair[1].strip('\n').strip(' ')
43-
replace[old] = new
40+
pair = l.split(' ')
41+
old = rdflib.URIRef(pair[0].strip(' '))
42+
new = pair[1].strip('\n').strip(' ')
43+
replace[old] = new
4444

4545
print(replace)
4646

4747
res = {} # map from resources to integer ids
4848
next_id = 0 # id counter
49-
edges_count = 0 # Total edges
49+
edges_count = 0 # Total edges
5050

5151
graph = rdflib.Graph()
5252
prefix = sys.argv[1]
5353
count = int(sys.argv[2])
5454

5555
processed = []
56+
notreplaced = set()
5657

5758
for i in range(0,count):
5859
for j in range(0,MAX_FILES_PER_UNI):
5960
filename = prefix + str(i) + '_' + str(j) + '.owl'
6061
try:
6162
g = rdflib.Graph()
6263
g.parse(filename)
63-
graph = graph + g # Merge graphs here (if 1 file with sub-graph - OK)
64+
65+
for s,p,o in g:
66+
for r in [s,o]:
67+
if r not in res:
68+
res[r] = str(next_id)
69+
next_id += 1
70+
71+
if p in replace:
72+
add_rdf_edge(res[s], replace[p], res[o], graph)
73+
add_rdf_edge(res[s], replace[p] + 'R', res[o], graph)
74+
edges_count += 2
75+
else:
76+
add_rdf_edge(res[s], 'OTHER', res[o], graph)
77+
edges_count += 1
78+
notreplaced.add(p)
79+
6480
processed.append(filename)
81+
print('Merged:', filename)
6582
except Exception:
6683
pass
6784

68-
output = rdflib.Graph()
69-
for s,p,o in graph:
70-
for r in [s,o]:
71-
if r not in res:
72-
res[r] = str(next_id)
73-
next_id += 1
74-
if p in replace:
75-
add_rdf_edge(res[s], replace[p], res[o], output)
76-
add_rdf_edge(res[s], replace[p] + 'R', res[o], output)
77-
edges_count += 2
78-
else:
79-
add_rdf_edge(res[s], 'OTHER', res[o], output)
80-
edges_count += 1
81-
8285
target = prefix + str(count) + 'v' + str(next_id) + 'e' + str(edges_count) # output file
83-
write_to_rdf(target,output)
86+
write_to_rdf(target,graph)
8487

8588
print('Total vertices:', next_id)
8689
print('Total edges:', edges_count)
8790
print('Processed files:\n', processed)
91+
print('Not replaced labels:', notreplaced)

0 commit comments

Comments
 (0)