77
88 Usage:
99 - Create a conversion configuration file. Each line must contain an IRI,
10- a whitespace character and a string to replace the IRI by.
10+ a whitespace character and a string to replace the IRI by.
1111 - Run converter.py <prefix> <count> <config>
1212 - Result will have name <prefix><count>v<vertices count>e<edges count>.xml
1313
1414 The graph will contain explicit inverted edges, labelled with an appended 'R'.
15- """
15+ """
1616
1717import rdflib , sys , os
1818
@@ -37,51 +37,55 @@ def add_rdf_edge(subj, pred, obj, graph):
# --- Load the predicate replacement table -----------------------------------
# Each non-blank line of the config file is "<IRI> <label>", separated by any
# run of whitespace. Predicates found in this table are emitted under <label>;
# all other predicates are emitted under the label 'OTHER'.
replace = {}  # map for replacing predicates
config = sys.argv[3]
with open(config, 'r') as config_file:  # closed deterministically
    for line_no, line in enumerate(config_file, 1):
        fields = line.split()  # tolerant of tabs, repeated and leading spaces
        if not fields:
            continue  # ignore blank lines
        if len(fields) < 2:
            raise ValueError('%s:%d: expected "<IRI> <label>", got %r'
                             % (config, line_no, line))
        replace[rdflib.URIRef(fields[0])] = fields[1]

print(replace)

# --- Conversion state --------------------------------------------------------
res = {}  # map from resources to integer ids (stored as strings)
next_id = 0  # id counter; ends up equal to the number of vertices
edges_count = 0  # Total edges

graph = rdflib.Graph()  # output graph, filled incrementally file by file
prefix = sys.argv[1]
count = int(sys.argv[2])

processed = []  # input files that were converted completely
notreplaced = set()  # predicates that had no entry in the config

# --- Convert every input file <prefix><i>_<j>.owl ----------------------------
for i in range(count):
    for j in range(MAX_FILES_PER_UNI):
        filename = prefix + str(i) + '_' + str(j) + '.owl'
        try:
            g = rdflib.Graph()
            g.parse(filename)

            for s, p, o in g:
                # Assign a fresh id to each resource on first sight.
                for r in (s, o):
                    if r not in res:
                        res[r] = str(next_id)
                        next_id += 1

                if p in replace:
                    add_rdf_edge(res[s], replace[p], res[o], graph)
                    # NOTE(review): the 'R' edge is documented as "inverted"
                    # but is added in the same s -> o direction; confirm
                    # whether add_rdf_edge or a consumer swaps the endpoints.
                    add_rdf_edge(res[s], replace[p] + 'R', res[o], graph)
                    edges_count += 2
                else:
                    add_rdf_edge(res[s], 'OTHER', res[o], graph)
                    edges_count += 1
                    notreplaced.add(p)

            processed.append(filename)
            print('Merged:', filename)
        except Exception as err:
            # Best effort: a file that cannot be processed is skipped.
            # Presumably not every index below MAX_FILES_PER_UNI exists, so
            # a missing file stays silent; a file that exists but failed is
            # reported instead of being hidden.
            if os.path.exists(filename):
                sys.stderr.write('Skipped %s: %r\n' % (filename, err))

target = prefix + str(count) + 'v' + str(next_id) + 'e' + str(edges_count)  # output file
write_to_rdf(target, graph)

print('Total vertices:', next_id)
print('Total edges:', edges_count)
print('Processed files:\n ', processed)
print('Not replaced labels:', notreplaced)
0 commit comments