@@ -34,11 +34,9 @@ def find_data_types(txt, g7):
3434 for section in re .finditer (r'^#+ *([^\n]*)\n+((?:[^\n]|\n+[^\n#])*[^\n]*URI for[^\n]*data types? is(?:[^\n]|\n+[^\n#])*)' , txt , re .M ):
3535 for dt , uri in re .findall (r'URI[^\n]*`([^\n`]*)` data type[^\n]*`([^`\n:]*:[^\n`]*)`' , section .group (0 )):
3636 dturi [dt ] = uri
37- if uri .startswith ('g7:' ) or uri .startswith ('g7.1:' ):
38- slug = uri [uri .find (':' )+ 1 :]
39- if '#' in uri : uri = uri [:uri .find ('#' )]
40- if slug not in g7 :
41- g7 [slug ] = ('data type' , [section .group (2 ).strip ()])
37+ if uri .startswith ('g7:' ):
38+ if uri [3 :] not in g7 :
39+ g7 [uri [3 :]] = ('data type' , [section .group (2 ).strip ()])
4240 return dturi
4341
4442def find_cat_tables (txt , g7 , tagsets ):
@@ -110,19 +108,18 @@ def find_cat_tables(txt, g7, tagsets):
110108 raise Exception ("unexpected enumeration URI prefix " + repr (pfx ))
111109 if pfx not in cats :
112110 cats [pfx ] = meaning
113- if pfx .startswith ('g7:' ) or pfx .startswith ('g7.1:' ):
114- slug = pfx [pfx .find (':' )+ 1 :]
115- if slug in g7 :
116- raise Exception (pfx + ' defined as an enumeration and a ' + g7 [slug ][0 ])
111+ if pfx .startswith ('g7:' ):
112+ if pfx [3 :] in g7 :
113+ raise Exception (pfx + ' defined as an enumeration and a ' + g7 [pfx [3 :]][0 ])
117114 if label :
118- g7 [slug ] = (yamltype , meaning , None , label )
115+ g7 [pfx [ 3 :] ] = (yamltype , meaning , None , label )
119116 else :
120- g7 [slug ] = (yamltype , meaning )
117+ g7 [pfx [ 3 :] ] = (yamltype , meaning )
121118 return enums , calendars
122119
123120def find_calendars (txt , g7 ):
124121 """Looks for sections defining a `g7:cal-` URI"""
125- for bit in re .finditer (r'#+ `[^`]*`[^\n]*\n+((?:\n+(?!#)|[^\n])*is `g7(?:\.1)? :(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)' , txt ):
122+ for bit in re .finditer (r'#+ `[^`]*`[^\n]*\n+((?:\n+(?!#)|[^\n])*is `g7:(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)' , txt ):
126123 m = re .search ('The epoch markers? ([`_A-Z0-9, and]+) (is|are) permitted' , bit .group (1 ))
127124 marker = [] if not m else re .findall (r'[A-Z0-9_]+' , m [1 ])
128125 m = re .match (r'^The ([A-Z][A-Za-z]* )+calendar' , bit .group (1 ))
@@ -226,27 +223,24 @@ def find_descriptions(txt, g7, ssp):
226223 for name ,uri ,desc in re .findall (r'#+ `[^`]*`[^\n]*\(([^)]*)\)[^\n]*`([^:`\n]*:[^`\n]*)`[^\n]*\n+((?:\n+(?!#)|[^\n])*)' , txt ):
227224 if uri not in ssp :
228225 raise Exception ('Found section for ' + uri + ' but no gedstruct' )
229- if uri .startswith ('g7:' ) or uri .startswith ('g7.1:' ):
230- slug = uri [uri .find (':' )+ 1 :]
231- g7 .setdefault (slug ,('structure' ,[],ssp [uri ],name .strip ()))[1 ].extend ((
226+ if uri .startswith ('g7:' ):
227+ g7 .setdefault (uri [3 :],('structure' ,[],ssp [uri ],name .strip ()))[1 ].extend ((
232228 name .strip (),
233229 desc .strip ()
234230 ))
235231 for other in re .findall (r'[Aa] type of `(\S*)`' , desc ):
236232 m = re .search ('^#+ +`' + other + r'`[^\n`]*\n((?:[^\n]+|\n+(?!#))*)' , txt , re .M )
237233 if m :
238- g7 [uri [uri . find ( ':' ) + 1 :]][1 ].append (m .group (1 ).strip ())
234+ g7 [uri [3 :]][1 ].append (m .group (1 ).strip ())
239235
240236 # error check that gedstruct and sections align
241237 for uri in ssp :
242238 if uri .startswith ('g7:' ) and uri [3 :] not in g7 :
243239 raise Exception ('Found gedstruct for ' + uri + ' but no section' )
244- if uri .startswith ('g7.1:' ) and uri [5 :] not in g7 :
245- raise Exception ('Found gedstruct for ' + uri + ' but no section' )
246240
247241 # gedstruct sections
248242 for uri , desc in re .findall (r'#+ *`[^`]*` *:=[^\n]*\n+`+[^\n]*\n+n [^\n]*\} *(\S+:\S+) *(?:\n [^\n]*)*\n`+[^\n]*\n+((?:[^\n]|\n(?!#))*)' , txt ):
249- g7 [uri [uri . find ( ':' ) + 1 :]][1 ].append (desc .strip ())
243+ g7 [uri [3 :]][1 ].append (desc .strip ())
250244
251245 tagsets = {}
252246 # tag tables
@@ -257,11 +251,7 @@ def find_descriptions(txt, g7, ssp):
257251 if header .startswith ('Indi' ): pfx = 'INDI-'
258252 for tag , name , desc in re .findall (r'`([A-Z_0-9]+)` *\| *([^|\n]*?) *\| *([^|\n]*[^ |\n]) *' , table .group (2 )):
259253 if '<br' in name :
260- if '`g7:' in name :
261- tag = name [name .find ('`g7:' )+ 4 :name .rfind ('`' )]
262- elif '`g7.1' in name :
263- tag = name [name .find ('`g7.1' )+ 6 :name .rfind ('`' )]
264- else : assert False , "name without URI: " + repr (name )
254+ tag = name [name .find ('`g7:' )+ 4 :name .rfind ('`' )]
265255 name = name [:name .find ('<br' )]
266256 if tag not in g7 : tag = pfx + tag
267257 if tag not in g7 :
@@ -282,7 +272,7 @@ def find_enum_by_link(txt, enums, tagsets):
282272 # 'g7:FAM-FACT',
283273 # )) ## do not do for enumset-EVEN
284274 enum_prefix = {k [k .find ('enum-' )+ 5 :] for e in enums .values () for k in e }
285- for sect in re .finditer (r'# *`(g7(?:\.1)? :enumset-[^`]*)`[\s\S]*?\n#' , txt ):
275+ for sect in re .finditer (r'# *`(g7:enumset-[^`]*)`[\s\S]*?\n#' , txt ):
286276 if '[Events]' in sect .group (0 ):
287277 key = sect .group (1 ).replace ('`' ,'' ).replace ('.' ,'-' )
288278 for k in tagsets :
@@ -291,7 +281,7 @@ def find_enum_by_link(txt, enums, tagsets):
291281 for tag in tagsets [k ]:
292282 if tag .startswith ('INDI-' ) and tag [5 :] in enum_prefix : tag = 'enum-' + tag [5 :]
293283 if tag .startswith ('FAM-' ) and tag [4 :] in enum_prefix : tag = 'enum-' + tag [4 :]
294- tag = addpfx ( tag )
284+ tag = 'g7:' + tag
295285 if tag in enums [key ]: continue
296286 enums [key ].append (tag )
297287 if '[Attributes]' in sect .group (0 ):
@@ -302,22 +292,22 @@ def find_enum_by_link(txt, enums, tagsets):
302292 for tag in tagsets [k ]:
303293 if tag .startswith ('INDI-' ) and tag [5 :] in enum_prefix : tag = 'enum-' + tag [5 :]
304294 if tag .startswith ('FAM-' ) and tag [4 :] in enum_prefix : tag = 'enum-' + tag [4 :]
305- tag = addpfx ( tag )
295+ tag = 'g7:' + tag
306296 if tag in enums [key ]: continue
307297 enums [key ].append (tag )
308298 # enums.setdefault(key, []).extend(_ for _ in ['g7:'+_2.replace('INDI-','enum-').replace('FAM-','enum-') for _2 in tagsets[k]] if _ not in enums.get(key,[]))
309299
def find_enumsets(txt):
    """Map each `g7:` URI to the enumeration set its section refers to.

    Scans ``txt`` for header sections whose heading line contains a
    backtick-quoted ``g7:...`` URI, and looks in the section body for the
    phrase ``from set `g7:enumset-...` ``.

    Parameters:
        txt: the full specification text (Markdown).

    Returns:
        A dict ``{uri: enumset_uri}`` where both sides are the prefixed
        names exactly as written between backticks in ``txt``.

    Raises:
        Exception: if a section starts a ``from set `g7:enumset-`` reference
        but never closes the backtick.

    Note: a section is only recognised when it is followed by another line
    starting with ``#`` (the pattern ends in ``\\n#``), so a trailing section
    at the very end of ``txt`` is not examined.
    """
    # Compiled once; captures the enumeration set URI between the backticks.
    set_ref = re.compile(r'from set `(g7:enumset-[^`]*)`')
    res = {}
    for sect in re.finditer(r'# *[^\n]*?`(g7:[^`]*)`([\s\S]*?)\n#', txt):
        body = sect.group(2)
        found = set_ref.search(body)
        if found:
            res[sect.group(1)] = found.group(1)
        elif 'from set `g7:enumset-' in body:
            # The reference was opened but has no closing backtick; fail
            # loudly with context rather than with an AttributeError on None.
            raise Exception('Unterminated enumeration set reference in section for ' + sect.group(1))
    return res
318308
319309def tidy_markdown (md , indent , width = 79 ):
320- """
310+ r """
321311 The markdown files in the specification directory use the following Markdown dialect:
322312
323313 Part of GFM:
@@ -351,7 +341,7 @@ def tidy_markdown(md, indent, width=79):
351341 import mdformat
352342 out = mdformat .text (md , extensions = {"gfm" }, options = {"number" :True , "wrap" :width })
353343
354- return out .rstrip ().replace ('\n ' ,'\n ' + ' ' * indent ).replace ('\[' ,'[' ).replace ('\]' ,']' )
344+ return out .rstrip ().replace ('\n ' ,'\n ' + ' ' * indent ).replace (r '\[' ,'[' ).replace (r '\]' ,']' )
355345
356346def yaml_str_helper (pfx , md , width = 79 ):
357347 txt = tidy_markdown (md , len (pfx ), width )
@@ -362,16 +352,10 @@ def yaml_str_helper(pfx, md, width=79):
362352 return pfx + txt
363353
def expand_prefix(txt, prefixes):
    """Expand a prefixed name such as ``g7:HUSB`` into a full URI.

    Parameters:
        txt: the string to expand; returned unchanged when it does not start
            with any known ``prefix:``.
        prefixes: mapping of prefix name (without the colon) to the URI text
            that replaces ``prefix:``.

    Returns:
        ``prefixes[p] + rest`` for the first prefix ``p`` such that ``txt``
        starts with ``p + ':'``; otherwise ``txt`` itself.
    """
    # Longer prefix names are tried before shorter ones.
    for key in sorted(prefixes, key=len, reverse=True):
        k = key + ':'
        if txt.startswith(k):
            return prefixes[key] + txt[len(k):]
    return txt
376360
377361if __name__ == '__main__' :
@@ -381,24 +365,14 @@ def expand_prefix(txt, prefixes):
381365 txt = get_text (specs )
382366
383367 prefixes = get_prefixes (txt )
384- prefix_of = {} # generally {tag: 'g7'} or {"record-REPO":"g7.1"} but sometimes {"month-":"g7"} for a set of values
385- for [pfx ,slug ] in re .findall ('(' + '|' .join (prefixes )+ r'):([^\s`<>]+)' , txt ):
386- assert prefix_of .get (slug ,pfx ) == pfx , f"Multiple prefixes for { slug } : { prefix_of [slug ]} and { pfx } "
387- prefix_of [slug ] = pfx
388- def addpfx (tag ):
389- if tag in prefix_of : return prefix_of [tag ]+ ':' + tag
390- if '-' in tag :
391- lead = tag [:tag .find ('-' )+ 1 ]
392- if lead in prefix_of : return prefix_of [lead ]+ ':' + tag
393- assert False , 'no prefix for ' + tag + ' in ' + str (prefix_of )
394368 dtypes = find_data_types (txt , g7 )
395369 rules = parse_rules (txt )
396370 ssp = parse_gedstruct (txt , rules , dtypes )
397371 tagsets = find_descriptions (txt , g7 , ssp )
398372 enums , calendars = find_cat_tables (txt , g7 , tagsets )
399373 find_enum_by_link (txt , enums , tagsets )
400374 for k in enums :
401- g7 [k [k . find ( ':' ) + 1 :]] = ('enumeration set' ,[])
375+ g7 [k [3 :]] = ('enumeration set' ,[])
402376 enumsets = find_enumsets (txt )
403377 find_calendars (txt , g7 )
404378 dtypes_inv = {expand_prefix (v ,prefixes ):k for k ,v in dtypes .items ()}
@@ -411,19 +385,17 @@ def addpfx(tag):
411385
412386 for tag in g7 :
413387 print ('outputting' , tag , '...' , end = ' ' )
414- prerelease = False
415388 maybe = join (dirname (specs [0 ]),'terms' ,tag )
416389 if exists (maybe ):
417390 copyfile (maybe , join (dest ,tag ))
418391 print ('by copying' , maybe , '...' , end = ' ' )
419392 continue
420- thispath = join (dest ,tag .replace ('#' ,'-' ))
421- with open (thispath , 'w' ) as fh :
393+ with open (join (dest ,tag .replace ('#' ,'-' )), 'w' ) as fh :
422394 fh .write ('%YAML 1.2\n ---\n ' )
423395 print ('lang: en-US' , file = fh )
424396 print ('\n type:' ,g7 [tag ][0 ], file = fh )
425397
426- uri = expand_prefix (addpfx ( tag ) ,prefixes )
398+ uri = expand_prefix ('g7:' + tag ,prefixes )
427399 print ('\n uri:' , uri , file = fh )
428400
429401 if g7 [tag ][0 ] in ('structure' , 'enumeration' , 'calendar' , 'month' ):
@@ -452,7 +424,7 @@ def addpfx(tag):
452424 print ('\n payload:' , payload , file = fh )
453425 payload_lookup .append ([uri , payload if payload != 'null' else '' ])
454426 if d ['pay' ] and 'Enum' in d ['pay' ]:
455- setname = expand_prefix (enumsets [addpfx ( tag ) ],prefixes )
427+ setname = expand_prefix (enumsets ['g7:' + tag ],prefixes )
456428 print ('\n enumeration set: "' + setname + '"' , file = fh )
457429 enum_lookup .append ([uri ,setname ])
458430 # print('\nenumeration values:', file=fh)
@@ -478,7 +450,7 @@ def addpfx(tag):
478450 struct_lookup .append (['' ,ptag ,uri ])
479451 elif g7 [tag ][0 ] == 'calendar' :
480452 print ('\n months:' , file = fh )
481- for k in calendars [addpfx ( tag ) ]:
453+ for k in calendars ['g7:' + tag ]:
482454 print (' - "' + expand_prefix (k , prefixes )+ '"' , file = fh )
483455 if len (g7 [tag ][2 ]) == 0 :
484456 print ('\n epochs: []' , file = fh )
@@ -488,11 +460,11 @@ def addpfx(tag):
488460 print (' -' , epoch , file = fh )
489461 elif g7 [tag ][0 ] == 'month' :
490462 print ('\n calendars:' , file = fh )
491- for k in calendars [addpfx ( tag ) ]:
463+ for k in calendars ['g7:' + tag ]:
492464 print (' - "' + expand_prefix (k , prefixes )+ '"' , file = fh )
493465 elif g7 [tag ][0 ] == 'enumeration set' :
494466 print ('\n enumeration values:' , file = fh )
495- for k in enums [addpfx ( tag ) ]:
467+ for k in enums ['g7:' + tag ]:
496468 valname = expand_prefix (k , prefixes )
497469 print (' - "' + valname + '"' , file = fh )
498470 enumset_lookup .append ([uri , valname ])
@@ -501,20 +473,11 @@ def addpfx(tag):
501473 # handle use in enumerations (which can include any tag type)
502474 is_used_by = False
503475 for tag2 in sorted (enums ):
504- if (addpfx ( tag ) ) in enums [tag2 ]:
476+ if ('g7:' + tag ) in enums [tag2 ]:
505477 if not is_used_by :
506478 print ('\n value of:' , file = fh )
507479 is_used_by = True
508480 print (' - "' + expand_prefix (tag2 ,prefixes )+ '"' , file = fh )
509-
510- if prerelease :
511- print ('\n prerelease: true' , file = fh )
512-
513- # manually check for v7.1 subsuming v7.0
514- if '/v7.1/' in uri :
515- res = run (['git' ,'show' ,'main:' + thispath ], capture_output = True )
516- if not res .returncode :
517- print ('\n subsumes:' , uri .replace ('/v7.1/' ,'/v7/' ), file = fh )
518481
519482 print ('\n contact: "https://gedcom.io/community/"' , file = fh )
520483 fh .write ('...\n ' )
@@ -542,4 +505,3 @@ def addpfx(tag):
542505 for row in data :
543506 print ('\t ' .join (row ), file = f )
544507 print ('done' )
545-
0 commit comments