Skip to content

Commit f87d885

Browse files
committed
Merge main into v7.1 (excluding extracted-files)
1 parent eb2dbc0 commit f87d885

10 files changed

Lines changed: 67 additions & 139 deletions

.github/workflows/generate-files.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727

2828
steps:
2929
- name: Check out GEDCOM
30-
uses: actions/checkout@v5
30+
uses: actions/checkout@v6
3131

3232
- name: Get the branch name
3333
id: extract_branch

.github/workflows/propagate-main-to-v7.1.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424

2525
steps:
2626
- name: Check out GEDCOM
27-
uses: actions/checkout@v5
27+
uses: actions/checkout@v6
2828

2929
- name: Set git config
3030
env:

.github/workflows/validate-yaml.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121

2222
steps:
2323
- name: Checkout GEDCOM
24-
uses: actions/checkout@v5
24+
uses: actions/checkout@v6
2525

2626
- name: Validate YAML
2727
run: yamllint .

build/hyperlink-code.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ def anchorify(m):
4444
return full
4545

4646
doc = re.sub(r'<code>(g7:[^<]*)</code></h', r'<code class="uri">\1</code></h', doc)
47-
doc = re.sub(r'<code>(g7.1:[^<]*)</code></h', r'<code class="uri">\1</code></h', doc)
4847

4948
chunks = re.split(r'(<pre[^>]*ged(?:struct|com)[^>]*>.*?</pre>)', doc, flags=re.DOTALL)
5049

build/hyperlink.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,6 @@ def slugify(bit):
2020
si = bit.rfind('`g7:')+4
2121
ei = bit.find('`', si)
2222
slug = bit[si:ei].replace('#','-')
23-
elif '`g7.1:' in bit:
24-
si = bit.rfind('`g7.1:')+6
25-
ei = bit.find('`', si)
26-
slug = bit[si:ei].replace('#','-')
2723
elif '`' in bit:
2824
bit = re.search('`[A-Z0-9_`.]+`', bit)
2925
slug = bit.group(0).replace('`','').replace('.','-')
@@ -91,7 +87,7 @@ def abnf(m):
9187
slug = table_tags[m.group(1)]
9288
return linkify(m.group(0), slug)
9389
return m.group(0)
94-
uried = re.sub(r'(?<![\[.`])`g7(?:\.1)?:[-A-Z0-9a-z`._#]+`', repl, line)
90+
uried = re.sub(r'(?<![\[.`])`g7:[-A-Z0-9a-z`._#]+`', repl, line)
9591
if istable: return uried
9692
tagged = re.sub(r'(?<![\[.`])`[A-Z0-9`._#]+`', repl, uried)
9793
abnfed = re.sub(r'(?<![\[.`])`([A-Za-z0-9]+)`', abnf, tagged)

build/uri-def.py

Lines changed: 31 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,9 @@ def find_data_types(txt, g7):
3434
for section in re.finditer(r'^#+ *([^\n]*)\n+((?:[^\n]|\n+[^\n#])*[^\n]*URI for[^\n]*data types? is(?:[^\n]|\n+[^\n#])*)', txt, re.M):
3535
for dt, uri in re.findall(r'URI[^\n]*`([^\n`]*)` data type[^\n]*`([^`\n:]*:[^\n`]*)`', section.group(0)):
3636
dturi[dt] = uri
37-
if uri.startswith('g7:') or uri.startswith('g7.1:'):
38-
slug = uri[uri.find(':')+1:]
39-
if '#' in uri: uri = uri[:uri.find('#')]
40-
if slug not in g7:
41-
g7[slug] = ('data type', [section.group(2).strip()])
37+
if uri.startswith('g7:'):
38+
if uri[3:] not in g7:
39+
g7[uri[3:]] = ('data type', [section.group(2).strip()])
4240
return dturi
4341

4442
def find_cat_tables(txt, g7, tagsets):
@@ -110,19 +108,18 @@ def find_cat_tables(txt, g7, tagsets):
110108
raise Exception("unexpected enumeration URI prefix "+repr(pfx))
111109
if pfx not in cats:
112110
cats[pfx] = meaning
113-
if pfx.startswith('g7:') or pfx.startswith('g7.1:'):
114-
slug = pfx[pfx.find(':')+1:]
115-
if slug in g7:
116-
raise Exception(pfx+' defined as an enumeration and a '+g7[slug][0])
111+
if pfx.startswith('g7:'):
112+
if pfx[3:] in g7:
113+
raise Exception(pfx+' defined as an enumeration and a '+g7[pfx[3:]][0])
117114
if label:
118-
g7[slug] = (yamltype, meaning, None, label)
115+
g7[pfx[3:]] = (yamltype, meaning, None, label)
119116
else:
120-
g7[slug] = (yamltype, meaning)
117+
g7[pfx[3:]] = (yamltype, meaning)
121118
return enums, calendars
122119

123120
def find_calendars(txt, g7):
124121
"""Looks for sections defining a `g7:cal-` URI"""
125-
for bit in re.finditer(r'#+ `[^`]*`[^\n]*\n+((?:\n+(?!#)|[^\n])*is `g7(?:\.1)?:(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)', txt):
122+
for bit in re.finditer(r'#+ `[^`]*`[^\n]*\n+((?:\n+(?!#)|[^\n])*is `g7:(cal-[^`]*)`(?:\n+(?!#)|[^\n#])*)', txt):
126123
m = re.search('The epoch markers? ([`_A-Z0-9, and]+) (is|are) permitted', bit.group(1))
127124
marker = [] if not m else re.findall(r'[A-Z0-9_]+', m[1])
128125
m = re.match(r'^The ([A-Z][A-Za-z]* )+calendar', bit.group(1))
@@ -226,27 +223,24 @@ def find_descriptions(txt, g7, ssp):
226223
for name,uri,desc in re.findall(r'#+ `[^`]*`[^\n]*\(([^)]*)\)[^\n]*`([^:`\n]*:[^`\n]*)`[^\n]*\n+((?:\n+(?!#)|[^\n])*)', txt):
227224
if uri not in ssp:
228225
raise Exception('Found section for '+uri+' but no gedstruct')
229-
if uri.startswith('g7:') or uri.startswith('g7.1:'):
230-
slug = uri[uri.find(':')+1:]
231-
g7.setdefault(slug,('structure',[],ssp[uri],name.strip()))[1].extend((
226+
if uri.startswith('g7:'):
227+
g7.setdefault(uri[3:],('structure',[],ssp[uri],name.strip()))[1].extend((
232228
name.strip(),
233229
desc.strip()
234230
))
235231
for other in re.findall(r'[Aa] type of `(\S*)`', desc):
236232
m = re.search('^#+ +`'+other+r'`[^\n`]*\n((?:[^\n]+|\n+(?!#))*)', txt, re.M)
237233
if m:
238-
g7[uri[uri.find(':')+1:]][1].append(m.group(1).strip())
234+
g7[uri[3:]][1].append(m.group(1).strip())
239235

240236
# error check that gedstruct and sections align
241237
for uri in ssp:
242238
if uri.startswith('g7:') and uri[3:] not in g7:
243239
raise Exception('Found gedstruct for '+uri+' but no section')
244-
if uri.startswith('g7.1:') and uri[5:] not in g7:
245-
raise Exception('Found gedstruct for '+uri+' but no section')
246240

247241
# gedstruct sections
248242
for uri, desc in re.findall(r'#+ *`[^`]*` *:=[^\n]*\n+`+[^\n]*\n+n [^\n]*\} *(\S+:\S+) *(?:\n [^\n]*)*\n`+[^\n]*\n+((?:[^\n]|\n(?!#))*)', txt):
249-
g7[uri[uri.find(':')+1:]][1].append(desc.strip())
243+
g7[uri[3:]][1].append(desc.strip())
250244

251245
tagsets = {}
252246
# tag tables
@@ -257,11 +251,7 @@ def find_descriptions(txt, g7, ssp):
257251
if header.startswith('Indi'): pfx = 'INDI-'
258252
for tag, name, desc in re.findall(r'`([A-Z_0-9]+)` *\| *([^|\n]*?) *\| *([^|\n]*[^ |\n]) *', table.group(2)):
259253
if '<br' in name:
260-
if '`g7:' in name:
261-
tag = name[name.find('`g7:')+4:name.rfind('`')]
262-
elif '`g7.1' in name:
263-
tag = name[name.find('`g7.1')+6:name.rfind('`')]
264-
else: assert False, "name without URI: "+repr(name)
254+
tag = name[name.find('`g7:')+4:name.rfind('`')]
265255
name = name[:name.find('<br')]
266256
if tag not in g7: tag = pfx+tag
267257
if tag not in g7:
@@ -282,7 +272,7 @@ def find_enum_by_link(txt, enums, tagsets):
282272
# 'g7:FAM-FACT',
283273
# )) ## do not do for enumset-EVEN
284274
enum_prefix = {k[k.find('enum-')+5:] for e in enums.values() for k in e }
285-
for sect in re.finditer(r'# *`(g7(?:\.1)?:enumset-[^`]*)`[\s\S]*?\n#', txt):
275+
for sect in re.finditer(r'# *`(g7:enumset-[^`]*)`[\s\S]*?\n#', txt):
286276
if '[Events]' in sect.group(0):
287277
key = sect.group(1).replace('`','').replace('.','-')
288278
for k in tagsets:
@@ -291,7 +281,7 @@ def find_enum_by_link(txt, enums, tagsets):
291281
for tag in tagsets[k]:
292282
if tag.startswith('INDI-') and tag[5:] in enum_prefix: tag = 'enum-'+tag[5:]
293283
if tag.startswith('FAM-') and tag[4:] in enum_prefix: tag = 'enum-'+tag[4:]
294-
tag = addpfx(tag)
284+
tag = 'g7:'+tag
295285
if tag in enums[key]: continue
296286
enums[key].append(tag)
297287
if '[Attributes]' in sect.group(0):
@@ -302,22 +292,22 @@ def find_enum_by_link(txt, enums, tagsets):
302292
for tag in tagsets[k]:
303293
if tag.startswith('INDI-') and tag[5:] in enum_prefix: tag = 'enum-'+tag[5:]
304294
if tag.startswith('FAM-') and tag[4:] in enum_prefix: tag = 'enum-'+tag[4:]
305-
tag = addpfx(tag)
295+
tag = 'g7:'+tag
306296
if tag in enums[key]: continue
307297
enums[key].append(tag)
308298
# enums.setdefault(key, []).extend(_ for _ in ['g7:'+_2.replace('INDI-','enum-').replace('FAM-','enum-') for _2 in tagsets[k]] if _ not in enums.get(key,[]))
309299

310300
def find_enumsets(txt):
311301
res = {}
312-
for sect in re.finditer(r'# *[^\n]*?`(g7(?:\.1)?:[^`]*)`([\s\S]*?)\n#', txt):
313-
if re.search(f'from set `g7(?:\.1)?:enumset-', sect.group(2)):
302+
for sect in re.finditer(r'# *[^\n]*?`(g7:[^`]*)`([\s\S]*?)\n#', txt):
303+
if 'from set `g7:enumset-' in sect.group(2):
314304
key = sect.group(1)
315-
val = re.search(r'from set `(g7(?:\.1)?:enumset-[^`]*)`', sect.group(2)).group(1)
305+
val = re.search(r'from set `(g7:enumset-[^`]*)`', sect.group(2)).group(1)
316306
res[key] = val
317307
return res
318308

319309
def tidy_markdown(md, indent, width=79):
320-
"""
310+
r"""
321311
The markdown files in the specification directory use the following Markdown dialect:
322312
323313
Part of GFM:
@@ -351,7 +341,7 @@ def tidy_markdown(md, indent, width=79):
351341
import mdformat
352342
out = mdformat.text(md, extensions={"gfm"}, options={"number":True, "wrap":width})
353343

354-
return out.rstrip().replace('\n','\n'+' '*indent).replace('\[','[').replace('\]',']')
344+
return out.rstrip().replace('\n','\n'+' '*indent).replace(r'\[','[').replace(r'\]',']')
355345

356346
def yaml_str_helper(pfx, md, width=79):
357347
txt = tidy_markdown(md, len(pfx), width)
@@ -362,16 +352,10 @@ def yaml_str_helper(pfx, md, width=79):
362352
return pfx + txt
363353

364354
def expand_prefix(txt, prefixes):
365-
global prerelease
366355
for key in sorted(prefixes.keys(), key=lambda x:-len(x)):
367356
k = key+':'
368357
if txt.startswith(k):
369-
uri = prefixes[key] + txt[len(k):]
370-
if 'https://gedcom.io/terms/v7.1/' in uri:
371-
prerelease = True
372-
return uri
373-
if 'https://gedcom.io/terms/v7.1/' in txt:
374-
prerelease = True
358+
return prefixes[key] + txt[len(k):]
375359
return txt
376360

377361
if __name__ == '__main__':
@@ -381,24 +365,14 @@ def expand_prefix(txt, prefixes):
381365
txt = get_text(specs)
382366

383367
prefixes = get_prefixes(txt)
384-
prefix_of = {} # generally {tag: 'g7'} or {"record-REPO":"g7.1"} but sometimes {"month-":"g7"} for a set of values
385-
for [pfx,slug] in re.findall('('+'|'.join(prefixes)+r'):([^\s`<>]+)', txt):
386-
assert prefix_of.get(slug,pfx) == pfx, f"Multiple prefixes for {slug}: {prefix_of[slug]} and {pfx}"
387-
prefix_of[slug] = pfx
388-
def addpfx(tag):
389-
if tag in prefix_of: return prefix_of[tag]+':'+tag
390-
if '-' in tag:
391-
lead = tag[:tag.find('-')+1]
392-
if lead in prefix_of: return prefix_of[lead]+':'+tag
393-
assert False, 'no prefix for '+tag+' in '+str(prefix_of)
394368
dtypes = find_data_types(txt, g7)
395369
rules = parse_rules(txt)
396370
ssp = parse_gedstruct(txt, rules, dtypes)
397371
tagsets = find_descriptions(txt, g7, ssp)
398372
enums, calendars = find_cat_tables(txt, g7, tagsets)
399373
find_enum_by_link(txt, enums, tagsets)
400374
for k in enums:
401-
g7[k[k.find(':')+1:]] = ('enumeration set',[])
375+
g7[k[3:]] = ('enumeration set',[])
402376
enumsets = find_enumsets(txt)
403377
find_calendars(txt, g7)
404378
dtypes_inv = {expand_prefix(v,prefixes):k for k,v in dtypes.items()}
@@ -411,19 +385,17 @@ def addpfx(tag):
411385

412386
for tag in g7:
413387
print('outputting', tag, '...', end=' ')
414-
prerelease = False
415388
maybe = join(dirname(specs[0]),'terms',tag)
416389
if exists(maybe):
417390
copyfile(maybe, join(dest,tag))
418391
print('by copying', maybe, '...', end=' ')
419392
continue
420-
thispath = join(dest,tag.replace('#','-'))
421-
with open(thispath, 'w') as fh:
393+
with open(join(dest,tag.replace('#','-')), 'w') as fh:
422394
fh.write('%YAML 1.2\n---\n')
423395
print('lang: en-US', file=fh)
424396
print('\ntype:',g7[tag][0], file=fh)
425397

426-
uri = expand_prefix(addpfx(tag),prefixes)
398+
uri = expand_prefix('g7:'+tag,prefixes)
427399
print('\nuri:', uri, file=fh)
428400

429401
if g7[tag][0] in ('structure', 'enumeration', 'calendar', 'month'):
@@ -452,7 +424,7 @@ def addpfx(tag):
452424
print('\npayload:', payload, file=fh)
453425
payload_lookup.append([uri, payload if payload != 'null' else ''])
454426
if d['pay'] and 'Enum' in d['pay']:
455-
setname = expand_prefix(enumsets[addpfx(tag)],prefixes)
427+
setname = expand_prefix(enumsets['g7:'+tag],prefixes)
456428
print('\nenumeration set: "'+setname+'"', file=fh)
457429
enum_lookup.append([uri,setname])
458430
# print('\nenumeration values:', file=fh)
@@ -478,7 +450,7 @@ def addpfx(tag):
478450
struct_lookup.append(['',ptag,uri])
479451
elif g7[tag][0] == 'calendar':
480452
print('\nmonths:', file=fh)
481-
for k in calendars[addpfx(tag)]:
453+
for k in calendars['g7:'+tag]:
482454
print(' - "'+expand_prefix(k, prefixes)+'"', file=fh)
483455
if len(g7[tag][2]) == 0:
484456
print('\nepochs: []', file=fh)
@@ -488,11 +460,11 @@ def addpfx(tag):
488460
print(' -', epoch, file=fh)
489461
elif g7[tag][0] == 'month':
490462
print('\ncalendars:', file=fh)
491-
for k in calendars[addpfx(tag)]:
463+
for k in calendars['g7:'+tag]:
492464
print(' - "'+expand_prefix(k, prefixes)+'"', file=fh)
493465
elif g7[tag][0] == 'enumeration set':
494466
print('\nenumeration values:', file=fh)
495-
for k in enums[addpfx(tag)]:
467+
for k in enums['g7:'+tag]:
496468
valname = expand_prefix(k, prefixes)
497469
print(' - "'+valname+'"', file=fh)
498470
enumset_lookup.append([uri, valname])
@@ -501,20 +473,11 @@ def addpfx(tag):
501473
# handle use in enumerations (which can include any tag type)
502474
is_used_by = False
503475
for tag2 in sorted(enums):
504-
if (addpfx(tag)) in enums[tag2]:
476+
if ('g7:'+tag) in enums[tag2]:
505477
if not is_used_by:
506478
print('\nvalue of:', file=fh)
507479
is_used_by = True
508480
print(' - "'+expand_prefix(tag2,prefixes)+'"', file=fh)
509-
510-
if prerelease:
511-
print('\nprerelease: true', file=fh)
512-
513-
# manually check for v7.1 subsuming v7.0
514-
if '/v7.1/' in uri:
515-
res = run(['git','show','main:'+thispath], capture_output=True)
516-
if not res.returncode:
517-
print('\nsubsumes:', uri.replace('/v7.1/','/v7/'), file=fh)
518481

519482
print('\ncontact: "https://gedcom.io/community/"', file=fh)
520483
fh.write('...\n')
@@ -542,4 +505,3 @@ def addpfx(tag):
542505
for row in data:
543506
print('\t'.join(row), file=f)
544507
print('done')
545-

specification/gedcom-0-introduction.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ is shorthand for a URI beginning with the corresponding URI prefix
147147
| Short Prefix | URI Prefix |
148148
|:-------------|:------------------------------------|
149149
| `g7` | `https://gedcom.io/terms/v7/` |
150-
| `g7.1` | `https://gedcom.io/terms/v7.1/` |
151150
| `xsd` | `http://www.w3.org/2001/XMLSchema#` |
152151
| `dcat` | `http://www.w3.org/ns/dcat#` |
153152

specification/gedcom-1-hierarchical-container-format.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ Extensions cannot change existing meanings, cardinalities, or calendars.
319319
A **tagged extension structure** is a structure whose tag matches production `extTag`. Tagged extension structures may appear as records or substructures of any other structure. Their meaning is defined by their tag, as is discussed more fully in the section [Extension Tags].
320320
321321
Any substructure of a tagged extension structure that uses a tag matching `stdTag` is an **extension-defined substructure**.
322-
Substructures of an extension-defined substructure that uses a tag matching `stdTag` are also extension-defined substructures, but this specification deprecates using a `stdTag` with a definition that does not match any standard type with that tag.
322+
Substructures of an extension-defined substructure that uses a tag matching `stdTag` are also extension-defined substructures.
323323
The meaning and use of each extension-defined substructure is defined by the tagged extension structure it occurs within, not by its tag alone nor by this specification.
324324
325325
:::example
@@ -343,7 +343,9 @@ deprecated.
343343
- Even though both `DATE`s appear to have `g7:type-DATE` payloads, we can't know that is the intended data type without consulting the defining specifications of `_LOC` and `_POP`, respectively. The first might be a `g7:type-DATE#period` and the second a `g7:type-DATE#exact`, for example.
344344
:::
345345
346-
If an extension-defined substructure has a tag that is also used by one or more standard structures, its meaning and payload type should match at least one of those standard structure types.
346+
Extension-defined substructures should match the structure type, payload, and substructure collection of at least one
347+
standard type with the same tag, though they can add more substructures to the substructure collection.
348+
This specification deprecates using a `stdTag` with a definition that does not match any standard type with that tag.
347349
348350
:::example
349351
An extension-defined substructure with tag "`DATE`" should provide a date or date period relevant to its superstructure, as do all `DATE`-tagged structures in this specification. Extensions should not use "`DATE`" to tag a structure describing anything else (even something that might reasonably be abbreviated "date", such as someone an individual dated).

specification/gedcom-3-structures-1-organization.md

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -345,8 +345,7 @@ A `MULTIMEDIA_RECORD` may contain a pointer to a `SOURCE_RECORD` and vice versa.
345345
#### `REPOSITORY_RECORD` :=
346346
347347
```gedstruct
348-
n @XREF:REPO@ REPO {1:1} g7.1:record-REPO
349-
+1 RESN <List:Enum> {0:1} g7:RESN
348+
n @XREF:REPO@ REPO {1:1} g7:record-REPO
350349
+1 NAME <Text> {1:1} g7:NAME
351350
+1 <<ADDRESS_STRUCTURE>> {0:1}
352351
+1 PHON <Special> {0:M} g7:PHON
@@ -371,8 +370,7 @@ Until such time, it is recommended that the repository record store current cont
371370
#### `SHARED_NOTE_RECORD` :=
372371
373372
```gedstruct
374-
n @XREF:SNOTE@ SNOTE <Text> {1:1} g7.1:record-SNOTE
375-
+1 RESN <List:Enum> {0:1} g7:RESN
373+
n @XREF:SNOTE@ SNOTE <Text> {1:1} g7:record-SNOTE
376374
+1 MIME <MediaType> {0:1} g7:MIME
377375
+1 LANG <Language> {0:1} g7:LANG
378376
+1 TRAN <Text> {0:M} g7:NOTE-TRAN
@@ -417,8 +415,7 @@ A `SHARED_NOTE_RECORD` may contain a pointer to a `SOURCE_RECORD` and vice versa
417415
#### `SOURCE_RECORD` :=
418416
419417
```gedstruct
420-
n @XREF:SOUR@ SOUR {1:1} g7.1:record-SOUR
421-
+1 RESN <List:Enum> {0:1} g7:RESN
418+
n @XREF:SOUR@ SOUR {1:1} g7:record-SOUR
422419
+1 DATA {0:1} g7:DATA
423420
+2 EVEN <List:Enum> {0:M} g7:DATA-EVEN
424421
+3 DATE <DatePeriod> {0:1} g7:DATA-EVEN-DATE
@@ -456,8 +453,7 @@ A `SOURCE_RECORD` may contain a pointer to a `MULTIMEDIA_RECORD` and vice versa.
456453
#### `SUBMITTER_RECORD` :=
457454
458455
```gedstruct
459-
n @XREF:SUBM@ SUBM {1:1} g7.1:record-SUBM
460-
+1 RESN <List:Enum> {0:1} g7:RESN
456+
n @XREF:SUBM@ SUBM {1:1} g7:record-SUBM
461457
+1 NAME <Text> {1:1} g7:NAME
462458
+1 <<ADDRESS_STRUCTURE>> {0:1}
463459
+1 PHON <Special> {0:M} g7:PHON

0 commit comments

Comments
 (0)