Skip to content

Commit 5761b14

Browse files
authored
Fixes for extracting metadata from proposals (microsoft#554)
* Handle the reference-style links used in proposal 0029
* Handle "Sponsor(s)", as used in proposal 0010
* Add a "--overwrite" flag to help with testing
1 parent 1b3d711 commit 5761b14

1 file changed

Lines changed: 49 additions & 34 deletions

File tree

.github/update_proposal_front_matter.py

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
This script processes .md files in the proposals directory and:
66
1. Extracts metadata from bullet points (Proposal, Author, Sponsor, Status, Planned Version)
77
2. Adds Jekyll front matter if it doesn't exist
8-
3. Warns if front matter already exists (and leaves it unchanged)
9-
4. Skips the templates subdirectory
8+
3. By default, preserves existing front matter values (warns about conflicts)
9+
4. With --overwrite flag, replaces existing front matter with extracted values
10+
5. Skips the templates subdirectory
1011
"""
1112

13+
import argparse
1214
import os
1315
import re
1416
import sys
@@ -34,7 +36,7 @@ def extract_metadata_from_content(content: str) -> Dict[str, str]:
3436
simple_patterns = {
3537
'proposal': r'^\*\s*Proposal:\s*\[([^\]]+)\]',
3638
'author': r'^\*\s*Author\(s\):\s*(.+)$',
37-
'sponsor': r'^\*\s*Sponsor:\s*(.+)$',
39+
'sponsor': r'^\*\s*Sponsor(?:\(s\))?:\s*(.+)$',
3840
'status': r'^\*\s*Status:\s*(.+)$',
3941
'planned_version': r'^\*\s*Planned\s+Version:\s*(.+)$'
4042
}
@@ -51,6 +53,8 @@ def save_current_field():
5153
if current_field == 'author' or current_field == 'sponsor':
5254
# Remove markdown links: [Name](url) -> Name
5355
value = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', value)
56+
# Remove reference-style markdown links: [Name][ref] -> Name
57+
value = re.sub(r'\[([^\]]+)\]\[[^\]]*\]', r'\1', value)
5458
# Clean up any remaining formatting
5559
value = re.sub(r'\s+', ' ', value) # normalize whitespace
5660
elif current_field == 'status':
@@ -176,32 +180,23 @@ def parse_existing_front_matter(content: str) -> Tuple[Dict[str, str], str]:
176180
return front_matter_dict, remaining_content
177181

178182

179-
def create_front_matter(metadata: Dict[str, str], title: Optional[str], existing_front_matter: Dict[str, str] = None) -> str:
183+
def create_front_matter(metadata: Dict[str, str], title: Optional[str], existing_front_matter: Dict[str, str] = None, overwrite: bool = False) -> str:
180184
"""Create Jekyll front matter from extracted metadata, merging with existing front matter."""
181185
if existing_front_matter is None:
182186
existing_front_matter = {}
183187

184188
# Start with existing front matter, then add/override with extracted metadata
185189
merged_front_matter = existing_front_matter.copy()
186190

187-
# Only add extracted values if they don't already exist in front matter
188-
if title and 'title' not in merged_front_matter:
189-
merged_front_matter['title'] = title
191+
# Add extracted values based on overwrite mode
192+
if title:
193+
if overwrite or 'title' not in merged_front_matter:
194+
merged_front_matter['title'] = title
190195

191-
if 'proposal' in metadata and 'proposal' not in merged_front_matter:
192-
merged_front_matter['proposal'] = metadata['proposal']
193-
194-
if 'author' in metadata and 'author' not in merged_front_matter:
195-
merged_front_matter['author'] = metadata['author']
196-
197-
if 'sponsor' in metadata and 'sponsor' not in merged_front_matter:
198-
merged_front_matter['sponsor'] = metadata['sponsor']
199-
200-
if 'status' in metadata and 'status' not in merged_front_matter:
201-
merged_front_matter['status'] = metadata['status']
202-
203-
if 'planned_version' in metadata and 'planned_version' not in merged_front_matter:
204-
merged_front_matter['planned_version'] = metadata['planned_version']
196+
for key in ['proposal', 'author', 'sponsor', 'status', 'planned_version']:
197+
if key in metadata:
198+
if overwrite or key not in merged_front_matter:
199+
merged_front_matter[key] = metadata[key]
205200

206201
# Convert to YAML format
207202
front_matter_lines = ['---']
@@ -228,7 +223,7 @@ def create_front_matter(metadata: Dict[str, str], title: Optional[str], existing
228223
return '\n'.join(front_matter_lines)
229224

230225

231-
def process_file(file_path: Path) -> bool:
226+
def process_file(file_path: Path, overwrite: bool = False) -> bool:
232227
"""Process a single markdown file. Returns True if file was modified."""
233228
try:
234229
with open(file_path, 'r', encoding='utf-8') as f:
@@ -260,28 +255,42 @@ def process_file(file_path: Path) -> bool:
260255
if key in metadata and key in existing_front_matter and existing_front_matter[key] != metadata[key]:
261256
conflicts.append(f"{key}: existing='{existing_front_matter[key]}' vs extracted='{metadata[key]}'")
262257

263-
# Log conflicts
258+
# Log conflicts (and what will happen)
264259
for conflict in conflicts:
265-
print(f"Warning: {file_path} has conflicting metadata - {conflict}", file=sys.stderr)
260+
if overwrite:
261+
print(f"Info: {file_path} - overwriting conflicting metadata - {conflict}", file=sys.stderr)
262+
else:
263+
print(f"Warning: {file_path} has conflicting metadata - {conflict}", file=sys.stderr)
266264

267-
# Check if we need to add any new fields
265+
# Check if we need to add any new fields or overwrite existing ones
268266
needs_update = False
269267
added_fields = []
270268

271-
if title and 'title' not in existing_front_matter:
272-
needs_update = True
273-
added_fields.append('title')
274-
275-
for key in ['proposal', 'author', 'sponsor', 'status', 'planned_version']:
276-
if key in metadata and key not in existing_front_matter:
269+
if overwrite:
270+
# In overwrite mode, update if we have any metadata to write
271+
if title or metadata:
272+
needs_update = True
273+
if title:
274+
added_fields.append('title')
275+
for key in ['proposal', 'author', 'sponsor', 'status', 'planned_version']:
276+
if key in metadata:
277+
added_fields.append(key)
278+
else:
279+
# In normal mode, only add fields that don't exist
280+
if title and 'title' not in existing_front_matter:
277281
needs_update = True
278-
added_fields.append(key)
282+
added_fields.append('title')
283+
284+
for key in ['proposal', 'author', 'sponsor', 'status', 'planned_version']:
285+
if key in metadata and key not in existing_front_matter:
286+
needs_update = True
287+
added_fields.append(key)
279288

280289
if has_existing_front_matter and not needs_update:
281290
return False
282291

283292
# Create merged front matter
284-
front_matter = create_front_matter(metadata, title, existing_front_matter)
293+
front_matter = create_front_matter(metadata, title, existing_front_matter, overwrite)
285294

286295
# Combine front matter with content (without existing front matter)
287296
new_content = front_matter + '\n' + content_without_front_matter
@@ -297,6 +306,12 @@ def process_file(file_path: Path) -> bool:
297306

298307
def main():
299308
"""Main function to process all proposal markdown files."""
309+
parser = argparse.ArgumentParser(description="Update Jekyll front matter for HLSL proposal markdown files")
310+
parser.add_argument('--overwrite', action='store_true',
311+
help="Overwrite existing front matter values with extracted metadata (default: only add missing fields)")
312+
313+
args = parser.parse_args()
314+
300315
# Get the script directory and find the proposals directory
301316
script_dir = Path(__file__).parent
302317
repo_root = script_dir.parent
@@ -325,7 +340,7 @@ def main():
325340

326341
modified_count = 0
327342
for file_path in sorted(md_files):
328-
if process_file(file_path):
343+
if process_file(file_path, args.overwrite):
329344
modified_count += 1
330345

331346
if modified_count > 0:

0 commit comments

Comments
 (0)