1+ #!/usr/bin/env python3
2+ """
3+ Generate examples metadata for Aidbox documentation.
4+ This script should be run from the root of the examples repository.
5+ """
6+
7+ import os
8+ import json
9+ import re
10+ from datetime import datetime
11+ from pathlib import Path
12+
def _parse_inline_list(frontmatter, key):
    """Return the items of an inline ``key: [a, "b", ...]`` list, or ``[]``."""
    # Anchor the key at the start of a line so that e.g. ``extra_features:``
    # is not mistaken for ``features:``.  ``[^\]]*`` lets the list span lines.
    match = re.search(
        r'^[ \t]*' + re.escape(key) + r':\s*\[([^\]]*)\]',
        frontmatter,
        re.MULTILINE,
    )
    if not match:
        return []

    items = (item.strip().strip('"\' ') for item in match.group(1).split(','))
    # Drop blanks produced by an empty list (``[]``) or a trailing comma.
    return [item for item in items if item]


def parse_readme_frontmatter(readme_path):
    """Extract the ``features`` and ``languages`` lists from README frontmatter.

    The frontmatter is the block delimited by ``---`` lines at the very top of
    the file.  Only inline lists such as ``features: [a, "b"]`` are recognised;
    surrounding quotes and whitespace are removed from every item.

    Args:
        readme_path: Path of the README.md file to read.

    Returns:
        ``None`` if the file does not start with a frontmatter block, otherwise
        a dict ``{'features': [...], 'languages': [...]}``.  A key that is
        missing or whose list is empty maps to ``[]``.
    """
    # ``utf-8-sig`` transparently skips a leading BOM, which would otherwise
    # prevent the ``^---`` match below.
    with open(readme_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Extract frontmatter between the leading pair of --- lines
    frontmatter_match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL)
    if not frontmatter_match:
        return None

    frontmatter = frontmatter_match.group(1)

    return {
        'features': _parse_inline_list(frontmatter, 'features'),
        'languages': _parse_inline_list(frontmatter, 'languages'),
    }
47+
def extract_title_and_description(readme_path):
    """Extract the title and a one-line description from a README.

    The title is the text of the first level-1 heading (``# Title``).  When
    there is none, the name of the directory containing the README is used,
    converted from kebab-case to Title Case.

    The description is the first non-empty line after the first heading (of
    any level) that is not itself a heading or a list item.  Lines inside
    fenced code blocks are ignored for both title and description, so shell
    comments or commands are never mistaken for README text.  When no such
    line exists, a description is generated from up to three frontmatter
    features; if that is not possible either, it is the empty string.

    Args:
        readme_path: Path of the README.md file to read.

    Returns:
        A ``(title, description)`` tuple of strings.
    """
    # ``utf-8-sig`` skips a leading BOM so the frontmatter is still recognised.
    with open(readme_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Remove frontmatter
    content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL)

    title = None
    description = ""
    seen_heading = False
    in_code_fence = False

    for line in content.split('\n'):
        stripped = line.strip()

        # Track fenced code blocks; neither the fence nor its content is prose.
        if stripped.startswith(('```', '~~~')):
            in_code_fence = not in_code_fence
            continue
        if in_code_fence:
            continue

        if line.startswith('#'):
            seen_heading = True
            if title is None:
                # Only a level-1 heading ("# " followed by text) is a title.
                heading = re.match(r'#[ \t]+(.+)$', line)
                if heading and heading.group(1).strip():
                    title = heading.group(1).strip()
        elif seen_heading and stripped and not description:
            # Skip list items (also indented ones); take the first other line.
            if not stripped.startswith(('-', '*')):
                description = stripped

        if title is not None and description:
            break

    if title is None:
        # Fallback to directory name, converting kebab-case to Title Case
        dir_name = os.path.basename(os.path.dirname(readme_path))
        title = ' '.join(word.capitalize() for word in dir_name.split('-'))

    # If no description found, generate one from features
    if not description:
        metadata = parse_readme_frontmatter(readme_path)
        if metadata and metadata['features']:
            features_str = ', '.join(metadata['features'][:3])
            description = f"Example demonstrating {features_str}"

    return title, description
96+
def generate_examples_metadata(root_dir='.',
                               repo_url='https://github.com/Aidbox/examples',
                               branch='main'):
    """Generate metadata for all examples found below ``root_dir``.

    Every directory (other than ``root_dir`` itself) that contains a
    ``README.md`` with frontmatter becomes one example entry.  Hidden
    directories, ``scripts`` and ``node_modules`` are not traversed.  A line
    is printed for each README that is processed or lacks frontmatter.

    Args:
        root_dir: Directory to scan; defaults to the current directory.
        repo_url: Base URL of the GitHub repository used to build links.
        branch: Branch name used in the generated GitHub links.

    Returns:
        A dict with the keys ``timestamp`` (UTC, ISO 8601 with a ``Z``
        suffix), ``examples`` (entries sorted by lower-cased title),
        ``features_list`` and ``languages_list`` (sorted unique values).
    """
    # Local import: the module itself only imports ``datetime`` from datetime.
    from datetime import timezone

    examples = []
    all_features = set()
    all_languages = set()
    skipped_dirs = {'scripts', 'node_modules'}
    repo_url = repo_url.rstrip('/')

    # Walk through all directories
    for root, dirs, files in os.walk(root_dir):
        # Prune in place so os.walk does not descend into skipped directories;
        # sorting makes the traversal order (and thus the output) deterministic.
        dirs[:] = sorted(
            d for d in dirs if not d.startswith('.') and d not in skipped_dirs
        )

        if 'README.md' not in files:
            continue

        # Skip root README: it describes the repository, not an example.
        rel_dir = os.path.relpath(root, root_dir)
        if rel_dir == os.curdir:
            continue

        readme_path = os.path.join(root, 'README.md')
        metadata = parse_readme_frontmatter(readme_path)
        if not metadata:
            print(f"⚠ No frontmatter found in: {readme_path}")
            continue

        title, description = extract_title_and_description(readme_path)

        # Repository-relative path with forward slashes on every platform;
        # the ID is the same path with "/" replaced by "-".
        github_path = rel_dir.replace(os.sep, '/')
        example_id = github_path.replace('/', '-')

        # GitHub URLs
        github_url = f"{repo_url}/tree/{branch}/{github_path}"
        readme_url = f"{repo_url}/blob/{branch}/{github_path}/README.md"

        examples.append({
            'id': example_id,
            'title': title,
            'description': description or f"Example demonstrating {', '.join(metadata['features'][:3])}",
            'features': metadata['features'],
            'languages': metadata['languages'],
            'github_url': github_url,
            'readme_url': readme_url,
        })
        all_features.update(metadata['features'])
        all_languages.update(metadata['languages'])

        print(f"✓ Processed: {github_path}")

    # Sort examples by title
    examples.sort(key=lambda x: x['title'].lower())

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the serialized form keeps the "...Z" shape instead of "+00:00".
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        'timestamp': now_utc.isoformat() + 'Z',
        'examples': examples,
        'features_list': sorted(all_features),
        'languages_list': sorted(all_languages),
    }
162+
def main():
    """Generate the examples metadata, save it as JSON and print a report.

    The result of ``generate_examples_metadata()`` is written to
    ``examples-metadata.json`` in the current working directory, followed by
    a short summary of the examples, features and languages on stdout.
    """
    separator = "-" * 40

    print("Generating examples metadata...")
    print(separator)

    metadata = generate_examples_metadata()

    # Persist the metadata; keep non-ASCII characters readable in the output.
    output_file = 'examples-metadata.json'
    with open(output_file, 'w', encoding='utf-8') as out:
        json.dump(metadata, out, indent=2, ensure_ascii=False)

    feature_names = metadata['features_list']
    language_names = metadata['languages_list']

    print(separator)
    print(f"✓ Generated metadata for {len(metadata['examples'])} examples")
    print(f"✓ Found {len(feature_names)} unique features")
    print(f"✓ Found {len(language_names)} programming languages")
    print(f"✓ Output saved to: {output_file}")

    # Summary: every language, but at most the first ten features.
    print("\nLanguages found:", ', '.join(language_names))
    print("\nTop features (first 10):", ', '.join(feature_names[:10]))
    remaining = len(feature_names) - 10
    if remaining > 0:
        print(f" ... and {remaining} more")
186+
# Script entry point: run the generator only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments