Skip to content

Commit f8f8777

Browse files
committed
Add Google Sheets sync helper scripts and documentation
1 parent e9b0bb7 commit f8f8777

5 files changed

Lines changed: 573 additions & 0 deletions

File tree

SETUP_GOOGLE_SHEETS_SYNC.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Google Sheets Team Sync Setup Guide
2+
3+
## Using Service Account (Recommended)
4+
5+
### Step 1: Create Service Account
6+
1. Go to [Google Cloud Console](https://console.cloud.google.com/)
7+
2. Navigate to **IAM & Admin** → **Service Accounts**
8+
3. Click **+ CREATE SERVICE ACCOUNT**
9+
4. Name it something like "sheets-sync"
10+
5. Click **Create and Continue** (skip optional steps)
11+
6. Click **Done**
12+
13+
### Step 2: Create and Download Key
14+
1. Click on your new service account
15+
2. Go to **Keys** tab
16+
3. Click **ADD KEY** → **Create new key**
17+
4. Choose **JSON** format
18+
5. Download the file and rename it to `service-account-key.json`
19+
6. Place it in your repository root (it's already in .gitignore)
20+
21+
### Step 3: Get Service Account Email
22+
1. Copy the service account email (looks like: `sheets-sync@your-project.iam.gserviceaccount.com`)
23+
24+
### Step 4: Share Google Sheet
25+
1. Open your Google Sheet
26+
2. Click **Share** button
27+
3. Paste the service account email
28+
4. Give it **Viewer** access
29+
5. Click **Send**
30+
31+
### Step 5: Run the Sync
32+
```bash
33+
cd "/Users/bwestove/cdac Dropbox/brandon westover/0_GithubRepos/bdsp-core.github.io"
34+
source venv/bin/activate
35+
python sync_team_service_account.py
36+
```
37+
38+
## Google Sheet Format
39+
Your sheet should have these columns:
40+
- Name
41+
- Link (optional URL)
42+
- Photo (filename)
43+
- Position
44+
- Email
45+
- Education1
46+
- Education2
47+
- Education3
48+
- Education4
49+
- Category (must be one of: Faculty, Alumni, Postdocs/Students/Staff, Collaborators)
50+
51+
## Automation with GitHub Actions
52+
Once the manual sync works, the GitHub Action will use the same service account to sync automatically.

extract_team_simple.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#!/usr/bin/env python3
2+
"""Extract team data from YAML files to CSV - simple version without dependencies."""
3+
4+
import re
5+
import csv
6+
7+
def _unquote(value):
    """Strip one pair of matching surrounding quotes from a YAML scalar."""
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
        return value[1:-1]
    return value


def parse_yaml_simple(filename):
    """Parse a flat YAML list of team members without a YAML library.

    Only the specific layout used by the team data files is understood: each
    member starts with a ``- name:`` line at column 0 and is followed by
    indented ``key: value`` lines.  Nested structures, multi-line scalars and
    inline comments are not supported.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        A list of dicts (one per member) mapping field names to string
        values.  Surrounding quotes on a value are removed.  An empty list is
        returned, after printing a warning, when the file does not exist.
    """
    members = []
    current_member = {}

    try:
        # Read as UTF-8 explicitly: the CSV is written as UTF-8, and relying
        # on the platform default encoding corrupts non-ASCII names.
        with open(filename, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.rstrip()

                # New member starts with "- name:"
                if line.startswith('- name:'):
                    if current_member:
                        members.append(current_member)
                    # 7 == len('- name:'); everything after it is the value.
                    current_member = {'name': _unquote(line[7:].strip())}

                # Other fields are indented "key: value" lines.
                elif line.startswith(' '):
                    stripped = line.strip()
                    # Skip full-line comments and lines without a key.
                    if stripped.startswith('#') or ':' not in stripped:
                        continue
                    # Split on the first colon only so URLs stay intact.
                    key, value = stripped.split(':', 1)
                    current_member[key.strip()] = _unquote(value.strip())

        # Don't forget the last member
        if current_member:
            members.append(current_member)

    except FileNotFoundError:
        print(f"Warning: {filename} not found")

    return members
42+
43+
# Matches an opening <a ... href="URL" ...> tag followed by the link text.
# The closing tag is matched loosely (</a>, <a/>, <a>, ...) so that slightly
# malformed anchors in the data are still recognised.
_LINK_RE = re.compile(
    r'<a\s[^>]*?href=(["\'])(.+?)\1[^>]*>([^<]+)</?a?/?>',
    re.IGNORECASE,
)


def extract_link_and_name(name_field):
    """Split a name field into a clean display name and an optional URL.

    Args:
        name_field: Either a plain name or an HTML anchor such as
            ``<a href="https://example.org">Jane Doe</a>``.  ``None`` is
            treated as an empty name.

    Returns:
        A ``(name, url)`` tuple.  ``url`` is ``""`` when the field is not an
        HTML link.
    """
    if name_field is None:
        return "", ""

    # Strip before matching so leading whitespace cannot hide a link.
    text = str(name_field).strip()
    match = _LINK_RE.match(text)
    if match:
        return match.group(3).strip(), match.group(2).strip()
    return text, ""
50+
51+
def main():
    """Export every team member found in the YAML data files to team_data.csv.

    Each data file is read with ``parse_yaml_simple``, each member's name
    field is split into a display name and link, and one CSV row per member
    is written to ``team_data.csv`` in the current working directory.

    NOTE(review): the setup guide lists the category "Postdocs/Students/Staff"
    while this script exports "Postdocs, Students, Staff" -- confirm which
    spelling the sync step expects.
    """
    columns = ['Name', 'Link', 'Photo', 'Position', 'Email',
               'Education1', 'Education2', 'Education3', 'Education4', 'Category']

    # (CSV column, YAML key) pairs copied straight from each member record.
    yaml_fields = (
        ('Photo', 'photo'),
        ('Position', 'info'),
        ('Email', 'email'),
        ('Education1', 'education1'),
        ('Education2', 'education2'),
        ('Education3', 'education3'),
        ('Education4', 'education4'),
    )

    # (data file, category label) pairs, processed in this order.
    sources = (
        ('_data/faculty.yml', 'Faculty'),
        ('_data/postdocsStudentsStaff.yml', 'Postdocs, Students, Staff'),
        ('_data/alumni.yml', 'Alumni'),
        ('_data/collaborators.yml', 'Collaborators'),
    )

    all_rows = []
    for yaml_file, category in sources:
        members = parse_yaml_simple(yaml_file)

        for member in members:
            name, link = extract_link_and_name(member.get('name', ''))
            record = {'Name': name, 'Link': link}
            record.update(
                (column, member.get(key, '')) for column, key in yaml_fields
            )
            record['Category'] = category
            all_rows.append(record)

        print(f"Extracted {len(members)} members from {yaml_file}")

    # newline='' lets the csv module control line endings itself.
    with open('team_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"\nTotal: {len(all_rows)} team members exported to team_data.csv")
    print("\nYou can now copy the contents of team_data.csv to your Google Sheet!")


if __name__ == '__main__':
    main()

extract_team_to_csv.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/usr/bin/env python3
2+
"""Extract team data from YAML files to CSV for Google Sheets import."""
3+
4+
import yaml
5+
import csv
6+
import re
7+
8+
# Matches an opening <a ... href="URL" ...> tag followed by the link text.
# The closing tag is matched loosely (</a>, <a/>, <a>, ...) so that slightly
# malformed anchors in the data are still recognised.
_LINK_RE = re.compile(
    r'<a\s[^>]*?href=(["\'])(.+?)\1[^>]*>([^<]+)</?a?/?>',
    re.IGNORECASE,
)


def extract_link_and_name(name_field):
    """Split a name field into a clean display name and an optional URL.

    Args:
        name_field: Either a plain name or an HTML anchor such as
            ``<a href="https://example.org">Jane Doe</a>``.  ``None`` (for
            example an empty ``name:`` key in the YAML) is treated as an
            empty name.

    Returns:
        A ``(name, url)`` tuple.  ``url`` is ``""`` when the field is not an
        HTML link.
    """
    if name_field is None:
        return "", ""

    # Strip before matching so leading whitespace cannot hide a link.
    text = str(name_field).strip()
    match = _LINK_RE.match(text)
    if match:
        return match.group(3).strip(), match.group(2).strip()
    return text, ""
15+
16+
def yaml_to_rows(yaml_file, category):
    """Convert one YAML team file into a list of CSV row dicts.

    Args:
        yaml_file: File name inside the ``_data/`` directory.
        category: Label stored in the ``Category`` column of every row.

    Returns:
        A list of dicts keyed by the CSV column names.  The list is empty
        when the file is missing (a warning is printed) or contains no data.
    """
    try:
        # Read as UTF-8 explicitly: the CSV is written as UTF-8, and relying
        # on the platform default encoding corrupts non-ASCII names.
        with open(f'_data/{yaml_file}', 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Warning: {yaml_file} not found")
        return []

    rows = []
    # An empty file makes safe_load return None; treat that as "no members".
    for member in data or []:
        # An empty "name:" key loads as None, so fall back to '' for parsing.
        name, link = extract_link_and_name(member.get('name') or '')

        rows.append({
            'Name': name,
            'Link': link,
            'Photo': member.get('photo', ''),
            'Position': member.get('info', ''),
            'Email': member.get('email', ''),
            'Education1': member.get('education1', ''),
            'Education2': member.get('education2', ''),
            'Education3': member.get('education3', ''),
            'Education4': member.get('education4', ''),
            'Category': category,
        })

    return rows
50+
51+
def main():
    """Export every team member found in the YAML data files to team_data.csv.

    Rows are gathered per category with ``yaml_to_rows`` and written, with a
    header line, to ``team_data.csv`` in the current working directory.

    NOTE(review): the setup guide lists the category "Postdocs/Students/Staff"
    while this script exports "Postdocs, Students, Staff" -- confirm which
    spelling the sync step expects.
    """
    columns = ['Name', 'Link', 'Photo', 'Position', 'Email',
               'Education1', 'Education2', 'Education3', 'Education4', 'Category']

    # (data file, category label) pairs, processed in this order.
    sources = (
        ('faculty.yml', 'Faculty'),
        ('postdocsStudentsStaff.yml', 'Postdocs, Students, Staff'),
        ('alumni.yml', 'Alumni'),
        ('collaborators.yml', 'Collaborators'),
    )

    all_rows = []
    for yaml_file, category in sources:
        extracted = yaml_to_rows(yaml_file, category)
        all_rows += extracted
        print(f"Extracted {len(extracted)} members from {yaml_file}")

    # newline='' lets the csv module control line endings itself.
    with open('team_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"\nTotal: {len(all_rows)} team members exported to team_data.csv")


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)