Skip to content

Commit d0b77a3

Browse files
committed
update
1 parent ca5d4b6 commit d0b77a3

4 files changed

Lines changed: 158 additions & 0 deletions

File tree

public/robots.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# https://www.danielho.xyz/robots.txt
2+
3+
User-agent: *
4+
Allow: /
5+
6+
# AI crawlers — welcome
7+
User-agent: GPTBot
8+
Allow: /
9+
10+
User-agent: ClaudeBot
11+
Allow: /
12+
13+
User-agent: Google-Extended
14+
Allow: /
15+
16+
User-agent: PerplexityBot
17+
Allow: /
18+
19+
User-agent: Applebot-Extended
20+
Allow: /
21+
22+
# LLM-friendly content
23+
# See /llms.txt and /llms-full.txt for clean markdown versions
24+
25+
Sitemap: https://www.danielho.xyz/sitemap-index.xml

src/layouts/BaseLayout.astro

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ const canonicalURL = new URL(Astro.url.pathname, Astro.site);
2323
<link rel="canonical" href={canonicalURL} />
2424
<link rel="icon" href="/favicon.svg" type="image/svg+xml" />
2525

26+
<!-- LLM discovery -->
27+
<link rel="alternate" type="text/plain" href="/llms.txt" title="LLM-friendly site summary" />
28+
2629
<!-- Open Graph -->
2730
<meta property="og:type" content="website" />
2831
<meta property="og:url" content={canonicalURL} />
@@ -35,6 +38,15 @@ const canonicalURL = new URL(Astro.url.pathname, Astro.site);
3538
<meta name="twitter:title" content={title} />
3639
<meta name="twitter:description" content={description} />
3740
{ogImage && <meta name="twitter:image" content={new URL(ogImage, Astro.site)} />}
41+
<!-- Google tag (gtag.js) -->
42+
<script async src="https://www.googletagmanager.com/gtag/js?id=G-12ZHM7K2KH"></script>
43+
<script is:inline>
44+
window.dataLayer = window.dataLayer || [];
45+
// Queues a gtag command by pushing this call's `arguments` object onto `dataLayer`
// (initialised just above as `window.dataLayer`).
// NOTE(review): presumably gtag.js expects the raw `arguments` object rather than an
// array — confirm before refactoring this to rest parameters.
function gtag(){dataLayer.push(arguments);}
46+
gtag('js', new Date());
47+
gtag('config', 'G-12ZHM7K2KH');
48+
</script>
49+
3850
<ViewTransitions />
3951
<script is:inline>
4052
function applyThemeToDoc(doc) {

src/pages/llms-full.txt.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import type { APIContext } from "astro";
2+
import { getCollection } from "astro:content";
3+
import fs from "node:fs/promises";
4+
import path from "node:path";
5+
6+
/**
 * Serves `/llms-full.txt`: every non-draft entry of the "writing" collection,
 * newest first, concatenated into a single plain-text markdown document.
 *
 * Each article is emitted as a small metadata header (title, publish date,
 * optional tags / repo / website, canonical URL) followed by the post source
 * with frontmatter, MDX imports, self-closing JSX components and `<figure>`
 * blocks removed.
 *
 * @param context - Astro API context. `context.site` must be configured; it is
 *   the base for every absolute URL written into the document.
 * @returns A `text/plain; charset=utf-8` response holding the document.
 * @throws Error when `context.site` is not set, or when a post's source file
 *   cannot be read.
 */
export async function GET(context: APIContext): Promise<Response> {
  // Narrow `site` once instead of asserting it with `!` at every use; without
  // this the intro line below would silently render "undefined".
  const site = context.site;
  if (!site) {
    throw new Error(
      "llms-full.txt: `site` must be set in the Astro config to build absolute URLs",
    );
  }

  const posts = (await getCollection("writing"))
    .filter((post) => !post.data.draft)
    .sort((a, b) => b.data.pubDate.valueOf() - a.data.pubDate.valueOf());

  // The reads are independent, so run them in parallel; Promise.all keeps the
  // results in the same (newest-first) order as `posts`.
  const articles = await Promise.all(
    posts.map(async (post) => {
      const raw = await readPostSource(post.id);
      const cleaned = stripMdxSyntax(stripFrontmatter(raw));

      const date = post.data.pubDate.toLocaleDateString("en-US", {
        year: "numeric",
        month: "long",
        day: "numeric",
      });

      const url = new URL(`/writing/${post.id}/`, site);

      let header = `# ${post.data.title}\n\nPublished: ${date}`;
      if (post.data.tags.length) header += `\nTags: ${post.data.tags.join(", ")}`;
      if (post.data.repo) header += `\nRepo: ${post.data.repo}`;
      if (post.data.website) header += `\nWebsite: ${post.data.website}`;
      header += `\nURL: ${url}`;

      return `${header}\n\n${cleaned}`;
    }),
  );

  const body = `# Daniel Ho — Full Site Content

> Software engineer, writer, potter.

This file contains the complete text content of ${site} in clean markdown for LLM consumption.

---

${articles.join("\n\n---\n\n")}
`;

  return new Response(body, {
    headers: { "Content-Type": "text/plain; charset=utf-8" },
  });
}

/** True when `err` carries the Node.js "file does not exist" error code. */
function isFileNotFound(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    "code" in err &&
    (err as { code?: unknown }).code === "ENOENT"
  );
}

/**
 * Reads the raw source of a writing post, trying `<id>.mdx` first and falling
 * back to `<id>.md` so a plain-markdown post does not abort the whole build.
 * If neither file exists, the error from the `.mdx` attempt is rethrown.
 */
async function readPostSource(id: string): Promise<string> {
  let firstMissing: unknown;
  for (const extension of [".mdx", ".md"]) {
    const filePath = path.resolve(`./src/content/writing/${id}${extension}`);
    try {
      return await fs.readFile(filePath, "utf-8");
    } catch (err: unknown) {
      // Only "not found" justifies trying the next extension; anything else
      // (permissions, I/O failure) is a real error and must surface.
      if (!isFileNotFound(err)) throw err;
      if (firstMissing === undefined) firstMissing = err;
    }
  }
  throw firstMissing;
}

/**
 * Removes a leading YAML frontmatter block. Both delimiters must be `---` on a
 * line of their own, so a `---` inside a frontmatter value does not end the
 * block early. Text without frontmatter is returned unchanged.
 */
function stripFrontmatter(raw: string): string {
  return raw.replace(
    /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)\s*/,
    "",
  );
}

/**
 * Strips MDX import lines, self-closing JSX components (with or without props)
 * and `<figure>` blocks, then trims the result. Fenced code blocks are passed
 * through untouched so code samples keep their own `import` and JSX lines.
 */
function stripMdxSyntax(markdown: string): string {
  type Segment = { fenced: boolean; lines: string[] };

  const segments: Segment[] = [];
  let current: Segment = { fenced: false, lines: [] };
  let openFence: string | null = null;

  for (const line of markdown.split("\n")) {
    const marker = /^ {0,3}(`{3,}|~{3,})/.exec(line)?.[1];

    if (openFence === null) {
      // A backtick fence may not have backticks in its info string; such a
      // line is an inline code span, not the start of a code block.
      const rest = marker ? line.slice(line.indexOf(marker) + marker.length) : "";
      const opensFence =
        marker !== undefined && !(marker[0] === "`" && rest.includes("`"));
      if (opensFence) {
        segments.push(current);
        current = { fenced: true, lines: [line] };
        openFence = marker;
      } else {
        current.lines.push(line);
      }
      continue;
    }

    current.lines.push(line);
    // A closing fence uses the same character, is at least as long as the
    // opening one, and has nothing else on the line.
    const closesFence =
      marker !== undefined &&
      marker[0] === openFence[0] &&
      marker.length >= openFence.length &&
      line.trim() === marker;
    if (closesFence) {
      segments.push(current);
      current = { fenced: false, lines: [] };
      openFence = null;
    }
  }
  segments.push(current);

  return segments
    .filter((segment) => segment.lines.length > 0)
    .map((segment) => {
      const text = segment.lines.join("\n");
      if (segment.fenced) return text;
      return text
        .replace(/^import\s+.*$/gm, "")
        .replace(/<[A-Z][\w.]*(?:\s[^<>]*)?\/>/g, "")
        .replace(/<figure>[\s\S]*?<\/figure>/g, "");
    })
    .join("\n")
    .trim();
}

src/pages/llms.txt.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import type { APIContext } from "astro";
2+
import { getCollection } from "astro:content";
3+
4+
/**
 * Serves `/llms.txt`: a short markdown summary of the site — bio, links, a
 * linked list of every non-draft "writing" entry (newest first), publications,
 * work history, and a pointer to `/llms-full.txt`.
 *
 * @param context - Astro API context; `context.site` is the base for all
 *   absolute URLs in the document.
 * @returns A `text/plain; charset=utf-8` response holding the summary.
 */
export async function GET(context: APIContext) {
  const published = (await getCollection("writing")).filter(
    (entry) => !entry.data.draft,
  );
  // `filter` already produced a fresh array, so sorting it in place is safe.
  published.sort(
    (left, right) => right.data.pubDate.valueOf() - left.data.pubDate.valueOf(),
  );

  const site = context.site!;

  // One markdown bullet per post: "- [title](absolute url): description".
  const writingLines: string[] = [];
  for (const entry of published) {
    const entryUrl = new URL(`/writing/${entry.id}/`, site);
    writingLines.push(
      `- [${entry.data.title}](${entryUrl}): ${entry.data.description}`,
    );
  }
  const writingEntries = writingLines.join("\n");

  const fullContentUrl = new URL("/llms-full.txt", site);

  const body = `# Daniel Ho

> Software engineer, writer, potter.

Personal website of Daniel Ho. I build software, write about what I'm learning, and make pottery when I'm away from the screen.

## About

Daniel Ho is a software engineer who has worked at Anime, Azuki, Koop (technical cofounder), Flexport, Meta, Salesforce, and NASA JPL. He studied at USC where he researched NLP and commonsense reasoning in pre-trained language models (BERT, GPT-2).

## Links

- Website: ${site}
- GitHub: https://github.com/donutdaniel
- X/Twitter: https://x.com/donutdho
- LinkedIn: https://linkedin.com/in/danielho54
- Email: danielho54@gmail.com

## Writing

${writingEntries}

## Publications

- [RICA: Evaluating Robust Inference Capabilities Based on Commonsense Axioms](https://arxiv.org/abs/2005.00782)

## Work

- Anime (anime.com): Social product. News aggregation, streaming, and experiences for anime fans.
- Azuki (azuki.com): Web3 team. Anime NFT brand and community platform.
- Koop (koop.xyz): Technical cofounder. On-chain rails for creators and communities.
- Flexport: Platform and infra team. Dev environments, microservices, shipment domain modeling.
- Meta: Messenger team. Infrastructure, profile and stories processing.
- Salesforce: Core platform. File streaming service for real-time collaboration.
- NASA JPL: Deep learning for CNN noise detection.
- USC: NLP research on commonsense reasoning. TA for Discrete Math and Algorithms.

## Full Content

For complete article text in clean markdown, see: ${fullContentUrl}
`;

  const headers = { "Content-Type": "text/plain; charset=utf-8" };
  return new Response(body, { headers });
}

0 commit comments

Comments
 (0)