Skip to content

Commit ed0f0d4

Browse files
committed
feat(prompt): add GitHub directory prompt builders and signal formatting
1 parent bdacd46 commit ed0f0d4

1 file changed

Lines changed: 227 additions & 0 deletions

File tree

explain_this_repo/prompt.py

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,3 +303,230 @@ def build_file_simple_prompt(
303303
{_SECURITY_INSTRUCTION}
304304
"""
305305
return prompt.strip()
306+
307+
308+
def _directory_entry_text(entry: object) -> str:
309+
if isinstance(entry, dict):
310+
path = entry.get("path") or entry.get("name")
311+
entry_type = entry.get("type")
312+
if path and entry_type:
313+
return f"{path} ({entry_type})"
314+
if path:
315+
return str(path)
316+
if entry_type:
317+
return str(entry_type)
318+
return str(entry)
319+
return str(entry)
320+
321+
322+
def _format_directory_value(value: object, max_items: int = 20) -> str:
323+
if value is None:
324+
return ""
325+
326+
if isinstance(value, str):
327+
text = value.strip()
328+
return text
329+
330+
if isinstance(value, dict):
331+
items = list(value.items())
332+
if not items:
333+
return ""
334+
335+
if all(isinstance(v, (int, float)) for _, v in items):
336+
items = sorted(items, key=lambda kv: (-kv[1], str(kv[0])))
337+
338+
lines = [f"- {k}: {v}" for k, v in items[:max_items]]
339+
return "\n".join(lines)
340+
341+
try:
342+
items = list(value)
343+
except TypeError:
344+
text = str(value).strip()
345+
return text
346+
347+
if not items:
348+
return ""
349+
350+
lines = [f"- {_directory_entry_text(item)}" for item in items[:max_items]]
351+
return "\n".join(lines)
352+
353+
354+
def _format_directory_metadata(directory_path: str) -> str:
    """Wrap the directory path in a ``<directory_metadata>`` block.

    The path is passed through ``escape_for_prompt_block`` (defined elsewhere
    in this module) before it is embedded on the ``Path:`` line.
    """
    safe_path = escape_for_prompt_block(directory_path)
    return "\n".join(
        (
            "<directory_metadata>",
            f"Path: {safe_path}",
            "</directory_metadata>",
        )
    )
358+
359+
360+
def _format_directory_signals(signals: dict | None, max_items: int = 20) -> str:
    """Render extracted directory signals as a ``<directory_signals>`` block.

    Sections are emitted in this order, separated by blank lines:

    1. ``File count`` / ``Directory count`` from ``file_count`` / ``dir_count``
       (shown whenever the value is not ``None``).
    2. ``Files``, ``Subdirectories`` and ``Extension distribution``, each read
       from its primary key with a fallback to an alias key when the primary
       is missing or falsy, and formatted with ``_format_directory_value``.
    3. ``Other signals``: every remaining key whose formatted value is
       non-empty.

    The assembled text is passed through ``escape_for_prompt_block`` before
    being wrapped. A falsy ``signals`` argument, or one that yields no
    sections, produces a block containing ``No signals extracted``.

    Args:
        signals: Mapping of signal name to value, or ``None``.
        max_items: Per-section item cap forwarded to ``_format_directory_value``.
    """
    if not signals:
        return "<directory_signals>\nNo signals extracted\n</directory_signals>"

    # (signal key, heading) for the scalar counters.
    count_fields = (
        ("file_count", "File count"),
        ("dir_count", "Directory count"),
    )
    # (heading, primary key, alias key) for the listing-style signals.
    listing_fields = (
        ("Files", "files", "file_names"),
        ("Subdirectories", "subdirectories", "directories"),
        ("Extension distribution", "extensions", "extension_distribution"),
    )

    sections: list[str] = []
    handled: set[str] = set()

    for key, heading in count_fields:
        handled.add(key)
        count = signals.get(key)
        if count is not None:
            sections.append(f"{heading}: {count}")

    for heading, primary, alias in listing_fields:
        # Both spellings count as consumed, even when only one is used.
        handled.update((primary, alias))
        raw = signals.get(primary) or signals.get(alias)
        if not raw:
            continue
        rendered = _format_directory_value(raw, max_items=max_items)
        if rendered:
            sections.append(f"{heading}:\n{rendered}")

    leftovers: list[str] = []
    for key, raw in signals.items():
        if key not in handled:
            rendered = _format_directory_value(raw, max_items=max_items)
            if rendered:
                leftovers.append(f"{key}:\n{rendered}")

    if leftovers:
        sections.append("Other signals:\n" + "\n\n".join(leftovers))

    if sections:
        body = "\n\n".join(sections)
    else:
        body = "No signals extracted"

    return f"<directory_signals>\n{escape_for_prompt_block(body)}\n</directory_signals>"
416+
417+
418+
def build_directory_prompt(
    directory_path: str,
    signals: dict | None = None,
    detailed: bool = False,
) -> str:
    """Build the full "explain this directory" prompt.

    The prompt consists of the metadata block, the signals block, the base
    instructions and the module-level ``_SECURITY_INSTRUCTION`` text, followed
    by an optional "Additional instructions" section and a fixed
    "Output format" section.

    Args:
        directory_path: Path of the directory being explained.
        signals: Extracted directory signals, or ``None``.
        detailed: When true, each signal section may list up to 40 items
            instead of 20 and the "Additional instructions" section is added.

    Returns:
        The assembled prompt with surrounding whitespace stripped.
    """
    item_cap = 40 if detailed else 20
    metadata_block = _format_directory_metadata(directory_path)
    signals_section = _format_directory_signals(signals, max_items=item_cap)

    base = f"""You are a senior software engineer.

Explain this directory clearly.

{metadata_block}

{signals_section}

Instructions:
- Explain what this directory is responsible for.
- Explain what kinds of files and subdirectories exist here.
- Explain what role it plays in the system.
- Do not invent missing context.
- If something is unclear, say so.
- Avoid hype or marketing language.
- Be concise and practical.
- Use clear markdown headings.

{_SECURITY_INSTRUCTION}
"""

    # The base text is stripped before anything is appended, so each appendix
    # supplies its own leading blank line.
    pieces = [base.strip()]

    if detailed:
        pieces.append("""

Additional instructions:
- Describe the most important files and subdirectories.
- Mention patterns and boundaries if they can be inferred from the provided signals.
- Explain how this directory fits into the repository if that can be inferred.
""")

    pieces.append("""

Output format:
# Overview
# What this directory does
# What is inside
# Role in the system
# Notes or limitations
""")

    return "".join(pieces).strip()
469+
470+
471+
def build_directory_quick_prompt(
    directory_path: str,
    signals: dict | None = None,
) -> str:
    """Build the prompt asking for a one-sentence definition of a directory.

    Signal sections are capped at 8 items each. The prompt ends with the
    module-level ``_SECURITY_INSTRUCTION`` text.

    Args:
        directory_path: Path of the directory being described.
        signals: Extracted directory signals, or ``None``.

    Returns:
        The assembled prompt with surrounding whitespace stripped.
    """
    metadata_block = _format_directory_metadata(directory_path)
    signals_section = _format_directory_signals(signals, max_items=8)

    intro = (
        "You are a senior software engineer.\n"
        "\n"
        "Write a ONE-SENTENCE plain-English definition of what this GitHub directory is for."
    )
    rules = "\n".join(
        (
            "Rules:",
            "- Output MUST be exactly 1 sentence.",
            "- Plain English.",
            "- No markdown.",
            "- No quotes.",
            "- No bullet points.",
            "- No extra text.",
            "- Do not invent details not present in the directory listing and signals.",
        )
    )

    assembled = (
        f"{intro}\n\n"
        f"{metadata_block}\n\n"
        f"{signals_section}\n\n"
        f"{rules}\n\n"
        f"{_SECURITY_INSTRUCTION}\n"
    )
    return assembled.strip()
498+
499+
500+
def build_directory_simple_prompt(
    directory_path: str,
    signals: dict | None = None,
) -> str:
    """Build the prompt asking for a short bullet-point summary of a directory.

    Signal sections are capped at 12 items each. The prompt ends with the
    module-level ``_SECURITY_INSTRUCTION`` text.

    Args:
        directory_path: Path of the directory being summarized.
        signals: Extracted directory signals, or ``None``.

    Returns:
        The assembled prompt with surrounding whitespace stripped.
    """
    metadata_block = _format_directory_metadata(directory_path)
    signals_section = _format_directory_signals(signals, max_items=12)

    intro = (
        "You are a senior software engineer.\n"
        "\n"
        "Summarize this GitHub directory in a concise bullet-point format."
    )
    # The two unbulleted lines are literal text the model is told to emit.
    style_rules = "\n".join(
        (
            "Output style rules:",
            "- Plain English.",
            "- No markdown.",
            '- Do NOT use headings like "Overview", "What this directory does", etc.',
            "- Start with exactly this line:",
            "Key points from the directory:",
            "- Then output 3 to 5 bullets only.",
            "- Each bullet MUST start with: ⬤",
            "- Each bullet title should be 1–3 words only.",
            "- Each bullet body should be 1–2 lines max.",
            "- Base bullets strictly on the provided directory listing and signals.",
            "- Do NOT invent details not present in the input.",
            "- Optional ending:",
            "Also interesting:",
        )
    )

    assembled = (
        f"{intro}\n\n"
        f"{metadata_block}\n\n"
        f"{signals_section}\n\n"
        f"{style_rules}\n\n"
        f"{_SECURITY_INSTRUCTION}\n"
    )
    return assembled.strip()

0 commit comments

Comments
 (0)