Skip to content

Commit 10e775c

Browse files
committed
Add segment_special_appeal script
1 parent d3a6faa commit 10e775c

2 files changed

Lines changed: 97 additions & 0 deletions

File tree

.vscode/settings.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"python-envs.defaultEnvManager": "ms-python.python:conda",
3+
"python-envs.defaultPackageManager": "ms-python.python:conda",
4+
"python-envs.pythonProjects": [],
5+
"python.analysis.typeCheckingMode": "basic"
6+
}

src/segment_special_appeal.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import json
2+
from openai import OpenAI
3+
4+
5+
def segment_special_appeal(
    appeal_text: str,
    model: str = "gpt-4.1-mini",
    temperature: float = 0.0
) -> dict:
    """
    Segment a Brazilian special appeal into rhetorical/legal sections.

    The appeal is sent to the OpenAI chat completions endpoint together with
    instructions to split it verbatim into a fixed list of sections, and the
    model's reply is parsed as JSON.

    Parameters
    ----------
    appeal_text : str
        Full text of the special appeal.
    model : str
        OpenAI model to use.
    temperature : float
        Sampling temperature (keep at 0 for determinism).

    Returns
    -------
    dict
        The JSON object returned by the model. The prompt asks for the
        sections identification, procedural_history, facts, legal_issues,
        legal_arguments, requests and other, but the keys actually present
        are not validated here.

    Raises
    ------
    ValueError
        If the model returns no text, the text is not valid JSON, or the
        parsed JSON is not an object.
    """

    # Constructed without arguments: credentials and endpoint come from the
    # client library's own default configuration.
    client = OpenAI()

    system_prompt = (
        "You are a legal text analysis assistant. "
        "Your task is to segment Brazilian legal documents based on their rhetorical and functional structure. "
        "Do NOT summarize, classify, interpret legal outcomes, or infer themes. "
        "Only segment the text into structurally meaningful sections."
    )

    user_prompt = f"""
Segment the following Brazilian special appeal into the sections listed below.

Sections:
- identification
- procedural_history
- facts
- legal_issues
- legal_arguments
- requests
- other

Instructions:
- Preserve the original text verbatim in each section.
- Do NOT rewrite, paraphrase, or summarize.
- If a section is not present, return it as an empty string.
- Return the output strictly as a valid JSON object.
- Do not include any explanation or commentary.

TEXT:
\"\"\"
{appeal_text}
\"\"\"
"""

    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    )

    raw_output = response.choices[0].message.content

    # The message content is optional in the API response; without this guard
    # json.loads(None) would raise TypeError and bypass the ValueError contract.
    if raw_output is None:
        raise ValueError("Model returned no content")

    payload = raw_output.strip()

    # Despite the instructions, a reply may arrive wrapped in a Markdown code
    # fence (``` or ```json). Drop the fence lines so the JSON inside parses.
    if payload.startswith("```"):
        _, _, payload = payload.partition("\n")
        payload = payload.rstrip()
        if payload.endswith("```"):
            payload = payload[:-3]

    try:
        segmented = json.loads(payload)
    except json.JSONDecodeError as e:
        raise ValueError("Model output is not valid JSON") from e

    # Valid JSON can still be a list or scalar; callers are promised a dict.
    if not isinstance(segmented, dict):
        raise ValueError("Model output is not a JSON object")

    return segmented
79+
80+
81+
if __name__ == "__main__":
    # Manual smoke test: read one appeal from disk, segment it, and store the
    # resulting sections next to it as pretty-printed JSON.
    input_path = "recurso_especial.txt"
    output_path = "recurso_segmentado.json"

    with open(input_path, "r", encoding="utf-8") as source_file:
        appeal = source_file.read()

    sections = segment_special_appeal(appeal)

    # ensure_ascii=False keeps accented Portuguese characters readable in the
    # output file instead of \uXXXX escapes.
    with open(output_path, "w", encoding="utf-8") as target_file:
        json.dump(sections, target_file, ensure_ascii=False, indent=2)

    print("Segmentation completed. Output saved to recurso_segmentado.json")

0 commit comments

Comments
 (0)