Skip to content

Commit bc9550d

Browse files
committed
Added Gliner
1 parent eec261e commit bc9550d

10 files changed

Lines changed: 1193 additions & 1 deletion

File tree

benchmarks/benchmark_banking77.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,23 @@ def run_benchmark(args):
160160

161161
import tempfile
162162

163+
from rulechef.training_logger import TrainingDataLogger
164+
163165
storage_dir = tempfile.mkdtemp(prefix="rulechef_bench_")
164166

167+
# Training logger
168+
log_path = Path(args.output).with_suffix(".training.jsonl")
169+
logger = TrainingDataLogger(
170+
str(log_path),
171+
run_metadata={
172+
"benchmark": "banking77",
173+
"model": args.model,
174+
"format": args.format,
175+
"num_classes": num_classes,
176+
},
177+
)
178+
print(f" Training log: {log_path}")
179+
165180
coordinator = None
166181
if args.agentic:
167182
from rulechef.coordinator import AgenticCoordinator
@@ -180,6 +195,7 @@ def run_benchmark(args):
180195
max_rules=args.max_rules,
181196
max_samples=args.max_samples,
182197
coordinator=coordinator,
198+
training_logger=logger,
183199
)
184200

185201
# 4. Add training examples (suppress per-example prints)

docs/api/rulechef.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ The main orchestrator class.
2020
- get_rules_summary
2121
- discover_task
2222
- start_observing
23+
- start_observing_gliner
2324
- stop_observing
25+
- stop_observing_gliner
2426
- trigger_manual_learning
2527
- get_buffer_stats
2628
- generate_llm_examples

docs/guide/advanced.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,44 @@ chef.stop_observing()
6666

6767
When `auto_learn=True`, learning triggers automatically based on the coordinator's decision. Streaming calls (`stream=True`) are also observed — RuleChef wraps the stream to capture content after it completes.
6868

69+
### GLiNER / GLiNER2 observation (`start_observing_gliner`)
70+
71+
Observe predictions from [GLiNER](https://github.com/urchade/GLiNER) (NER) or [GLiNER2](https://github.com/fastino-ai/GLiNER2) (NER, classification, structured extraction) models:
72+
73+
```python
74+
from gliner import GLiNER
75+
76+
model = GLiNER.from_pretrained("urchade/gliner_multi-v2.1")
77+
78+
chef = RuleChef(client=client, model="gpt-4o-mini")
79+
chef.start_observing_gliner(model, auto_learn=False)
80+
81+
# Use the model as normal — predictions are captured
82+
entities = model.predict_entities("Apple was founded by Steve Jobs.", ["company", "person"])
83+
84+
chef.learn_rules()
85+
chef.stop_observing_gliner()
86+
```
87+
88+
For GLiNER2, specify which method to observe:
89+
90+
```python
91+
from gliner2 import GLiNER2
92+
93+
model = GLiNER2.from_pretrained("fastino/gliner2-base-v1")
94+
95+
# NER
96+
chef.start_observing_gliner(model, method="extract_entities", auto_learn=False)
97+
98+
# Classification
99+
chef.start_observing_gliner(model, method="classify_text", auto_learn=False)
100+
101+
# Structured extraction
102+
chef.start_observing_gliner(model, method="extract_json", auto_learn=False)
103+
```
104+
105+
No LLM calls are needed for task discovery — GLiNER output is already structured. The task type, schema, and labels are inferred automatically from the observed predictions.
106+
69107
## Training Data Logger (Distillation)
70108

71109
RuleChef can capture every LLM call made during rule synthesis as structured training data, suitable for fine-tuning a smaller model to replace the LLM. The logger is fully optional — pass a `TrainingDataLogger` instance and all calls (synthesis, patching, coordination, auditing) are written to a JSONL file.

examples/gliner_observation.py

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
"""Observe GLiNER / GLiNER2 predictions and learn rules from them.
2+
3+
Demonstrates:
4+
- GLiNER NER observation with monkey-patching
5+
- GLiNER2 classification and structured extraction observation
6+
- AgenticCoordinator with rule pruning
7+
- Incremental learning with corrections
8+
- grex-powered regex suggestions
9+
10+
Requirements:
11+
pip install rulechef[gliner] # for GLiNER
12+
pip install rulechef[gliner2] # for GLiNER2
13+
14+
Usage:
15+
export OPENAI_API_KEY='your-key'
16+
python examples/gliner_observation.py
17+
"""
18+
19+
import os
20+
21+
from openai import OpenAI
22+
23+
from rulechef import RuleChef
24+
from rulechef.coordinator import AgenticCoordinator
25+
26+
27+
def _make_client():
28+
api_key = os.environ.get("OPENAI_API_KEY")
29+
if not api_key:
30+
raise SystemExit("OPENAI_API_KEY is required")
31+
32+
client_kwargs = {"api_key": api_key}
33+
base_url = os.environ.get("OPENAI_BASE_URL")
34+
if base_url:
35+
client_kwargs["base_url"] = base_url
36+
return OpenAI(**client_kwargs)
37+
38+
39+
def gliner_ner():
    """Observe GLiNER NER predictions → learn rules → evaluate.

    Loads a GLiNER model, lets RuleChef observe its ``predict_entities``
    calls, learns rules in two phases (initial synthesis, then an
    incremental pass over extra sentences) and finally prints, for two
    held-out sentences, the GLiNER entities next to the rule-based ones.

    Observation is stopped in a ``finally`` clause so the model is not left
    patched when prediction or learning raises part-way through.
    """
    from gliner import GLiNER

    print("=" * 60)
    print("GLiNER NER Observation")
    print("=" * 60)

    client = _make_client()
    model_name = os.environ.get("RULECHEF_MODEL", "gpt-4o-mini")

    # Load GLiNER model
    gliner_model = GLiNER.from_pretrained("urchade/gliner_small-v2.1")

    # Set up RuleChef with AgenticCoordinator + grex
    coordinator = AgenticCoordinator(
        llm_client=client,
        model=model_name,
        prune_after_learn=True,
    )
    chef = RuleChef(
        client=client,
        model=model_name,
        coordinator=coordinator,
        use_grex=True,
    )

    labels = ["person", "company", "location"]
    texts = [
        "Apple was founded by Steve Jobs and Steve Wozniak in Cupertino.",
        "Elon Musk is the CEO of Tesla, headquartered in Austin, Texas.",
        "Microsoft, led by Satya Nadella, is based in Redmond, Washington.",
        "Jeff Bezos founded Amazon in Seattle in 1994.",
        "Sundar Pichai runs Google from Mountain View, California.",
        "Mark Zuckerberg created Facebook in Cambridge.",
        "Tim Cook took over Apple after Steve Jobs passed away in Palo Alto.",
        "Nvidia, led by Jensen Huang, is headquartered in Santa Clara.",
        "Larry Page and Sergey Brin started Google in Menlo Park.",
        "Satya Nadella transformed Microsoft from its headquarters in Redmond.",
    ]
    more_texts = [
        "Sam Altman leads OpenAI from San Francisco.",
        "Dario Amodei runs Anthropic from San Francisco, California.",
        "Intel, founded by Gordon Moore, is based in Santa Clara.",
        "Reed Hastings co-founded Netflix in Scotts Valley.",
        "Lisa Su is the CEO of AMD, based in Santa Clara.",
    ]

    # Observe GLiNER predictions (auto-detects predict_entities → NER)
    chef.start_observing_gliner(gliner_model, auto_learn=False)
    try:
        # Counts in the messages are derived from the lists so they cannot
        # drift out of sync when the sample sentences are edited.
        print(f"\nObserving {len(texts)} GLiNER predictions...")
        for text in texts:
            entities = gliner_model.predict_entities(text, labels, threshold=0.3)
            ents = ", ".join(f"{e['label']}:{e['text']}" for e in entities)
            print(f" {text[:55]:58s}{ents}")

        print(f"\nBuffer: {chef.get_buffer_stats()['new_examples']} examples")

        # Learn rules (full synthesis + refinement)
        print("\n--- Phase 1: Initial learning ---")
        result = chef.learn_rules(run_evaluation=True, max_refinement_iterations=3)
        if result:
            rules, eval_result = result
            print(f"\n {len(rules)} rules, F1={eval_result.micro_f1:.0%}")

        # Add more data incrementally
        print(
            f"\n--- Phase 2: Incremental learning with {len(more_texts)} more examples ---"
        )
        for text in more_texts:
            # Return value is intentionally ignored; the call is made so the
            # observer captures the prediction.
            gliner_model.predict_entities(text, labels, threshold=0.3)

        result = chef.learn_rules(
            run_evaluation=True,
            max_refinement_iterations=2,
            incremental_only=True,
        )
        if result:
            rules, eval_result = result
            print(f"\n {len(rules)} rules after patch, F1={eval_result.micro_f1:.0%}")
    finally:
        # Always undo the observation hook, even if learning failed.
        chef.stop_observing_gliner()

    # Test on unseen data (observation is off, so these calls are not buffered)
    print("\n--- Held-out test ---")

    test_texts = [
        "Pat Gelsinger was the CEO of Intel in Santa Clara.",
        "Andy Jassy runs Amazon from Seattle, Washington.",
    ]
    for text in test_texts:
        gliner_ents = gliner_model.predict_entities(text, labels, threshold=0.3)
        rule_result = chef.extract({"text": text})

        # Compare as (surface text, label) pairs; GLiNER uses the "label" key
        # while the rule output uses "type".
        gliner_set = {(e["text"], e["label"]) for e in gliner_ents}
        rule_set = {(e["text"], e["type"]) for e in rule_result.get("entities", [])}
        overlap = len(gliner_set & rule_set)

        print(f"\n {text}")
        print(f" GLiNER: {sorted(gliner_set)}")
        print(f" Rules: {sorted(rule_set)} ({overlap}/{len(gliner_set)} match)")
139+
140+
141+
def gliner2_classification():
    """Observe GLiNER2 classification → learn rules.

    Lets RuleChef observe ``classify_text`` calls on a GLiNER2 model for a
    three-way sentiment schema, learns rules from the observations and then
    prints, for two held-out sentences, whether the rule label agrees with
    the GLiNER2 label.

    Observation is stopped in a ``finally`` clause so the model is not left
    patched when prediction or learning raises part-way through.
    """
    from gliner2 import GLiNER2

    print("\n" + "=" * 60)
    print("GLiNER2 Classification Observation")
    print("=" * 60)

    client = _make_client()
    model_name = os.environ.get("RULECHEF_MODEL", "gpt-4o-mini")

    extractor = GLiNER2.from_pretrained("fastino/gliner2-base-v1")

    coordinator = AgenticCoordinator(
        llm_client=client,
        model=model_name,
        prune_after_learn=True,
    )
    chef = RuleChef(
        client=client,
        model=model_name,
        coordinator=coordinator,
        use_grex=True,
    )

    schema = {"sentiment": ["positive", "negative", "neutral"]}
    texts = [
        "I love this product! It's amazing and works perfectly.",
        "Terrible experience, the product broke after one day.",
        "The weather is okay today, nothing special.",
        "Best purchase I've ever made, highly recommend!",
        "Complete waste of money, worst quality ever.",
        "Average product, works fine for the price.",
        "Absolutely fantastic, exceeded all my expectations!",
        "Horrible customer service, will never buy again.",
        "It's a decent product, nothing extraordinary.",
        "Really happy with my purchase, fast delivery too!",
    ]

    # Observe classify_text → CLASSIFICATION task
    chef.start_observing_gliner(extractor, method="classify_text", auto_learn=False)
    try:
        print(f"\nObserving {len(texts)} classifications...")
        for text in texts:
            prediction = extractor.classify_text(text, schema)
            # First value of the result mapping; fall back to "" instead of
            # raising when the model returns an empty mapping.
            label = next(iter(prediction.values()), "")
            print(f" {text[:55]:58s}{label}")

        learned = chef.learn_rules(run_evaluation=True, max_refinement_iterations=3)
        if learned:
            rules, eval_result = learned
            print(f"\n {len(rules)} rules, F1={eval_result.micro_f1:.0%}")
    finally:
        # Always undo the observation hook, even if learning failed.
        chef.stop_observing_gliner()

    # Test
    print("\n--- Held-out test ---")

    for text in ["Great quality, very happy!", "Broke on day one, terrible."]:
        gliner_label = next(iter(extractor.classify_text(text, schema).values()), "")
        rule_label = chef.extract({"text": text}).get("label", "")
        match = "✓" if gliner_label == rule_label else "✗"
        # `!s` converts before padding so a non-string label value cannot
        # break the `:10s` format spec.
        print(f" {match} {text:45s} GLiNER2={gliner_label!s:10s} Rules={rule_label}")
203+
204+
205+
def gliner2_extraction():
    """Observe GLiNER2 structured extraction → learn rules.

    Lets RuleChef observe ``extract_json`` calls on a GLiNER2 model for a
    ``people`` schema (name, age, role, company), learns rules from the
    observations and then prints the GLiNER2 and rule-based outputs side by
    side for two held-out sentences.

    Observation is stopped in a ``finally`` clause so the model is not left
    patched when prediction or learning raises part-way through.
    """
    from gliner2 import GLiNER2

    print("\n" + "=" * 60)
    print("GLiNER2 Structured Extraction Observation")
    print("=" * 60)

    client = _make_client()
    model_name = os.environ.get("RULECHEF_MODEL", "gpt-4o-mini")

    extractor = GLiNER2.from_pretrained("fastino/gliner2-base-v1")

    chef = RuleChef(client=client, model=model_name, use_grex=True)

    schema = {
        "people": [
            "name::str::Person name",
            "age::str::Age",
            "role::str::Job role",
            "company::str::Company",
        ]
    }

    texts = [
        "John Smith, age 35, works as a software engineer at Google.",
        "Maria Garcia, 28, is a data scientist at Microsoft.",
        "Bob Johnson, age 42, is a product manager at Amazon.",
        "Alice Chen, 31, works as a UX designer at Apple.",
        "David Kim, age 45, is a VP of engineering at Meta.",
        "Sarah Wilson, 29, is a machine learning engineer at Nvidia.",
    ]

    # Observe extract_json → TRANSFORMATION task
    chef.start_observing_gliner(extractor, method="extract_json", auto_learn=False)
    try:
        print(f"\nObserving {len(texts)} extractions...")
        for text in texts:
            extraction = extractor.extract_json(text, schema)
            for person in extraction.get("people", []):
                print(f" {text[:50]:53s}{person}")

        learned = chef.learn_rules(run_evaluation=True)
        if learned:
            rules, eval_result = learned
            print(f"\n {len(rules)} rules, F1={eval_result.micro_f1:.0%}")
    finally:
        # Always undo the observation hook, even if learning failed.
        chef.stop_observing_gliner()

    # Test on unseen data
    print("\n--- Held-out test ---")

    for text in [
        "Emily Brown, age 38, is a senior architect at Oracle.",
        "James Lee, 33, works as a DevOps engineer at Spotify.",
    ]:
        gliner_result = extractor.extract_json(text, schema)
        rule_result = chef.extract({"text": text})
        print(f"\n {text}")
        print(f" GLiNER2: {gliner_result.get('people', [])}")
        print(f" Rules: {rule_result.get('people', [])}")
265+
266+
267+
if __name__ == "__main__":
    import sys

    # Run specific demo or all.
    # Usage: python examples/gliner_observation.py [ner|classify|extract]
    demos = {
        "ner": gliner_ner,
        "classify": gliner2_classification,
        "extract": gliner2_extraction,
    }

    requested = sys.argv[1:]
    if not requested:
        # No argument: run every demo in declaration order.
        for demo in demos.values():
            demo()
    elif requested[0] in demos:
        demos[requested[0]]()
    else:
        # An unrecognised name used to fall through and run all demos, which
        # load models and call the LLM; fail fast with the valid choices.
        raise SystemExit(
            f"Unknown demo {requested[0]!r}; choose one of: {', '.join(demos)}"
        )

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ agentic = [
3636
spacy = [
3737
"spacy>=3.0.0",
3838
]
39+
gliner = [
40+
"gliner>=1.0",
41+
]
42+
gliner2 = [
43+
"gliner2>=0.1",
44+
]
3945
grex = [
4046
"grex>=1.0",
4147
]

0 commit comments

Comments
 (0)