-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcreate_judge_example.py
More file actions
113 lines (91 loc) · 4.04 KB
/
create_judge_example.py
File metadata and controls
113 lines (91 loc) · 4.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import logging
from dotenv import load_dotenv
import asyncio
import ldclient
from ldclient import Context
from ldclient.config import Config
from ldai import LDAIClient, AIJudgeConfigDefault
from ldobserve import ObservabilityConfig, ObservabilityPlugin
# Load settings through python-dotenv before any environment lookups below.
load_dotenv()

# Configure default logging, then restrict the 'ldclient' logger to
# WARNING and above so the example's own output stays readable.
logging.basicConfig()
logging.getLogger('ldclient').setLevel(logging.WARNING)

# LaunchDarkly SDK key, read from LAUNCHDARKLY_SDK_KEY (None when unset).
sdk_key = os.environ.get('LAUNCHDARKLY_SDK_KEY')

# Key of the judge to use, read from LAUNCHDARKLY_JUDGE_KEY; falls back to
# 'sample-judge' when the variable is not set.
judge_key = os.environ.get('LAUNCHDARKLY_JUDGE_KEY', 'sample-judge')
async def async_main():
    """Evaluate one sample input/output pair with a LaunchDarkly AI judge.

    Configures the global LaunchDarkly client (with the observability
    plugin), creates the judge identified by the module-level ``judge_key``
    for a sample user context, awaits a single evaluation and prints the
    fields of the result.

    Any exception raised while creating or running the judge is printed
    rather than propagated; once the judge step has been entered, the client
    is always flushed and closed.

    Raises:
        SystemExit: with status 1 when ``sdk_key`` is not set or the SDK
            reports that it is not initialized.
    """
    if not sdk_key:
        print("*** Please set the LAUNCHDARKLY_SDK_KEY env first")
        # Raise SystemExit directly instead of calling the bare exit() helper:
        # that helper is injected by the site module for interactive use, and
        # without an argument it reports success (status 0) on a failure path.
        raise SystemExit(1)

    ldclient.set_config(Config(sdk_key, plugins=[
        ObservabilityPlugin(ObservabilityConfig(
            service_name='hello-python-ai-judge',
        ))
    ]))

    if not ldclient.get().is_initialized():
        print("*** SDK failed to initialize. Please check your internet connection and SDK credential for any typo.")
        raise SystemExit(1)

    aiclient = LDAIClient(ldclient.get())
    print("*** SDK successfully initialized")

    # Set up the evaluation context. This context should appear on your
    # LaunchDarkly contexts dashboard soon after you run the demo.
    context = (
        Context
        .builder('example-user-key')
        .kind('user')
        .name('Sandy')
        .build()
    )

    try:
        # Pass a default for improved resiliency when the AI config is unavailable
        # or LaunchDarkly is unreachable; omit for a disabled default.
        # Example (enabled default; judge default has three messages):
        #   default = AIJudgeConfigDefault(
        #       enabled=True,
        #       model={'name': 'gpt-4'},
        #       provider={'name': 'openai'},
        #       messages=[
        #           {'role': 'system', 'content': 'Your judge criteria here.'},
        #           {'role': 'assistant', 'content': 'MESSAGE HISTORY: {{message_history}}'},
        #           {'role': 'user', 'content': 'RESPONSE TO EVALUATE: {{response_to_evaluate}}'},
        #       ],
        #   )
        #   judge = aiclient.create_judge(judge_key, context, default)
        judge = aiclient.create_judge(judge_key, context)

        if not judge:
            print(f"AI config '{judge_key}' is disabled. Verify the config key exists in your LaunchDarkly project and is not targeting a disabled variation.")
            # The finally clause below still flushes and closes the client.
            return

        input_text = 'You are a helpful assistant for the company LaunchDarkly. How can you help me?'
        output_text = 'I can answer any question you have except for questions about the company LaunchDarkly.'

        print('\nEvaluating a sample input/output pair with the judge:')
        print(f' Sample input: "{input_text}"')
        print(f' Sample output: "{output_text}"')
        print("Waiting for judge evaluation...")

        judge_result = await judge.evaluate(input_text, output_text)

        # If the output you're judging came from another AI Config, track the
        # result on that config's tracker so the metric is attributed to the
        # right config:
        #   ai_config.create_tracker().track_judge_result(judge_result)

        print("\nJudge result:")
        print(f"- judge_config_key: {judge_key}")
        print(f" sampled: {judge_result.sampled}")
        # The remaining fields are only reported when the evaluation was sampled.
        if judge_result.sampled:
            print(f" success: {judge_result.success}")
            print(f" error_message: {judge_result.error_message}")
            print(f" metric_key: {judge_result.metric_key}")
            print(f" score: {judge_result.score}")
            print(f" reasoning: {judge_result.reasoning}")

        print("\nDone!")
    except Exception as err:
        # In production, sanitize before logging — provider errors may include credentials.
        print("Error:", err)
    finally:
        # Flush pending events and close the client.
        ldclient.get().flush()
        ldclient.get().close()
def main():
    """Run ``async_main`` to completion; synchronous entry point for the Poetry script."""
    example = async_main()
    asyncio.run(example)


# Allow running the example directly as a script.
if __name__ == "__main__":
    main()