Skip to content

Commit e05e465

Browse files
authored
feat: Add examples for using AI Judges (#14)
1 parent 552a4aa commit e05e465

4 files changed

Lines changed: 221 additions & 3 deletions

File tree

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,24 @@ This example uses `OpenAI`, `Bedrock`, and `Gemini` LangChain provider packages.
7171
export LAUNCHDARKLY_DOCUMENTATION_CONFIG_KEY="code-review-documentation"
7272
```
7373
1. On the command line, run `poetry run langgraph-multi-agent-example`.
74+
75+
#### Judge setup (judge evaluation)
76+
77+
These examples demonstrate how to use LaunchDarkly's judge functionality to evaluate AI responses for accuracy, relevance, and other metrics.
78+
79+
1. Install dependencies with `poetry install -E langchain` or `poetry install --all-extras`.
80+
1. Set up API keys for the provider you want to use (OpenAI, Bedrock, or Gemini).
81+
1. [Create an AI Config](https://launchdarkly.com/docs/home/ai-configs/create) for chat functionality.
82+
1. [Create a Judge Config](https://launchdarkly.com/docs/home/ai-configs/judges) for evaluation.
83+
1. Set the required environment variables:
84+
```bash
85+
export LAUNCHDARKLY_SDK_KEY="your-sdk-key"
86+
export LAUNCHDARKLY_AI_CONFIG_KEY="sample-ai-config"
87+
export LAUNCHDARKLY_AI_JUDGE_KEY="sample-ai-judge-accuracy"
88+
```
89+
Note: If these variables are not set, the AI Config key defaults to `sample-ai-config` and the Judge Config key defaults to `sample-ai-judge-accuracy`.
90+
91+
##### Available judge examples:
92+
93+
- **Chat with automatic judge evaluation** (`poetry run chat-judge-example`): Uses the chat functionality, which automatically evaluates responses with any judges defined in the AI Config.
94+
- **Direct judge evaluation** (`poetry run direct-judge-example`): Evaluates specific input/output pairs using a judge configuration directly.

examples/chat_judge_example.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import os
2+
import json
3+
import asyncio
4+
import ldclient
5+
from ldclient import Context
6+
from ldclient.config import Config
7+
from ldai import LDAIClient, AICompletionConfigDefault
8+
9+
# Set sdk_key to your LaunchDarkly SDK key.
10+
sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY')
11+
12+
# Set ai_config_key to the AI Config key you want to evaluate.
13+
ai_config_key = os.getenv('LAUNCHDARKLY_AI_CONFIG_KEY', 'sample-ai-config')
14+
15+
16+
async def async_main():
    """Run the chat example and print any judge evaluation results.

    Reads the module-level ``sdk_key`` and ``ai_config_key``, configures the
    LaunchDarkly client, creates a chat for the AI Config, sends one user
    message, and prints the chat response followed by the awaited judge
    evaluation results (if any were produced).

    Raises:
        SystemExit: With status 1 when the SDK key is missing or the SDK
            fails to initialize.
    """
    # Imported locally: the bare ``exit()`` helper is installed by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    import sys

    if not sdk_key:
        print("*** Please set the LAUNCHDARKLY_SDK_KEY env first")
        sys.exit(1)

    ldclient.set_config(Config(sdk_key))
    client = ldclient.get()

    # Everything after the client is created runs under try/finally so the
    # client is closed on every path, including the failed-initialization exit.
    try:
        if not client.is_initialized():
            print("*** SDK failed to initialize. Please check your internet connection and SDK credential for any typo.")
            sys.exit(1)

        aiclient = LDAIClient(client)
        print("*** SDK successfully initialized")

        # Set up the evaluation context. This context should appear on your
        # LaunchDarkly contexts dashboard soon after you run the demo.
        context = (
            Context
            .builder('example-user-key')
            .kind('user')
            .name('Sandy')
            .build()
        )

        # Example using the chat functionality which automates the judge evaluation
        default_value = AICompletionConfigDefault(
            enabled=False,
        )

        chat = await aiclient.create_chat(ai_config_key, context, default_value, {
            'companyName': 'LaunchDarkly',
        })

        if not chat:
            print(f"*** AI chat configuration is not enabled for key: {ai_config_key}")
            return

        print("\n*** Starting chat with automatic judge evaluation:")
        user_input = 'How can LaunchDarkly help me?'
        print("User Input:", user_input)

        # The invoke method will automatically evaluate the chat response with any judges defined in the AI config
        chat_response = await chat.invoke(user_input)
        print("Chat Response:", chat_response.message.content)

        # Log judge evaluation results with full detail
        evaluations = chat_response.evaluations
        if evaluations:
            # Note: Judge evaluations run asynchronously and do not block your application.
            # Results are automatically sent to LaunchDarkly for AI config metrics.
            # You only need to await if you want to access the evaluation results in your code.
            print("\nNote: Awaiting judge results (optional - done here for demonstration only).")
            eval_results = await asyncio.gather(*evaluations)

            # Convert results, replacing None with a message
            results_to_display = [
                result.to_dict() if result is not None else "not evaluated"
                for result in eval_results
            ]

            print("Judge results:")
            print(json.dumps(results_to_display, indent=2, default=str))

            # Identity check: only a literal None marks a skipped evaluation.
            if any(result is None for result in eval_results):
                print("\nNote: Some judge evaluations were skipped.")
                print("This typically happens when the sample rate doesn't require this evaluation, or due to a configuration issue.")
                print("Check application logs for more details.")
        else:
            print("\nNo judge evaluations were performed.")
            print("This typically happens when the sample rate doesn't require this evaluation, or due to a configuration issue.")
            print("Check application logs for more details.")

        print("Success.")
    except Exception as err:
        # Top-level boundary of the example: report the failure and fall
        # through to cleanup. SystemExit is not an Exception, so it propagates.
        print("Error:", err)
    finally:
        # Close the client to flush events and close the connection.
        client.close()
94+
95+
96+
def main():
    """Blocking wrapper used as the Poetry script entry point."""
    # Build the coroutine first, then hand it to a fresh event loop.
    entry_coroutine = async_main()
    asyncio.run(entry_coroutine)


if __name__ == "__main__":
    main()

examples/direct_judge_example.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import os
2+
import json
3+
import asyncio
4+
import ldclient
5+
from ldclient import Context
6+
from ldclient.config import Config
7+
from ldai import LDAIClient, AICompletionConfigDefault
8+
9+
# Set sdk_key to your LaunchDarkly SDK key.
10+
sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY')
11+
12+
# Set judge_key to the Judge key you want to use.
13+
judge_key = os.getenv('LAUNCHDARKLY_AI_JUDGE_KEY', 'sample-ai-judge-accuracy')
14+
15+
16+
async def async_main():
    """Evaluate one fixed input/output pair with a judge and print the result.

    Reads the module-level ``sdk_key`` and ``judge_key``, configures the
    LaunchDarkly client, creates a judge for the Judge Config, evaluates a
    hard-coded input/output pair, and prints the judge response as JSON.

    Raises:
        SystemExit: With status 1 when the SDK key is missing or the SDK
            fails to initialize.
    """
    # Imported locally: the bare ``exit()`` helper is installed by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    import sys

    if not sdk_key:
        print("*** Please set the LAUNCHDARKLY_SDK_KEY env first")
        sys.exit(1)

    ldclient.set_config(Config(sdk_key))
    client = ldclient.get()

    # Everything after the client is created runs under try/finally so the
    # client is closed on every path, including the failed-initialization exit.
    try:
        if not client.is_initialized():
            print("*** SDK failed to initialize. Please check your internet connection and SDK credential for any typo.")
            sys.exit(1)

        aiclient = LDAIClient(client)
        print("*** SDK successfully initialized")

        # Set up the evaluation context. This context should appear on your
        # LaunchDarkly contexts dashboard soon after you run the demo.
        context = (
            Context
            .builder('example-user-key')
            .kind('user')
            .name('Sandy')
            .build()
        )

        # Example of using the judge functionality with direct input and output
        # Get AI judge configuration from LaunchDarkly
        judge_default_value = AICompletionConfigDefault(
            enabled=False,
        )
        judge = await aiclient.create_judge(judge_key, context, judge_default_value)

        if not judge:
            print(f"*** AI judge configuration is not enabled for key: {judge_key}")
            return

        print("\n*** Starting direct judge evaluation of input and output:")
        input_text = 'You are a helpful assistant for the company LaunchDarkly. How can you help me?'
        output_text = 'I can answer any question you have except for questions about the company LaunchDarkly.'

        print("Input:", input_text)
        print("Output:", output_text)

        judge_response = await judge.evaluate(input_text, output_text)

        if judge_response is None:
            print("\nJudge evaluation was skipped.")
            print("This typically happens when the sample rate doesn't require this evaluation, or due to a configuration issue.")
            print("Check application logs for more details.")
            return

        # Track the judge evaluation scores on the tracker for the AI Config you are evaluating.
        # Example (``ai_config`` is a placeholder for that AI Config object, not defined here):
        # ai_config.tracker.track_eval_scores(judge_response.evals)

        # Convert JudgeResponse to dict for display using to_dict()
        judge_response_dict = judge_response.to_dict()
        print("Judge Response:")
        print(json.dumps(judge_response_dict, indent=2, default=str))

        print("Success.")
    except Exception as err:
        # Top-level boundary of the example: report the failure and fall
        # through to cleanup. SystemExit is not an Exception, so it propagates.
        print("Error:", err)
    finally:
        # Close the client to flush events and close the connection.
        client.close()
82+
83+
84+
def main():
    """Blocking wrapper used as the Poetry script entry point."""
    # Build the coroutine first, then hand it to a fresh event loop.
    entry_coroutine = async_main()
    asyncio.run(entry_coroutine)


if __name__ == "__main__":
    main()

pyproject.toml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,21 @@ gemini-example = 'examples.gemini_example:main'
1414
langchain-example = 'examples.langchain_example:main'
1515
langgraph-agent-example = 'examples.langgraph_agent_example:main'
1616
langgraph-multi-agent-example = 'examples.langgraph_multi_agent_example:main'
17+
chat-judge-example = 'examples.chat_judge_example:main'
18+
direct-judge-example = 'examples.direct_judge_example:main'
1719

1820
[tool.poetry.dependencies]
19-
python = "^3.9"
20-
launchdarkly-server-sdk-ai = ">=0.6.0,<1.0.0"
21+
python = "^3.10"
22+
launchdarkly-server-sdk-ai = "^0.14.0"
23+
launchdarkly-server-sdk-ai-langchain = "^0.3.0"
24+
launchdarkly-server-sdk-ai-openai = "^0.1.0"
2125

2226
boto3 = { version = ">=0.2.0", optional = true }
2327
openai = { version = ">=0.2.0", optional = true }
2428
google-genai = { version = "^1.30.0", optional = true }
2529
langchain = {version = "^0.3.0", optional = true}
2630
langchain-aws = {version = "^0.2.30", optional = true}
31+
langchain-core = {version = "^0.3.0", optional = true}
2732
langchain-google-genai = {version = "^2.1.9", optional = true}
2833
langchain-openai = {version = "^0.3.30", optional = true}
2934
langgraph = {version = "^0.2.0", optional = true}
@@ -32,7 +37,7 @@ langgraph = {version = "^0.2.0", optional = true}
3237
bedrock = ["boto3"]
3338
openai = ["openai"]
3439
gemini = ["google-genai"]
35-
langchain = ["langchain", "langchain-openai", "langchain-google-genai", "langchain-aws"]
40+
langchain = ["langchain", "langchain-core", "langchain-openai", "langchain-google-genai", "langchain-aws"]
3641
langgraph = ["langgraph", "typing-extensions"]
3742

3843
[build-system]

0 commit comments

Comments
 (0)