from lunar import Lunar
from lunar.evals import exactMatch, contains
# One client is used both to launch the evaluation and to query the model.
client = Lunar()

# Evaluation cases: each dict carries a prompt under "input" and a reference
# answer under "expected" (presumably what the scorers compare against).
qa_cases = [
    {"input": "What is 2+2?", "expected": "4"},
    {"input": "Capital of France?", "expected": "Paris"},
]


def _answer(x):
    """Send ``x`` as a single user message to gpt-4o-mini and return the reply text.

    ``x`` is whatever the evaluation runner hands to the task callable
    (presumably the case's "input" value -- confirm against the SDK docs).
    """
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": x}],
    )
    # Only the first choice's message content is used as the task output.
    return completion.choices[0].message.content


result = client.evals.run(
    name="QA Test",
    dataset=qa_cases,
    task=_answer,
    scorers=[exactMatch, contains],
)
# View results
# Overall success rate, rendered as a percentage with one decimal place.
print(f"Success rate: {result.summary.success_rate:.1%}")
# Per-scorer statistics: one line per entry in `result.summary.scores`,
# showing that scorer's mean and standard deviation to two decimals.
# The print must be indented so it forms the body of the `for` loop;
# without the indent Python rejects the file with an IndentationError.
for scorer_name, summary in result.summary.scores.items():
    print(f"{scorer_name}: mean={summary.mean:.2f}, std={summary.std_dev:.2f}")