from lunar import Lunar
from lunar.evals import (
exactMatch,
exactMatchIgnoreCase,
contains,
containsIgnoreCase,
startsWith,
endsWith,
jsonValid,
notEmpty,
isNumeric,
)
client = Lunar()

# Small sample dataset used to try several built-in scorers in one run.
# Presumably the runner passes each row's "input" to the task and gives the
# "expected" value to the scorers -- confirm against the Lunar evals docs.
# NOTE(review): the JSON row has expected=None; confirm how exactMatch and
# contains treat a missing expectation (only jsonValid/notEmpty look
# meaningful for that row).
dataset = [
    {"input": "What is 2+2?", "expected": "4"},
    {"input": "Generate valid JSON", "expected": None},
    {"input": "Say hello", "expected": "Hello"},
]


def _ask_model(prompt):
    """Send ``prompt`` as a single user message and return the reply content.

    Used as the evaluation task: it issues one chat completion request with
    the "gpt-4o-mini" model and returns the ``content`` of the first choice's
    message.
    """
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content


result = client.evals.run(
    name="Built-in Scorers Test",
    dataset=dataset,
    task=_ask_model,
    scorers=[
        exactMatch,
        contains,
        jsonValid,
        notEmpty,
    ],
)

# Print the mean score reported for each scorer, to two decimal places.
for scorer_name, summary in result.summary.scores.items():
    print(f"{scorer_name}: {summary.mean:.2f}")