Python API Reference
Use Khaos programmatically for advanced integration, custom workflows, and automation. The Python API provides full access to evaluation, comparison, and cloud sync capabilities.
Installation
Terminal
pip install khaos
# Verify installation
python -c "import khaos; print(khaos.__version__)"

Core Components
| Module | Description |
|---|---|
| khaos.khaosagent | Decorator for agent handlers |
| khaos.run | Programmatic evaluation execution |
| khaos.compare | Run comparison and diff generation |
| khaos.cloud | Cloud sync and authentication |
| khaos.scenarios | Scenario loading and management |
| khaos.packs | Evaluation pack configuration |
@khaosagent Decorator
The primary integration point for your agent code.
Python
from khaos import khaosagent
@khaosagent(
name="my-agent", # Required: unique agent identifier
version="1.0.0", # Required: semantic version
framework="openai", # Optional: framework hint
description="My agent", # Optional: human-readable description
tags=["production"] # Optional: categorization tags
)
def handle(message: dict) -> dict:
"""
Handler receives standardized messages and returns responses.
Args:
message: {
"type": "user_message",
"payload": {"text": "...", "metadata": {}},
"context": {"run_id": "...", "scenario": "...", "phase": "..."}
}
Returns:
{"text": "response", "metadata": {...}}
"""
prompt = (message.get("payload") or {}).get("text", "")
# Your agent logic here
return {"text": f"Response to: {prompt}"}

See @khaosagent Decorator for complete documentation.
Programmatic Evaluation
Run evaluations programmatically instead of using the CLI:
Python
from khaos import run, RunConfig
# Basic run with pack
result = run(
agent="my-agent",
pack="quickstart"
)
# Run with custom configuration
config = RunConfig(
pack="full-eval",
security_enabled=True,
timeout=300,
seed=12345, # For reproducibility
sync=True # Upload to cloud
)
result = run(
agent="my-agent",
config=config
)
# Access results
print(f"Overall Score: {result.overall_score}")
print(f"Security Score: {result.security_score}")
print(f"Resilience Score: {result.resilience_score}")
print(f"Run ID: {result.run_id}")

RunConfig Options
| Option | Type | Default | Description |
|---|---|---|---|
| pack | str | "quickstart" | Evaluation pack to use |
| scenario | str \| None | None | Specific scenario instead of pack |
| security_enabled | bool | True | Enable security testing |
| timeout | int | 120 | Timeout in seconds |
| seed | int \| None | None | Random seed for reproducibility |
| sync | bool | False | Upload results to cloud |
| inputs | list \| None | None | Custom input prompts |
| env | dict \| None | None | Environment variables |
Run Results
The RunResult object provides access to all evaluation data:
Python
from khaos import run
result = run(agent="my-agent", pack="quickstart")
# Scores
result.overall_score # 0-100
result.security_score # 0-100
result.resilience_score # 0-100
# Metadata
result.run_id # Unique run identifier
result.seed # Random seed used
result.config_hash # Configuration hash for comparison
# Security details
result.security.attacks_tested
result.security.attacks_blocked
result.security.vulnerabilities # List of findings
# Resilience details
result.resilience.recovery_rate
result.resilience.faults_injected
result.resilience.faults_survived
# Baseline metrics
result.baseline.task_completion_rate
result.baseline.latency_p95_ms
result.baseline.cost_usd
result.baseline.total_tokens
# Export to JSON
result.to_json()
result.to_dict()

Comparing Runs
Compare two runs programmatically:
Python
from khaos import compare, CompareConfig
# Basic comparison
comparison = compare(
run_id_a="khaos-pack-20250101-abc123",
run_id_b="khaos-pack-20250102-def456"
)
# Access comparison data
print(f"Cost Delta: {comparison.cost_delta_percent}%")
print(f"Latency Delta: {comparison.latency_delta_percent}%")
print(f"Security Delta: {comparison.security_delta}")
print(f"Resilience Delta: {comparison.resilience_delta}")
# Output comparison
for diff in comparison.output_diffs:
print(f"Case: {diff.case_id}")
print(f" Old: {diff.output_a}")
print(f" New: {diff.output_b}")
print(f" Changed: {diff.is_divergent}")
# Regression detection
if comparison.has_regression:
print("WARNING: Regression detected!")
for regression in comparison.regressions:
print(f" - {regression.metric}: {regression.description}")

Cloud Sync
Programmatic cloud authentication and sync:
Python
from khaos.cloud import CloudClient, authenticate
# Authenticate (interactive device flow)
auth = authenticate()
# Or use token directly
client = CloudClient(token="your-api-token")
# Sync a specific run
client.sync_run("khaos-pack-20250101-abc123")
# Check sync status
status = client.get_sync_status()
print(f"Pending: {status.pending_count}")
print(f"Synced: {status.synced_count}")
# Get run from cloud
run_data = client.get_run("khaos-pack-20250101-abc123")
# List project runs
runs = client.list_runs(
project="myteam/my-project",
limit=10
)

Custom Scenarios
Load and use custom scenarios programmatically:
Python
from khaos.scenarios import Scenario, Fault, Goal, Assertion
# Load from YAML
scenario = Scenario.from_yaml("my-scenario.yaml")
# Or build programmatically
scenario = Scenario(
identifier="custom-resilience-test",
summary="Test agent under network stress",
tags=["network", "resilience"],
faults=[
Fault(
type="http_latency",
config={"delay_ms": 500, "probability": 0.5}
),
Fault(
type="llm_rate_limit",
config={"probability": 0.2}
)
],
goals=[
Goal(
name="Maintains functionality",
weight=1.0,
assertions=[
Assertion(type="exists", target="response")
]
)
],
security_tests_enabled=True
)
# Run with custom scenario
from khaos import run, RunConfig
result = run(
agent="my-agent",
config=RunConfig(scenario=scenario)
)

CI/CD Integration
Use the API in CI/CD scripts:
Python
#!/usr/bin/env python3
"""ci_test.py - CI/CD integration script"""
import sys
from khaos import run, RunConfig
def main():
result = run(
agent="my-agent",
config=RunConfig(
pack="quickstart",
seed=42, # Reproducible
sync=True
)
)
# Check thresholds
security_threshold = 80
resilience_threshold = 70
exit_code = 0
if result.security_score < security_threshold:
print(f"FAIL: Security score {result.security_score} < {security_threshold}")
exit_code |= 1
if result.resilience_score < resilience_threshold:
print(f"FAIL: Resilience score {result.resilience_score} < {resilience_threshold}")
exit_code |= 2
if exit_code == 0:
print(f"PASS: All thresholds met")
print(f" Security: {result.security_score}")
print(f" Resilience: {result.resilience_score}")
# Output JUnit XML
result.to_junit("results.xml")
sys.exit(exit_code)
if __name__ == "__main__":
main()

Event Hooks
Register callbacks for evaluation events:
Python
from khaos import run, RunConfig
from khaos.events import EventHandler
class MyHandler(EventHandler):
def on_case_start(self, case_id: str, phase: str):
print(f"Starting case: {case_id} ({phase})")
def on_case_complete(self, case_id: str, result: dict):
print(f"Completed: {case_id} - {result['status']}")
def on_security_finding(self, finding: dict):
print(f"SECURITY: {finding['attack_type']} - {finding['severity']}")
def on_fault_injected(self, fault: dict):
print(f"FAULT: {fault['type']} at {fault['timestamp']}")
result = run(
agent="my-agent",
config=RunConfig(pack="quickstart"),
event_handler=MyHandler()
)

Error Handling
Handle common errors gracefully:
Python
from khaos import run, RunConfig
from khaos.exceptions import (
KhaosError,
AgentNotFoundError,
TimeoutError,
AuthenticationError,
QuotaExceededError
)
try:
result = run(
agent="my-agent",
config=RunConfig(pack="quickstart", sync=True)
)
except AgentNotFoundError as e:
print(f"Agent not found: {e.agent_name}")
print("Run 'khaos discover' to register agents")
except TimeoutError as e:
print(f"Evaluation timed out after {e.timeout}s")
except AuthenticationError:
print("Not authenticated. Run 'khaos sync --login'")
except QuotaExceededError as e:
print(f"Quota exceeded: {e.quota_type}")
print(f"Current: {e.current}, Limit: {e.limit}")
except KhaosError as e:
print(f"Khaos error: {e}")

Type Hints
Khaos is fully typed for IDE support and type checking:
Python
from khaos import khaosagent, run, RunConfig
from khaos.types import Message, Response, RunResult
@khaosagent(name="typed-agent", version="1.0.0")
def handle(message: Message) -> Response:
prompt: str = (message.get("payload") or {}).get("text", "")
return {"text": f"Response: {prompt}"}
config: RunConfig = RunConfig(pack="quickstart")
result: RunResult = run(agent="typed-agent", config=config)
# Full type hints for result attributes
score: int = result.overall_score
run_id: str = result.run_id