Python API Reference

Use Khaos programmatically for advanced integration, custom workflows, and automation. The Python API provides full access to evaluation, comparison, and cloud sync capabilities.

Installation

Terminal
pip install khaos

# Verify installation
python -c "import khaos; print(khaos.__version__)"

Core Components

Module            Description
khaos.khaosagent  Decorator for agent handlers
khaos.run         Programmatic evaluation execution
khaos.compare     Run comparison and diff generation
khaos.cloud       Cloud sync and authentication
khaos.scenarios   Scenario loading and management
khaos.packs       Evaluation pack configuration

@khaosagent Decorator

The primary integration point for your agent code.

Python
from khaos import khaosagent

@khaosagent(
    name="my-agent",  # Required: unique agent identifier
    version="1.0.0",  # Required: semantic version
    framework="openai",  # Optional: framework hint
    description="My agent",  # Optional: human-readable description
    tags=["production"],  # Optional: categorization tags
)
def handle(message: dict) -> dict:
    """Take a standardized message and return the agent's response.

    Args:
        message: Incoming message, shaped like::

            {
                "type": "user_message",
                "payload": {"text": "...", "metadata": {}},
                "context": {"run_id": "...", "scenario": "...", "phase": "..."}
            }

    Returns:
        A response of the form ``{"text": "response", "metadata": {...}}``;
        this example fills in only ``"text"``.
    """
    # A missing or null payload is treated the same as an empty one.
    payload = message.get("payload") or {}
    user_text = payload.get("text", "")
    # Your agent logic here
    reply = f"Response to: {user_text}"
    return {"text": reply}

See @khaosagent Decorator for complete documentation.

Programmatic Evaluation

Run evaluations programmatically instead of using the CLI:

Python
from khaos import run, RunConfig

# Basic run with pack
result = run(
    agent="my-agent",
    pack="quickstart"
)

# Run with custom configuration
config = RunConfig(
    pack="full-eval",
    security_enabled=True,
    timeout=300,
    seed=12345,  # For reproducibility
    sync=True    # Upload to cloud
)

result = run(
    agent="my-agent",
    config=config
)

# Access results
print(f"Overall Score: {result.overall_score}")
print(f"Security Score: {result.security_score}")
print(f"Resilience Score: {result.resilience_score}")
print(f"Run ID: {result.run_id}")

RunConfig Options

Option            Type         Default       Description
pack              str          "quickstart"  Evaluation pack to use
scenario          str | None   None          Specific scenario instead of pack
security_enabled  bool         True          Enable security testing
timeout           int          120           Timeout in seconds
seed              int | None   None          Random seed for reproducibility
sync              bool         False         Upload results to cloud
inputs            list | None  None          Custom input prompts
env               dict | None  None          Environment variables

Run Results

The RunResult object provides access to all evaluation data:

Python
from khaos import run

result = run(agent="my-agent", pack="quickstart")

# Scores
result.overall_score      # 0-100
result.security_score     # 0-100
result.resilience_score   # 0-100

# Metadata
result.run_id             # Unique run identifier
result.seed               # Random seed used
result.config_hash        # Configuration hash for comparison

# Security details
result.security.attacks_tested
result.security.attacks_blocked
result.security.vulnerabilities  # List of findings

# Resilience details
result.resilience.recovery_rate
result.resilience.faults_injected
result.resilience.faults_survived

# Baseline metrics
result.baseline.task_completion_rate
result.baseline.latency_p95_ms
result.baseline.cost_usd
result.baseline.total_tokens

# Export to JSON or a dict
result.to_json()
result.to_dict()

Comparing Runs

Compare two runs programmatically:

Python
from khaos import compare, CompareConfig

# Basic comparison
comparison = compare(
    run_id_a="khaos-pack-20250101-abc123",
    run_id_b="khaos-pack-20250102-def456"
)

# Access comparison data
print(f"Cost Delta: {comparison.cost_delta_percent}%")
print(f"Latency Delta: {comparison.latency_delta_percent}%")
print(f"Security Delta: {comparison.security_delta}")
print(f"Resilience Delta: {comparison.resilience_delta}")

# Output comparison
for diff in comparison.output_diffs:
    print(f"Case: {diff.case_id}")
    print(f"  Old: {diff.output_a}")
    print(f"  New: {diff.output_b}")
    print(f"  Changed: {diff.is_divergent}")

# Regression detection
if comparison.has_regression:
    print("WARNING: Regression detected!")
    for regression in comparison.regressions:
        print(f"  - {regression.metric}: {regression.description}")

Cloud Sync

Programmatic cloud authentication and sync:

Python
from khaos.cloud import CloudClient, authenticate

# Authenticate (interactive device flow)
auth = authenticate()

# Or use token directly
client = CloudClient(token="your-api-token")

# Sync a specific run
client.sync_run("khaos-pack-20250101-abc123")

# Check sync status
status = client.get_sync_status()
print(f"Pending: {status.pending_count}")
print(f"Synced: {status.synced_count}")

# Get run from cloud
run_data = client.get_run("khaos-pack-20250101-abc123")

# List project runs
runs = client.list_runs(
    project="myteam/my-project",
    limit=10
)

Custom Scenarios

Load and use custom scenarios programmatically:

Python
from khaos.scenarios import Scenario, Fault, Goal, Assertion

# Load from YAML
scenario = Scenario.from_yaml("my-scenario.yaml")

# Or build programmatically
scenario = Scenario(
    identifier="custom-resilience-test",
    summary="Test agent under network stress",
    tags=["network", "resilience"],
    faults=[
        Fault(
            type="http_latency",
            config={"delay_ms": 500, "probability": 0.5}
        ),
        Fault(
            type="llm_rate_limit",
            config={"probability": 0.2}
        )
    ],
    goals=[
        Goal(
            name="Maintains functionality",
            weight=1.0,
            assertions=[
                Assertion(type="exists", target="response")
            ]
        )
    ],
    security_tests_enabled=True
)

# Run with custom scenario
from khaos import run, RunConfig

result = run(
    agent="my-agent",
    config=RunConfig(scenario=scenario)
)

CI/CD Integration

Use the API in CI/CD scripts:

Python
#!/usr/bin/env python3
"""ci_test.py - CI/CD integration script"""
import sys
from khaos import run, RunConfig

def main():
    """Run the quickstart pack and exit non-zero when a score misses its threshold.

    The exit status is a bit mask so CI can tell which check failed:
    1 is OR-ed in when the security score is below its threshold and 2 when
    the resilience score is, so 3 means both failed and 0 means all passed.
    The JUnit report is written on every run, pass or fail, before exiting.
    """
    result = run(
        agent="my-agent",
        config=RunConfig(
            pack="quickstart",
            seed=42,  # Reproducible
            sync=True
        )
    )

    # Check thresholds
    security_threshold = 80
    resilience_threshold = 70

    exit_code = 0

    if result.security_score < security_threshold:
        print(f"FAIL: Security score {result.security_score} < {security_threshold}")
        exit_code |= 1

    if result.resilience_score < resilience_threshold:
        print(f"FAIL: Resilience score {result.resilience_score} < {resilience_threshold}")
        exit_code |= 2

    if exit_code == 0:
        # Plain string: nothing is interpolated here, so no f-prefix.
        print("PASS: All thresholds met")
        print(f"  Security: {result.security_score}")
        print(f"  Resilience: {result.resilience_score}")

    # Output JUnit XML
    result.to_junit("results.xml")

    sys.exit(exit_code)

if __name__ == "__main__":
    main()

Event Hooks

Register callbacks for evaluation events:

Python
from khaos import run, RunConfig
from khaos.events import EventHandler

class MyHandler(EventHandler):
    """Event handler that prints one line for each evaluation event it receives."""

    def on_case_start(self, case_id: str, phase: str):
        """Print the id and phase of the case that is starting."""
        line = f"Starting case: {case_id} ({phase})"
        print(line)

    def on_case_complete(self, case_id: str, result: dict):
        """Print the case id with the ``status`` entry of its result."""
        status = result['status']
        print(f"Completed: {case_id} - {status}")

    def on_security_finding(self, finding: dict):
        """Print the ``attack_type`` and ``severity`` entries of a finding."""
        attack_type = finding['attack_type']
        severity = finding['severity']
        print(f"SECURITY: {attack_type} - {severity}")

    def on_fault_injected(self, fault: dict):
        """Print the ``type`` and ``timestamp`` entries of an injected fault."""
        fault_type = fault['type']
        injected_at = fault['timestamp']
        print(f"FAULT: {fault_type} at {injected_at}")

result = run(
    agent="my-agent",
    config=RunConfig(pack="quickstart"),
    event_handler=MyHandler()
)

Error Handling

Handle common errors gracefully:

Python
from khaos import run, RunConfig
from khaos.exceptions import (
    KhaosError,
    AgentNotFoundError,
    # Aliased on import so it does not shadow Python's built-in TimeoutError
    # for the rest of this script.
    TimeoutError as KhaosTimeoutError,
    AuthenticationError,
    QuotaExceededError,
)

try:
    result = run(
        agent="my-agent",
        config=RunConfig(pack="quickstart", sync=True)
    )
except AgentNotFoundError as e:
    print(f"Agent not found: {e.agent_name}")
    print("Run 'khaos discover' to register agents")
except KhaosTimeoutError as e:
    print(f"Evaluation timed out after {e.timeout}s")
except AuthenticationError:
    print("Not authenticated. Run 'khaos sync --login'")
except QuotaExceededError as e:
    print(f"Quota exceeded: {e.quota_type}")
    print(f"Current: {e.current}, Limit: {e.limit}")
# Listed last as the fallback; presumably the common base class of the
# exceptions above (confirm against khaos.exceptions).
except KhaosError as e:
    print(f"Khaos error: {e}")

Type Hints

Khaos is fully typed for IDE support and type checking:

Python
from khaos import khaosagent, run, RunConfig
from khaos.types import Message, Response, RunResult

@khaosagent(name="typed-agent", version="1.0.0")
def handle(message: Message) -> Response:
    """Return a response that echoes the text of the incoming message."""
    # A missing or null payload is treated the same as an empty one.
    payload = message.get("payload") or {}
    prompt: str = payload.get("text", "")
    return {"text": f"Response: {prompt}"}

config: RunConfig = RunConfig(pack="quickstart")
result: RunResult = run(agent="typed-agent", config=config)

# Full type hints for result attributes
score: int = result.overall_score
run_id: str = result.run_id