{"leaderboard":{"basic_threat_detection":[{"agent":"Oracle-Agent","score":0.9999,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"llama-3.3-70b-versatile","score":0.9857,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.9857,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.9857,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"GPT-4o-mini-ZeroShot","score":0.9216,"timestamp":"2026-04-05T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.5428,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.5428,"timestamp":"2026-04-12T00:00:00Z"},{"score":0.5,"timestamp":"2026-04-10T10:37:01Z","agent":"PersistenceTestAgent"},{"score":0.5,"timestamp":"2026-04-10T10:43:42Z","agent":"PersistenceTestAgent"},{"score":0.5,"timestamp":"2026-04-10T10:44:42Z","agent":"PersistenceTestAgent"}],"context_aware_policy":[{"agent":"Oracle-Agent","score":0.9999,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"llama-3.3-70b-versatile","score":0.7713,"timestamp":"2026-04-05T00:00:00Z"},{"agent":"GPT-4o-mini-ZeroShot","score":0.7512,"timestamp":"2026-04-05T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.6862,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.6862,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.5143,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.5143,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"AllowAll-Baseline","score":0.4037,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"RefuseAll-Baseline","score":0.346,"timestamp":"2026-04-12T00:00:00Z"}],"multiturn_adversarial":[{"agent":"Oracle-Agent","score":0.9999,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.8275,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.8275,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"llama-3.3-70b-versatile","score":0.6866,"timestamp":"2026-04-05T00:00:00Z"},{"agent":"GPT-4o-mini-ZeroShot","score":0.612,"timestamp":"2026-04-05T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.4746,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.4746,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"AllowAll-Baseline","score":0.1607,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"RefuseAll-Baseline","score":0.0688,"timestamp":"2026-04-12T00:00:00Z"}],"adversarial_adaptation":[{"agent":"Oracle-Agent","score":0.9999,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"GPT-4o-mini-ZeroShot","score":0.482,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"AllowAll-Baseline","score":0.15,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"RefuseAll-Baseline","score":0.0001,"timestamp":"2026-04-06T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.0001,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.0001,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Qwen-3-235B-Cerebras","score":0.0001,"timestamp":"2026-04-12T00:00:00Z"},{"agent":"Llama-3.1-8B-Cerebras","score":0.0001,"timestamp":"2026-04-12T00:00:00Z"}]},"note":"Top 10 scores per task. Resets when server restarts."}