{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Constitutional Classifiers","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95","json_url":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95/signal.json","generated_at":"2026-06-11T04:15:28.959286+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95","signal_json":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95/signal.json","source":"https://www.anthropic.com/research/constitutional-classifiers","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":null},"answer_pack":{"answer":"Anthropic published Constitutional Classifiers. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Constitutional Classifiers: Defending against universal jailbreaks \\ Anthropic Alignment Constitutional Classifiers: Defending against universal jailbreaks Feb 3, 2025 A.... onlylabs links this event to 1 captured evidence page and 6 related writing signals.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/research/constitutional-classifiers","source_host":"anthropic.com","occurred_at":"2025-02-03T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/constitutional-classifiers"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:15:28.959286+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/research/constitutional-classifiers","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Constitutional Classifiers\" for frontier lab strategy."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Constitutional Classifiers","text":"Anthropic published Constitutional Classifiers."},{"subject":"Constitutional Classifiers","predicate":"is classified as","object":"writing signal","text":"Constitutional Classifiers is classified as writing signal."},{"subject":"Constitutional Classifiers","predicate":"belongs to","object":"talking desk","text":"Constitutional Classifiers belongs to talking desk."},{"subject":"Constitutional Classifiers","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Constitutional Classifiers has evidence coverage 1 captured evidence page."},{"subject":"Constitutional Classifiers","predicate":"has captured page count","object":"1","text":"Constitutional Classifiers has captured page count 1."},{"subject":"Constitutional Classifiers","predicate":"has readable page count","object":"1","text":"Constitutional Classifiers has readable page count 1."},{"subject":"Constitutional Classifiers","predicate":"has related signal count","object":"6","text":"Constitutional Classifiers has related signal count 6."},{"subject":"Constitutional Classifiers","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Constitutional Classifiers has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Constitutional Classifiers","predicate":"has source host","object":"anthropic.com","text":"Constitutional Classifiers has source host anthropic.com."},{"subject":"Constitutional Classifiers","predicate":"has lab","object":"Anthropic","text":"Constitutional Classifiers has lab Anthropic."},{"subject":"Constitutional Classifiers","predicate":"has signal desk","object":"talking","text":"Constitutional Classifiers has signal desk talking."},{"subject":"Constitutional Classifiers","predicate":"has source host","object":"anthropic.com","text":"Constitutional Classifiers has source host anthropic.com."},{"subject":"Constitutional Classifiers","predicate":"has watch term","object":"Eval methodology","text":"Constitutional Classifiers has watch term Eval methodology."},{"subject":"Constitutional Classifiers","predicate":"has watch term","object":"Infrastructure","text":"Constitutional Classifiers has watch term Infrastructure."},{"subject":"Constitutional Classifiers","predicate":"has watch term","object":"Safety and alignment","text":"Constitutional Classifiers has watch term Safety and alignment."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Constitutional Classifiers. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Constitutional Classifiers: Defending against universal jailbreaks \\ Anthropic Alignment Constitutional Classifiers: Defending against universal jailbreaks Feb 3, 2025 A.... onlylabs links this event to 1 captured evidence page and 6 related writing signals.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Constitutional Classifiers","text":"Anthropic published Constitutional Classifiers."},{"subject":"Constitutional Classifiers","predicate":"is classified as","object":"writing signal","text":"Constitutional Classifiers is classified as writing signal."},{"subject":"Constitutional Classifiers","predicate":"belongs to","object":"talking desk","text":"Constitutional Classifiers belongs to talking desk."},{"subject":"Constitutional Classifiers","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Constitutional Classifiers has evidence coverage 1 captured evidence page."}]},"signal":{"id":"c9ead551-035c-40d8-8ee9-da8713bacc95","url":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95","json_url":"https://onlylabs.fyi/signals/c9ead551-035c-40d8-8ee9-da8713bacc95/signal.json","source_url":"https://www.anthropic.com/research/constitutional-classifiers","title":"Constitutional Classifiers","summary":"Anthropic published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2025-02-03T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/constitutional-classifiers"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://www.anthropic.com/research/constitutional-classifiers","final_url":"https://www.anthropic.com/research/constitutional-classifiers","title":"Constitutional Classifiers","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:15:28.959286+00:00","bytes":177051,"raw_path":"bb6ac706c8cdd78f80c84126c807c9668b398bd700caf0b6a24268520d9875ed.html","content_hash":"c4c2215fb139dd6d04cb1d183272d330d06ae40b31a87735f172e18a856ef9b0","excerpt_chars":1200,"truncated":true,"excerpt":"Constitutional Classifiers: Defending against universal jailbreaks \\ Anthropic Alignment Constitutional Classifiers: Defending against universal jailbreaks Feb 3, 2025 A new paper from the Anthropic Safeguards Research Team describes a method that defends AI models against universal jailbreaks. A prototype version of the method was robust to thousands of hours of human red teaming for universal jailbreaks, albeit with high overrefusal rates and compute overhead. An updated version achieved similar robustness on synthetic evaluations, and did so with a 0.38% increase in refusal rates and moderate additional compute costs. Large language models have extensive safety training to prevent harmful outputs. For example, we train Claude to refuse to respond to user queries involving the production of biological or chemical weapons. Nevertheless, models are still vulnerable to jailbreaks : inputs designed to bypass their safety guardrails and force them to produce harmful responses. Some jailbreaks flood the model with very long prompts ; others modify the style of the input , such as uSiNg uNuSuAl cApItALiZaTiOn. Historically, jailbreaks have proved difficult to detect and block: these..."},"evidence_pages":[{"url":"https://www.anthropic.com/research/constitutional-classifiers","final_url":"https://www.anthropic.com/research/constitutional-classifiers","title":"Constitutional Classifiers","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:15:28.959286+00:00","bytes":177051,"raw_path":"bb6ac706c8cdd78f80c84126c807c9668b398bd700caf0b6a24268520d9875ed.html","content_hash":"c4c2215fb139dd6d04cb1d183272d330d06ae40b31a87735f172e18a856ef9b0","excerpt_chars":1200,"truncated":true,"excerpt":"Constitutional Classifiers: Defending against universal jailbreaks \\ Anthropic Alignment Constitutional Classifiers: Defending against universal jailbreaks Feb 3, 2025 A new paper from the Anthropic Safeguards Research Team describes a method that defends AI models against universal jailbreaks. A prototype version of the method was robust to thousands of hours of human red teaming for universal jailbreaks, albeit with high overrefusal rates and compute overhead. An updated version achieved similar robustness on synthetic evaluations, and did so with a 0.38% increase in refusal rates and moderate additional compute costs. Large language models have extensive safety training to prevent harmful outputs. For example, we train Claude to refuse to respond to user queries involving the production of biological or chemical weapons. Nevertheless, models are still vulnerable to jailbreaks : inputs designed to bypass their safety guardrails and force them to produce harmful responses. Some jailbreaks flood the model with very long prompts ; others modify the style of the input , such as uSiNg uNuSuAl cApItALiZaTiOn. Historically, jailbreaks have proved difficult to detect and block: these..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}