{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Discovering Language Model Behaviors With Model Written Evaluations","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e","json_url":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e/signal.json","generated_at":"2026-06-11T04:18:06.004015+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e","signal_json":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e/signal.json","source":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"Anthropic published Discovering Language Model Behaviors With Model Written Evaluations. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Discovering Language Model Behaviors with Model-Written Evaluations \\ Anthropic Alignment Research Discovering Language Model Behaviors with Model-Written Evaluations.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","source_host":"anthropic.com","occurred_at":"2022-12-19T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Matched term","value":"evaluation","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:18:06.004015+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval","evaluation"],"score":15,"reason":"Anthropic has a writing signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Discovering Language Model Behaviors With Model Written Evaluations\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Discovering Language Model Behaviors With Model Written Evaluations","text":"Anthropic published Discovering Language Model Behaviors With Model Written Evaluations."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"is classified as","object":"writing signal","text":"Discovering Language Model Behaviors With Model Written Evaluations is classified as writing signal."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"belongs to","object":"talking desk","text":"Discovering Language Model Behaviors With Model Written Evaluations belongs to talking desk."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Discovering Language Model Behaviors With Model Written Evaluations has evidence coverage 1 captured evidence page."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"matches data-business lanes","object":"Evals and quality","text":"Discovering Language Model Behaviors With Model Written Evaluations matches data-business lanes Evals and quality."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has captured page count","object":"1","text":"Discovering Language Model Behaviors With Model Written Evaluations has captured page count 1."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has readable page count","object":"1","text":"Discovering Language Model Behaviors With Model Written Evaluations has readable page count 1."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has related signal count","object":"6","text":"Discovering Language Model Behaviors With Model Written Evaluations has related signal count 6."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Discovering Language Model Behaviors With Model Written Evaluations has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has source host","object":"anthropic.com","text":"Discovering Language Model Behaviors With Model Written Evaluations has source host anthropic.com."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has lab","object":"Anthropic","text":"Discovering Language Model Behaviors With Model Written Evaluations has lab Anthropic."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has signal desk","object":"talking","text":"Discovering Language Model Behaviors With Model Written Evaluations has signal desk talking."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has source host","object":"anthropic.com","text":"Discovering Language Model Behaviors With Model Written Evaluations has source host anthropic.com."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has radar lane","object":"Evals and quality","text":"Discovering Language Model Behaviors With Model Written Evaluations has radar lane Evals and quality."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has matched term","object":"eval","text":"Discovering Language Model Behaviors With Model Written Evaluations has matched term eval."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has matched term","object":"evaluation","text":"Discovering Language Model Behaviors With Model Written Evaluations has matched term evaluation."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has watch term","object":"Eval methodology","text":"Discovering Language Model Behaviors With Model Written Evaluations has watch term Eval methodology."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has watch term","object":"Data pipeline","text":"Discovering Language Model Behaviors With Model Written Evaluations has watch term Data pipeline."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Discovering Language Model Behaviors With Model Written Evaluations. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Discovering Language Model Behaviors with Model-Written Evaluations \\ Anthropic Alignment Research Discovering Language Model Behaviors with Model-Written Evaluations.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Discovering Language Model Behaviors With Model Written Evaluations","text":"Anthropic published Discovering Language Model Behaviors With Model Written Evaluations."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"is classified as","object":"writing signal","text":"Discovering Language Model Behaviors With Model Written Evaluations is classified as writing signal."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"belongs to","object":"talking desk","text":"Discovering Language Model Behaviors With Model Written Evaluations belongs to talking desk."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Discovering Language Model Behaviors With Model Written Evaluations has evidence coverage 1 captured evidence page."},{"subject":"Discovering Language Model Behaviors With Model Written Evaluations","predicate":"matches data-business lanes","object":"Evals and quality","text":"Discovering Language Model Behaviors With Model Written Evaluations matches data-business lanes Evals and quality."}]},"signal":{"id":"475b44fc-ba5d-4483-bc99-2fb91f1a3b6e","url":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e","json_url":"https://onlylabs.fyi/signals/475b44fc-ba5d-4483-bc99-2fb91f1a3b6e/signal.json","source_url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","title":"Discovering Language Model Behaviors With Model Written Evaluations","summary":"Anthropic published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2022-12-19T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":15,"matched_terms":["eval","evaluation"],"reason":"Anthropic has a writing signal matching evals and quality."}},"primary_evidence_page":{"url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","final_url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","title":"Discovering Language Model Behaviors With Model Written Evaluations","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:18:06.004015+00:00","bytes":106331,"raw_path":"f9030497897883cc5c1857ebdcaa1b7cb9771d80793a2f467637603cd3e0c1c0.html","content_hash":"7f41fef1e30c044f26518298a9b8c4584fe499debccdc024fb9dd3f1658fe57f","excerpt_chars":1200,"truncated":true,"excerpt":"Discovering Language Model Behaviors with Model-Written Evaluations \\ Anthropic Alignment Research Discovering Language Model Behaviors with Model-Written Evaluations Dec 19, 2022 Read Paper Abstract As language models (LMs) scale, they develop many novel behaviors, good and bad, exacerbating the need to evaluate how they behave. Prior work creates evaluations with crowdwork (which is time-consuming and expensive) or existing data sources (which are not always available). Here, we automatically generate evaluations with LMs. We explore approaches with varying amounts of human effort, from instructing LMs to write yes/no questions to making complex Winogender schemas with multiple stages of LM-based generation and filtering. Crowdworkers rate the examples as highly relevant and agree with 90-100% of labels, sometimes more so than corresponding human-written datasets. We generate 154 datasets and discover new cases of inverse scaling where LMs get worse with size. Larger LMs repeat back a dialog user&#x27;s preferred answer (\"sycophancy\") and express greater desire to pursue concerning goals like resource acquisition and goal preservation. We also find some of the first examples of..."},"evidence_pages":[{"url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","final_url":"https://www.anthropic.com/research/discovering-language-model-behaviors-with-model-written-evaluations","title":"Discovering Language Model Behaviors With Model Written Evaluations","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:18:06.004015+00:00","bytes":106331,"raw_path":"f9030497897883cc5c1857ebdcaa1b7cb9771d80793a2f467637603cd3e0c1c0.html","content_hash":"7f41fef1e30c044f26518298a9b8c4584fe499debccdc024fb9dd3f1658fe57f","excerpt_chars":1200,"truncated":true,"excerpt":"Discovering Language Model Behaviors with Model-Written Evaluations \\ Anthropic Alignment Research Discovering Language Model Behaviors with Model-Written Evaluations Dec 19, 2022 Read Paper Abstract As language models (LMs) scale, they develop many novel behaviors, good and bad, exacerbating the need to evaluate how they behave. Prior work creates evaluations with crowdwork (which is time-consuming and expensive) or existing data sources (which are not always available). Here, we automatically generate evaluations with LMs. We explore approaches with varying amounts of human effort, from instructing LMs to write yes/no questions to making complex Winogender schemas with multiple stages of LM-based generation and filtering. Crowdworkers rate the examples as highly relevant and agree with 90-100% of labels, sometimes more so than corresponding human-written datasets. We generate 154 datasets and discover new cases of inverse scaling where LMs get worse with size. Larger LMs repeat back a dialog user&#x27;s preferred answer (\"sycophancy\") and express greater desire to pursue concerning goals like resource acquisition and goal preservation. We also find some of the first examples of..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}