{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Swe Bench Sonnet","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069","json_url":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069/signal.json","generated_at":"2026-06-11T04:16:57.774245+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069","signal_json":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069/signal.json","source":"https://www.anthropic.com/engineering/swe-bench-sonnet","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":null},"answer_pack":{"answer":"Anthropic published Swe Bench Sonnet. This talking signal gives public context for research themes, product direction, policy, or launch framing. 
High-signal details: Claude SWE-Bench Performance \\ Anthropic Engineering at Anthropic Raising the bar on SWE-bench Verified with Claude 3.5 Sonnet Published Jan 06, 2025 SWE-bench is an AI.... onlylabs links this event to 1 captured evidence page and 6 related writing signals.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/engineering/swe-bench-sonnet","source_host":"anthropic.com","occurred_at":"2025-01-06T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/engineering/swe-bench-sonnet"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:16:57.774245+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":null,"opportunities_json":null},"analysis_playbo
ok":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/engineering/swe-bench-sonnet","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source 
context, and cited evidence, analyze Anthropic's writing signal \"Swe Bench Sonnet\" for frontier lab strategy."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Swe Bench Sonnet","text":"Anthropic published Swe Bench Sonnet."},{"subject":"Swe Bench Sonnet","predicate":"is classified as","object":"writing signal","text":"Swe Bench Sonnet is classified as writing signal."},{"subject":"Swe Bench Sonnet","predicate":"belongs to","object":"talking desk","text":"Swe Bench Sonnet belongs to talking desk."},{"subject":"Swe Bench Sonnet","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Swe Bench Sonnet has evidence coverage 1 captured evidence page."},{"subject":"Swe Bench Sonnet","predicate":"has captured page count","object":"1","text":"Swe Bench Sonnet has captured page count 1."},{"subject":"Swe Bench Sonnet","predicate":"has readable page count","object":"1","text":"Swe Bench Sonnet has readable page count 1."},{"subject":"Swe Bench Sonnet","predicate":"has related signal count","object":"6","text":"Swe Bench Sonnet has related signal count 6."},{"subject":"Swe Bench Sonnet","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Swe Bench Sonnet has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention."},{"subject":"Swe Bench Sonnet","predicate":"has source host","object":"anthropic.com","text":"Swe Bench Sonnet has source host anthropic.com."},{"subject":"Swe Bench Sonnet","predicate":"has lab","object":"Anthropic","text":"Swe Bench Sonnet has lab Anthropic."},{"subject":"Swe Bench Sonnet","predicate":"has signal desk","object":"talking","text":"Swe Bench Sonnet has signal desk talking."},{"subject":"Swe Bench 
Sonnet","predicate":"has source host","object":"anthropic.com","text":"Swe Bench Sonnet has source host anthropic.com."},{"subject":"Swe Bench Sonnet","predicate":"has watch term","object":"Eval methodology","text":"Swe Bench Sonnet has watch term Eval methodology."},{"subject":"Swe Bench Sonnet","predicate":"has watch term","object":"Data pipeline","text":"Swe Bench Sonnet has watch term Data pipeline."},{"subject":"Swe Bench Sonnet","predicate":"has watch term","object":"Agents and tool use","text":"Swe Bench Sonnet has watch term Agents and tool use."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Swe Bench Sonnet. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Claude SWE-Bench Performance \\ Anthropic Engineering at Anthropic Raising the bar on SWE-bench Verified with Claude 3.5 Sonnet Published Jan 06, 2025 SWE-bench is an AI.... onlylabs links this event to 1 captured evidence page and 6 related writing signals.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Swe Bench Sonnet","text":"Anthropic published Swe Bench Sonnet."},{"subject":"Swe Bench Sonnet","predicate":"is classified as","object":"writing signal","text":"Swe Bench Sonnet is classified as writing signal."},{"subject":"Swe Bench Sonnet","predicate":"belongs to","object":"talking desk","text":"Swe Bench Sonnet belongs to talking desk."},{"subject":"Swe Bench Sonnet","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Swe Bench Sonnet has evidence coverage 1 captured evidence page."}]},"signal":{"id":"fc7bb41b-1737-43ef-970e-02c9f0a27069","url":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069","json_url":"https://onlylabs.fyi/signals/fc7bb41b-1737-43ef-970e-02c9f0a27069/signal.json","source_url":"https://www.anthropic.com/engineering/swe-bench-sonnet","title":"Swe Bench Sonnet","summary":"Anthropic published a writing 
signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2025-01-06T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/engineering/swe-bench-sonnet"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://www.anthropic.com/engineering/swe-bench-sonnet","final_url":"https://www.anthropic.com/engineering/swe-bench-sonnet","title":"Swe Bench Sonnet","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:16:57.774245+00:00","bytes":183990,"raw_path":"43d7916975114d603c0922afae58525ba646635999164b9c60948a083946a62e.html","content_hash":"a50aac59272a5c03f5dcc0b66f12220c718fd4aeab56cc188047fe4945ee960c","excerpt_chars":1200,"truncated":true,"excerpt":"Claude SWE-Bench Performance \\ Anthropic Engineering at Anthropic Raising the bar on SWE-bench Verified with Claude 3.5 Sonnet Published Jan 06, 2025 SWE-bench is an AI evaluation benchmark that assesses a model&#x27;s ability to complete real-world software engineering tasks. Our latest model, the upgraded Claude 3.5 Sonnet , achieved 49% on SWE-bench Verified, a software engineering evaluation, beating the previous state-of-the-art model&#x27;s 45%. This post explains the \"agent\" we built around the model, and is intended to help developers get the best possible performance out of Claude 3.5 Sonnet. 
SWE-bench is an AI evaluation benchmark that assesses a model&#x27;s ability to complete real-world software engineering tasks. Specifically, it tests how the model can resolve GitHub issues from popular open-source Python repositories. For each task in the benchmark, the AI model is given a set up Python environment and the checkout (a local working copy) of the repository from just before the issue was resolved. The model then needs to understand, modify, and test the code before submitting its proposed solution. Each solution is graded against the real unit tests from the pull..."},"evidence_pages":[{"url":"https://www.anthropic.com/engineering/swe-bench-sonnet","final_url":"https://www.anthropic.com/engineering/swe-bench-sonnet","title":"Swe Bench Sonnet","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:16:57.774245+00:00","bytes":183990,"raw_path":"43d7916975114d603c0922afae58525ba646635999164b9c60948a083946a62e.html","content_hash":"a50aac59272a5c03f5dcc0b66f12220c718fd4aeab56cc188047fe4945ee960c","excerpt_chars":1200,"truncated":true,"excerpt":"Claude SWE-Bench Performance \\ Anthropic Engineering at Anthropic Raising the bar on SWE-bench Verified with Claude 3.5 Sonnet Published Jan 06, 2025 SWE-bench is an AI evaluation benchmark that assesses a model&#x27;s ability to complete real-world software engineering tasks. Our latest model, the upgraded Claude 3.5 Sonnet , achieved 49% on SWE-bench Verified, a software engineering evaluation, beating the previous state-of-the-art model&#x27;s 45%. This post explains the \"agent\" we built around the model, and is intended to help developers get the best possible performance out of Claude 3.5 Sonnet. SWE-bench is an AI evaluation benchmark that assesses a model&#x27;s ability to complete real-world software engineering tasks. Specifically, it tests how the model can resolve GitHub issues from popular open-source Python repositories. 
For each task in the benchmark, the AI model is given a set up Python environment and the checkout (a local working copy) of the repository from just before the issue was resolved. The model then needs to understand, modify, and test the code before submitting its proposed solution. Each solution is graded against the real unit tests from the pull..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain 
Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}