{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Introducing the SWE-Lancer benchmark","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce","json_url":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce/signal.json","generated_at":"2026-06-08T15:45:49.962+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce","signal_json":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce/signal.json","source":"https://openai.com/index/swe-lancer","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Introducing the SWE-Lancer benchmark. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Notable new benchmark by OpenAI · Introducing the SWE-Lancer benchmark | OpenAI February 18, 2025 Introducing the SWE-Lancer benchmark Can frontier LLMs earn $1 million from real-world freelance software.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/swe-lancer","source_host":"openai.com","occurred_at":"2025-02-18T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Notability","value":"Notable new benchmark by OpenAI","source":"signal"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"benchmark","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/swe-lancer"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:45:49.962+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["benchmark"],"score":13,"reason":"OpenAI has a writing signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce/signal.json","required":true},{"label":"source","url":"https://openai.com/index/swe-lancer","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Introducing the SWE-Lancer benchmark\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Introducing the SWE-Lancer benchmark","text":"OpenAI published Introducing the SWE-Lancer benchmark."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"is classified as","object":"writing signal","text":"Introducing the SWE-Lancer benchmark is classified as writing signal."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"belongs to","object":"talking desk","text":"Introducing the SWE-Lancer benchmark belongs to talking desk."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Introducing the SWE-Lancer benchmark has evidence coverage 1 captured evidence page."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"matches data-business lanes","object":"Evals and quality","text":"Introducing the SWE-Lancer benchmark matches data-business lanes Evals and quality."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has captured page count","object":"1","text":"Introducing the SWE-Lancer benchmark has captured page count 1."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has readable page count","object":"1","text":"Introducing the SWE-Lancer benchmark has readable page count 1."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has related signal count","object":"6","text":"Introducing the SWE-Lancer benchmark has related signal count 6."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Introducing the SWE-Lancer benchmark has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has source host","object":"openai.com","text":"Introducing the SWE-Lancer benchmark has source host openai.com."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has lab","object":"OpenAI","text":"Introducing the SWE-Lancer benchmark has lab OpenAI."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has signal desk","object":"talking","text":"Introducing the SWE-Lancer benchmark has signal desk talking."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has source host","object":"openai.com","text":"Introducing the SWE-Lancer benchmark has source host openai.com."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has notability","object":"Notable new benchmark by OpenAI","text":"Introducing the SWE-Lancer benchmark has notability Notable new benchmark by OpenAI."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has radar lane","object":"Evals and quality","text":"Introducing the SWE-Lancer benchmark has radar lane Evals and quality."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has matched term","object":"benchmark","text":"Introducing the SWE-Lancer benchmark has matched term benchmark."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has watch term","object":"Eval methodology","text":"Introducing the SWE-Lancer benchmark has watch term Eval methodology."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has watch term","object":"Data pipeline","text":"Introducing the SWE-Lancer benchmark has watch term Data pipeline."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Introducing the SWE-Lancer benchmark. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Notable new benchmark by OpenAI · Introducing the SWE-Lancer benchmark | OpenAI February 18, 2025 Introducing the SWE-Lancer benchmark Can frontier LLMs earn $1 million from real-world freelance software.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Introducing the SWE-Lancer benchmark","text":"OpenAI published Introducing the SWE-Lancer benchmark."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"is classified as","object":"writing signal","text":"Introducing the SWE-Lancer benchmark is classified as writing signal."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"belongs to","object":"talking desk","text":"Introducing the SWE-Lancer benchmark belongs to talking desk."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Introducing the SWE-Lancer benchmark has evidence coverage 1 captured evidence page."},{"subject":"Introducing the SWE-Lancer benchmark","predicate":"matches data-business lanes","object":"Evals and quality","text":"Introducing the SWE-Lancer benchmark matches data-business lanes Evals and quality."}]},"signal":{"id":"e4a7915b-6df4-4335-bada-66114f9ac4ce","url":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce","json_url":"https://onlylabs.fyi/signals/e4a7915b-6df4-4335-bada-66114f9ac4ce/signal.json","source_url":"https://openai.com/index/swe-lancer","title":"Introducing the SWE-Lancer benchmark","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2025-02-18T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/swe-lancer"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":13,"matched_terms":["benchmark"],"reason":"OpenAI has a writing signal matching evals and quality."}},"primary_evidence_page":{"url":"https://openai.com/index/swe-lancer","final_url":"https://openai.com/index/swe-lancer","title":"Introducing the SWE-Lancer benchmark","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:49.962+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Introducing the SWE-Lancer benchmark | OpenAI February 18, 2025 Introducing the SWE-Lancer benchmark Can frontier LLMs earn $1 million from real-world freelance software engineering? Read paper Access repository Loading… Share We introduce SWE-Lancer, a benchmark of over 1,400 freelance software engineering tasks from Upwork, valued at $1 million USD total in real-world payouts. SWE-Lancer encompasses both independent engineering tasks—ranging from $50 bug fixes to $32,000 feature implementations—and managerial tasks, where models choose between technical implementation proposals. Independent tasks are graded with end-to-end tests triple-verified by experienced software engineers, while managerial decisions are assessed against the choices of the original hired engineering managers. We evaluate model performance and find that frontier models are still unable to solve the majority of tasks. To facilitate future research, we open-source a unified Docker image and a public evaluation split, SWE-Lancer Diamond. By mapping model performance to monetary value, we hope SWE-Lancer enables greater research into the economic impact of AI model development. --- Update on July 28, 2025:..."},"evidence_pages":[{"url":"https://openai.com/index/swe-lancer","final_url":"https://openai.com/index/swe-lancer","title":"Introducing the SWE-Lancer benchmark","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:49.962+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Introducing the SWE-Lancer benchmark | OpenAI February 18, 2025 Introducing the SWE-Lancer benchmark Can frontier LLMs earn $1 million from real-world freelance software engineering? Read paper Access repository Loading… Share We introduce SWE-Lancer, a benchmark of over 1,400 freelance software engineering tasks from Upwork, valued at $1 million USD total in real-world payouts. SWE-Lancer encompasses both independent engineering tasks—ranging from $50 bug fixes to $32,000 feature implementations—and managerial tasks, where models choose between technical implementation proposals. Independent tasks are graded with end-to-end tests triple-verified by experienced software engineers, while managerial decisions are assessed against the choices of the original hired engineering managers. We evaluate model performance and find that frontier models are still unable to solve the majority of tasks. To facilitate future research, we open-source a unified Docker image and a public evaluation split, SWE-Lancer Diamond. By mapping model performance to monetary value, we hope SWE-Lancer enables greater research into the economic impact of AI model development. --- Update on July 28, 2025:..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}