{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Measuring the performance of our models on real-world tasks","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339","json_url":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339/signal.json","generated_at":"2026-06-08T15:45:35.285+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339","signal_json":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339/signal.json","source":"https://openai.com/index/gdpval","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Measuring the performance of our models on real-world tasks. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Notable evaluation post, moderate traction. · Measuring the performance of our models on real-world tasks | OpenAI September 25, 2025 Measuring the performance of our models on real-world tasks We’re introducing.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/gdpval","source_host":"openai.com","occurred_at":"2025-09-25T09:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Notability","value":"Notable evaluation post, moderate traction.","source":"signal"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Matched term","value":"evaluation","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/gdpval"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:45:35.285+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval","evaluation"],"score":16,"reason":"OpenAI has a writing signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339/signal.json","required":true},{"label":"source","url":"https://openai.com/index/gdpval","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Measuring the performance of our models on real-world tasks\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Measuring the performance of our models on real-world tasks","text":"OpenAI published Measuring the performance of our models on real-world tasks."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"is classified as","object":"writing signal","text":"Measuring the performance of our models on real-world tasks is classified as writing signal."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"belongs to","object":"talking desk","text":"Measuring the performance of our models on real-world tasks belongs to talking desk."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Measuring the performance of our models on real-world tasks has evidence coverage 1 captured evidence page."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"matches data-business lanes","object":"Evals and quality","text":"Measuring the performance of our models on real-world tasks matches data-business lanes Evals and quality."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has captured page count","object":"1","text":"Measuring the performance of our models on real-world tasks has captured page count 1."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has readable page count","object":"1","text":"Measuring the performance of our models on real-world tasks has readable page count 1."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has related signal count","object":"6","text":"Measuring the performance of our models on real-world tasks has related signal count 6."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Measuring the performance of our models on real-world tasks has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has source host","object":"openai.com","text":"Measuring the performance of our models on real-world tasks has source host openai.com."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has lab","object":"OpenAI","text":"Measuring the performance of our models on real-world tasks has lab OpenAI."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has signal desk","object":"talking","text":"Measuring the performance of our models on real-world tasks has signal desk talking."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has source host","object":"openai.com","text":"Measuring the performance of our models on real-world tasks has source host openai.com."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has notability","object":"Notable evaluation post, moderate traction.","text":"Measuring the performance of our models on real-world tasks has notability Notable evaluation post, moderate traction.."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has radar lane","object":"Evals and quality","text":"Measuring the performance of our models on real-world tasks has radar lane Evals and quality."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has matched term","object":"eval","text":"Measuring the performance of our models on real-world tasks has matched term eval."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has matched term","object":"evaluation","text":"Measuring the performance of our models on real-world tasks has matched term evaluation."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has watch term","object":"Eval methodology","text":"Measuring the performance of our models on real-world tasks has watch term Eval methodology."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Measuring the performance of our models on real-world tasks. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Notable evaluation post, moderate traction. · Measuring the performance of our models on real-world tasks | OpenAI September 25, 2025 Measuring the performance of our models on real-world tasks We’re introducing.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Measuring the performance of our models on real-world tasks","text":"OpenAI published Measuring the performance of our models on real-world tasks."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"is classified as","object":"writing signal","text":"Measuring the performance of our models on real-world tasks is classified as writing signal."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"belongs to","object":"talking desk","text":"Measuring the performance of our models on real-world tasks belongs to talking desk."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Measuring the performance of our models on real-world tasks has evidence coverage 1 captured evidence page."},{"subject":"Measuring the performance of our models on real-world tasks","predicate":"matches data-business lanes","object":"Evals and quality","text":"Measuring the performance of our models on real-world tasks matches data-business lanes Evals and quality."}]},"signal":{"id":"2bdcf90f-4569-4be7-80ad-6928aba5d339","url":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339","json_url":"https://onlylabs.fyi/signals/2bdcf90f-4569-4be7-80ad-6928aba5d339/signal.json","source_url":"https://openai.com/index/gdpval","title":"Measuring the performance of our models on real-world tasks","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2025-09-25T09:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/gdpval"]},"facets":{},"traction":{"github_stars":null,"hn_points":42,"hn_comments":9,"hn_story_id":"45375392","hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":16,"matched_terms":["eval","evaluation"],"reason":"OpenAI has a writing signal matching evals and quality."}},"primary_evidence_page":{"url":"https://openai.com/index/gdpval","final_url":"https://openai.com/index/gdpval","title":"Measuring the performance of our models on real-world tasks","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:35.285+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Measuring the performance of our models on real-world tasks | OpenAI September 25, 2025 Measuring the performance of our models on real-world tasks We’re introducing GDPval, a new evaluation that measures model performance on economically valuable, real-world tasks across 44 occupations. Read the paper Visit evals.openai.com Share Our mission is to ensure that artificial general intelligence benefits all of humanity. As part of our mission, we want to transparently communicate progress on how AI models can help people in the real world. That’s why we’re introducing GDPval: a new evaluation designed to help us track how well our models and others perform on economically valuable, real-world tasks. We call this evaluation GDPval because we started with the concept of Gross Domestic Product (GDP) as a key economic indicator and drew tasks from the key occupations in the industries that contribute most to GDP. People often speculate about AI’s broader impact on society, but the clearest way to understand its potential is by looking at what models are already capable of doing. History shows that major technologies—from the internet to smartphones—took more than a decade to go from..."},"evidence_pages":[{"url":"https://openai.com/index/gdpval","final_url":"https://openai.com/index/gdpval","title":"Measuring the performance of our models on real-world tasks","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:35.285+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Measuring the performance of our models on real-world tasks | OpenAI September 25, 2025 Measuring the performance of our models on real-world tasks We’re introducing GDPval, a new evaluation that measures model performance on economically valuable, real-world tasks across 44 occupations. Read the paper Visit evals.openai.com Share Our mission is to ensure that artificial general intelligence benefits all of humanity. As part of our mission, we want to transparently communicate progress on how AI models can help people in the real world. That’s why we’re introducing GDPval: a new evaluation designed to help us track how well our models and others perform on economically valuable, real-world tasks. We call this evaluation GDPval because we started with the concept of Gross Domestic Product (GDP) as a key economic indicator and drew tasks from the key occupations in the industries that contribute most to GDP. People often speculate about AI’s broader impact on society, but the clearest way to understand its potential is by looking at what models are already capable of doing. History shows that major technologies—from the internet to smartphones—took more than a decade to go from..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}