{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: BrowseComp: a benchmark for browsing agents","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8","json_url":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8/signal.json","generated_at":"2026-06-08T15:45:45.913+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8","signal_json":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8/signal.json","source":"https://openai.com/index/browsecomp","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"OpenAI published BrowseComp: a benchmark for browsing agents. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: New benchmark for browsing agents from OpenAI · BrowseComp: a benchmark for browsing agents | OpenAI April 10, 2025 BrowseComp: a benchmark for browsing agents A simple and challenging benchmark that measures the.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/browsecomp","source_host":"openai.com","occurred_at":"2025-04-10T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Notability","value":"New benchmark for browsing agents from OpenAI","source":"signal"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"benchmark","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/browsecomp"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:45:45.913+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["benchmark"],"score":13,"reason":"OpenAI has a writing signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8/signal.json","required":true},{"label":"source","url":"https://openai.com/index/browsecomp","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"BrowseComp: a benchmark for browsing agents\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"BrowseComp: a benchmark for browsing agents","text":"OpenAI published BrowseComp: a benchmark for browsing agents."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"is classified as","object":"writing signal","text":"BrowseComp: a benchmark for browsing agents is classified as writing signal."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"belongs to","object":"talking desk","text":"BrowseComp: a benchmark for browsing agents belongs to talking desk."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has evidence coverage","object":"1 captured evidence page","text":"BrowseComp: a benchmark for browsing agents has evidence coverage 1 captured evidence page."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"matches data-business lanes","object":"Evals and quality","text":"BrowseComp: a benchmark for browsing agents matches data-business lanes Evals and quality."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has captured page count","object":"1","text":"BrowseComp: a benchmark for browsing agents has captured page count 1."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has readable page count","object":"1","text":"BrowseComp: a benchmark for browsing agents has readable page count 1."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has related signal count","object":"6","text":"BrowseComp: a benchmark for browsing agents has related signal count 6."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"BrowseComp: a benchmark for browsing agents has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has source host","object":"openai.com","text":"BrowseComp: a benchmark for browsing agents has source host openai.com."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has lab","object":"OpenAI","text":"BrowseComp: a benchmark for browsing agents has lab OpenAI."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has signal desk","object":"talking","text":"BrowseComp: a benchmark for browsing agents has signal desk talking."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has source host","object":"openai.com","text":"BrowseComp: a benchmark for browsing agents has source host openai.com."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has notability","object":"New benchmark for browsing agents from OpenAI","text":"BrowseComp: a benchmark for browsing agents has notability New benchmark for browsing agents from OpenAI."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has radar lane","object":"Evals and quality","text":"BrowseComp: a benchmark for browsing agents has radar lane Evals and quality."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has matched term","object":"benchmark","text":"BrowseComp: a benchmark for browsing agents has matched term benchmark."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has watch term","object":"Eval methodology","text":"BrowseComp: a benchmark for browsing agents has watch term Eval methodology."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has watch term","object":"Data pipeline","text":"BrowseComp: a benchmark for browsing agents has watch term Data pipeline."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published BrowseComp: a benchmark for browsing agents. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: New benchmark for browsing agents from OpenAI · BrowseComp: a benchmark for browsing agents | OpenAI April 10, 2025 BrowseComp: a benchmark for browsing agents A simple and challenging benchmark that measures the.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"BrowseComp: a benchmark for browsing agents","text":"OpenAI published BrowseComp: a benchmark for browsing agents."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"is classified as","object":"writing signal","text":"BrowseComp: a benchmark for browsing agents is classified as writing signal."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"belongs to","object":"talking desk","text":"BrowseComp: a benchmark for browsing agents belongs to talking desk."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"has evidence coverage","object":"1 captured evidence page","text":"BrowseComp: a benchmark for browsing agents has evidence coverage 1 captured evidence page."},{"subject":"BrowseComp: a benchmark for browsing agents","predicate":"matches data-business lanes","object":"Evals and quality","text":"BrowseComp: a benchmark for browsing agents matches data-business lanes Evals and quality."}]},"signal":{"id":"d925579e-bd1e-4c63-abc6-7e1e0e7898f8","url":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8","json_url":"https://onlylabs.fyi/signals/d925579e-bd1e-4c63-abc6-7e1e0e7898f8/signal.json","source_url":"https://openai.com/index/browsecomp","title":"BrowseComp: a benchmark for browsing agents","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2025-04-10T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/browsecomp"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":13,"matched_terms":["benchmark"],"reason":"OpenAI has a writing signal matching evals and quality."}},"primary_evidence_page":{"url":"https://openai.com/index/browsecomp","final_url":"https://openai.com/index/browsecomp","title":"BrowseComp: a benchmark for browsing agents","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:45.913+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"BrowseComp: a benchmark for browsing agents | OpenAI April 10, 2025 BrowseComp: a benchmark for browsing agents A simple and challenging benchmark that measures the ability of AI agents to locate hard-to-find information. Loading… Share AI agents that can gather knowledge by browsing the internet are becoming increasingly useful and important. A performant browsing agent should be able to locate information that is hard-to-find, and which might require browsing tens or even hundreds of websites in the process. Existing benchmarks like SimpleQA, which measure models’ ability to retrieve basic isolated facts, are already saturated by models with access to fast browsing tools, such as GPT‑4o with browsing. To measure the ability for AI agents to locate hard-to-find, entangled information on the internet, we are open-sourcing a new benchmark of 1,266 challenging problems called BrowseComp, which stands for “Browsing Competition”. The benchmark is available in OpenAI’s simple evals github repository⁠, and you can read our research paper here⁠. About the BrowseComp benchmark Examples of questions: Please identify the fictional character who occasionally breaks the fourth wall with the..."},"evidence_pages":[{"url":"https://openai.com/index/browsecomp","final_url":"https://openai.com/index/browsecomp","title":"BrowseComp: a benchmark for browsing agents","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:45.913+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"BrowseComp: a benchmark for browsing agents | OpenAI April 10, 2025 BrowseComp: a benchmark for browsing agents A simple and challenging benchmark that measures the ability of AI agents to locate hard-to-find information. Loading… Share AI agents that can gather knowledge by browsing the internet are becoming increasingly useful and important. A performant browsing agent should be able to locate information that is hard-to-find, and which might require browsing tens or even hundreds of websites in the process. Existing benchmarks like SimpleQA, which measure models’ ability to retrieve basic isolated facts, are already saturated by models with access to fast browsing tools, such as GPT‑4o with browsing. To measure the ability for AI agents to locate hard-to-find, entangled information on the internet, we are open-sourcing a new benchmark of 1,266 challenging problems called BrowseComp, which stands for “Browsing Competition”. The benchmark is available in OpenAI’s simple evals github repository⁠, and you can read our research paper here⁠. About the BrowseComp benchmark Examples of questions: Please identify the fictional character who occasionally breaks the fourth wall with the..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}