{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Repo: openai/mle-bench","description":"OpenAI repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670","json_url":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670/signal.json","generated_at":"2026-06-11T04:01:17.979651+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670","signal_json":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670/signal.json","source":"https://github.com/openai/mle-bench","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"OpenAI published openai/mle-bench (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo openai/mle-bench · language Python · New benchmark repo from OpenAI. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/openai/mle-bench","source_host":"github.com","occurred_at":"2024-10-08T17:07:40+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"openai/mle-bench","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"1,574","source":"traction"},{"label":"Notability","value":"New benchmark repo from OpenAI","source":"signal"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"benchmark","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/openai/mle-bench"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:01:17.979651+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["benchmark"],"score":15,"reason":"OpenAI has a repo signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670/signal.json","required":true},{"label":"source","url":"https://github.com/openai/mle-bench","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's repo signal \"openai/mle-bench\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published repo","object":"openai/mle-bench","text":"OpenAI published repo openai/mle-bench."},{"subject":"openai/mle-bench","predicate":"is classified as","object":"repo signal","text":"openai/mle-bench is classified as repo signal."},{"subject":"openai/mle-bench","predicate":"belongs to","object":"repos desk","text":"openai/mle-bench belongs to repos desk."},{"subject":"openai/mle-bench","predicate":"has context","object":"Python","text":"openai/mle-bench has context Python."},{"subject":"openai/mle-bench","predicate":"has evidence coverage","object":"1 captured evidence page","text":"openai/mle-bench has evidence coverage 1 captured evidence page."},{"subject":"openai/mle-bench","predicate":"matches data-business lanes","object":"Evals and quality","text":"openai/mle-bench matches data-business lanes Evals and quality."},{"subject":"openai/mle-bench","predicate":"has captured page count","object":"1","text":"openai/mle-bench has captured page count 1."},{"subject":"openai/mle-bench","predicate":"has readable page count","object":"1","text":"openai/mle-bench has readable page count 1."},{"subject":"openai/mle-bench","predicate":"has related signal count","object":"6","text":"openai/mle-bench has related signal count 6."},{"subject":"openai/mle-bench","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"openai/mle-bench has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"openai/mle-bench","predicate":"has source host","object":"github.com","text":"openai/mle-bench has source host github.com."},{"subject":"openai/mle-bench","predicate":"has lab","object":"OpenAI","text":"openai/mle-bench has lab OpenAI."},{"subject":"openai/mle-bench","predicate":"has signal desk","object":"repos","text":"openai/mle-bench has signal desk repos."},{"subject":"openai/mle-bench","predicate":"has source host","object":"github.com","text":"openai/mle-bench has source host github.com."},{"subject":"openai/mle-bench","predicate":"has repository","object":"openai/mle-bench","text":"openai/mle-bench has repository openai/mle-bench."},{"subject":"openai/mle-bench","predicate":"has language","object":"Python","text":"openai/mle-bench has language Python."},{"subject":"openai/mle-bench","predicate":"has stars","object":"1,574","text":"openai/mle-bench has stars 1,574."},{"subject":"openai/mle-bench","predicate":"has notability","object":"New benchmark repo from OpenAI","text":"openai/mle-bench has notability New benchmark repo from OpenAI."},{"subject":"openai/mle-bench","predicate":"has radar lane","object":"Evals and quality","text":"openai/mle-bench has radar lane Evals and quality."}]},"intelligence":{"signal_desk":"repos","answer":"OpenAI published openai/mle-bench (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo openai/mle-bench · language Python · New benchmark repo from OpenAI. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published repo","object":"openai/mle-bench","text":"OpenAI published repo openai/mle-bench."},{"subject":"openai/mle-bench","predicate":"is classified as","object":"repo signal","text":"openai/mle-bench is classified as repo signal."},{"subject":"openai/mle-bench","predicate":"belongs to","object":"repos desk","text":"openai/mle-bench belongs to repos desk."},{"subject":"openai/mle-bench","predicate":"has context","object":"Python","text":"openai/mle-bench has context Python."},{"subject":"openai/mle-bench","predicate":"has evidence coverage","object":"1 captured evidence page","text":"openai/mle-bench has evidence coverage 1 captured evidence page."},{"subject":"openai/mle-bench","predicate":"matches data-business lanes","object":"Evals and quality","text":"openai/mle-bench matches data-business lanes Evals and quality."}]},"signal":{"id":"0e0a2488-0939-46da-b3f8-5ca633fe9670","url":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670","json_url":"https://onlylabs.fyi/signals/0e0a2488-0939-46da-b3f8-5ca633fe9670/signal.json","source_url":"https://github.com/openai/mle-bench","title":"openai/mle-bench","summary":"OpenAI published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2024-10-08T17:07:40+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/openai/mle-bench"]},"facets":{"repo":"openai/mle-bench","language":"Python"},"traction":{"github_stars":1574,"hn_points":3,"hn_comments":0,"hn_story_id":"45224996","hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":15,"matched_terms":["benchmark"],"reason":"OpenAI has a repo signal matching evals and quality."}},"primary_evidence_page":{"url":"https://github.com/openai/mle-bench","final_url":"https://github.com/openai/mle-bench","title":"openai/mle-bench repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:01:17.979651+00:00","bytes":27804,"raw_path":"a2fdd037b6b6065da0141414641a11082a7d1143cf5e0f78c5b552256c770948.json","content_hash":"498910ce8e47703156504f37d31886744a4db0dae50a29c2f6258888a0123bf1","excerpt_chars":1200,"truncated":true,"excerpt":"openai/mle-bench Description: MLE-bench is a benchmark for measuring how well AI agents perform at machine learning engineering Language: Python License: NOASSERTION Stars: 1573 Forks: 253 Open issues: 8 Created: 2024-10-08T17:07:40Z Pushed: 2026-04-24T17:33:44Z Default branch: main Fork: no Archived: no README: MLE-bench Code for the paper [\"MLE-Bench: Evaluating Machine Learning Agents on Machine Learning Engineering\"](https://arxiv.org/abs/2410.07095). We have released the code used to construct the dataset, the evaluation logic, as well as the agents we evaluated for this benchmark. Leaderboard *Update* (04-24-2026): We are currently not taking any new submissions to the leaderboard while we develop an improved process for ensuring submissions are fair and comparable. We will share updates on this process in the future. | Agent | LLM(s) used | Low == Lite (%) | Medium (%) | High (%) | All (%) | Running Time (hours) | Date | Source Code Available | Grading Reports Available | |-------|-------------|-----------------|------------|----------|---------|----------------------|------|----------------------|---------------------------| | [Famou-Agent..."},"evidence_pages":[{"url":"https://github.com/openai/mle-bench","final_url":"https://github.com/openai/mle-bench","title":"openai/mle-bench repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:01:17.979651+00:00","bytes":27804,"raw_path":"a2fdd037b6b6065da0141414641a11082a7d1143cf5e0f78c5b552256c770948.json","content_hash":"498910ce8e47703156504f37d31886744a4db0dae50a29c2f6258888a0123bf1","excerpt_chars":1200,"truncated":true,"excerpt":"openai/mle-bench Description: MLE-bench is a benchmark for measuring how well AI agents perform at machine learning engineering Language: Python License: NOASSERTION Stars: 1573 Forks: 253 Open issues: 8 Created: 2024-10-08T17:07:40Z Pushed: 2026-04-24T17:33:44Z Default branch: main Fork: no Archived: no README: MLE-bench Code for the paper [\"MLE-Bench: Evaluating Machine Learning Agents on Machine Learning Engineering\"](https://arxiv.org/abs/2410.07095). We have released the code used to construct the dataset, the evaluation logic, as well as the agents we evaluated for this benchmark. Leaderboard *Update* (04-24-2026): We are currently not taking any new submissions to the leaderboard while we develop an improved process for ensuring submissions are fair and comparable. We will share updates on this process in the future. | Agent | LLM(s) used | Low == Lite (%) | Medium (%) | High (%) | All (%) | Running Time (hours) | Date | Source Code Available | Grading Reports Available | |-------|-------------|-----------------|------------|----------|---------|----------------------|------|----------------------|---------------------------| | [Famou-Agent..."}],"related_signals":[{"id":"9b55a747-c2bc-494f-a3ad-ef484503c650","url":"https://onlylabs.fyi/signals/9b55a747-c2bc-494f-a3ad-ef484503c650","source_url":"https://github.com/openai/role-specific-plugins","title":"openai/role-specific-plugins","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-02T16:27:24+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"26c411d2-1675-4d85-a267-55c0a2a1b6b4","url":"https://onlylabs.fyi/signals/26c411d2-1675-4d85-a267-55c0a2a1b6b4","source_url":"https://github.com/openai/imagegencam","title":"openai/imagegencam","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-05-19T23:14:18+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"e7a21543-9852-40bb-9102-041209e64fa5","url":"https://onlylabs.fyi/signals/e7a21543-9852-40bb-9102-041209e64fa5","source_url":"https://github.com/openai/openai-cli","title":"openai/openai-cli","context":"Go","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-05-01T16:40:05+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"359b222c-96a1-40f5-b8d4-7a469ee9825c","url":"https://onlylabs.fyi/signals/359b222c-96a1-40f5-b8d4-7a469ee9825c","source_url":"https://github.com/openai/openai-realtime-meeting-assistant","title":"openai/openai-realtime-meeting-assistant","context":"Go","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-04-29T22:54:59+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"1bf8025d-8227-489e-8e04-57d8919213fc","url":"https://onlylabs.fyi/signals/1bf8025d-8227-489e-8e04-57d8919213fc","source_url":"https://github.com/openai/monitorability-evals","title":"openai/monitorability-evals","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-04-22T22:12:39+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"f61802fb-361e-4d2b-b871-57ea3a33fb22","url":"https://onlylabs.fyi/signals/f61802fb-361e-4d2b-b871-57ea3a33fb22","source_url":"https://github.com/openai/privacy-filter","title":"openai/privacy-filter","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-04-17T22:49:09+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"}]}