{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Efficient training of language models to fill in the middle","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d","json_url":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d/signal.json","generated_at":"2026-06-08T15:46:46.893+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d","signal_json":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d/signal.json","source":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Efficient training of language models to fill in the middle. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Efficient training of language models to fill in the middle | OpenAI July 28, 2022 Publication Efficient training of language models to fill in the middle Read paper.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","source_host":"openai.com","occurred_at":"2022-07-28T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:46:46.893+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["training"],"score":13,"reason":"OpenAI has a writing signal matching infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d/signal.json","required":true},{"label":"source","url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Efficient training of language models to fill in the middle\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Efficient training of language models to fill in the middle","text":"OpenAI published Efficient training of language models to fill in the middle."},{"subject":"Efficient training of language models to fill in the middle","predicate":"is classified as","object":"writing signal","text":"Efficient training of language models to fill in the middle is classified as writing signal."},{"subject":"Efficient training of language models to fill in the middle","predicate":"belongs to","object":"talking desk","text":"Efficient training of language models to fill in the middle belongs to talking desk."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Efficient training of language models to fill in the middle has evidence coverage 1 captured evidence page."},{"subject":"Efficient training of language models to fill in the middle","predicate":"matches data-business lanes","object":"Infrastructure","text":"Efficient training of language models to fill in the middle matches data-business lanes Infrastructure."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has captured page count","object":"1","text":"Efficient training of language models to fill in the middle has captured page count 1."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has readable page count","object":"1","text":"Efficient training of language models to fill in the middle has readable page count 1."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has related signal count","object":"6","text":"Efficient training of language models to fill in the middle has related signal count 6."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Efficient training of language models to fill in the middle has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has source host","object":"openai.com","text":"Efficient training of language models to fill in the middle has source host openai.com."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has lab","object":"OpenAI","text":"Efficient training of language models to fill in the middle has lab OpenAI."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has signal desk","object":"talking","text":"Efficient training of language models to fill in the middle has signal desk talking."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has source host","object":"openai.com","text":"Efficient training of language models to fill in the middle has source host openai.com."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has radar lane","object":"Infrastructure","text":"Efficient training of language models to fill in the middle has radar lane Infrastructure."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has matched term","object":"training","text":"Efficient training of language models to fill in the middle has matched term training."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has watch term","object":"Eval methodology","text":"Efficient training of language models to fill in the middle has watch term Eval methodology."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has watch term","object":"Data pipeline","text":"Efficient training of language models to fill in the middle has watch term Data pipeline."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has watch term","object":"Infrastructure","text":"Efficient training of language models to fill in the middle has watch term Infrastructure."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Efficient training of language models to fill in the middle. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Efficient training of language models to fill in the middle | OpenAI July 28, 2022 Publication Efficient training of language models to fill in the middle Read paper.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Efficient training of language models to fill in the middle","text":"OpenAI published Efficient training of language models to fill in the middle."},{"subject":"Efficient training of language models to fill in the middle","predicate":"is classified as","object":"writing signal","text":"Efficient training of language models to fill in the middle is classified as writing signal."},{"subject":"Efficient training of language models to fill in the middle","predicate":"belongs to","object":"talking desk","text":"Efficient training of language models to fill in the middle belongs to talking desk."},{"subject":"Efficient training of language models to fill in the middle","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Efficient training of language models to fill in the middle has evidence coverage 1 captured evidence page."},{"subject":"Efficient training of language models to fill in the middle","predicate":"matches data-business lanes","object":"Infrastructure","text":"Efficient training of language models to fill in the middle matches data-business lanes Infrastructure."}]},"signal":{"id":"fb07d493-945e-4626-9a63-1ea95924d85d","url":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d","json_url":"https://onlylabs.fyi/signals/fb07d493-945e-4626-9a63-1ea95924d85d/signal.json","source_url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","title":"Efficient training of language models to fill in the middle","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2022-07-28T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":13,"matched_terms":["training"],"reason":"OpenAI has a writing signal matching infrastructure."}},"primary_evidence_page":{"url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","final_url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","title":"Efficient training of language models to fill in the middle","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:46.893+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Efficient training of language models to fill in the middle | OpenAI July 28, 2022 Publication Efficient training of language models to fill in the middle Read paper Loading… Share Abstract We show that autoregressive language models can learn to infill text after we apply a straightforward transformation to the dataset, which simply moves a span of text from the middle of a document to its end. While this data augmentation has garnered much interest in recent years, we provide extensive evidence that training models with a large fraction of data transformed in this way does not harm the original left-to-right generative capability, as measured by perplexity and sampling evaluations across a wide range of scales. Given the usefulness, simplicity, and efficiency of training models to fill-in-the-middle (FIM), we suggest that future autoregressive language models be trained with FIM by default. To this end, we run a series of ablations on key hyperparameters, such as the data transformation frequency, the structure of the transformation, and the method of selecting the infill span. We use these ablations to prescribe strong default settings and best practices to train FIM models. We..."},"evidence_pages":[{"url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","final_url":"https://openai.com/index/efficient-training-of-language-models-to-fill-in-the-middle","title":"Efficient training of language models to fill in the middle","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:46.893+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Efficient training of language models to fill in the middle | OpenAI July 28, 2022 Publication Efficient training of language models to fill in the middle Read paper Loading… Share Abstract We show that autoregressive language models can learn to infill text after we apply a straightforward transformation to the dataset, which simply moves a span of text from the middle of a document to its end. While this data augmentation has garnered much interest in recent years, we provide extensive evidence that training models with a large fraction of data transformed in this way does not harm the original left-to-right generative capability, as measured by perplexity and sampling evaluations across a wide range of scales. Given the usefulness, simplicity, and efficiency of training models to fill-in-the-middle (FIM), we suggest that future autoregressive language models be trained with FIM by default. To this end, we run a series of ablations on key hyperparameters, such as the data transformation frequency, the structure of the transformation, and the method of selecting the infill span. We use these ablations to prescribe strong default settings and best practices to train FIM models. We..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}