{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Reinforcement learning with prediction-based rewards","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1","json_url":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1/signal.json","generated_at":"2026-06-08T15:47:03.152+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1","signal_json":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1/signal.json","source":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"data","label":"Data 
demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Reinforcement learning with prediction-based rewards. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Reinforcement learning with prediction-based rewards | OpenAI October 31, 2018 Reinforcement learning with prediction-based rewards Read paper View code Loading… Share.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Data demand in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","source_host":"openai.com","occurred_at":"2018-10-31T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Data demand","source":"radar"},{"label":"Matched term","value":"rag","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/reinforcement-learning-with-prediction-based-rewards"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:47:03.152+00:00"},"data_business":{"matches":true,"lanes":[{"key":"data","label":"Data 
demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"}],"matched_terms":["rag"],"score":13,"reason":"OpenAI has a writing signal matching data demand."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Data demand?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps 
identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1/signal.json","required":true},{"label":"source","url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Reinforcement learning with prediction-based rewards\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Reinforcement learning with prediction-based rewards","text":"OpenAI published Reinforcement learning with prediction-based rewards."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"is classified as","object":"writing signal","text":"Reinforcement learning with prediction-based rewards is classified as writing signal."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"belongs to","object":"talking desk","text":"Reinforcement learning with prediction-based rewards belongs to talking desk."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Reinforcement learning with prediction-based rewards has evidence 
coverage 1 captured evidence page."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"matches data-business lanes","object":"Data demand","text":"Reinforcement learning with prediction-based rewards matches data-business lanes Data demand."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has captured page count","object":"1","text":"Reinforcement learning with prediction-based rewards has captured page count 1."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has readable page count","object":"1","text":"Reinforcement learning with prediction-based rewards has readable page count 1."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has related signal count","object":"6","text":"Reinforcement learning with prediction-based rewards has related signal count 6."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Reinforcement learning with prediction-based rewards has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has source host","object":"openai.com","text":"Reinforcement learning with prediction-based rewards has source host openai.com."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has lab","object":"OpenAI","text":"Reinforcement learning with prediction-based rewards has lab OpenAI."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has signal desk","object":"talking","text":"Reinforcement learning with prediction-based rewards has signal desk 
talking."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has source host","object":"openai.com","text":"Reinforcement learning with prediction-based rewards has source host openai.com."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has radar lane","object":"Data demand","text":"Reinforcement learning with prediction-based rewards has radar lane Data demand."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has matched term","object":"rag","text":"Reinforcement learning with prediction-based rewards has matched term rag."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has watch term","object":"RL environments","text":"Reinforcement learning with prediction-based rewards has watch term RL environments."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has watch term","object":"Infrastructure","text":"Reinforcement learning with prediction-based rewards has watch term Infrastructure."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has watch term","object":"Agents and tool use","text":"Reinforcement learning with prediction-based rewards has watch term Agents and tool use."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Reinforcement learning with prediction-based rewards. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Reinforcement learning with prediction-based rewards | OpenAI October 31, 2018 Reinforcement learning with prediction-based rewards Read paper View code Loading… Share.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. 
It also maps to Data demand in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Reinforcement learning with prediction-based rewards","text":"OpenAI published Reinforcement learning with prediction-based rewards."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"is classified as","object":"writing signal","text":"Reinforcement learning with prediction-based rewards is classified as writing signal."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"belongs to","object":"talking desk","text":"Reinforcement learning with prediction-based rewards belongs to talking desk."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Reinforcement learning with prediction-based rewards has evidence coverage 1 captured evidence page."},{"subject":"Reinforcement learning with prediction-based rewards","predicate":"matches data-business lanes","object":"Data demand","text":"Reinforcement learning with prediction-based rewards matches data-business lanes Data demand."}]},"signal":{"id":"3cb6ba52-665f-48e5-b71a-5a22e75d9af1","url":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1","json_url":"https://onlylabs.fyi/signals/3cb6ba52-665f-48e5-b71a-5a22e75d9af1/signal.json","source_url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","title":"Reinforcement learning with prediction-based rewards","summary":"OpenAI published a writing signal. 
onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2018-10-31T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/reinforcement-learning-with-prediction-based-rewards"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data"}],"score":13,"matched_terms":["rag"],"reason":"OpenAI has a writing signal matching data demand."}},"primary_evidence_page":{"url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","final_url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","title":"Reinforcement learning with prediction-based rewards","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:03.152+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Reinforcement learning with prediction-based rewards | OpenAI October 31, 2018 Reinforcement learning with prediction-based rewards Read paper View code Loading… Share We’ve developed Random Network Distillation (RND)⁠, a prediction-based method for encouraging reinforcement learning agents to explore their environments through curiosity, which for the first time exceeds average human performance on Montezuma’s Revenge⁠. 
We’ve developed Random Network Distillation (RND)⁠, a prediction-based method for encouraging reinforcement learning agents to explore their environments through curiosity, which for the first timeA exceeds average human performance on Montezuma’s Revenge⁠. RND achieves state-of-the-art performance, periodically finds all 24 rooms and solves the first level without using demonstrations or having access to the underlying state of the game. RND incentivizes visiting unfamiliar states⁠ by measuring how hard it is to predict the output of a fixed random neural network on visited states. In unfamiliar states it’s hard to guess the output, and hence the reward is high. It can be applied to any reinforcement learning algorithm, is simple to implement and efficient to..."},"evidence_pages":[{"url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","final_url":"https://openai.com/index/reinforcement-learning-with-prediction-based-rewards","title":"Reinforcement learning with prediction-based rewards","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:03.152+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Reinforcement learning with prediction-based rewards | OpenAI October 31, 2018 Reinforcement learning with prediction-based rewards Read paper View code Loading… Share We’ve developed Random Network Distillation (RND)⁠, a prediction-based method for encouraging reinforcement learning agents to explore their environments through curiosity, which for the first time exceeds average human performance on Montezuma’s Revenge⁠. We’ve developed Random Network Distillation (RND)⁠, a prediction-based method for encouraging reinforcement learning agents to explore their environments through curiosity, which for the first timeA exceeds average human performance on Montezuma’s Revenge⁠. 
RND achieves state-of-the-art performance, periodically finds all 24 rooms and solves the first level without using demonstrations or having access to the underlying state of the game. RND incentivizes visiting unfamiliar states⁠ by measuring how hard it is to predict the output of a fixed random neural network on visited states. In unfamiliar states it’s hard to guess the output, and hence the reward is high. It can be applied to any reinforcement learning algorithm, is simple to implement and efficient to..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud 
commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}