{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Text and code embeddings by contrastive pre-training","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d","json_url":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d/signal.json","generated_at":"2026-06-08T15:46:50.071+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d","signal_json":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d/signal.json","source":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Text and code embeddings by contrastive pre-training. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Text and code embeddings by contrastive pre-training | OpenAI January 24, 2022 Publication Text and code embeddings by contrastive pre-training Read paper Read blog.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","source_host":"openai.com","occurred_at":"2022-01-24T08:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:46:50.071+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["training"],"score":13,"reason":"OpenAI has a writing signal matching infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d/signal.json","required":true},{"label":"source","url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Text and code embeddings by contrastive pre-training\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Text and code embeddings by contrastive pre-training","text":"OpenAI published Text and code embeddings by contrastive pre-training."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"is classified as","object":"writing signal","text":"Text and code embeddings by contrastive pre-training is classified as writing signal."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"belongs to","object":"talking desk","text":"Text and code embeddings by contrastive pre-training belongs to talking desk."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Text and code embeddings by contrastive pre-training has evidence coverage 1 captured evidence page."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"matches data-business lanes","object":"Infrastructure","text":"Text and code embeddings by contrastive pre-training matches data-business lanes Infrastructure."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has captured page count","object":"1","text":"Text and code embeddings by contrastive pre-training has captured page count 1."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has readable page count","object":"1","text":"Text and code embeddings by contrastive pre-training has readable page count 1."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has related signal count","object":"6","text":"Text and code embeddings by contrastive pre-training has related signal count 6."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Text and code embeddings by contrastive pre-training has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has source host","object":"openai.com","text":"Text and code embeddings by contrastive pre-training has source host openai.com."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has lab","object":"OpenAI","text":"Text and code embeddings by contrastive pre-training has lab OpenAI."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has signal desk","object":"talking","text":"Text and code embeddings by contrastive pre-training has signal desk talking."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has source host","object":"openai.com","text":"Text and code embeddings by contrastive pre-training has source host openai.com."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has radar lane","object":"Infrastructure","text":"Text and code embeddings by contrastive pre-training has radar lane Infrastructure."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has matched term","object":"training","text":"Text and code embeddings by contrastive pre-training has matched term training."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has watch term","object":"Eval methodology","text":"Text and code embeddings by contrastive pre-training has watch term Eval methodology."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has watch term","object":"Data pipeline","text":"Text and code embeddings by contrastive pre-training has watch term Data pipeline."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has watch term","object":"Infrastructure","text":"Text and code embeddings by contrastive pre-training has watch term Infrastructure."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Text and code embeddings by contrastive pre-training. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Text and code embeddings by contrastive pre-training | OpenAI January 24, 2022 Publication Text and code embeddings by contrastive pre-training Read paper Read blog.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Text and code embeddings by contrastive pre-training","text":"OpenAI published Text and code embeddings by contrastive pre-training."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"is classified as","object":"writing signal","text":"Text and code embeddings by contrastive pre-training is classified as writing signal."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"belongs to","object":"talking desk","text":"Text and code embeddings by contrastive pre-training belongs to talking desk."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Text and code embeddings by contrastive pre-training has evidence coverage 1 captured evidence page."},{"subject":"Text and code embeddings by contrastive pre-training","predicate":"matches data-business lanes","object":"Infrastructure","text":"Text and code embeddings by contrastive pre-training matches data-business lanes Infrastructure."}]},"signal":{"id":"4e3b2900-b06c-4675-866b-f25d6e21ea8d","url":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d","json_url":"https://onlylabs.fyi/signals/4e3b2900-b06c-4675-866b-f25d6e21ea8d/signal.json","source_url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","title":"Text and code embeddings by contrastive pre-training","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2022-01-24T08:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":13,"matched_terms":["training"],"reason":"OpenAI has a writing signal matching infrastructure."}},"primary_evidence_page":{"url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","final_url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","title":"Text and code embeddings by contrastive pre-training","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:50.071+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Text and code embeddings by contrastive pre-training | OpenAI January 24, 2022 Publication Text and code embeddings by contrastive pre-training Read paper Read blog Loading… Share Abstract Text embeddings are useful features in many applications such as semantic search and computing text similarity. Previous work typically trains models customized for different use cases, varying in dataset choice, training objective and model architecture. In this work, we show that contrastive pre-training on unsupervised data at scale leads to high quality vector representations of text and code. The same unsupervised text embeddings that achieve new state-of-the-art results in linear-probe classification also display impressive semantic search capabilities and sometimes even perform competitively with fine-tuned models. On linear-probe classification accuracy averaging over 7 tasks, our best unsupervised model achieves a relative improvement of 4% and 1.8% over previous best unsupervised and supervised text embedding models respectively. The same text embeddings when evaluated on large-scale semantic search attains a relative improvement of 23.4%, 14.7%, and 10.6% over previous best..."},"evidence_pages":[{"url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","final_url":"https://openai.com/index/text-and-code-embeddings-by-contrastive-pre-training","title":"Text and code embeddings by contrastive pre-training","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:50.071+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Text and code embeddings by contrastive pre-training | OpenAI January 24, 2022 Publication Text and code embeddings by contrastive pre-training Read paper Read blog Loading… Share Abstract Text embeddings are useful features in many applications such as semantic search and computing text similarity. Previous work typically trains models customized for different use cases, varying in dataset choice, training objective and model architecture. In this work, we show that contrastive pre-training on unsupervised data at scale leads to high quality vector representations of text and code. The same unsupervised text embeddings that achieve new state-of-the-art results in linear-probe classification also display impressive semantic search capabilities and sometimes even perform competitively with fine-tuned models. On linear-probe classification accuracy averaging over 7 tasks, our best unsupervised model achieves a relative improvement of 4% and 1.8% over previous best unsupervised and supervised text embedding models respectively. The same text embeddings when evaluated on large-scale semantic search attains a relative improvement of 23.4%, 14.7%, and 10.6% over previous best..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}