{"schema_version":"onlylabs.public_signal.v1","title":"Amazon (Nova) Repo: amazon-science/document-haystack","description":"Amazon (Nova) repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d","json_url":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d/signal.json","generated_at":"2026-06-11T03:58:51.887135+00:00","org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/amazon","dossier_json_url":"https://onlylabs.fyi/labs/amazon/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d","signal_json":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d/signal.json","source":"https://github.com/amazon-science/document-haystack","lab_dossier":"https://onlylabs.fyi/labs/amazon","lab_dossier_json":"https://onlylabs.fyi/labs/amazon/dossier.json","analysis":"https://onlylabs.fyi/analysis/amazon","analysis_json":"https://onlylabs.fyi/analysis/amazon/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/amazon/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"Amazon (Nova) published amazon-science/document-haystack (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo amazon-science/document-haystack · language Python · Low-stars research repo. 
onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/amazon-science/document-haystack","source_host":"github.com","occurred_at":"2025-04-23T17:27:24+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Amazon (Nova)","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"amazon-science/document-haystack","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"4","source":"traction"},{"label":"Notability","value":"Low-stars research repo","source":"signal"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Model card","source":"model"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch 
term","value":"Infrastructure","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/amazon-science/document-haystack"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:58:51.887135+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d/signal.json","dossier_json":"https://onlylabs.fyi/labs/amazon/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/amazon/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/amazon/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated 
pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d/signal.json","required":true},{"label":"source","url":"https://github.com/amazon-science/document-haystack","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/amazon/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/amazon/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Amazon (Nova)'s repo signal \"amazon-science/document-haystack\" for frontier lab strategy."},"semantic_triples":[{"subject":"Amazon (Nova)","predicate":"published repo","object":"amazon-science/document-haystack","text":"Amazon (Nova) published repo amazon-science/document-haystack."},{"subject":"amazon-science/document-haystack","predicate":"is classified as","object":"repo signal","text":"amazon-science/document-haystack is classified as repo signal."},{"subject":"amazon-science/document-haystack","predicate":"belongs to","object":"repos desk","text":"amazon-science/document-haystack belongs to repos desk."},{"subject":"amazon-science/document-haystack","predicate":"has context","object":"Python","text":"amazon-science/document-haystack has context Python."},{"subject":"amazon-science/document-haystack","predicate":"has evidence coverage","object":"1 
captured evidence page","text":"amazon-science/document-haystack has evidence coverage 1 captured evidence page."},{"subject":"amazon-science/document-haystack","predicate":"has captured page count","object":"1","text":"amazon-science/document-haystack has captured page count 1."},{"subject":"amazon-science/document-haystack","predicate":"has readable page count","object":"1","text":"amazon-science/document-haystack has readable page count 1."},{"subject":"amazon-science/document-haystack","predicate":"has related signal count","object":"6","text":"amazon-science/document-haystack has related signal count 6."},{"subject":"amazon-science/document-haystack","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"amazon-science/document-haystack has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels."},{"subject":"amazon-science/document-haystack","predicate":"has source host","object":"github.com","text":"amazon-science/document-haystack has source host github.com."},{"subject":"amazon-science/document-haystack","predicate":"has lab","object":"Amazon (Nova)","text":"amazon-science/document-haystack has lab Amazon (Nova)."},{"subject":"amazon-science/document-haystack","predicate":"has signal desk","object":"repos","text":"amazon-science/document-haystack has signal desk repos."},{"subject":"amazon-science/document-haystack","predicate":"has source host","object":"github.com","text":"amazon-science/document-haystack has source host github.com."},{"subject":"amazon-science/document-haystack","predicate":"has repository","object":"amazon-science/document-haystack","text":"amazon-science/document-haystack has repository 
amazon-science/document-haystack."},{"subject":"amazon-science/document-haystack","predicate":"has language","object":"Python","text":"amazon-science/document-haystack has language Python."},{"subject":"amazon-science/document-haystack","predicate":"has stars","object":"4","text":"amazon-science/document-haystack has stars 4."},{"subject":"amazon-science/document-haystack","predicate":"has notability","object":"Low-stars research repo","text":"amazon-science/document-haystack has notability Low-stars research repo."},{"subject":"amazon-science/document-haystack","predicate":"has watch term","object":"Eval methodology","text":"amazon-science/document-haystack has watch term Eval methodology."}]},"intelligence":{"signal_desk":"repos","answer":"Amazon (Nova) published amazon-science/document-haystack (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo amazon-science/document-haystack · language Python · Low-stars research repo. 
onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"Amazon (Nova)","predicate":"published repo","object":"amazon-science/document-haystack","text":"Amazon (Nova) published repo amazon-science/document-haystack."},{"subject":"amazon-science/document-haystack","predicate":"is classified as","object":"repo signal","text":"amazon-science/document-haystack is classified as repo signal."},{"subject":"amazon-science/document-haystack","predicate":"belongs to","object":"repos desk","text":"amazon-science/document-haystack belongs to repos desk."},{"subject":"amazon-science/document-haystack","predicate":"has context","object":"Python","text":"amazon-science/document-haystack has context Python."},{"subject":"amazon-science/document-haystack","predicate":"has evidence coverage","object":"1 captured evidence page","text":"amazon-science/document-haystack has evidence coverage 1 captured evidence page."}]},"signal":{"id":"996374ab-2270-4771-8ce8-ae672865ee7d","url":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d","json_url":"https://onlylabs.fyi/signals/996374ab-2270-4771-8ce8-ae672865ee7d/signal.json","source_url":"https://github.com/amazon-science/document-haystack","title":"amazon-science/document-haystack","summary":"Amazon (Nova) published a new repository. 
onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2025-04-23T17:27:24+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/amazon-science/document-haystack"]},"facets":{"repo":"amazon-science/document-haystack","language":"Python"},"traction":{"github_stars":4,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://github.com/amazon-science/document-haystack","final_url":"https://github.com/amazon-science/document-haystack","title":"amazon-science/document-haystack repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:58:51.887135+00:00","bytes":22987,"raw_path":"20f01aac73b1143837a1f3d51f9b59fecd78f20d0542cbcc078bc917d3ff5f9c.json","content_hash":"937dc7cfa0b21aa46468096facc6f5576932f2ae406bdddcca968174007a1064","excerpt_chars":1200,"truncated":true,"excerpt":"amazon-science/document-haystack Language: Python License: NOASSERTION Stars: 4 Forks: 1 Open issues: 0 Created: 2025-04-23T17:27:24Z Pushed: 2025-07-30T21:59:33Z Default branch: main Fork: no Archived: no README: Document Haystack Benchmark This repository contains the inference and evaluation scripts for the paper “[Document Haystack: A Long Context Multimodal Image/Document Understanding Vision LLM Benchmark](https://arxiv.org/abs/2507.15882)”. 📑 Abstract Paper The proliferation of multimodal Large Language Models has significantly advanced the ability to analyze and understand complex data inputs from different modalities. 
However, the processing of long documents remains under-explored, largely due to a lack of suitable benchmarks. To address this, we introduce Document Haystack, a comprehensive benchmark designed to evaluate the performance of Vision Language Models (VLMs) on long, visually complex documents. Document Haystack features documents ranging from 5 to 200 pages and strategically inserts pure text or multimodal text+image \"needles\" at various depths within the documents to challenge VLMs' retrieval capabilities. Comprising 400 document variants and a total of..."},"evidence_pages":[{"url":"https://github.com/amazon-science/document-haystack","final_url":"https://github.com/amazon-science/document-haystack","title":"amazon-science/document-haystack repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:58:51.887135+00:00","bytes":22987,"raw_path":"20f01aac73b1143837a1f3d51f9b59fecd78f20d0542cbcc078bc917d3ff5f9c.json","content_hash":"937dc7cfa0b21aa46468096facc6f5576932f2ae406bdddcca968174007a1064","excerpt_chars":1200,"truncated":true,"excerpt":"amazon-science/document-haystack Language: Python License: NOASSERTION Stars: 4 Forks: 1 Open issues: 0 Created: 2025-04-23T17:27:24Z Pushed: 2025-07-30T21:59:33Z Default branch: main Fork: no Archived: no README: Document Haystack Benchmark This repository contains the inference and evaluation scripts for the paper “[Document Haystack: A Long Context Multimodal Image/Document Understanding Vision LLM Benchmark](https://arxiv.org/abs/2507.15882)”. 📑 Abstract Paper The proliferation of multimodal Large Language Models has significantly advanced the ability to analyze and understand complex data inputs from different modalities. However, the processing of long documents remains under-explored, largely due to a lack of suitable benchmarks. 
To address this, we introduce Document Haystack, a comprehensive benchmark designed to evaluate the performance of Vision Language Models (VLMs) on long, visually complex documents. Document Haystack features documents ranging from 5 to 200 pages and strategically inserts pure text or multimodal text+image \"needles\" at various depths within the documents to challenge VLMs' retrieval capabilities. Comprising 400 document variants and a total of..."}],"related_signals":[{"id":"087c32a2-6ad0-4981-9315-11fdd32a0153","url":"https://onlylabs.fyi/signals/087c32a2-6ad0-4981-9315-11fdd32a0153","source_url":"https://github.com/amazon-science/reskill","title":"amazon-science/reskill","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-06-04T02:13:35+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e5701aed-6cd3-48dd-bfa6-ef839031e2e8","url":"https://onlylabs.fyi/signals/e5701aed-6cd3-48dd-bfa6-ef839031e2e8","source_url":"https://github.com/amazon-science/dualkv-flash-attn-for-rl","title":"amazon-science/dualkv-flash-attn-for-rl","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-27T17:38:58+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"8af28f0c-7331-4b08-b517-e18b3555e503","url":"https://onlylabs.fyi/signals/8af28f0c-7331-4b08-b517-e18b3555e503","source_url":"https://github.com/amazon-science/EvoMAS","title":"amazon-science/EvoMAS","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon 
(Nova)","category":"frontier-lab"},"occurred_at":"2026-05-19T19:23:29+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e3ff8718-7daa-4ebd-a3e6-3d825c538b74","url":"https://onlylabs.fyi/signals/e3ff8718-7daa-4ebd-a3e6-3d825c538b74","source_url":"https://github.com/amazon-science/adaptive-layerwise-perturbation","title":"amazon-science/adaptive-layerwise-perturbation","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-14T17:44:17+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"9afcd328-0124-485c-8ace-9c3ad546e316","url":"https://onlylabs.fyi/signals/9afcd328-0124-485c-8ace-9c3ad546e316","source_url":"https://github.com/amazon-science/temporal-reasoning-dataset","title":"amazon-science/temporal-reasoning-dataset","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-13T13:07:08+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e19ce80b-3d6a-4aaf-9b1a-82d1b19ab682","url":"https://onlylabs.fyi/signals/e19ce80b-3d6a-4aaf-9b1a-82d1b19ab682","source_url":"https://github.com/amazon-science/PROF-GRPO","title":"amazon-science/PROF-GRPO","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-12T19:43:55+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"}]}