{"schema_version":"onlylabs.public_signal.v1","title":"Nous Research Repo: NousResearch/lm-eval-harness","description":"Nous Research repo signal with public source context, captured evidence pages, related signals, and category-scoped analysis context.","url":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38","json_url":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38/signal.json","generated_at":"2026-06-11T03:17:50.783568+00:00","org":{"slug":"nous","name":"Nous Research","category":"neolab","category_label":"Neolab","dossier_url":"https://onlylabs.fyi/labs/nous","dossier_json_url":"https://onlylabs.fyi/labs/nous/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38","signal_json":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38/signal.json","source":"https://github.com/NousResearch/lm-eval-harness","lab_dossier":"https://onlylabs.fyi/labs/nous","lab_dossier_json":"https://onlylabs.fyi/labs/nous/dossier.json","analysis":"https://onlylabs.fyi/analysis/nous","analysis_json":"https://onlylabs.fyi/analysis/nous/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/nous/evidence.json","category":"https://onlylabs.fyi/neolabs","category_json":"https://onlylabs.fyi/neolabs.json","category_feed":"https://onlylabs.fyi/neolabs/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json?category=neolab","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"Nous Research published NousResearch/lm-eval-harness (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo NousResearch/lm-eval-harness · language Python · New repo with low traction. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/NousResearch/lm-eval-harness","source_host":"github.com","occurred_at":"2024-12-20T01:13:30+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Nous Research","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"NousResearch/lm-eval-harness","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"11","source":"traction"},{"label":"Notability","value":"New repo with low traction","source":"signal"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Model card","source":"model"},{"label":"Watch term","value":"Infrastructure","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/NousResearch/lm-eval-harness"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:17:50.783568+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38/signal.json","dossier_json":"https://onlylabs.fyi/labs/nous/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/nous/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/nous/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json?category=neolab","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","evidence_url"],"data_business_relevance":"Data-business lane extraction is scoped to frontier labs; for this category, interpret the repository as source-grounded category strategy evidence.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38/signal.json","required":true},{"label":"source","url":"https://github.com/NousResearch/lm-eval-harness","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/nous/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/nous/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Nous Research's repo signal \"NousResearch/lm-eval-harness\" for neolab strategy."},"semantic_triples":[{"subject":"Nous Research","predicate":"published repo","object":"NousResearch/lm-eval-harness","text":"Nous Research published repo NousResearch/lm-eval-harness."},{"subject":"NousResearch/lm-eval-harness","predicate":"is classified as","object":"repo signal","text":"NousResearch/lm-eval-harness is classified as repo signal."},{"subject":"NousResearch/lm-eval-harness","predicate":"belongs to","object":"repos desk","text":"NousResearch/lm-eval-harness belongs to repos desk."},{"subject":"NousResearch/lm-eval-harness","predicate":"has context","object":"Python","text":"NousResearch/lm-eval-harness has context Python."},{"subject":"NousResearch/lm-eval-harness","predicate":"has evidence coverage","object":"1 captured evidence page","text":"NousResearch/lm-eval-harness has evidence coverage 1 captured evidence page."},{"subject":"NousResearch/lm-eval-harness","predicate":"has captured page count","object":"1","text":"NousResearch/lm-eval-harness has captured page count 1."},{"subject":"NousResearch/lm-eval-harness","predicate":"has readable page count","object":"1","text":"NousResearch/lm-eval-harness has readable page count 1."},{"subject":"NousResearch/lm-eval-harness","predicate":"has related signal count","object":"6","text":"NousResearch/lm-eval-harness has related signal count 6."},{"subject":"NousResearch/lm-eval-harness","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"NousResearch/lm-eval-harness has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"NousResearch/lm-eval-harness","predicate":"has source host","object":"github.com","text":"NousResearch/lm-eval-harness has source host github.com."},{"subject":"NousResearch/lm-eval-harness","predicate":"has lab","object":"Nous Research","text":"NousResearch/lm-eval-harness has lab Nous Research."},{"subject":"NousResearch/lm-eval-harness","predicate":"has signal desk","object":"repos","text":"NousResearch/lm-eval-harness has signal desk repos."},{"subject":"NousResearch/lm-eval-harness","predicate":"has source host","object":"github.com","text":"NousResearch/lm-eval-harness has source host github.com."},{"subject":"NousResearch/lm-eval-harness","predicate":"has repository","object":"NousResearch/lm-eval-harness","text":"NousResearch/lm-eval-harness has repository NousResearch/lm-eval-harness."},{"subject":"NousResearch/lm-eval-harness","predicate":"has language","object":"Python","text":"NousResearch/lm-eval-harness has language Python."},{"subject":"NousResearch/lm-eval-harness","predicate":"has stars","object":"11","text":"NousResearch/lm-eval-harness has stars 11."},{"subject":"NousResearch/lm-eval-harness","predicate":"has notability","object":"New repo with low traction","text":"NousResearch/lm-eval-harness has notability New repo with low traction."},{"subject":"NousResearch/lm-eval-harness","predicate":"has watch term","object":"Eval methodology","text":"NousResearch/lm-eval-harness has watch term Eval methodology."}]},"intelligence":{"signal_desk":"repos","answer":"Nous Research published NousResearch/lm-eval-harness (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo NousResearch/lm-eval-harness · language Python · New repo with low traction. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"Nous Research","predicate":"published repo","object":"NousResearch/lm-eval-harness","text":"Nous Research published repo NousResearch/lm-eval-harness."},{"subject":"NousResearch/lm-eval-harness","predicate":"is classified as","object":"repo signal","text":"NousResearch/lm-eval-harness is classified as repo signal."},{"subject":"NousResearch/lm-eval-harness","predicate":"belongs to","object":"repos desk","text":"NousResearch/lm-eval-harness belongs to repos desk."},{"subject":"NousResearch/lm-eval-harness","predicate":"has context","object":"Python","text":"NousResearch/lm-eval-harness has context Python."},{"subject":"NousResearch/lm-eval-harness","predicate":"has evidence coverage","object":"1 captured evidence page","text":"NousResearch/lm-eval-harness has evidence coverage 1 captured evidence page."}]},"signal":{"id":"bd273414-45ea-4afd-99e5-b623c84d1f38","url":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38","json_url":"https://onlylabs.fyi/signals/bd273414-45ea-4afd-99e5-b623c84d1f38/signal.json","source_url":"https://github.com/NousResearch/lm-eval-harness","title":"NousResearch/lm-eval-harness","summary":"Nous Research published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2024-12-20T01:13:30+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/NousResearch/lm-eval-harness"]},"facets":{"repo":"NousResearch/lm-eval-harness","language":"Python"},"traction":{"github_stars":11,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://github.com/NousResearch/lm-eval-harness","final_url":"https://github.com/NousResearch/lm-eval-harness","title":"NousResearch/lm-eval-harness repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:17:50.783568+00:00","bytes":45040,"raw_path":"2a5fee100c4e129255de017d985fd83b8c53f83939e354a61c642ae3b68ee63f.json","content_hash":"576fae4e8852f7b92760417e593c1ce57ac81b0bdedbd5ca75c1147356546b31","excerpt_chars":1200,"truncated":true,"excerpt":"NousResearch/lm-eval-harness Language: Python License: MIT Stars: 11 Forks: 3 Open issues: 2 Created: 2024-12-20T01:13:30Z Pushed: 2025-06-29T00:23:31Z Default branch: nous Fork: no Archived: no README: Language Model Evaluation Harness [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10256836.svg)](https://doi.org/10.5281/zenodo.10256836) --- *Latest News 📣* - [2025/02] Added [SGLang](https://docs.sglang.ai/) support! - [2024/09] We are prototyping allowing users of LM Evaluation Harness to create and evaluate on text+image multimodal input, text output tasks, and have just added the `hf-multimodal` and `vllm-vlm` model types and `mmmu` task as a prototype feature. We welcome users to try out this in-progress feature and stress-test it for themselves, and suggest they check out [`lmms-eval`](https://github.com/EvolvingLMMs-Lab/lmms-eval), a wonderful project originally forking off of the lm-evaluation-harness, for a broader range of multimodal tasks, models, and features. - [2024/07] [API model](docs/API_guide.md) support has been updated and refactored, introducing support for batched and async requests, and making it significantly easier to customize and use for your own..."},"evidence_pages":[{"url":"https://github.com/NousResearch/lm-eval-harness","final_url":"https://github.com/NousResearch/lm-eval-harness","title":"NousResearch/lm-eval-harness repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:17:50.783568+00:00","bytes":45040,"raw_path":"2a5fee100c4e129255de017d985fd83b8c53f83939e354a61c642ae3b68ee63f.json","content_hash":"576fae4e8852f7b92760417e593c1ce57ac81b0bdedbd5ca75c1147356546b31","excerpt_chars":1200,"truncated":true,"excerpt":"NousResearch/lm-eval-harness Language: Python License: MIT Stars: 11 Forks: 3 Open issues: 2 Created: 2024-12-20T01:13:30Z Pushed: 2025-06-29T00:23:31Z Default branch: nous Fork: no Archived: no README: Language Model Evaluation Harness [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10256836.svg)](https://doi.org/10.5281/zenodo.10256836) --- *Latest News 📣* - [2025/02] Added [SGLang](https://docs.sglang.ai/) support! - [2024/09] We are prototyping allowing users of LM Evaluation Harness to create and evaluate on text+image multimodal input, text output tasks, and have just added the `hf-multimodal` and `vllm-vlm` model types and `mmmu` task as a prototype feature. We welcome users to try out this in-progress feature and stress-test it for themselves, and suggest they check out [`lmms-eval`](https://github.com/EvolvingLMMs-Lab/lmms-eval), a wonderful project originally forking off of the lm-evaluation-harness, for a broader range of multimodal tasks, models, and features. - [2024/07] [API model](docs/API_guide.md) support has been updated and refactored, introducing support for batched and async requests, and making it significantly easier to customize and use for your own..."}],"related_signals":[{"id":"8c537d36-674f-4a6d-8078-e61250d0fead","url":"https://onlylabs.fyi/signals/8c537d36-674f-4a6d-8078-e61250d0fead","source_url":"https://github.com/NousResearch/hermes-compression-eval","title":"NousResearch/hermes-compression-eval","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2026-05-16T09:41:37+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source"},{"id":"277c6ce4-3bfd-4913-bea0-5bf8be0cfcc9","url":"https://onlylabs.fyi/signals/277c6ce4-3bfd-4913-bea0-5bf8be0cfcc9","source_url":"https://github.com/NousResearch/hermes-example-plugins","title":"NousResearch/hermes-example-plugins","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2026-05-10T13:29:46+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source"},{"id":"f80092ae-2b05-4d8e-820d-acf5003f94b7","url":"https://onlylabs.fyi/signals/f80092ae-2b05-4d8e-820d-acf5003f94b7","source_url":"https://github.com/NousResearch/kanban-video-pipeline","title":"NousResearch/kanban-video-pipeline","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2026-05-02T17:01:19+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source"},{"id":"57c59b7f-0edf-4b01-9a60-1d76004341e9","url":"https://onlylabs.fyi/signals/57c59b7f-0edf-4b01-9a60-1d76004341e9","source_url":"https://github.com/NousResearch/tinker-nemogym","title":"NousResearch/tinker-nemogym","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2026-04-21T15:20:07+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source"},{"id":"2c1f03a4-776d-47a8-baed-3b9b868b7ff1","url":"https://onlylabs.fyi/signals/2c1f03a4-776d-47a8-baed-3b9b868b7ff1","source_url":"https://github.com/NousResearch/autoreason","title":"NousResearch/autoreason","context":"TeX","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2026-03-28T19:34:36+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source"},{"id":"b5f1430b-5005-498c-99c4-4ae7d7a896fa","url":"https://onlylabs.fyi/signals/b5f1430b-5005-498c-99c4-4ae7d7a896fa","source_url":"https://github.com/NousResearch/yhack","title":"NousResearch/yhack","context":"HTML","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"nous","name":"Nous Research","category":"neolab"},"occurred_at":"2026-03-28T19:03:58+00:00","first_seen_at":"2026-06-06T01:49:37.982614+00:00","date_source":"source"}]}