{"schema_version":"onlylabs.public_signal.v1","title":"Clarifai Repo: Clarifai/clarifai-pyspark","description":"Clarifai repo signal with public source context, captured evidence pages, related signals, and category-scoped analysis context.","url":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65","json_url":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65/signal.json","generated_at":"2026-06-11T04:09:14.538376+00:00","org":{"slug":"clarifai","name":"Clarifai","category":"neocloud","category_label":"Neocloud","dossier_url":"https://onlylabs.fyi/labs/clarifai","dossier_json_url":"https://onlylabs.fyi/labs/clarifai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65","signal_json":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65/signal.json","source":"https://github.com/Clarifai/clarifai-pyspark","lab_dossier":"https://onlylabs.fyi/labs/clarifai","lab_dossier_json":"https://onlylabs.fyi/labs/clarifai/dossier.json","analysis":"https://onlylabs.fyi/analysis/clarifai","analysis_json":"https://onlylabs.fyi/analysis/clarifai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/clarifai/evidence.json","category":"https://onlylabs.fyi/neoclouds","category_json":"https://onlylabs.fyi/neoclouds.json","category_feed":"https://onlylabs.fyi/neoclouds/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"Clarifai published Clarifai/clarifai-pyspark (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo Clarifai/clarifai-pyspark · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/Clarifai/clarifai-pyspark","source_host":"github.com","occurred_at":"2023-10-16T13:00:01+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Clarifai","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"Clarifai/clarifai-pyspark","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"7","source":"traction"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/Clarifai/clarifai-pyspark"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:09:14.538376+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65/signal.json","dossier_json":"https://onlylabs.fyi/labs/clarifai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/clarifai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/clarifai/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","evidence_url"],"data_business_relevance":"Data-business lane extraction is scoped to frontier labs; for this category, interpret the repository as source-grounded category strategy evidence.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65/signal.json","required":true},{"label":"source","url":"https://github.com/Clarifai/clarifai-pyspark","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/clarifai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/clarifai/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Clarifai's repo signal \"Clarifai/clarifai-pyspark\" for neocloud strategy."},"semantic_triples":[{"subject":"Clarifai","predicate":"published repo","object":"Clarifai/clarifai-pyspark","text":"Clarifai published repo Clarifai/clarifai-pyspark."},{"subject":"Clarifai/clarifai-pyspark","predicate":"is classified as","object":"repo signal","text":"Clarifai/clarifai-pyspark is classified as repo signal."},{"subject":"Clarifai/clarifai-pyspark","predicate":"belongs to","object":"repos desk","text":"Clarifai/clarifai-pyspark belongs to repos desk."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has context","object":"Python","text":"Clarifai/clarifai-pyspark has context Python."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Clarifai/clarifai-pyspark has evidence coverage 1 captured evidence page."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has captured page count","object":"1","text":"Clarifai/clarifai-pyspark has captured page count 1."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has readable page count","object":"1","text":"Clarifai/clarifai-pyspark has readable page count 1."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has related signal count","object":"6","text":"Clarifai/clarifai-pyspark has related signal count 6."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"Clarifai/clarifai-pyspark has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has source host","object":"github.com","text":"Clarifai/clarifai-pyspark has source host github.com."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has lab","object":"Clarifai","text":"Clarifai/clarifai-pyspark has lab Clarifai."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has signal desk","object":"repos","text":"Clarifai/clarifai-pyspark has signal desk repos."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has source host","object":"github.com","text":"Clarifai/clarifai-pyspark has source host github.com."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has repository","object":"Clarifai/clarifai-pyspark","text":"Clarifai/clarifai-pyspark has repository Clarifai/clarifai-pyspark."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has language","object":"Python","text":"Clarifai/clarifai-pyspark has language Python."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has stars","object":"7","text":"Clarifai/clarifai-pyspark has stars 7."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has watch term","object":"Data pipeline","text":"Clarifai/clarifai-pyspark has watch term Data pipeline."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has watch term","object":"Infrastructure","text":"Clarifai/clarifai-pyspark has watch term Infrastructure."}]},"intelligence":{"signal_desk":"repos","answer":"Clarifai published Clarifai/clarifai-pyspark (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo Clarifai/clarifai-pyspark · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"Clarifai","predicate":"published repo","object":"Clarifai/clarifai-pyspark","text":"Clarifai published repo Clarifai/clarifai-pyspark."},{"subject":"Clarifai/clarifai-pyspark","predicate":"is classified as","object":"repo signal","text":"Clarifai/clarifai-pyspark is classified as repo signal."},{"subject":"Clarifai/clarifai-pyspark","predicate":"belongs to","object":"repos desk","text":"Clarifai/clarifai-pyspark belongs to repos desk."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has context","object":"Python","text":"Clarifai/clarifai-pyspark has context Python."},{"subject":"Clarifai/clarifai-pyspark","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Clarifai/clarifai-pyspark has evidence coverage 1 captured evidence page."}]},"signal":{"id":"ae6c22cd-3db8-4ca8-a377-865e1ad42c65","url":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65","json_url":"https://onlylabs.fyi/signals/ae6c22cd-3db8-4ca8-a377-865e1ad42c65/signal.json","source_url":"https://github.com/Clarifai/clarifai-pyspark","title":"Clarifai/clarifai-pyspark","summary":"Clarifai published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2023-10-16T13:00:01+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/Clarifai/clarifai-pyspark"]},"facets":{"repo":"Clarifai/clarifai-pyspark","language":"Python"},"traction":{"github_stars":7,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://github.com/Clarifai/clarifai-pyspark","final_url":"https://github.com/Clarifai/clarifai-pyspark","title":"Clarifai/clarifai-pyspark repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:09:14.538376+00:00","bytes":11622,"raw_path":"404f19b9802adc6c67f60551d5bf7d9a785caa737a2b04a0686b885a0813cd3b.json","content_hash":"9d02e2e828c431f63caef40b75b3a8bf7aa162a1c23016fd0bcc18786c2e5d32","excerpt_chars":1200,"truncated":true,"excerpt":"Clarifai/clarifai-pyspark Description: Interfaces for Unstructured data and ML pipelines with Databricks and Clarifai Language: Python License: Apache-2.0 Stars: 7 Forks: 2 Open issues: 2 Created: 2023-10-16T13:00:01Z Pushed: 2024-11-11T13:19:56Z Default branch: main Fork: no Archived: no README: ClarifaiPySpark Introduction This readme provides overview of the Software Development Kit (SDK) under development for integrating Clarifai with Databricks. The primary use case for this SDK is to facilitate the interaction between Databricks and Clarifai for tasks related to uploading client datasets, annotating data, and exporting and storing annotations in Spark DataFrames or Delta tables. ![Screenshot 2023-11-17 at 5 21 04 PM](https://github.com/Clarifai/clarifai-pyspark/assets/143642606/7b6bfc6a-19b9-48d7-8013-24e79fc5aacf) The initial use case for this SDK revolves around three main objectives: Uploading Client Datasets into Clarifai App: The SDK should enable the seamless upload of datasets into the Clarifai application, simplifying the process of data transfer from Databricks to Clarifai. Annotate the Data: It should provide features for data annotation, making it easier for users..."},"evidence_pages":[{"url":"https://github.com/Clarifai/clarifai-pyspark","final_url":"https://github.com/Clarifai/clarifai-pyspark","title":"Clarifai/clarifai-pyspark repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:09:14.538376+00:00","bytes":11622,"raw_path":"404f19b9802adc6c67f60551d5bf7d9a785caa737a2b04a0686b885a0813cd3b.json","content_hash":"9d02e2e828c431f63caef40b75b3a8bf7aa162a1c23016fd0bcc18786c2e5d32","excerpt_chars":1200,"truncated":true,"excerpt":"Clarifai/clarifai-pyspark Description: Interfaces for Unstructured data and ML pipelines with Databricks and Clarifai Language: Python License: Apache-2.0 Stars: 7 Forks: 2 Open issues: 2 Created: 2023-10-16T13:00:01Z Pushed: 2024-11-11T13:19:56Z Default branch: main Fork: no Archived: no README: ClarifaiPySpark Introduction This readme provides overview of the Software Development Kit (SDK) under development for integrating Clarifai with Databricks. The primary use case for this SDK is to facilitate the interaction between Databricks and Clarifai for tasks related to uploading client datasets, annotating data, and exporting and storing annotations in Spark DataFrames or Delta tables. ![Screenshot 2023-11-17 at 5 21 04 PM](https://github.com/Clarifai/clarifai-pyspark/assets/143642606/7b6bfc6a-19b9-48d7-8013-24e79fc5aacf) The initial use case for this SDK revolves around three main objectives: Uploading Client Datasets into Clarifai App: The SDK should enable the seamless upload of datasets into the Clarifai application, simplifying the process of data transfer from Databricks to Clarifai. Annotate the Data: It should provide features for data annotation, making it easier for users..."}],"related_signals":[{"id":"f59750d7-149f-4d5e-a7da-d34882a27500","url":"https://onlylabs.fyi/signals/f59750d7-149f-4d5e-a7da-d34882a27500","source_url":"https://github.com/Clarifai/skills","title":"Clarifai/skills","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2026-03-26T20:46:06+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source"},{"id":"c4ee3dd1-14cf-4aae-afa0-16d3877c0379","url":"https://onlylabs.fyi/signals/c4ee3dd1-14cf-4aae-afa0-16d3877c0379","source_url":"https://github.com/Clarifai/pipeline-examples","title":"Clarifai/pipeline-examples","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2026-01-15T15:46:39+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source"},{"id":"2f25f002-0ddb-4005-81e7-8c13a4a65733","url":"https://onlylabs.fyi/signals/2f25f002-0ddb-4005-81e7-8c13a4a65733","source_url":"https://github.com/Clarifai/pipeline-engine","title":"Clarifai/pipeline-engine","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2025-07-15T19:15:16+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source"},{"id":"c7b16a2b-418d-4ef5-8c03-a52084485fca","url":"https://onlylabs.fyi/signals/c7b16a2b-418d-4ef5-8c03-a52084485fca","source_url":"https://github.com/Clarifai/langchain_clarifai","title":"Clarifai/langchain_clarifai","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2025-06-24T13:59:27+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source"},{"id":"f9f513a1-a61b-44d1-8828-1311f67619cd","url":"https://onlylabs.fyi/signals/f9f513a1-a61b-44d1-8828-1311f67619cd","source_url":"https://github.com/Clarifai/clarifai-mcp-server-local","title":"Clarifai/clarifai-mcp-server-local","context":"Go","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2025-04-08T20:26:44+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source"},{"id":"58de4659-c6cd-4eca-b172-d4931bca73b4","url":"https://onlylabs.fyi/signals/58de4659-c6cd-4eca-b172-d4931bca73b4","source_url":"https://github.com/Clarifai/runners-examples","title":"Clarifai/runners-examples","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"clarifai","name":"Clarifai","category":"neocloud"},"occurred_at":"2025-03-28T14:53:32+00:00","first_seen_at":"2026-06-05T22:32:15.505408+00:00","date_source":"source"}]}