{"data":{"slug":"corpus-engine","term":"Corpus Engine","bucket":"data","definition":"The full ingestion-enrichment-publish pipeline. Daily Modal jobs at 20:05, 20:30, 21:00 UTC.","short_definition":"The full ingestion-enrichment-publish pipeline. Daily Modal jobs at 20:05, 20:30, 21:00 UTC.","long_definition":"Corpus Engine is the offline brain. Daily at 20:05 UTC it ingests new arXiv papers; at 20:30 it enriches them with extraction, knowledge graph, build passport, scoring, and tier assignment; at 21:00 it publishes articles, outreach, media, SEO, and growth events. Everything else in the product reads from what the engine produces.","related_terms":["freshness-ledger","viability-score"],"related_term_routes":[{"slug":"freshness-ledger","term":"Freshness Ledger","route":"/resources/glossary/freshness-ledger"},{"slug":"viability-score","term":"Viability Score","route":"/resources/glossary/viability-score"}],"canonical_route":"/resources/glossary/corpus-engine","api_route":"/api/v1/resources/glossary/corpus-engine","jsonld_id":"https://sciencetostartup.com/resources/glossary/corpus-engine","variants":[],"tldr":"The full ingestion-enrichment-publish pipeline. Daily Modal jobs at 20:05, 20:30, 21:00 UTC.","key_points":[],"quality_tier":null,"citation_count":null,"source_state":"curated_static","source_module":"apps/web/data/glossary/terms.ts","definition_sections":{"schema_version":1,"intro":"Corpus Engine is the offline brain. Daily at 20:05 UTC it ingests new arXiv papers; at 20:30 it enriches them with extraction, knowledge graph, build passport, scoring, and tier assignment; at 21:00 it publishes articles, outreach, media, SEO, and growth events. Everything else in the product reads from what the engine produces.","sections":[{"title":"Definition","items":[{"subtitle":"Corpus Engine","text":"Corpus Engine is the offline brain. Daily at 20:05 UTC it ingests new arXiv papers; at 20:30 it enriches them with extraction, knowledge graph, build passport, scoring, and tier assignment; at 21:00 it publishes articles, outreach, media, SEO, and growth events. Everything else in the product reads from what the engine produces."}]},{"title":"Related vocabulary","items":[{"subtitle":"Freshness Ledger","text":"The per-category staleness record at /api/freshness.json. Three statuses per category: fresh, stale, critical."},{"subtitle":"Viability Score","text":"A 0-10 rating of an AI paper's commercial viability. Higher scores cluster with code, demo, dataset, and beats-SOTA signals."}]}],"cited_arxiv_ids":[]}},"meta":{"canonical_route":"/resources/glossary/corpus-engine","api_route":"/api/v1/resources/glossary/corpus-engine","source":{"label":"curated glossary catalog","source_state":"curated_static","source_module":"apps/web/data/glossary/terms.ts","method_version":"public_glossary_curated_terms_v2","freshness":{"status":"versioned","observed_at":null,"fresh_until":null,"reason":"Git-versioned curated catalog; daily ingestion freshness windows do not apply.","reason_code":"git_versioned_curated_catalog"},"source_count":111,"bucket_count":7,"buckets":["scoring","surfaces","agents","distribution","data","foresight","buildability"]}}}