{"data":{"slug":"paper-chunker","term":"Paper Chunker","bucket":"data","definition":"Splits paper full text into overlapping semantic chunks for embedding, retrieval, and citation-anchored answers.","short_definition":"Splits paper full text into overlapping semantic chunks for embedding, retrieval, and citation-anchored answers.","long_definition":"Paper Chunker is the upstream of every retrieval-augmented surface. It splits the full text of an exhaustively-extracted paper into overlapping semantic chunks, each carrying a section header, a page anchor, and the embedding model version. Signal Canvas and the Research Kernel retrieve from the chunker output so every cited claim resolves to a chunk in the original paper.","related_terms":["tier-exhaustive","research-kernel","node-embedding"],"related_term_routes":[{"slug":"tier-exhaustive","term":"Tier 3 Exhaustive Extraction","route":"/resources/glossary/tier-exhaustive"},{"slug":"research-kernel","term":"Research Kernel","route":"/resources/glossary/research-kernel"},{"slug":"node-embedding","term":"Node Embedding","route":"/resources/glossary/node-embedding"}],"canonical_route":"/resources/glossary/paper-chunker","api_route":"/api/v1/resources/glossary/paper-chunker","jsonld_id":"https://sciencetostartup.com/resources/glossary/paper-chunker","variants":[],"tldr":"Splits paper full text into overlapping semantic chunks for embedding, retrieval, and citation-anchored answers.","key_points":[],"quality_tier":null,"citation_count":null,"source_state":"curated_static","source_module":"apps/web/data/glossary/terms.ts","definition_sections":{"schema_version":1,"intro":"Paper Chunker is the upstream of every retrieval-augmented surface. It splits the full text of an exhaustively-extracted paper into overlapping semantic chunks, each carrying a section header, a page anchor, and the embedding model version. Signal Canvas and the Research Kernel retrieve from the chunker output so every cited claim resolves to a chunk in the original paper.","sections":[{"title":"Definition","items":[{"subtitle":"Paper Chunker","text":"Paper Chunker is the upstream of every retrieval-augmented surface. It splits the full text of an exhaustively-extracted paper into overlapping semantic chunks, each carrying a section header, a page anchor, and the embedding model version. Signal Canvas and the Research Kernel retrieve from the chunker output so every cited claim resolves to a chunk in the original paper."}]},{"title":"Related vocabulary","items":[{"subtitle":"Tier 3 Exhaustive Extraction","text":"The deepest extraction tier: full text, tables, formulas, citation graph. Reserved for top-quartile viability papers."},{"subtitle":"Research Kernel","text":"The backend orchestrator behind Signal Canvas. Plans queries, retrieves sources, compiles answers, and emits SSE streams."},{"subtitle":"Node Embedding","text":"Graph-structural embeddings for corpus entities — methods, datasets, benchmarks, labs. Used for similarity and clustering."}]}],"cited_arxiv_ids":[]}},"meta":{"canonical_route":"/resources/glossary/paper-chunker","api_route":"/api/v1/resources/glossary/paper-chunker","source":{"label":"curated glossary catalog","source_state":"curated_static","source_module":"apps/web/data/glossary/terms.ts","method_version":"public_glossary_curated_terms_v2","freshness":{"status":"versioned","observed_at":null,"fresh_until":null,"reason":"Git-versioned curated catalog; daily ingestion freshness windows do not apply.","reason_code":"git_versioned_curated_catalog"},"source_count":111,"bucket_count":7,"buckets":["scoring","surfaces","agents","distribution","data","foresight","buildability"]}}}