DocumentParsed event type
Document parse pipeline completed (PDF/DOCX/XLSX/PPTX/text), producing chunked content ready for embedding.
Full spec
{
  "name": "DocumentParsed",
  "description": "Document parse pipeline completed (PDF/DOCX/XLSX/PPTX/text), producing chunked content ready for embedding.",
  "current_version": 1,
  "actor_type_allowed": [
    "system"
  ],
  "object_type": "Document",
  "payload_versions": {
    "v1": {
      "type": "object",
      "additionalProperties": false,
      "required": [
        "document_id",
        "parser",
        "chunk_count",
        "parsed_at"
      ],
      "properties": {
        "document_id": {
          "type": "string",
          "format": "uuid"
        },
        "parser": {
          "type": "string",
          "minLength": 1,
          "description": "Parser identifier (e.g., 'pdf-text-layer', 'mammoth-docx', 'xlsx-sheet-aware')."
        },
        "parser_version": {
          "type": "string"
        },
        "chunk_count": {
          "type": "integer",
          "minimum": 0
        },
        "structural_outline": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "Preserved structural headers/section labels for retrieval grounding."
        },
        "parsed_at": {
          "type": "string",
          "format": "date-time"
        }
      }
    }
  },
  "projections_consuming": [
    "documents_view",
    "audit_log"
  ],
  "canonicalizer_module": "event-canonicalizers/document_parsed.ts"
}