GCC Build OSv0
/api

DocumentParsed event type

Document parse pipeline completed (PDF/DOCX/XLSX/PPTX/text), producing chunked content ready for embedding.

Full spec

{
  "name": "DocumentParsed",
  "description": "Document parse pipeline completed (PDF/DOCX/XLSX/PPTX/text), producing chunked content ready for embedding.",
  "current_version": 1,
  "actor_type_allowed": [
    "system"
  ],
  "object_type": "Document",
  "payload_versions": {
    "v1": {
      "type": "object",
      "additionalProperties": false,
      "required": [
        "document_id",
        "parser",
        "chunk_count",
        "parsed_at"
      ],
      "properties": {
        "document_id": {
          "type": "string",
          "format": "uuid"
        },
        "parser": {
          "type": "string",
          "minLength": 1,
          "description": "Parser identifier (e.g., 'pdf-text-layer', 'mammoth-docx', 'xlsx-sheet-aware')."
        },
        "parser_version": {
          "type": "string"
        },
        "chunk_count": {
          "type": "integer",
          "minimum": 0
        },
        "structural_outline": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "Preserved structural headers/section labels for retrieval grounding."
        },
        "parsed_at": {
          "type": "string",
          "format": "date-time"
        }
      }
    }
  },
  "projections_consuming": [
    "documents_view",
    "audit_log"
  ],
  "canonicalizer_module": "event-canonicalizers/document_parsed.ts"
}