Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreiamal7kn5agswcudjd545aru3ekqrkskmheqdnrjt3tka3xcnhzdy",
    "uri": "at://did:plc:nfto3lv2rcs5s7h7digotzlu/app.bsky.feed.post/3mljirh6r4ma2"
  },
  "coverImage": {
    "$type": "blob",
    "ref": {
      "$link": "bafkreig7saeenw6c3prrms24e2bpcmq7mskq26az4ajg73ufx33cf4gf2a"
    },
    "mimeType": "image/png",
    "size": 32236
  },
  "path": "/packages/kreuzberg",
  "publishedAt": "2026-05-10T19:31:55.922Z",
  "site": "https://pub.dev",
  "textContent": "High-performance document intelligence library — extract text, metadata, tables from 97+ formats including PDF, DOCX, images, and email. Changelog excerpt: - Initial release candidate - Document extraction (text, metadata, tables) from 97+ formats - OCR via Tesseract, PaddleOCR, VLM backends - HTML-to-Markdown conversion - PDF rendering - Code intelligence via tree-sitter (248 languages) - MIME type detection (118+ extensions) - LLM-powered structured extraction - Batch document processing - Embeddings generation via ONNX Runtime",
  "title": "v1.0.0 of kreuzberg",
  "updatedAt": "2026-05-10T18:51:58.098Z"
}