{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreieyze7eoa6nwyi5pe2g7hieljnppzfvhshtcdjkweqcpqqmi7hgxa",
    "uri": "at://did:plc:ymc4gnswuis3jispcsmozvs2/app.bsky.feed.post/3mj34rjwypkw2"
  },
  "coverImage": {
    "$type": "blob",
    "ref": {
      "$link": "bafkreia2xyrdi5pn6xmbjnljibwr4he2ajvycyc3k2lszlfqnwk56ykx34"
    },
    "mimeType": "image/png",
    "size": 179322
  },
  "path": "/2026/04/09/turboquant-reducing-llm-memory-usage-with-vector-quantization/",
  "publishedAt": "2026-04-09T14:00:43.000Z",
  "site": "https://hackaday.com",
  "tags": [
    "Artificial Intelligence",
    "Featured",
    "large language model",
    "quantization error",
    "vector quantization",
    "…read more"
  ],
  "textContent": "Large language models (LLMs) aren’t actually giant computer brains. Instead, they are effectively massive vector spaces in which the probabilities of tokens occurring in a specific order are encoded. Billions …read more",
  "title": "TurboQuant: Reducing LLM Memory Usage With Vector Quantization"
}