{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreieyze7eoa6nwyi5pe2g7hieljnppzfvhshtcdjkweqcpqqmi7hgxa",
"uri": "at://did:plc:ymc4gnswuis3jispcsmozvs2/app.bsky.feed.post/3mj34rjwypkw2"
},
"coverImage": {
"$type": "blob",
"ref": {
"$link": "bafkreia2xyrdi5pn6xmbjnljibwr4he2ajvycyc3k2lszlfqnwk56ykx34"
},
"mimeType": "image/png",
"size": 179322
},
"path": "/2026/04/09/turboquant-reducing-llm-memory-usage-with-vector-quantization/",
"publishedAt": "2026-04-09T14:00:43.000Z",
"site": "https://hackaday.com",
"tags": [
"Artificial Intelligence",
"Featured",
"large language model",
"quantization error",
"vector quantization"
],
"textContent": "Large language models (LLMs) aren’t actually giant computer brains. Instead, they are effectively massive vector spaces in which the probabilities of tokens occurring in a specific order are encoded. Billions …read more",
"title": "TurboQuant: Reducing LLM Memory Usage With Vector Quantization"
}