Raw Record Source

{
  "$type": "site.standard.document",
  "contributors": [
    {
      "did": "did:plc:igunvse2uemkwmci3igoxhu5",
      "displayName": "Oz Akan",
      "role": "author"
    }
  ],
  "coverImage": {
    "$type": "blob",
    "ref": {
      "$link": "bafkreihm3tpdhvn6zgr4bfnwgl26umoynd3bh63unclkeeofcvx6s7ajba"
    },
    "mimeType": "image/png",
    "size": 57435
  },
  "description": "How language models convert token IDs into meaningful vector representations that capture semantic relationships.",
  "path": "/techs/09-token-to-embedding",
  "publishedAt": "2025-07-31T21:00:00.000Z",
  "site": "at://did:plc:igunvse2uemkwmci3igoxhu5/site.standard.publication/luminary-blog",
  "tags": [
    "aiml",
    "llm",
    "embeddings",
    "tokens"
  ],
  "textContent": "We know LLMs don't understand words, so we need to convert words into tokens first and then tokens to embeddings. Tokens are integers and they don't map to words directly. Embeddings are N-dimensional vectors. How do one map to another? It seems confusing.",
  "title": "Words, Tokens and Embeddings"
}