{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreifgwqugrrbkk3lddz2ppier7bpm2ehfss6ayc23h627v6nlijt5vi",
    "uri": "at://did:plc:lk3jfj3zq4k4wxnk474axylu/app.bsky.feed.post/3mhbjv4e2ghq2"
  },
  "path": "/t/special-tokens-cause-500-on-text-embedding-3-small-but-not-other-models/1377013#post_1",
  "publishedAt": "2026-03-17T17:08:46.000Z",
  "site": "https://community.openai.com",
  "textContent": "It appears that text-embedding-3-small has recently (at least as of March 16) stopped working for text containing special tokens:\n\nTime zone: PDT\n\n\n    \"\"\"Minimal repro: causes 500 on text-embedding-3-small but not other models.\"\"\"\n\n    import asyncio\n\n    import openai\n    from dotenv import load_dotenv\n\n    TOKENS = [\n        \"<|endoftext|>\",\n        \"<|im_start|>\",\n        \"<|im_end|>\",\n        \"<|fim_prefix|>\",\n        \"<|fim_middle|>\",\n        \"<|fim_suffix|>\",\n        \"<|endofprompt|>\",\n    ]\n    MODELS = [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]\n\n\n    async def main():\n        client = openai.AsyncOpenAI()\n        for token in TOKENS:\n            text = f\"Here is some text.{token}Here is some more text.\"\n            for model in MODELS:\n                try:\n                    await client.embeddings.create(input=text, model=model)\n                    print(f\"{token:25s} {model}: SUCCESS\")\n                except openai.InternalServerError as e:\n                    print(f\"{token:25s} {model}: 500 ERROR\")\n        await client.close()\n\n\n    if __name__ == \"__main__\":\n        load_dotenv()\n        asyncio.run(main())\n\n\nOutput:\n\n\n    <|endoftext|>             text-embedding-3-small: 500 ERROR\n    <|endoftext|>             text-embedding-3-large: SUCCESS\n    <|endoftext|>             text-embedding-ada-002: SUCCESS\n    <|im_start|>              text-embedding-3-small: 500 ERROR\n    <|im_start|>              text-embedding-3-large: SUCCESS\n    <|im_start|>              text-embedding-ada-002: SUCCESS\n    <|im_end|>                text-embedding-3-small: 500 ERROR\n    <|im_end|>                text-embedding-3-large: SUCCESS\n    <|im_end|>                text-embedding-ada-002: SUCCESS\n    <|fim_prefix|>            text-embedding-3-small: 500 ERROR\n    <|fim_prefix|>            text-embedding-3-large: SUCCESS\n    <|fim_prefix|>            text-embedding-ada-002: SUCCESS\n    <|fim_middle|>            text-embedding-3-small: 500 ERROR\n    <|fim_middle|>            text-embedding-3-large: SUCCESS\n    <|fim_middle|>            text-embedding-ada-002: SUCCESS\n    <|fim_suffix|>            text-embedding-3-small: 500 ERROR\n    <|fim_suffix|>            text-embedding-3-large: SUCCESS\n    <|fim_suffix|>            text-embedding-ada-002: SUCCESS\n    <|endofprompt|>           text-embedding-3-small: 500 ERROR\n    <|endofprompt|>           text-embedding-3-large: SUCCESS\n    <|endofprompt|>           text-embedding-ada-002: SUCCESS\n",
  "title": "Special tokens cause 500 on text-embedding-3-small but not other models"
}