{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreifgwqugrrbkk3lddz2ppier7bpm2ehfss6ayc23h627v6nlijt5vi",
"uri": "at://did:plc:lk3jfj3zq4k4wxnk474axylu/app.bsky.feed.post/3mhbjv4e2ghq2"
},
"path": "/t/special-tokens-cause-500-on-text-embedding-3-small-but-not-other-models/1377013#post_1",
"publishedAt": "2026-03-17T17:08:46.000Z",
"site": "https://community.openai.com",
"textContent": "It appears that text-embedding-3-small has recently (at least as of March 16) stopped working for text containing special tokens:\n\nTime zone: PDT\n\n\n \"\"\"Minimal repro: causes 500 on text-embedding-3-small but not other models.\"\"\"\n\n import asyncio\n\n import openai\n from dotenv import load_dotenv\n\n TOKENS = [\n \"<|endoftext|>\",\n \"<|im_start|>\",\n \"<|im_end|>\",\n \"<|fim_prefix|>\",\n \"<|fim_middle|>\",\n \"<|fim_suffix|>\",\n \"<|endofprompt|>\",\n ]\n MODELS = [\"text-embedding-3-small\", \"text-embedding-3-large\", \"text-embedding-ada-002\"]\n\n\n async def main():\n client = openai.AsyncOpenAI()\n for token in TOKENS:\n text = f\"Here is some text.{token}Here is some more text.\"\n for model in MODELS:\n try:\n await client.embeddings.create(input=text, model=model)\n print(f\"{token:25s} {model}: SUCCESS\")\n except openai.InternalServerError as e:\n print(f\"{token:25s} {model}: 500 ERROR\")\n await client.close()\n\n\n if __name__ == \"__main__\":\n load_dotenv()\n asyncio.run(main())\n\n\nOutput:\n\n\n <|endoftext|> text-embedding-3-small: 500 ERROR\n <|endoftext|> text-embedding-3-large: SUCCESS\n <|endoftext|> text-embedding-ada-002: SUCCESS\n <|im_start|> text-embedding-3-small: 500 ERROR\n <|im_start|> text-embedding-3-large: SUCCESS\n <|im_start|> text-embedding-ada-002: SUCCESS\n <|im_end|> text-embedding-3-small: 500 ERROR\n <|im_end|> text-embedding-3-large: SUCCESS\n <|im_end|> text-embedding-ada-002: SUCCESS\n <|fim_prefix|> text-embedding-3-small: 500 ERROR\n <|fim_prefix|> text-embedding-3-large: SUCCESS\n <|fim_prefix|> text-embedding-ada-002: SUCCESS\n <|fim_middle|> text-embedding-3-small: 500 ERROR\n <|fim_middle|> text-embedding-3-large: SUCCESS\n <|fim_middle|> text-embedding-ada-002: SUCCESS\n <|fim_suffix|> text-embedding-3-small: 500 ERROR\n <|fim_suffix|> text-embedding-3-large: SUCCESS\n <|fim_suffix|> text-embedding-ada-002: SUCCESS\n <|endofprompt|> text-embedding-3-small: 500 ERROR\n <|endofprompt|> text-embedding-3-large: SUCCESS\n <|endofprompt|> text-embedding-ada-002: SUCCESS\n",
"title": "Special tokens cause 500 on text-embedding-3-small but not other models"
}