Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreif7y34hcg3nqrbwygiscjng2yqum4vaczic4hi3tof5qmvwrnrbwa",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3ml3omat7jvo2"
  },
  "path": "/t/the-bpe-pre-tokenizer-was-not-recognized/175714#post_6",
  "publishedAt": "2026-05-05T07:09:20.000Z",
  "site": "https://discuss.huggingface.co",
  "tags": [
    "Qwen/Qwen3.5-4B-Base · Hugging Face"
  ],
  "textContent": "I believe the following should be added to the llama.cpp-convert_hf_to_gguf.py function:\n\ndef get_vocab_base_pre(self, tokenizer)\n\nif chkhsh == “1444df51289cfa8063b96f0e62b1125440111bc79a52003ea14b6eac7016fd5f”:\n\n# ref: Qwen/Qwen3.5-4B-Base · Hugging Face\n\nres = “qwen2”",
  "title": "The BPE pre-tokenizer was not recognized!"
}