{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreib4alueembz3hvkniz32jtcuhfkocx7p3ej3kewvyo5owyqyyavme",
"uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mkahmpakjat2"
},
"path": "/t/cpu-offloading-error-scenario/175522#post_2",
"publishedAt": "2026-04-24T11:44:31.000Z",
"site": "https://discuss.huggingface.co",
"textContent": "If I add the device map to the PEFT model, I get the error below.\n\n>\n> if isinstance(base_model, PeftModel):\n> base_model = base_model.merge_and_unload()\n>\n> model = PeftModel.from_pretrained(\n> base_model,\n> lora_path,\n> adapter_name=lora_source_client_name,\n> device_map=device_map,\n> is_trainable=False\n> )\n>\n\nError:\n\n> 2026-04-24 13:38:40,528 | Worker (7392) | ERROR | Worker error: 22\n> Traceback (most recent call last):\n> File “E:\\Folder\\inference_worker.py”, line 511, in inference_worker_loop\n> outputs = model.generate(\n> ^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\peft\\peft_model.py”, line 2122, in generate\n> outputs = self.base_model.generate(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\utils\\_contextlib.py”, line 120, in decorate_context\n> return func(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\generation\\utils.py”, line 2543, in generate\n> result = decoding_method(\n> ^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\generation\\utils.py”, line 2736, in _sample\n> outputs = self._prefill(\n> ^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\generation\\utils.py”, line 3768, in _prefill\n> return self(**model_inputs, return_dict=True)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n> return self._call_impl(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n> return forward_call(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n> output = module._old_forward(*args, **kwargs)\n> 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 887, in wrapper\n> output = func(self, *args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 2516, in forward\n> outputs = self.model(\n> ^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n> return self._call_impl(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n> return forward_call(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n> output = module._old_forward(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 963, in wrapper\n> output = func(self, *args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 887, in wrapper\n> output = func(self, *args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 2374, in forward\n> outputs = self.language_model(\n> ^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n> return self._call_impl(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n> return forward_call(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n> output = 
module._old_forward(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 963, in wrapper\n> output = func(self, *args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\output_capturing.py”, line 248, in wrapper\n> outputs = func(self, *args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 1675, in forward\n> hidden_states = decoder_layer(\n> ^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\modeling_layers.py”, line 93, in __call__\n> return super().__call__(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n> return self._call_impl(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n> return forward_call(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n> output = module._old_forward(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 1379, in forward\n> hidden_states, _ = self.self_attn(\n> ^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n> return self._call_impl(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n> return forward_call(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File 
“E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n> output = module._old_forward(*args, **kwargs)\n> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 1219, in forward\n> key_states, value_states = shared_kv_states[self.kv_shared_layer_index]\n> ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n> KeyError: 22\n> 2026-04-24 13:38:40,553 | Worker (1696) | ERROR | Worker returned error: Worker error: 22",
"title": "CPU offloading error scenario"
}