{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreib4alueembz3hvkniz32jtcuhfkocx7p3ej3kewvyo5owyqyyavme",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mkahmpakjat2"
  },
  "path": "/t/cpu-offloading-error-scenario/175522#post_2",
  "publishedAt": "2026-04-24T11:44:31.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "If I add the device map to the peft I get below error.\n\n>\n>     if isinstance(base_model, PeftModel):\n>         base_model = base_model.merge_and_unload()\n>\n>     model = PeftModel.from_pretrained(\n>         base_model,\n>         lora_path,\n>         adapter_name=lora_source_client_name,\n>         device_map=device_map,\n>         is_trainable=False\n>     )\n>\n\nError:\n\n> 2026-04-24 13:38:40,528 | Worker (7392) | ERROR | Worker error: 22\n>  Traceback (most recent call last):\n>  File “E:\\Folder\\inference_worker.py”, line 511, in inference_worker_loop\n>  outputs = model.generate(\n>  ^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\peft\\peft_model.py”, line 2122, in generate\n>  outputs = self.base_model.generate(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\utils_contextlib.py”, line 120, in decorate_context\n>  return func(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\generation\\utils.py”, line 2543, in generate\n>  result = decoding_method(\n>  ^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\generation\\utils.py”, line 2736, in _sample\n>  outputs = self._prefill(\n>  ^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\generation\\utils.py”, line 3768, in _prefill\n>  return self(**model_inputs, return_dict=True)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n>  return self._call_impl(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n>  return forward_call(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File 
“E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n>  output = module._old_forward(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 887, in wrapper\n>  output = func(self, *args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 2516, in forward\n>  outputs = self.model(\n>  ^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n>  return self._call_impl(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n>  return forward_call(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n>  output = module._old_forward(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 963, in wrapper\n>  output = func(self, *args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 887, in wrapper\n>  output = func(self, *args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 2374, in forward\n>  outputs = self.language_model(\n>  ^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n>  return self._call_impl(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n>  return forward_call(*args, 
**kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n>  output = module._old_forward(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\generic.py”, line 963, in wrapper\n>  output = func(self, *args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\utils\\output_capturing.py”, line 248, in wrapper\n>  outputs = func(self, *args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 1675, in forward\n>  hidden_states = decoder_layer(\n>  ^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\modeling_layers.py”, line 93, in __call__\n>  return super().__call__(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n>  return self._call_impl(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n>  return forward_call(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n>  output = module._old_forward(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 1379, in forward\n>  hidden_states, _ = self.self_attn(\n>  ^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1773, in _wrapped_call_impl\n>  return self._call_impl(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File 
“E:\\Folder\\gemma_env\\Lib\\site-packages\\torch\\nn\\modules\\module.py”, line 1784, in _call_impl\n>  return forward_call(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\accelerate\\hooks.py”, line 192, in new_forward\n>  output = module._old_forward(*args, **kwargs)\n>  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  File “E:\\Folder\\gemma_env\\Lib\\site-packages\\transformers\\models\\gemma4\\modeling_gemma4.py”, line 1219, in forward\n>  key_states, value_states = shared_kv_states[self.kv_shared_layer_index]\n>  ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n>  KeyError: 22\n>  2026-04-24 13:38:40,553 | Worker (1696) | ERROR | Worker returned error: Worker error: 22",
  "title": "CPU offloading error scenario"
}