CPU offloading error scenario
If I pass `device_map` to `PeftModel.from_pretrained` when loading the LoRA adapter, I get the error below.
if isinstance(base_model, PeftModel): base_model = base_model.merge_and_unload() model = PeftModel.from_pretrained( base_model, lora_path, adapter_name=lora_source_client_name, device_map=device_map, is_trainable=False )
Error:
2026-04-24 13:38:40,528 | Worker (7392) | ERROR | Worker error: 22 Traceback (most recent call last): File “E:\Folder\inference_worker.py”, line 511, in inference_worker_loop outputs = model.generate( ^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\peft\peft_model.py”, line 2122, in generate outputs = self.base_model.generate(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\utils_contextlib.py”, line 120, in decorate_context return func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\generation\utils.py”, line 2543, in generate result = decoding_method( ^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\generation\utils.py”, line 2736, in _sample outputs = self._prefill( ^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\generation\utils.py”, line 3768, in _prefill return self(**model_inputs, return_dict=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1773, in _wrapped_call_impl return self._call_impl(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1784, in _call_impl return forward_call(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\accelerate\hooks.py”, line 192, in new_forward output = module._old_forward(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\utils\generic.py”, line 887, in wrapper output = func(self, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\models\gemma4\modeling_gemma4.py”, line 2516, in forward outputs = self.model( ^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1773, in _wrapped_call_impl return self._call_impl(*args, **kwargs) 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1784, in _call_impl return forward_call(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\accelerate\hooks.py”, line 192, in new_forward output = module._old_forward(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\utils\generic.py”, line 963, in wrapper output = func(self, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\utils\generic.py”, line 887, in wrapper output = func(self, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\models\gemma4\modeling_gemma4.py”, line 2374, in forward outputs = self.language_model( ^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1773, in _wrapped_call_impl return self._call_impl(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1784, in _call_impl return forward_call(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\accelerate\hooks.py”, line 192, in new_forward output = module._old_forward(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\utils\generic.py”, line 963, in wrapper output = func(self, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\utils\output_capturing.py”, line 248, in wrapper outputs = func(self, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\models\gemma4\modeling_gemma4.py”, line 1675, in forward hidden_states = decoder_layer( ^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\modeling_layers.py”, line 93, in call return super().call(*args, 
**kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1773, in _wrapped_call_impl return self._call_impl(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1784, in _call_impl return forward_call(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\accelerate\hooks.py”, line 192, in new_forward output = module._old_forward(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\models\gemma4\modeling_gemma4.py”, line 1379, in forward hidden_states, _ = self.self_attn( ^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1773, in _wrapped_call_impl return self._call_impl(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\torch\nn\modules\module.py”, line 1784, in _call_impl return forward_call(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\accelerate\hooks.py”, line 192, in new_forward output = module._old_forward(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File “E:\Folder\gemma_env\Lib\site-packages\transformers\models\gemma4\modeling_gemma4.py”, line 1219, in forward key_states, value_states = shared_kv_states[self.kv_shared_layer_index]
KeyError: 22
2026-04-24 13:38:40,553 | Worker (1696) | ERROR | Worker returned error: Worker error: 22
Discussion in the ATmosphere