{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreihmz3wjku7snds2ylr3gw3y675hkrzfbd5qsac5rstbmaxzrdgcjq",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mmre6tu2opi2"
  },
  "path": "/t/training-lora-for-ltx2-3-voice-sound-only/176239#post_1",
  "publishedAt": "2026-05-26T14:57:49.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "Hello guys,\n\nI am kind of stuck at the moment. I am trying to train Lora for voice only through Ostris AI ToolKit - VPS - Vast RTX5090. Here is the thing, I want individually or separately train voice lora only for my character. So when I manage that, I will train a video lora with character+voice. But as I mentioned above I am stuck. I am getting multiple errors from ostris AI. I got 27 clips between 6-10 seconds all well captioned.\nThis is the error which mostly appears among others - RuntimeError: Internal error: Internal Writer Error: Background writer channel closed.\nNot even sure if my lora training settings are correct\n\nThank you for all the answers if some appears lol\n\njob: “extension”\nconfig:\n\"\nprocess:\n\n  * type: “diffusion_trainer”\ntraining_folder: “/workspace/ai-toolkit/output”\nsqlite_db_path: “./aitk_db.db”\ndevice: “cuda”\ntrigger_word: “”\nperformance_log_every: 10\nnetwork:\ntype: “lora”\nlinear: 32\nlinear_alpha: 32\nconv: 16\nconv_alpha: 16\nlokr_full_rank: true\nlokr_factor: -1\nnetwork_kwargs:\nignore_if_contains:\nsave:\ndtype: “bf16”\nsave_every: 500\nmax_step_saves_to_keep: 4\nsave_format: “diffusers”\npush_to_hub: false\ndatasets:\n  * folder_path: “/workspace/ai-toolkit/datasets/ema_voice”\nmask_path: null\nmask_min_value: 0.1\ndefault_caption: “”\ncaption_ext: “txt”\ncaption_dropout_rate: 0.05\ncache_latents_to_disk: true\nis_reg: false\nnetwork_weight: 1\nresolution:\n  * 512\ncontrols:\nshrink_video_to_frames: true\nnum_frames: 1\nflip_x: false\nflip_y: false\nnum_repeats: 1\ndo_i2v: false\ndo_audio: true\nfps: 24\nauto_frame_count: true\ntrain:\nbatch_size: 1\nbypass_guidance_embedding: false\nsteps: 5000\ngradient_accumulation: 1\ntrain_unet: true\ntrain_text_encoder: false\ngradient_checkpointing: true\nnoise_scheduler: “flowmatch”\noptimizer: “adamw8bit”\ntimestep_type: “weighted”\ncontent_or_style: “balanced”\noptimizer_params:\nweight_decay: 0.0001\nunload_text_encoder: false\ncache_text_embeddings: false\nlr: 0.0001\nema_config:\nuse_ema: false\nema_decay: 0.99\nskip_first_sample: false\nforce_first_sample: false\ndisable_sampling: false\ndtype: “bf16”\ndiff_output_preservation: false\ndiff_output_preservation_multiplier: 1\ndiff_output_preservation_class: “person”\nswitch_boundary_every: 1\nloss_type: “mse”\naudio_loss_multiplier: 1\nlogging:\nlog_every: 1\nuse_ui_logger: true\nmodel:\nname_or_path: “Lightricks/LTX-2.3/ltx-2.3-22b-dev.safetensors”\nquantize: true\nqtype: “qfloat8”\nquantize_te: true\nqtype_te: “qfloat8”\narch: “ltx2.3”\nlow_vram: true\nmodel_kwargs: {}\nlayer_offloading: false\nlayer_offloading_text_encoder_percent: 1\nlayer_offloading_transformer_percent: 1\nsample:\nsampler: “flowmatch”\nsample_every: 500\nwidth: 768\nheight: 768\nsamples:\n\n\n\n\n        neg: \"\"\n        seed: 42\n        walk_seed: true\n        guidance_scale: 4\n        sample_steps: 30\n        num_frames: 121\n        fps: 24\n\n\nmeta:\nname: “[name]”\nversion: “1.0”",
  "title": "Training lora for LTX2.3 voice / sound only"
}