{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreih6g6fyx2ztlpw2pxlx6rgp7lsboiurny5ofh5crpdxyeqvg7us4e",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mghohnu73l72"
  },
  "path": "/t/qwen3-5-4b-loss-exploding/174057#post_1",
  "publishedAt": "2026-03-07T09:50:54.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "\n\n\n\n\n\n\n\n\n\nI am using a combined shuffled dataset that consists of high reasoning claude opus 4.5, 4.6, and gemini 3 pro messages from huggingface itself. Even if i lower the lr it keeps exploding at a further step.",
  "title": "Qwen3.5-4B loss exploding"
}