Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreia6hmvrd6dbphpbgt7twtmycwf2pkqw4tfoycpv224wydylksws5a",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3miwbmv3tabe2"
  },
  "path": "/t/would-this-concept-model-work/175056#post_1",
  "publishedAt": "2026-04-07T17:01:27.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "It’s an MDLM with ternary bits, hybrid Q8 and Q4 activations, and a 3-bit KV cache, used with block diffusion. The training code is really messy, so I don’t really want to share the PyTorch training code, but I’m trying to train a 1B model with 40B training tokens.",
  "title": "Would this concept model work?"
}