{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreieetm4cy2duw6jjsv7fian7vagew73cfmmgyrlx23vati5wv5ovce",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mhopow472gb2"
  },
  "path": "/t/arcus-h-open-benchmark-for-rl-behavioral-stability-under-stress-built-on-sb3/174387#post_5",
  "publishedAt": "2026-03-22T18:56:44.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "Thank you. this is the clearest taxonomy framing I’ve seen for this problem, and the distinction between CD (input-side) and VI (feedback-side) for frozen SB3 policies is exactly the argument I needed to make explicit. I’ll implement all of this in the future version",
  "title": "ARCUS-H: Open benchmark for RL behavioral stability under stress (built on SB3)"
}