{
"$type": "site.standard.document",
"bskyPostRef": {
"cid": "bafyreieetm4cy2duw6jjsv7fian7vagew73cfmmgyrlx23vati5wv5ovce",
"uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mhopow472gb2"
},
"path": "/t/arcus-h-open-benchmark-for-rl-behavioral-stability-under-stress-built-on-sb3/174387#post_5",
"publishedAt": "2026-03-22T18:56:44.000Z",
"site": "https://discuss.huggingface.co",
"textContent": "Thank you. this is the clearest taxonomy framing I’ve seen for this problem, and the distinction between CD (input-side) and VI (feedback-side) for frozen SB3 policies is exactly the argument I needed to make explicit. I’ll implement all of this in the future version",
"title": "ARCUS-H: Open benchmark for RL behavioral stability under stress (built on SB3)"
}