Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreibyn2zwqjw67acjlid5l7fzhwakkiec6p4o7h2gx2tmokirof2a7e",
    "uri": "at://did:plc:lk3jfj3zq4k4wxnk474axylu/app.bsky.feed.post/3mf5ij4obj4q2"
  },
  "path": "/t/gptbot-ignoring-robots-txt-and-hammering-single-url-in-a-loop-potential-infinite-crawl-bug/1374560#post_2",
  "publishedAt": "2026-02-18T15:21:18.000Z",
  "site": "https://community.openai.com",
  "tags": [
    "https://openai.com/gptbot.json"
  ],
  "textContent": "It’s entirely plausible that someone is _pretending_ to be GPTBot. Have you checked the IP Address(es) attached to the requests?\n\nYou can match it here, and in the future ensure that any user agent as `GPTBOT` is valid\n\nhttps://openai.com/gptbot.json\n\nLastly, you could be more explicit and include this in your robots.txt (not sure if this would help)\n\n\n    User-agent: GPTBot\n    Disallow: /*?*nocache=*\n",
  "title": "GPTBot ignoring robots.txt and hammering single URL in a loop — potential infinite crawl bug"
}