{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreidcdsl4s6p26trhcrokh52vrsa32evzx7eug2azmrhvftzfto2e6m",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mhifyboqbvq2"
  },
  "path": "/t/how-do-i-prepare-datasets-for-training-nlp-models/174420#post_1",
  "publishedAt": "2026-03-20T07:17:51.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "What steps should I follow to collect, clean, and organize text data so it can be used to train an AI language model?",
  "title": "How do I prepare datasets for training NLP models?"
}