{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreidkijhndjcr2n2iofub2h3owmmmxjk7xw3j7dep3tv5no7fbgahq4",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mnkbjykbdvb2"
  },
  "path": "/t/how-can-i-build-a-high-quality-dataset/176571#post_1",
  "publishedAt": "2026-06-05T13:06:48.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "I want to build a high-quality Persian assistance dataset for an SLM.\n\nI have already used models like ChatGPT to generate a small Persian assistance dataset, but the overall quality was not good enough. Since the model I want to fine-tune is small, I need a larger dataset with much less noise and better overall quality.\n\nI want to ask how I can build the fine-tuning dataset I need in a high-quality way.",
  "title": "How can i build a High Quality dataset?"
}