{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreigmu7tanz4ryop6c3yn5oh6cmm6rt4tf23feb5y7t6nzm574qnooy",
    "uri": "at://did:plc:pgryn3ephfd2xgft23qokfzt/app.bsky.feed.post/3mibfd34no5k2"
  },
  "path": "/t/indic-faker-generate-realistic-indian-synthetic-data-for-nlp-ml-8-languages-native-scripts-batch-dataframe-export/174762#post_2",
  "publishedAt": "2026-03-30T09:06:50.000Z",
  "site": "https://discuss.huggingface.co",
  "textContent": "**Amazing work on the Indic synthetic profiles dataset!**\nThis kind of tooling is **super valuable for Indian language NLP** , especially for low-resource contexts where real data is limited. Synthetic profiles can really help with pre-training, fine-tuning, and evaluation workflows by boosting diversity in language, scripts, and entity types.\n\nReally appreciate the effort to support multiple languages and scripts — this will make it easier for researchers and developers to build more inclusive models. If you’re planning future releases, it’d be great to see metrics about **quality checks** , **language coverage** , or **benchmarking insights**.\n\nThanks for contributing this to the community!",
  "title": "Indic-faker: Generate realistic Indian synthetic data for NLP/ML — 8 languages, native scripts, batch DataFrame export"
}