{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreibj37zzmpx6x42onjjkopthfykr6n6ov4cz6vnyfvnfdghx3h5avm",
    "uri": "at://did:plc:jo3wjj2gx46alocis4wubmwr/app.bsky.feed.post/3mgzrvx34xuj2"
  },
  "path": "/blog/2026/03/14/wikipedia-article-transform/",
  "publishedAt": "2026-03-13T23:30:00.000Z",
  "site": "https://thottingal.in",
  "tags": [
    "webfetch",
    "https://en.wikipedia.org/wiki/2026_Winter_Olympics"
  ],
  "textContent": "We are witnessing a resurgence and evolution of Command Line Interfaces (CLIs), accelerated by AI agents. Text-based, scriptable CLI tools work very well with LLM-based workflows. Accessing Wikipedia articles during an agent session is common. Usually, a webfetch call is used to get the HTML for a page from a URL like https://en.wikipedia.org/wiki/2026_Winter_Olympics.\n\nThat works, and LLMs are smart enough to read HTML. But there is a cost: HTML is for rendering, so the model must ignore a lot of non-content markup to get to the useful text. That increases token usage and adds context noise. Can we improve this?",
  "title": "CLI for transforming Wikipedia articles to text, markdown, and JSON"
}