Raw Record Source

{
  "$type": "site.standard.document",
  "bskyPostRef": {
    "cid": "bafyreidgoxv6b4rg63vovuxkpmnelvi6oluy7mpws36cii5jnglcsblmu4",
    "commit": {
      "cid": "bafyreihzmkdoda3jijnuhtttl7zdgu25rby7n4fdk2qjhgqjxglezvrhom",
      "rev": "3mngpcwa7ve26"
    },
    "uri": "at://did:plc:5g2hkj2od4zr3tlpb2su364e/app.bsky.feed.post/3mngpcw7zzu26",
    "validationStatus": "valid"
  },
  "content": {
    "$type": "pub.lemma.blog.entry",
    "content": "I got Claude to write a verification script to see if a site will display nice previews on Bluesky. After fixing some bugs I got it in pretty good shape. Now you don't have to wonder why Bluesky is not showing your blog post the way you expected! This applies only when you paste a blog post link in a Bluesky post, not for Bluesky posts that are generated by apps.\n\nSadly, the Bluesky crawler doesn't execute Javascript so these previews won't work for client-side apps/SPAs, including Lemma. But when pasting a link to a server-side blog platform it should tell you exactly what's missing if you're not getting the nice previews on Bluesky.\n\nSave the file as `check_bluesky_preview.py`\n```python\n#!/usr/bin/env python3\n\"\"\"\nCheck if a blog post URL meets all requirements for enhanced Bluesky preview\ncards via Standard Site (site.standard.*) integration.\n\nUsage:\n    python check_bluesky_preview.py <url>\n\nRequirements:\n    pip install requests\n\"\"\"\n\nimport sys\nimport argparse\nfrom html.parser import HTMLParser\n\ntry:\n    import requests\n    requests.packages.urllib3.disable_warnings()\nexcept ImportError:\n    print(\"Error: 'requests' library required. Install with: pip install requests\")\n    sys.exit(1)\n\n\n# ── Terminal colors ────────────────────────────────────────────────────────────\n\nGREEN  = \"\\033[32m\"\nRED    = \"\\033[31m\"\nYELLOW = \"\\033[33m\"\nCYAN   = \"\\033[36m\"\nBOLD   = \"\\033[1m\"\nRESET  = \"\\033[0m\"\n\nPASS_SYM = f\"{GREEN}✓{RESET}\"\nFAIL_SYM = f\"{RED}✗{RESET}\"\nWARN_SYM = f\"{YELLOW}⚠{RESET}\"\nINFO_SYM = f\"{CYAN}ℹ{RESET}\"\n\n\n# ── HTML parser ────────────────────────────────────────────────────────────────\n\nclass HeadLinkParser(HTMLParser):\n    \"\"\"Collects <link> tags found inside <head>.\"\"\"\n\n    def __init__(self):\n        super().__init__()\n        self.links: list[dict] = []\n        self._in_head = False\n\n    def handle_starttag(self, tag, attrs):\n        if tag == \"head\":\n            self._in_head = True\n        elif tag == \"body\":\n            self._in_head = False\n        elif tag == \"link\" and self._in_head:\n            self.links.append(dict(attrs))\n\n    def handle_endtag(self, tag):\n        if tag == \"head\":\n            self._in_head = False\n\n\n# ── AT Protocol helpers ────────────────────────────────────────────────────────\n\ndef parse_at_uri(at_uri: str) -> dict | None:\n    if not at_uri or not at_uri.startswith(\"at://\"):\n        return None\n    parts = at_uri[5:].split(\"/\", 2)\n    return {\n        \"did\":        parts[0] if len(parts) > 0 else None,\n        \"collection\": parts[1] if len(parts) > 1 else None,\n        \"rkey\":       parts[2] if len(parts) > 2 else None,\n    }\n\n\ndef resolve_pds(did: str, session: requests.Session) -> str | None:\n    \"\"\"Return the PDS service endpoint for a DID, or None on failure.\"\"\"\n    try:\n        if did.startswith(\"did:plc:\"):\n            resp = session.get(f\"https://plc.directory/{did}\", timeout=10)\n            resp.raise_for_status()\n            doc = resp.json()\n        elif did.startswith(\"did:web:\"):\n            domain = did[8:]\n            resp = session.get(f\"https://{domain}/.well-known/did.json\", timeout=10)\n            resp.raise_for_status()\n            doc = resp.json()\n        else:\n            return None\n\n        for svc in doc.get(\"service\", []):\n            if svc.get(\"type\") == \"AtprotoPersonalDataServer\":\n                return svc.get(\"serviceEndpoint\")\n    except Exception:\n        pass\n    return None\n\n\ndef fetch_record(at_uri: str, session: requests.Session) -> tuple[dict | None, str | None]:\n    \"\"\"Fetch an AT Protocol record. Returns (value_dict, error_string).\"\"\"\n    parsed = parse_at_uri(at_uri)\n    if not parsed or not all([parsed[\"did\"], parsed[\"collection\"], parsed[\"rkey\"]]):\n        return None, f\"Malformed AT-URI: {at_uri}\"\n\n    pds = resolve_pds(parsed[\"did\"], session)\n    if not pds:\n        return None, f\"Could not resolve PDS for DID: {parsed['did']}\"\n\n    try:\n        resp = session.get(\n            f\"{pds}/xrpc/com.atproto.repo.getRecord\",\n            params={\n                \"repo\":       parsed[\"did\"],\n                \"collection\": parsed[\"collection\"],\n                \"rkey\":       parsed[\"rkey\"],\n            },\n            timeout=10,\n        )\n        resp.raise_for_status()\n        return resp.json().get(\"value\", {}), None\n    except requests.HTTPError as e:\n        return None, f\"HTTP {e.response.status_code}\"\n    except Exception as e:\n        return None, str(e)\n\n\n# ── Result tracker ─────────────────────────────────────────────────────────────\n\nclass Results:\n    def __init__(self):\n        self.passed = 0\n        self.failed = 0\n        self.warned = 0\n\n    def ok(self, msg: str):\n        print(f\"  {PASS_SYM} {msg}\")\n        self.passed += 1\n\n    def fail(self, msg: str):\n        print(f\"  {FAIL_SYM} {msg}\")\n        self.failed += 1\n\n    def warn(self, msg: str):\n        print(f\"  {WARN_SYM} {msg}\")\n        self.warned += 1\n\n    def info(self, msg: str):\n        print(f\"  {INFO_SYM} {msg}\")\n\n\n# ── Main checker ───────────────────────────────────────────────────────────────\n\ndef check_url(url: str):\n    session = requests.Session()\n    session.headers[\"User-Agent\"] = \"StandardSiteChecker/1.0 (Bluesky preview card validator)\"\n    r = Results()\n\n    print(f\"\\n{BOLD}Checking:{RESET} {url}\\n\")\n\n    # ── 1. Fetch page ──────────────────────────────────────────────────────────\n    section(\"1\", \"Fetching page\")\n    html = \"\"\n    try:\n        resp = session.get(url, timeout=15)\n        resp.raise_for_status()\n        html = resp.text\n        r.ok(f\"Page loaded (HTTP {resp.status_code}, {len(html):,} bytes)\")\n    except Exception as e:\n        r.fail(f\"Failed to fetch page: {e}\")\n\n    # ── 2. HTML link tags ──────────────────────────────────────────────────────\n    section(\"2\", \"HTML <link> tags in <head> (must be server-side rendered)\")\n    doc_uri = pub_uri = None\n    if html:\n        parser = HeadLinkParser()\n        parser.feed(html)\n        for link in parser.links:\n            rel = link.get(\"rel\", \"\")\n            href = link.get(\"href\", \"\")\n            if rel == \"site.standard.document\":\n                doc_uri = href\n            elif rel == \"site.standard.publication\":\n                pub_uri = href\n\n        if doc_uri:\n            r.ok(f\"<link rel='site.standard.document'>  {doc_uri}\")\n        else:\n            r.fail(\"<link rel='site.standard.document'> not found in <head>\")\n\n        if pub_uri:\n            r.ok(f\"<link rel='site.standard.publication'>  {pub_uri}\")\n        else:\n            r.fail(\"<link rel='site.standard.publication'> not found in <head>\")\n    else:\n        r.fail(\"Skipped — page could not be fetched\")\n\n    # ── 3. AT-URI format validation ────────────────────────────────────────────\n    section(\"3\", \"AT-URI format\")\n    doc_parsed = pub_parsed = None\n\n    if doc_uri:\n        p = parse_at_uri(doc_uri)\n        if p and p[\"did\"] and p[\"collection\"] == \"site.standard.document\" and p[\"rkey\"]:\n            r.ok(\"Document AT-URI is well-formed\")\n            doc_parsed = p\n        else:\n            r.fail(f\"Document AT-URI is malformed: {doc_uri!r}\")\n    else:\n        r.fail(\"Skipped — no site.standard.document link tag found\")\n\n    if pub_uri:\n        p = parse_at_uri(pub_uri)\n        if p and p[\"did\"] and p[\"collection\"] == \"site.standard.publication\" and p[\"rkey\"]:\n            r.ok(\"Publication AT-URI is well-formed\")\n            pub_parsed = p\n        else:\n            r.fail(f\"Publication AT-URI is malformed: {pub_uri!r}\")\n    else:\n        r.fail(\"Skipped — no site.standard.publication link tag found\")\n\n    # ── 4. site.standard.document record ──────────────────────────────────────\n    section(\"4\", \"site.standard.document record\")\n    doc_record = None\n    if doc_parsed:\n        doc_record, err = fetch_record(doc_uri, session)\n        if err:\n            r.fail(f\"Could not fetch document record: {err}\")\n        else:\n            r.ok(\"Record fetched successfully from PDS\")\n\n            for field in (\"title\", \"publishedAt\", \"site\"):\n                if doc_record.get(field):\n                    label = doc_record[field]\n                    if isinstance(label, str) and len(label) > 80:\n                        label = label[:77] + \"…\"\n                    r.ok(f\"{field}: {label}\")\n                else:\n                    r.fail(f\"Missing required field: {field}\")\n\n            if doc_record.get(\"textContent\"):\n                words = len(doc_record[\"textContent\"].split())\n                mins  = max(1, round(words / 200))\n                r.ok(f\"textContent: {words:,} words (~{mins} min read) — enables reading time display\")\n            else:\n                r.warn(\"textContent missing — reading time will not be shown on card\")\n    else:\n        r.fail(\"Skipped — no valid document AT-URI\")\n\n    # ── 5. site.standard.publication record ───────────────────────────────────\n    section(\"5\", \"site.standard.publication record\")\n    pub_record = None\n    if pub_parsed:\n        pub_record, err = fetch_record(pub_uri, session)\n        if err:\n            r.fail(f\"Could not fetch publication record: {err}\")\n        else:\n            r.ok(\"Record fetched successfully from PDS\")\n\n            for field in (\"name\", \"url\"):\n                if pub_record.get(field):\n                    r.ok(f\"{field}: {pub_record[field]}\")\n                else:\n                    r.fail(f\"Missing required field: {field}\")\n\n            if pub_record.get(\"icon\"):\n                r.ok(\"icon present — publication logo/avatar will appear in card\")\n            else:\n                r.warn(\"icon missing — no publication logo/avatar in card (site.standard.publication.icon is the site's brand image, not the post image)\")\n\n            if pub_record.get(\"basicTheme\"):\n                r.ok(\"basicTheme present — custom card colors available\")\n            else:\n                r.warn(\"basicTheme missing — card will use default colors\")\n    else:\n        r.fail(\"Skipped — no valid publication AT-URI\")\n\n    # ── 6. .well-known publication verification ────────────────────────────────\n    # NOTE: Bluesky uses <link> tag discovery, not .well-known, to show preview\n    # cards. The .well-known endpoint is part of the Standard Site ownership-\n    # verification spec but is NOT required for Bluesky enhanced cards to appear.\n    section(\"6\", \".well-known publication verification (Standard Site spec — not required for Bluesky cards)\")\n    if pub_record and pub_record.get(\"url\"):\n        pub_base = pub_record[\"url\"]\n        wk_url = pub_base.rstrip(\"/\") + \"/.well-known/site.standard.publication\"\n        r.info(f\"Checking: {wk_url}\")\n        try:\n            wk_resp = session.get(wk_url, timeout=10)\n            if wk_resp.status_code == 200:\n                r.ok(\".well-known endpoint reachable (HTTP 200)\")\n                returned = wk_resp.text.strip().strip('\"')\n                if returned == pub_uri:\n                    r.ok(\".well-known AT-URI matches <link> tag — publication verified\")\n                else:\n                    r.warn(\n                        f\".well-known AT-URI mismatch\\n\"\n                        f\"      expected: {pub_uri}\\n\"\n                        f\"      got:      {returned}\"\n                    )\n            else:\n                r.warn(f\".well-known endpoint returned HTTP {wk_resp.status_code} (not required for Bluesky cards)\")\n        except Exception as e:\n            r.warn(f\".well-known endpoint unreachable: {e} (not required for Bluesky cards)\")\n    else:\n        r.warn(\"Skipped — publication record not available\")\n\n    # ── 7. Cross-reference: document.site → publication ───────────────────────\n    section(\"7\", \"Cross-reference: document.site → publication\")\n    if doc_record and pub_uri and pub_record:\n        doc_site = doc_record.get(\"site\", \"\")\n        pub_url  = pub_record.get(\"url\", \"\")\n        if doc_site == pub_uri or doc_site == pub_url:\n            r.ok(f\"document.site matches publication ({doc_site!r})\")\n        else:\n            r.warn(\n                f\"document.site ({doc_site!r}) does not directly equal\\n\"\n                f\"      publication AT-URI: {pub_uri}\\n\"\n                f\"      publication url:    {pub_url}\"\n            )\n    else:\n        r.warn(\"Skipped — document or publication record not available\")\n\n    summary(r)\n\n\n# ── Helpers ────────────────────────────────────────────────────────────────────\n\ndef section(num: str, title: str):\n    print(f\"\\n{BOLD}[{num}] {title}{RESET}\")\n\n\ndef summary(r: Results):\n    print(f\"\\n{BOLD}{'─' * 60}{RESET}\")\n    print(\n        f\"{BOLD}Results:{RESET}  \"\n        f\"{GREEN}{r.passed} passed{RESET}  \"\n        f\"{RED}{r.failed} failed{RESET}  \"\n        f\"{YELLOW}{r.warned} warnings{RESET}\"\n    )\n    if r.failed == 0 and r.warned == 0:\n        print(f\"\\n{GREEN}{BOLD}All checks passed — this URL will get enhanced Bluesky preview cards.{RESET}\")\n    elif r.failed == 0:\n        print(\n            f\"\\n{YELLOW}{BOLD}No failures ({r.warned} warning(s)).\"\n            f\" Preview cards will work but may be missing some enhancements.{RESET}\"\n        )\n    else:\n        print(f\"\\n{RED}{BOLD}{r.failed} check(s) failed — enhanced preview cards will not appear.{RESET}\")\n    print()\n\n\n# ── Entry point ────────────────────────────────────────────────────────────────\n\ndef main():\n    ap = argparse.ArgumentParser(\n        description=\"Check if a blog post URL meets all requirements for enhanced Bluesky preview cards.\"\n    )\n    ap.add_argument(\"url\", help=\"Blog post URL to check\")\n    args = ap.parse_args()\n\n    url = args.url\n    if not url.startswith((\"http://\", \"https://\")):\n        url = \"https://\" + url\n\n    check_url(url)\n\n\nif __name__ == \"__main__\":\n    main()\n\n```\n\nCall it with the blog post url you want to check e.g.\n```sh\npython3 check_bluesky_preview.py https://atproto.com/blog/indexing-standard-site\n```\n",
    "license": "CC BY"
  },
  "coverImage": {
    "$type": "blob",
    "ref": {
      "$link": "bafkreibhsmk3zul4nlpesi3qewmisoo6bzrn43frmtq7ffd4u5xfqt4dka"
    },
    "mimeType": "image/jpeg",
    "size": 212396
  },
  "path": "/3mngpcvxeh22e",
  "publishedAt": "2026-06-04T03:41:28.332Z",
  "site": "at://did:plc:5g2hkj2od4zr3tlpb2su364e/site.standard.publication/3mmhg6ulmlszy",
  "textContent": "I got Claude to write a verification script to see if a site will display nice previews on Bluesky. After fixing some bugs I got it in pretty good shape. Now you don't have to wonder why Bluesky is not showing your blog post the way you expected! This applies only when you paste a blog post link in a Bluesky post, not for Bluesky posts that are generated by apps.\n\nSadly, the Bluesky crawler doesn't execute Javascript so these previews won't work for client-side apps/SPAs, including Lemma. But when pasting a link to a server-side blog platform it should tell you exactly what's missing if you're not getting the nice previews on Bluesky.\n\nSave the file as checkblueskypreview.py\n\nCall it with the blog post url you want to check e.g.",
  "title": "Verifying standard.site integration with Bluesky",
  "updatedAt": "2026-06-04T05:19:12.677Z"
}