{
"$type": "site.standard.document",
"content": {
"$type": "pub.leaflet.content",
"pages": [
{
"$type": "pub.leaflet.pages.linearDocument",
"blocks": [
{
"$type": "pub.leaflet.pages.linearDocument#block",
"block": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "This latest ternary toy:"
}
},
{
"$type": "pub.leaflet.pages.linearDocument#block",
"block": {
"$type": "pub.leaflet.blocks.unorderedList",
"children": [
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "Uses a SPLADE model to extract semantic features from a poster's corpus"
}
},
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "Identifies the three features across the corpus-of-corpuses that explain the most variance"
}
},
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"facets": [
{
"features": [
{
"$type": "pub.leaflet.richtext.facet#italic"
}
],
"index": {
"byteEnd": 145,
"byteStart": 138
}
}
],
"plaintext": "Takes the ten highest-scoring posts for each axis and feeds them into an LLM, along with the SPLADE feature names, to ask: what are these really?"
}
},
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "Displays as a ternary diagram that lets you drill into individual posts"
}
},
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "With an optional 3D graph tab"
}
}
]
}
},
{
"$type": "pub.leaflet.pages.linearDocument#block",
"block": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "Right now I'm having it talk to a local Qwen. For a Cloudflare deployment, you probably want something like:"
}
},
{
"$type": "pub.leaflet.pages.linearDocument#block",
"block": {
"$type": "pub.leaflet.blocks.unorderedList",
"children": [
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"facets": [
{
"features": [
{
"$type": "pub.leaflet.richtext.facet#code"
}
],
"index": {
"byteEnd": 28,
"byteStart": 4
}
}
],
"plaintext": "Use Xenova/bge-large-en-v1.5 via Cloudflare instead of the SPLADE model in WebGPU. Claude says to tell your Claude: \"Take splade-pca.html. Remove the SPLADE model loading and just use Xenova/bge-large-en-v1.5 with feature-extraction pipeline from the start — look at the useDenseEmbedder path in loadModel, make that the only path.\""
}
},
{
"$type": "pub.leaflet.blocks.unorderedList#listItem",
"content": {
"$type": "pub.leaflet.blocks.text",
"plaintext": "Instead of talking to a local llama, have the service talk to a cloud LLM. You don't need much juice for this step and the token count is pretty low."
}
}
]
}
},
{
"$type": "pub.leaflet.pages.linearDocument#block",
"block": {
"$type": "pub.leaflet.blocks.code",
"plaintext": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>splade-pca — mino.mobi</title>\n<script type=\"importmap\">\n{\n \"imports\": {\n \"three\": \"https://cdn.jsdelivr.net/npm/three@0.170.0/build/three.module.min.js\",\n \"three/addons/\": \"https://cdn.jsdelivr.net/npm/three@0.170.0/examples/jsm/\"\n }\n}\n</script>\n<style>\n:root {\n --bg: #faf9f6; --text: #1a1a1a; --muted: #777; --rule: #ccc; --link: #8b0000;\n --pc1: #b06090; --pc2: #5090b0; --pc3: #70a060;\n --mono: 'SF Mono','Cascadia Code','Fira Code',Menlo,monospace;\n --serif: 'Iowan Old Style','Palatino Linotype',Palatino,Georgia,serif;\n}\n@media (prefers-color-scheme: dark) {\n :root { --bg:#0f0f0f; --text:#d4d4d4; --muted:#777; --rule:#333; --link:#c45;\n --pc1:#d080b0; --pc2:#70b0d0; --pc3:#90c080; }\n}\n* { margin:0; padding:0; box-sizing:border-box; }\nbody { background:var(--bg); color:var(--text); font-family:var(--serif);\n line-height:1.7; padding:4rem 2rem; max-width:960px; margin:0 auto; }\nh1 { font-family:var(--mono); font-size:0.85rem; font-weight:400; letter-spacing:0.15em;\n text-transform:lowercase; color:var(--muted); margin-bottom:0.5rem; }\nh1 a { color:var(--muted); text-decoration:none; }\nh1 a:hover { color:var(--text); }\n.subtitle { font-size:1.15rem; color:var(--text); margin-bottom:1rem; }\n.desc { font-size:0.95rem; color:var(--muted); margin-bottom:2.5rem; }\n.hidden { display:none !important; }\n.tabs { display:flex; gap:1rem; margin-bottom:1rem; }\n.tab { font-family:var(--mono); font-size:0.7rem; letter-spacing:0.05em; color:var(--muted);\n cursor:pointer; padding-bottom:0.3rem; border:none; border-bottom:1px solid transparent; background:none; }\n.tab.active { color:var(--text); border-bottom-color:var(--link); }\n.tab:hover { color:var(--text); }\ntextarea { width:100%; font-family:var(--mono); font-size:0.8rem; padding:0.75rem;\n border:1px solid 
var(--rule); background:var(--bg); color:var(--text);\n resize:vertical; margin-bottom:0.75rem; }\ntextarea:focus { outline:none; border-color:var(--link); }\n.list-row { display:flex; gap:0.5rem; margin-bottom:0.75rem; }\n.list-row input { flex:1; font-family:var(--mono); font-size:0.8rem; padding:0.5rem 0.75rem;\n border:1px solid var(--rule); background:var(--bg); color:var(--text); }\n.list-row input:focus { outline:none; border-color:var(--link); }\nbutton { font-family:var(--mono); font-size:0.75rem; letter-spacing:0.05em;\n padding:0.5rem 1.25rem; border:1px solid var(--rule); background:var(--bg);\n color:var(--text); cursor:pointer; white-space:nowrap; }\nbutton:hover { border-color:var(--link); color:var(--link); }\nbutton:disabled { opacity:0.4; cursor:not-allowed; }\n.labeler-row { display:flex; gap:0.5rem; align-items:center; margin-bottom:0.5rem; flex-wrap:wrap; }\n.labeler-row select { font-family:var(--mono); font-size:0.75rem; padding:0.35rem 0.6rem;\n border:1px solid var(--rule); background:var(--bg); color:var(--text); cursor:pointer; }\n.labeler-row select:focus { outline:none; border-color:var(--link); }\n.labeler-extra { margin-bottom:1.25rem; }\n.labeler-extra input { width:100%; font-family:var(--mono); font-size:0.75rem; padding:0.4rem 0.75rem;\n border:1px solid var(--rule); background:var(--bg); color:var(--text); }\n.labeler-extra input:focus { outline:none; border-color:var(--link); }\n.labeler-hint { font-family:var(--mono); font-size:0.7rem; color:var(--muted); margin-top:0.3rem; }\n.action-row { display:flex; gap:0.75rem; align-items:center; margin-bottom:2rem; }\n.handle-count { font-family:var(--mono); font-size:0.7rem; color:var(--muted); }\n.progress-track { width:100%; height:1px; background:var(--rule); margin-bottom:1.5rem; overflow:hidden; }\n.progress-fill { height:100%; background:var(--link); width:0%; transition:width 0.3s ease; }\n.status-line { font-family:var(--mono); font-size:0.75rem; color:var(--muted); 
margin-bottom:1.5rem; }\n.status-line.error { color:var(--link); }\n.view-tabs { display:flex; gap:1rem; margin-bottom:1rem; }\n#resultsLayout { display:grid; grid-template-columns:1fr 280px; gap:2rem; align-items:start; margin-bottom:2.5rem; }\n#postCol { position:sticky; top:1.5rem; max-height:calc(100vh - 3rem); overflow-y:auto; }\n.chart-wrap { position:relative; }\n.chart-wrap canvas { width:100%; aspect-ratio:8/7; display:block; touch-action:none; }\n#threeWrap { width:100%; aspect-ratio:8/7; position:relative; overflow:hidden; }\n#threeWrap canvas { width:100%!important; height:100%!important; display:block; }\n.ax-label { position:absolute; pointer-events:none; font-family:var(--mono); font-size:0.65rem;\n background:transparent; padding:0.2rem 0.35rem; line-height:1.3; text-align:center;\n transform:translate(-50%,-50%); white-space:nowrap; }\n.tooltip { position:absolute; pointer-events:none; font-family:var(--mono); font-size:0.7rem;\n background:var(--bg); border:1px solid var(--rule); padding:0.4rem 0.6rem;\n color:var(--text); white-space:nowrap; z-index:10; line-height:1.4; }\n.readout { font-family:var(--mono); font-size:0.75rem; color:var(--muted); min-height:3.5rem;\n display:flex; align-items:center; gap:0.75rem; padding:0.75rem 0;\n border-bottom:1px solid var(--rule); margin-bottom:0.5rem; }\n.readout-avatar { width:36px; height:36px; border-radius:50%; object-fit:cover; flex-shrink:0; }\n.readout-handle { color:var(--text); font-size:0.8rem; font-weight:700; }\n.readout-handle a { color:var(--text); text-decoration:none; }\n.readout-handle a:hover { text-decoration:underline; }\n.readout-scores { display:flex; gap:0.5rem; margin-top:0.25rem; flex-wrap:wrap; }\n.readout-score { font-family:var(--mono); font-size:0.65rem; padding:0.1rem 0.35rem; border-radius:1px; }\n.section-header { font-family:var(--mono); font-size:0.7rem; color:var(--muted); letter-spacing:0.05em;\n margin-bottom:0.75rem; padding-bottom:0.5rem; border-bottom:1px solid 
var(--rule); }\n.post-row { display:flex; align-items:flex-start; gap:0.5rem; padding:0.4rem 0;\n border-bottom:1px solid var(--rule); line-height:1.5; }\n.post-row:last-child { border-bottom:none; }\n.post-pcs { display:flex; flex-direction:column; gap:0.15rem; flex-shrink:0; padding-top:0.15rem; }\n.post-pc { font-family:var(--mono); font-size:0.58rem; padding:0.05rem 0.25rem; border-radius:1px; white-space:nowrap; }\n.post-text { flex:1; min-width:0; font-size:0.82rem; color:var(--text); word-break:break-word; }\nfooter { margin-top:4rem; padding-top:1.5rem; border-top:1px solid var(--rule);\n font-family:var(--mono); font-size:0.7rem; color:var(--muted); letter-spacing:0.05em; }\nfooter a { color:var(--muted); text-decoration:none; }\nfooter a:hover { color:var(--text); }\n@media (max-width:700px) { #resultsLayout { grid-template-columns:1fr; } #postCol { position:static; max-height:none; } }\n@media (max-width:560px) { body { padding:1.5rem 0.75rem; } }\n</style>\n</head>\n<body>\n\n<h1><a href=\"/\">mino.mobi</a> / splade-pca</h1>\n<p class=\"subtitle\">No anchors. No labels. Just structure.</p>\n<p class=\"desc\">\n Posts encoded into vocabulary space. Covariance taken. 
Principal components extracted.\n The axes mean whatever the data says they mean.\n</p>\n\n<div class=\"tabs\" id=\"inputTabs\">\n <button class=\"tab active\" data-tab=\"paste\">paste handles</button>\n <button class=\"tab\" data-tab=\"list\">load list</button>\n</div>\n<div id=\"pastePanel\">\n <textarea id=\"handleList\" rows=\"5\" placeholder=\"one handle per line alice.bsky.social bob.bsky.social\" spellcheck=\"false\"></textarea>\n</div>\n<div id=\"listPanel\" class=\"hidden\">\n <div class=\"list-row\">\n <input type=\"text\" id=\"listUrl\" placeholder=\"https://bsky.app/profile/.../lists/...\" autocomplete=\"off\" spellcheck=\"false\">\n <button id=\"loadListBtn\">load list</button>\n </div>\n</div>\n<div class=\"labeler-row\">\n <span style=\"font-family:var(--mono);font-size:0.7rem;color:var(--muted)\">axis labels:</span>\n <select id=\"labeler\">\n <option value=\"none\">corpus stats</option>\n <option value=\"browser\">browser LLM · WebGPU (~500MB)</option>\n <option value=\"llama\">llama-server · local</option>\n <option value=\"anthropic\">anthropic api</option>\n </select>\n</div>\n<div class=\"labeler-extra hidden\" id=\"labelerExtra\">\n <input type=\"password\" id=\"labelerInput\" autocomplete=\"off\" spellcheck=\"false\">\n <div class=\"labeler-hint\" id=\"labelerHint\"></div>\n</div>\n<div class=\"action-row\">\n <button id=\"mapBtn\" disabled>map</button>\n <span class=\"handle-count\" id=\"handleCount\">0 handles</span>\n</div>\n<div class=\"progress-track hidden\" id=\"progressBar\"><div class=\"progress-fill\" id=\"progressFill\"></div></div>\n<div class=\"status-line hidden\" id=\"status\"></div>\n\n<div id=\"results\" class=\"hidden\">\n <div class=\"view-tabs\">\n <button class=\"tab active\" id=\"viewTernaryBtn\">ternary</button>\n <button class=\"tab\" id=\"view3dBtn\">3d</button>\n </div>\n <div id=\"resultsLayout\">\n <div id=\"chartCol\">\n <div id=\"ternaryWrap\" class=\"chart-wrap\">\n <canvas id=\"ternaryChart\"></canvas>\n <div 
class=\"tooltip hidden\" id=\"tooltip\"></div>\n </div>\n <div id=\"threeWrap\" class=\"hidden\"></div>\n <div class=\"readout\" id=\"readout\">hover a hex to inspect · click to expand</div>\n </div>\n <div id=\"postCol\"><div id=\"postList\"></div></div>\n </div>\n</div>\n\n<footer><a href=\"/\">mino.mobi</a> · <a href=\"/ternary\">ternary</a></footer>\n\n<script type=\"module\">\nimport * as THREE from 'three';\nimport { OrbitControls } from 'three/addons/controls/OrbitControls.js';\nimport { AutoTokenizer, AutoModelForMaskedLM, pipeline as tfPipeline }\n from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3/dist/transformers.min.js';\n\n// ── Constants ──────────────────────────────────────\nconst DEVICE = navigator.gpu ? 'webgpu' : 'wasm';\nconst BSKY = 'https://public.api.bsky.app';\nconst POSTS_PER_USER = 30;\nconst BATCH_SIZE = navigator.gpu ? 8 : 4;\nconst MAX_LEN = 64;\nconst POWER_ITER = 60;\nconst HEX_RADIUS_MAX = 13;\nconst HEX_RADIUS_MIN = 4;\n\nconst GRAM_WGSL = `\nstruct Uni { N: u32, D: u32 }\n@group(0) @binding(0) var<uniform> uni: Uni;\n@group(0) @binding(1) var<storage, read> Xc: array<f32>;\n@group(0) @binding(2) var<storage, read_write> G: array<f32>;\n@compute @workgroup_size(8, 8)\nfn main(@builtin(global_invocation_id) id: vec3<u32>) {\n let i = id.x; let k = id.y; let N = uni.N; let D = uni.D;\n if (i >= N || k >= N) { return; }\n var s: f32 = 0.0;\n for (var j: u32 = 0u; j < D; j = j + 1u) { s = s + Xc[i*D+j] * Xc[k*D+j]; }\n G[i*N+k] = s;\n}`;\n\n// ── State ──────────────────────────────────────────\nlet tokenizer = null;\nlet mlmModel = null;\nlet useDenseEmbedder = false;\nlet denseEmbedder = null;\nlet vocabSize = 30522;\nlet gpuDevice = null;\nlet gramPipeline = null;\nlet avatarImages = {}; // canvas-2d usage\nlet chartState = null;\nlet threeObjs = null;\nlet viewMode = 'ternary';\nlet lockedHandle = null;\n\n// ── Utilities ──────────────────────────────────────\nconst $ = id => document.getElementById(id);\nconst show = 
el => el.classList.remove('hidden');\nconst hide = el => el.classList.add('hidden');\nfunction escHtml(s) { return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }\nfunction setStatus(msg, err) { const e=$('status'); show(e); e.textContent=msg; e.classList.toggle('error',!!err); }\nfunction setProgress(pct) { show($('progressBar')); $('progressFill').style.width=pct+'%'; }\nfunction getColors() {\n const cs = getComputedStyle(document.documentElement);\n return ['text','muted','rule','bg','pc1','pc2','pc3','link'].reduce((o,k)=>{o[k]=cs.getPropertyValue('--'+k).trim();return o;},{});\n}\nfunction getHandles() {\n const raw = $('handleList').value.trim(); if (!raw) return [];\n return raw.split(/[\\n,]+/).map(h=>h.trim().replace(/^@/,'')).filter(h=>h.length>0);\n}\nfunction updateCount() {\n const n=getHandles().length;\n $('handleCount').textContent=n+' handle'+(n!==1?'s':'');\n $('mapBtn').disabled=n<2;\n}\n\n// ── BSKY API ───────────────────────────────────────\nasync function resolveHandle(handle) {\n if (handle.startsWith('did:')) return handle;\n const r = await fetch(BSKY+'/xrpc/com.atproto.identity.resolveHandle?handle='+encodeURIComponent(handle));\n if (!r.ok) throw new Error('Could not resolve: '+handle);\n return (await r.json()).did;\n}\nasync function fetchProfile(did) {\n const r = await fetch(BSKY+'/xrpc/app.bsky.actor.getProfile?actor='+encodeURIComponent(did));\n return r.ok ? 
r.json() : null;\n}\nasync function fetchRecentPosts(did, max) {\n const texts=[]; let cursor;\n while (texts.length < max) {\n let url=BSKY+'/xrpc/app.bsky.feed.getAuthorFeed?actor='+encodeURIComponent(did)+'&limit=100&filter=posts_and_author_threads';\n if (cursor) url+='&cursor='+encodeURIComponent(cursor);\n const r=await fetch(url); if (!r.ok) break;\n const data=await r.json(); const feed=data.feed||[]; if (!feed.length) break;\n for (let i=0;i<feed.length&&texts.length<max;i++) {\n const item=feed[i]; if (item.reason) continue;\n const t=item.post&&item.post.record&&item.post.record.text;\n if (t&&t.length>5&&!t.startsWith('…')) texts.push(t);\n }\n cursor=data.cursor; if (!cursor) break;\n }\n return texts;\n}\nasync function loadUserData(handle) {\n const did=await resolveHandle(handle); const prof=await fetchProfile(did);\n const texts=await fetchRecentPosts(did, POSTS_PER_USER);\n return { handle:prof?prof.handle:handle, did, avatar:prof?prof.avatar:null, texts };\n}\nasync function batchLoad(handles, concurrency, onProgress) {\n const results=[]; let idx=0,completed=0;\n async function worker() {\n while (idx<handles.length) {\n const i=idx++;\n try { results.push(await loadUserData(handles[i])); } catch(e) {}\n completed++; if (onProgress) onProgress(completed,handles.length);\n }\n }\n await Promise.all(Array.from({length:Math.min(concurrency,handles.length)},worker));\n return results;\n}\nfunction parseListUrl(url) { const m=url.match(/\\/profile\\/([^/]+)\\/lists\\/([^/?#]+)/); return m?{actor:m[1],rkey:m[2]}:null; }\nasync function fetchListMembers(actor, rkey) {\n const did=await resolveHandle(actor);\n const atUri='at://'+did+'/app.bsky.graph.list/'+rkey;\n const handles=[]; let cursor;\n while (true) {\n let url=BSKY+'/xrpc/app.bsky.graph.getList?list='+encodeURIComponent(atUri)+'&limit=100';\n if (cursor) url+='&cursor='+encodeURIComponent(cursor);\n const r=await fetch(url); if (!r.ok) throw new Error('Failed to load list (HTTP 
'+r.status+')');\n const data=await r.json(); const items=data.items||[]; if (!items.length) break;\n for (const item of items) { if (item.subject&&item.subject.handle) handles.push(item.subject.handle); }\n cursor=data.cursor; if (!cursor) break;\n }\n return handles;\n}\n\n// ── Model loading ──────────────────────────────────\n// Tries a proper SPLADE model first (vocab-space sparse features, semantically trained).\n// Falls back to BGE-large dense embeddings (still semantically meaningful, just not sparse/vocab-indexed).\nasync function loadModel(onProgress) {\n const spladesModels = [\n 'naver/splade-cocondenser-selfdistil',\n 'naver/efficient-splade-VI-BT-large-query',\n ];\n for (const modelId of spladesModels) {\n try {\n onProgress('loading tokenizer (' + modelId + ')…');\n tokenizer = await AutoTokenizer.from_pretrained(modelId);\n onProgress('loading splade model…');\n mlmModel = await AutoModelForMaskedLM.from_pretrained(modelId, {\n device: DEVICE, dtype: 'q8',\n progress_callback: p => { if (p&&p.progress!=null) onProgress('downloading: '+Math.round(p.progress)+'%'); },\n });\n vocabSize = 30522; useDenseEmbedder = false;\n onProgress('splade ready (' + modelId + ')');\n return;\n } catch(e) { tokenizer=null; mlmModel=null; }\n }\n // Dense semantic fallback — still meaningful, just not vocabulary-indexed\n onProgress('splade unavailable, using bge-large (dense semantic)…');\n denseEmbedder = await tfPipeline('feature-extraction', 'Xenova/bge-large-en-v1.5', {\n device: DEVICE, dtype: 'q8',\n progress_callback: p => { if (p&&p.progress!=null) onProgress('downloading: '+Math.round(p.progress)+'%'); },\n });\n vocabSize = 1024; useDenseEmbedder = true;\n onProgress('bge-large ready');\n}\n\n// ── SPLADE-style inference ─────────────────────────\n// Returns { userVec, postVecs } — postVecs is array of per-post Float32Arrays\nasync function embedUser(texts) {\n const postVecs = [];\n if (!texts.length) return { userVec: new Float32Array(vocabSize), 
postVecs };\n\n if (useDenseEmbedder) {\n const userVec = new Float32Array(vocabSize);\n for (let i=0; i<texts.length; i+=BATCH_SIZE) {\n const batch=texts.slice(i,i+BATCH_SIZE);\n const out=await denseEmbedder(batch, { pooling:'mean', normalize:true });\n for (let b=0; b<batch.length; b++) {\n const pv=new Float32Array(vocabSize);\n for (let j=0; j<vocabSize; j++) pv[j]=out.data[b*vocabSize+j];\n postVecs.push(pv);\n for (let j=0; j<vocabSize; j++) userVec[j]+=pv[j];\n }\n }\n const n=texts.length;\n for (let j=0; j<vocabSize; j++) userVec[j]/=n;\n return { userVec, postVecs };\n }\n\n // SPLADE: relu(log1p(logits)), max-pool over tokens per post, then max across posts for user vec\n const userVec = new Float32Array(vocabSize);\n for (let i=0; i<texts.length; i+=BATCH_SIZE) {\n const batch=texts.slice(i,i+BATCH_SIZE);\n const inputs=await tokenizer(batch, { padding:true, truncation:true, max_length:MAX_LEN, return_tensors:'pt' });\n const { logits }=await mlmModel(inputs);\n const bSize=batch.length;\n const seqLen=inputs.attention_mask.dims[1];\n const attnData=inputs.attention_mask.data;\n const logData=logits.data;\n for (let b=0; b<bSize; b++) {\n const pv=new Float32Array(vocabSize);\n for (let t=0; t<seqLen; t++) {\n if (attnData[b*seqLen+t]===0) continue;\n const tOff=(b*seqLen+t)*vocabSize;\n for (let v=0; v<vocabSize; v++) {\n const val=Math.log1p(Math.max(0, logData[tOff+v]));\n if (val>pv[v]) pv[v]=val;\n }\n }\n postVecs.push(pv);\n for (let v=0; v<vocabSize; v++) { if (pv[v]>userVec[v]) userVec[v]=pv[v]; }\n }\n }\n return { userVec, postVecs };\n}\n\n// ── Corpus-based axis labels ───────────────────────\n// Correlates per-user word frequencies with PC projections.\n// Top correlated words are the axis labels — always readable actual words.\nfunction extractCorpusLabels(userData, projections, k) {\n const N = userData.length;\n const STOP = new Set([\n 'the','and','for','are','but','not','you','all','this','that','with','have','from',\n 
'they','what','their','would','there','been','were','when','more','will','she','was',\n 'his','her','has','had','its','who','our','out','can','did','get','him','now','may',\n 'use','how','any','came','come','like','just','also','about','said','then','over',\n 'very','well','much','them','some','want','know','dont','into','your','its','its',\n 'one','two','even','only','back','still','here','after','where','those','being',\n 'these','other','such','than','should','through','because','really','people',\n 'going','think','feel','good','time','look','make','let','never','always','every',\n ]);\n\n const userWords = userData.map(u => {\n const wf = {};\n for (const text of u.texts) {\n for (const t of (text.toLowerCase().match(/\\b[a-z]{3,15}\\b/g) || []))\n if (!STOP.has(t)) wf[t] = (wf[t]||0) + 1;\n }\n return wf;\n });\n\n return [0,1,2].map(comp => {\n const scores = projections.map(p => p[comp]);\n const mean = scores.reduce((a,b)=>a+b,0)/N;\n const centered = scores.map(s => s-mean);\n const corr = {};\n for (let i=0; i<N; i++)\n for (const [w,c] of Object.entries(userWords[i]))\n corr[w] = (corr[w]||0) + centered[i] * Math.log1p(c);\n return Object.entries(corr)\n .filter(([,v]) => v > 0)\n .sort((a,b) => b[1]-a[1])\n .slice(0, k).map(([w]) => w);\n });\n}\n\n// ── PCA directions in vocab space ─────────────────\n// V_k = Xc.T @ u_k: the k-th PC direction expressed in vocabulary (or embedding) space.\nfunction computePCDirections(Xc, N, D, eigenvecs) {\n return eigenvecs.map(u => {\n const v = new Float32Array(D);\n for (let j=0; j<D; j++) {\n let s=0; for (let i=0; i<N; i++) s+=Xc[i*D+j]*u[i]; v[j]=s;\n }\n normalizeVec(v);\n return v;\n });\n}\n\n// ── Per-post projection ────────────────────────────\n// Returns [pc1_pct, pc2_pct, pc3_pct] normalized to sum=100 for display.\nfunction projectPost(postVec, means, Vk) {\n const raw = Vk.map(vk => {\n let s=0;\n for (let j=0; j<postVec.length; j++) s+=(postVec[j]-means[j])*vk[j];\n return s;\n });\n const minR = 
Math.min(...raw);\n let vs = raw.map(r => Math.max(r-minR, 0.02));\n const tot = vs.reduce((a,b)=>a+b, 0.001);\n return vs.map(v => Math.round(v/tot*100));\n}\n\n// ── LLM axis labeling ──────────────────────────────\n\n// Brief prompt for small/browser models — fits in ~512 tokens\nfunction axisPrompt(userData, projections, comp) {\n const indexed = userData.map((u, i) => ({ u, score: projections[i][comp] }));\n indexed.sort((a, b) => b.score - a.score);\n const high = indexed.slice(0, 2).flatMap(({ u }) => u.texts.slice(0, 4));\n const low = indexed.slice(-2).flatMap(({ u }) => u.texts.slice(0, 4));\n return 'HIGH end posts:\\n' + high.map(t => '- ' + t.slice(0, 200)).join('\\n') +\n '\\n\\nLOW end posts:\\n' + low.map(t => '- ' + t.slice(0, 200)).join('\\n') +\n '\\n\\nDescribe the HIGH end in 3 words or fewer. Specific content or style. Just the label, nothing else.';\n}\n\n// Rich prompt for capable local models.\n// Uses per-post PC scores (derived from embeddings) to select the most axis-aligned\n// posts from across the whole corpus — so the embeddings actively curate what the LLM sees.\n// corpusWords (word-frequency signals) are passed as an additional vocabulary hint.\nfunction axisPromptRich(userData, projections, comp, corpusWords) {\n // Collect every post with its embedding-derived PC score for this component\n const allPosts = [];\n userData.forEach(u => {\n (u.postScores || []).forEach((ps, pi) => {\n if (u.texts[pi]) allPosts.push({ handle: u.handle, text: u.texts[pi], score: ps[comp] });\n });\n });\n allPosts.sort((a, b) => b.score - a.score);\n\n const fmt = posts =>\n posts.map(p => ` @${p.handle}: \"${p.text.replace(/\"/g, '’')}\"`).join('\\n');\n\n const hints = corpusWords && corpusWords[comp] && corpusWords[comp].length\n ? 
`Vocabulary signals for the HIGH end (word-frequency correlation): ${corpusWords[comp].join(', ')}\\n\\n`\n : '';\n\n return hints +\n `Posts scoring HIGHEST on this semantic dimension (selected by PCA projection):\\n${fmt(allPosts.slice(0, 8))}\\n\\n` +\n `Posts scoring LOWEST:\\n${fmt(allPosts.slice(-8))}\\n\\n` +\n `What does the HIGH end represent, contrasted with the low end? Give a 3–5 word label for the HIGH end. Just the label.`;\n}\n\n// Anthropic claude-haiku\nasync function labelAxisWithClaude(apiKey, userData, projections, comp) {\n const res = await fetch('https://api.anthropic.com/v1/messages', {\n method: 'POST',\n headers: { 'Content-Type':'application/json', 'x-api-key':apiKey, 'anthropic-version':'2023-06-01' },\n body: JSON.stringify({ model:'claude-haiku-4-5-20251001', max_tokens:20,\n messages:[{ role:'user', content:axisPrompt(userData, projections, comp) }] }),\n });\n if (!res.ok) throw new Error('anthropic ' + res.status);\n return (await res.json()).content[0].text.trim().replace(/[.!?'\"]+$/, '').toLowerCase();\n}\n\n// llama-server (llama.cpp) — all 3 axes in one request so the model can differentiate them\n// Run: llama-server --model model.gguf --port 8080 --jinja\nasync function labelAllAxesWithLlamaServer(port, userData, projections, corpusWords) {\n const sections = [0, 1, 2].map(comp => {\n const allPosts = [];\n userData.forEach(u => {\n (u.postScores || []).forEach((ps, pi) => {\n if (u.texts[pi]) allPosts.push({ handle: u.handle, text: u.texts[pi], score: ps[comp] });\n });\n });\n allPosts.sort((a, b) => b.score - a.score);\n const fmt = posts => posts.map(p => ` @${p.handle}: \"${p.text.replace(/\"/g, '’')}\"`).join('\\n');\n const hints = corpusWords && corpusWords[comp] && corpusWords[comp].length\n ? 
`Vocabulary signals: ${corpusWords[comp].join(', ')}\\n` : '';\n return `**Dimension ${comp + 1}**\\n${hints}HIGH end:\\n${fmt(allPosts.slice(0, 6))}\\nLOW end:\\n${fmt(allPosts.slice(-6))}`;\n });\n\n const prompt = sections.join('\\n\\n') +\n '\\n\\nGive a distinct 3–5 word label for the HIGH end of each dimension. ' +\n 'Make the three labels meaningfully different from each other. Reply in exactly this format:\\n1: label\\n2: label\\n3: label';\n\n const res = await fetch('http://localhost:' + port + '/v1/chat/completions', {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n messages: [\n { role: 'system', content: 'Reply only in the exact format requested. No preamble, no explanation.' },\n { role: 'user', content: prompt },\n ],\n max_tokens: 5120,\n temperature: 0,\n stream: false,\n }),\n });\n if (!res.ok) throw new Error('llama-server ' + res.status + ' (port ' + port + ')');\n\n const msg = (await res.json()).choices[0].message;\n // llama-server separates Qwen3 thinking into reasoning_content; content has the final answer\n const raw = (msg.content || '').trim() || (msg.reasoning_content || '').trim();\n // Also strip any leaked <think> tags just in case\n const text = raw.replace(/<think>[\\s\\S]*?<\\/think>/gi, '').replace(/<\\/?think>/gi, '').trim();\n\n const labels = [null, null, null];\n for (const line of text.split('\\n')) {\n const m = line.match(/^([123])[.:)\\s]\\s*(.+)/);\n if (m) labels[parseInt(m[1]) - 1] = m[2].trim().replace(/[.!?'\"]+$/, '').toLowerCase();\n }\n return labels;\n}\n\n// Browser LLM via Transformers.js text-generation (WebGPU/WASM)\nlet labelGen = null;\nasync function getLabelGen(onProgress) {\n if (labelGen) return labelGen;\n onProgress('loading label model (SmolLM2-360M, ~500MB, cached after first run)…');\n labelGen = await tfPipeline('text-generation', 'HuggingFaceTB/SmolLM2-360M-Instruct', {\n device: DEVICE, dtype: 'q4',\n progress_callback: p => { if 
(p&&p.progress!=null) onProgress('label model: '+Math.round(p.progress)+'%'); },\n });\n return labelGen;\n}\n\nfunction cleanSmallModelOutput(raw) {\n // Small models often echo prompts or add filler — strip it down to 1-3 words\n let t = raw.trim()\n .split(/\\n/)[0] // first line only\n .replace(/^label:?\\s*/i, '') // remove \"Label:\" prefix\n .replace(/['\"«»\"\"'']/g, '') // remove quotes\n .replace(/^[-–—•*]\\s*/, '') // remove bullet prefix\n .replace(/\\s*[.!?;,]+\\s*$/, '') // trailing punctuation\n .trim();\n // If it still looks like a fragment of our prompt, give up\n if (/high.end|low.end|posts|end\\s*of/i.test(t)) return null;\n return t.split(/\\s+/).slice(0, 4).join(' ').toLowerCase() || null;\n}\n\nasync function labelAxisInBrowser(userData, projections, comp, corpusWords, onProgress) {\n const gen = await getLabelGen(onProgress);\n const hints = (corpusWords[comp] || []).join(', ') || 'various';\n // Single representative post from the top user — short enough for tiny models\n const indexed = userData.map((u, i) => ({ u, score: projections[i][comp] }));\n indexed.sort((a, b) => b.score - a.score);\n const post = (indexed[0].u.texts[0] || '').slice(0, 120);\n const messages = [\n { role: 'system', content: 'Reply with a 2-word label only. No explanation.' },\n { role: 'user', content: `Related words: ${hints}\\nExample: \"${post}\"\\nLabel:` },\n ];\n const out = await gen(messages, { max_new_tokens: 12, do_sample: false });\n const reply = out[0].generated_text;\n const raw = Array.isArray(reply) ? 
reply.at(-1).content : String(reply);\n return cleanSmallModelOutput(raw);\n}\n\nasync function labelAxes(userData, projections, corpusWords, onProgress) {\n const labeler = $('labeler').value;\n if (labeler === 'none') return [null, null, null];\n const input = ($('labelerInput').value || '').trim();\n\n if (labeler === 'browser') {\n // Sequential — one tiny model at a time to avoid OOM\n const results = [];\n for (let comp = 0; comp < 3; comp++) {\n results.push(await labelAxisInBrowser(userData, projections, comp, corpusWords, onProgress).catch(() => null));\n }\n return results;\n }\n if (labeler === 'anthropic') {\n if (!input) return [null,null,null];\n localStorage.setItem('splade_api_key', input);\n return Promise.all([0,1,2].map(comp =>\n labelAxisWithClaude(input, userData, projections, comp).catch(() => null)\n ));\n }\n if (labeler === 'llama') {\n const port = input || '8080';\n return labelAllAxesWithLlamaServer(port, userData, projections, corpusWords).catch(() => [null, null, null]);\n }\n return [null,null,null];\n}\n\n// ── WebGPU Gram matrix ─────────────────────────────\nasync function setupWebGPU() {\n if (!navigator.gpu) return;\n try {\n const adapter=await navigator.gpu.requestAdapter(); if (!adapter) return;\n gpuDevice=await adapter.requestDevice();\n const mod=gpuDevice.createShaderModule({ code:GRAM_WGSL });\n gramPipeline=await gpuDevice.createComputePipelineAsync({ layout:'auto', compute:{ module:mod, entryPoint:'main' } });\n } catch(e) { gpuDevice=null; }\n}\n\nasync function computeGram(Xc, N, D) {\n if (gpuDevice && gramPipeline) {\n try {\n const uniBuf=gpuDevice.createBuffer({ size:8, usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST });\n gpuDevice.queue.writeBuffer(uniBuf, 0, new Uint32Array([N,D]));\n const xcBuf=gpuDevice.createBuffer({ size:Xc.byteLength, usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST });\n gpuDevice.queue.writeBuffer(xcBuf, 0, Xc);\n const gSize=N*N*4;\n const gBuf=gpuDevice.createBuffer({ 
size:gSize, usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC });\n const rBuf=gpuDevice.createBuffer({ size:gSize, usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ });\n const bg=gpuDevice.createBindGroup({ layout:gramPipeline.getBindGroupLayout(0), entries:[\n {binding:0,resource:{buffer:uniBuf}},{binding:1,resource:{buffer:xcBuf}},{binding:2,resource:{buffer:gBuf}},\n ]});\n const enc=gpuDevice.createCommandEncoder();\n const pass=enc.beginComputePass();\n pass.setPipeline(gramPipeline); pass.setBindGroup(0,bg);\n pass.dispatchWorkgroups(Math.ceil(N/8),Math.ceil(N/8)); pass.end();\n enc.copyBufferToBuffer(gBuf,0,rBuf,0,gSize);\n gpuDevice.queue.submit([enc.finish()]);\n await rBuf.mapAsync(GPUMapMode.READ);\n const result=new Float32Array(rBuf.getMappedRange().slice(0));\n rBuf.unmap(); uniBuf.destroy(); xcBuf.destroy(); gBuf.destroy(); rBuf.destroy();\n return result;\n } catch(e) {}\n }\n const G=new Float32Array(N*N);\n for (let i=0;i<N;i++) for (let k=0;k<=i;k++) {\n let s=0; for (let j=0;j<D;j++) s+=Xc[i*D+j]*Xc[k*D+j];\n G[i*N+k]=s; G[k*N+i]=s;\n }\n return G;\n}\n\n// ── PCA via kernel trick ───────────────────────────\nasync function runPCA(userMatrix, N) {\n const D=vocabSize;\n const means=new Float32Array(D);\n for (let i=0;i<N;i++) for (let j=0;j<D;j++) means[j]+=userMatrix[i*D+j];\n for (let j=0;j<D;j++) means[j]/=N;\n const Xc=new Float32Array(N*D);\n for (let i=0;i<N;i++) for (let j=0;j<D;j++) Xc[i*D+j]=userMatrix[i*D+j]-means[j];\n\n const G=await computeGram(Xc,N,D);\n const Gw=G.slice(); const eigenvecs=[],eigenvals=[];\n for (let comp=0;comp<3;comp++) {\n let v=new Float32Array(N);\n for (let i=0;i<N;i++) v[i]=Math.random()-0.5; normalizeVec(v);\n let lambda=0;\n for (let iter=0;iter<POWER_ITER;iter++) {\n const w=new Float32Array(N);\n for (let i=0;i<N;i++) { let s=0; for (let k=0;k<N;k++) s+=Gw[i*N+k]*v[k]; w[i]=s; }\n lambda=vecNorm(w); if (lambda<1e-10) break;\n for (let i=0;i<N;i++) v[i]=w[i]/lambda;\n }\n 
eigenvecs.push(v.slice()); eigenvals.push(lambda);\n for (let i=0;i<N;i++) for (let k=0;k<N;k++) Gw[i*N+k]-=lambda*v[i]*v[k];\n }\n const projections=[];\n for (let i=0;i<N;i++) projections.push([\n Math.sqrt(Math.max(0,eigenvals[0]))*eigenvecs[0][i],\n Math.sqrt(Math.max(0,eigenvals[1]))*eigenvecs[1][i],\n Math.sqrt(Math.max(0,eigenvals[2]))*eigenvecs[2][i],\n ]);\n return { projections, eigenvals, Xc, eigenvecs, means };\n}\n\nfunction vecNorm(v) { let s=0; for (let i=0;i<v.length;i++) s+=v[i]*v[i]; return Math.sqrt(s); }\nfunction normalizeVec(v) { const l=vecNorm(v)||1; for (let i=0;i<v.length;i++) v[i]/=l; }\n\n// ── Ternary normalization ──────────────────────────\nfunction toTernary(projections) {\n const mins=[Infinity,Infinity,Infinity], maxs=[-Infinity,-Infinity,-Infinity];\n for (const p of projections) p.forEach((v,k) => { if(v<mins[k])mins[k]=v; if(v>maxs[k])maxs[k]=v; });\n return projections.map(p => {\n let vs=p.map((v,k)=>Math.max((v-mins[k])/(maxs[k]-mins[k]||0.001),0.02));\n const tot=vs.reduce((a,b)=>a+b,0);\n return { pc1:Math.round(vs[0]/tot*100), pc2:Math.round(vs[1]/tot*100), pc3:Math.round(vs[2]/tot*100) };\n });\n}\n\n// ── Hex geometry ───────────────────────────────────\nfunction pointInTriangle(px,py,ax,ay,bx,by,cx,cy) {\n const d=(by-cy)*(ax-cx)+(cx-bx)*(ay-cy);\n const a=((by-cy)*(px-cx)+(cx-bx)*(py-cy))/d;\n const b=((cy-ay)*(px-cx)+(ax-cx)*(py-cy))/d;\n return a>=-0.001&&b>=-0.001&&(1-a-b)>=-0.001;\n}\nfunction generateHexCells(Ax,Ay,Bx,By,Cx,Cy,hexR) {\n const cenX=(Ax+Bx+Cx)/3,cenY=(Ay+By+Cy)/3;\n const scale=Math.max(0.5,1-(hexR*0.9)/((Ay-Cy)*0.5));\n const iAx=cenX+(Ax-cenX)*scale,iAy=cenY+(Ay-cenY)*scale;\n const iBx=cenX+(Bx-cenX)*scale,iBy=cenY+(By-cenY)*scale;\n const iCx=cenX+(Cx-cenX)*scale,iCy=cenY+(Cy-cenY)*scale;\n const hexW=Math.sqrt(3)*hexR,rowH=1.5*hexR;\n const minY=Math.min(iAy,iBy,iCy),maxY=Math.max(iAy,iBy,iCy);\n const minX=Math.min(iAx,iBx,iCx),maxX=Math.max(iAx,iBx,iCx);\n const cells=[]; let row=0;\n for 
(let y=minY+hexR;y<=maxY;y+=rowH) {\n const off=(row%2)?hexW*0.5:0;\n for (let x=minX+hexW*0.5+off;x<=maxX;x+=hexW)\n if (pointInTriangle(x,y,iAx,iAy,iBx,iBy,iCx,iCy)) cells.push({x,y,occupant:null});\n row++;\n }\n return cells;\n}\nfunction assignUsersToCells(cells,users,t2xy) {\n const ranked=users.map(u=>{const dp1=u.pc1-33.33,dp2=u.pc2-33.33,dp3=u.pc3-33.33;return{user:u,ext:Math.sqrt(dp1*dp1+dp2*dp2+dp3*dp3)};});\n ranked.sort((a,b)=>b.ext-a.ext);\n const occ={};\n for (const {user:u} of ranked) {\n const ideal=t2xy(u.pc1,u.pc2,u.pc3); let bestIdx=-1,bestDist=Infinity;\n for (let c=0;c<cells.length;c++) { if(occ[c])continue; const dx=cells[c].x-ideal.x,dy=cells[c].y-ideal.y; const d=dx*dx+dy*dy; if(d<bestDist){bestDist=d;bestIdx=c;} }\n if (bestIdx>=0) { occ[bestIdx]=true; cells[bestIdx].occupant=u; }\n }\n}\nfunction hexPath(ctx,cx,cy,r) {\n ctx.beginPath();\n for (let i=0;i<6;i++) { const a=Math.PI/3*i-Math.PI/6; if(i===0)ctx.moveTo(cx+r*Math.cos(a),cy+r*Math.sin(a)); else ctx.lineTo(cx+r*Math.cos(a),cy+r*Math.sin(a)); }\n ctx.closePath();\n}\n\n// ── Ternary draw ───────────────────────────────────\nfunction drawTernary(canvas, data, highlightHandle, pcLabels) {\n const ctx=canvas.getContext('2d');\n const dpr=window.devicePixelRatio||1;\n const rect=canvas.getBoundingClientRect(); if (rect.width===0) return;\n const W=rect.width,H=rect.height;\n canvas.width=W*dpr; canvas.height=H*dpr; ctx.scale(dpr,dpr);\n const col=getColors();\n const mobile=W<480;\n const pad=mobile?28:48;\n const pcFs=mobile?13:20; // size of PC₁/₂/₃ label\n const maxSemFs=mobile?14:24; // max size of semantic sub-label\n const botReserve=mobile?52:76; // vertical space below triangle for bottom labels\n const sideLp=mobile?18:52; // outward offset for rotated side labels\n ctx.clearRect(0,0,W,H);\n let triW=W-pad*2, triH=triW*Math.sqrt(3)/2;\n if (triH+pad+botReserve>H) { triH=H-pad-botReserve; triW=triH*2/Math.sqrt(3); }\n const cx=W/2;\n const Ax=cx-triW/2,Ay=pad+triH, 
Bx=cx+triW/2,By=pad+triH, Cx=cx,Cy=pad;\n\n function t2xy(p1,p2,p3) { const tot=p1+p2+p3||1; return{x:(p1/tot)*Ax+(p2/tot)*Cx+(p3/tot)*Bx,y:(p1/tot)*Ay+(p2/tot)*Cy+(p3/tot)*By}; }\n\n // Fit text to a max pixel width by reducing font size\n function fitFs(text, maxW, maxSz, minSz=7) {\n let sz=maxSz;\n while (sz>minSz) { ctx.font=sz+'px monospace'; if(ctx.measureText(text).width<=maxW)break; sz--; }\n return sz;\n }\n\n ctx.strokeStyle=col.rule; ctx.lineWidth=0.5; ctx.globalAlpha=0.4;\n for (let i=1;i<10;i++) {\n const t=i/10;\n const pairs=[[t2xy(1-t,t,0),t2xy(0,t,1-t)],[t2xy(t,1-t,0),t2xy(t,0,1-t)],[t2xy(1-t,0,t),t2xy(0,1-t,t)]];\n for (const [a,b] of pairs) { ctx.beginPath(); ctx.moveTo(a.x,a.y); ctx.lineTo(b.x,b.y); ctx.stroke(); }\n }\n ctx.globalAlpha=1;\n ctx.strokeStyle=col.text; ctx.lineWidth=1.5;\n ctx.beginPath(); ctx.moveTo(Ax,Ay); ctx.lineTo(Bx,By); ctx.lineTo(Cx,Cy); ctx.closePath(); ctx.stroke();\n\n const toks=pcLabels||[[],[],[]];\n const maxSemW=triW*0.82; // semantic label targets 82% of edge length\n\n // PC₁ bottom — horizontal\n ctx.textAlign='center';\n ctx.font='bold '+pcFs+'px monospace'; ctx.fillStyle=col.pc1;\n ctx.fillText('PC₁', cx, Ay+8+pcFs);\n const s0=toks[0].join(' · ');\n if (s0) {\n const sf0=fitFs(s0, maxSemW, maxSemFs);\n ctx.font=sf0+'px monospace'; ctx.fillStyle=col.pc1; ctx.globalAlpha=0.72;\n ctx.fillText(s0, cx, Ay+10+pcFs+sf0);\n ctx.globalAlpha=1;\n }\n\n // PC₂ left edge — rotated\n ctx.save(); ctx.translate((Ax+Cx)/2-sideLp,(Ay+Cy)/2); ctx.rotate(-Math.PI/3); ctx.textAlign='center';\n ctx.font='bold '+pcFs+'px monospace'; ctx.fillStyle=col.pc2; ctx.fillText('PC₂',0,0);\n const s1=toks[1].join(' · ');\n if (s1) {\n const sf1=fitFs(s1, maxSemW, maxSemFs);\n ctx.font=sf1+'px monospace'; ctx.fillStyle=col.pc2; ctx.globalAlpha=0.72;\n ctx.fillText(s1, 0, pcFs+5);\n ctx.globalAlpha=1;\n }\n ctx.restore();\n\n // PC₃ right edge — rotated\n ctx.save(); ctx.translate((Bx+Cx)/2+sideLp,(By+Cy)/2); ctx.rotate(Math.PI/3); 
ctx.textAlign='center';\n ctx.font='bold '+pcFs+'px monospace'; ctx.fillStyle=col.pc3; ctx.fillText('PC₃',0,0);\n const s2=toks[2].join(' · ');\n if (s2) {\n const sf2=fitFs(s2, maxSemW, maxSemFs);\n ctx.font=sf2+'px monospace'; ctx.fillStyle=col.pc3; ctx.globalAlpha=0.72;\n ctx.fillText(s2, 0, pcFs+5);\n ctx.globalAlpha=1;\n }\n ctx.restore();\n\n let hexR=HEX_RADIUS_MAX;\n const nU=data.length;\n if (nU>0) hexR=Math.max(HEX_RADIUS_MIN,Math.min(HEX_RADIUS_MAX,Math.sqrt(triW*triW/(2.6*nU*1.4))));\n let cells=generateHexCells(Ax,Ay,Bx,By,Cx,Cy,hexR);\n while (cells.length<nU&&hexR>HEX_RADIUS_MIN) { hexR=Math.max(HEX_RADIUS_MIN,hexR*0.85); cells=generateHexCells(Ax,Ay,Bx,By,Cx,Cy,hexR); }\n assignUsersToCells(cells,data,t2xy);\n canvas._cells=cells; canvas._hexR=hexR; canvas._t2xy=t2xy;\n\n ctx.strokeStyle=col.rule; ctx.lineWidth=0.5; ctx.globalAlpha=0.25;\n for (const c of cells) { if (!c.occupant) { hexPath(ctx,c.x,c.y,hexR*0.92); ctx.stroke(); } }\n ctx.globalAlpha=1;\n const drawR=hexR*0.92;\n for (const cell of cells) {\n if (!cell.occupant) continue;\n const u=cell.occupant;\n const img=avatarImages[u.handle];\n const hl=u.handle===highlightHandle||u.handle===lockedHandle;\n if (img&&img.complete&&img.naturalWidth>0) {\n ctx.save(); hexPath(ctx,cell.x,cell.y,drawR); ctx.clip();\n const is=drawR*2.2; ctx.drawImage(img,cell.x-is/2,cell.y-is/2,is,is); ctx.restore();\n } else {\n ctx.save(); hexPath(ctx,cell.x,cell.y,drawR); ctx.fillStyle=col.muted+'40'; ctx.fill(); ctx.restore();\n }\n hexPath(ctx,cell.x,cell.y,drawR);\n ctx.strokeStyle=hl?col.link:col.text+'60'; ctx.lineWidth=hl?2.5:0.8; ctx.stroke();\n }\n}\n\n// ── Post list ──────────────────────────────────────\nfunction renderPostList(user) {\n const el=$('postList'); if (!user||!user.texts||!user.texts.length) { el.innerHTML=''; return; }\n const col=getColors();\n let html='<div class=\"section-header\">posts · @'+escHtml(user.handle)+' <span 
style=\"font-weight:400\">('+user.texts.length+')</span></div>';\n for (let i=0; i<user.texts.length; i++) {\n const ps=user.postScores&&user.postScores[i];\n let pcHtml='';\n if (ps) {\n const dom=ps.indexOf(Math.max(...ps));\n const pcKeys=['pc1','pc2','pc3'];\n pcHtml='<div class=\"post-pcs\">'+ps.map((v,k)=>{\n const c=col[pcKeys[k]]; const w=dom===k?';font-weight:700':'';\n return '<span class=\"post-pc\" style=\"color:'+c+';border:1px solid '+c+w+'\">'+['PC₁','PC₂','PC₃'][k]+' '+v+'</span>';\n }).join('')+'</div>';\n }\n html+='<div class=\"post-row\">'+pcHtml+'<div class=\"post-text\">'+escHtml(user.texts[i])+'</div></div>';\n }\n el.innerHTML=html;\n}\n\n// ── Chart readout ──────────────────────────────────\nfunction updateReadout(user) {\n const el=$('readout');\n if (!user) { el.innerHTML='hover a hex to inspect · click to expand'; return; }\n const imgTag=user.avatar?'<img class=\"readout-avatar\" src=\"'+escHtml(user.avatar)+'\" alt=\"\">'\n :'<div class=\"readout-avatar\" style=\"background:var(--rule)\"></div>';\n el.innerHTML=imgTag+'<div><div class=\"readout-handle\"><a href=\"https://bsky.app/profile/'+escHtml(user.handle)+'\" target=\"_blank\" rel=\"noopener\">@'+escHtml(user.handle)+'</a></div>'\n +'<div class=\"readout-scores\">'\n +'<span class=\"readout-score\" style=\"color:var(--pc1);border:1px solid var(--pc1)\">PC₁ '+user.pc1+'</span>'\n +'<span class=\"readout-score\" style=\"color:var(--pc2);border:1px solid var(--pc2)\">PC₂ '+user.pc2+'</span>'\n +'<span class=\"readout-score\" style=\"color:var(--pc3);border:1px solid var(--pc3)\">PC₃ '+user.pc3+'</span>'\n +'</div></div>';\n}\n\n// ── Ternary interaction ────────────────────────────\nfunction setupTernaryInteraction(canvas) {\n const tooltip=$('tooltip');\n function findHit(mx,my) {\n const cells=canvas._cells||[]; const r=(canvas._hexR||22)*0.92;\n for (const c of cells) { if (!c.occupant) continue; const dx=c.x-mx,dy=c.y-my; if(dx*dx+dy*dy<r*r) return c; }\n return null;\n }\n 
function redraw() { drawTernary(canvas,chartState.ternaryData,canvas._lastHl,chartState.pcTopTokens); }\n function handleHover(cx,cy) {\n const rect=canvas.getBoundingClientRect(); const mx=cx-rect.left,my=cy-rect.top;\n const hit=findHit(mx,my);\n if (!hit) {\n if (canvas._lastHl) { canvas._lastHl=null; redraw(); updateReadout(null); }\n hide(tooltip); return;\n }\n const r=hit.occupant;\n if (canvas._lastHl!==r.handle) { canvas._lastHl=r.handle; redraw(); updateReadout(r); }\n tooltip.textContent='@'+r.handle; show(tooltip);\n let tx=mx+14,ty=my-28; if(tx+180>rect.width)tx=mx-180; if(ty<0)ty=my+16;\n tooltip.style.left=tx+'px'; tooltip.style.top=ty+'px';\n }\n canvas.onmousemove=e=>handleHover(e.clientX,e.clientY);\n canvas.onmouseleave=()=>hide(tooltip);\n canvas.onclick=e=>{\n const rect=canvas.getBoundingClientRect();\n const hit=findHit(e.clientX-rect.left,e.clientY-rect.top);\n if (!hit||!hit.occupant) { lockedHandle=null; renderPostList(null); redraw(); return; }\n const r=hit.occupant;\n if (lockedHandle===r.handle) { lockedHandle=null; renderPostList(null); } else { lockedHandle=r.handle; renderPostList(r); }\n redraw();\n };\n canvas.ontouchstart=e=>{e.preventDefault(); if(e.touches.length)handleHover(e.touches[0].clientX,e.touches[0].clientY);};\n canvas.ontouchmove=e=>{e.preventDefault(); if(e.touches.length)handleHover(e.touches[0].clientX,e.touches[0].clientY);};\n}\n\n// ── Avatar preloading ──────────────────────────────\nfunction preloadAvatars(data) {\n return new Promise(resolve => {\n let rem=0;\n for (const u of data) {\n if (!u.avatar) continue; rem++;\n const img=new Image();\n img.onload=img.onerror=()=>{ if(--rem<=0)resolve(); };\n img.src=u.avatar; avatarImages[u.handle]=img;\n }\n if (!rem) resolve();\n setTimeout(resolve, 5000);\n });\n}\n\n// ── Three.js 3D view ───────────────────────────────\nfunction makeCircleTex(size, color) {\n const c=document.createElement('canvas'); c.width=c.height=size;\n const ctx=c.getContext('2d');\n 
ctx.beginPath(); ctx.arc(size/2,size/2,size/2-1,0,Math.PI*2);\n ctx.fillStyle=color; ctx.fill();\n return new THREE.CanvasTexture(c);\n}\n\nfunction init3D(data3d, pcTopTokens) {\n const wrap=$('threeWrap'); wrap.innerHTML='';\n const W=wrap.clientWidth||600, H=W*7/8;\n const renderer=new THREE.WebGLRenderer({ antialias:true });\n renderer.setSize(W,H); renderer.setPixelRatio(window.devicePixelRatio);\n renderer.setClearColor(0x0a0a0a);\n wrap.appendChild(renderer.domElement);\n const scene=new THREE.Scene();\n const camera=new THREE.PerspectiveCamera(55,W/H,0.01,100);\n camera.position.set(0,0,6);\n const controls=new OrbitControls(camera,renderer.domElement);\n controls.enableDamping=true; controls.dampingFactor=0.08;\n\n // Normalize axes to unit std-dev\n const N=data3d.length;\n const axes=[[],[],[]];\n for (const u of data3d) { axes[0].push(u.raw[0]); axes[1].push(u.raw[1]); axes[2].push(u.raw[2]); }\n const scaled=data3d.map(u=>axes.map((ax,k)=>{\n const mean=ax.reduce((a,b)=>a+b,0)/N;\n const std=Math.sqrt(ax.reduce((a,b)=>a+(b-mean)**2,0)/N)||1;\n return (u.raw[k]-mean)/std;\n }));\n\n // Axis lines\n const axColors=[0xd080b0,0x70b0d0,0x90c080];\n [[1,0,0],[0,1,0],[0,0,1]].forEach(([x,y,z],k)=>{\n const geo=new THREE.BufferGeometry().setFromPoints([new THREE.Vector3(0,0,0),new THREE.Vector3(x*2.5,y*2.5,z*2.5)]);\n scene.add(new THREE.Line(geo,new THREE.LineBasicMaterial({color:axColors[k],opacity:0.3,transparent:true})));\n });\n\n // HTML axis labels (positioned over canvas each frame)\n const axLabelEls=[];\n const toks=pcTopTokens||[[],[],[]];\n ['PC₁','PC₂','PC₃'].forEach((label,k)=>{\n const div=document.createElement('div');\n div.className='ax-label';\n div.style.color='#'+axColors[k].toString(16).padStart(6,'0');\n div.innerHTML='<b>'+label+'</b>'+(toks[k].length?'<br>'+toks[k].join(' · '):'');\n wrap.appendChild(div);\n axLabelEls.push(div);\n });\n const axTips=[new THREE.Vector3(2.6,0,0),new THREE.Vector3(0,2.6,0),new 
THREE.Vector3(0,0,2.6)];\n\n function updateAxLabels() {\n const rect=renderer.domElement.getBoundingClientRect();\n axTips.forEach((tip,k)=>{\n const v=tip.clone().project(camera);\n const x=(v.x+1)/2*rect.width, y=(1-v.y)/2*rect.height;\n axLabelEls[k].style.left=x+'px'; axLabelEls[k].style.top=y+'px';\n });\n }\n\n // Tooltip\n const tooltip=document.createElement('div');\n tooltip.style.cssText='position:absolute;pointer-events:none;font-family:monospace;font-size:0.7rem;background:var(--bg);border:1px solid var(--rule);padding:0.3rem 0.5rem;color:var(--text);display:none;z-index:10';\n wrap.appendChild(tooltip);\n\n // Sprites — colored circles (bsky CDN doesn't support CORS, so WebGL avatar textures aren't possible)\n const sprites=[];\n data3d.forEach((u,i)=>{\n const [x,y,z]=scaled[i];\n const hue=(i*137.5)%360;\n const obj=new THREE.Sprite(new THREE.SpriteMaterial({ map:makeCircleTex(64,`hsl(${hue},65%,60%)`), transparent:true }));\n obj.scale.setScalar(0.22); obj.position.set(x,y,z); obj.userData={handle:u.handle};\n scene.add(obj); sprites.push(obj);\n });\n\n // Interaction\n const raycaster=new THREE.Raycaster(), mouse=new THREE.Vector2();\n function getHit(e) {\n const rect=renderer.domElement.getBoundingClientRect();\n mouse.x=((e.clientX-rect.left)/rect.width)*2-1;\n mouse.y=-((e.clientY-rect.top)/rect.height)*2+1;\n raycaster.setFromCamera(mouse,camera);\n const hits=raycaster.intersectObjects(sprites);\n return hits.length?hits[0].object:null;\n }\n renderer.domElement.addEventListener('mousemove', e=>{\n const hit=getHit(e);\n if (hit) {\n const rect=renderer.domElement.getBoundingClientRect();\n const u=chartState.ternaryData.find(u=>u.handle===hit.userData.handle);\n tooltip.textContent='@'+hit.userData.handle; tooltip.style.display='block';\n tooltip.style.left=(e.clientX-rect.left+12)+'px'; tooltip.style.top=(e.clientY-rect.top-24)+'px';\n updateReadout(u);\n } else { tooltip.style.display='none'; }\n });\n 
renderer.domElement.addEventListener('click', e=>{\n const hit=getHit(e);\n if (!hit) { lockedHandle=null; renderPostList(null); return; }\n const h=hit.userData.handle;\n const u=chartState.ternaryData.find(u=>u.handle===h);\n if (lockedHandle===h) { lockedHandle=null; renderPostList(null); } else { lockedHandle=h; renderPostList(u); }\n });\n\n let animId;\n function animate() { animId=requestAnimationFrame(animate); controls.update(); renderer.render(scene,camera); updateAxLabels(); }\n animate();\n threeObjs={ renderer, animId, controls };\n}\n\n// ── Label helpers ──────────────────────────────────\nfunction applyLabels(labels) {\n if (!chartState) return;\n chartState.pcTopTokens = labels;\n const canvas = $('ternaryChart');\n drawTernary(canvas, chartState.ternaryData, canvas._lastHl, labels);\n if (threeObjs) {\n $('threeWrap').querySelectorAll('.ax-label').forEach((el, k) => {\n el.innerHTML = '<b>'+['PC₁','PC₂','PC₃'][k]+'</b>'+(labels[k].length?'<br>'+labels[k].join(' · '):'');\n });\n }\n}\n\nfunction applyStatusLine() {\n const labels = chartState ? 
chartState.pcTopTokens : [[],[],[]];\n const prefix = escHtml((useDenseEmbedder?'dense':'splade')+' · '+(gpuDevice?'webgpu':'js')+' gram');\n const pcColors = ['var(--pc1)','var(--pc2)','var(--pc3)'];\n const axParts = labels.map((t,i) =>\n `<span style=\"color:${pcColors[i]}\">PC${i+1}: ${escHtml(t.length?t.join(' · '):'—')}</span>`\n ).join(' ');\n const el=$('status'); show(el);\n el.innerHTML = prefix+' '+axParts;\n el.classList.remove('error');\n}\n\nasync function relabel() {\n if (!chartState) return;\n const labeler = $('labeler').value;\n if (labeler === 'none') {\n applyLabels(chartState.corpusLabels);\n applyStatusLine();\n return;\n }\n // Reconstruct from cached state\n const userData = chartState.ternaryData; // has texts\n const projections = chartState.data3d.map(u=>u.raw);\n const corpus = chartState.corpusLabels;\n setStatus('labelling axes…');\n try {\n const llmLabels = await labelAxes(userData, projections, corpus, msg=>setStatus(msg));\n applyLabels(corpus.map((toks,k) => llmLabels[k] ? 
[llmLabels[k]] : toks));\n applyStatusLine();\n } catch(e) { setStatus('labeling failed: '+e.message, true); }\n}\n\n// ── Main run ───────────────────────────────────────\nasync function run() {\n const handles=getHandles(); if (handles.length<2) return;\n const btn=$('mapBtn'); btn.disabled=true;\n avatarImages={}; chartState=null; lockedHandle=null;\n $('postList').innerHTML='';\n if (threeObjs) { cancelAnimationFrame(threeObjs.animId); threeObjs.renderer.dispose(); threeObjs=null; }\n\n setProgress(5); setStatus('fetching posts…');\n let userData;\n try {\n userData=await batchLoad(handles,6,(done,total)=>{\n setProgress(5+(done/total)*25); setStatus('fetching posts… '+done+'/'+total);\n });\n } catch(e) { setStatus('fetch error: '+e.message,true); btn.disabled=false; return; }\n userData=userData.filter(u=>u.texts&&u.texts.length>=2);\n if (userData.length<2) { setStatus('not enough posts retrieved',true); btn.disabled=false; return; }\n\n setProgress(30); setStatus('loading model…');\n try { await loadModel(msg=>setStatus(msg)); }\n catch(e) { setStatus('model error: '+e.message,true); btn.disabled=false; return; }\n\n setProgress(45); setStatus('computing embeddings…');\n const N=userData.length;\n const userMatrix=new Float32Array(N*vocabSize);\n const allPostVecs=[]; // [user][post] Float32Arrays — needed for per-post projection\n for (let i=0;i<N;i++) {\n setStatus('embedding '+(i+1)+'/'+N+': @'+userData[i].handle);\n setProgress(45+(i/N)*30);\n const { userVec, postVecs }=await embedUser(userData[i].texts);\n userMatrix.set(userVec, i*vocabSize);\n allPostVecs.push(postVecs);\n }\n\n setProgress(78); setStatus('computing PCA'+(gpuDevice?' 
(WebGPU)':'')+'…');\n let pcaResult;\n try { await setupWebGPU(); pcaResult=await runPCA(userMatrix,N); }\n catch(e) { setStatus('PCA error: '+e.message,true); btn.disabled=false; return; }\n\n // PC directions in vocab space (for per-post projection)\n const Vk=computePCDirections(pcaResult.Xc,N,vocabSize,pcaResult.eigenvecs);\n\n setProgress(85); setStatus('labelling axes…');\n const pcTopTokens=extractCorpusLabels(userData, pcaResult.projections, 3);\n\n setProgress(88); setStatus('projecting posts…');\n const allPostScores=allPostVecs.map(postVecs=>\n postVecs.map(pv=>projectPost(pv,pcaResult.means,Vk))\n );\n\n setProgress(92); setStatus('preloading avatars…');\n await preloadAvatars(userData);\n\n const ternaryScores=toTernary(pcaResult.projections);\n const ternaryData=userData.map((u,i)=>({\n handle:u.handle, avatar:u.avatar, texts:u.texts,\n pc1:ternaryScores[i].pc1, pc2:ternaryScores[i].pc2, pc3:ternaryScores[i].pc3,\n postScores:allPostScores[i],\n }));\n const data3d=userData.map((u,i)=>({ handle:u.handle, avatar:u.avatar, raw:pcaResult.projections[i] }));\n\n chartState = { ternaryData, data3d, pcTopTokens, corpusLabels: pcTopTokens };\n\n setProgress(100);\n show($('results'));\n const canvas=$('ternaryChart');\n drawTernary(canvas,ternaryData,null,pcTopTokens);\n setupTernaryInteraction(canvas);\n if (viewMode==='3d') requestAnimationFrame(()=>init3D(data3d,pcTopTokens));\n window.addEventListener('resize',()=>{\n if (viewMode==='ternary'&&chartState) drawTernary(canvas,chartState.ternaryData,canvas._lastHl,chartState.pcTopTokens);\n });\n btn.disabled=false;\n\n applyStatusLine();\n if ($('labeler').value !== 'none') relabel();\n}\n\n// ── View toggle ────────────────────────────────────\n$('viewTernaryBtn').addEventListener('click',()=>{\n if (viewMode==='ternary') return;\n viewMode='ternary';\n $('viewTernaryBtn').classList.add('active'); $('view3dBtn').classList.remove('active');\n show($('ternaryWrap')); 
hide($('threeWrap'));\n});\n$('view3dBtn').addEventListener('click',()=>{\n if (viewMode==='3d') return;\n viewMode='3d';\n $('view3dBtn').classList.add('active'); $('viewTernaryBtn').classList.remove('active');\n hide($('ternaryWrap')); show($('threeWrap'));\n if (chartState&&!threeObjs) requestAnimationFrame(()=>init3D(chartState.data3d,chartState.pcTopTokens));\n});\n\n// ── Input UI ───────────────────────────────────────\ndocument.querySelectorAll('#inputTabs .tab').forEach(tab=>{\n tab.addEventListener('click',()=>{\n document.querySelectorAll('#inputTabs .tab').forEach(t=>t.classList.toggle('active',t===tab));\n if (tab.dataset.tab==='paste') { show($('pastePanel')); hide($('listPanel')); }\n else { hide($('pastePanel')); show($('listPanel')); }\n });\n});\n$('loadListBtn').addEventListener('click',async()=>{\n const url=$('listUrl').value.trim(); if (!url) return;\n const parsed=parseListUrl(url); if (!parsed) { setStatus('could not parse list URL',true); return; }\n const btn=$('loadListBtn'); btn.disabled=true; btn.textContent='loading…';\n setStatus('fetching list members…'); setProgress(20);\n try {\n const handles=await fetchListMembers(parsed.actor,parsed.rkey);\n if (!handles.length) { setStatus('empty list',true); setProgress(0); return; }\n $('handleList').value=handles.join('\\n');\n show($('pastePanel')); hide($('listPanel'));\n document.querySelectorAll('#inputTabs .tab').forEach(t=>t.classList.toggle('active',t.dataset.tab==='paste'));\n updateCount(); setStatus(handles.length+' members loaded'); setProgress(100);\n } catch(e) { setStatus('error: '+e.message,true); setProgress(0); }\n finally { btn.disabled=false; btn.textContent='load list'; }\n});\n$('handleList').addEventListener('input',updateCount);\n$('mapBtn').addEventListener('click',run);\nupdateCount();\n\n// Labeler UI\nconst LABELER_CONFIG = {\n none: { show: false },\n browser: { show: false },\n anthropic: { show: true, placeholder: 'anthropic api key', hint: 'key is only stored 
in localStorage', type: 'password', storageKey: 'splade_api_key' },\n llama: { show: true, placeholder: 'port (default 8080)', hint: 'llama-server --model model.gguf --port 8080 --jinja', type: 'text', storageKey: 'splade_llama_port' },\n};\nfunction updateLabelerUI() {\n const cfg = LABELER_CONFIG[$('labeler').value];\n const extra = $('labelerExtra');\n const input = $('labelerInput');\n const hint = $('labelerHint');\n if (cfg.show) {\n show(extra);\n input.type = cfg.type || 'text';\n input.placeholder = cfg.placeholder || '';\n hint.textContent = cfg.hint || '';\n const saved = cfg.storageKey && localStorage.getItem(cfg.storageKey);\n if (saved && !input.value) input.value = saved;\n } else {\n hide(extra);\n }\n}\n$('labeler').addEventListener('change', () => { updateLabelerUI(); if (chartState) relabel(); });\n$('labelerInput').addEventListener('keydown', e => { if (e.key==='Enter' && chartState) relabel(); });\nupdateLabelerUI();\n</script>\n</body>\n</html>\n",
"syntaxHighlightingTheme": "github-dark"
}
},
{
"$type": "pub.leaflet.pages.linearDocument#block",
"block": {
"$type": "pub.leaflet.blocks.text",
"plaintext": ""
}
}
],
"id": "019e80b8-20e4-7ddc-baab-ac251eeeca7e"
}
]
},
"description": "",
"path": "/3mn6vw2e36c26",
"publishedAt": "2026-06-01T01:18:14.534Z",
"site": "https://leaflet.pub/p/did:plc:vw4e7blkwzdokanwp24k3igr",
"tags": [],
"theme": {
"pageWidth": 1200,
"showPageBackground": true
},
"title": "fun with semantic clustering"
}