How can I tell the size of a model before downloading it?
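Use the generic `search=` parameter of Hugging Face's `HfApi.list_models()`. It behaves like the site search: partial, fuzzy-ish matching across model names and metadata. For each match, `model_info(..., files_metadata=True)` returns per-file sizes, which the script below sums to estimate the download size without fetching any weights.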
#!/usr/bin/env python3
from __future__ import annotations

import argparse

from huggingface_hub import HfApi
from huggingface_hub.utils import HfHubHTTPError
def human_size(num_bytes: int | None) → str: if not num_bytes: return “0 B”
units = ["B", "KB", "MB", "GB", "TB"]
size = float(num_bytes)
for unit in units:
if size < 1024 or unit == units[-1]:
return f"{size:.2f} {unit}"
size /= 1024
return f"{size:.2f} TB"
def get_repo_size(api: HfApi, repo_id: str) -> tuple[int, list[tuple[str, int]]]:
    """Add up the file sizes the Hub reports for a model repository.

    Args:
        api: Client whose ``model_info`` method is used for the lookup.
        repo_id: Repository identifier passed to ``api.model_info``.

    Returns:
        A ``(total, files)`` pair. ``total`` is the sum of all file sizes in
        bytes; ``files`` is a list of ``(filename, size)`` tuples sorted from
        largest to smallest. A file with no reported size counts as 0.

    Raises:
        Whatever ``api.model_info`` raises; nothing is caught here.
    """
    # files_metadata=True is what makes the per-file ``size`` available.
    info = api.model_info(repo_id, files_metadata=True)
    files = [
        (sibling.rfilename, sibling.size or 0)
        for sibling in info.siblings or []
    ]
    files.sort(key=lambda entry: entry[1], reverse=True)
    total = sum(size for _, size in files)
    return total, files
def main() -> int:
    """Search the Hub for models matching a term and print a size table.

    Parses the command line, lists matching models, looks up each repo's
    total file size, and prints the results sorted from largest to smallest.
    A repo whose size lookup fails with ``HfHubHTTPError`` is still listed,
    with ``-`` in the size column and the error message on the next line.

    Returns:
        Process exit status; always ``0``.
    """
    parser = argparse.ArgumentParser(
        description="Search Hugging Face models by partial term and show approximate repo sizes."
    )
    parser.add_argument(
        "query",
        help="Partial search term, e.g. qwen, llama, flux, embedding, whisper",
    )
    parser.add_argument(
        "-n", "--limit", type=int, default=20,
        help="Number of model results to check",
    )
    parser.add_argument(
        "--show-files", action="store_true",
        help="Show largest files per repo",
    )
    parser.add_argument(
        "--top-files", type=int, default=5,
        help="Number of files to show with --show-files",
    )
    parser.add_argument(
        "--sort", default="downloads",
        help="HF sort field: downloads, likes, lastModified, createdAt",
    )
    parser.add_argument(
        "--ascending", action="store_true",
        help="Sort HF results ascending",
    )
    args = parser.parse_args()

    api = HfApi()
    models = api.list_models(
        search=args.query,
        limit=args.limit,
        sort=args.sort,
        direction=1 if args.ascending else -1,
        full=True,
    )

    rows = []
    for model in models:
        # NOTE(review): newer huggingface_hub releases are believed to expose
        # the repo name as `id`; `modelId` is the attribute the original code
        # read and is kept as the fallback. Confirm against the installed
        # version.
        repo_id = getattr(model, "id", None) or model.modelId
        try:
            total_size, files = get_repo_size(api, repo_id)
            error = None
        except HfHubHTTPError as exc:
            # Keep the repo in the table so the failure is visible.
            total_size, files, error = None, [], str(exc)
        rows.append({
            "repo_id": repo_id,
            "size": total_size,
            "downloads": getattr(model, "downloads", None),
            "likes": getattr(model, "likes", None),
            "pipeline": getattr(model, "pipeline_tag", None),
            "error": error,
            "files": files,
        })

    # Largest repos first; rows with an unknown size sort as 0.
    rows.sort(key=lambda r: r["size"] or 0, reverse=True)

    print()
    print(f"Search: {args.query}")
    print()
    print(f"{'SIZE':>12} {'DOWNLOADS':>10} {'LIKES':>7} {'TYPE':<24} MODEL")
    print("-" * 90)
    for row in rows:
        # A failed lookup is not a zero-byte repo, so show "-" instead of "0 B".
        size = human_size(row["size"]) if row["size"] is not None else "-"
        downloads = row["downloads"] if row["downloads"] is not None else "-"
        likes = row["likes"] if row["likes"] is not None else "-"
        pipeline = row["pipeline"] or "-"
        print(f"{size:>12} {downloads:>10} {likes:>7} {pipeline:<24} {row['repo_id']}")
        if row["error"]:
            print(f"{'':>12} error: {row['error']}")
        if args.show_files and row["files"]:
            for filename, file_size in row["files"][:args.top_files]:
                print(f"{'':>12} {human_size(file_size):>10} {filename}")
            # Blank line separates this repo's file listing from the next row.
            print()
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Discussion in the ATmosphere