{
  "schema": "canxp.models.v1",
  "generated_at": "2026-04-28T11:56:05.039724+00:00",
  "host": "https://models.canxp.ai",
  "host_index_url": "https://models.canxp.ai/",
  "model_index_path": "/mlc-ai/",
  "url_compatibility": {
    "native": "https://models.canxp.ai/<repo_id>/<file>",
    "huggingface_resolve_compat": "https://models.canxp.ai/<repo_id>/resolve/<branch>/<file>",
    "huggingface_raw_compat": "https://models.canxp.ai/<repo_id>/raw/<branch>/<file>",
    "notes": "URLs of the form /resolve/<branch>/<path> and /raw/<branch>/<path> are rewritten to the file path so existing HuggingFace-style URLs work. CORS is wide-open for browser WebLLM clients."
  },
  "selection_strategy": {
    "source_a": "All 91 unique HuggingFace mlc-ai model IDs referenced in WebLLM's official src/config.ts prebuiltAppConfig (the canonical supported set).",
    "source_b": "Top mlc-ai HuggingFace org models by download count not already in source A, chosen to broaden coverage with widely-used base families (Mistral, DeepSeek-R1-Distill, TinyLlama, Hermes 2 Pro, RedPajama, Qwen 2.5 Coder).",
    "size_cap_gb": 6,
    "size_cap_rationale": "Browsers struggle to load WebLLM models above ~6 GB due to memory and bandwidth constraints; all 70B-class (~30 GB at q3f16_1) and Llama-2-13B (6.8 GB) variants were excluded.",
    "deduplication": "When a base model existed in both sources A and B with different quantizations, the WebLLM-official quant variants were preferred and B contributions were added for families not covered by A.",
    "total_models": 100,
    "total_size_gb": 195.7
  },
  "quantization_legend": {
    "q0f16": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
    "q0f32": "Full FP32 precision. Largest size, highest stability. Reference quality.",
    "q3f16_1": "3-bit weight quantization, FP16 activations. Most aggressive compression \u2014 used for very large models that wouldn't otherwise fit.",
    "q4f16_0": "4-bit weight quantization (older variant), FP16 activations. Browser-friendly.",
    "q4f16_1": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
    "q4f32_0": "4-bit weight quantization (older variant), FP32 activations. For FP32-only GPUs.",
    "q4f32_1": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support."
  },
  "family_distribution": {
    "Llama-3.2-1B": 4,
    "Qwen2.5-0.5B": 4,
    "Qwen3-0.6B": 4,
    "SmolLM2-360M": 4,
    "Qwen2-0.5B": 3,
    "Qwen3.5-0.8B": 3,
    "DeepSeek-R1-Distill-Qwen-1.5B": 2,
    "Hermes-3-Llama-3.1-8B": 2,
    "Hermes-3-Llama-3.2-3B": 2,
    "Llama-2-7b": 2,
    "Llama-3-8B": 2,
    "Llama-3.1-8B": 2,
    "Llama-3.2-3B": 2,
    "Phi-3-mini": 2,
    "Phi-3.5-mini": 2,
    "Phi-4-mini": 2,
    "Qwen2-1.5B": 2,
    "Qwen2-7B": 2,
    "Qwen2-Math-7B": 2,
    "Qwen2.5-1.5B": 2,
    "Qwen2.5-3B": 2,
    "Qwen2.5-7B": 2,
    "Qwen3-1.7B": 2,
    "Qwen3-4B": 2,
    "Qwen3-8B": 2,
    "Qwen3.5-2B": 2,
    "Qwen3.5-4B": 2,
    "Qwen3.5-9B": 2,
    "SmolLM2-1.7B": 2,
    "SmolLM2-135M": 2,
    "gemma-2-2b-it": 2,
    "gemma-2-2b-jpn-it": 2,
    "gemma-2-9b-it": 2,
    "gemma-2b-it": 2,
    "phi-1_5": 2,
    "phi-2": 2,
    "stablelm-2-zephyr-1_6b": 2,
    "DeepSeek-R1-Distill-Llama-8B": 1,
    "DeepSeek-R1-Distill-Qwen-7B": 1,
    "Hermes-2-Pro-Llama-3-8B": 1,
    "Hermes-2-Pro-Mistral-7B": 1,
    "Mistral-7B-Instruct-v0.2": 1,
    "Mistral-7B-Instruct-v0.3": 1,
    "Qwen1.5-0.5B": 1,
    "Qwen2.5-Coder-1.5B": 1,
    "Qwen2.5-Coder-3B": 1,
    "Qwen2.5-Coder-7B": 1,
    "RedPajama-INCITE-Chat-3B": 1,
    "TinyLlama-1.1B": 1,
    "WizardMath-7B": 1,
    "gemma3-1b-it": 1,
    "snowflake-arctic-embed-m": 1,
    "snowflake-arctic-embed-s": 1
  },
  "models": [
    {
      "model_id": "mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
      "name": "DeepSeek R1 Distill Llama 8B",
      "huggingface_url": "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "author": "deepseek-ai",
        "author_url": "https://huggingface.co/deepseek-ai",
        "downloads": 1990634,
        "likes": 856,
        "last_modified": "2025-02-24T03:32:07.000Z",
        "license": "mit",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "DeepSeek-R1-Distill-Llama-8B",
      "description": "Llama-3 8B distilled from DeepSeek R1's reasoning chains. Inherits R1's chain-of-thought reasoning style.",
      "use_cases": [
        "reasoning",
        "math",
        "step-by-step"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4526794285,
      "size_gb": 4.216,
      "downloads": 3773,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:55.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "base_model:quantized:deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Llama-3 8B distilled from DeepSeek R1's reasoning chains. Inherits R1's chain-of-thought reasoning style. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC",
      "name": "DeepSeek R1 Distill Qwen 1.5B",
      "huggingface_url": "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "author": "deepseek-ai",
        "author_url": "https://huggingface.co/deepseek-ai",
        "downloads": 502546,
        "likes": 1494,
        "last_modified": "2025-02-24T03:32:35.000Z",
        "license": "mit",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "DeepSeek-R1-Distill-Qwen-1.5B",
      "description": "Qwen 2.5 1.5B distilled from DeepSeek R1. Browser-friendly chain-of-thought reasoning.",
      "use_cases": [
        "reasoning",
        "math",
        "tiny models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1007110947,
      "size_gb": 0.938,
      "downloads": 33,
      "likes": 3,
      "last_modified": "2025-09-07T18:55:40.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "base_model:quantized:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Qwen 2.5 1.5B distilled from DeepSeek R1. Browser-friendly chain-of-thought reasoning. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.938 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC",
      "name": "DeepSeek R1 Distill Qwen 1.5B",
      "huggingface_url": "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "author": "deepseek-ai",
        "author_url": "https://huggingface.co/deepseek-ai",
        "downloads": 502546,
        "likes": 1494,
        "last_modified": "2025-02-24T03:32:35.000Z",
        "license": "mit",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "DeepSeek-R1-Distill-Qwen-1.5B",
      "description": "Qwen 2.5 1.5B distilled from DeepSeek R1. Browser-friendly chain-of-thought reasoning.",
      "use_cases": [
        "reasoning",
        "math",
        "tiny models"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1007234999,
      "size_gb": 0.938,
      "downloads": 2,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:42.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "base_model:quantized:deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Qwen 2.5 1.5B distilled from DeepSeek R1. Browser-friendly chain-of-thought reasoning. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.938 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
      "name": "DeepSeek R1 Distill Qwen 7B",
      "huggingface_url": "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "url": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "author": "deepseek-ai",
        "author_url": "https://huggingface.co/deepseek-ai",
        "downloads": 573416,
        "likes": 827,
        "last_modified": "2025-02-24T03:32:20.000Z",
        "license": "mit",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "DeepSeek-R1-Distill-Qwen-7B",
      "description": "Qwen 2.5 7B distilled from DeepSeek R1. Strongest browser-runnable R1-distilled reasoning model.",
      "use_cases": [
        "reasoning",
        "math",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4291579836,
      "size_gb": 3.997,
      "downloads": 2092,
      "likes": 2,
      "last_modified": "2025-09-07T18:55:40.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "base_model:quantized:deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Qwen 2.5 7B distilled from DeepSeek R1. Strongest browser-runnable R1-distilled reasoning model. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 3.997 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
      "name": "Hermes 2 Pro (Llama 3 8B)",
      "huggingface_url": "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "NousResearch/Hermes-2-Pro-Llama-3-8B",
        "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B",
        "author": "NousResearch",
        "author_url": "https://huggingface.co/NousResearch",
        "downloads": 222370,
        "likes": 448,
        "last_modified": "2024-09-14T16:29:41.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Hermes-2-Pro-Llama-3-8B",
      "description": "NousResearch's Hermes 2 Pro on Llama 3 8B. Fine-tuned for advanced function-calling and JSON-structured output.",
      "use_cases": [
        "function calling",
        "JSON output",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4527211820,
      "size_gb": 4.216,
      "downloads": 228,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:37.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:NousResearch/Hermes-2-Pro-Llama-3-8B",
        "base_model:quantized:NousResearch/Hermes-2-Pro-Llama-3-8B",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. NousResearch's Hermes 2 Pro on Llama 3 8B. Fine-tuned for advanced function-calling and JSON-structured output. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC",
      "name": "Hermes 2 Pro (Mistral 7B)",
      "huggingface_url": "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "NousResearch/Hermes-2-Pro-Mistral-7B",
        "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B",
        "author": "NousResearch",
        "author_url": "https://huggingface.co/NousResearch",
        "downloads": 4569,
        "likes": 501,
        "last_modified": "2024-09-08T08:08:34.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Hermes-2-Pro-Mistral-7B",
      "description": "NousResearch's Hermes 2 Pro on Mistral 7B. Function-calling and JSON-structured output specialist.",
      "use_cases": [
        "function calling",
        "JSON output",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4076607460,
      "size_gb": 3.797,
      "downloads": 207,
      "likes": 3,
      "last_modified": "2025-09-07T18:52:42.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:NousResearch/Hermes-2-Pro-Mistral-7B",
        "base_model:quantized:NousResearch/Hermes-2-Pro-Mistral-7B",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. NousResearch's Hermes 2 Pro on Mistral 7B. Function-calling and JSON-structured output specialist. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 3.797 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Hermes-3-Llama-3.1-8B-q4f16_1-MLC",
      "name": "Hermes 3 (Llama 3.1 8B)",
      "huggingface_url": "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.1-8B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.1-8B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.1-8B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "NousResearch/Hermes-3-Llama-3.1-8B",
        "url": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
        "author": "NousResearch",
        "author_url": "https://huggingface.co/NousResearch",
        "downloads": 149032,
        "likes": 413,
        "last_modified": "2024-09-08T07:39:55.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Hermes-3-Llama-3.1-8B",
      "description": "NousResearch's Hermes 3 on Llama 3.1 8B base. Enhanced reasoning, agentic flow, tool-use orientation.",
      "use_cases": [
        "chat",
        "agentic",
        "tool use"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4526847881,
      "size_gb": 4.216,
      "downloads": 8096,
      "likes": 1,
      "last_modified": "2025-09-07T18:53:18.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:NousResearch/Hermes-3-Llama-3.1-8B",
        "base_model:quantized:NousResearch/Hermes-3-Llama-3.1-8B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. NousResearch's Hermes 3 on Llama 3.1 8B base. Enhanced reasoning, agentic flow, tool-use orientation. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC",
      "name": "Hermes 3 (Llama 3.1 8B)",
      "huggingface_url": "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "NousResearch/Hermes-3-Llama-3.1-8B",
        "url": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
        "author": "NousResearch",
        "author_url": "https://huggingface.co/NousResearch",
        "downloads": 149032,
        "likes": 413,
        "last_modified": "2024-09-08T07:39:55.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Hermes-3-Llama-3.1-8B",
      "description": "NousResearch's Hermes 3 on Llama 3.1 8B base. Enhanced reasoning, agentic flow, tool-use orientation.",
      "use_cases": [
        "chat",
        "agentic",
        "tool use"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4526994912,
      "size_gb": 4.216,
      "downloads": 70,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:19.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:NousResearch/Hermes-3-Llama-3.1-8B",
        "base_model:quantized:NousResearch/Hermes-3-Llama-3.1-8B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. NousResearch's Hermes 3 on Llama 3.1 8B base. Enhanced reasoning, agentic flow, tool-use orientation. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
      "name": "Hermes 3 (Llama 3.2 3B)",
      "huggingface_url": "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "NousResearch/Hermes-3-Llama-3.2-3B",
        "url": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B",
        "author": "NousResearch",
        "author_url": "https://huggingface.co/NousResearch",
        "downloads": 15633,
        "likes": 178,
        "last_modified": "2024-12-18T22:32:55.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Hermes-3-Llama-3.2-3B",
      "description": "NousResearch's Hermes 3 on Llama 3.2 3B base. Compact agentic chat assistant.",
      "use_cases": [
        "chat",
        "agentic",
        "edge"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1816806576,
      "size_gb": 1.692,
      "downloads": 2193,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:37.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:NousResearch/Hermes-3-Llama-3.2-3B",
        "base_model:quantized:NousResearch/Hermes-3-Llama-3.2-3B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. NousResearch's Hermes 3 on Llama 3.2 3B base. Compact agentic chat assistant. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.692 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
      "name": "Hermes 3 (Llama 3.2 3B)",
      "huggingface_url": "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "NousResearch/Hermes-3-Llama-3.2-3B",
        "url": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B",
        "author": "NousResearch",
        "author_url": "https://huggingface.co/NousResearch",
        "downloads": 15633,
        "likes": 178,
        "last_modified": "2024-12-18T22:32:55.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Hermes-3-Llama-3.2-3B",
      "description": "NousResearch's Hermes 3 on Llama 3.2 3B base. Compact agentic chat assistant.",
      "use_cases": [
        "chat",
        "agentic",
        "edge"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1816926427,
      "size_gb": 1.692,
      "downloads": 203,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:36.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:NousResearch/Hermes-3-Llama-3.2-3B",
        "base_model:quantized:NousResearch/Hermes-3-Llama-3.2-3B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. NousResearch's Hermes 3 on Llama 3.2 3B base. Compact agentic chat assistant. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.692 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC",
      "name": "Llama 2 7B Chat",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-2-7b-chat-hf-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-2-7b-chat-hf",
        "url": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 377831,
        "likes": 4745,
        "last_modified": "2024-04-17T08:40:48.000Z",
        "license": "llama2",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-2-7b",
      "description": "Meta's foundational open-license chat LLM. 7B parameters, pretrained on 2T tokens and RLHF-tuned. Established the modern open-weight assistant ecosystem.",
      "use_cases": [
        "chat",
        "general assistant"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 3793674246,
      "size_gb": 3.533,
      "downloads": 3107,
      "likes": 9,
      "last_modified": "2025-09-07T18:50:20.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-2-7b-chat-hf",
        "base_model:quantized:meta-llama/Llama-2-7b-chat-hf",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's foundational open-license chat LLM. 7B parameters, pretrained on 2T tokens and RLHF-tuned. Established the modern open-weight assistant ecosystem. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 3.533 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC",
      "name": "Llama 2 7B Chat",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-2-7b-chat-hf-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-2-7b-chat-hf",
        "url": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 377831,
        "likes": 4745,
        "last_modified": "2024-04-17T08:40:48.000Z",
        "license": "llama2",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-2-7b",
      "description": "Meta's foundational open-license chat LLM. 7B parameters, pretrained on 2T tokens and RLHF-tuned. Established the modern open-weight assistant ecosystem.",
      "use_cases": [
        "chat",
        "general assistant"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4215371651,
      "size_gb": 3.926,
      "downloads": 54,
      "likes": 2,
      "last_modified": "2025-09-07T18:50:21.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-2-7b-chat-hf",
        "base_model:quantized:meta-llama/Llama-2-7b-chat-hf",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's foundational open-license chat LLM. 7B parameters, pretrained on 2T tokens and RLHF-tuned. Established the modern open-weight assistant ecosystem. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 3.926 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC",
      "name": "Llama 3 8B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Meta-Llama-3-8B-Instruct",
        "url": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 1460299,
        "likes": 4496,
        "last_modified": "2025-06-18T23:49:51.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3-8B",
      "description": "Meta's third-generation Llama at 8B, pretrained on 15T tokens. Major gains in reasoning, coding, and instruction-following over Llama 2.",
      "use_cases": [
        "chat",
        "reasoning",
        "coding"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4526948698,
      "size_gb": 4.216,
      "downloads": 506,
      "likes": 6,
      "last_modified": "2025-09-07T18:53:39.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Meta-Llama-3-8B-Instruct",
        "base_model:quantized:meta-llama/Meta-Llama-3-8B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's third-generation Llama at 8B, pretrained on 15T tokens. Major gains in reasoning, coding, and instruction-following over Llama 2. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
      "name": "Llama 3 8B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Meta-Llama-3-8B-Instruct",
        "url": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 1460299,
        "likes": 4496,
        "last_modified": "2025-06-18T23:49:51.000Z",
        "license": "llama3",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3-8B",
      "description": "Meta's third-generation Llama at 8B, pretrained on 15T tokens. Major gains in reasoning, coding, and instruction-following over Llama 2.",
      "use_cases": [
        "chat",
        "reasoning",
        "coding"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4527095192,
      "size_gb": 4.216,
      "downloads": 677,
      "likes": 1,
      "last_modified": "2025-09-07T18:53:40.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Meta-Llama-3-8B-Instruct",
        "base_model:quantized:meta-llama/Meta-Llama-3-8B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's third-generation Llama at 8B, pretrained on 15T tokens. Major gains in reasoning, coding, and instruction-following over Llama 2. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC",
      "name": "Llama 3.1 8B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": null,
        "likes": null,
        "last_modified": null,
        "license": "see base model card",
        "languages": [],
        "pipeline_tag": null
      },
      "family": "Llama-3.1-8B",
      "description": "Meta's Llama 3.1 update with 128K extended context, multilingual coverage across 8 languages, and refined post-training.",
      "use_cases": [
        "chat",
        "long-context",
        "multilingual"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4526842081,
      "size_gb": 4.216,
      "downloads": 1305,
      "likes": 4,
      "last_modified": "2025-09-07T18:53:35.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.1-8B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's Llama 3.1 update with 128K extended context, multilingual coverage across 8 languages, and refined post-training. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
      "name": "Llama 3.1 8B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": null,
        "likes": null,
        "last_modified": null,
        "license": "see base model card",
        "languages": [],
        "pipeline_tag": null
      },
      "family": "Llama-3.1-8B",
      "description": "Meta's Llama 3.1 update with 128K extended context, multilingual coverage across 8 languages, and refined post-training.",
      "use_cases": [
        "chat",
        "long-context",
        "multilingual"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4526989112,
      "size_gb": 4.216,
      "downloads": 750,
      "likes": 3,
      "last_modified": "2025-09-07T18:53:34.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.1-8B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.1-8B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's Llama 3.1 update with 128K extended context, multilingual coverage across 8 languages, and refined post-training. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.216 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.2-1B-Instruct-q0f16-MLC",
      "name": "Llama 3.2 1B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-3.2-1B-Instruct",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 5344861,
        "likes": 1385,
        "last_modified": "2024-10-24T15:07:51.000Z",
        "license": "llama3.2",
        "languages": [
          "en",
          "de",
          "fr",
          "it",
          "pt",
          "hi",
          "es",
          "th"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3.2-1B",
      "description": "Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized.",
      "use_cases": [
        "mobile chat",
        "edge",
        "browser"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 2480870526,
      "size_gb": 2.31,
      "downloads": 192,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:18.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.2-1B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 2.31 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC",
      "name": "Llama 3.2 1B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-3.2-1B-Instruct",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 5344861,
        "likes": 1385,
        "last_modified": "2024-10-24T15:07:51.000Z",
        "license": "llama3.2",
        "languages": [
          "en",
          "de",
          "fr",
          "it",
          "pt",
          "hi",
          "es",
          "th"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3.2-1B",
      "description": "Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized.",
      "use_cases": [
        "mobile chat",
        "edge",
        "browser"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 2480917942,
      "size_gb": 2.311,
      "downloads": 26,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:20.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.2-1B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 2.311 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.2-1B-Instruct-q4f16_1-MLC",
      "name": "Llama 3.2 1B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-3.2-1B-Instruct",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 5344861,
        "likes": 1385,
        "last_modified": "2024-10-24T15:07:51.000Z",
        "license": "llama3.2",
        "languages": [
          "en",
          "de",
          "fr",
          "it",
          "pt",
          "hi",
          "es",
          "th"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3.2-1B",
      "description": "Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized.",
      "use_cases": [
        "mobile chat",
        "edge",
        "browser"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 704522448,
      "size_gb": 0.656,
      "downloads": 42703,
      "likes": 5,
      "last_modified": "2025-09-07T18:54:20.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.2-1B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.656 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC",
      "name": "Llama 3.2 1B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-3.2-1B-Instruct",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 5344861,
        "likes": 1385,
        "last_modified": "2024-10-24T15:07:51.000Z",
        "license": "llama3.2",
        "languages": [
          "en",
          "de",
          "fr",
          "it",
          "pt",
          "hi",
          "es",
          "th"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3.2-1B",
      "description": "Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized.",
      "use_cases": [
        "mobile chat",
        "edge",
        "browser"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 704588843,
      "size_gb": 0.656,
      "downloads": 7494,
      "likes": 1,
      "last_modified": "2025-09-07T18:54:21.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.2-1B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.2-1B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's compact Llama 3.2 model. Pruned and distilled from larger Llamas, purpose-built for on-device and edge use. Strong instruction-following at <1GB quantized. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.656 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.2-3B-Instruct-q4f16_1-MLC",
      "name": "Llama 3.2 3B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.2-3B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-3B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-3B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-3.2-3B-Instruct",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 1988470,
        "likes": 2113,
        "last_modified": "2024-10-24T15:07:29.000Z",
        "license": "llama3.2",
        "languages": [
          "en",
          "de",
          "fr",
          "it",
          "pt",
          "hi",
          "es",
          "th"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3.2-3B",
      "description": "Meta's mid-sized Llama 3.2 model. Distilled/pruned for edge use; excellent chat quality at ~2GB quantized.",
      "use_cases": [
        "chat",
        "edge",
        "browser"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1816811105,
      "size_gb": 1.692,
      "downloads": 5862,
      "likes": 5,
      "last_modified": "2025-09-07T18:54:23.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.2-3B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.2-3B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's mid-sized Llama 3.2 model. Distilled/pruned for edge use; excellent chat quality at ~2GB quantized. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.692 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Llama-3.2-3B-Instruct-q4f32_1-MLC",
      "name": "Llama 3.2 3B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Llama-3.2-3B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-3B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Llama-3.2-3B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "meta-llama/Llama-3.2-3B-Instruct",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
        "author": "meta-llama",
        "author_url": "https://huggingface.co/meta-llama",
        "downloads": 1988470,
        "likes": 2113,
        "last_modified": "2024-10-24T15:07:29.000Z",
        "license": "llama3.2",
        "languages": [
          "en",
          "de",
          "fr",
          "it",
          "pt",
          "hi",
          "es",
          "th"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Llama-3.2-3B",
      "description": "Meta's mid-sized Llama 3.2 model. Distilled/pruned for edge use; excellent chat quality at ~2GB quantized.",
      "use_cases": [
        "chat",
        "edge",
        "browser"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1816930956,
      "size_gb": 1.692,
      "downloads": 8498,
      "likes": 2,
      "last_modified": "2025-09-07T18:54:25.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:meta-llama/Llama-3.2-3B-Instruct",
        "base_model:quantized:meta-llama/Llama-3.2-3B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Meta's mid-sized Llama 3.2 model. Distilled/pruned for edge use; excellent chat quality at ~2GB quantized. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.692 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC",
      "name": "Mistral 7B Instruct v0.2",
      "huggingface_url": "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Mistral-7B-Instruct-v0.2-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "mistralai/Mistral-7B-Instruct-v0.2",
        "url": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
        "author": "mistralai",
        "author_url": "https://huggingface.co/mistralai",
        "downloads": 2067579,
        "likes": 3126,
        "last_modified": "2025-07-24T16:57:21.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Mistral-7B-Instruct-v0.2",
      "description": "Mistral AI's v0.2 7B instruct. Sliding-window + grouped-query attention. Long the de-facto 7B baseline.",
      "use_cases": [
        "chat",
        "general assistant"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4076749637,
      "size_gb": 3.797,
      "downloads": 3302,
      "likes": 4,
      "last_modified": "2025-09-07T18:50:22.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:mistralai/Mistral-7B-Instruct-v0.2",
        "base_model:quantized:mistralai/Mistral-7B-Instruct-v0.2",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Mistral AI's v0.2 7B instruct. Sliding-window + grouped-query attention. Long the de-facto 7B baseline. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 3.797 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
      "name": "Mistral 7B Instruct v0.3",
      "huggingface_url": "https://huggingface.co/mlc-ai/Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Mistral-7B-Instruct-v0.3-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Mistral-7B-Instruct-v0.3-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "mistralai/Mistral-7B-Instruct-v0.3",
        "url": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
        "author": "mistralai",
        "author_url": "https://huggingface.co/mistralai",
        "downloads": 3103899,
        "likes": 2538,
        "last_modified": "2025-12-03T12:13:48.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": null
      },
      "family": "Mistral-7B-Instruct-v0.3",
      "description": "Mistral AI's v0.3 with extended vocabulary, function-calling support, and v3 tokenizer.",
      "use_cases": [
        "chat",
        "function calling"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4080593693,
      "size_gb": 3.8,
      "downloads": 10972,
      "likes": 0,
      "last_modified": "2025-09-07T18:51:49.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:mistralai/Mistral-7B-Instruct-v0.3",
        "base_model:quantized:mistralai/Mistral-7B-Instruct-v0.3",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Mistral AI's v0.3 with extended vocabulary, function-calling support, and v3 tokenizer. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 3.8 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC",
      "name": "Phi-3 Mini 4K Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Phi-3-mini-4k-instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/Phi-3-mini-4k-instruct",
        "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 746649,
        "likes": 1415,
        "last_modified": "2025-12-10T20:22:18.000Z",
        "license": "mit",
        "languages": [
          "en",
          "fr"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Phi-3-mini",
      "description": "Microsoft's Phi-3 Mini \u2014 3.8B production-grade small model. Comparable to mid-scale Llamas at a fraction of the size.",
      "use_cases": [
        "chat",
        "reasoning",
        "edge"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 2152382199,
      "size_gb": 2.005,
      "downloads": 23286,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:16.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/Phi-3-mini-4k-instruct",
        "base_model:quantized:microsoft/Phi-3-mini-4k-instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-3 Mini \u2014 3.8B production-grade small model. Comparable to mid-scale Llamas at a fraction of the size. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 2.005 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC",
      "name": "Phi-3 Mini 4K Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Phi-3-mini-4k-instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/Phi-3-mini-4k-instruct",
        "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 746649,
        "likes": 1415,
        "last_modified": "2025-12-10T20:22:18.000Z",
        "license": "mit",
        "languages": [
          "en",
          "fr"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Phi-3-mini",
      "description": "Microsoft's Phi-3 Mini \u2014 3.8B production-grade small model. Comparable to mid-scale Llamas at a fraction of the size.",
      "use_cases": [
        "chat",
        "reasoning",
        "edge"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 2152422301,
      "size_gb": 2.005,
      "downloads": 231,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:17.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/Phi-3-mini-4k-instruct",
        "base_model:quantized:microsoft/Phi-3-mini-4k-instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-3 Mini \u2014 3.8B production-grade small model. Comparable to mid-scale Llamas at a fraction of the size. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 2.005 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC",
      "name": "Phi-3.5 Mini Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/Phi-3.5-mini-instruct",
        "url": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 727640,
        "likes": 973,
        "last_modified": "2025-12-10T20:22:56.000Z",
        "license": "mit",
        "languages": [
          "multilingual"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Phi-3.5-mini",
      "description": "Refinement of Phi-3 with improved multilingual support, function-calling, and longer-context retention.",
      "use_cases": [
        "chat",
        "function calling",
        "multilingual"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 2152285156,
      "size_gb": 2.004,
      "downloads": 9662,
      "likes": 5,
      "last_modified": "2025-09-07T18:53:16.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/Phi-3.5-mini-instruct",
        "base_model:quantized:microsoft/Phi-3.5-mini-instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Refinement of Phi-3 with improved multilingual support, function-calling, and longer-context retention. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 2.004 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC",
      "name": "Phi-3.5 Mini Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/Phi-3.5-mini-instruct",
        "url": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 727640,
        "likes": 973,
        "last_modified": "2025-12-10T20:22:56.000Z",
        "license": "mit",
        "languages": [
          "multilingual"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Phi-3.5-mini",
      "description": "Refinement of Phi-3 with improved multilingual support, function-calling, and longer-context retention.",
      "use_cases": [
        "chat",
        "function calling",
        "multilingual"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 2152426000,
      "size_gb": 2.005,
      "downloads": 151,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:14.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/Phi-3.5-mini-instruct",
        "base_model:quantized:microsoft/Phi-3.5-mini-instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Refinement of Phi-3 with improved multilingual support, function-calling, and longer-context retention. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 2.005 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Phi-4-mini-instruct-q4f16_1-MLC",
      "name": "Phi-4 Mini Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Phi-4-mini-instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Phi-4-mini-instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Phi-4-mini-instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/Phi-4-mini-instruct",
        "url": "https://huggingface.co/microsoft/Phi-4-mini-instruct",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 1490838,
        "likes": 730,
        "last_modified": "2025-12-10T20:24:40.000Z",
        "license": "mit",
        "languages": [
          "multilingual",
          "ar",
          "zh",
          "cs",
          "da",
          "nl",
          "en",
          "fi",
          "fr",
          "de",
          "he",
          "hu",
          "it",
          "ja",
          "ko",
          "no",
          "pl",
          "pt",
          "ru",
          "es",
          "sv",
          "th",
          "tr",
          "uk"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Phi-4-mini",
      "description": "Microsoft's Phi-4 Mini \u2014 most capable small model in the Phi family. Strong reasoning, tool-use, and agentic workflows.",
      "use_cases": [
        "chat",
        "reasoning",
        "tools",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 2180186008,
      "size_gb": 2.03,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-4 Mini \u2014 most capable small model in the Phi family. Strong reasoning, tool-use, and agentic workflows. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 2.03 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Phi-4-mini-instruct-q4f32_1-MLC",
      "name": "Phi-4 Mini Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Phi-4-mini-instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Phi-4-mini-instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Phi-4-mini-instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/Phi-4-mini-instruct",
        "url": "https://huggingface.co/microsoft/Phi-4-mini-instruct",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 1490838,
        "likes": 730,
        "last_modified": "2025-12-10T20:24:40.000Z",
        "license": "mit",
        "languages": [
          "multilingual",
          "ar",
          "zh",
          "cs",
          "da",
          "nl",
          "en",
          "fi",
          "fr",
          "de",
          "he",
          "hu",
          "it",
          "ja",
          "ko",
          "no",
          "pl",
          "pt",
          "ru",
          "es",
          "sv",
          "th",
          "tr",
          "uk"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Phi-4-mini",
      "description": "Microsoft's Phi-4 Mini \u2014 most capable small model in the Phi family. Strong reasoning, tool-use, and agentic workflows.",
      "use_cases": [
        "chat",
        "reasoning",
        "tools",
        "agentic"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 2180322277,
      "size_gb": 2.031,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-4 Mini \u2014 most capable small model in the Phi family. Strong reasoning, tool-use, and agentic workflows. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 2.031 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen1.5-0.5B-Chat-q4f16_1-MLC",
      "name": "Qwen 1.5 0.5B Chat",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen1.5-0.5B-Chat-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen1.5-0.5B-Chat-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen1.5-0.5B-Chat-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen1.5-0.5B-Chat",
        "url": "https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 81474,
        "likes": 93,
        "last_modified": "2024-04-30T07:19:52.000Z",
        "license": "other",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen1.5-0.5B",
      "description": "Alibaba's early-generation Qwen 1.5 \u2014 strong English/Chinese bilingual chat at the smallest practical scale.",
      "use_cases": [
        "bilingual chat",
        "tiny models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 360458973,
      "size_gb": 0.336,
      "downloads": 594,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:43.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen1.5-0.5B-Chat",
        "base_model:quantized:Qwen/Qwen1.5-0.5B-Chat",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Alibaba's early-generation Qwen 1.5 \u2014 strong English/Chinese bilingual chat at the smallest practical scale. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.336 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-0.5B-Instruct-q0f16-MLC",
      "name": "Qwen 2 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-0.5B-Instruct-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-0.5B-Instruct-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 645643,
        "likes": 200,
        "last_modified": "2024-08-21T10:23:36.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-0.5B",
      "description": "Alibaba's Qwen 2 at 0.5B. Multilingual training across 27 languages, 32K context, very compact.",
      "use_cases": [
        "multilingual chat",
        "tiny models"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 999804552,
      "size_gb": 0.931,
      "downloads": 57,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:50.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 at 0.5B. Multilingual training across 27 languages, 32K context, very compact. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 0.931 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-0.5B-Instruct-q0f32-MLC",
      "name": "Qwen 2 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-0.5B-Instruct-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-0.5B-Instruct-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 645643,
        "likes": 200,
        "last_modified": "2024-08-21T10:23:36.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-0.5B",
      "description": "Alibaba's Qwen 2 at 0.5B. Multilingual training across 27 languages, 32K context, very compact.",
      "use_cases": [
        "multilingual chat",
        "tiny models"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 999873603,
      "size_gb": 0.931,
      "downloads": 41,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:49.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 at 0.5B. Multilingual training across 27 languages, 32K context, very compact. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 0.931 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 645643,
        "likes": 200,
        "last_modified": "2024-08-21T10:23:36.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-0.5B",
      "description": "Alibaba's Qwen 2 at 0.5B. Multilingual training across 27 languages, 32K context, very compact.",
      "use_cases": [
        "multilingual chat",
        "tiny models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 289856228,
      "size_gb": 0.27,
      "downloads": 2045,
      "likes": 1,
      "last_modified": "2025-09-07T18:52:48.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 at 0.5B. Multilingual training across 27 languages, 32K context, very compact. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.27 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2 1.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-1.5B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-1.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 3487346,
        "likes": 162,
        "last_modified": "2024-06-06T14:36:57.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-1.5B",
      "description": "Alibaba's Qwen 2 at 1.5B. Multilingual, 32K context, strong reasoning per parameter.",
      "use_cases": [
        "chat",
        "multilingual"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 880382684,
      "size_gb": 0.82,
      "downloads": 376,
      "likes": 1,
      "last_modified": "2025-09-07T18:52:51.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-1.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2-1.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 at 1.5B. Multilingual, 32K context, strong reasoning per parameter. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.82 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-1.5B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2 1.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-1.5B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-1.5B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-1.5B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-1.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 3487346,
        "likes": 162,
        "last_modified": "2024-06-06T14:36:57.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-1.5B",
      "description": "Alibaba's Qwen 2 at 1.5B. Multilingual, 32K context, strong reasoning per parameter.",
      "use_cases": [
        "chat",
        "multilingual"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 880506374,
      "size_gb": 0.82,
      "downloads": 137,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:52.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-1.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2-1.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 at 1.5B. Multilingual, 32K context, strong reasoning per parameter. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.82 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-7B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2 7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-7B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-7B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-7B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 569047,
        "likes": 685,
        "last_modified": "2024-08-21T10:29:04.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-7B",
      "description": "Alibaba's Qwen 2 flagship 7B. 32K context, refined reasoning and tool-use.",
      "use_cases": [
        "chat",
        "reasoning",
        "multilingual"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4296123849,
      "size_gb": 4.001,
      "downloads": 55,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:54.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 flagship 7B. 32K context, refined reasoning and tool-use. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-7B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2 7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-7B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-7B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-7B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 569047,
        "likes": 685,
        "last_modified": "2024-08-21T10:29:04.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-7B",
      "description": "Alibaba's Qwen 2 flagship 7B. 32K context, refined reasoning and tool-use.",
      "use_cases": [
        "chat",
        "reasoning",
        "multilingual"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4296260140,
      "size_gb": 4.001,
      "downloads": 22,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:55.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2 flagship 7B. 32K context, refined reasoning and tool-use. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-Math-7B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2 Math 7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-Math-7B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-Math-7B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-Math-7B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-Math-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-Math-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 1034,
        "likes": 44,
        "last_modified": "2024-08-12T13:46:15.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-Math-7B",
      "description": "Math-specialized Qwen 2 with chain-of-thought training. Designed for symbolic and numerical problem-solving.",
      "use_cases": [
        "mathematics",
        "reasoning",
        "step-by-step"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4296023075,
      "size_gb": 4.001,
      "downloads": 12,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:03.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-Math-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2-Math-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Math-specialized Qwen 2 with chain-of-thought training. Designed for symbolic and numerical problem-solving. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2-Math-7B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2 Math 7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2-Math-7B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2-Math-7B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2-Math-7B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2-Math-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2-Math-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 1034,
        "likes": 44,
        "last_modified": "2024-08-12T13:46:15.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2-Math-7B",
      "description": "Math-specialized Qwen 2 with chain-of-thought training. Designed for symbolic and numerical problem-solving.",
      "use_cases": [
        "mathematics",
        "reasoning",
        "step-by-step"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4296159999,
      "size_gb": 4.001,
      "downloads": 5,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:04.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2-Math-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2-Math-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Math-specialized Qwen 2 with chain-of-thought training. Designed for symbolic and numerical problem-solving. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-0.5B-Instruct-q0f16-MLC",
      "name": "Qwen 2.5 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-0.5B-Instruct-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6089483,
        "likes": 505,
        "last_modified": "2024-09-25T12:32:56.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-0.5B",
      "description": "Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size.",
      "use_cases": [
        "tiny models",
        "edge",
        "multilingual"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 999696306,
      "size_gb": 0.931,
      "downloads": 97,
      "likes": 1,
      "last_modified": "2025-09-07T18:54:10.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 0.931 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-0.5B-Instruct-q0f32-MLC",
      "name": "Qwen 2.5 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-0.5B-Instruct-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6089483,
        "likes": 505,
        "last_modified": "2024-09-25T12:32:56.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-0.5B",
      "description": "Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size.",
      "use_cases": [
        "tiny models",
        "edge",
        "multilingual"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 999764173,
      "size_gb": 0.931,
      "downloads": 28,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:11.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 0.931 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6089483,
        "likes": 505,
        "last_modified": "2024-09-25T12:32:56.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-0.5B",
      "description": "Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size.",
      "use_cases": [
        "tiny models",
        "edge",
        "multilingual"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 289693824,
      "size_gb": 0.27,
      "downloads": 44515,
      "likes": 4,
      "last_modified": "2025-09-07T18:53:24.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.27 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2.5 0.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-0.5B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-0.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6089483,
        "likes": 505,
        "last_modified": "2024-09-25T12:32:56.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-0.5B",
      "description": "Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size.",
      "use_cases": [
        "tiny models",
        "edge",
        "multilingual"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 289795065,
      "size_gb": 0.27,
      "downloads": 1555,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:08.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-0.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-0.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 0.5B \u2014 refined post-training, 18T-token pretraining base, surprisingly capable for its size. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.27 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 1.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-1.5B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-1.5B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-1.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 9813491,
        "likes": 680,
        "last_modified": "2024-09-25T12:32:50.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-1.5B",
      "description": "Alibaba's Qwen 2.5 at 1.5B. Strong general-purpose chat with notable math and code ability.",
      "use_cases": [
        "chat",
        "code",
        "math"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 880289238,
      "size_gb": 0.82,
      "downloads": 23935,
      "likes": 1,
      "last_modified": "2025-09-07T18:53:43.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-1.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-1.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 1.5B. Strong general-purpose chat with notable math and code ability. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.82 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-1.5B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2.5 1.5B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-1.5B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-1.5B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-1.5B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-1.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 9813491,
        "likes": 680,
        "last_modified": "2024-09-25T12:32:50.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-1.5B",
      "description": "Alibaba's Qwen 2.5 at 1.5B. Strong general-purpose chat with notable math and code ability.",
      "use_cases": [
        "chat",
        "code",
        "math"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 880412341,
      "size_gb": 0.82,
      "downloads": 422,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:45.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-1.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-1.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 1.5B. Strong general-purpose chat with notable math and code ability. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.82 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-3B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 3B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-3B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-3B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-3B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-3B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 9143309,
        "likes": 449,
        "last_modified": "2024-09-25T12:33:00.000Z",
        "license": "other",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-3B",
      "description": "Alibaba's Qwen 2.5 at 3B. Sweet spot for browser deployment \u2014 punches well above its weight on benchmarks.",
      "use_cases": [
        "chat",
        "code",
        "math"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1748010498,
      "size_gb": 1.628,
      "downloads": 948,
      "likes": 2,
      "last_modified": "2025-09-07T18:53:46.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-3B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-3B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 3B. Sweet spot for browser deployment \u2014 punches well above its weight on benchmarks. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.628 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-3B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2.5 3B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-3B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-3B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-3B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-3B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 9143309,
        "likes": 449,
        "last_modified": "2024-09-25T12:33:00.000Z",
        "license": "other",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-3B",
      "description": "Alibaba's Qwen 2.5 at 3B. Sweet spot for browser deployment \u2014 punches well above its weight on benchmarks.",
      "use_cases": [
        "chat",
        "code",
        "math"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1748173685,
      "size_gb": 1.628,
      "downloads": 114,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:48.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-3B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-3B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 at 3B. Sweet spot for browser deployment \u2014 punches well above its weight on benchmarks. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.628 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-7B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-7B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-7B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-7B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 12582644,
        "likes": 1244,
        "last_modified": "2025-01-12T02:10:10.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-7B",
      "description": "Alibaba's Qwen 2.5 flagship small-model. Competitive with much larger models on reasoning, code, and math.",
      "use_cases": [
        "chat",
        "code",
        "reasoning"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4296032717,
      "size_gb": 4.001,
      "downloads": 488,
      "likes": 2,
      "last_modified": "2025-09-07T18:53:50.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 flagship small-model. Competitive with much larger models on reasoning, code, and math. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-7B-Instruct-q4f32_1-MLC",
      "name": "Qwen 2.5 7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-7B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-7B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-7B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 12582644,
        "likes": 1244,
        "last_modified": "2025-01-12T02:10:10.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-7B",
      "description": "Alibaba's Qwen 2.5 flagship small-model. Competitive with much larger models on reasoning, code, and math.",
      "use_cases": [
        "chat",
        "code",
        "reasoning"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4296169641,
      "size_gb": 4.001,
      "downloads": 57,
      "likes": 0,
      "last_modified": "2025-09-07T18:53:51.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 2.5 flagship small-model. Competitive with much larger models on reasoning, code, and math. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 Coder 1.5B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 206886,
        "likes": 118,
        "last_modified": "2025-01-12T02:05:01.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-Coder-1.5B",
      "description": "Code-specialized Qwen 2.5 at 1.5B. Strong competitor to much larger code-only models.",
      "use_cases": [
        "code completion",
        "code chat"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 880289173,
      "size_gb": 0.82,
      "downloads": 1931,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:01.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Code-specialized Qwen 2.5 at 1.5B. Strong competitor to much larger code-only models. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.82 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 Coder 3B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-Coder-3B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 446815,
        "likes": 101,
        "last_modified": "2025-01-12T02:04:23.000Z",
        "license": "other",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-Coder-3B",
      "description": "Code-specialized Qwen 2.5 at 3B. Browser-friendly code assistant.",
      "use_cases": [
        "code completion",
        "code chat"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1748010542,
      "size_gb": 1.628,
      "downloads": 1556,
      "likes": 0,
      "last_modified": "2025-09-07T18:55:01.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-Coder-3B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-Coder-3B-Instruct",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Code-specialized Qwen 2.5 at 3B. Browser-friendly code assistant. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.628 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC",
      "name": "Qwen 2.5 Coder 7B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen2.5-Coder-7B-Instruct",
        "url": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 2086890,
        "likes": 699,
        "last_modified": "2025-01-12T02:03:41.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen2.5-Coder-7B",
      "description": "Code-specialized Qwen 2.5 at 7B. Top-tier open code model in its size class.",
      "use_cases": [
        "code generation",
        "code review"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4296032752,
      "size_gb": 4.001,
      "downloads": 322,
      "likes": 1,
      "last_modified": "2025-09-07T18:54:03.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen2.5-Coder-7B-Instruct",
        "base_model:quantized:Qwen/Qwen2.5-Coder-7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Code-specialized Qwen 2.5 at 7B. Top-tier open code model in its size class. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.001 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-0.6B-q0f16-MLC",
      "name": "Qwen 3 0.6B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-0.6B-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-0.6B",
        "url": "https://huggingface.co/Qwen/Qwen3-0.6B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 18497766,
        "likes": 1214,
        "last_modified": "2025-07-26T03:46:27.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-0.6B",
      "description": "Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context.",
      "use_cases": [
        "chat",
        "tiny models",
        "reasoning toggle"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 1208166869,
      "size_gb": 1.125,
      "downloads": 439,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:10.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-0.6B",
        "base_model:quantized:Qwen/Qwen3-0.6B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 1.125 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-0.6B-q0f32-MLC",
      "name": "Qwen 3 0.6B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-0.6B-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-0.6B",
        "url": "https://huggingface.co/Qwen/Qwen3-0.6B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 18497766,
        "likes": 1214,
        "last_modified": "2025-07-26T03:46:27.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-0.6B",
      "description": "Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context.",
      "use_cases": [
        "chat",
        "tiny models",
        "reasoning toggle"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 1208255802,
      "size_gb": 1.125,
      "downloads": 66,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:11.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-0.6B",
        "base_model:quantized:Qwen/Qwen3-0.6B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 1.125 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-0.6B-q4f16_1-MLC",
      "name": "Qwen 3 0.6B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-0.6B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-0.6B",
        "url": "https://huggingface.co/Qwen/Qwen3-0.6B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 18497766,
        "likes": 1214,
        "last_modified": "2025-07-26T03:46:27.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-0.6B",
      "description": "Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context.",
      "use_cases": [
        "chat",
        "tiny models",
        "reasoning toggle"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 351517143,
      "size_gb": 0.327,
      "downloads": 6104,
      "likes": 1,
      "last_modified": "2026-04-18T20:59:12.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-0.6B",
        "base_model:quantized:Qwen/Qwen3-0.6B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.327 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-0.6B-q4f32_1-MLC",
      "name": "Qwen 3 0.6B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-0.6B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-0.6B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-0.6B",
        "url": "https://huggingface.co/Qwen/Qwen3-0.6B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 18497766,
        "likes": 1214,
        "last_modified": "2025-07-26T03:46:27.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-0.6B",
      "description": "Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context.",
      "use_cases": [
        "chat",
        "tiny models",
        "reasoning toggle"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 351644955,
      "size_gb": 0.327,
      "downloads": 275,
      "likes": 1,
      "last_modified": "2026-04-18T20:59:12.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-0.6B",
        "base_model:quantized:Qwen/Qwen3-0.6B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 0.6B. Hybrid-thinking architecture with toggleable reasoning mode and 128K context. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.327 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-1.7B-q4f16_1-MLC",
      "name": "Qwen 3 1.7B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-1.7B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-1.7B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-1.7B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-1.7B",
        "url": "https://huggingface.co/Qwen/Qwen3-1.7B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6077216,
        "likes": 453,
        "last_modified": "2025-07-26T03:46:32.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-1.7B",
      "description": "Alibaba's Qwen 3 at 1.7B. Hybrid-thinking, 128K context, agentic tool-use.",
      "use_cases": [
        "chat",
        "reasoning",
        "tools"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 984156278,
      "size_gb": 0.917,
      "downloads": 1189,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:14.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-1.7B",
        "base_model:quantized:Qwen/Qwen3-1.7B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 1.7B. Hybrid-thinking, 128K context, agentic tool-use. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.917 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-1.7B-q4f32_1-MLC",
      "name": "Qwen 3 1.7B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-1.7B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-1.7B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-1.7B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-1.7B",
        "url": "https://huggingface.co/Qwen/Qwen3-1.7B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6077216,
        "likes": 453,
        "last_modified": "2025-07-26T03:46:32.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-1.7B",
      "description": "Alibaba's Qwen 3 at 1.7B. Hybrid-thinking, 128K context, agentic tool-use.",
      "use_cases": [
        "chat",
        "reasoning",
        "tools"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 984289034,
      "size_gb": 0.917,
      "downloads": 155,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:15.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-1.7B",
        "base_model:quantized:Qwen/Qwen3-1.7B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 1.7B. Hybrid-thinking, 128K context, agentic tool-use. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.917 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-4B-q4f16_1-MLC",
      "name": "Qwen 3 4B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-4B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-4B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-4B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-4B",
        "url": "https://huggingface.co/Qwen/Qwen3-4B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6234721,
        "likes": 605,
        "last_modified": "2025-07-26T03:46:39.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-4B",
      "description": "Alibaba's Qwen 3 at 4B. Hybrid-thinking reasoning, agentic tool-use, 128K context.",
      "use_cases": [
        "chat",
        "reasoning",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 2279167154,
      "size_gb": 2.123,
      "downloads": 3572,
      "likes": 2,
      "last_modified": "2026-04-18T20:59:22.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-4B",
        "base_model:quantized:Qwen/Qwen3-4B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 4B. Hybrid-thinking reasoning, agentic tool-use, 128K context. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 2.123 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-4B-q4f32_1-MLC",
      "name": "Qwen 3 4B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-4B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-4B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-4B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-4B",
        "url": "https://huggingface.co/Qwen/Qwen3-4B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 6234721,
        "likes": 605,
        "last_modified": "2025-07-26T03:46:39.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-4B",
      "description": "Alibaba's Qwen 3 at 4B. Hybrid-thinking reasoning, agentic tool-use, 128K context.",
      "use_cases": [
        "chat",
        "reasoning",
        "agentic"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 2279345573,
      "size_gb": 2.123,
      "downloads": 135,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:23.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-4B",
        "base_model:quantized:Qwen/Qwen3-4B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 4B. Hybrid-thinking reasoning, agentic tool-use, 128K context. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 2.123 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-8B-q4f16_1-MLC",
      "name": "Qwen 3 8B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-8B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-8B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-8B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-8B",
        "url": "https://huggingface.co/Qwen/Qwen3-8B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 9024066,
        "likes": 1067,
        "last_modified": "2025-07-26T03:49:13.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-8B",
      "description": "Alibaba's Qwen 3 at 8B. Flagship small Qwen 3 \u2014 toggleable reasoning, agentic, multilingual.",
      "use_cases": [
        "chat",
        "reasoning",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4623997938,
      "size_gb": 4.306,
      "downloads": 397,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:24.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-8B",
        "base_model:quantized:Qwen/Qwen3-8B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 8B. Flagship small Qwen 3 \u2014 toggleable reasoning, agentic, multilingual. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.306 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3-8B-q4f32_1-MLC",
      "name": "Qwen 3 8B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3-8B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3-8B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3-8B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3-8B",
        "url": "https://huggingface.co/Qwen/Qwen3-8B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 9024066,
        "likes": 1067,
        "last_modified": "2025-07-26T03:49:13.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "Qwen3-8B",
      "description": "Alibaba's Qwen 3 at 8B. Flagship small Qwen 3 \u2014 toggleable reasoning, agentic, multilingual.",
      "use_cases": [
        "chat",
        "reasoning",
        "agentic"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 4624185984,
      "size_gb": 4.307,
      "downloads": 250,
      "likes": 0,
      "last_modified": "2026-04-18T20:59:25.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Qwen/Qwen3-8B",
        "base_model:quantized:Qwen/Qwen3-8B",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3 at 8B. Flagship small Qwen 3 \u2014 toggleable reasoning, agentic, multilingual. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.307 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-0.8B-q0f16-MLC",
      "name": "Qwen 3.5 0.8B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-0.8B-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-0.8B-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-0.8B-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-0.8B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-0.8B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 2956012,
        "likes": 512,
        "last_modified": "2026-03-02T11:26:58.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-0.8B",
      "description": "Alibaba's Qwen 3.5 at 0.8B. Refined Qwen 3 with stronger reasoning and broader multilingual coverage.",
      "use_cases": [
        "chat",
        "tiny models"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 1527922583,
      "size_gb": 1.423,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 0.8B. Refined Qwen 3 with stronger reasoning and broader multilingual coverage. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 1.423 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-0.8B-q4f16_1-MLC",
      "name": "Qwen 3.5 0.8B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-0.8B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-0.8B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-0.8B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-0.8B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-0.8B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 2956012,
        "likes": 512,
        "last_modified": "2026-03-02T11:26:58.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-0.8B",
      "description": "Alibaba's Qwen 3.5 at 0.8B. Refined Qwen 3 with stronger reasoning and broader multilingual coverage.",
      "use_cases": [
        "chat",
        "tiny models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 447177073,
      "size_gb": 0.416,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 0.8B. Refined Qwen 3 with stronger reasoning and broader multilingual coverage. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.416 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-0.8B-q4f32_1-MLC",
      "name": "Qwen 3.5 0.8B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-0.8B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-0.8B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-0.8B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-0.8B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-0.8B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 2956012,
        "likes": 512,
        "last_modified": "2026-03-02T11:26:58.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-0.8B",
      "description": "Alibaba's Qwen 3.5 at 0.8B. Refined Qwen 3 with stronger reasoning and broader multilingual coverage.",
      "use_cases": [
        "chat",
        "tiny models"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 447175335,
      "size_gb": 0.416,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 0.8B. Refined Qwen 3 with stronger reasoning and broader multilingual coverage. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.416 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-2B-q4f16_1-MLC",
      "name": "Qwen 3.5 2B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-2B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-2B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-2B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-2B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-2B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 1701802,
        "likes": 265,
        "last_modified": "2026-03-02T11:26:29.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-2B",
      "description": "Alibaba's Qwen 3.5 at 2B. Mid-tier compact model with refined reasoning.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1082564401,
      "size_gb": 1.008,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 2B. Mid-tier compact model with refined reasoning. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.008 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-2B-q4f32_1-MLC",
      "name": "Qwen 3.5 2B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-2B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-2B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-2B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-2B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-2B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 1701802,
        "likes": 265,
        "last_modified": "2026-03-02T11:26:29.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-2B",
      "description": "Alibaba's Qwen 3.5 at 2B. Mid-tier compact model with refined reasoning.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1082562665,
      "size_gb": 1.008,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 2B. Mid-tier compact model with refined reasoning. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.008 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-4B-q4f16_1-MLC",
      "name": "Qwen 3.5 4B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-4B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-4B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-4B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-4B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-4B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 3966264,
        "likes": 497,
        "last_modified": "2026-03-02T00:52:52.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-4B",
      "description": "Alibaba's Qwen 3.5 at 4B. Browser-friendly with strong reasoning.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 2390497405,
      "size_gb": 2.226,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 4B. Browser-friendly with strong reasoning. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 2.226 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-4B-q4f32_1-MLC",
      "name": "Qwen 3.5 4B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-4B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-4B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-4B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-4B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-4B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 3966264,
        "likes": 497,
        "last_modified": "2026-03-02T00:52:52.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-4B",
      "description": "Alibaba's Qwen 3.5 at 4B. Browser-friendly with strong reasoning.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 2390495095,
      "size_gb": 2.226,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 at 4B. Browser-friendly with strong reasoning. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 2.226 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-9B-q4f16_1-MLC",
      "name": "Qwen 3.5 9B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-9B-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-9B-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-9B-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-9B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-9B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 7121429,
        "likes": 1358,
        "last_modified": "2026-03-02T00:51:43.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-9B",
      "description": "Alibaba's Qwen 3.5 flagship small. Top-tier 9B-class model.",
      "use_cases": [
        "chat",
        "reasoning",
        "agentic"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 5061443935,
      "size_gb": 4.714,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 flagship small. Top-tier 9B-class model. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.714 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/Qwen3.5-9B-q4f32_1-MLC",
      "name": "Qwen 3.5 9B",
      "huggingface_url": "https://huggingface.co/mlc-ai/Qwen3.5-9B-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-9B-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/Qwen3.5-9B-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Qwen/Qwen3.5-9B",
        "url": "https://huggingface.co/Qwen/Qwen3.5-9B",
        "author": "Qwen",
        "author_url": "https://huggingface.co/Qwen",
        "downloads": 7121429,
        "likes": 1358,
        "last_modified": "2026-03-02T00:51:43.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "image-text-to-text"
      },
      "family": "Qwen3.5-9B",
      "description": "Alibaba's Qwen 3.5 flagship small. Top-tier 9B-class model.",
      "use_cases": [
        "chat",
        "reasoning",
        "agentic"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 5061441618,
      "size_gb": 4.714,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Alibaba's Qwen 3.5 flagship small. Top-tier 9B-class model. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.714 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
      "name": "RedPajama INCITE Chat 3B v1",
      "huggingface_url": "https://huggingface.co/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1",
        "url": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Chat-3B-v1",
        "author": "togethercomputer",
        "author_url": "https://huggingface.co/togethercomputer",
        "downloads": 1825,
        "likes": 152,
        "last_modified": "2023-05-09T14:59:47.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "RedPajama-INCITE-Chat-3B",
      "description": "TogetherAI's chat fine-tune of the RedPajama-INCITE base. Early permissive-license alternative to Llama 1.",
      "use_cases": [
        "chat",
        "research baseline"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1566738607,
      "size_gb": 1.459,
      "downloads": 3167,
      "likes": 3,
      "last_modified": "2025-09-07T18:50:28.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:togethercomputer/RedPajama-INCITE-Chat-3B-v1",
        "base_model:quantized:togethercomputer/RedPajama-INCITE-Chat-3B-v1",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. TogetherAI's chat fine-tune of the RedPajama-INCITE base. Early permissive-license alternative to Llama 1. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.459 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-1.7B-Instruct-q4f16_1-MLC",
      "name": "SmolLM2 1.7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-1.7B-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-1.7B-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-1.7B-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 181634,
        "likes": 729,
        "last_modified": "2025-04-21T20:51:14.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-1.7B",
      "description": "HuggingFace's largest SmolLM2 variant. Trained on heavily curated, high-quality data.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 966375820,
      "size_gb": 0.9,
      "downloads": 700,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:41.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's largest SmolLM2 variant. Trained on heavily curated, high-quality data. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.9 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-1.7B-Instruct-q4f32_1-MLC",
      "name": "SmolLM2 1.7B Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-1.7B-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-1.7B-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-1.7B-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 181634,
        "likes": 729,
        "last_modified": "2025-04-21T20:51:14.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-1.7B",
      "description": "HuggingFace's largest SmolLM2 variant. Trained on heavily curated, high-quality data.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 966475764,
      "size_gb": 0.9,
      "downloads": 165,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:43.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's largest SmolLM2 variant. Trained on heavily curated, high-quality data. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.9 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-135M-Instruct-q0f16-MLC",
      "name": "SmolLM2 135M Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-135M-Instruct-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-135M-Instruct-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-135M-Instruct-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 1407123,
        "likes": 311,
        "last_modified": "2025-09-22T20:43:15.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-135M",
      "description": "HuggingFace's tiniest instruct model \u2014 135M parameters. Targets low-resource environments and instant load.",
      "use_cases": [
        "tiny models",
        "instant load",
        "demos"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 272550981,
      "size_gb": 0.254,
      "downloads": 4595,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:46.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-135M-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-135M-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's tiniest instruct model \u2014 135M parameters. Targets low-resource environments and instant load. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 0.254 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-135M-Instruct-q0f32-MLC",
      "name": "SmolLM2 135M Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-135M-Instruct-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-135M-Instruct-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-135M-Instruct-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 1407123,
        "likes": 311,
        "last_modified": "2025-09-22T20:43:15.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-135M",
      "description": "HuggingFace's tiniest instruct model \u2014 135M parameters. Targets low-resource environments and instant load.",
      "use_cases": [
        "tiny models",
        "instant load",
        "demos"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 272619986,
      "size_gb": 0.254,
      "downloads": 562,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:47.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-135M-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-135M-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's tiniest instruct model \u2014 135M parameters. Targets low-resource environments and instant load. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 0.254 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-360M-Instruct-q0f16-MLC",
      "name": "SmolLM2 360M Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q0f16-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q0f16-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q0f16-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 369825,
        "likes": 185,
        "last_modified": "2025-09-22T20:44:35.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-360M",
      "description": "HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size.",
      "use_cases": [
        "tiny models",
        "edge"
      ],
      "quantization": "q0f16",
      "quantization_description": "Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant.",
      "size_bytes": 727179096,
      "size_gb": 0.677,
      "downloads": 199,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:43.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-360M-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-360M-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size. Full FP16 precision, no weight quantization. Largest size, highest fidelity. Use when GPU memory and bandwidth are abundant. Quantized size 0.677 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-360M-Instruct-q0f32-MLC",
      "name": "SmolLM2 360M Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 369825,
        "likes": 185,
        "last_modified": "2025-09-22T20:44:35.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-360M",
      "description": "HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size.",
      "use_cases": [
        "tiny models",
        "edge"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 727255965,
      "size_gb": 0.677,
      "downloads": 57,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:44.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-360M-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-360M-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 0.677 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-360M-Instruct-q4f16_1-MLC",
      "name": "SmolLM2 360M Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 369825,
        "likes": 185,
        "last_modified": "2025-09-22T20:44:35.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-360M",
      "description": "HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size.",
      "use_cases": [
        "tiny models",
        "edge"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 207242960,
      "size_gb": 0.193,
      "downloads": 1991,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:45.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-360M-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-360M-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.193 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/SmolLM2-360M-Instruct-q4f32_1-MLC",
      "name": "SmolLM2 360M Instruct",
      "huggingface_url": "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/SmolLM2-360M-Instruct-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        "author": "HuggingFaceTB",
        "author_url": "https://huggingface.co/HuggingFaceTB",
        "downloads": 369825,
        "likes": 185,
        "last_modified": "2025-09-22T20:44:35.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "SmolLM2-360M",
      "description": "HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size.",
      "use_cases": [
        "tiny models",
        "edge"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 207365832,
      "size_gb": 0.193,
      "downloads": 294,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:46.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:HuggingFaceTB/SmolLM2-360M-Instruct",
        "base_model:quantized:HuggingFaceTB/SmolLM2-360M-Instruct",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. HuggingFace's compact 360M instruct model. Browser-friendly with surprising chat quality for size. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.193 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
      "name": "TinyLlama 1.1B Chat v1.0",
      "huggingface_url": "https://huggingface.co/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "author": "TinyLlama",
        "author_url": "https://huggingface.co/TinyLlama",
        "downloads": 3172589,
        "likes": 1574,
        "last_modified": "2024-03-17T05:07:08.000Z",
        "license": "apache-2.0",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "TinyLlama-1.1B",
      "description": "Open-source 1.1B chat model trained on 3T tokens. Reference baseline for the smallest practical chat model.",
      "use_cases": [
        "tiny models",
        "research baseline"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 621440671,
      "size_gb": 0.579,
      "downloads": 967,
      "likes": 0,
      "last_modified": "2025-09-07T18:51:55.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "base_model:quantized:TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "region:us"
      ],
      "in_webllm_official": false,
      "selection_rationale": "Selected as a popular mlc-ai community quantization beyond WebLLM's official list, to broaden coverage of well-known model families. Open-source 1.1B chat model trained on 3T tokens. Reference baseline for the smallest practical chat model. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.579 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC",
      "name": "WizardMath 7B v1.1",
      "huggingface_url": "https://huggingface.co/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/WizardMath-7B-V1.1-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "WizardLMTeam/WizardMath-7B-V1.1",
        "url": "https://huggingface.co/WizardLMTeam/WizardMath-7B-V1.1",
        "author": "WizardLMTeam",
        "author_url": "https://huggingface.co/WizardLMTeam",
        "downloads": 19734,
        "likes": 84,
        "last_modified": "2024-01-12T11:39:28.000Z",
        "license": "see base model card",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "WizardMath-7B",
      "description": "Math-focused Llama 2 fine-tune using Reinforced Evol-Instruct on math problem chains.",
      "use_cases": [
        "mathematics",
        "step-by-step reasoning"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 4076708542,
      "size_gb": 3.797,
      "downloads": 49,
      "likes": 0,
      "last_modified": "2025-09-07T18:50:31.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:WizardLMTeam/WizardMath-7B-V1.1",
        "base_model:quantized:WizardLMTeam/WizardMath-7B-V1.1",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Math-focused Llama 2 fine-tune using Reinforced Evol-Instruct on math problem chains. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 3.797 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2-2b-it-q4f16_1-MLC",
      "name": "Gemma 2 2B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2-2b-it-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-it-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-it-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2-2b-it",
        "url": "https://huggingface.co/google/gemma-2-2b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 350168,
        "likes": 1342,
        "last_modified": "2024-08-27T19:41:44.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2-2b-it",
      "description": "Google's Gemma 2 at 2B with instruction tuning. Knowledge-distilled from larger Gemini models for high quality at small scale.",
      "use_cases": [
        "chat",
        "edge"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1492990789,
      "size_gb": 1.39,
      "downloads": 19350,
      "likes": 8,
      "last_modified": "2025-09-07T18:52:12.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2-2b-it",
        "base_model:quantized:google/gemma-2-2b-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's Gemma 2 at 2B with instruction tuning. Knowledge-distilled from larger Gemini models for high quality at small scale. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.39 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2-2b-it-q4f32_1-MLC",
      "name": "Gemma 2 2B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2-2b-it-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-it-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-it-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2-2b-it",
        "url": "https://huggingface.co/google/gemma-2-2b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 350168,
        "likes": 1342,
        "last_modified": "2024-08-27T19:41:44.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2-2b-it",
      "description": "Google's Gemma 2 at 2B with instruction tuning. Knowledge-distilled from larger Gemini models for high quality at small scale.",
      "use_cases": [
        "chat",
        "edge"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1493118005,
      "size_gb": 1.391,
      "downloads": 1053,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:10.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2-2b-it",
        "base_model:quantized:google/gemma-2-2b-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's Gemma 2 at 2B with instruction tuning. Knowledge-distilled from larger Gemini models for high quality at small scale. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.391 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2-2b-jpn-it-q4f16_1-MLC",
      "name": "Gemma 2 2B Japanese IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2-2b-jpn-it-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-jpn-it-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-jpn-it-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2-2b-jpn-it",
        "url": "https://huggingface.co/google/gemma-2-2b-jpn-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 20741,
        "likes": 216,
        "last_modified": "2024-10-02T15:49:51.000Z",
        "license": "gemma",
        "languages": [
          "ja"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2-2b-jpn-it",
      "description": "Gemma 2 2B fine-tuned for Japanese. Strong native Japanese fluency while retaining English capability.",
      "use_cases": [
        "Japanese chat",
        "multilingual"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1492990953,
      "size_gb": 1.39,
      "downloads": 177,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:32.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2-2b-jpn-it",
        "base_model:quantized:google/gemma-2-2b-jpn-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Gemma 2 2B fine-tuned for Japanese. Strong native Japanese fluency while retaining English capability. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.39 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2-2b-jpn-it-q4f32_1-MLC",
      "name": "Gemma 2 2B Japanese IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2-2b-jpn-it-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-jpn-it-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2-2b-jpn-it-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2-2b-jpn-it",
        "url": "https://huggingface.co/google/gemma-2-2b-jpn-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 20741,
        "likes": 216,
        "last_modified": "2024-10-02T15:49:51.000Z",
        "license": "gemma",
        "languages": [
          "ja"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2-2b-jpn-it",
      "description": "Gemma 2 2B fine-tuned for Japanese. Strong native Japanese fluency while retaining English capability.",
      "use_cases": [
        "Japanese chat",
        "multilingual"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1493118169,
      "size_gb": 1.391,
      "downloads": 58,
      "likes": 0,
      "last_modified": "2025-09-07T18:54:36.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2-2b-jpn-it",
        "base_model:quantized:google/gemma-2-2b-jpn-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Gemma 2 2B fine-tuned for Japanese. Strong native Japanese fluency while retaining English capability. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.391 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2-9b-it-q4f16_1-MLC",
      "name": "Gemma 2 9B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2-9b-it-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2-9b-it-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2-9b-it-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2-9b-it",
        "url": "https://huggingface.co/google/gemma-2-9b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 459638,
        "likes": 793,
        "last_modified": "2024-08-27T19:41:49.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2-9b-it",
      "description": "Google's Gemma 2 at 9B with instruction tuning. Top-tier 9B-class open model with strong reasoning.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 5221577039,
      "size_gb": 4.863,
      "downloads": 369,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:16.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2-9b-it",
        "base_model:quantized:google/gemma-2-9b-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's Gemma 2 at 9B with instruction tuning. Top-tier 9B-class open model with strong reasoning. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 4.863 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2-9b-it-q4f32_1-MLC",
      "name": "Gemma 2 9B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2-9b-it-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2-9b-it-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2-9b-it-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2-9b-it",
        "url": "https://huggingface.co/google/gemma-2-9b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 459638,
        "likes": 793,
        "last_modified": "2024-08-27T19:41:49.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2-9b-it",
      "description": "Google's Gemma 2 at 9B with instruction tuning. Top-tier 9B-class open model with strong reasoning.",
      "use_cases": [
        "chat",
        "reasoning"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 5221795449,
      "size_gb": 4.863,
      "downloads": 173,
      "likes": 0,
      "last_modified": "2025-09-07T18:52:17.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2-9b-it",
        "base_model:quantized:google/gemma-2-9b-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's Gemma 2 at 9B with instruction tuning. Top-tier 9B-class open model with strong reasoning. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 4.863 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2b-it-q4f16_1-MLC",
      "name": "Gemma 1 2B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2b-it-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2b-it-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2b-it-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2b-it",
        "url": "https://huggingface.co/google/gemma-2b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 71384,
        "likes": 876,
        "last_modified": "2024-09-27T12:19:02.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2b-it",
      "description": "Google's first-gen open-weight Gemma 2B instruction-tuned model. Derived from Gemini research, optimized for responsible commercial use.",
      "use_cases": [
        "chat",
        "small models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1431953822,
      "size_gb": 1.334,
      "downloads": 3884,
      "likes": 6,
      "last_modified": "2025-09-07T18:52:26.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2b-it",
        "base_model:quantized:google/gemma-2b-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's first-gen open-weight Gemma 2B instruction-tuned model. Derived from Gemini research, optimized for responsible commercial use. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.334 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma-2b-it-q4f32_1-MLC",
      "name": "Gemma 1 2B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma-2b-it-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma-2b-it-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma-2b-it-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-2b-it",
        "url": "https://huggingface.co/google/gemma-2b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 71384,
        "likes": 876,
        "last_modified": "2024-09-27T12:19:02.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma-2b-it",
      "description": "Google's first-gen open-weight Gemma 2B instruction-tuned model. Derived from Gemini research, optimized for responsible commercial use.",
      "use_cases": [
        "chat",
        "small models"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1432032363,
      "size_gb": 1.334,
      "downloads": 647,
      "likes": 2,
      "last_modified": "2025-09-07T18:52:25.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:google/gemma-2b-it",
        "base_model:quantized:google/gemma-2b-it",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's first-gen open-weight Gemma 2B instruction-tuned model. Derived from Gemini research, optimized for responsible commercial use. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.334 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/gemma3-1b-it-q4f16_1-MLC",
      "name": "Gemma 3 1B IT",
      "huggingface_url": "https://huggingface.co/mlc-ai/gemma3-1b-it-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/gemma3-1b-it-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/gemma3-1b-it-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "google/gemma-3-1b-it",
        "url": "https://huggingface.co/google/gemma-3-1b-it",
        "author": "google",
        "author_url": "https://huggingface.co/google",
        "downloads": 531764,
        "likes": 935,
        "last_modified": "2025-04-04T13:12:40.000Z",
        "license": "gemma",
        "languages": [],
        "pipeline_tag": "text-generation"
      },
      "family": "gemma3-1b-it",
      "description": "Google's Gemma 3 at 1B. Latest-gen with multimodal-aware architecture, extended context, refined instruction-tuning.",
      "use_cases": [
        "chat",
        "edge",
        "tiny models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 602051899,
      "size_gb": 0.561,
      "downloads": null,
      "likes": null,
      "last_modified": null,
      "tags": [],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Google's Gemma 3 at 1B. Latest-gen with multimodal-aware architecture, extended context, refined instruction-tuning. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.561 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/phi-1_5-q4f16_1-MLC",
      "name": "Phi-1.5",
      "huggingface_url": "https://huggingface.co/mlc-ai/phi-1_5-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/phi-1_5-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/phi-1_5-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/phi-1_5",
        "url": "https://huggingface.co/microsoft/phi-1_5",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 77774,
        "likes": 1358,
        "last_modified": "2025-11-24T16:58:09.000Z",
        "license": "mit",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "phi-1_5",
      "description": "Microsoft's Phi-1.5 \u2014 1.3B early proof-of-concept that high-quality 'textbook-style' synthetic data produces strong reasoning at small scale.",
      "use_cases": [
        "reasoning",
        "research baseline"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 802662178,
      "size_gb": 0.748,
      "downloads": 103,
      "likes": 0,
      "last_modified": "2025-09-07T18:50:37.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/phi-1_5",
        "base_model:quantized:microsoft/phi-1_5",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-1.5 \u2014 1.3B early proof-of-concept that high-quality 'textbook-style' synthetic data produces strong reasoning at small scale. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.748 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/phi-1_5-q4f32_1-MLC",
      "name": "Phi-1.5",
      "huggingface_url": "https://huggingface.co/mlc-ai/phi-1_5-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/phi-1_5-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/phi-1_5-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/phi-1_5",
        "url": "https://huggingface.co/microsoft/phi-1_5",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 77774,
        "likes": 1358,
        "last_modified": "2025-11-24T16:58:09.000Z",
        "license": "mit",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "phi-1_5",
      "description": "Microsoft's Phi-1.5 \u2014 1.3B early proof-of-concept that high-quality 'textbook-style' synthetic data produces strong reasoning at small scale.",
      "use_cases": [
        "reasoning",
        "research baseline"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 802761303,
      "size_gb": 0.748,
      "downloads": 39,
      "likes": 1,
      "last_modified": "2025-09-07T18:50:44.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/phi-1_5",
        "base_model:quantized:microsoft/phi-1_5",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-1.5 \u2014 1.3B early proof-of-concept that high-quality 'textbook-style' synthetic data produces strong reasoning at small scale. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.748 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/phi-2-q4f16_1-MLC",
      "name": "Phi-2",
      "huggingface_url": "https://huggingface.co/mlc-ai/phi-2-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/phi-2-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/phi-2-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/phi-2",
        "url": "https://huggingface.co/microsoft/phi-2",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 632963,
        "likes": 3449,
        "last_modified": "2025-12-08T11:35:44.000Z",
        "license": "mit",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "phi-2",
      "description": "Microsoft's Phi-2 \u2014 2.7B model trained on textbook-quality synthetic data. Punches far above its weight on reasoning benchmarks.",
      "use_cases": [
        "reasoning",
        "small models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 1569155446,
      "size_gb": 1.461,
      "downloads": 3152,
      "likes": 2,
      "last_modified": "2025-09-07T18:50:32.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/phi-2",
        "base_model:quantized:microsoft/phi-2",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-2 \u2014 2.7B model trained on textbook-quality synthetic data. Punches far above its weight on reasoning benchmarks. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 1.461 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/phi-2-q4f32_1-MLC",
      "name": "Phi-2",
      "huggingface_url": "https://huggingface.co/mlc-ai/phi-2-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/phi-2-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/phi-2-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "microsoft/phi-2",
        "url": "https://huggingface.co/microsoft/phi-2",
        "author": "microsoft",
        "author_url": "https://huggingface.co/microsoft",
        "downloads": 632963,
        "likes": 3449,
        "last_modified": "2025-12-08T11:35:44.000Z",
        "license": "mit",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "phi-2",
      "description": "Microsoft's Phi-2 \u2014 2.7B model trained on textbook-quality synthetic data. Punches far above its weight on reasoning benchmarks.",
      "use_cases": [
        "reasoning",
        "small models"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 1569347818,
      "size_gb": 1.462,
      "downloads": 16,
      "likes": 2,
      "last_modified": "2025-09-07T18:50:47.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:microsoft/phi-2",
        "base_model:quantized:microsoft/phi-2",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Microsoft's Phi-2 \u2014 2.7B model trained on textbook-quality synthetic data. Punches far above its weight on reasoning benchmarks. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 1.462 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/snowflake-arctic-embed-m-q0f32-MLC",
      "name": "Snowflake Arctic Embed M",
      "huggingface_url": "https://huggingface.co/mlc-ai/snowflake-arctic-embed-m-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/snowflake-arctic-embed-m-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/snowflake-arctic-embed-m-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Snowflake/snowflake-arctic-embed-m",
        "url": "https://huggingface.co/Snowflake/snowflake-arctic-embed-m",
        "author": "Snowflake",
        "author_url": "https://huggingface.co/Snowflake",
        "downloads": 584728,
        "likes": 165,
        "last_modified": "2024-12-13T20:51:22.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "sentence-similarity"
      },
      "family": "snowflake-arctic-embed-m",
      "description": "Snowflake's medium text-embedding model for retrieval (RAG). Produces dense vectors for semantic search \u2014 not a chat model.",
      "use_cases": [
        "embeddings",
        "retrieval",
        "RAG"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 218667974,
      "size_gb": 0.204,
      "downloads": 279,
      "likes": 2,
      "last_modified": "2025-09-07T18:53:23.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Snowflake/snowflake-arctic-embed-m",
        "base_model:quantized:Snowflake/snowflake-arctic-embed-m",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Snowflake's medium text-embedding model for retrieval (RAG). Produces dense vectors for semantic search \u2014 not a chat model. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 0.204 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/snowflake-arctic-embed-s-q0f32-MLC",
      "name": "Snowflake Arctic Embed S",
      "huggingface_url": "https://huggingface.co/mlc-ai/snowflake-arctic-embed-s-q0f32-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/snowflake-arctic-embed-s-q0f32-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/snowflake-arctic-embed-s-q0f32-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "Snowflake/snowflake-arctic-embed-s",
        "url": "https://huggingface.co/Snowflake/snowflake-arctic-embed-s",
        "author": "Snowflake",
        "author_url": "https://huggingface.co/Snowflake",
        "downloads": 35226,
        "likes": 25,
        "last_modified": "2024-12-04T16:58:54.000Z",
        "license": "apache-2.0",
        "languages": [],
        "pipeline_tag": "sentence-similarity"
      },
      "family": "snowflake-arctic-embed-s",
      "description": "Snowflake's small text-embedding model for retrieval. Lightweight RAG-oriented embedder.",
      "use_cases": [
        "embeddings",
        "retrieval",
        "RAG"
      ],
      "quantization": "q0f32",
      "quantization_description": "Full FP32 precision. Largest size, highest stability. Reference quality.",
      "size_bytes": 67305447,
      "size_gb": 0.063,
      "downloads": 244,
      "likes": 1,
      "last_modified": "2025-09-07T18:53:21.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:Snowflake/snowflake-arctic-embed-s",
        "base_model:quantized:Snowflake/snowflake-arctic-embed-s",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Snowflake's small text-embedding model for retrieval. Lightweight RAG-oriented embedder. Full FP32 precision. Largest size, highest stability. Reference quality. Quantized size 0.063 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC",
      "name": "StableLM 2 Zephyr 1.6B",
      "huggingface_url": "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/stablelm-2-zephyr-1_6b-q4f16_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "stabilityai/stablelm-2-zephyr-1_6b",
        "url": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b",
        "author": "stabilityai",
        "author_url": "https://huggingface.co/stabilityai",
        "downloads": 5585,
        "likes": 187,
        "last_modified": "2024-06-03T15:16:39.000Z",
        "license": "other",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "stablelm-2-zephyr-1_6b",
      "description": "Stability AI's StableLM 2 fine-tuned with Zephyr-style DPO. Compact yet capable conversational model.",
      "use_cases": [
        "chat",
        "small models"
      ],
      "quantization": "q4f16_1",
      "quantization_description": "4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off.",
      "size_bytes": 933062696,
      "size_gb": 0.869,
      "downloads": 109,
      "likes": 0,
      "last_modified": "2025-09-07T18:51:44.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:stabilityai/stablelm-2-zephyr-1_6b",
        "base_model:quantized:stabilityai/stablelm-2-zephyr-1_6b",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Stability AI's StableLM 2 fine-tuned with Zephyr-style DPO. Compact yet capable conversational model. 4-bit weight quantization (current standard), FP16 activations. The WebLLM default for FP16-capable GPUs \u2014 best size/quality trade-off. Quantized size 0.869 GB \u2014 within the 6 GB cap for browser-deliverable models."
    },
    {
      "model_id": "mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC",
      "name": "StableLM 2 Zephyr 1.6B",
      "huggingface_url": "https://huggingface.co/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC",
      "served_url": "https://models.canxp.ai/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/",
      "hf_compat_url": "https://models.canxp.ai/mlc-ai/stablelm-2-zephyr-1_6b-q4f32_1-MLC/resolve/main/",
      "author": "mlc-ai",
      "author_url": "https://huggingface.co/mlc-ai",
      "base_model": {
        "id": "stabilityai/stablelm-2-zephyr-1_6b",
        "url": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b",
        "author": "stabilityai",
        "author_url": "https://huggingface.co/stabilityai",
        "downloads": 5585,
        "likes": 187,
        "last_modified": "2024-06-03T15:16:39.000Z",
        "license": "other",
        "languages": [
          "en"
        ],
        "pipeline_tag": "text-generation"
      },
      "family": "stablelm-2-zephyr-1_6b",
      "description": "Stability AI's StableLM 2 fine-tuned with Zephyr-style DPO. Compact yet capable conversational model.",
      "use_cases": [
        "chat",
        "small models"
      ],
      "quantization": "q4f32_1",
      "quantization_description": "4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support.",
      "size_bytes": 933187121,
      "size_gb": 0.869,
      "downloads": 24,
      "likes": 0,
      "last_modified": "2025-09-07T18:51:45.000Z",
      "tags": [
        "mlc-llm",
        "web-llm",
        "base_model:stabilityai/stablelm-2-zephyr-1_6b",
        "base_model:quantized:stabilityai/stablelm-2-zephyr-1_6b",
        "region:us"
      ],
      "in_webllm_official": true,
      "selection_rationale": "Listed in WebLLM's official prebuiltAppConfig \u2014 directly supported by the WebLLM runtime. Stability AI's StableLM 2 fine-tuned with Zephyr-style DPO. Compact yet capable conversational model. 4-bit weight quantization, FP32 activations. For browsers/GPUs lacking FP16 support. Quantized size 0.869 GB \u2014 within the 6 GB cap for browser-deliverable models."
    }
  ]
}