{"models":[{"slug":"qwen2.5-coder:1.5b","displayName":"Qwen 2.5 Coder 1.5B","family":"qwen2.5-coder","useCases":["coding"],"runtimes":["ollama","mlx"],"minMemoryBytes":2147483648,"recommendedMemoryBytes":8589934592,"quants":["q4_K_M","q8_0"],"addedAt":"2026-05-28","confidence":"community","positioning":"Tiny coding model for fast autocomplete on 8 GB Macs.","notes":"Best as a draft/autocomplete model — not for long-form code generation."},{"slug":"qwen2.5-coder:7b","displayName":"Qwen 2.5 Coder 7B","family":"qwen2.5-coder","useCases":["coding"],"runtimes":["ollama","mlx"],"minMemoryBytes":8589934592,"recommendedMemoryBytes":17179869184,"quants":["q4_K_M","q5_K_M","q8_0"],"addedAt":"2026-05-28","confidence":"community","positioning":"Solid coding default for 16 GB+ Macs; strong instruction following."},{"slug":"qwen2.5-coder:14b","displayName":"Qwen 2.5 Coder 14B","family":"qwen2.5-coder","useCases":["coding"],"runtimes":["ollama","mlx"],"minMemoryBytes":17179869184,"recommendedMemoryBytes":25769803776,"quants":["q4_K_M","q5_K_M"],"addedAt":"2026-05-28","confidence":"community","positioning":"Stronger coding — fits 24 GB+ Apple Silicon at default quant.","supersedes":[{"slug":"qwen2.5-coder:7b","reason":"Materially better at multi-file refactors and longer code spans, given the headroom."}]},{"slug":"qwen2.5-coder:32b","displayName":"Qwen 2.5 Coder 32B","family":"qwen2.5-coder","useCases":["coding"],"runtimes":["ollama","mlx"],"minMemoryBytes":25769803776,"recommendedMemoryBytes":38654705664,"quants":["q4_K_M","q5_K_M"],"addedAt":"2026-05-28","confidence":"community","positioning":"Frontier-tier local coding model — fits M-series Max/Ultra.","supersedes":[{"slug":"qwen2.5-coder:14b","reason":"Better at architectural reasoning and unfamiliar codebases; needs the memory."}]},{"slug":"llama3.2:3b","displayName":"Llama 3.2 
3B","family":"llama3.2","useCases":["chat"],"runtimes":["ollama","mlx","lmstudio"],"minMemoryBytes":4294967296,"recommendedMemoryBytes":8589934592,"quants":["q4_K_M","q8_0"],"addedAt":"2026-05-28","confidence":"community","positioning":"Small, fast chat — viable on 8 GB Macs and as a low-latency draft model."},{"slug":"qwen2.5:7b","displayName":"Qwen 2.5 7B","family":"qwen2.5","useCases":["chat","rag"],"runtimes":["ollama","mlx"],"minMemoryBytes":8589934592,"recommendedMemoryBytes":17179869184,"quants":["q4_K_M","q5_K_M","q8_0"],"addedAt":"2026-05-28","confidence":"community","positioning":"Strong general chat at 7B. Use Coder variant for code-heavy work."},{"slug":"llama3.1:8b","displayName":"Llama 3.1 8B","family":"llama3.1","useCases":["chat","rag"],"runtimes":["ollama","mlx","lmstudio","llamacpp"],"minMemoryBytes":8589934592,"recommendedMemoryBytes":17179869184,"quants":["q4_K_M","q5_K_M","q8_0"],"addedAt":"2026-05-28","confidence":"community","positioning":"Older default — still solid for chat/RAG with the broadest runtime support."},{"slug":"qwen2.5:14b","displayName":"Qwen 2.5 14B","family":"qwen2.5","useCases":["chat","rag"],"runtimes":["ollama","mlx"],"minMemoryBytes":17179869184,"recommendedMemoryBytes":25769803776,"quants":["q4_K_M","q5_K_M"],"addedAt":"2026-05-28","confidence":"community","positioning":"Capable general chat — preferred over 7B once memory allows.","supersedes":[{"slug":"qwen2.5:7b","reason":"Better at longer context and nuanced instruction following."}]},{"slug":"deepseek-r1-distill-qwen:7b","displayName":"DeepSeek R1 Distill (Qwen 7B)","family":"deepseek-r1","useCases":["reasoning","chat"],"runtimes":["ollama","mlx"],"minMemoryBytes":8589934592,"recommendedMemoryBytes":17179869184,"quants":["q4_K_M","q5_K_M"],"addedAt":"2026-05-28","confidence":"community","positioning":"Compact reasoning model — visible chain-of-thought useful for code planning + math.","notes":"Verbose by default — outputs <think> blocks. 
Strip or surface them depending on the surface."},{"slug":"deepseek-r1-distill-qwen:14b","displayName":"DeepSeek R1 Distill (Qwen 14B)","family":"deepseek-r1","useCases":["reasoning","chat"],"runtimes":["ollama","mlx"],"minMemoryBytes":17179869184,"recommendedMemoryBytes":25769803776,"quants":["q4_K_M","q5_K_M"],"addedAt":"2026-05-28","confidence":"community","positioning":"Mid-tier reasoning — stronger on multi-step problems than the 7B at the memory cost.","supersedes":[{"slug":"deepseek-r1-distill-qwen:7b","reason":"Materially better at multi-step reasoning; takes the headroom seriously."}]},{"slug":"nomic-embed-text","displayName":"Nomic Embed Text v1.5","family":"nomic","useCases":["embedding"],"runtimes":["ollama","mlx"],"minMemoryBytes":1073741824,"recommendedMemoryBytes":2147483648,"addedAt":"2026-05-28","confidence":"community","positioning":"Default embedding model for local RAG. Tiny footprint, broad runtime support."},{"slug":"mxbai-embed-large","displayName":"MixedBread Embed Large","family":"mxbai","useCases":["embedding"],"runtimes":["ollama"],"minMemoryBytes":1073741824,"recommendedMemoryBytes":2147483648,"addedAt":"2026-05-28","confidence":"community","positioning":"Alternative embedding — competitive quality, slightly larger vectors than Nomic."},{"slug":"llama3.2-vision:11b","displayName":"Llama 3.2 Vision 11B","family":"llama3.2","useCases":["vision","chat"],"runtimes":["ollama"],"minMemoryBytes":12884901888,"recommendedMemoryBytes":17179869184,"quants":["q4_K_M"],"addedAt":"2026-05-28","confidence":"community","positioning":"Image-aware chat — for screenshot Q&A, OCR-like extraction, and visual reasoning.","notes":"Ollama-only at default quant today. 
MLX support exists for separate weights."},{"slug":"whisper-large-v3","displayName":"Whisper Large v3","family":"whisper","useCases":["stt"],"runtimes":["llamacpp","mlx"],"minMemoryBytes":4294967296,"recommendedMemoryBytes":8589934592,"addedAt":"2026-05-28","confidence":"community","positioning":"Gold-standard local transcription — strong multilingual + low WER.","notes":"Typically loaded via whisper.cpp or mlx-whisper, not via Ollama. Used by Hermes for STT."},{"slug":"distil-whisper-large-v3","displayName":"Distil-Whisper Large v3","family":"whisper","useCases":["stt"],"runtimes":["llamacpp","mlx"],"minMemoryBytes":2147483648,"recommendedMemoryBytes":4294967296,"addedAt":"2026-05-28","confidence":"community","positioning":"~6× faster than Whisper Large v3 with a small accuracy trade-off — good for live transcription.","supersedes":[{"slug":"whisper-large-v3","reason":"Materially faster speech-to-text loop; preferred for real-time use cases."}],"notes":"Optimized for English. Pair with Whisper Large for multilingual."}]}