
  <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
      <title>Starmorph AI Web Development Blog</title>
      <link>https://blog.starmorph.com/blog</link>
      <description>Artificial Intelligence and Web Development Blog.</description>
      <language>en-us</language>
      <managingEditor>dylan@starmorph.com (Dylan Boudro)</managingEditor>
      <webMaster>dylan@starmorph.com (Dylan Boudro)</webMaster>
      <lastBuildDate>Fri, 10 Apr 2026 10:00:00 GMT</lastBuildDate>
      <atom:link href="https://blog.starmorph.com/tags/inference/feed.xml" rel="self" type="application/rss+xml"/>
      
      <item>
        <guid>https://blog.starmorph.com/blog/apple-silicon-llm-inference-optimization-guide</guid>
        <title>Apple Silicon LLM Inference Optimization: The Complete Guide to Maximum Performance</title>
        <link>https://blog.starmorph.com/blog/apple-silicon-llm-inference-optimization-guide</link>
        <description>A comprehensive guide to maximizing LLM inference performance on Apple Silicon: MLX vs llama.cpp benchmarks, quantization formats, RAM requirements, MoE models, speculative decoding, KV cache optimization, and the best models for every Mac configuration.</description>
        <pubDate>Fri, 10 Apr 2026 10:00:00 GMT</pubDate>
        <author>dylan@starmorph.com (Dylan Boudro)</author>
        <category>apple-silicon</category>
        <category>llm</category>
        <category>local-ai</category>
        <category>mlx</category>
        <category>ollama</category>
        <category>quantization</category>
        <category>inference</category>
        <category>mac-mini</category>
        <category>performance</category>
        <category>developer-tools</category>
      </item>

    </channel>
  </rss>
