Skip to content

Commit 36ef042

Browse files
committed
Add GGUF-level WHT rotation for Q_K quantization
Add experimental --quant-wht support for storing Q_K/Q8_0 matmul weights in WHT-rotated domain, with GGUF metadata, loader flags, CPU/CUDA activation WHT preprocess, and exact imatrix scoring for rotated Q_K candidates.
1 parent dc60b16 commit 36ef042

15 files changed

Lines changed: 753 additions & 50 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<html>
2+
<head><meta charset="utf-8" /></head>
3+
<body>
4+
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
5+
<script charset="utf-8" src="https://cdn.plot.ly/plotly-3.3.1.min.js" integrity="sha256-4rD3fugVb/nVJYUv5Ky3v+fYXoouHaBSP20WIJuEiWg=" crossorigin="anonymous"></script> <div id="f3e3cbc3-bb53-4111-8115-6996ecb44b1a" class="plotly-graph-div" style="height:900px; width:1400px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("f3e3cbc3-bb53-4111-8115-6996ecb44b1a")) { Plotly.newPlot( "f3e3cbc3-bb53-4111-8115-6996ecb44b1a", [{"hovertemplate":"BF16\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":10},"mode":"markers+text","name":"BF16","text":["BF16"],"textposition":"top right","x":[7.8458],"y":[0.0],"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"Base Q_K\u002fQ8_0","text":["Q2_K","Q3_K","Q4_K","Q5_K","Q6_K","Q8_0"],"textposition":"top center","x":{"dtype":"f8","bdata":"tFn1udqK\u002fD+Y3ZOHhdoAQFjKMsSxLgRARUdy+Q\u002fpBkBz1xLyQc8JQMKGp1fKshBA"},"y":{"dtype":"f8","bdata":"yxRzEHT0I0DKNnAH6jQQQPp9\u002f+bFyQJAlGsKZHaW8j91dFyN7ErRP3MPCd\u002f7G7g\u002f"},"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"IQ","text":["IQ2_S","IQ3_XXS","IQ3_S","IQ4_XS"],"textposition":"top center","x":{"dtype":"f8","bdata":"BFYOLbKd+D+ppE5AE2H8P1InoImw4f8\u002fQYLix5i7AkA="},"y":{"dtype":"f8","bdata":"\u002ffohNlioH0A3\u002fG66ZdcRQICeBgySfgpAn3HhQEgWAEA="},"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"PQ","text":["PQ2_K","PQ3_K","PQ4_K"],"textposition":"top center","x":{"dtype":"f8","bdata":"hXzQs1n1\u002fD\u002feAgmKH2MBQGdEaW\u002fwhQRA"},"y":{"dtype":"f8","bdata":"C0RPyqSOI0A5Drxa7iwPQCkJibSN3wFA"},"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"PQ_IM","text":["PQ2_K_IM","PQ3_K_IM","PQ4_K_IM","PQ4_K_IM_XL"],"textposition":"top center","x":{"dtype":"f8","bdata":"hXzQs1n1\u002fD\u002feAgmKH2MBQGdEaW\u002fwhQRADXGsi9voBUA="},"y":{"dtype":"f8","bdata":"xLXaw17oHEAUQZyHExgFQI9WtaSjHPM\u002foP8evHbp6D8="},"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"UD","text":["UD-IQ2_XXS","UD-IQ2_M","UD-Q2_K_XL","UD-IQ3_XXS","UD-Q4_K_XL"],"textposition":"top center","x":{"dtype":"f8","bdata":"LNSa5h2n9j9E+u3rwDn6P+xRuB6F6\u002fw\u002f7C+7Jw8L\u002fT9R2ht8YbIFQA=="},"y":{"dtype":"f8","bdata":"q+y7IvgHIUCafR6jPJMYQJuqe2RzNRhAw\u002fUoXI8iEkCkGvZ7Yh3rPw=="},"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"WHT","text":["Q2_K_WHT","Q3_K_WHT","Q4_K_WHT","Q5_K_WHT","Q6_K_WHT","Q8_0_WHT"],"textposition":"top center","x":{"dtype":"f8","bdata":"tFn1udqK\u002fD+Y3ZOHhdoAQFjKMsSxLgRARUdy+Q\u002fpBkBz1xLyQc8JQMKGp1fKshBA"},"y":{"dtype":"f8","bdata":"K8O4G0T7IkAz+zxGeXYRQAStwJDVLfw\u002fZ5lFKLYC5z+4XP3YJD\u002fRPydO7ncoCqw\u002f"},"type":"scatter"},{"hovertemplate":"%{text}\u003cbr\u003esize=%{x:.4f} GB\u003cbr\u003ep999_kld=%{y:.6f}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"size":8},"mode":"lines+markers+text","name":"WHT_IM","text":["Q2_K_WHT_IM","Q3_K_WHT_IM","Q4_K_WHT_IM","Q4_K_WHT_IM_XL","Q5_K_WHT_IM","Q6_K_WHT_IM","Q8_0_WHT_IM"],"textposition":"top center","x":{"dtype":"f8","bdata":"tFn1udqK\u002fD+Y3ZOHhdoAQFjKMsSxLgRA8x\u002fSb1+HBUBFR3L5D+kGQHPXEvJBzwlAwoanV8qyEEA="},"y":{"dtype":"f8","bdata":"p7BSQUXVHECgwabOo8IJQNF3t7JEp\u002fU\u002fcY+lD11Q6z\u002fd7A+U2\u002fbZP43xYfay7cY\u002fJ07udygKrD8="},"type":"scatter"}], {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermap":[{"type":"scattermap","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"title":{"text":"Model size vs p999_kld (interactive)"},"xaxis":{"title":{"text":"Model size (GB)"}},"yaxis":{"title":{"text":"p999_kld"}},"hovermode":"closest","width":1400,"height":900,"legend":{"title":{"text":"Series (click to hide\u002fshow)"}}}, {"responsive": true} ) }; </script> </div>
6+
</body>
7+
</html>

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,7 @@ extern "C" {
658658
GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
659659
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
660660
GGML_TENSOR_FLAG_COMPUTE = 16, // ...must be computed
661+
GGML_TENSOR_FLAG_QUANT_WHT = 32, // ...stores quantized weights in WHT-rotated domain
661662
};
662663

663664
enum ggml_tri_type {

0 commit comments

Comments
 (0)