ooples
diff --git a/‎src/AutoML/CompressionOptimizer.cs‎
Lines changed: 453 additions & 0 deletions b/‎src/AutoML/CompressionOptimizer.cs‎
Lines changed: 453 additions & 0 deletions
diff --git a/‎src/Deployment/Configuration/CompressionConfig.cs‎
Lines changed: 144 additions & 0 deletions b/‎src/Deployment/Configuration/CompressionConfig.cs‎
Lines changed: 144 additions & 0 deletions
diff --git a/‎src/Deployment/Configuration/DeploymentConfiguration.cs‎
Lines changed: 15 additions & 2 deletions b/‎src/Deployment/Configuration/DeploymentConfiguration.cs‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎src/Enums/CompressionMode.cs‎
Lines changed: 61 additions & 0 deletions b/‎src/Enums/CompressionMode.cs‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎src/Enums/CompressionType.cs‎
Lines changed: 140 additions & 0 deletions b/‎src/Enums/CompressionType.cs‎
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,144 @@
+using AiDotNet.Enums;
+
+namespace AiDotNet.Deployment.Configuration;
+
+/// <summary>
+/// Configuration for model compression - reducing model size while preserving accuracy.
+/// </summary>
+/// <remarks>
+/// <para><b>For Beginners:</b> Model compression makes your trained AI model smaller and faster to load.
+/// Think of it like compressing a photo - you get a smaller file that stays very close to the original.
+///
+/// Why use compression?
+/// - Smaller model files (50-90% size reduction)
+/// - Faster model loading and deployment
+/// - Lower storage and bandwidth costs
+/// - Enables deployment on resource-constrained devices
+///
+/// Trade-offs:
+/// - Some compression types are lossy (slight accuracy reduction, typically 1-2%)
+/// - Compression/decompression adds a small processing overhead
+///
+/// Compression happens automatically when you save (serialize) a model and
+/// decompression happens automatically when you load (deserialize) it.
+/// You never need to handle compression manually.
+///
+/// Example:
+/// <code>
+/// // Use automatic compression (recommended for most cases)
+/// var result = await builder
+///     .ConfigureModel(model)
+///     .ConfigureCompression()
+///     .BuildAsync(x, y);
+///
+/// // Or customize compression settings
+/// var result = await builder
+///     .ConfigureCompression(new CompressionConfig
+///     {
+///         Mode = ModelCompressionMode.Full,
+///         Type = CompressionType.HybridHuffmanClustering,
+///         NumClusters = 256
+///     })
+///     .BuildAsync(x, y);
+/// </code>
+/// </para>
+/// </remarks>
+public class CompressionConfig
+{
+    /// <summary>
+    /// Gets or sets the compression mode (default: <see cref="ModelCompressionMode.Automatic"/>).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> Choose how compression is applied:
+    /// - None: No compression (full size, maximum accuracy)
+    /// - Automatic: System chooses best approach (recommended)
+    /// - WeightsOnly: Compress only model weights
+    /// - Full: Compress entire serialized model
+    /// </para>
+    /// </remarks>
+    public ModelCompressionMode Mode { get; set; } = ModelCompressionMode.Automatic;
+
+    /// <summary>
+    /// Gets or sets the compression algorithm type (default: <see cref="CompressionType.WeightClustering"/>).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> Different algorithms offer different trade-offs (see <see cref="CompressionType"/> for all options):
+    /// - WeightClustering: Groups similar weights (good balance of speed and compression)
+    /// - HuffmanEncoding: Variable-length encoding (lossless coding of weights rounded per <see cref="Precision"/>)
+    /// - HybridHuffmanClustering: Combines both for maximum compression
+    /// </para>
+    /// </remarks>
+    public CompressionType Type { get; set; } = CompressionType.WeightClustering;
+
+    /// <summary>
+    /// Gets or sets the number of clusters for weight clustering (default: 256).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> This is like choosing how many "bins" to sort weights into.
+    /// 256 clusters is a common choice (a cluster index fits in 8 bits, like 8-bit quantization).
+    /// More clusters = higher accuracy but less compression.
+    /// Fewer clusters = more compression but lower accuracy.
+    ///
+    /// Common values:
+    /// - 16: Aggressive compression (4-bit equivalent)
+    /// - 256: Standard compression (8-bit equivalent, recommended)
+    /// - 65536: Light compression (16-bit equivalent)
+    /// </para>
+    /// </remarks>
+    public int NumClusters { get; set; } = 256;
+
+    /// <summary>
+    /// Gets or sets the decimal precision for Huffman encoding (default: 4).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> Controls how many decimal places to keep when rounding weights
+    /// for Huffman encoding. Higher precision = better accuracy but less compression.
+    /// 4 decimal places is a good default for most models.
+    /// </para>
+    /// </remarks>
+    public int Precision { get; set; } = 4;
+
+    /// <summary>
+    /// Gets or sets the convergence tolerance for clustering algorithms (default: 1e-6).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> This determines when the clustering algorithm stops iterating.
+    /// Smaller values = more precise clusters but slower compression.
+    /// The default (0.000001) works well for most cases.
+    /// </para>
+    /// </remarks>
+    public double Tolerance { get; set; } = 1e-6;
+
+    /// <summary>
+    /// Gets or sets the maximum iterations for clustering algorithms (default: 100).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> Limits how long the clustering algorithm runs.
+    /// More iterations can improve cluster quality but take longer.
+    /// 100 iterations is sufficient for most models.
+    /// </para>
+    /// </remarks>
+    public int MaxIterations { get; set; } = 100;
+
+    /// <summary>
+    /// Gets or sets the random seed for reproducible compression (default: null for random).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> Set this to a specific number if you want compression
+    /// to produce identical results every time. Useful for testing and debugging.
+    /// Leave as null for normal usage.
+    /// </para>
+    /// </remarks>
+    public int? RandomSeed { get; set; }
+
+    /// <summary>
+    /// Gets or sets the maximum acceptable accuracy loss, as a percentage (default: 2.0).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> If compression would cause more than this percentage
+    /// of accuracy loss, the system will warn you or use a less aggressive compression.
+    /// 2% is acceptable for most applications. Set to 0 for lossless compression only.
+    /// </para>
+    /// </remarks>
+    public double MaxAccuracyLossPercent { get; set; } = 2.0;
+}
@@ -41,6 +41,17 @@ public class DeploymentConfiguration
     /// </summary>
     public GpuAccelerationConfig? GpuAcceleration { get; set; }
 
+    /// <summary>
+    /// Gets or sets the <see cref="CompressionConfig"/> used for compression (null = no compression).
+    /// </summary>
+    /// <remarks>
+    /// <para><b>For Beginners:</b> When configured, compression is automatically applied during
+    /// model serialization (saving) and reversed during deserialization (loading).
+    /// Typical size reduction is 50-90%; lossy compression types may cost around 1-2% accuracy.
+    /// </para>
+    /// </remarks>
+    public CompressionConfig? Compression { get; set; }
+
     /// <summary>
     /// Creates a deployment configuration from individual config objects.
     /// </summary>
@@ -51,7 +62,8 @@ public static DeploymentConfiguration Create(
         ABTestingConfig? abTesting,
         TelemetryConfig? telemetry,
         ExportConfig? export,
-        GpuAccelerationConfig? gpuAcceleration)
+        GpuAccelerationConfig? gpuAcceleration,
+        CompressionConfig? compression = null)
     {
         return new DeploymentConfiguration
         {
@@ -61,7 +73,8 @@ public static DeploymentConfiguration Create(
             ABTesting = abTesting,
             Telemetry = telemetry,
             Export = export,
-            GpuAcceleration = gpuAcceleration
+            GpuAcceleration = gpuAcceleration,
+            Compression = compression
         };
     }
 }
@@ -0,0 +1,61 @@
+namespace AiDotNet.Enums;
+
+/// <summary>
+/// Defines the mode of model compression to apply during serialization.
+/// </summary>
+/// <remarks>
+/// <para>
+/// <b>For Beginners:</b> Compression mode determines when and how your model gets compressed.
+/// Like choosing between automatically archiving files vs manually selecting what to archive,
+/// you can let the system decide the best approach or take control yourself.
+/// </para>
+/// </remarks>
+public enum ModelCompressionMode
+{
+    /// <summary>
+    /// No compression is applied. The model is stored at full size. This is the enum's zero value.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Use this when you need maximum accuracy and don't care about file size,
+    /// or when debugging to ensure compression isn't affecting your results.
+    /// </para>
+    /// </remarks>
+    None,
+
+    /// <summary>
+    /// The system automatically selects the best compression strategy based on model characteristics.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> This is the recommended mode and the default for <c>CompressionConfig.Mode</c>.
+    /// The system analyzes your model and chooses the compression approach that provides the best balance
+    /// of size reduction and accuracy preservation. Like auto settings on a camera, it works well for most cases.
+    /// </para>
+    /// </remarks>
+    Automatic,
+
+    /// <summary>
+    /// Compresses only the model weights, leaving other metadata uncompressed.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Weights are the learned parameters that make up most of a model's size.
+    /// This mode compresses just those weights while keeping configuration and metadata readable.
+    /// Good when you need to inspect model settings but want smaller storage.
+    /// </para>
+    /// </remarks>
+    WeightsOnly,
+
+    /// <summary>
+    /// Compresses the entire serialized model including all metadata.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> This provides maximum compression by compressing everything.
+    /// Best for production deployment where you want the smallest possible file size
+    /// and don't need to inspect the model contents directly.
+    /// </para>
+    /// </remarks>
+    Full
+}
@@ -0,0 +1,140 @@
+namespace AiDotNet.Enums;
+
+/// <summary>
+/// Defines the types of model compression strategies available in the AiDotNet library.
+/// </summary>
+/// <remarks>
+/// <para>
+/// <b>For Beginners:</b> Model compression reduces the size of AI models while trying to maintain their accuracy.
+/// Think of it like compressing a photo - you want a smaller file size but still a recognizable image.
+/// Different compression techniques work better for different scenarios and model types.
+/// </para>
+/// </remarks>
+public enum CompressionType
+{
+    /// <summary>
+    /// No compression applied to the model.
+    /// </summary>
+    None,
+
+    /// <summary>
+    /// Weight clustering groups similar weight values together and replaces them with cluster representatives.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Weight clustering is like organizing a messy drawer by grouping similar items.
+    /// Instead of storing thousands of slightly different weight values (like 0.501, 0.502, 0.503),
+    /// the model groups them into clusters and stores just the cluster centers (like 0.5).
+    /// This dramatically reduces model size while maintaining most of the model's intelligence.
+    /// </para>
+    /// </remarks>
+    WeightClustering,
+
+    /// <summary>
+    /// Huffman encoding uses variable-length codes where frequent values get shorter codes.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Huffman encoding is like text message abbreviations. Common words like
+    /// "you" become "u" (shorter), while rare words keep their full spelling. Similarly, weights that
+    /// appear often in your model get stored with fewer bits, and rare weights use more bits.
+    /// The coding step itself loses no information; only rounding weights first (see <c>CompressionConfig.Precision</c>) can.
+    /// </para>
+    /// </remarks>
+    HuffmanEncoding,
+
+    /// <summary>
+    /// Product quantization divides weight vectors into sub-vectors and quantizes each separately.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Product quantization is like describing a color by breaking it into
+    /// red, green, and blue components separately, then rounding each component to the nearest
+    /// standard value. For model weights, it divides weight vectors into smaller pieces, compresses
+    /// each piece independently, then combines them. This provides better compression than treating
+    /// all weights the same way.
+    /// </para>
+    /// </remarks>
+    ProductQuantization,
+
+    /// <summary>
+    /// Combines weight clustering with quantization for improved compression.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> This hybrid approach first groups similar weights (clustering) and then
+    /// further compresses the cluster centers using quantization. It's like first organizing your
+    /// closet by type (shirts, pants, etc.), then within each type, arranging by color codes.
+    /// This two-stage process achieves better compression than either technique alone.
+    /// </para>
+    /// </remarks>
+    HybridClusteringQuantization,
+
+    /// <summary>
+    /// Combines weight clustering with pruning (removing unimportant weights).
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> This combines two powerful techniques: clustering (grouping similar weights)
+    /// and pruning (removing weights that barely affect the output). It's like cleaning and organizing
+    /// a room - you throw away things you don't need (pruning) and organize what's left (clustering).
+    /// This can achieve extreme compression while maintaining good accuracy.
+    /// </para>
+    /// </remarks>
+    HybridClusteringPruning,
+
+    /// <summary>
+    /// Combines Huffman encoding with weight clustering for maximum compression.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> This technique first groups weights into clusters, then uses Huffman encoding
+    /// to efficiently store which cluster each weight belongs to. It's like first organizing books by
+    /// category, then creating a shorthand code where popular categories get short codes (like "F" for
+    /// Fiction) and rare categories get longer codes. This layered approach maximizes compression.
+    /// </para>
+    /// </remarks>
+    HybridHuffmanClustering,
+
+    /// <summary>
+    /// Sparse pruning removes small-magnitude weights, setting them to zero.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Sparse pruning is like weeding a garden - you remove the smallest,
+    /// least important weights (weeds) to make room for the important ones (flowers). Research shows
+    /// that around 90% of weights can be removed from some networks with minimal accuracy loss.
+    /// The remaining weights are stored in a sparse format that only records non-zero values.
+    /// </para>
+    /// </remarks>
+    SparsePruning,
+
+    /// <summary>
+    /// Low-rank matrix factorization approximates weight matrices with lower-rank representations.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Low-rank factorization is like summarizing a complex document.
+    /// A large weight matrix is replaced with two smaller matrices that, when multiplied together,
+    /// approximate the original. This reduces both storage and computation. It works especially
+    /// well for layers with redundant patterns in their weights.
+    /// </para>
+    /// </remarks>
+    LowRankFactorization,
+
+    /// <summary>
+    /// Deep Compression combines pruning, quantization, and Huffman coding (Han et al. 2015).
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// <b>For Beginners:</b> Deep Compression is the "full treatment" that combines multiple techniques:
+    /// 1. Prune: Remove unimportant weights (roughly 90% of connections in the paper's networks)
+    /// 2. Quantize: Share values among remaining weights (32-256 clusters, i.e. 5-8 bits per weight)
+    /// 3. Encode: Use Huffman coding for efficient storage
+    ///
+    /// This three-stage pipeline from the Han et al. 2015 paper reports 35x (AlexNet) to 49x (VGG-16)
+    /// compression with no loss of accuracy on those networks.
+    /// </para>
+    /// </remarks>
+    DeepCompression
+}