sekia
diff --git a/‎README.pod‎
Lines changed: 61 additions & 54 deletions b/‎README.pod‎
Lines changed: 61 additions & 54 deletions
@@ -1,11 +1,12 @@
 =head1 NAME
 
-Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classification/regression using linear SVM and logistic regression.
+Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classification, regression, and outlier detection using linear Support Vector Machines (SVM) and logistic regression
 
 =head1 SYNOPSIS
 
   use Algorithm::LibLinear;
-  # Constructs a model for L2-regularized L2 loss support vector classification.
+
+  # Instantiate a learner for L2-regularized L2-loss support vector classification (SVC).
   my $learner = Algorithm::LibLinear->new(
     cost => 1,
     epsilon => 0.01,
@@ -15,15 +16,20 @@ Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classificatio
       +{ label => -1, weight => 1, },
     ],
   );
-  # Loads a training data set from DATA filehandle.
+
+  # Load a training dataset from the DATA filehandle.
   my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
-  # Updates training parameter.
+
+  # Find the best parameters by cross-validation and update the learner to use them.
   $learner->find_parameters(data_set => $data_set, num_folds => 5, update => 1);
-  # Executes cross validation.
+
+  # Perform cross-validation to evaluate performance.
   my $accuracy = $learner->cross_validation(data_set => $data_set, num_folds => 5);
-  # Executes training.
+
+  # Train the model on the dataset.
   my $classifier = $learner->train(data_set => $data_set);
-  # Determines which (+1 or -1) is the class for the given feature to belong.
+
+  # Predict the class label (+1 or -1) for a given feature vector.
   my $class_label = $classifier->predict(feature => +{ 1 => 0.38, 2 => -0.5, ... });
 
   __DATA__
@@ -36,138 +42,139 @@ Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classificatio
 
 =head1 DESCRIPTION
 
-Algorithm::LibLinear is an XS module that provides features of LIBLINEAR, a fast C library for classification and regression.
+C<Algorithm::LibLinear> is an XS binding for LIBLINEAR, a fast C/C++ library for linear classification, regression, and outlier detection.
 
-Current version is based on LIBLINEAR 2.48, released on January 5, 2025.
+This version is compatible with LIBLINEAR 2.48, released on January 5, 2025.
 
 =head1 METHODS
 
-=head2 new([bias => -1.0] [, cost => 1] [, epsilon => 0.1] [, loss_sensitivity => 0.1] [, nu => 0.5] [, regularize_bias => 1] [, solver => 'L2R_L2LOSS_SVC_DUAL'] [, weights => []])
+=head2 new([bias => -1.0] [, cost => 1] [, epsilon => undef] [, loss_sensitivity => 0.1] [, nu => 0.5] [, regularize_bias => 1] [, recalculate_weights => 0] [, solver => 'L2R_L2LOSS_SVC_DUAL'] [, weights => []])
 
-Constructor. You can set several named parameters:
+Constructor. Accepts the following optional named parameters, also accessible via getter methods of the same name.
 
 =over 4
 
 =item bias
 
-Bias term to be added to prediction result (i.e., C<-B> option for LIBLINEAR's C<train> command.).
-
-This parameter makes sense only when its value is positive.
+The bias term added to feature vectors (corresponding to the C<-B> option of the LIBLINEAR C<train> command). This term is active only when its value is positive.
 
 =item cost
 
-Penalty cost for misclassification (C<-c>.)
+The penalty parameter C (corresponding to the C<-c> option).
 
 =item epsilon
 
-Termination criterion (C<-e>.)
-
-Default value of this parameter depends on the value of C<solver>.
+The tolerance of the termination criterion (corresponding to the C<-e> option). The default depends on the chosen C<solver>.
 
 =item loss_sensitivity
 
-Epsilon in loss function of SVR (C<-p>.)
+The epsilon parameter (p) in the loss function of Support Vector Regression (SVR), corresponding to the C<-p> option.
 
 =item nu
 
-Nu parameter of one-class SVM (C<-n>.)
+The nu parameter for one-class SVM (corresponding to the C<-n> option).
 
 =item regularize_bias
 
-Whether to regularize the bias term (C<-R>, negated.)
+A boolean indicating whether to include the bias term in regularization (corresponding to the negation of the C<-R> option). Defaults to true.
+
+=item recalculate_weights
+
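+A boolean indicating whether to recalculate the model's weight vector C<w> once the dual solver has finished (LIBLINEAR's C<w_recalc> parameter). This is unrelated to the per-class C<weights> option described below. This option is valid only for the dual solvers of L2-regularized L1- or L2-loss Support Vector Classifiers (C<L2R_L1LOSS_SVC_DUAL> and C<L2R_L2LOSS_SVC_DUAL>).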
 
 =item solver
 
-Kind of solver (C<-s>.)
+The solver type to use (corresponding to the C<-s> option).
 
-For classification:
+Supported solvers for classification:
 
 =over 4
 
-=item 'L2R_LR' - L2-regularized logistic regression
+=item * 'L2R_LR' - L2-regularized logistic regression
 
-=item 'L2R_L2LOSS_SVC_DUAL' - L2-regularized L2-loss SVC (dual problem)
+=item * 'L2R_L2LOSS_SVC_DUAL' - L2-regularized L2-loss SVC (dual)
 
-=item 'L2R_L2LOSS_SVC' - L2-regularized L2-loss SVC (primal problem)
+=item * 'L2R_L2LOSS_SVC' - L2-regularized L2-loss SVC (primal)
 
-=item 'L2R_L1LOSS_SVC_DUAL' - L2-regularized L1-loss SVC (dual problem)
+=item * 'L2R_L1LOSS_SVC_DUAL' - L2-regularized L1-loss SVC (dual)
 
-=item 'MCSVM_CS' - Crammer-Singer multi-class SVM
+=item * 'MCSVM_CS' - Crammer and Singer multi-class SVM
 
-=item 'L1R_L2LOSS_SVC' - L1-regularized L2-loss SVC
+=item * 'L1R_L2LOSS_SVC' - L1-regularized L2-loss SVC
 
-=item 'L1R_LR' - L1-regularized logistic regression (primal problem)
+=item * 'L1R_LR' - L1-regularized logistic regression (primal)
 
-=item 'L1R_LR_DUAL' -  L1-regularized logistic regression (dual problem)
+=item * 'L2R_LR_DUAL' - L2-regularized logistic regression (dual)
 
 =back
 
-For regression:
+Supported solvers for regression:
 
 =over 4
 
-=item 'L2R_L2LOSS_SVR' - L2-regularized L2-loss SVR (primal problem)
+=item * 'L2R_L2LOSS_SVR' - L2-regularized L2-loss SVR (primal)
 
-=item 'L2R_L2LOSS_SVR_DUAL' - L2-regularized L2-loss SVR (dual problem)
+=item * 'L2R_L2LOSS_SVR_DUAL' - L2-regularized L2-loss SVR (dual)
 
-=item 'L2R_L1LOSS_SVR_DUAL' - L2-regularized L1-loss SVR (dual problem)
+=item * 'L2R_L1LOSS_SVR_DUAL' - L2-regularized L1-loss SVR (dual)
 
 =back
 
-For outlier detection:
+Supported solvers for outlier detection:
 
 =over 4
 
-=item 'ONECLASS_SVM' - One-class SVM
+=item * 'ONECLASS_SVM' - One-class SVM
 
 =back
 
 =item weights
 
-Weights to adjust the cost parameter of different classes (C<-wi>.)
+An array reference used to adjust the penalty cost for specific classes (corresponding to the C<-wi> option).
 
-For example,
+For example:
 
   my $learner = Algorithm::LibLinear->new(
     weights => [
       +{ label => 1, weight => 0.5 },
-      +{ label => 2, weight => 1 },
+      +{ label => 2, weight => 1.0 },
       +{ label => 3, weight => 0.5 },
     ],
   );
 
-is giving a doubling weight for class 2. This means that samples belonging to class 2 have stronger effect than other samples belonging class 1 or 3 on learning.
+This configuration gives class 2 twice the penalty weight of classes 1 and 3, so samples belonging to class 2 have twice as much influence on training as samples in the other two classes.
 
-This option is useful when the number of training samples of each class is not balanced.
+This option is particularly useful for addressing class imbalance in the training dataset.
 
 =back
 
 =head2 cross_validation(data_set => $data_set, num_folds => $num_folds)
 
-Evaluates training parameter using N-fold cross validation method.
-Given data set will be split into N parts. N-1 of them will be used as a training set and the rest 1 part will be used as a test set.
-The evaluation iterates N times using each different part as a test set. Then average accuracy is returned as result.
+Evaluates training performance using N-fold cross-validation.
+The dataset is partitioned into N folds of roughly equal size. For each fold, the model is trained on the remaining N-1 folds and evaluated on the held-out fold.
+
+Returns the average classification accuracy for classification solvers, or the mean squared error (MSE) for regression solvers.
 
 =head2 find_cost_parameter(data_set => $data_set, num_folds => $num_folds [, initial => -1.0] [, update => 0])
 
-Deprecated. Use C<find_parameters> instead.
+B<Deprecated.> Use C<find_parameters> instead.
 
-Shorthand alias for C<find_parameters> only works on C<cost> parameter.
-Notice that C<loss_sensitivity> is affected too when C<update> is set.
+A convenience wrapper around C<find_parameters> that tunes only the C<cost> parameter. Note that if C<update> is enabled, the C<loss_sensitivity> parameter may also be updated in the process.
 
 =head2 find_parameters(data_set => $data_set, num_folds => $num_folds [, initial_cost => -1.0] [, initial_loss_sensitivity => -1.0] [, update => 0])
 
-Finds the best parameters by N-fold cross validation. If C<initial_cost> or C<initial_loss_sensitivity> is a negative, the value is automatically calculated.
-Works only for 3 solvers: C<'L2R_LR'>, C<'L2R_L2LOSS_SVC'> and C<'L2R_L2LOSS_SVR'>. Error will be thrown for otherwise.
+Finds the optimal hyperparameters using N-fold cross-validation. C<initial_cost> and C<initial_loss_sensitivity> are the values at which the search starts; if either is negative, a suitable starting value is calculated automatically.
+
+This method is supported only by the C<'L2R_LR'>, C<'L2R_L2LOSS_SVC'>, and C<'L2R_L2LOSS_SVR'> solvers. It throws an exception if called when using any other solver.
 
-When C<update> is set true, the instance is updated to use the found parameters. This behaviour is disabled by default.
+If the C<update> parameter is true, the learner instance is updated in-place with the discovered parameters. This is disabled by default.
 
-Return value is an ArrayRef containing 3 values: found C<cost>, found C<loss_sensitivity> (only if solver is C<'L2R_L2LOSS_SVR'>) and mean accuracy of cross validation with the found parameters.
+Returns an array reference containing three elements: the optimal C<cost>, the optimal C<loss_sensitivity> (which is C<undef> unless the solver is C<'L2R_L2LOSS_SVR'>), and the evaluation metric (average accuracy or MSE) achieved with these parameters.
 
 =head2 train(data_set => $data_set)
 
-Executes training and returns a trained L<Algorithm::LibLinear::Model> instance.
-C<data_set> is same as the C<cross_validation>'s.
+Trains a model on the provided dataset and returns an L<Algorithm::LibLinear::Model> instance.
+The C<data_set> argument must be an L<Algorithm::LibLinear::DataSet> instance.
 
 =head1 AUTHOR