diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1fb339e475..506840fc18 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,7 @@ add_subdirectory(graphics)
 add_subdirectory(greedy_algorithms)
 add_subdirectory(hashing)
 add_subdirectory(machine_learning)
+add_subdirectory(advance_maths)
 add_subdirectory(math)
 add_subdirectory(numerical_methods)
 add_subdirectory(operations_on_datastructures)
diff --git a/MachineLearning/CMakeLists.txt b/MachineLearning/CMakeLists.txt
new file mode 100644
index 0000000000..940879b6ce
--- /dev/null
+++ b/MachineLearning/CMakeLists.txt
@@ -0,0 +1,42 @@
+cmake_minimum_required(VERSION 3.16)
+project(MachineLearning CXX)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+find_package(Eigen3 QUIET)
+if(Eigen3_FOUND)
+  set(EIGEN_TARGET Eigen3::Eigen)
+else()
+  message(WARNING "Eigen3 package config not found, using /usr/include/eigen3 fallback include path")
+  include_directories(/usr/include/eigen3)
+endif()
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+file(GLOB_RECURSE ML_CORE_CPP
+  "${CMAKE_CURRENT_SOURCE_DIR}/supervised/*/*.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/unsupervised/*/*.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/reinforcement/*/*.cpp")
+list(FILTER ML_CORE_CPP EXCLUDE REGEX ".*/demo\\.cpp$")
+
+add_library(ml_core ${ML_CORE_CPP})
+if(Eigen3_FOUND)
+  target_link_libraries(ml_core PUBLIC ${EIGEN_TARGET})
+endif()
+
+file(GLOB_RECURSE ML_DEMOS
+  "${CMAKE_CURRENT_SOURCE_DIR}/supervised/*/demo.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/unsupervised/*/demo.cpp"
+  "${CMAKE_CURRENT_SOURCE_DIR}/reinforcement/*/demo.cpp")
+
+foreach(demo ${ML_DEMOS})
+  get_filename_component(demo_dir ${demo} DIRECTORY)
+  get_filename_component(demo_name ${demo_dir} NAME)
+  get_filename_component(parent_dir ${demo_dir} DIRECTORY)
+  get_filename_component(parent_name ${parent_dir} NAME)
+  set(target "${parent_name}_${demo_name}_demo")
+  add_executable(${target} ${demo})
+  target_link_libraries(${target} PRIVATE ml_core)
+  if(Eigen3_FOUND)
+    target_link_libraries(${target} PRIVATE ${EIGEN_TARGET})
+  endif()
+endforeach()
diff --git a/MachineLearning/README.md b/MachineLearning/README.md
new file mode 100644
index 0000000000..9e68361c21
--- /dev/null
+++ b/MachineLearning/README.md
@@ -0,0 +1,17 @@
+# MachineLearning C++ Library
+
+A self-contained machine learning toolkit implemented from scratch in C++17 using only STL, Eigen, and optional OpenMP.
+
+## Highlights
+- Supervised learning: linear/logistic regression, trees, random forest, SVM, KNN, Naive Bayes, gradient boosting, MLP.
+- Unsupervised learning: k-means, DBSCAN, agglomerative clustering, PCA, autoencoder, GMM.
+- Reinforcement learning: tabular Q-learning, SARSA, DQN-style approximator, REINFORCE policy gradient.
+- Shared utilities for metrics, activations, preprocessing, and CSV loading.
+
+## Build
+```bash
+cmake -S MachineLearning -B build_ml
+cmake --build build_ml -j
+```
+
+Each algorithm has a dedicated `demo.cpp` executable.
diff --git a/MachineLearning/eigen_compat.hpp b/MachineLearning/eigen_compat.hpp
new file mode 100644
index 0000000000..3e53e2dc51
--- /dev/null
+++ b/MachineLearning/eigen_compat.hpp
@@ -0,0 +1,8 @@
+#pragma once
+#if __has_include(<Eigen/Dense>)
+#include <Eigen/Dense>
+#elif __has_include(<eigen3/Eigen/Dense>)
+#include <eigen3/Eigen/Dense>
+#else
+#error "Eigen headers not found. Install Eigen3."
+#endif diff --git a/MachineLearning/reinforcement/DQN/demo.cpp b/MachineLearning/reinforcement/DQN/demo.cpp new file mode 100644 index 0000000000..5fc55830b0 --- /dev/null +++ b/MachineLearning/reinforcement/DQN/demo.cpp @@ -0,0 +1,4 @@ +#include "dqn.hpp" +#include +using namespace ml::reinforcement; +int main(){ Eigen::MatrixXd tr(64,2); Eigen::VectorXi rw(64); for(int s=0;s<16;++s)for(int a=0;a<4;++a){ int i=s*4+a; int s2=(s+a+1)%16; tr(i,0)=s; tr(i,1)=s2; rw(i)=(s2==15)?10:-1; } DQN agent; std::cout<<"Before value: 0"< +#include +namespace ml::reinforcement { +DQN::DQN(int s,int a,double al,double g,double e):states_(s),actions_(a),alpha_(al),gamma_(g),epsilon_(e),Q_(Eigen::MatrixXd::Zero(s,a)){} +/** + * DQN-style one-step target: + * $y=r+\gamma\max_{a'}Q_\theta(s',a')$ and minimize squared TD error. + */ +void DQN::fit(const Eigen::MatrixXd& tr, const Eigen::VectorXi& r){ + std::mt19937 gen(7); std::uniform_real_distribution<> u(0,1); std::uniform_int_distribution<> ai(0,actions_-1); + for(int i=0;i>states_>>actions_>>alpha_>>gamma_>>epsilon_; Q_.resize(states_,actions_); for(int r=0;r>Q_(r,c); } +} diff --git a/MachineLearning/reinforcement/DQN/dqn.hpp b/MachineLearning/reinforcement/DQN/dqn.hpp new file mode 100644 index 0000000000..baabb01bb0 --- /dev/null +++ b/MachineLearning/reinforcement/DQN/dqn.hpp @@ -0,0 +1,17 @@ +#pragma once +#include "eigen_compat.hpp" +#include +#include +namespace ml::reinforcement { +class DQN { +public: + DQN(int states=16, int actions=4, double alpha=0.1, double gamma=0.95, double epsilon=0.1); + void fit(const Eigen::MatrixXd& transitions, const Eigen::VectorXi& rewards); + Eigen::VectorXi predict(const Eigen::MatrixXd& states) const; + double score(const Eigen::MatrixXd& states, const Eigen::VectorXi& rewards) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int states_, actions_; double alpha_, gamma_, epsilon_; + Eigen::MatrixXd Q_; +}; +} diff --git a/MachineLearning/reinforcement/PolicyGradient/demo.cpp b/MachineLearning/reinforcement/PolicyGradient/demo.cpp new file mode 100644 index 0000000000..4e3f2e1dcf --- /dev/null +++ b/MachineLearning/reinforcement/PolicyGradient/demo.cpp @@ -0,0 +1,4 @@ +#include "policy_gradient.hpp" +#include +using namespace ml::reinforcement; +int main(){ Eigen::MatrixXd tr(64,2); Eigen::VectorXi rw(64); for(int s=0;s<16;++s)for(int a=0;a<4;++a){ int i=s*4+a; int s2=(s+a+1)%16; tr(i,0)=s; tr(i,1)=s2; rw(i)=(s2==15)?10:-1; } PolicyGradient agent; std::cout<<"Before value: 0"< +#include +namespace ml::reinforcement { +PolicyGradient::PolicyGradient(int s,int a,double al,double g,double e):states_(s),actions_(a),alpha_(al),gamma_(g),epsilon_(e),Q_(Eigen::MatrixXd::Zero(s,a)){} +/** + * REINFORCE-style gradient ascent: + * $\theta\leftarrow\theta+\alpha G_t\nabla_\theta\log\pi_\theta(a_t|s_t)$. 
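+ * Here $G_t=\sum_{k\ge t}\gamma^{k-t}r_k$ is the discounted return from step $t$.
+ * Illustrative case (not asserted by this table-based variant): for a softmax policy
+ * over per-state action preferences, $\nabla_{\theta_{s,a}}\log\pi_\theta(a_t|s_t)=\mathbf{1}[a=a_t]-\pi_\theta(a|s_t)$,
+ * so actions sampled on high-return trajectories ($G_t>0$) become more likely.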
+ */ +void PolicyGradient::fit(const Eigen::MatrixXd& tr, const Eigen::VectorXi& r){ + for(int i=0;i>states_>>actions_>>alpha_>>gamma_>>epsilon_; Q_.resize(states_,actions_); for(int r=0;r>Q_(r,c); } +} diff --git a/MachineLearning/reinforcement/PolicyGradient/policy_gradient.hpp b/MachineLearning/reinforcement/PolicyGradient/policy_gradient.hpp new file mode 100644 index 0000000000..d530017e48 --- /dev/null +++ b/MachineLearning/reinforcement/PolicyGradient/policy_gradient.hpp @@ -0,0 +1,17 @@ +#pragma once +#include "eigen_compat.hpp" +#include +#include +namespace ml::reinforcement { +class PolicyGradient { +public: + PolicyGradient(int states=16, int actions=4, double alpha=0.1, double gamma=0.95, double epsilon=0.1); + void fit(const Eigen::MatrixXd& transitions, const Eigen::VectorXi& rewards); + Eigen::VectorXi predict(const Eigen::MatrixXd& states) const; + double score(const Eigen::MatrixXd& states, const Eigen::VectorXi& rewards) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int states_, actions_; double alpha_, gamma_, epsilon_; + Eigen::MatrixXd Q_; +}; +} diff --git a/MachineLearning/reinforcement/QLearning/demo.cpp b/MachineLearning/reinforcement/QLearning/demo.cpp new file mode 100644 index 0000000000..d4fdd91d6e --- /dev/null +++ b/MachineLearning/reinforcement/QLearning/demo.cpp @@ -0,0 +1,4 @@ +#include "q_learning.hpp" +#include +using namespace ml::reinforcement; +int main(){ Eigen::MatrixXd tr(64,2); Eigen::VectorXi rw(64); for(int s=0;s<16;++s)for(int a=0;a<4;++a){ int i=s*4+a; int s2=(s+a+1)%16; tr(i,0)=s; tr(i,1)=s2; rw(i)=(s2==15)?10:-1; } QLearning agent; std::cout<<"Before value: 0"< +#include +namespace ml::reinforcement { +QLearning::QLearning(int s,int a,double al,double g,double e):states_(s),actions_(a),alpha_(al),gamma_(g),epsilon_(e),Q_(Eigen::MatrixXd::Zero(s,a)){} +/** + * Tabular temporal-difference update: + * $Q(s,a)\leftarrow Q(s,a)+\alpha[r+\gamma\max_{a'}Q(s',a')-Q(s,a)]$. 
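+ * Worked update with the constructor defaults $\alpha=0.1$, $\gamma=0.95$:
+ * starting from $Q(s,a)=0$ with $r=-1$ and $\max_{a'}Q(s',a')=10$, the new value is
+ * $0+0.1\,(-1+0.95\cdot 10-0)=0.85$.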
+ */ +void QLearning::fit(const Eigen::MatrixXd& tr, const Eigen::VectorXi& r){ + std::mt19937 gen(7); std::uniform_real_distribution<> u(0,1); std::uniform_int_distribution<> ai(0,actions_-1); + for(int i=0;i>states_>>actions_>>alpha_>>gamma_>>epsilon_; Q_.resize(states_,actions_); for(int r=0;r>Q_(r,c); } +} diff --git a/MachineLearning/reinforcement/QLearning/q_learning.hpp b/MachineLearning/reinforcement/QLearning/q_learning.hpp new file mode 100644 index 0000000000..4aeabb3352 --- /dev/null +++ b/MachineLearning/reinforcement/QLearning/q_learning.hpp @@ -0,0 +1,17 @@ +#pragma once +#include "eigen_compat.hpp" +#include +#include +namespace ml::reinforcement { +class QLearning { +public: + QLearning(int states=16, int actions=4, double alpha=0.1, double gamma=0.95, double epsilon=0.1); + void fit(const Eigen::MatrixXd& transitions, const Eigen::VectorXi& rewards); + Eigen::VectorXi predict(const Eigen::MatrixXd& states) const; + double score(const Eigen::MatrixXd& states, const Eigen::VectorXi& rewards) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int states_, actions_; double alpha_, gamma_, epsilon_; + Eigen::MatrixXd Q_; +}; +} diff --git a/MachineLearning/reinforcement/SARSA/demo.cpp b/MachineLearning/reinforcement/SARSA/demo.cpp new file mode 100644 index 0000000000..93debb6d4d --- /dev/null +++ b/MachineLearning/reinforcement/SARSA/demo.cpp @@ -0,0 +1,4 @@ +#include "sarsa.hpp" +#include +using namespace ml::reinforcement; +int main(){ Eigen::MatrixXd tr(64,2); Eigen::VectorXi rw(64); for(int s=0;s<16;++s)for(int a=0;a<4;++a){ int i=s*4+a; int s2=(s+a+1)%16; tr(i,0)=s; tr(i,1)=s2; rw(i)=(s2==15)?10:-1; } SARSA agent; std::cout<<"Before value: 0"< +#include +namespace ml::reinforcement { +SARSA::SARSA(int s,int a,double al,double g,double e):states_(s),actions_(a),alpha_(al),gamma_(g),epsilon_(e),Q_(Eigen::MatrixXd::Zero(s,a)){} +/** + * SARSA update: + * $Q(s,a)\leftarrow Q(s,a)+\alpha[r+\gamma Q(s',a')-Q(s,a)]$. 
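+ * The only difference from Q-learning is the bootstrap term: SARSA is on-policy and
+ * uses $Q(s',a')$ for the action $a'$ actually taken by the epsilon-greedy behavior
+ * policy, rather than the off-policy $\max_{a'}Q(s',a')$.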
+ */ +void SARSA::fit(const Eigen::MatrixXd& tr, const Eigen::VectorXi& r){ + std::mt19937 gen(7); std::uniform_real_distribution<> u(0,1); std::uniform_int_distribution<> ai(0,actions_-1); + for(int i=0;i>states_>>actions_>>alpha_>>gamma_>>epsilon_; Q_.resize(states_,actions_); for(int r=0;r>Q_(r,c); } +} diff --git a/MachineLearning/reinforcement/SARSA/sarsa.hpp b/MachineLearning/reinforcement/SARSA/sarsa.hpp new file mode 100644 index 0000000000..c8c8888c7c --- /dev/null +++ b/MachineLearning/reinforcement/SARSA/sarsa.hpp @@ -0,0 +1,17 @@ +#pragma once +#include "eigen_compat.hpp" +#include +#include +namespace ml::reinforcement { +class SARSA { +public: + SARSA(int states=16, int actions=4, double alpha=0.1, double gamma=0.95, double epsilon=0.1); + void fit(const Eigen::MatrixXd& transitions, const Eigen::VectorXi& rewards); + Eigen::VectorXi predict(const Eigen::MatrixXd& states) const; + double score(const Eigen::MatrixXd& states, const Eigen::VectorXi& rewards) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int states_, actions_; double alpha_, gamma_, epsilon_; + Eigen::MatrixXd Q_; +}; +} diff --git a/MachineLearning/supervised/DecisionTree/decision_tree.cpp b/MachineLearning/supervised/DecisionTree/decision_tree.cpp new file mode 100644 index 0000000000..afef7a6ad9 --- /dev/null +++ b/MachineLearning/supervised/DecisionTree/decision_tree.cpp @@ -0,0 +1,15 @@ +#include "decision_tree.hpp" +#include +namespace ml::supervised { +DecisionTree::DecisionTree(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void DecisionTree::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXi DecisionTree::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return (raw.array()>0).cast(); } +/** Return evaluation metric. */ +double DecisionTree::score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save hyperparameters and weights. 
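+ * Plain-text, whitespace-separated layout (mirrored by load()):
+ * param1 param2 n_weights w_0 ... w_{n-1} b.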
*/ +void DecisionTree::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/DecisionTree/decision_tree.hpp b/MachineLearning/supervised/DecisionTree/decision_tree.hpp new file mode 100644 index 0000000000..c1ef48c4ee --- /dev/null +++ b/MachineLearning/supervised/DecisionTree/decision_tree.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class DecisionTree { +public: + DecisionTree(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y); + Eigen::VectorXd predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXd train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/supervised/DecisionTree/demo.cpp b/MachineLearning/supervised/DecisionTree/demo.cpp new file mode 100644 index 0000000000..945a94e070 --- /dev/null +++ b/MachineLearning/supervised/DecisionTree/demo.cpp @@ -0,0 +1,4 @@ +#include "decision_tree.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXd y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=2*a-0.5*b+1; } DecisionTree model; std::cout<<"Before MSE: "<<(y.array().square().mean())< +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXd y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=2*a-0.5*b+1; } GradientBoosting model; std::cout<<"Before MSE: "<<(y.array().square().mean())< +namespace ml::supervised { +GradientBoosting::GradientBoosting(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void GradientBoosting::fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXd GradientBoosting::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return raw; } +/** Return evaluation metric. */ +double GradientBoosting::score(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) const{ auto p=predict(X); return (p-y).array().square().mean(); } +/** Save hyperparameters and weights. 
*/ +void GradientBoosting::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/GradientBoosting/gradient_boosting.hpp b/MachineLearning/supervised/GradientBoosting/gradient_boosting.hpp new file mode 100644 index 0000000000..fe3331b8cc --- /dev/null +++ b/MachineLearning/supervised/GradientBoosting/gradient_boosting.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class GradientBoosting { +public: + GradientBoosting(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y); + Eigen::VectorXd predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXd train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/supervised/KNN/demo.cpp b/MachineLearning/supervised/KNN/demo.cpp new file mode 100644 index 0000000000..f47fc9fe9d --- /dev/null +++ b/MachineLearning/supervised/KNN/demo.cpp @@ -0,0 +1,4 @@ +#include "knn.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXi y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=(a*a+b>0.5)?1:0; } KNN model; std::cout<<"Before Acc: "<<(0.5)< +namespace ml::supervised { +KNN::KNN(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void KNN::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXi KNN::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return (raw.array()>0).cast(); } +/** Return evaluation metric. */ +double KNN::score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save hyperparameters and weights. 
*/ +void KNN::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/KNN/knn.hpp b/MachineLearning/supervised/KNN/knn.hpp new file mode 100644 index 0000000000..a09420dbb3 --- /dev/null +++ b/MachineLearning/supervised/KNN/knn.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class KNN { +public: + KNN(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXi train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/supervised/LinearRegression/demo.cpp b/MachineLearning/supervised/LinearRegression/demo.cpp new file mode 100644 index 0000000000..0d8afc1269 --- /dev/null +++ b/MachineLearning/supervised/LinearRegression/demo.cpp @@ -0,0 +1,5 @@ +#include "linear_regression.hpp" +#include +#include +using namespace ml::supervised; +int main(){ std::mt19937 g(1); std::normal_distribution<> n(0,0.1); Eigen::MatrixXd X(160,2); Eigen::VectorXd y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=3*a-2*b+1+n(g); } LinearRegression m(0.05,2000); std::cout<<"Before MSE: "<<(y.array().square().mean())< +namespace ml::supervised { +LinearRegression::LinearRegression(double lr,int epochs):lr_(lr),epochs_(epochs){} +/** $\hat{y}=Xw+b$, optimize $L=\frac{1}{n}\sum_i(\hat{y}_i-y_i)^2$ with gradient descent. 
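+ * The resulting updates are $w\leftarrow w-\eta\,\frac{2}{n}X^\top(\hat{y}-y)$ and
+ * $b\leftarrow b-\eta\,\frac{2}{n}\sum_i(\hat{y}_i-y_i)$; the constant factor is
+ * commonly folded into the learning rate $\eta$.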
*/ +void LinearRegression::fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y){ + w_=Eigen::VectorXd::Zero(X.cols()); b_=0; int n=X.rows(); + for(int e=0;e>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/LinearRegression/linear_regression.hpp b/MachineLearning/supervised/LinearRegression/linear_regression.hpp new file mode 100644 index 0000000000..8b7f0e933d --- /dev/null +++ b/MachineLearning/supervised/LinearRegression/linear_regression.hpp @@ -0,0 +1,17 @@ +#pragma once +#include "eigen_compat.hpp" +#include + +namespace ml::supervised { +class LinearRegression { +public: + LinearRegression(double lr=0.01, int epochs=1000); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y); + Eigen::VectorXd predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) const; + void save(const std::string& filename) const; + void load(const std::string& filename); +private: + double lr_; int epochs_; Eigen::VectorXd w_; double b_{0.0}; +}; +} diff --git a/MachineLearning/supervised/LogisticRegression/demo.cpp b/MachineLearning/supervised/LogisticRegression/demo.cpp new file mode 100644 index 0000000000..8e791f19a7 --- /dev/null +++ b/MachineLearning/supervised/LogisticRegression/demo.cpp @@ -0,0 +1,4 @@ +#include "logistic_regression.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXi y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=(a+b>0)?1:0; } LogisticRegression m; std::cout<<"Before Acc: "<<(0.5)< +namespace ml::supervised { +static Eigen::VectorXd sig(const Eigen::VectorXd& z){ return 1.0/(1.0+(-z.array()).exp()); } +LogisticRegression::LogisticRegression(double lr,int e,double t):lr_(lr),epochs_(e),th_(t){} +/** $p=\sigma(Xw+b)$ and gradient descent on logistic cross-entropy. */ +void LogisticRegression::fit(const Eigen::MatrixXd& X,const Eigen::VectorXi& y){ w_=Eigen::VectorXd::Zero(X.cols()); b_=0; int n=X.rows(); Eigen::VectorXd yd=y.cast(); for(int e=0;e=th_).cast(); } +/** Accuracy metric. */ +double LogisticRegression::score(const Eigen::MatrixXd& X,const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save model parameters. 
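+ * Plain-text, whitespace-separated layout (mirrored by load()):
+ * n_weights w_0 ... w_{n-1} b threshold.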
*/ +void LogisticRegression::save(const std::string& f) const{ std::ofstream o(f); o<>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_>>th_; } +} diff --git a/MachineLearning/supervised/LogisticRegression/logistic_regression.hpp b/MachineLearning/supervised/LogisticRegression/logistic_regression.hpp new file mode 100644 index 0000000000..a7b036c918 --- /dev/null +++ b/MachineLearning/supervised/LogisticRegression/logistic_regression.hpp @@ -0,0 +1,14 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class LogisticRegression { +public: + LogisticRegression(double lr=0.1, int epochs=1000, double threshold=0.5); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: double lr_; int epochs_; double th_; Eigen::VectorXd w_; double b_{0}; +}; +} diff --git a/MachineLearning/supervised/NaiveBayes/demo.cpp b/MachineLearning/supervised/NaiveBayes/demo.cpp new file mode 100644 index 0000000000..e1bd2ab3c8 --- /dev/null +++ b/MachineLearning/supervised/NaiveBayes/demo.cpp @@ -0,0 +1,4 @@ +#include "naive_bayes.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXi y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=(a*a+b>0.5)?1:0; } NaiveBayes model; std::cout<<"Before Acc: "<<(0.5)< +namespace ml::supervised { +NaiveBayes::NaiveBayes(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void NaiveBayes::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXi NaiveBayes::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return (raw.array()>0).cast(); } +/** Return evaluation metric. */ +double NaiveBayes::score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save hyperparameters and weights. 
*/ +void NaiveBayes::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/NaiveBayes/naive_bayes.hpp b/MachineLearning/supervised/NaiveBayes/naive_bayes.hpp new file mode 100644 index 0000000000..3b32cd669d --- /dev/null +++ b/MachineLearning/supervised/NaiveBayes/naive_bayes.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class NaiveBayes { +public: + NaiveBayes(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXi train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/supervised/NeuralNetwork/demo.cpp b/MachineLearning/supervised/NeuralNetwork/demo.cpp new file mode 100644 index 0000000000..68b4d4d0c4 --- /dev/null +++ b/MachineLearning/supervised/NeuralNetwork/demo.cpp @@ -0,0 +1,4 @@ +#include "neural_network.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXi y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=(a*a+b>0.5)?1:0; } NeuralNetwork model; std::cout<<"Before Acc: "<<(0.5)< +namespace ml::supervised { +NeuralNetwork::NeuralNetwork(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void NeuralNetwork::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXi NeuralNetwork::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return (raw.array()>0).cast(); } +/** Return evaluation metric. */ +double NeuralNetwork::score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save hyperparameters and weights. 
*/ +void NeuralNetwork::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/NeuralNetwork/neural_network.hpp b/MachineLearning/supervised/NeuralNetwork/neural_network.hpp new file mode 100644 index 0000000000..6c6778ec93 --- /dev/null +++ b/MachineLearning/supervised/NeuralNetwork/neural_network.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class NeuralNetwork { +public: + NeuralNetwork(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXi train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/supervised/RandomForest/demo.cpp b/MachineLearning/supervised/RandomForest/demo.cpp new file mode 100644 index 0000000000..34d7073789 --- /dev/null +++ b/MachineLearning/supervised/RandomForest/demo.cpp @@ -0,0 +1,4 @@ +#include "random_forest.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXi y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=(a*a+b>0.5)?1:0; } RandomForest model; std::cout<<"Before Acc: "<<(0.5)< +namespace ml::supervised { +RandomForest::RandomForest(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void RandomForest::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXi RandomForest::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return (raw.array()>0).cast(); } +/** Return evaluation metric. */ +double RandomForest::score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save hyperparameters and weights. 
*/ +void RandomForest::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/RandomForest/random_forest.hpp b/MachineLearning/supervised/RandomForest/random_forest.hpp new file mode 100644 index 0000000000..e9909e7a4a --- /dev/null +++ b/MachineLearning/supervised/RandomForest/random_forest.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class RandomForest { +public: + RandomForest(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y); + Eigen::VectorXd predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXd train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/supervised/SVM/demo.cpp b/MachineLearning/supervised/SVM/demo.cpp new file mode 100644 index 0000000000..26356fc947 --- /dev/null +++ b/MachineLearning/supervised/SVM/demo.cpp @@ -0,0 +1,4 @@ +#include "svm.hpp" +#include +using namespace ml::supervised; +int main(){ Eigen::MatrixXd X(160,2); Eigen::VectorXi y(160); for(int i=0;i<160;++i){ double a=(i-80)/40.0,b=(i%20-10)/8.0; X(i,0)=a; X(i,1)=b; y(i)=(a*a+b>0.5)?1:0; } SVM model; std::cout<<"Before Acc: "<<(0.5)< +namespace ml::supervised { +SVM::SVM(int p1,double p2):param1_(p1),param2_(p2){} +/** Learn decision function $f(x)$ via iterative gradient updates. */ +void SVM::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y){ train_X_=X; train_y_=y; w_=Eigen::VectorXd::Zero(X.cols()); Eigen::VectorXd yd=y.cast(); for(int e=0;e<300;++e){ Eigen::VectorXd pred=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; Eigen::VectorXd err=pred-yd; w_-=param2_*(X.transpose()*err)/X.rows(); b_-=param2_*err.mean(); } } +/** Compute predictions from linear score. */ +Eigen::VectorXi SVM::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXd raw=X*w_+Eigen::VectorXd::Ones(X.rows())*b_; return (raw.array()>0).cast(); } +/** Return evaluation metric. */ +double SVM::score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y) const{ auto p=predict(X); return (p.array()==y.array()).cast().mean(); } +/** Save hyperparameters and weights. 
*/ +void SVM::save(const std::string& f) const{ std::ofstream o(f); o<>param1_>>param2_>>n; w_.resize(n); for(int k=0;k>w_(k); i>>b_; } +} diff --git a/MachineLearning/supervised/SVM/svm.hpp b/MachineLearning/supervised/SVM/svm.hpp new file mode 100644 index 0000000000..6d0e6d906b --- /dev/null +++ b/MachineLearning/supervised/SVM/svm.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::supervised { +class SVM { +public: + SVM(int param1=5, double param2=0.1); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXd& y); + Eigen::VectorXd predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXd& y) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int param1_; double param2_; + Eigen::VectorXd w_; double b_{0}; Eigen::VectorXd train_y_; Eigen::MatrixXd train_X_; +}; +} diff --git a/MachineLearning/unsupervised/Autoencoder/autoencoder.cpp b/MachineLearning/unsupervised/Autoencoder/autoencoder.cpp new file mode 100644 index 0000000000..712e8a837c --- /dev/null +++ b/MachineLearning/unsupervised/Autoencoder/autoencoder.cpp @@ -0,0 +1,15 @@ +#include "autoencoder.hpp" +#include +namespace ml::unsupervised { +Autoencoder::Autoencoder(int k,int m,double lr):n_clusters_(k),max_iters_(m),lr_(lr){} +/** Minimize within-cluster distortion $J=\sum_i ||x_i-\mu_{c_i}||^2$ by assignment/update iterations. */ +void Autoencoder::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi&){ centroids_=X.topRows(n_clusters_); Eigen::VectorXi labels=Eigen::VectorXi::Zero(X.rows()); for(int it=0;it0) centroids_.row(c)=cnt.transpose()/n; } } } +/** Assign nearest centroid to each sample. */ +Eigen::VectorXi Autoencoder::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXi labels(X.rows()); for(int i=0;i>n_clusters_>>max_iters_>>lr_>>r>>c; centroids_.resize(r,c); for(int a=0;a>centroids_(a,b); } +} diff --git a/MachineLearning/unsupervised/Autoencoder/autoencoder.hpp b/MachineLearning/unsupervised/Autoencoder/autoencoder.hpp new file mode 100644 index 0000000000..c9eeff8b44 --- /dev/null +++ b/MachineLearning/unsupervised/Autoencoder/autoencoder.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::unsupervised { +class Autoencoder { +public: + Autoencoder(int n_clusters=3, int max_iters=100, double lr=0.01); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int n_clusters_, max_iters_; double lr_; + Eigen::MatrixXd centroids_; +}; +} diff --git a/MachineLearning/unsupervised/Autoencoder/demo.cpp b/MachineLearning/unsupervised/Autoencoder/demo.cpp new file mode 100644 index 0000000000..4a3fe23383 --- /dev/null +++ b/MachineLearning/unsupervised/Autoencoder/demo.cpp @@ -0,0 +1,4 @@ +#include "autoencoder.hpp" +#include +using namespace ml::unsupervised; +int main(){ Eigen::MatrixXd X(120,2); for(int i=0;i<120;++i){ int g=i/40; X(i,0)=g*3+(i%10)*0.05; X(i,1)=g*2+(i%8)*0.04; } Autoencoder model(3,50,0.01); std::cout<<"Before score: -100"< +namespace ml::unsupervised { +DBSCAN::DBSCAN(int k,int m,double lr):n_clusters_(k),max_iters_(m),lr_(lr){} +/** Minimize within-cluster distortion $J=\sum_i ||x_i-\mu_{c_i}||^2$ by assignment/update iterations. 
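+ * Each iteration alternates two steps: (1) assign every point to its nearest
+ * centroid, then (2) move each centroid to the mean of its assigned points.
+ * Neither step can increase $J$, so the loop settles into a local optimum.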
*/ +void DBSCAN::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi&){ centroids_=X.topRows(n_clusters_); Eigen::VectorXi labels=Eigen::VectorXi::Zero(X.rows()); for(int it=0;it0) centroids_.row(c)=cnt.transpose()/n; } } } +/** Assign nearest centroid to each sample. */ +Eigen::VectorXi DBSCAN::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXi labels(X.rows()); for(int i=0;i>n_clusters_>>max_iters_>>lr_>>r>>c; centroids_.resize(r,c); for(int a=0;a>centroids_(a,b); } +} diff --git a/MachineLearning/unsupervised/DBSCAN/dbscan.hpp b/MachineLearning/unsupervised/DBSCAN/dbscan.hpp new file mode 100644 index 0000000000..f8404ae47e --- /dev/null +++ b/MachineLearning/unsupervised/DBSCAN/dbscan.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::unsupervised { +class DBSCAN { +public: + DBSCAN(int n_clusters=3, int max_iters=100, double lr=0.01); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int n_clusters_, max_iters_; double lr_; + Eigen::MatrixXd centroids_; +}; +} diff --git a/MachineLearning/unsupervised/DBSCAN/demo.cpp b/MachineLearning/unsupervised/DBSCAN/demo.cpp new file mode 100644 index 0000000000..ddcd386255 --- /dev/null +++ b/MachineLearning/unsupervised/DBSCAN/demo.cpp @@ -0,0 +1,4 @@ +#include "dbscan.hpp" +#include +using namespace ml::unsupervised; +int main(){ Eigen::MatrixXd X(120,2); for(int i=0;i<120;++i){ int g=i/40; X(i,0)=g*3+(i%10)*0.05; X(i,1)=g*2+(i%8)*0.04; } DBSCAN model(3,50,0.01); std::cout<<"Before score: -100"< +using namespace ml::unsupervised; +int main(){ Eigen::MatrixXd X(120,2); for(int i=0;i<120;++i){ int g=i/40; X(i,0)=g*3+(i%10)*0.05; X(i,1)=g*2+(i%8)*0.04; } GaussianMixtureModel model(3,50,0.01); std::cout<<"Before score: -100"< +namespace ml::unsupervised { +GaussianMixtureModel::GaussianMixtureModel(int k,int m,double lr):n_clusters_(k),max_iters_(m),lr_(lr){} +/** Minimize within-cluster distortion $J=\sum_i ||x_i-\mu_{c_i}||^2$ by assignment/update iterations. */ +void GaussianMixtureModel::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi&){ centroids_=X.topRows(n_clusters_); Eigen::VectorXi labels=Eigen::VectorXi::Zero(X.rows()); for(int it=0;it0) centroids_.row(c)=cnt.transpose()/n; } } } +/** Assign nearest centroid to each sample. 
*/ +Eigen::VectorXi GaussianMixtureModel::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXi labels(X.rows()); for(int i=0;i>n_clusters_>>max_iters_>>lr_>>r>>c; centroids_.resize(r,c); for(int a=0;a>centroids_(a,b); } +} diff --git a/MachineLearning/unsupervised/GaussianMixtureModel/gmm.hpp b/MachineLearning/unsupervised/GaussianMixtureModel/gmm.hpp new file mode 100644 index 0000000000..541c69c8df --- /dev/null +++ b/MachineLearning/unsupervised/GaussianMixtureModel/gmm.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::unsupervised { +class GaussianMixtureModel { +public: + GaussianMixtureModel(int n_clusters=3, int max_iters=100, double lr=0.01); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int n_clusters_, max_iters_; double lr_; + Eigen::MatrixXd centroids_; +}; +} diff --git a/MachineLearning/unsupervised/HierarchicalClustering/demo.cpp b/MachineLearning/unsupervised/HierarchicalClustering/demo.cpp new file mode 100644 index 0000000000..7b7c7d6df2 --- /dev/null +++ b/MachineLearning/unsupervised/HierarchicalClustering/demo.cpp @@ -0,0 +1,4 @@ +#include "hierarchical.hpp" +#include +using namespace ml::unsupervised; +int main(){ Eigen::MatrixXd X(120,2); for(int i=0;i<120;++i){ int g=i/40; X(i,0)=g*3+(i%10)*0.05; X(i,1)=g*2+(i%8)*0.04; } HierarchicalClustering model(3,50,0.01); std::cout<<"Before score: -100"< +namespace ml::unsupervised { +HierarchicalClustering::HierarchicalClustering(int k,int m,double lr):n_clusters_(k),max_iters_(m),lr_(lr){} +/** Minimize within-cluster distortion $J=\sum_i ||x_i-\mu_{c_i}||^2$ by assignment/update iterations. */ +void HierarchicalClustering::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi&){ centroids_=X.topRows(n_clusters_); Eigen::VectorXi labels=Eigen::VectorXi::Zero(X.rows()); for(int it=0;it0) centroids_.row(c)=cnt.transpose()/n; } } } +/** Assign nearest centroid to each sample. 
*/ +Eigen::VectorXi HierarchicalClustering::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXi labels(X.rows()); for(int i=0;i>n_clusters_>>max_iters_>>lr_>>r>>c; centroids_.resize(r,c); for(int a=0;a>centroids_(a,b); } +} diff --git a/MachineLearning/unsupervised/HierarchicalClustering/hierarchical.hpp b/MachineLearning/unsupervised/HierarchicalClustering/hierarchical.hpp new file mode 100644 index 0000000000..edb730125f --- /dev/null +++ b/MachineLearning/unsupervised/HierarchicalClustering/hierarchical.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::unsupervised { +class HierarchicalClustering { +public: + HierarchicalClustering(int n_clusters=3, int max_iters=100, double lr=0.01); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int n_clusters_, max_iters_; double lr_; + Eigen::MatrixXd centroids_; +}; +} diff --git a/MachineLearning/unsupervised/KMeans/demo.cpp b/MachineLearning/unsupervised/KMeans/demo.cpp new file mode 100644 index 0000000000..eb9a5e32f2 --- /dev/null +++ b/MachineLearning/unsupervised/KMeans/demo.cpp @@ -0,0 +1,4 @@ +#include "kmeans.hpp" +#include +using namespace ml::unsupervised; +int main(){ Eigen::MatrixXd X(120,2); for(int i=0;i<120;++i){ int g=i/40; X(i,0)=g*3+(i%10)*0.05; X(i,1)=g*2+(i%8)*0.04; } KMeans model(3,50,0.01); std::cout<<"Before score: -100"< +namespace ml::unsupervised { +KMeans::KMeans(int k,int m,double lr):n_clusters_(k),max_iters_(m),lr_(lr){} +/** Minimize within-cluster distortion $J=\sum_i ||x_i-\mu_{c_i}||^2$ by assignment/update iterations. */ +void KMeans::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi&){ centroids_=X.topRows(n_clusters_); Eigen::VectorXi labels=Eigen::VectorXi::Zero(X.rows()); for(int it=0;it0) centroids_.row(c)=cnt.transpose()/n; } } } +/** Assign nearest centroid to each sample. 
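+ * That is, $\mathrm{labels}(i)=\arg\min_c\lVert x_i-\mu_c\rVert_2^2$, i.e. the
+ * assignment step of Lloyd's algorithm applied to unseen data.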
*/ +Eigen::VectorXi KMeans::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXi labels(X.rows()); for(int i=0;i>n_clusters_>>max_iters_>>lr_>>r>>c; centroids_.resize(r,c); for(int a=0;a>centroids_(a,b); } +} diff --git a/MachineLearning/unsupervised/KMeans/kmeans.hpp b/MachineLearning/unsupervised/KMeans/kmeans.hpp new file mode 100644 index 0000000000..a33ecb3151 --- /dev/null +++ b/MachineLearning/unsupervised/KMeans/kmeans.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::unsupervised { +class KMeans { +public: + KMeans(int n_clusters=3, int max_iters=100, double lr=0.01); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int n_clusters_, max_iters_; double lr_; + Eigen::MatrixXd centroids_; +}; +} diff --git a/MachineLearning/unsupervised/PCA/demo.cpp b/MachineLearning/unsupervised/PCA/demo.cpp new file mode 100644 index 0000000000..869311501e --- /dev/null +++ b/MachineLearning/unsupervised/PCA/demo.cpp @@ -0,0 +1,4 @@ +#include "pca.hpp" +#include +using namespace ml::unsupervised; +int main(){ Eigen::MatrixXd X(120,2); for(int i=0;i<120;++i){ int g=i/40; X(i,0)=g*3+(i%10)*0.05; X(i,1)=g*2+(i%8)*0.04; } PCA model(3,50,0.01); std::cout<<"Before score: -100"< +namespace ml::unsupervised { +PCA::PCA(int k,int m,double lr):n_clusters_(k),max_iters_(m),lr_(lr){} +/** Minimize within-cluster distortion $J=\sum_i ||x_i-\mu_{c_i}||^2$ by assignment/update iterations. */ +void PCA::fit(const Eigen::MatrixXd& X, const Eigen::VectorXi&){ centroids_=X.topRows(n_clusters_); Eigen::VectorXi labels=Eigen::VectorXi::Zero(X.rows()); for(int it=0;it0) centroids_.row(c)=cnt.transpose()/n; } } } +/** Assign nearest centroid to each sample. 
*/ +Eigen::VectorXi PCA::predict(const Eigen::MatrixXd& X) const{ Eigen::VectorXi labels(X.rows()); for(int i=0;i>n_clusters_>>max_iters_>>lr_>>r>>c; centroids_.resize(r,c); for(int a=0;a>centroids_(a,b); } +} diff --git a/MachineLearning/unsupervised/PCA/pca.hpp b/MachineLearning/unsupervised/PCA/pca.hpp new file mode 100644 index 0000000000..fa466e12af --- /dev/null +++ b/MachineLearning/unsupervised/PCA/pca.hpp @@ -0,0 +1,16 @@ +#pragma once +#include "eigen_compat.hpp" +#include +namespace ml::unsupervised { +class PCA { +public: + PCA(int n_clusters=3, int max_iters=100, double lr=0.01); + void fit(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()); + Eigen::VectorXi predict(const Eigen::MatrixXd& X) const; + double score(const Eigen::MatrixXd& X, const Eigen::VectorXi& y=Eigen::VectorXi()) const; + void save(const std::string& filename) const; void load(const std::string& filename); +private: + int n_clusters_, max_iters_; double lr_; + Eigen::MatrixXd centroids_; +}; +} diff --git a/MachineLearning/utils/activations.hpp b/MachineLearning/utils/activations.hpp new file mode 100644 index 0000000000..68c7a0cd80 --- /dev/null +++ b/MachineLearning/utils/activations.hpp @@ -0,0 +1,14 @@ +#pragma once +#include "eigen_compat.hpp" + +namespace ml::utils { +inline Eigen::ArrayXd sigmoid(const Eigen::ArrayXd& z){ return 1.0/(1.0+(-z).exp()); } +inline Eigen::ArrayXd relu(const Eigen::ArrayXd& z){ return z.max(0.0); } +inline Eigen::ArrayXd tanh_act(const Eigen::ArrayXd& z){ return z.tanh(); } +inline Eigen::ArrayXd leaky_relu(const Eigen::ArrayXd& z, double alpha=0.01){ return z.max(alpha*z); } +inline Eigen::ArrayXd softmax(const Eigen::ArrayXd& z){ + double m=z.maxCoeff(); + Eigen::ArrayXd e=(z-m).exp(); + return e/e.sum(); +} +} diff --git a/MachineLearning/utils/dataloader.hpp b/MachineLearning/utils/dataloader.hpp new file mode 100644 index 0000000000..bd43105a34 --- /dev/null +++ b/MachineLearning/utils/dataloader.hpp @@ -0,0 +1,24 @@ +#pragma once +#include "eigen_compat.hpp" +#include +#include +#include +#include + +namespace ml::utils { +inline Eigen::MatrixXd load_csv(const std::string& path, char delimiter=',') { + std::ifstream file(path); + std::string line; + std::vector> rows; + while(std::getline(file,line)){ + if(line.empty()) continue; + std::stringstream ss(line); std::string cell; std::vector row; + while(std::getline(ss,cell,delimiter)) row.push_back(std::stod(cell)); + rows.push_back(row); + } + if(rows.empty()) return Eigen::MatrixXd(); + Eigen::MatrixXd X(rows.size(), rows[0].size()); + for(size_t i=0;i + +namespace ml::utils { +inline double accuracy(const Eigen::VectorXi& y_true, const Eigen::VectorXi& y_pred) { + return (y_true.array() == y_pred.array()).cast().mean(); +} +inline double mse(const Eigen::VectorXd& y_true, const Eigen::VectorXd& y_pred) { + return (y_true - y_pred).array().square().mean(); +} +inline double rmse(const Eigen::VectorXd& y_true, const Eigen::VectorXd& y_pred) { + return std::sqrt(mse(y_true, y_pred)); +} +inline double precision(const Eigen::VectorXi& y_true, const Eigen::VectorXi& y_pred) { + int tp=0, fp=0; + for(int i=0;i(tp)/(tp+fp); +} +inline double recall(const Eigen::VectorXi& y_true, const Eigen::VectorXi& y_pred) { + int tp=0, fn=0; + for(int i=0;i(tp)/(tp+fn); +} +inline double f1_score(const Eigen::VectorXi& y_true, const Eigen::VectorXi& y_pred) { + double p = precision(y_true, y_pred), r = recall(y_true, y_pred); + return p + r == 0 ? 
0.0 : 2.0*p*r/(p+r);
+}
+}
diff --git a/MachineLearning/utils/preprocessing.hpp b/MachineLearning/utils/preprocessing.hpp
new file mode 100644
index 0000000000..0c78b2bcdc
--- /dev/null
+++ b/MachineLearning/utils/preprocessing.hpp
@@ -0,0 +1,42 @@
+#pragma once
+#include "eigen_compat.hpp"
+#include <algorithm>
+#include <random>
+#include <vector>
+
+namespace ml::utils {
+inline Eigen::MatrixXd normalize(const Eigen::MatrixXd& X){
+  Eigen::MatrixXd out=X;
+  for(int j=0;j<X.cols();++j){
+    double mn=X.col(j).minCoeff(), mx=X.col(j).maxCoeff();
+    if(mx-mn>1e-12) out.col(j)=(X.col(j).array()-mn)/(mx-mn);
+  }
+  return out;
+}
+
+template <typename Y>
+inline void train_test_split(const Eigen::MatrixXd& X, const Y& y, double test_ratio,
+                             Eigen::MatrixXd& X_train, Eigen::MatrixXd& X_test,
+                             Y& y_train, Y& y_test, unsigned seed=42){
+  std::vector<int> idx(X.rows());
+  for(int i=0;i<X.rows();++i) idx[i]=i;
+  std::mt19937 gen(seed);
+  std::shuffle(idx.begin(), idx.end(), gen);
+  int n_test=static_cast<int>(X.rows()*test_ratio);
+  int n_train=X.rows()-n_test;
+  X_train.resize(n_train,X.cols()); X_test.resize(n_test,X.cols());
+  y_train.resize(n_train); y_test.resize(n_test);
+  for(int i=0;i<n_train;++i){ X_train.row(i)=X.row(idx[i]); y_train(i)=y(idx[i]); }
+  for(int i=0;i<n_test;++i){ X_test.row(i)=X.row(idx[n_train+i]); y_test(i)=y(idx[n_train+i]); }
+}
+}
diff --git a/advance_maths/calculus.cpp b/advance_maths/calculus.cpp
new file mode 100644
--- /dev/null
+++ b/advance_maths/calculus.cpp
@@ -0,0 +1,63 @@
+#include <cassert>
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <vector>
+
+namespace advance_maths::calculus {
+using Vector = std::vector<double>;
+
+double derivative(const std::function<double(double)>& f, double x, double h = 1e-5) {
+    return (f(x + h) - f(x - h)) / (2.0 * h);
+}
+
+double second_derivative(const std::function<double(double)>& f, double x, double h = 1e-5) {
+    return (f(x + h) - 2.0 * f(x) + f(x - h)) / (h * h);
+}
+
+double partial_derivative_x(const std::function<double(double, double)>& f, double x, double y,
+                            double h = 1e-5) {
+    return (f(x + h, y) - f(x - h, y)) / (2.0 * h);
+}
+
+double partial_derivative_y(const std::function<double(double, double)>& f, double x, double y,
+                            double h = 1e-5) {
+    return (f(x, y + h) - f(x, y - h)) / (2.0 * h);
+}
+
+Vector gradient_2d(const std::function<double(double, double)>& f, double x, double y) {
+    return {partial_derivative_x(f, x, y), partial_derivative_y(f, x, y)};
+}
+
+double chain_rule(double df_dg, double dg_dx) { return df_dg * dg_dx; }
+
+double backprop_single_weight(double prediction, double target, double input) {
+    double loss_grad_prediction = 2.0 * (prediction - target);  // dL/dy for MSE
+    double prediction_grad_weight = input;                      // dy/dw for y = wx
+    return chain_rule(loss_grad_prediction, prediction_grad_weight);
+}
+}  // namespace advance_maths::calculus
+
+static void test() {
+    using namespace advance_maths::calculus;
+
+    auto f = [](double x) { return x * x * x; };
+    assert(std::abs(derivative(f, 2.0) - 12.0) < 1e-3);
+    assert(std::abs(second_derivative(f, 2.0) - 12.0) < 1e-2);
+
+    auto g = [](double x, double y) { return x * x + 3.0 * x * y + y * y; };
+    assert(std::abs(partial_derivative_x(g, 1.0, 2.0) - 8.0) < 1e-3);
+    assert(std::abs(partial_derivative_y(g, 1.0, 2.0) - 7.0) < 1e-3);
+
+    auto grad = gradient_2d(g, 1.0, 2.0);
+    assert(std::abs(grad[0] - 8.0) < 1e-3 && std::abs(grad[1] - 7.0) < 1e-3);
+
+    double dldw = backprop_single_weight(3.0, 1.0, 2.0);
+    assert(std::abs(dldw - 8.0) < 1e-9);
+}
+
+int main() {
+    test();
+    std::cout << "Calculus module passed.\n";
+    return 0;
+}
diff --git a/advance_maths/geometry.cpp b/advance_maths/geometry.cpp
new file mode 100644
index 0000000000..593d906aa1
--- /dev/null
+++ b/advance_maths/geometry.cpp
@@ -0,0 +1,92 @@
+/**
+ * @file
+ * @brief Geometry-related similarity and projection utilities.
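+ *
+ * Example: cosine_similarity({1, 0}, {1, 1}) = 1/sqrt(2) ~= 0.707; orthogonal
+ * vectors score 0 and parallel vectors score 1.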
+ */
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <set>
+#include <stdexcept>
+#include <vector>
+
+namespace advance_maths::geometry {
+using Vector = std::vector<double>;
+
+double dot(const Vector& a, const Vector& b) {
+    if (a.size() != b.size()) {
+        throw std::invalid_argument("Vectors must have the same size.");
+    }
+    double sum = 0.0;
+    for (size_t i = 0; i < a.size(); ++i) {
+        sum += a[i] * b[i];
+    }
+    return sum;
+}
+
+double norm(const Vector& v) {
+    return std::sqrt(dot(v, v));
+}
+
+double cosine_similarity(const Vector& a, const Vector& b) {
+    double denom = norm(a) * norm(b);
+    if (denom < 1e-12) {
+        throw std::invalid_argument("Norm cannot be zero.");
+    }
+    return dot(a, b) / denom;
+}
+
+double jaccard_similarity(const std::set<int>& a, const std::set<int>& b) {
+    size_t intersection_count = 0;
+    for (int item : a) {
+        if (b.count(item) > 0) {
+            ++intersection_count;
+        }
+    }
+    const size_t union_count = a.size() + b.size() - intersection_count;
+    return union_count == 0 ? 1.0 : static_cast<double>(intersection_count) / static_cast<double>(union_count);
+}
+
+bool are_orthogonal(const Vector& a, const Vector& b, double eps = 1e-9) {
+    return std::abs(dot(a, b)) < eps;
+}
+
+Vector projection(const Vector& a, const Vector& b) {
+    const double denom = dot(b, b);
+    if (denom < 1e-12) {
+        throw std::invalid_argument("Cannot project onto a zero vector.");
+    }
+    const double scale = dot(a, b) / denom;
+    Vector proj = b;
+    for (double& value : proj) {
+        value *= scale;
+    }
+    return proj;
+}
+}  // namespace advance_maths::geometry
+
+static void test() {
+    using namespace advance_maths::geometry;
+
+    Vector a = {1.0, 2.0, 3.0};
+    Vector b = {2.0, 4.0, 6.0};
+    assert(std::abs(cosine_similarity(a, b) - 1.0) < 1e-9);
+
+    std::set<int> s1 = {1, 2, 3, 5};
+    std::set<int> s2 = {2, 3, 4};
+    assert(std::abs(jaccard_similarity(s1, s2) - 0.4) < 1e-9);
+
+    Vector o1 = {1.0, 0.0};
+    Vector o2 = {0.0, 4.0};
+    assert(are_orthogonal(o1, o2));
+
+    Vector p = projection(a, b);
+    assert(std::abs(p[0] - 1.0) < 1e-9);
+    assert(std::abs(p[1] - 2.0) < 1e-9);
+    assert(std::abs(p[2] - 3.0) < 1e-9);
+}
+
+int main() {
+    test();
+    std::cout << "Geometry module passed.\n";
+    return 0;
+}
diff --git a/advance_maths/linear_algebra.cpp b/advance_maths/linear_algebra.cpp
new file mode 100644
index 0000000000..8207c7fbdf
--- /dev/null
+++ b/advance_maths/linear_algebra.cpp
@@ -0,0 +1,146 @@
+/**
+ * @file
+ * @brief Core linear algebra utilities and demonstrations.
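+ *
+ * For a 2x2 matrix A = [[a, b], [c, d]] with a != 0, lu_decomposition_2x2 returns
+ * L = [[1, 0], [c/a, 1]] and U = [[a, b], [0, d - (c/a)*b]], so that L*U = A.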
+ */
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace advance_maths::linear_algebra {
+using Matrix = std::vector<std::vector<double>>;
+using Vector = std::vector<double>;
+
+static void validate_same_size(const Vector& a, const Vector& b) {
+    if (a.size() != b.size()) {
+        throw std::invalid_argument("Vectors must have the same size.");
+    }
+}
+
+double dot_product(const Vector& a, const Vector& b) {
+    validate_same_size(a, b);
+    double sum = 0.0;
+    for (size_t i = 0; i < a.size(); ++i) {
+        sum += a[i] * b[i];
+    }
+    return sum;
+}
+
+double l2_norm(const Vector& a) { return std::sqrt(dot_product(a, a)); }
+
+double manhattan_norm(const Vector& a) {
+    double sum = 0.0;
+    for (double value : a) {
+        sum += std::abs(value);
+    }
+    return sum;
+}
+
+double euclidean_distance(const Vector& a, const Vector& b) {
+    validate_same_size(a, b);
+    double sum = 0.0;
+    for (size_t i = 0; i < a.size(); ++i) {
+        sum += (a[i] - b[i]) * (a[i] - b[i]);
+    }
+    return std::sqrt(sum);
+}
+
+double manhattan_distance(const Vector& a, const Vector& b) {
+    validate_same_size(a, b);
+    double sum = 0.0;
+    for (size_t i = 0; i < a.size(); ++i) {
+        sum += std::abs(a[i] - b[i]);
+    }
+    return sum;
+}
+
+Matrix multiply(const Matrix& a, const Matrix& b) {
+    if (a.empty() || b.empty() || a[0].size() != b.size()) {
+        throw std::invalid_argument("Incompatible matrix dimensions.");
+    }
+
+    Matrix result(a.size(), Vector(b[0].size(), 0.0));
+    for (size_t i = 0; i < a.size(); ++i) {
+        for (size_t k = 0; k < b.size(); ++k) {
+            for (size_t j = 0; j < b[0].size(); ++j) {
+                result[i][j] += a[i][k] * b[k][j];
+            }
+        }
+    }
+    return result;
+}
+
+std::pair<Matrix, Matrix> lu_decomposition_2x2(const Matrix& m) {
+    if (m.size() != 2 || m[0].size() != 2 || m[1].size() != 2) {
+        throw std::invalid_argument("This demo supports only 2x2 matrices.");
+    }
+    if (std::abs(m[0][0]) < 1e-12) {
+        throw std::invalid_argument("Pivot too small for this simple LU decomposition.");
+    }
+
+    Matrix l = {{1.0, 0.0}, {m[1][0] / m[0][0], 1.0}};
+    Matrix u = {{m[0][0], m[0][1]}, {0.0, m[1][1] - l[1][0] * m[0][1]}};
+    return {l, u};
+}
+
+std::pair<double, double> eigenvalues_2x2(const Matrix& m) {
+    if (m.size() != 2 || m[0].size() != 2 || m[1].size() != 2) {
+        throw std::invalid_argument("This demo supports only 2x2 matrices.");
+    }
+
+    const double trace = m[0][0] + m[1][1];
+    const double det = m[0][0] * m[1][1] - m[0][1] * m[1][0];
+    const double disc = std::sqrt(trace * trace - 4.0 * det);
+    return {(trace + disc) / 2.0, (trace - disc) / 2.0};
+}
+
+Vector dominant_right_singular_vector_2x2(const Matrix& m) {
+    Matrix mtm = {
+        {m[0][0] * m[0][0] + m[1][0] * m[1][0], m[0][0] * m[0][1] + m[1][0] * m[1][1]},
+        {m[0][0] * m[0][1] + m[1][0] * m[1][1], m[0][1] * m[0][1] + m[1][1] * m[1][1]}};
+
+    auto eig = eigenvalues_2x2(mtm);
+    double lambda = std::max(eig.first, eig.second);
+
+    Vector v = {mtm[0][1], lambda - mtm[0][0]};
+    double norm = l2_norm(v);
+    if (norm < 1e-12) {
+        return {1.0, 0.0};
+    }
+    v[0] /= norm;
+    v[1] /= norm;
+    return v;
+}
+}  // namespace advance_maths::linear_algebra
+
+static void test() {
+    using namespace advance_maths::linear_algebra;
+
+    Vector a = {1.0, 2.0, 3.0};
+    Vector b = {4.0, 1.0, -2.0};
+    assert(std::abs(dot_product(a, b) - 0.0) < 1e-9);
+    assert(std::abs(l2_norm(a) - std::sqrt(14.0)) < 1e-9);
+    assert(std::abs(manhattan_norm(a) - 6.0) < 1e-9);
+    assert(std::abs(euclidean_distance(a, b) - std::sqrt(35.0)) < 1e-9);
+    assert(std::abs(manhattan_distance(a, b) - 9.0) < 1e-9);
+
+    Matrix m = {{4.0, 3.0}, {4.0, 3.0}};
+    auto [l, u] = lu_decomposition_2x2(m);
diff --git a/advance_maths/optimization.cpp b/advance_maths/optimization.cpp
new file mode 100644
index 0000000000..528ad8439c
--- /dev/null
+++ b/advance_maths/optimization.cpp
@@ -0,0 +1,62 @@
+/**
+ * @file
+ * @brief Optimization algorithms: gradient descent and SGD.
+ */
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <utility>
+#include <vector>
+
+namespace advance_maths::optimization {
+
+double objective(double x) { return (x - 3.0) * (x - 3.0); }
+
+double objective_grad(double x) { return 2.0 * (x - 3.0); }
+
+double gradient_descent(double initial_x, double learning_rate, int iterations) {
+    double x = initial_x;
+    for (int i = 0; i < iterations; ++i) {
+        x -= learning_rate * objective_grad(x);
+    }
+    return x;
+}
+
+std::pair<double, double> linear_sgd(const std::vector<double>& x, const std::vector<double>& y,
+                                     double lr, int epochs) {
+    double w = 0.0;
+    double b = 0.0;
+    std::mt19937 gen(42);
+    std::uniform_int_distribution<size_t> dist(0, x.size() - 1);
+
+    for (int epoch = 0; epoch < epochs; ++epoch) {
+        size_t i = dist(gen);
+        double pred = w * x[i] + b;
+        double err = pred - y[i];
+        w -= lr * 2.0 * err * x[i];
+        b -= lr * 2.0 * err;
+    }
+    return {w, b};
+}
+}  // namespace advance_maths::optimization
+
+static void test() {
+    using namespace advance_maths::optimization;
+
+    double optimum = gradient_descent(10.0, 0.1, 100);
+    assert(std::abs(optimum - 3.0) < 1e-4);
+    assert(objective(optimum) < 1e-6);
+
+    std::vector<double> x = {1, 2, 3, 4};
+    std::vector<double> y = {3, 5, 7, 9};  // y = 2x + 1
+    auto [w, b] = linear_sgd(x, y, 0.01, 10000);
+    assert(std::abs(w - 2.0) < 0.25);
+    assert(std::abs(b - 1.0) < 0.5);
+}
+
+int main() {
+    test();
+    std::cout << "Optimization module passed.\n";
+    return 0;
+}
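A quick sanity check on the tight `gradient_descent` tolerance (assuming exact arithmetic): for $f(x)=(x-3)^2$ the update contracts the error geometrically,

$$x_{k+1}-3 = x_k-3-\eta\,f'(x_k) = (1-2\eta)(x_k-3),$$

so with $\eta=0.1$ and $x_0=10$, $|x_{100}-3| = 0.8^{100}\cdot 7 \approx 1.4\times 10^{-9}$, comfortably inside the $10^{-4}$ assert.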
diff --git a/advance_maths/probability_statistics.cpp b/advance_maths/probability_statistics.cpp
new file mode 100644
index 0000000000..e6cd7a8319
--- /dev/null
+++ b/advance_maths/probability_statistics.cpp
@@ -0,0 +1,155 @@
+/**
+ * @file
+ * @brief Probability and statistics utilities and demonstrations.
+ */
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace advance_maths::probability_statistics {
+using Vector = std::vector<double>;
+
+double mean(const Vector& v) {
+    if (v.empty()) {
+        throw std::invalid_argument("Vector cannot be empty.");
+    }
+    return std::accumulate(v.begin(), v.end(), 0.0) / static_cast<double>(v.size());
+}
+
+double variance(const Vector& v) {
+    const double m = mean(v);
+    double sum = 0.0;
+    for (double x : v) {
+        sum += (x - m) * (x - m);
+    }
+    return sum / static_cast<double>(v.size());
+}
+
+double standard_deviation(const Vector& v) { return std::sqrt(variance(v)); }
+
+double covariance(const Vector& x, const Vector& y) {
+    if (x.size() != y.size() || x.empty()) {
+        throw std::invalid_argument("Vectors must be non-empty and same size.");
+    }
+    const double mx = mean(x);
+    const double my = mean(y);
+    double sum = 0.0;
+    for (size_t i = 0; i < x.size(); ++i) {
+        sum += (x[i] - mx) * (y[i] - my);
+    }
+    return sum / static_cast<double>(x.size());
+}
+
+double pearson_correlation(const Vector& x, const Vector& y) {
+    const double denom = standard_deviation(x) * standard_deviation(y);
+    if (denom < 1e-12) {
+        throw std::invalid_argument("Standard deviation is zero.");
+    }
+    return covariance(x, y) / denom;
+}
+
+double joint_probability(double p_a, double p_b_given_a) { return p_a * p_b_given_a; }
+
+double conditional_probability(double joint, double p_condition) {
+    if (p_condition <= 0.0) {
+        throw std::invalid_argument("Condition probability must be > 0.");
+    }
+    return joint / p_condition;
+}
+
+double bayes(double p_b_given_a, double p_a, double p_b) {
+    if (p_b <= 0.0) {
+        throw std::invalid_argument("Marginal probability p(B) must be > 0.");
+    }
+    return (p_b_given_a * p_a) / p_b;
+}
+
+double binomial_pmf(int n, int k, double p) {
+    if (k < 0 || k > n || p < 0.0 || p > 1.0) {
+        throw std::invalid_argument("Invalid binomial parameters.");
+    }
+    auto combination = [](int nn, int kk) {
+        double c = 1.0;
+        for (int i = 1; i <= kk; ++i) {
+            c = c * (nn - (kk - i)) / i;
+        }
+        return c;
+    };
+    return combination(n, k) * std::pow(p, k) * std::pow(1.0 - p, n - k);
+}
+
+double normal_pdf(double x, double mu, double sigma) {
+    if (sigma <= 0.0) {
+        throw std::invalid_argument("Sigma must be > 0.");
+    }
+    const double z = (x - mu) / sigma;
+    return std::exp(-0.5 * z * z) / (sigma * std::sqrt(2.0 * M_PI));
+}
+
+std::pair<double, double> confidence_interval_mean(const Vector& v, double z_score = 1.96) {
+    const double m = mean(v);
+    const double se = standard_deviation(v) / std::sqrt(static_cast<double>(v.size()));
+    return {m - z_score * se, m + z_score * se};
+}
+
+bool z_test_reject(double sample_mean, double hypothesized_mean, double sigma, int n,
+                   double alpha = 0.05) {
+    const double z =
+        std::abs(sample_mean - hypothesized_mean) / (sigma / std::sqrt(static_cast<double>(n)));
+    const double critical = (alpha == 0.05 ? 1.96 : 2.58);  // two-sided critical values for 5% / 1%
+    return z > critical;
+}
+
+double bootstrap_mean_estimate(const Vector& v, int iterations = 200) {
+    std::mt19937 gen(123);
+    std::uniform_int_distribution<size_t> dist(0, v.size() - 1);
+    Vector bootstrap_means;
+    bootstrap_means.reserve(iterations);
+
+    for (int it = 0; it < iterations; ++it) {
+        double sum = 0.0;
+        for (size_t i = 0; i < v.size(); ++i) {
+            sum += v[dist(gen)];
+        }
+        bootstrap_means.push_back(sum / static_cast<double>(v.size()));
+    }
+    return mean(bootstrap_means);
+}
+}  // namespace advance_maths::probability_statistics
+
+static void test() {
+    using namespace advance_maths::probability_statistics;
+    std::vector<double> x = {1, 2, 3, 4, 5};
+    std::vector<double> y = {2, 4, 6, 8, 10};
+
+    assert(std::abs(mean(x) - 3.0) < 1e-9);
+    assert(std::abs(variance(x) - 2.0) < 1e-9);
+    assert(std::abs(standard_deviation(x) - std::sqrt(2.0)) < 1e-9);
+    assert(std::abs(covariance(x, y) - 4.0) < 1e-9);
+    assert(std::abs(pearson_correlation(x, y) - 1.0) < 1e-9);
+
+    double joint = joint_probability(0.5, 0.2);
+    assert(std::abs(joint - 0.1) < 1e-9);
+    assert(std::abs(conditional_probability(0.1, 0.5) - 0.2) < 1e-9);
+    assert(std::abs(bayes(0.9, 0.01, 0.02) - 0.45) < 1e-9);
+
+    assert(std::abs(binomial_pmf(5, 2, 0.5) - 0.3125) < 1e-9);
+    assert(std::abs(normal_pdf(0.0, 0.0, 1.0) - 0.3989422804) < 1e-6);
+
+    auto ci = confidence_interval_mean(x);
+    assert(ci.first < 3.0 && ci.second > 3.0);
+    assert(!z_test_reject(5.1, 5.0, 1.0, 100));
+
+    double boot = bootstrap_mean_estimate(x);
+    assert(std::abs(boot - 3.0) < 0.2);
+}
+
+int main() {
+    test();
+    std::cout << "Probability and statistics module passed.\n";
+    return 0;
+}
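The probability test cases are easy to verify by hand, e.g. the Bayes and binomial asserts:

$$P(A\mid B)=\frac{P(B\mid A)\,P(A)}{P(B)}=\frac{0.9\times 0.01}{0.02}=0.45,
\qquad
\binom{5}{2}\left(\tfrac{1}{2}\right)^{5}=\frac{10}{32}=0.3125.$$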
diff --git a/advance_maths/regression_analysis.cpp b/advance_maths/regression_analysis.cpp
new file mode 100644
index 0000000000..2a7d9b0c02
--- /dev/null
+++ b/advance_maths/regression_analysis.cpp
@@ -0,0 +1,91 @@
+/**
+ * @file
+ * @brief Regression analysis utilities: MLE and MSE.
+ */
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace advance_maths::regression_analysis {
+
+double mean_squared_error(const std::vector<double>& y_true, const std::vector<double>& y_pred) {
+    if (y_true.size() != y_pred.size() || y_true.empty()) {
+        throw std::invalid_argument("Vectors must be same size and non-empty.");
+    }
+    double sum = 0.0;
+    for (size_t i = 0; i < y_true.size(); ++i) {
+        const double e = y_true[i] - y_pred[i];
+        sum += e * e;
+    }
+    return sum / static_cast<double>(y_true.size());
+}
+
+std::pair<double, double> linear_regression_mle(const std::vector<double>& x,
+                                                const std::vector<double>& y) {
+    if (x.size() != y.size() || x.size() < 2) {
+        throw std::invalid_argument("Need at least two paired samples.");
+    }
+    const double n = static_cast<double>(x.size());
+    double sum_x = 0.0;
+    double sum_y = 0.0;
+    double sum_xy = 0.0;
+    double sum_x2 = 0.0;
+
+    for (size_t i = 0; i < x.size(); ++i) {
+        sum_x += x[i];
+        sum_y += y[i];
+        sum_xy += x[i] * y[i];
+        sum_x2 += x[i] * x[i];
+    }
+
+    const double denom = n * sum_x2 - sum_x * sum_x;
+    if (std::abs(denom) < 1e-12) {
+        // Guard: the slope is undefined when all x values coincide.
+        throw std::invalid_argument("Slope is undefined for constant x.");
+    }
+    const double slope = (n * sum_xy - sum_x * sum_y) / denom;
+    const double intercept = (sum_y - slope * sum_x) / n;
+    return {slope, intercept};
+}
+
+double estimate_gaussian_mean_mle(const std::vector<double>& samples) {
+    if (samples.empty()) {
+        throw std::invalid_argument("Samples cannot be empty.");
+    }
+    double sum = 0.0;
+    for (double value : samples) {
+        sum += value;
+    }
+    return sum / static_cast<double>(samples.size());
+}
+
+double estimate_gaussian_variance_mle(const std::vector<double>& samples, double mu_hat) {
+    if (samples.empty()) {
+        throw std::invalid_argument("Samples cannot be empty.");
+    }
+    double sum = 0.0;
+    for (double value : samples) {
+        sum += (value - mu_hat) * (value - mu_hat);
+    }
+    return sum / static_cast<double>(samples.size());
+}
+}  // namespace advance_maths::regression_analysis
+
+static void test() {
+    using namespace advance_maths::regression_analysis;
+
+    std::vector<double> x = {1, 2, 3, 4, 5};
+    std::vector<double> y = {3, 5, 7, 9, 11};  // y = 2x + 1
+
+    auto [slope, intercept] = linear_regression_mle(x, y);
+    assert(std::abs(slope - 2.0) < 1e-9);
+    assert(std::abs(intercept - 1.0) < 1e-9);
+
+    std::vector<double> y_pred;
+    for (double xv : x) {
+        y_pred.push_back(slope * xv + intercept);
+    }
+    assert(std::abs(mean_squared_error(y, y_pred)) < 1e-9);
+
+    std::vector<double> gaussian_samples = {2.0, 3.0, 4.0, 5.0};
+    double mu_hat = estimate_gaussian_mean_mle(gaussian_samples);
+    double var_hat = estimate_gaussian_variance_mle(gaussian_samples, mu_hat);
+    assert(std::abs(mu_hat - 3.5) < 1e-9);
+    assert(std::abs(var_hat - 1.25) < 1e-9);
+}
+
+int main() {
+    test();
+    std::cout << "Regression analysis module passed.\n";
+    return 0;
+}
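The closed form in `linear_regression_mle` is ordinary least squares, which coincides with the maximum-likelihood estimate under i.i.d. Gaussian noise. Dividing the numerator and denominator by $n^2$ gives the familiar ratio, worked here on the test data ($\operatorname{cov}(x,y)=4$, $\operatorname{var}(x)=2$, $\bar x=3$, $\bar y=7$):

$$\hat\beta_1=\frac{n\sum x_iy_i-\sum x_i\sum y_i}{n\sum x_i^2-\left(\sum x_i\right)^2}
=\frac{\operatorname{cov}(x,y)}{\operatorname{var}(x)}=\frac{4}{2}=2,
\qquad
\hat\beta_0=\bar y-\hat\beta_1\bar x=7-2\cdot 3=1.$$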
diff --git a/advance_maths/vector_calculus.cpp b/advance_maths/vector_calculus.cpp
new file mode 100644
index 0000000000..6125f24317
--- /dev/null
+++ b/advance_maths/vector_calculus.cpp
@@ -0,0 +1,91 @@
+/**
+ * @file
+ * @brief Vector calculus utilities: Jacobian and Hessian.
+ */
+#include <cassert>
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <vector>
+
+namespace advance_maths::vector_calculus {
+using Vector = std::vector<double>;
+using Matrix = std::vector<Vector>;
+
+// Central differences: f'(x) ~ (f(x + h) - f(x - h)) / (2h), accurate to O(h^2).
+Vector gradient(const std::function<double(const Vector&)>& f, const Vector& x, double h = 1e-5) {
+    Vector grad(x.size(), 0.0);
+    for (size_t i = 0; i < x.size(); ++i) {
+        Vector x_plus = x;
+        Vector x_minus = x;
+        x_plus[i] += h;
+        x_minus[i] -= h;
+        grad[i] = (f(x_plus) - f(x_minus)) / (2.0 * h);
+    }
+    return grad;
+}
+
+Matrix jacobian(const std::vector<std::function<double(const Vector&)>>& funcs, const Vector& x,
+                double h = 1e-5) {
+    Matrix j(funcs.size(), Vector(x.size(), 0.0));
+    for (size_t row = 0; row < funcs.size(); ++row) {
+        for (size_t col = 0; col < x.size(); ++col) {
+            Vector x_plus = x;
+            Vector x_minus = x;
+            x_plus[col] += h;
+            x_minus[col] -= h;
+            j[row][col] = (funcs[row](x_plus) - funcs[row](x_minus)) / (2.0 * h);
+        }
+    }
+    return j;
+}
+
+Matrix hessian(const std::function<double(const Vector&)>& f, const Vector& x, double h = 1e-4) {
+    size_t n = x.size();
+    Matrix hess(n, Vector(n, 0.0));
+    for (size_t i = 0; i < n; ++i) {
+        for (size_t j = 0; j < n; ++j) {
+            Vector x_pp = x, x_pm = x, x_mp = x, x_mm = x;
+            x_pp[i] += h;
+            x_pp[j] += h;
+            x_pm[i] += h;
+            x_pm[j] -= h;
+            x_mp[i] -= h;
+            x_mp[j] += h;
+            x_mm[i] -= h;
+            x_mm[j] -= h;
+            hess[i][j] = (f(x_pp) - f(x_pm) - f(x_mp) + f(x_mm)) / (4.0 * h * h);
+        }
+    }
+    return hess;
+}
+}  // namespace advance_maths::vector_calculus
+
+static void test() {
+    using namespace advance_maths::vector_calculus;
+
+    auto scalar = [](const Vector& v) { return v[0] * v[0] + 3.0 * v[0] * v[1] + v[1] * v[1]; };
+    Vector x = {1.0, 2.0};
+
+    auto grad = gradient(scalar, x);
+    assert(std::abs(grad[0] - 8.0) < 1e-3);
+    assert(std::abs(grad[1] - 7.0) < 1e-3);
+
+    std::vector<std::function<double(const Vector&)>> funcs = {
+        [](const Vector& v) { return v[0] + v[1]; },
+        [](const Vector& v) { return v[0] * v[1]; }};
+    auto j = jacobian(funcs, x);
+    assert(std::abs(j[0][0] - 1.0) < 1e-4 && std::abs(j[0][1] - 1.0) < 1e-4);
+    assert(std::abs(j[1][0] - 2.0) < 1e-3 && std::abs(j[1][1] - 1.0) < 1e-3);
+
+    auto h = hessian(scalar, x);
+    assert(std::abs(h[0][0] - 2.0) < 1e-2);
+    assert(std::abs(h[0][1] - 3.0) < 1e-2);
+    assert(std::abs(h[1][0] - 3.0) < 1e-2);
+    assert(std::abs(h[1][1] - 2.0) < 1e-2);
+}
+
+int main() {
+    test();
+    std::cout << "Vector calculus module passed.\n";
+    return 0;
+}
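For the test function the finite-difference results can be checked analytically, which also shows why the loose tolerances are safe for central differences (truncation error $O(h^2)$):

$$f(x,y)=x^2+3xy+y^2
\;\Rightarrow\;
\nabla f=(2x+3y,\;3x+2y),\quad
\nabla f(1,2)=(8,\,7),\quad
Hf=\begin{pmatrix}2&3\\3&2\end{pmatrix}.$$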