From e8c0dbb42c0a48b6557a387a202d1cf159b03b3b Mon Sep 17 00:00:00 2001 From: khanhhado1208 Date: Thu, 28 May 2026 23:59:03 +0300 Subject: [PATCH 1/5] Implement blocked matrix multiplication and validation logic --- CMakeLists.txt | 2 +- main.cpp | 123 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 111 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b04fd0..abcfc6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ if(APPLE) endif() -add_executable(matmul main_ans.cpp) +add_executable(matmul main.cpp) if(OpenMP_CXX_FOUND) diff --git a/main.cpp b/main.cpp index 65bf108..2f2f17b 100644 --- a/main.cpp +++ b/main.cpp @@ -5,22 +5,86 @@ #include void naive_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) { - //TODO : Implement naive matrix multiplication + for (uint32_t i = 0; i < m; i++) { + for (uint32_t j = 0; j < p; j++) { + float sum = 0.0f; + for (uint32_t k = 0; k < n; k++) { + sum += A[i * n + k] * B[k * p + j]; + } + C[i * p + j] = sum; + } + } } void blocked_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p, uint32_t block_size) { - // TODO: Implement blocked matrix multiplication - // A is m x n, B is n x p, C is m x p - // Use block_size to divide matrices into submatrices + for (uint32_t i = 0; i < m * p; i++) { + C[i] = 0.0f; + } + + for (uint32_t ii = 0; ii < m; ii += block_size) { + for (uint32_t jj = 0; jj < p; jj += block_size) { + for (uint32_t kk = 0; kk < n; kk += block_size) { + uint32_t i_end = std::min(ii + block_size, m); + uint32_t j_end = std::min(jj + block_size, p); + uint32_t k_end = std::min(kk + block_size, n); + + for (uint32_t i = ii; i < i_end; i++) { + for (uint32_t j = jj; j < j_end; j++) { + float sum = C[i * p + j]; + for (uint32_t k = kk; k < k_end; k++) { + sum += A[i * n + k] * B[k * p + j]; + } + C[i * p + j] = sum; + } + } + } + } + } } void parallel_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, 
uint32_t p) { - // TODO: Implement parallel matrix multiplication using OpenMP - // A is m x n, B is n x p, C is m x p + naive_matmul(C, A, B, m, n, p); } bool validate_result(const std::string &result_file, const std::string &reference_file) { - //TODO : Implement result validation + std::ifstream result(result_file); + std::ifstream ref(reference_file); + + if (!result.is_open() || !ref.is_open()) { + return false; + } + + uint32_t rm, rp, fm, fp; + result >> rm >> rp; + ref >> fm >> fp; + + if (rm != fm || rp != fp) { + return false; + } + + for (uint32_t i = 0; i < rm * rp; i++) { + float a, b; + result >> a; + ref >> b; + if (std::fabs(a - b) > 1e-3f) { + return false; + } + } + + return true; +} + +void write_result(const std::string &result_file, float *C, uint32_t m, uint32_t p) { + std::ofstream out(result_file); + out << m << " " << p << "\n"; + for (uint32_t i = 0; i < m; i++) { + for (uint32_t j = 0; j < p; j++) { + out << C[i * p + j]; + if (j + 1 < p) out << " "; + } + out << "\n"; + } + out.close(); } int main(int argc, char *argv[]) { @@ -42,10 +106,43 @@ int main(int argc, char *argv[]) { std::string result_file = folder + "result.raw"; std::string reference_file = folder + "output.raw"; - // TODO Read input0.raw (matrix A) +std::ifstream input0(input0_file); +if (!input0.is_open()) { + std::cerr << "Failed to open " << input0_file << std::endl; + return 1; +} + +uint32_t m, n; +input0 >> m >> n; +float *A = new float[m * n]; +for (uint32_t i = 0; i < m * n; i++) { + input0 >> A[i]; +} +input0.close(); - // TODO Read input1.raw (matrix B) + +std::ifstream input1(input1_file); +if (!input1.is_open()) { + std::cerr << "Failed to open " << input1_file << std::endl; + delete[] A; + return 1; +} + +uint32_t n2, p; +input1 >> n2 >> p; + +if (n != n2) { + std::cerr << "Matrix dimension mismatch: input0 columns != input1 rows" << std::endl; + delete[] A; + return 1; +} + +float *B = new float[n * p]; +for (uint32_t i = 0; i < n * p; i++) { + input1 >> 
B[i]; +} +input1.close(); // Allocate memory for result matrices @@ -58,7 +155,7 @@ int main(int argc, char *argv[]) { naive_matmul(C_naive, A, B, m, n, p); double naive_time = omp_get_wtime() - start_time; - // TODO Write naive result to file + write_result(result_file, C_naive, m, p); // Validate naive result @@ -69,10 +166,10 @@ int main(int argc, char *argv[]) { // Measure performance of blocked_matmul (use block_size = 32 as default) start_time = omp_get_wtime(); - blocked_matmul(C_blocked, A, B, m, n, p, 32); + blocked_matmul(C_blocked, A, B, m, n, p, 64); double blocked_time = omp_get_wtime() - start_time; - // TODO Write blocked result to file + write_result(result_file, C_blocked, m, p); // Validate blocked result @@ -86,7 +183,7 @@ int main(int argc, char *argv[]) { parallel_matmul(C_parallel, A, B, m, n, p); double parallel_time = omp_get_wtime() - start_time; - // TODO Write parallel result to file + write_result(result_file, C_parallel, m, p); // Validate parallel result From d384fde4a245c01798b788d62fedc04e80ef6a3f Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sat, 30 May 2026 20:23:24 +0300 Subject: [PATCH 2/5] Implement openMP multiplication and add gitignore --- .gitignore | 29 +++++++++++++ main.cpp | 119 +++++++++++++++++++++++++---------------------------- 2 files changed, 85 insertions(+), 63 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..335b146 --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +# Compiled binaries +matmul +# Object files +*.o +*.a +*.so +logs/ +objs/ + +# Build artifacts +*.d + +# CMake +CMakeFiles/ +CMakeCache.txt +cmake_install.cmake +Makefile + +# Editor/IDE +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Output files +data/*/result.raw \ No newline at end of file diff --git a/main.cpp b/main.cpp index 2f2f17b..93d6224 100644 --- a/main.cpp +++ b/main.cpp @@ -1,8 +1,9 @@ -#include +#include +#include #include -#include +#include #include -#include 
+#include void naive_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) { for (uint32_t i = 0; i < m; i++) { @@ -43,7 +44,18 @@ void blocked_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32 } void parallel_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) { - naive_matmul(C, A, B, m, n, p); + // Implement parallel matrix multiplication using OpenMP + // A is m x n, B is n x p, C is m x p + + #pragma omp parallel for schedule(static) + for (uint32_t i = 0; i < m; i++) { + for (uint32_t j = 0; j < p; j++) { + C[i * p + j] = 0; + for (uint32_t k = 0; k < n; k++) { + C[i * p + j] += A[i * n + k] * B[k * p + j]; + } + } + } } bool validate_result(const std::string &result_file, const std::string &reference_file) { @@ -54,19 +66,19 @@ bool validate_result(const std::string &result_file, const std::string &referenc return false; } - uint32_t rm, rp, fm, fp; - result >> rm >> rp; - ref >> fm >> fp; + uint32_t result_m, result_p, ref_m, ref_p; + result >> result_m >> result_p; + ref >> ref_m >> ref_p; - if (rm != fm || rp != fp) { + if (result_m != ref_m || result_p != ref_p) { return false; } - for (uint32_t i = 0; i < rm * rp; i++) { - float a, b; - result >> a; - ref >> b; - if (std::fabs(a - b) > 1e-3f) { + for (uint32_t i = 0; i < result_m * result_p; i++) { + float A_val, B_val; + result >> A_val; + ref >> B_val; + if (std::fabs(A_val - B_val) > 1e-3f) { return false; } } @@ -74,17 +86,29 @@ bool validate_result(const std::string &result_file, const std::string &referenc return true; } -void write_result(const std::string &result_file, float *C, uint32_t m, uint32_t p) { - std::ofstream out(result_file); - out << m << " " << p << "\n"; - for (uint32_t i = 0; i < m; i++) { - for (uint32_t j = 0; j < p; j++) { - out << C[i * p + j]; - if (j + 1 < p) out << " "; +float *read_input(const std::string file_path, uint32_t &row, uint32_t &col) { + std::ifstream f(file_path); + f >> row >> col; + float *matrix = 
new float[row * col](); + for (uint32_t i = 0; i < row * col; i++) { + f >> matrix[i]; + } + f.close(); + + return matrix; +} + +void write_result(const std::string file_path, float *matrix, uint32_t &row, uint32_t &col) { + std::ofstream of(file_path); + of << row << " " << col << "\n"; + for (uint32_t i = 0; i < row * col; i++) { + of << matrix[i]; + if ((i + 1) % col == 0) { + of << "\n"; + } else { + of << " "; } - out << "\n"; } - out.close(); } int main(int argc, char *argv[]) { @@ -106,44 +130,13 @@ int main(int argc, char *argv[]) { std::string result_file = folder + "result.raw"; std::string reference_file = folder + "output.raw"; -std::ifstream input0(input0_file); -if (!input0.is_open()) { - std::cerr << "Failed to open " << input0_file << std::endl; - return 1; -} - -uint32_t m, n; -input0 >> m >> n; - -float *A = new float[m * n]; -for (uint32_t i = 0; i < m * n; i++) { - input0 >> A[i]; -} -input0.close(); - - -std::ifstream input1(input1_file); -if (!input1.is_open()) { - std::cerr << "Failed to open " << input1_file << std::endl; - delete[] A; - return 1; -} - -uint32_t n2, p; -input1 >> n2 >> p; - -if (n != n2) { - std::cerr << "Matrix dimension mismatch: input0 columns != input1 rows" << std::endl; - delete[] A; - return 1; -} - -float *B = new float[n * p]; -for (uint32_t i = 0; i < n * p; i++) { - input1 >> B[i]; -} -input1.close(); + // Read input0.raw (matrix A) + uint32_t m, n; + float *A = read_input(input0_file, m, n); + // Read input1.raw (matrix B) + uint32_t p; + float *B = read_input(input1_file, n, p); // Allocate memory for result matrices float *C_naive = new float[m * p]; @@ -155,9 +148,9 @@ input1.close(); naive_matmul(C_naive, A, B, m, n, p); double naive_time = omp_get_wtime() - start_time; + // Write naive result to file write_result(result_file, C_naive, m, p); - // Validate naive result bool naive_correct = validate_result(result_file, reference_file); if (!naive_correct) { @@ -166,12 +159,12 @@ input1.close(); // Measure 
performance of blocked_matmul (use block_size = 32 as default) start_time = omp_get_wtime(); - blocked_matmul(C_blocked, A, B, m, n, p, 64); + blocked_matmul(C_blocked, A, B, m, n, p, 8); double blocked_time = omp_get_wtime() - start_time; + // Write blocked result to file write_result(result_file, C_blocked, m, p); - // Validate blocked result bool blocked_correct = validate_result(result_file, reference_file); if (!blocked_correct) { @@ -183,9 +176,9 @@ input1.close(); parallel_matmul(C_parallel, A, B, m, n, p); double parallel_time = omp_get_wtime() - start_time; + // Write parallel result to file write_result(result_file, C_parallel, m, p); - // Validate parallel result bool parallel_correct = validate_result(result_file, reference_file); if (!parallel_correct) { From e25a31b76678dff8b55401d187715ff1d4f6e2c6 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 31 May 2026 20:04:25 +0300 Subject: [PATCH 3/5] Add python script to run and generate test results --- .python-version | 1 + pyproject.toml | 10 +++ test_run.py | 21 +++++ uv.lock | 204 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 236 insertions(+) create mode 100644 .python-version create mode 100644 pyproject.toml create mode 100644 test_run.py create mode 100644 uv.lock diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9ae3456 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "assignment-4" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "pandas>=3.0.3", + "tabulate>=0.10.0", +] diff --git a/test_run.py b/test_run.py new file mode 100644 index 0000000..48344e8 --- /dev/null +++ b/test_run.py @@ -0,0 +1,21 @@ +import subprocess +import re +import os +import pandas as pd + +os.environ["OMP_NUM_THREADS"] = 
"8" + +results = [] + +for i in range(10): + out = subprocess.run(["./matmul", str(i)], capture_output=True, text=True).stdout + print(out, end="\n") + header = re.search(r"^Case (\d+) \((\d+x\d+x\d+)\)", out, re.MULTILINE) + timings = {"Test Case": int(header.group(1)), "Dimensions (m x n x p)": header.group(2)} + for m in re.finditer(r"^(\w+) (time|speedup): ([\d.e+-]+)", out, re.MULTILINE): + label, kind, value = m.group(1), m.group(2), float(m.group(3)) + key = f"{label} Time (s)" if kind == "time" else f"{label} Speedup" + timings[key] = value if kind == "time" else f"{value}x" + results.append(timings) + +df = pd.DataFrame(results).to_markdown("timing_results.md", index=False) \ No newline at end of file diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..6e0aa46 --- /dev/null +++ b/uv.lock @@ -0,0 +1,204 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'win32'", + "python_full_version >= '3.14' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.14' and sys_platform == 'win32'", + "python_full_version < '3.14' and sys_platform == 'emscripten'", + "python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] + +[[package]] +name = "assignment-4" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "pandas" }, + { name = "tabulate" }, +] + +[package.metadata] +requires-dist = [ + { name = "pandas", specifier = ">=3.0.3" }, + { name = "tabulate", specifier = ">=0.10.0" }, +] + +[[package]] +name = "numpy" +version = "2.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/ad/fed0499ce6a338d2a03ebae59cd15093910c8875328855781952abf6c2fe/numpy-2.4.6.tar.gz", hash = "sha256:f3a3570c4a2a16746ac2c31a7c7c7b0c186b95ce902e33db6f28094ed7387dda", size = 20735807, upload-time = "2026-05-18T23:37:14.07Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/49/ec46835a70be8fa6446c495126ac84fdb28cb2558e1620ffb87a10c8b64c/numpy-2.4.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0280e0356c0829a18d9de1cb7eee50ec22ca639878d7240307ca0943d73cd2c4", size = 16969194, upload-time = "2026-05-18T23:33:13.503Z" }, + { url = "https://files.pythonhosted.org/packages/0e/0d/f5957185c0ee2f3e12f78715aa9e3b353fd83633316c8532b38faa37e3f6/numpy-2.4.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:110f8b71aacb688ec69062bb7f6938a0f8acb01b7c1c4beb453c65b6d234584d", size = 14964111, upload-time = "2026-05-18T23:33:17.795Z" }, + { url = "https://files.pythonhosted.org/packages/ad/40/40a40ee0ddf7ceb782c49af278894b686e586d65d8c1889c8b5da01a3d7d/numpy-2.4.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4cfe66903cc32a9921a6733d96b19bb6abf310397581bbad89c228f5abaf0ee8", size = 5469159, upload-time = "2026-05-18T23:33:20.654Z" }, + { url = "https://files.pythonhosted.org/packages/63/13/f9a8046535cb21deae82f8d03de9617e08882d274fad2539630761888228/numpy-2.4.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8155154c7c691289fe18f510b5d4657c68c67989f293f0535a91360392ff6538", size = 6798936, upload-time = "2026-05-18T23:33:22.987Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/a8/6fa8c1a345a8c85dbb21932c447bee07c30a2c2a3f31e369c0a84b300147/numpy-2.4.6-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ab0a9c4ffb1a6d95ef519fe4247dba8eb6b18ad93999f76b7f657039acabd47", size = 15966692, upload-time = "2026-05-18T23:33:26.62Z" }, + { url = "https://files.pythonhosted.org/packages/02/03/74fe2a4cb3817d94d86402f2506554130a2f01414e299b5a843e5a8a957f/numpy-2.4.6-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:89cd468399cfd2504718f0ba50e410dca55a170b61a02ad92bb18c8a65186e93", size = 16918164, upload-time = "2026-05-18T23:33:29.955Z" }, + { url = "https://files.pythonhosted.org/packages/c5/80/3615be3313f7e7696609bc194b9f0101da809df79e859bdb84e0cd043f46/numpy-2.4.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c2d37ab77531417474168eb79d6d80b14f821a966818505d03013d0833edb7a8", size = 17322877, upload-time = "2026-05-18T23:33:34.724Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ac/a691e0fe2675e370d0e08ff905adc49a1c8830e8cae03efe4477e92cd55d/numpy-2.4.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f407cb6b8e9d6d8c626bc73c945db1706035af8fd632295547bf1c9e46d092d6", size = 18651487, upload-time = "2026-05-18T23:33:38.217Z" }, + { url = "https://files.pythonhosted.org/packages/15/a7/9bc1cd626d7bf6869bfedf27b91b6ab5dd607758bf8e959d6fa80c6a59cb/numpy-2.4.6-cp311-cp311-win32.whl", hash = "sha256:ddea102b48f9e339f3948bf22040944184627a30fdf7f858667673b9c5f033c8", size = 6233945, upload-time = "2026-05-18T23:33:41.331Z" }, + { url = "https://files.pythonhosted.org/packages/c5/31/7fc6239c12bce7e931463251cca4426c465e1876ba3cc785402ef4dd8f4e/numpy-2.4.6-cp311-cp311-win_amd64.whl", hash = "sha256:1e254a00cdf42b1e4d5b3d68d33af63268d41340d8885df2ab6470f2e1500147", size = 12608406, upload-time = "2026-05-18T23:33:44.131Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/83/140f85a466595a16382996a1bf06b2b54bcd597488921b0c9daaeeda72af/numpy-2.4.6-cp311-cp311-win_arm64.whl", hash = "sha256:ed9749eef4cbd126da3dc1d6bcb3a57f5eb7ac6a6484146bdbf743f552dfc577", size = 10479528, upload-time = "2026-05-18T23:33:50.725Z" }, + { url = "https://files.pythonhosted.org/packages/95/2a/3d7b5ac8aac24feaf9ad7ed58f45b0bbc06d37e4338ae84c9f2298b570f9/numpy-2.4.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:001fbb8e08d942dd57599e781f2472269ee7f2755fae407b4f67b2f0b17da3f1", size = 16689119, upload-time = "2026-05-18T23:33:54.065Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/92c4c131527599e8288d6918e888d88726f84d805d784b771f32408aeaef/numpy-2.4.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ebfb099f8dcf083deef3ac1ca4c1503f387cf76296fcb3816b66f5ecb5f54fdb", size = 14699246, upload-time = "2026-05-18T23:33:57.621Z" }, + { url = "https://files.pythonhosted.org/packages/ad/fe/c0a6b7b2ca128a8fb228575147073b660656734b8ebe4d76c8fd748dcc79/numpy-2.4.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:3213d622a0283a39a93d188f3cf72b26862df52fbb4ca3697f51705016523d41", size = 5204410, upload-time = "2026-05-18T23:34:00.302Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d4/9770d14ba719432bb90a421bfd443872ed0f70f7264b64bec12ea363d5fd/numpy-2.4.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:357cc07a6d7b0b182ff02249616a03742827ebb1277546b5c7cd7f7620a45698", size = 6551240, upload-time = "2026-05-18T23:34:02.852Z" }, + { url = "https://files.pythonhosted.org/packages/c9/c6/50a46a6205feba2343f1d6d17438107c5dc491ed1c736e6ea68689fd906b/numpy-2.4.6-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f9fb9157b4ce2971008323afe46053787b526ef624fea915b261468a8421a0f", size = 15671012, upload-time = "2026-05-18T23:34:05.485Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/60/14115e6364fa676c5397c2ad3004e527e9aa487abf5d0706ec81bbd08529/numpy-2.4.6-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90f9849678c75fe7afa2d348ac842c168b0a4d3d61919687216dfc547976d853", size = 16645538, upload-time = "2026-05-18T23:34:09.265Z" }, + { url = "https://files.pythonhosted.org/packages/ae/c5/693cbe59e57db94d2231fa519ca3978dc9e19da5a8f088588f5c6e947ff2/numpy-2.4.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1a2af6c6ef86344a6b0db6b97834208bf598db514f2b155042439b62605601a", size = 17020706, upload-time = "2026-05-18T23:34:13.053Z" }, + { url = "https://files.pythonhosted.org/packages/ef/fc/85b7c4eff9b4966ade25c2273cf7e7012e92366c032058653934b37de044/numpy-2.4.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e5805d5a22fd19c8ccff10a9561f9df94436b0545619ea579db2d3c35294bce2", size = 18368541, upload-time = "2026-05-18T23:34:17.024Z" }, + { url = "https://files.pythonhosted.org/packages/f6/81/e1b27545deedce7f4a0b348618c6b62d74e36a4dc9ccd42f3eb2f85eee32/numpy-2.4.6-cp312-cp312-win32.whl", hash = "sha256:e3eeb0aabd6bd5ce64faae67e9935203a6991b4bc2a485a767fbafb2c5125f45", size = 5962825, upload-time = "2026-05-18T23:34:20.3Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ca/feab00bd44aa5fe1ad2c18f08b4d3bb92e26484b0b1d1443897809ed528c/numpy-2.4.6-cp312-cp312-win_amd64.whl", hash = "sha256:d8e8286dd7cea7895157318d1b91cdacac64c479f3cbc8dce548331728484751", size = 12321687, upload-time = "2026-05-18T23:34:23.095Z" }, + { url = "https://files.pythonhosted.org/packages/63/cf/5a6d34850a39d1093558564f77ee8e8e0bee5061151b8f05a55711001ec7/numpy-2.4.6-cp312-cp312-win_arm64.whl", hash = "sha256:4081eb135ac24158bd51cdfbef16f1c64df7063b1143f24731387137c092bec8", size = 10221482, upload-time = "2026-05-18T23:34:25.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/82/bdab26d7438c6791ca31b7c024ca37c1eab8b726ba236129005cd4a06e45/numpy-2.4.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:511dbaf848decaaaf4b4ca48032619fb3138710c4bf7da7617765edad1ef96b0", size = 16684648, upload-time = "2026-05-18T23:34:29.41Z" }, + { url = "https://files.pythonhosted.org/packages/1b/30/a80189bcc7f5e4258b3fbc3968d909d1756f54d023299ecc39ad6fdb9ef8/numpy-2.4.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bf162abab1c1a736333192707cef898e735a5ca00f38f27eeedf44b39d9e85eb", size = 14693902, upload-time = "2026-05-18T23:34:33.013Z" }, + { url = "https://files.pythonhosted.org/packages/97/12/70b5d0d7c15e1ebb8a6a84a8caa1d19e181d84fb58bb6d70aca29099dec1/numpy-2.4.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:043191bfa8eab18c776647b62723ac9dddece59743b13f49b2016094129c2b3f", size = 5198992, upload-time = "2026-05-18T23:34:36.132Z" }, + { url = "https://files.pythonhosted.org/packages/ba/8c/ebd2a8f8a83541f8d38cc5667e8c2b69cecfd30da6e45693e8158857d44b/numpy-2.4.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:6180d8b35af935aed8ece3a85e0a43f87393ae0ac87c8d2c8bd2c993f7270ef3", size = 6546944, upload-time = "2026-05-18T23:34:38.484Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c5/7b863a97a91671a0338f4253bd3b5a3d3852f0692dae91711c9f4a10e787/numpy-2.4.6-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72fbe16c6fac95aedf5937fa873445cec2110be35d8a4e9433d7501fd98dae6b", size = 15669392, upload-time = "2026-05-18T23:34:41.257Z" }, + { url = "https://files.pythonhosted.org/packages/a5/9d/3584b9984ca4c047aea75214ce1a4c4c73d849bd71b604264b7f5653f8a8/numpy-2.4.6-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7830bab239b79cda9c08c2da014761cafb48da6150e1da17ac06283f43b6089", size = 16633220, upload-time = "2026-05-18T23:34:45.075Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/ae/7c67fba23bd98caec7c99261f3a16072ade14813486b0282cb29846de832/numpy-2.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ef4aea96ce4d3b074422cb4f2f64e216bf9e213004bb58ecfdf50ea02ea8eb9a", size = 17020800, upload-time = "2026-05-18T23:34:49.065Z" }, + { url = "https://files.pythonhosted.org/packages/d9/5d/3b6725cb31d983c5e66916f5d36f6d7e5521129e4c4404d64f918292a5b6/numpy-2.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dfa20cc6ca228e6b155b11da03825975ce66aea520985dbbddf0f2a5a495c605", size = 18357600, upload-time = "2026-05-18T23:34:52.709Z" }, + { url = "https://files.pythonhosted.org/packages/f7/da/2ccc6c2fe8898dee01d90c75c5f5f914a23daf99e3e0f59516a08760c8b5/numpy-2.4.6-cp313-cp313-win32.whl", hash = "sha256:56b39e5e0622a09a25bf5baf62f4bcf0cb8a41ae6e2819cf49bbc5a74c083f91", size = 5961134, upload-time = "2026-05-18T23:34:55.618Z" }, + { url = "https://files.pythonhosted.org/packages/b5/cd/9cc4dc876fb065d5c220aae4d5e14826b2715331bb7618ce1fb07a679d99/numpy-2.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:c4fc99836233ea196540b17ab0983aff60ed07941751930f5f4d05bc3b3b7359", size = 12318598, upload-time = "2026-05-18T23:34:58.928Z" }, + { url = "https://files.pythonhosted.org/packages/39/1e/c0bcba1f8694116485fe28fd1be698c278fcda4141c5b0e53a2aed8b12a8/numpy-2.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a7c711e21628b52034bb5ab8d1bce291f752fcc5e92accc615778acee1ff4778", size = 10222272, upload-time = "2026-05-18T23:35:02.167Z" }, + { url = "https://files.pythonhosted.org/packages/63/6d/cc5619247c8f4204e507f5883528372e4ac4bb189e579fb859a12e480b1f/numpy-2.4.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:112b06a867b235ef466ed3508ddf0238050df9c727cafb5301ac385b899189a1", size = 14821197, upload-time = "2026-05-18T23:35:05.468Z" }, + { url = "https://files.pythonhosted.org/packages/00/58/f1c39161c87d9e9bed660f1ed4bafc0e403d5ec9650b6dd77aead07d489b/numpy-2.4.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = 
"sha256:eaf7fa2de5c0be8ae6ff8e9bea2ccd725e980541244521d8d4b5f3354a27babe", size = 5326287, upload-time = "2026-05-18T23:35:08.693Z" }, + { url = "https://files.pythonhosted.org/packages/af/57/3917ab0fd97f271a8694513581b8a36c655f111c446852c302f04ccdb6fc/numpy-2.4.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:7265a2f3d436e54ef9f2b52b5c937e6be778781bd97a590319d7348f1c1ca997", size = 6646763, upload-time = "2026-05-18T23:35:11.459Z" }, + { url = "https://files.pythonhosted.org/packages/eb/0f/037e64c494b67581ae18193d770adef354c41f3f2c8ebf865602d949bf8f/numpy-2.4.6-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f74a575920ab21fe304421a3fc28793d82e299cae9eccb37084e9fc7f3617c20", size = 15728070, upload-time = "2026-05-18T23:35:14.79Z" }, + { url = "https://files.pythonhosted.org/packages/21/a6/5d2bae9c9542eb4df16dc9c46dc79c186e9bad53805dfa5399a6023c6db0/numpy-2.4.6-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ede83e07a75dd06bc501566c1eca2afc0d61677c1472ac9ad93fdee6e638a48d", size = 16681752, upload-time = "2026-05-18T23:35:18.836Z" }, + { url = "https://files.pythonhosted.org/packages/92/14/23d1dfb410ae362cd59ce53e936b1513d545eb40db3949ced632e19a459e/numpy-2.4.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:68bb27509ac1b9a3443094260f6326150663b06abe40b73a2f81160623da5b67", size = 17086024, upload-time = "2026-05-18T23:35:22.52Z" }, + { url = "https://files.pythonhosted.org/packages/4b/6e/23595a2c642cdf3bc567877064bdd7f91c8b0038a4453cf2daf7248eafe9/numpy-2.4.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a0df0043bdb289bde1f62da130d20df23d58b45429f752bc7a8fc5325a225ecd", size = 18403398, upload-time = "2026-05-18T23:35:26.398Z" }, + { url = "https://files.pythonhosted.org/packages/8a/90/0ac3bc947217e66dec77e7cbc6a1979d1af70b6461b82f620d3bccd5e4c8/numpy-2.4.6-cp313-cp313t-win32.whl", hash = "sha256:29a287e0cf63ff528da061de6b9f64a4618da591ca1046aafc54062e40ca7eab", size = 6084971, 
upload-time = "2026-05-18T23:35:29.387Z" }, + { url = "https://files.pythonhosted.org/packages/77/71/5673e351671a1d2bd6063b91b44f70c0affea7d1516fa7a6572941ba4aa1/numpy-2.4.6-cp313-cp313t-win_amd64.whl", hash = "sha256:25c692919ac5a01f170a3bfcd62d745b24fd095c353d50812637d6fcab442e75", size = 12458532, upload-time = "2026-05-18T23:35:32.175Z" }, + { url = "https://files.pythonhosted.org/packages/3f/88/19d3503c5046e688f049274b27a3ef3d771152fa80d3ba3d01a3dff61abe/numpy-2.4.6-cp313-cp313t-win_arm64.whl", hash = "sha256:1e978ec1e8bd0e0e4de6bb75de9d30cbb74db6b6a2bb727618613703ca0167dd", size = 10291881, upload-time = "2026-05-18T23:35:35.465Z" }, + { url = "https://files.pythonhosted.org/packages/f8/91/3ab2044d05fd16d343c5ac2e69b127f1b2854040dd20b193257c78028bd3/numpy-2.4.6-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06ca2f61ec4385a07a6977c55ba998a4466c123642b4a32694d3128fce18c079", size = 16683458, upload-time = "2026-05-18T23:35:38.353Z" }, + { url = "https://files.pythonhosted.org/packages/8e/62/764ce66fa4147ae6d73071a3abf804ffe606f174618697c571acdf26a7c9/numpy-2.4.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:38efbc8de75c7a0fc1ac190162d892787f3f47b57cc291231aafee36b80982b7", size = 14704559, upload-time = "2026-05-18T23:35:42.14Z" }, + { url = "https://files.pythonhosted.org/packages/60/61/23f27c172f022e04025b7dc2367f4d63c1a398120607ec896228649a6f48/numpy-2.4.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:d581b735e177fdcdce6fed8e7e8880a3fb6ee4e3653a3ac6af01c6f4c03effc5", size = 5209716, upload-time = "2026-05-18T23:35:45.377Z" }, + { url = "https://files.pythonhosted.org/packages/03/71/21cf70dc6ea3e3acb95fc53a265b2fc248b981f0194ceb5b475271b8809d/numpy-2.4.6-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:0a041d3d761dc3c35cc56ce0351506a02bcbc25f7b169f652435141a17db9096", size = 6543947, upload-time = "2026-05-18T23:35:47.926Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/91/64288395ee1799bd2e0b04a305dce9666da90c961e1f3fe982a05ee1c036/numpy-2.4.6-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40fdc1ae7125e518ea98e53e69a4ebc27e1fd50510c47b7ea130cf21e5e1d42b", size = 15685197, upload-time = "2026-05-18T23:35:50.863Z" }, + { url = "https://files.pythonhosted.org/packages/f3/eb/ebffaa97dc55502df69584a8f0dcf07f69a3e0b3e2323670a2722db9aa39/numpy-2.4.6-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a2c306dea656c12c68f51f4cea133cbe78ca7435eb28c735eac1d3ebe73be6e8", size = 16638245, upload-time = "2026-05-18T23:35:54.752Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0b/54f9da33128d7e350fab89c7455902eeae70349ee52bddb448dc4a576f45/numpy-2.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:33111801a01c12a8a1e3721f0a9232f8cfc8ae2c6b7098167e6f623c6073f402", size = 17036587, upload-time = "2026-05-18T23:35:58.355Z" }, + { url = "https://files.pythonhosted.org/packages/b6/f0/fdebc1052db1cc37c64beb22072d67cd6d1c71adca1299f53dec2b5e20d3/numpy-2.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae506e6902902557576a26ff33eda8695e7ecb3cb36c3b573a0765dee114ebdb", size = 18363226, upload-time = "2026-05-18T23:36:02.845Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b4/298628d98c72b57e57f7165ae6a481a1deaf6f3c28262a6e4c739c275930/numpy-2.4.6-cp314-cp314-win32.whl", hash = "sha256:aaf159caa35993cb1f56fb9b8e4610d35758e7ca005412eb1daa856a78c9c4b1", size = 6010196, upload-time = "2026-05-18T23:36:05.92Z" }, + { url = "https://files.pythonhosted.org/packages/df/ac/46de6dda46478f7942f839e094970be2d4a861e005c4b3bf07c92e291a09/numpy-2.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:b507f5c4c1d508876d1819b6bf9a49d365b96320b5d4993426b33a23ca4b8261", size = 12450334, upload-time = "2026-05-18T23:36:09.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/92/b8b798ac784102c0da830d2257d59358e3d3d90d1e2b3f2575dad976c5cf/numpy-2.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:6f41ae150c4e32db4f3310cdaf64b1593a03dbabe29eec77fc9b50fe64061df6", size = 10495678, upload-time = "2026-05-18T23:36:12.766Z" }, + { url = "https://files.pythonhosted.org/packages/30/34/ec28d1aa8115971537c01469ab2011ee96827930f0a124de1000cc2a7ed7/numpy-2.4.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ece3d2cfe132e7d51f44a832b303895e6f2d499c5e74dfbdb06ee246147a304a", size = 14823672, upload-time = "2026-05-18T23:36:16.473Z" }, + { url = "https://files.pythonhosted.org/packages/16/bd/f6d1fede4e54e8042a7ff97bb495510f3c220f94bcd9e8b228e87c92cc0d/numpy-2.4.6-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:e3e5193ef5a3dc73bceee50f7fdc2c90dbb76c42df8d8fae3d1067a583df579e", size = 5328731, upload-time = "2026-05-18T23:36:19.767Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f0/e105b9e2fd728a9910103884decd6951d9dd73896b914a98d9a231de02ee/numpy-2.4.6-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:17f9ade344e7d9b464a084d69bcf18fc691cb1db67c62ed80820bf4926d78f0e", size = 6649805, upload-time = "2026-05-18T23:36:22.266Z" }, + { url = "https://files.pythonhosted.org/packages/82/dd/1206a7ca6ab15e3f02069707ca96222e202af681bb73756da7527f3cb837/numpy-2.4.6-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cd5ffd25db4e7ba6a375693b3fc0fc1791ec636c17db3720da19bde7180ec43", size = 15730496, upload-time = "2026-05-18T23:36:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/51/e7/38d3ea825dcab85a591734decb2f6c67caa7c8367d374df1a1c3842f9b07/numpy-2.4.6-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d92c3819208a60205a12a245c91ad70cb0a85336659b19b834205573ac8456e", size = 16679616, upload-time = "2026-05-18T23:36:29.652Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/b7/caabfdf53edf663e0b4eb74d7d405d83baef09eb5e83bcd32d601d72b93e/numpy-2.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e85b752a1e912b70eaad4fafbd4d1238007ab221de2009b9a2f5ae7461239895", size = 17085145, upload-time = "2026-05-18T23:36:33.449Z" }, + { url = "https://files.pythonhosted.org/packages/f9/45/68d7c33a6bcf3e5aa3bdbd57a367e6f615286dfd6482f97e8ffeb734306e/numpy-2.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:29cb7f67d10b479ff07c17d33e39f78c07f71c40ef30d63c153d340e96cd3fb4", size = 18403813, upload-time = "2026-05-18T23:36:37.369Z" }, + { url = "https://files.pythonhosted.org/packages/9c/50/0753655aa844c99cd9e018aacf76f130f1bd81d881bb74bc0aef5d73a8ba/numpy-2.4.6-cp314-cp314t-win32.whl", hash = "sha256:260a5d70215b61ab4fadf5c7baacd64821842975eea312125ed3c39a6391b063", size = 6156982, upload-time = "2026-05-18T23:36:40.817Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d4/7c67becf668f973cb490cec3e98dfd799d866f9c989a54d355672cfa0db6/numpy-2.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:81a1cca95ed5bb92aa8b10dd2cdc9a0d3853a50fad926c28b5d7e8ea54389627", size = 12638908, upload-time = "2026-05-18T23:36:43.996Z" }, + { url = "https://files.pythonhosted.org/packages/43/bb/e1c71a4295b1b1d1393d50dbb4f2a36283c6859d9d3892e84f00ec5a91d5/numpy-2.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:0c9136e14ed34a9e343a31c533d78a9813a69a3148332bce5e9821cb2f996e66", size = 10565867, upload-time = "2026-05-18T23:36:47.114Z" }, + { url = "https://files.pythonhosted.org/packages/de/12/b422cc84439adc0d00de605bf4a308890ae5c26f2c71fbd73e5d08fbb0dd/numpy-2.4.6-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:55cced7c52e981362f708ad635198e97a752dfba412cc03c23bbf3bd8d5cd662", size = 16847511, upload-time = "2026-05-18T23:36:50.673Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/53/f481bef68011740f8849418d82db07230e825013f31f4eef5ba5b805316a/numpy-2.4.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d6da64deb6b8ed903e7560180a92f2d804ee1ba5eeb849ac2748b8c1aba1f6d7", size = 14889064, upload-time = "2026-05-18T23:36:53.879Z" }, + { url = "https://files.pythonhosted.org/packages/7f/57/42ed575c10ced8af951d426bc4e1f8aff16fd851db33f067036215a7f860/numpy-2.4.6-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:68a5124b13fa6cc2086764a20005d30bc0548146f7f5322f02fce212ca14317f", size = 5394157, upload-time = "2026-05-18T23:36:57.194Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ef/f66cc724fcc36c1e364c67f51ae9146090b8b584f27d58b97fdae3edd737/numpy-2.4.6-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:948424b06129ce883307e8cff868c31396d8dc7630a59c61d70d98dbe70f222c", size = 6708728, upload-time = "2026-05-18T23:36:59.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/9c/c531f2293b91265d8b48e9b329f54fdd7ffae73cb4134ea10cca4237e9cc/numpy-2.4.6-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5dbbdb29840ca3d91ee0fece42fc29278886d908280bfec0a5846c6f901a3eb0", size = 15798374, upload-time = "2026-05-18T23:37:02.674Z" }, + { url = "https://files.pythonhosted.org/packages/1a/b0/413077f6b1153ed3cba361401c6783bbad6114804a000cc22eb71c13e190/numpy-2.4.6-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8ad03c0965fb3c692200e74d458ca28c1dbb4ce96f9a479a8aa041ad5fabca02", size = 16747286, upload-time = "2026-05-18T23:37:06.327Z" }, + { url = "https://files.pythonhosted.org/packages/15/ce/e5ec180bc41812edcd8daeb8639d205622c0e8c02259d8ab25a0201b3c2a/numpy-2.4.6-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:2803abfebfc990042cd494d8ce2d5f82e9d847af6d35ec486923aa19dbad5e73", size = 12504263, upload-time = "2026-05-18T23:37:09.715Z" }, +] + +[[package]] +name = "pandas" +version = "3.0.3" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "tzdata", marker = "sys_platform == 'emscripten' or sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/87/4341c6252d1c47b08768c3d25ac487362bf403f0313ddae4a2a26c9b1b4c/pandas-3.0.3.tar.gz", hash = "sha256:696a4a00a2a2a35d4e5deb3fc946641b96c944f02230e4f76137fe35d806c4fc", size = 4651414, upload-time = "2026-05-11T18:54:29.21Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/16/b5c76b838fd9bf6ce84d3a53346b8874ec05c5f0040d75ef2c320100cd2a/pandas-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:455f6f8139d4282188f526868dbc3c828470e88a3d9d59a891bd46a455f21b98", size = 10338495, upload-time = "2026-05-11T18:52:11.558Z" }, + { url = "https://files.pythonhosted.org/packages/5a/b0/a4ffc4ae74d2d822200dcc46898987d8eb6032d1e2b219cae39da6f5cbcc/pandas-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4e15135e2ee5df1063313e2425ceef8ac0f4ae775893815b0923651b806a5639", size = 9938250, upload-time = "2026-05-11T18:52:17.005Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b2/3323601a52caee42c019e370090ca4544b241437240ca04f786cce82b0cf/pandas-3.0.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:05f1f1752b8533ea03f7f39a9c15b1a058d067bb48f4748948e7a8691e0510f2", size = 10770558, upload-time = "2026-05-11T18:52:19.865Z" }, + { url = "https://files.pythonhosted.org/packages/32/f1/bbecd2f867b97abebe0f9b53d750f862251b40337e061b36676ded3d920f/pandas-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a1e45c80cceb3b4a21bc5939d52e8cbd8d9b7305309219d59e9754d9ce09e27", size = 11274611, upload-time = "2026-05-11T18:52:22.622Z" }, + { url = "https://files.pythonhosted.org/packages/7f/4f/eafabf2d5fae5adf143b4d18d3706c5efdc368a7c4eb1ee8a3eddabbd0f6/pandas-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:14da8316da4d0c5a77618425996bfb1248ca87fc2c1486e6fde4652bd18b5824", size = 11784670, upload-time = "2026-05-11T18:52:25.4Z" }, + { url = "https://files.pythonhosted.org/packages/49/44/1eb20389301b57b19cc099a1c2f662501f72f08a65f912d05822613c1532/pandas-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a55066a0505dae0ba2b50a46637db34b46f9094c65c5d4800794ef6335010938", size = 12353708, upload-time = "2026-05-11T18:52:28.139Z" }, + { url = "https://files.pythonhosted.org/packages/eb/62/c321f13b5ba1819fc8dca456c7fce578da2dcfecff1abbf0eaddf8406c0f/pandas-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6674ab18ad8c57802867264b00e15e7bb904700cdd9046e3b2fa1fce237439ea", size = 9907609, upload-time = "2026-05-11T18:52:30.982Z" }, + { url = "https://files.pythonhosted.org/packages/53/85/1b7f563ebc6357c27233a02a96b589bcce1fa9c6eb89fb4f0e56421d277e/pandas-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:5cc09a68b3120e0f54870dede8287a7bb1fa463907e4fcec1ea77cab6179bf7a", size = 9165596, upload-time = "2026-05-11T18:52:33.334Z" }, + { url = "https://files.pythonhosted.org/packages/24/f1/392f8c5bfc16f66a0d2d41561c01627c228fe7ed2a0d056ef11315042570/pandas-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fed2ff7fd9779120e388e285fc029bd5cf9490cdd2e4166a9ee22c0e49a9ab09", size = 10357846, upload-time = "2026-05-11T18:52:36.143Z" }, + { url = "https://files.pythonhosted.org/packages/cf/3d/b16412745651e855f357e5e66930248688378853a6e2698a214e331fba1f/pandas-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b168fc218fd80a6cbdbdbc1a97ddc7889ed057d7eb45f50d866ceab5f39904c4", size = 9899550, upload-time = "2026-05-11T18:52:38.976Z" }, + { url = "https://files.pythonhosted.org/packages/31/a8/fa2535168fffcedf67f4f6de28d2dd903a747ca7c8ea6989451aaeb3a92f/pandas-3.0.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0383c72c75cdcca61a9e116e611143902dbfd08bff356829c2f6d1cf40a9ca8c", size = 10412965, upload-time = 
"2026-05-11T18:52:41.915Z" }, + { url = "https://files.pythonhosted.org/packages/65/b6/09b01cdbc15224e2850365192d17b7bdebb8bdbd8780ed221fcdf0d9a515/pandas-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6dc0b3fd2169c9157deed50b4d519553a3655c8c6a96027136d654592be973a9", size = 10894600, upload-time = "2026-05-11T18:52:45.02Z" }, + { url = "https://files.pythonhosted.org/packages/c9/a4/2eb28f2fccb4ced4a2c79ab2a5dee9ade1ebf44922ebad6fea158c9f95d4/pandas-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e65d5407dc0b394f509699650e4a2ec01c0514f21850f453fa60f3be79a5dbf", size = 11422824, upload-time = "2026-05-11T18:52:48.058Z" }, + { url = "https://files.pythonhosted.org/packages/f8/45/830bb57f533a4604b355e07edcb8ea18cf88b5f94e5fca92f27052d7c597/pandas-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f8894dc474d648fe7b6ff0ca9b0bd73950d19952bc1a6534540762c5d79d305c", size = 11950889, upload-time = "2026-05-11T18:52:50.905Z" }, + { url = "https://files.pythonhosted.org/packages/b9/c5/fc1b368f303087d20e8c9bf3d6ceb186263cfac0ade735cd938538bea839/pandas-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:c7be265b62cef88e253a941e4698604973736dcfe242fdb5198f0f7bc473cdcc", size = 9755463, upload-time = "2026-05-11T18:52:53.386Z" }, + { url = "https://files.pythonhosted.org/packages/86/bd/fda8f9705b1b09c6ebe14bfc0fa0e4ec8584d54ea673628f157ff55131af/pandas-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:557409bc4178e70ee8d9ddb494798e51ebf6ea59330f6be22c51bab2a7db6c49", size = 9066158, upload-time = "2026-05-11T18:52:56.038Z" }, + { url = "https://files.pythonhosted.org/packages/c5/90/62d8302883c44308c477e222c3daf7c813a34c8e96985882fbd53d964352/pandas-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:67b3b64c11910cfa29f4e94a14d3bff9ee693b6fc76055e7cad549cee0aec5fa", size = 10331071, upload-time = "2026-05-11T18:52:58.838Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/ae/6a6493c783a101f165e4356953ba3c74d6f77f0042fa7d753da9dfbb640c/pandas-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39436b377d56d2a2e52d0395bdbee171f01068e99af5250509aceeb929f765c7", size = 9875690, upload-time = "2026-05-11T18:53:01.431Z" }, + { url = "https://files.pythonhosted.org/packages/62/7c/5df8e9f56c69a2769fbe9382a5ef8f2658c007e376434e1e2cbb57ad895f/pandas-3.0.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4be06d68f9ddcfc645b87534911da79a8fbffc7573c80e0edcf42a5020624d8", size = 10381634, upload-time = "2026-05-11T18:53:04.393Z" }, + { url = "https://files.pythonhosted.org/packages/99/68/1237369725aa617bb358263d535803e3053fdbc593513ec5ed9c9896b5b6/pandas-3.0.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4eeb6830daf35a71cc09649bd823e2b542dac246cdee9614c6e4bd65028cd6a", size = 10891243, upload-time = "2026-05-11T18:53:07.643Z" }, + { url = "https://files.pythonhosted.org/packages/25/93/77d108e8af7222b4a503ebde0e30215b1c2e4f8e53a526431890f22d5586/pandas-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1928e07221f82db493cd4af1e23c1bfca524a19a4699887975bff68f49a72bfb", size = 11388659, upload-time = "2026-05-11T18:53:10.634Z" }, + { url = "https://files.pythonhosted.org/packages/d0/bd/eff5b4399f332ac386c853f6cd2bd3fa2ca0061b9f36ecd9c4d7c4265649/pandas-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51b1fe551acb77dac643c6fda86084d8d446c10fe64b06a9cc29c4cc8540e7f2", size = 11942880, upload-time = "2026-05-11T18:53:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/2c/20/559ace4200982c3887d0b86bfd0d856a2143ef8ddab63cc07934951a964c/pandas-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:a82d532a3351d435432cd913edbccaf8b8e01d4dd0e5ced5a8d2e8ecd94c7e44", size = 9757091, upload-time = "2026-05-11T18:53:16.306Z" }, + { url = 
"https://files.pythonhosted.org/packages/3a/66/69055a09fe200f29f922a3eeec4804611900b95f52d932ece3393c3c0c19/pandas-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:275c14e0fce14a2ec20eee474aecd305478ea3c1e6f6a9d8fe219a165542717e", size = 9057282, upload-time = "2026-05-11T18:53:18.768Z" }, + { url = "https://files.pythonhosted.org/packages/57/0e/efe801b0e6811e8e650cd21b7f2608e30f08a7067e2bf6e8752b0d56ee3c/pandas-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:46997386d528eb40376ecd6b033cf4a8a1e5282580f68f43de875b78cba2199d", size = 10767016, upload-time = "2026-05-11T18:53:21.227Z" }, + { url = "https://files.pythonhosted.org/packages/ea/dc/eb55135a1d5f0f0519f28da1f609a206d2cad1f9c35c32d51e38dd7261ae/pandas-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261e308dfb22448384b7580cf719d2f998fe2966c92893c3e77d14008af1f066", size = 10420210, upload-time = "2026-05-11T18:53:23.982Z" }, + { url = "https://files.pythonhosted.org/packages/c6/3e/b1d5d955ce33ffecb407465a60bc32769d74fcf68224b7ae67ae11d4dea4/pandas-3.0.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd1a5d1def6a46002e964510bdc67c368aa0951df5d1d9f8365336f5a1f490cd", size = 10336126, upload-time = "2026-05-11T18:53:26.731Z" }, + { url = "https://files.pythonhosted.org/packages/f5/76/a01261711ab60a22d71b862f0de20e4c504bf80457270ad8cb42110f6abc/pandas-3.0.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d72828c20c6d6e83e1e22a6a3b47b326b71664112fa9705dcbccfd7a39b62085", size = 10728051, upload-time = "2026-05-11T18:53:29.125Z" }, + { url = "https://files.pythonhosted.org/packages/e9/21/ea191195e587b18cf682e97f433f81b2d0fbe341380e80a3e0d6e4403c8e/pandas-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d26cbe1fcfc12e8fd900e2454163e466b2d3af84f7c75481df7683ffc073d870", size = 11350796, upload-time = "2026-05-11T18:53:32.056Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/69/f0eaaf54939f0e8c6768fd06be9af2cef9b36048b96dfb9e1b2c685a807e/pandas-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e91cec1879ada0624fc3dc9953c5cbd60208e59c0db28f540c5d6d47502422f", size = 11799741, upload-time = "2026-05-11T18:53:34.985Z" }, + { url = "https://files.pythonhosted.org/packages/45/a4/865e0e510cae5fc2194de4db28be638952de942571ba9125934fd9c01d47/pandas-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:08d789b41f87e0905880e293cedf6197ce71fe67cc081358b1e148a491b9bd13", size = 10499958, upload-time = "2026-05-11T18:53:37.857Z" }, + { url = "https://files.pythonhosted.org/packages/86/54/effdcc3c0ff7a08037889200e148ebe94c16c4f653be078c7b3675955df1/pandas-3.0.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3650109c0f22879df8bd6179ab9ee3d7f1d1d4e7e0094a3f0032d9f51e2e64ac", size = 10336065, upload-time = "2026-05-11T18:53:41.099Z" }, + { url = "https://files.pythonhosted.org/packages/68/10/bf2d6738d72748b961a3751ab89522d58c54efc36a8e1a12161216cd45cf/pandas-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bab900348131a7db1f69a7309ef141fd5680f1487094193bcbbb61791573bf8f", size = 9926101, upload-time = "2026-05-11T18:53:43.515Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e9/e35cf11c8a136e757b956f5f0efdcaa50aecde85ea055f1898dfc68262f3/pandas-3.0.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba7e08b9ac1d54569cd1e256e3668975ed624d6826f7b68df0342b012007bddb", size = 10457553, upload-time = "2026-05-11T18:53:46.394Z" }, + { url = "https://files.pythonhosted.org/packages/58/3b/1cdec6772bdbaf7b25dab360c59f03cadf05492dd724c6540af905389b07/pandas-3.0.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d71c63ae4ebdbf70209742096f1fc46a83a0613c99d4b23766cced9ff8cd62a", size = 10914065, upload-time = "2026-05-11T18:53:49.134Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/c2/1ef644445fcd72e3627bceec77e3560636f87ddce4ed841afe76b83b5bf9/pandas-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e3a2ec42c98ffa2565a67e08e218d06d72576d758d90facb7c00805194d8f360", size = 11459188, upload-time = "2026-05-11T18:53:52.527Z" }, + { url = "https://files.pythonhosted.org/packages/7e/49/4d8d4f42cbc9c4adc7a1870f269c02cbd6cd40d059622c06fb298addcbad/pandas-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:335f62418ed562cfc3c49e9e196375c28b729dcef8543abf4f9438e381bf3c76", size = 11982966, upload-time = "2026-05-11T18:53:55.043Z" }, + { url = "https://files.pythonhosted.org/packages/38/55/792619469bab9882d8bbd5865d45a72f6478762d04a9af4bf0d08c503e95/pandas-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:3c20a521bbb85902f79f7270c80a59e1b5452d96d170c034f207181870f97ac5", size = 9876755, upload-time = "2026-05-11T18:53:58.067Z" }, + { url = "https://files.pythonhosted.org/packages/2a/af/33c469653b0ba03b50c3a98192d4c07f0c75c66b263ceb097fce0ee97d31/pandas-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:a2d2dff8a04f3917b55ab3910c32990f8ddf7eceba114947838cefa976a68977", size = 9198658, upload-time = "2026-05-11T18:54:00.733Z" }, + { url = "https://files.pythonhosted.org/packages/a2/fa/b8c257bd76b8bd060c3a9151c1fca05e9b9c5e3af5d0f549c0356f6d143d/pandas-3.0.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:0d589105b3c14645af1738ff279b2995102d8f7a03b0a66dc8d95550eb513e04", size = 10787242, upload-time = "2026-05-11T18:54:03.564Z" }, + { url = "https://files.pythonhosted.org/packages/54/eb/f19206ffb0bf1919002969aa448b4702c6594845156a6f8050674855aac3/pandas-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:13fc1e853d9e04743d11ba75a985ccbc2a317fe07d8af61e445a6fd24dacd6a6", size = 10436369, upload-time = "2026-05-11T18:54:06.311Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/24/c7c39fb4fe22b71a0c2d78bf0c585c600092d85f94f086d2b3b2f6ca27e2/pandas-3.0.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:819959dab7bbd0049c15623fbac4e29a191b9528160a61fb1032242d8ced2d9c", size = 10358306, upload-time = "2026-05-11T18:54:09.085Z" }, + { url = "https://files.pythonhosted.org/packages/16/ec/dd2a9eb7fa1204df88c0864164e35b228ac581062ac612ba0a67fd812e4c/pandas-3.0.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:60ae316d3fd75d1858d450d0db0103ea2be3e7d4a95ec2f064f7e2ae63f7b028", size = 10758394, upload-time = "2026-05-11T18:54:11.956Z" }, + { url = "https://files.pythonhosted.org/packages/95/6e/00c61ea8e85b4f6d8d35e11852a1a4998fc7fafc91c6a602d1cc9c972d64/pandas-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bd3a518890b400d32f9023722dc9a9a5c969f00b415419a3c06c043f09bb5d7d", size = 11375717, upload-time = "2026-05-11T18:54:14.539Z" }, + { url = "https://files.pythonhosted.org/packages/31/89/8fc1c268969fac43688d65fd92e67df24bd128d53cb4d2eee534cd307399/pandas-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c39be2d709d01fa972a0cabc522389fceca4f3969332ba25a7d6c5802cf976a", size = 11828897, upload-time = "2026-05-11T18:54:17.146Z" }, + { url = "https://files.pythonhosted.org/packages/56/3b/e7d20dea247a3e6dc0bd8a6953854afbedc03951def4e7371e05e7263e25/pandas-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4db8c527972a821cf5286b40ccc57642a39bc62e62022b42f99f8a67fca8c3a1", size = 10900855, upload-time = "2026-05-11T18:54:19.72Z" }, + { url = "https://files.pythonhosted.org/packages/0f/54/68a0978d1ef8502b8492099beaa6e7a0c1b32e3b5d4f677f5810cb08711c/pandas-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b2c95f8bfc1ee412bf482605d7bfd30c12d1d26bd59fdd91efeef1d4718decb1", size = 9466464, upload-time = "2026-05-11T18:54:22.754Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "tabulate" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/58/8c37dea7bbf769b20d58e7ace7e5edfe65b849442b00ffcdd56be88697c6/tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d", size = 91754, upload-time = "2026-03-04T18:55:34.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = 
"sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" }, +] + +[[package]] +name = "tzdata" +version = "2026.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/19/1b9b0e29f30c6d35cb345486df41110984ea67ae69dddbc0e8a100999493/tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", size = 198254, upload-time = "2026-04-24T15:22:08.651Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/e4/dccd7f47c4b64213ac01ef921a1337ee6e30e8c6466046018326977efd95/tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7", size = 349321, upload-time = "2026-04-24T15:22:05.876Z" }, +] From dab97c4cb9465142e6f54fe87d952e9ec8b9e7a6 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 31 May 2026 21:14:40 +0300 Subject: [PATCH 4/5] Optimized by adding compiler flags and swapping loop ordering --- .gitignore | 3 +- CMakeLists.txt | 2 +- README.md | 203 +++++++++++++++---------------------------------- main.cpp | 22 +++--- 4 files changed, 74 insertions(+), 156 deletions(-) diff --git a/.gitignore b/.gitignore index 335b146..cf71b5c 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ Makefile Thumbs.db # Output files -data/*/result.raw \ No newline at end of file +data/*/result.raw +timing_results.md \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index abcfc6d..f212439 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) find_package(OpenMP REQUIRED) if(OpenMP_CXX_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} -Ofast -march=native -funroll-loops") endif() if(APPLE) diff --git a/README.md b/README.md index 51c7f2a..ea5c136 100644 --- a/README.md +++ b/README.md @@ 
-6,23 +6,25 @@ ## Homework Assignment 4: Optimizing Matrix Multiplication in C++ -**Due Date**: 31/05/2026 +### Task Distribution -**Points**: 100 +| Student | Task | +|-----------------------------------|------| +|Ha Do (Student ID: 2402703) | Naive and Blocked Matrix Multiplication and implementing utility functions | +|Abhishek Roy (Student ID: 2502895) | OpenMP Matrix Multiplication and Optimizations for other matrix multiplication functions | --- ### Assignment Overview -Welcome to the last homework assignment of the Parallel Programming course! In this assignment, you will optimize the performance of a naive matrix multiplication +Optimized the performance of a naive matrix multiplication implementation using two techniques: -1. **Cache Optimization via Blocked Matrix Multiplication**: Improve data locality to reduce cache misses. -2. **Parallel Matrix Multiplication using `OpenMP`**: Parallelize the computation across multiple threads. +1. **Cache Optimization via Blocked Matrix Multiplication**: Improved data locality to reduce cache misses. +2. **Parallel Matrix Multiplication using `OpenMP`**: Parallelized the computation across multiple threads. -Your task is to implement both optimizations in the provided C++ `main.cpp` file, measure their performance, and compare the -wall clock time of the naive, cache-optimized, and parallel implementations for each test case. This assignment builds -on naive matmul implementation, so ensure your naive implementation is correct before starting. +The task was to implement both optimizations, measure their performance, and compare the +wall clock time of the naive, cache-optimized, and parallel implementations for each test case. --- @@ -41,7 +43,7 @@ The naive matrix multiplication (with triple nested loops) accesses memory in a temporal locality: - **Spatial Locality**: Accessing consecutive memory locations (e.g., elements in the same cache line). 
-- **Temporal Locality**: Reusing the same data multiple times while it’s still in the cache. +- **Temporal Locality**: Reusing the same data multiple times while it's still in the cache. Blocked matrix multiplication divides the matrices into smaller submatrices (blocks) that fit into the cache. By performing computations on these blocks, you ensure that data is reused while it resides in the cache, reducing cache @@ -68,9 +70,6 @@ for (ii = 0; ii < m; ii += block_size) - **Outer loops (ii, jj, kk)**: Iterate over blocks. - **Inner loops (i, j, k)**: Compute within a block, reusing data in the cache. -**Task**: Implement the `blocked_matmul` function in the provided `main.cpp`. Experiment with different block sizes (e.g., -16, 32, 64) and report the best performance. - --- #### 2. Parallel Matrix Multiplication with OpenMP @@ -83,8 +82,8 @@ be parallelized, as each element of the output matrix \( C \) can be computed in **Parallelizing with OpenMP** -Use OpenMP to parallelize the outer loop(s) of the naive matrix multiplication. For example, parallelize the loop over -rows of \( C \): +OpenMP was used to parallelize the outer loop(s) of the naive matrix multiplication. For example, the loop over +rows of \( C \) was parallelized: ```cpp #pragma omp parallel for @@ -99,139 +98,59 @@ for (i = 0; i < m; i++) without locks. - Use `omp_get_wtime()` to measure wall clock time for accurate performance comparisons. -**Task**: Implement the `parallel_matmul` function in the provided `main.cpp` using `OpenMP`. Test with different numbers of -threads (e.g., 2, 4, 8) by setting the environment variable `OMP_NUM_THREADS`. - --- #### 3. Performance Measurement For each test case (0 through 9 in the `data` folder): -- Measure the **wall clock time** for: - - Naive matrix multiplication (`naive_matmul`). - - Cache-optimized matrix multiplication (`blocked_matmul`). - - Parallel matrix multiplication (`parallel_matmul`). 
-- Use `omp_get_wtime()` for timing, as it provides high-resolution wall clock time. -- Report the times in a table in your submission README.md, including: - - Test case number. - - Matrix dimensions (m × n × p). - - Wall clock time for each implementation (in seconds). - - Speedup of blocked and parallel implementations over the naive implementation. - -Example table format: - -| Test Case | Dimensions (m × n × p) | Naive Time (s) | Blocked Time (s) | Parallel Time (s) | Blocked Speedup | Parallel Speedup | -|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| -| 0 | 512 × 512 × 512 | 2.345 | 0.987 | 0.543 | 2.38× | 4.32× | - ---- - -#### Matrix Storage and Memory Management - -- Row-major order for all matrices -- Use C-style arrays with manual memory management (`malloc` or `new`, `free` or `delete`). -- Do not use smart pointers. - ---- - -#### Input/Output and Validation - -- Use the same input/output format as Assignment 1: - - Input files: `data//input0.raw` (matrix \( A \)) and `input1.raw` (matrix \( B \)). - - Output file: `data//result.raw` (matrix \( C \)). - - Reference file: `data//output.raw` for validation. -- The executable accepts a case number (0–9) as a command-line argument. -- Validate correctness by comparing `result.raw` with `output.raw` for each implementation. +- Measured the **wall clock time** for: + - Naive matrix multiplication (`naive_matmul`). + - Cache-optimized matrix multiplication (`blocked_matmul`) with block size 32. + - Parallel matrix multiplication (`parallel_matmul`) with OMP_NUM_THREADS = 8. +- Used `omp_get_wtime()` for high-resolution wall clock timings. + +#### 4. Results + +The results in the table below come from the basic implementation of the aforementioned matrix multiplications. 
+ +| Test Case | Dimensions (m x n x p) | Naive Time (s) | Blocked Time (s) | Parallel Time (s) | Blocked Speedup | Parallel Speedup | +|------------:|:-------------------------|-----------------:|-------------------:|--------------------:|:------------------|:-------------------| +| 0 | 64x64x64 | 0.00101837 | 0.000795773 | 0.00089702 | 1.27973x | 1.13528x | +| 1 | 128x64x128 | 0.00414562 | 0.00348123 | 0.00195844 | 1.19085x | 2.11679x | +| 2 | 100x128x56 | 0.0022999 | 0.00236687 | 0.00106175 | 0.971706x | 2.16614x | +| 3 | 128x64x128 | 0.00355272 | 0.00337588 | 0.00159078 | 1.05238x | 2.23332x | +| 4 | 32x128x32 | 0.00070751 | 0.000524026 | 0.000757544 | 1.35014x | 0.933952x | +| 5 | 200x100x256 | 0.0176089 | 0.016518 | 0.00467121 | 1.06605x | 3.76967x | +| 6 | 256x256x256 | 0.0536581 | 0.0567246 | 0.0120156 | 0.945941x | 4.4657x | +| 7 | 256x300x256 | 0.0632706 | 0.0663129 | 0.0131018 | 0.954122x | 4.82916x | +| 8 | 64x128x64 | 0.00175151 | 0.00203758 | 0.00326974 | 0.859606x | 0.535674x | +| 9 | 256x256x257 | 0.0567747 | 0.0572363 | 0.0125215 | 0.991936x | 4.53418x | + +As can be seen, the blocked matrix multiplication speedup was slightly above 1x or less in most cases. However, the parallel implementation achieved speedups of around 2x to 4.8x in most cases. + +The table below shows results after the following optimizations: + +- Switched blocked and parallel loops to be `i -> k -> j` + - The initial `i -> j -> k` order accesses `B[k * p + j]` with `k` as the innermost variable, stepping through B column-wise with stride `p`. This causes a cache miss on every iteration. Swapping to `i -> k -> j` makes `j` the innermost variable, so `B[k * p + j]` is accessed sequentially (stride 1), keeping all three matrices in cache-friendly access patterns. +- Added compiler flags: + - `-Ofast`: Enables all `-O3` optimizations plus some additional flags, one of which is `-ffast-math`.
This allows the compiler to reorder floating-point operations, use fused multiply-add (FMA) instructions, and vectorize reduction loops more aggressively. This is the flag most responsible for the blocked speedup improvement. + - `-march=native`: Generates code using the full SIMD instruction set of the host CPU (e.g. AVX2, AVX-512). Without this, the compiler falls back to a generic baseline (SSE2), missing wide vector registers that process 8 floats at a time. + - `-funroll-loops`: Unrolls loop bodies to reduce loop control overhead and expose more instruction-level parallelism for the CPU's out-of-order execution units. + +| Test Case | Dimensions (m x n x p) | Naive Time (s) | Blocked Time (s) | Parallel Time (s) | Blocked Speedup | Parallel Speedup | +|------------:|:-------------------------|-----------------:|-------------------:|--------------------:|:------------------|:-------------------| +| 0 | 64x64x64 | 0.000206294 | 0.000124644 | 0.000974188 | 1.65507x | 0.21176x | +| 1 | 128x64x128 | 0.000902154 | 0.000501695 | 0.000846172 | 1.79821x | 1.06616x | +| 2 | 100x128x56 | 0.000479879 | 0.000572318 | 0.000855709 | 0.838483x | 0.560797x | +| 3 | 128x64x128 | 0.00101505 | 0.00050891 | 0.000930236 | 1.99456x | 1.09117x | +| 4 | 32x128x32 | 8.6234e-05 | 5.2299e-05 | 0.000552463 | 1.64887x | 0.15609x | +| 5 | 200x100x256 | 0.00479585 | 0.00224633 | 0.00191679 | 2.13497x | 2.50202x | +| 6 | 256x256x256 | 0.0186269 | 0.00742483 | 0.00312077 | 2.50874x | 5.9687x | +| 7 | 256x300x256 | 0.0223733 | 0.0108002 | 0.00306352 | 2.07156x | 7.30315x | +| 8 | 64x128x64 | 0.000383877 | 0.000223077 | 0.000714149 | 1.72083x | 0.537531x | +| 9 | 256x256x257 | 0.0118203 | 0.00739098 | 0.00332793 | 1.59929x | 3.55186x | + +With this, the blocked implementation speedup improved to around 1.6x to 2.5x in every case except case 2 (0.84x). However, the parallel speedup dropped, with most small cases at about 1x or below. For larger cases (5, 6, 7, 9), it still managed to achieve 2.5x to 7.3x speedup.
The compiler flags optimized the single-threaded naive baseline significantly, which reduced the relative parallel speedup. For small matrices, the overhead of spawning threads outweighed the benefit of parallelism, while larger matrices had enough work for the threads to deliver a net speedup. --- - -### Build Instructions - -- Use the provided `CMakeLists.txt` to build the project. -- **Additional Requirements**: - - Ensure OpenMP is enabled in your compiler (e.g., `-fopenmp` for GCC). - - The provided CMake file includes OpenMP support. -- **Windows Users**: - - Use CLion or Visual Studio with CMake. - - Alternatively, use MinGW with `cmake -G "MinGW Makefiles"` and `make`. -- **Linux/Mac Users**: - - Make sure the GCC compiler is installed (`brew install gcc` on Mac). - - Configure CMake to use the correct compiler: - ```bash - cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ . - ``` - - Run `cmake .` to generate a Makefile, then `make`. -- **Testing OpenMP**: - - Set the number of threads using the environment variable `OMP_NUM_THREADS` (e.g., `export OMP_NUM_THREADS=4` on - Linux/Mac, or `set OMP_NUM_THREADS=4` on Windows). - - Test with different thread counts to find the best performance. - ---- - -### Submission Requirements - -#### Fork and Clone the Repository - -- Fork the Assignment 4 repository (provided separately). -- Clone your fork: - ```bash - git clone https://github.com/AA-parallel-computing/Assignment-4-Optional.git - cd Assignment-4-Optional - ``` - -#### Create a New Branch - -```bash -git checkout -b student-name -``` - -#### Implement Your Solution - -- Modify the provided `main.cpp` to implement `blocked_matmul` and `parallel_matmul`. -- Update `README.md` with your performance results table. - -#### Commit and Push - -```bash -git add .
-git commit -m "student-name: Implemented optimized matrix multiplication" -git push origin student-name -``` - -#### Submit a Pull Request (PR) - -- Create a pull request from your branch to the base repository’s `main` branch. -- Include a description of your optimizations and any challenges faced. - ---- - -### Grading (100 Points Total) - -| Subtask | Points | -|---------------------------------------------|--------| -| Correct implementation of `blocked_matmul` | 30 | -| Correct implementation of `parallel_matmul` | 30 | -| Accurate performance measurements | 20 | -| Performance results table in README.md | 10 | -| Code clarity, commenting, and organization | 10 | -| **Total** | 100 | - ---- - -### Tips for Success - -- **Cache Optimization**: - - Experiment with different block sizes. Start with powers of 2 (e.g., 16, 32, 64). - - Use a block size that balances cache usage without excessive overhead. -- **OpenMP**: - - Test with different thread counts to find the optimal number for your system. - - Be cautious of false sharing (when threads access nearby memory locations, causing cache coherence issues). -- **Performance Measurement**: - - Run multiple iterations for each test case and report the average time to reduce variability. - - Ensure no other heavy processes are running during measurements. -- **Debugging**: - - Validate each implementation against `output.raw` to ensure correctness before optimizing. - - Use small test cases to debug your blocked and parallel implementations. - -Good luck, and enjoy optimizing your matrix multiplication! 
diff --git a/main.cpp b/main.cpp index 93d6224..d86b584 100644 --- a/main.cpp +++ b/main.cpp @@ -23,19 +23,17 @@ void blocked_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32 } for (uint32_t ii = 0; ii < m; ii += block_size) { - for (uint32_t jj = 0; jj < p; jj += block_size) { - for (uint32_t kk = 0; kk < n; kk += block_size) { + for (uint32_t kk = 0; kk < n; kk += block_size) { + for (uint32_t jj = 0; jj < p; jj += block_size) { uint32_t i_end = std::min(ii + block_size, m); uint32_t j_end = std::min(jj + block_size, p); uint32_t k_end = std::min(kk + block_size, n); for (uint32_t i = ii; i < i_end; i++) { - for (uint32_t j = jj; j < j_end; j++) { - float sum = C[i * p + j]; - for (uint32_t k = kk; k < k_end; k++) { - sum += A[i * n + k] * B[k * p + j]; + for (uint32_t k = kk; k < k_end; k++) { + for (uint32_t j = jj; j < j_end; j++) { + C[i * p + j] += A[i * n + k] * B[k * p + j]; } - C[i * p + j] = sum; } } } @@ -49,9 +47,9 @@ void parallel_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint3 #pragma omp parallel for schedule(static) for (uint32_t i = 0; i < m; i++) { - for (uint32_t j = 0; j < p; j++) { - C[i * p + j] = 0; - for (uint32_t k = 0; k < n; k++) { + for (uint32_t k = 0; k < n; k++) { + for (uint32_t j = 0; j < p; j++) { + if (k == 0) C[i * p + j] = 0; C[i * p + j] += A[i * n + k] * B[k * p + j]; } } @@ -102,7 +100,7 @@ void write_result(const std::string file_path, float *matrix, uint32_t &row, uin std::ofstream of(file_path); of << row << " " << col << "\n"; for (uint32_t i = 0; i < row * col; i++) { - of << matrix[i]; + of << std::round(matrix[i] * 100) / 100; if ((i + 1) % col == 0) { of << "\n"; } else { @@ -159,7 +157,7 @@ int main(int argc, char *argv[]) { // Measure performance of blocked_matmul (use block_size = 32 as default) start_time = omp_get_wtime(); - blocked_matmul(C_blocked, A, B, m, n, p, 8); + blocked_matmul(C_blocked, A, B, m, n, p, 32); double blocked_time = omp_get_wtime() - start_time; // 
Write blocked result to file From c64045e0f7a7a2af4d2ad5103fa9370e830a042a Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 31 May 2026 21:30:50 +0300 Subject: [PATCH 5/5] Add cpu specs to README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index ea5c136..f8c5944 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,13 @@ For each test case (0 through 9 in the `data` folder): #### 4. Results +**CPU Specs:** + +- CPU: AMD Ryzen 7 8845HS +- Architecture: x86-64 +- Cores: 8 +- Threads: 16 + The results in the table below come from the basic implementation of the aforementioned matrix multiplications. | Test Case | Dimensions (m x n x p) | Naive Time (s) | Blocked Time (s) | Parallel Time (s) | Blocked Speedup | Parallel Speedup |