diff --git a/.github/workflows/makefile.yml b/.github/workflows/makefile.yml
new file mode 100644
index 0000000..65127d1
--- /dev/null
+++ b/.github/workflows/makefile.yml
@@ -0,0 +1,28 @@
+name: Makefile CI
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@main
+
+    - name : Set up build environment
+      run: sudo apt install -y libssl-dev libxxhash-dev
+      working-directory: build
+
+    - name: Check base build
+      run: make all
+      working-directory: build
+
+    - name: Check SIMD build
+      run: make simd_all
+      working-directory: build
+
diff --git a/.gitignore b/.gitignore
index 200344d..da1d92a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,8 @@
 .vscode/*
 
 # MacOS files
-**/.DS_Store
\ No newline at end of file
+**/.DS_Store
+
+results.txt
+results_*.txt
+results_graph.png
\ No newline at end of file
diff --git a/build/Makefile b/build/Makefile
index 7659ec5..7dd71fc 100644
--- a/build/Makefile
+++ b/build/Makefile
@@ -1,4 +1,5 @@
-export ${EXTRA_COMPILER_FLAGS}
+# Note: The default build target 'all' does not use SIMD acceleration
+
 DEDUP_BUILD_PATH = ../dedup/build
 MEASURE_DEDUP_BUILD_PATH = ../measure-dedup/build
 MEASURE_LOW_ENTROPY_BUILD_PATH = ../measure-low-entropy/build
@@ -25,8 +26,41 @@ all:
 	cp $(SUPPORTING_TOOLS_PATH)/archive_extract.sh .
 	cp $(SUPPORTING_TOOLS_PATH)/archive_ctl_path.cfg .
 
+# To enable acceleration, pass the appropriate flags as EXTRA_COMPILER_FLAGS.
+# For SSE-128, pass '-msse -msse2 -msse3 -msse4.1' as EXTRA_COMPILER_FLAGS
+# For AVX-256, pass '-mavx -mavx2' as EXTRA_COMPILER_FLAGS
+# For BMI2, pass '-mbmi -mbmi2' as EXTRA_COMPILER_FLAGS (used only in VSEQ)
+# For AVX-512, pass '-mavx512f -mavx512vl -mavx512bw' as EXTRA_COMPILER_FLAGS
+
+
+.PHONY: sse128
+sse128:
+	$(MAKE) EXTRA_COMPILER_FLAGS="-msse -msse2 -msse3 -msse4.1" all
+	
+.PHONY: avx256
+avx256:
+	$(MAKE) EXTRA_COMPILER_FLAGS="-mavx -mavx2" all
+
+.PHONY: avx512
+avx512:
+	$(MAKE) EXTRA_COMPILER_FLAGS="-mavx512f -mavx512vl -mavx512bw" all
+
+.PHONY: arm_neon128
+arm_neon128:
+	$(MAKE) all
 
+.PHONY: ibm_altivec128
+ibm_altivec128:
+	$(MAKE) EXTRA_COMPILER_FLAGS="-maltivec=le -mpowerpc64le" all
 
+.PHONY: simd_all
+simd_all:
+	$(MAKE) EXTRA_COMPILER_FLAGS="-msse -msse2 -msse3 -msse4.1 -mavx -mavx2 -mbmi -mbmi2" all
+	
+.PHONY: simd512_all
+simd512_all:
+	$(MAKE) EXTRA_COMPILER_FLAGS="-msse -msse2 -msse3 -msse4.1 -mavx -mavx2 -mbmi -mbmi2 -mavx512f -mavx512vl -mavx512bw" all
+	
 .PHONY: clean
 clean:
 	cd $(DEDUP_BUILD_PATH) && make $@
diff --git a/build/config.txt b/build/config.txt
index 6053a09..e34d58d 100644
--- a/build/config.txt
+++ b/build/config.txt
@@ -35,6 +35,10 @@ ae_avg_block_size=8448
 ram_max_block_size=32768
 ram_avg_block_size=8448
 
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
+
 # TTTD Parameters
 tttd_min_block_size=4096
 tttd_avg_block_size=8192
diff --git a/build/config_simd512_8kb/aemax_8kb.conf b/build/config_simd512_8kb/aemax_8kb.conf
new file mode 100644
index 0000000..b62c77a
--- /dev/null
+++ b/build/config_simd512_8kb/aemax_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/aemax_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd512_8kb/aemin_8kb.conf b/build/config_simd512_8kb/aemin_8kb.conf
new file mode 100644
index 0000000..10862ed
--- /dev/null
+++ b/build/config_simd512_8kb/aemin_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/aemin_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd512_8kb/crc32_8kb.conf b/build/config_simd512_8kb/crc32_8kb.conf
new file mode 100644
index 0000000..fcbbec3
--- /dev/null
+++ b/build/config_simd512_8kb/crc32_8kb.conf
@@ -0,0 +1,15 @@
+# General Parameters
+chunking_algo = crc
+hashing_algo = xxhash128
+output_file = ./hashes_simd512_8kb/crc32_8kb.out
+buffer_size = 32768
+simd_mode=none
+
+# 8k Avg Chunk Size
+crc_hash_bits=13
+crc_window_size=256
+crc_window_step_size=1
+
+crc_min_block_size=1024
+crc_avg_block_size=8192
+crc_max_block_size=32768
diff --git a/build/config_simd512_8kb/fastcdc_8kb.conf b/build/config_simd512_8kb/fastcdc_8kb.conf
new file mode 100644
index 0000000..9edce20
--- /dev/null
+++ b/build/config_simd512_8kb/fastcdc_8kb.conf
@@ -0,0 +1,14 @@
+# General Parameters
+chunking_algo=fastcdc
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/fastcdc_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# FastCDC Parameters
+fastcdc_min_block_size=2048
+fastcdc_avg_block_size=8192
+fastcdc_max_block_size=32768
+fastcdc_normalization_level=2
+fastcdc_disable_normalization=false
\ No newline at end of file
diff --git a/build/config_simd512_8kb/gear_8kb.conf b/build/config_simd512_8kb/gear_8kb.conf
new file mode 100644
index 0000000..c1e4f0c
--- /dev/null
+++ b/build/config_simd512_8kb/gear_8kb.conf
@@ -0,0 +1,12 @@
+# General Parameters
+chunking_algo=gear
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/gear_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# Gear Chunking parameters
+gear_min_block_size=2048
+gear_avg_block_size=8192
+gear_max_block_size=32768
diff --git a/build/config_simd512_8kb/maxp_8kb.conf b/build/config_simd512_8kb/maxp_8kb.conf
new file mode 100644
index 0000000..b651540
--- /dev/null
+++ b/build/config_simd512_8kb/maxp_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/maxp_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd512_8kb/rabins_8kb.conf b/build/config_simd512_8kb/rabins_8kb.conf
new file mode 100644
index 0000000..bb9f2b4
--- /dev/null
+++ b/build/config_simd512_8kb/rabins_8kb.conf
@@ -0,0 +1,13 @@
+# General Parameters
+chunking_algo=rabins
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/rabins_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# Rabin Chunking Parameters
+rabinc_min_block_size=2048
+rabinc_avg_block_size=8192
+rabinc_max_block_size=32768
+rabinc_window_size=48
diff --git a/build/config_simd512_8kb/ram_8kb.conf b/build/config_simd512_8kb/ram_8kb.conf
new file mode 100644
index 0000000..26b2aa5
--- /dev/null
+++ b/build/config_simd512_8kb/ram_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/ram_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd512_8kb/tttd_8kb.conf b/build/config_simd512_8kb/tttd_8kb.conf
new file mode 100644
index 0000000..65c22ac
--- /dev/null
+++ b/build/config_simd512_8kb/tttd_8kb.conf
@@ -0,0 +1,18 @@
+# General Parameters
+chunking_algo=tttd
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/tttd_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# TTTD Parameters
+tttd_min_block_size=2048
+tttd_avg_block_size=8192
+tttd_max_block_size=32768
+
+# Rabin Chunking Parameters
+rabinc_min_block_size=2048
+rabinc_avg_block_size=8192
+rabinc_max_block_size=32768
+rabinc_window_size=48
diff --git a/build/config_simd512_8kb/vaemax512_8kb.conf b/build/config_simd512_8kb/vaemax512_8kb.conf
new file mode 100644
index 0000000..956fef8
--- /dev/null
+++ b/build/config_simd512_8kb/vaemax512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/vaemax512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd512_8kb/vaemin512_8kb.conf b/build/config_simd512_8kb/vaemin512_8kb.conf
new file mode 100644
index 0000000..49254e2
--- /dev/null
+++ b/build/config_simd512_8kb/vaemin512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/vaemin512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd512_8kb/vmaxp512_8kb.conf b/build/config_simd512_8kb/vmaxp512_8kb.conf
new file mode 100644
index 0000000..cc27192
--- /dev/null
+++ b/build/config_simd512_8kb/vmaxp512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/vmaxp512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd512_8kb/vram512_8kb.conf b/build/config_simd512_8kb/vram512_8kb.conf
new file mode 100644
index 0000000..b60fc47
--- /dev/null
+++ b/build/config_simd512_8kb/vram512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd512_8kb/vram512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/aemax_8kb.conf b/build/config_simd_8kb copy/aemax_8kb.conf
new file mode 100644
index 0000000..d62bff7
--- /dev/null
+++ b/build/config_simd_8kb copy/aemax_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/aemax_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/aemin_8kb.conf b/build/config_simd_8kb copy/aemin_8kb.conf
new file mode 100644
index 0000000..f19eeda
--- /dev/null
+++ b/build/config_simd_8kb copy/aemin_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/aemin_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/crc32_8kb.conf b/build/config_simd_8kb copy/crc32_8kb.conf
new file mode 100644
index 0000000..6eaf0b1
--- /dev/null
+++ b/build/config_simd_8kb copy/crc32_8kb.conf	
@@ -0,0 +1,15 @@
+# General Parameters
+chunking_algo = crc
+hashing_algo = xxhash128
+output_file = ./hashes_simd_8kb/crc32_8kb.out
+buffer_size = 32768
+simd_mode=none
+
+# 8k Avg Chunk Size
+crc_hash_bits=13
+crc_window_size=256
+crc_window_step_size=1
+
+crc_min_block_size=1024
+crc_avg_block_size=8192
+crc_max_block_size=32768
diff --git a/build/config_simd_8kb copy/fastcdc_8kb.conf b/build/config_simd_8kb copy/fastcdc_8kb.conf
new file mode 100644
index 0000000..5e2e6b4
--- /dev/null
+++ b/build/config_simd_8kb copy/fastcdc_8kb.conf	
@@ -0,0 +1,14 @@
+# General Parameters
+chunking_algo=fastcdc
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/fastcdc_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# FastCDC Parameters
+fastcdc_min_block_size=2048
+fastcdc_avg_block_size=8192
+fastcdc_max_block_size=32768
+fastcdc_normalization_level=2
+fastcdc_disable_normalization=false
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/gear_8kb.conf b/build/config_simd_8kb copy/gear_8kb.conf
new file mode 100644
index 0000000..306e70a
--- /dev/null
+++ b/build/config_simd_8kb copy/gear_8kb.conf	
@@ -0,0 +1,12 @@
+# General Parameters
+chunking_algo=gear
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/gear_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# Gear Chunking parameters
+gear_min_block_size=2048
+gear_avg_block_size=8192
+gear_max_block_size=32768
diff --git a/build/config_simd_8kb copy/maxp_8kb.conf b/build/config_simd_8kb copy/maxp_8kb.conf
new file mode 100644
index 0000000..65e48e7
--- /dev/null
+++ b/build/config_simd_8kb copy/maxp_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/maxp_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/rabins_8kb.conf b/build/config_simd_8kb copy/rabins_8kb.conf
new file mode 100644
index 0000000..b5b2919
--- /dev/null
+++ b/build/config_simd_8kb copy/rabins_8kb.conf	
@@ -0,0 +1,13 @@
+# General Parameters
+chunking_algo=rabins
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/rabins_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# Rabin Chunking Parameters
+rabinc_min_block_size=2048
+rabinc_avg_block_size=8192
+rabinc_max_block_size=32768
+rabinc_window_size=48
diff --git a/build/config_simd_8kb copy/ram_8kb.conf b/build/config_simd_8kb copy/ram_8kb.conf
new file mode 100644
index 0000000..1ec14ef
--- /dev/null
+++ b/build/config_simd_8kb copy/ram_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/ram_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/tttd_8kb.conf b/build/config_simd_8kb copy/tttd_8kb.conf
new file mode 100644
index 0000000..aa168d0
--- /dev/null
+++ b/build/config_simd_8kb copy/tttd_8kb.conf	
@@ -0,0 +1,18 @@
+# General Parameters
+chunking_algo=tttd
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/tttd_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# TTTD Parameters
+tttd_min_block_size=2048
+tttd_avg_block_size=8192
+tttd_max_block_size=32768
+
+# Rabin Chunking Parameters
+rabinc_min_block_size=2048
+rabinc_avg_block_size=8192
+rabinc_max_block_size=32768
+rabinc_window_size=48
diff --git a/build/config_simd_8kb copy/vaemax256_8kb.conf b/build/config_simd_8kb copy/vaemax256_8kb.conf
new file mode 100644
index 0000000..1f02697
--- /dev/null
+++ b/build/config_simd_8kb copy/vaemax256_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/vaemax256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/vaemin256_8kb.conf b/build/config_simd_8kb copy/vaemin256_8kb.conf
new file mode 100644
index 0000000..985dc71
--- /dev/null
+++ b/build/config_simd_8kb copy/vaemin256_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/vaemin256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/vmaxp256_8kb.conf b/build/config_simd_8kb copy/vmaxp256_8kb.conf
new file mode 100644
index 0000000..0fe58b9
--- /dev/null
+++ b/build/config_simd_8kb copy/vmaxp256_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/vmaxp256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd_8kb copy/vram256_8kb.conf b/build/config_simd_8kb copy/vram256_8kb.conf
new file mode 100644
index 0000000..10e0484
--- /dev/null
+++ b/build/config_simd_8kb copy/vram256_8kb.conf	
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd_8kb/vram256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/aemax_8kb.conf b/build/config_simd_compare_8kb/aemax_8kb.conf
new file mode 100644
index 0000000..c556b88
--- /dev/null
+++ b/build/config_simd_compare_8kb/aemax_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/aemax_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/aemin_8kb.conf b/build/config_simd_compare_8kb/aemin_8kb.conf
new file mode 100644
index 0000000..b18d789
--- /dev/null
+++ b/build/config_simd_compare_8kb/aemin_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/aemin_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/maxp_8kb.conf b/build/config_simd_compare_8kb/maxp_8kb.conf
new file mode 100644
index 0000000..7603a4a
--- /dev/null
+++ b/build/config_simd_compare_8kb/maxp_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/maxp_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/ram_8kb.conf b/build/config_simd_compare_8kb/ram_8kb.conf
new file mode 100644
index 0000000..d0f978f
--- /dev/null
+++ b/build/config_simd_compare_8kb/ram_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/ram_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vaemax128_8kb.conf b/build/config_simd_compare_8kb/vaemax128_8kb.conf
new file mode 100644
index 0000000..6ae1bad
--- /dev/null
+++ b/build/config_simd_compare_8kb/vaemax128_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vaemax128_8kb.out
+simd_mode=sse128
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vaemax256_8kb.conf b/build/config_simd_compare_8kb/vaemax256_8kb.conf
new file mode 100644
index 0000000..db54b5a
--- /dev/null
+++ b/build/config_simd_compare_8kb/vaemax256_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vaemax256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vaemax512_8kb.conf b/build/config_simd_compare_8kb/vaemax512_8kb.conf
new file mode 100644
index 0000000..b31813d
--- /dev/null
+++ b/build/config_simd_compare_8kb/vaemax512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vaemax512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vaemin128_8kb.conf b/build/config_simd_compare_8kb/vaemin128_8kb.conf
new file mode 100644
index 0000000..ffde8ba
--- /dev/null
+++ b/build/config_simd_compare_8kb/vaemin128_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vaemin128_8kb.out
+simd_mode=sse128
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vaemin256_8kb.conf b/build/config_simd_compare_8kb/vaemin256_8kb.conf
new file mode 100644
index 0000000..92834eb
--- /dev/null
+++ b/build/config_simd_compare_8kb/vaemin256_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vaemin256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vaemin512_8kb.conf b/build/config_simd_compare_8kb/vaemin512_8kb.conf
new file mode 100644
index 0000000..7c50faf
--- /dev/null
+++ b/build/config_simd_compare_8kb/vaemin512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vaemin512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vmaxp128_8kb.conf b/build/config_simd_compare_8kb/vmaxp128_8kb.conf
new file mode 100644
index 0000000..110befa
--- /dev/null
+++ b/build/config_simd_compare_8kb/vmaxp128_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vmaxp128_8kb.out
+simd_mode=sse128
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vmaxp256_8kb.conf b/build/config_simd_compare_8kb/vmaxp256_8kb.conf
new file mode 100644
index 0000000..0e55d71
--- /dev/null
+++ b/build/config_simd_compare_8kb/vmaxp256_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vmaxp256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vmaxp512_8kb.conf b/build/config_simd_compare_8kb/vmaxp512_8kb.conf
new file mode 100644
index 0000000..e6f45c6
--- /dev/null
+++ b/build/config_simd_compare_8kb/vmaxp512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vmaxp512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vram128_8kb.conf b/build/config_simd_compare_8kb/vram128_8kb.conf
new file mode 100644
index 0000000..b7fcb85
--- /dev/null
+++ b/build/config_simd_compare_8kb/vram128_8kb.conf
@@ -0,0 +1,12 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vram128_8kb.out
+simd_mode=sse128
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
+
diff --git a/build/config_simd_compare_8kb/vram256_8kb.conf b/build/config_simd_compare_8kb/vram256_8kb.conf
new file mode 100644
index 0000000..3c0913c
--- /dev/null
+++ b/build/config_simd_compare_8kb/vram256_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vram256_8kb.out
+simd_mode=avx256
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_simd_compare_8kb/vram512_8kb.conf b/build/config_simd_compare_8kb/vram512_8kb.conf
new file mode 100644
index 0000000..c81790f
--- /dev/null
+++ b/build/config_simd_compare_8kb/vram512_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_simd_compare_8kb/vram512_8kb.out
+simd_mode=avx512
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_unaccelerated_8kb/aemax_8kb.conf b/build/config_unaccelerated_8kb/aemax_8kb.conf
new file mode 100644
index 0000000..5f86e5a
--- /dev/null
+++ b/build/config_unaccelerated_8kb/aemax_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/aemax_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=max
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_unaccelerated_8kb/aemin_8kb.conf b/build/config_unaccelerated_8kb/aemin_8kb.conf
new file mode 100644
index 0000000..9d95668
--- /dev/null
+++ b/build/config_unaccelerated_8kb/aemin_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ae
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/aemin_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# AE Parameters
+ae_extreme_mode=min
+ae_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_unaccelerated_8kb/crc32_8kb.conf b/build/config_unaccelerated_8kb/crc32_8kb.conf
new file mode 100644
index 0000000..7ba397c
--- /dev/null
+++ b/build/config_unaccelerated_8kb/crc32_8kb.conf
@@ -0,0 +1,15 @@
+# General Parameters
+chunking_algo = crc
+hashing_algo = xxhash128
+output_file = ./hashes_unaccelerated_8kb/crc32_8kb.out
+buffer_size = 32768
+simd_mode=none
+
+# 8k Avg Chunk Size
+crc_hash_bits=13
+crc_window_size=256
+crc_window_step_size=1
+
+crc_min_block_size=1024
+crc_avg_block_size=8192
+crc_max_block_size=32768
diff --git a/build/config_unaccelerated_8kb/fastcdc_8kb.conf b/build/config_unaccelerated_8kb/fastcdc_8kb.conf
new file mode 100644
index 0000000..ad6c3ff
--- /dev/null
+++ b/build/config_unaccelerated_8kb/fastcdc_8kb.conf
@@ -0,0 +1,14 @@
+# General Parameters
+chunking_algo=fastcdc
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/fastcdc_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# FastCDC Parameters
+fastcdc_min_block_size=2048
+fastcdc_avg_block_size=8192
+fastcdc_max_block_size=32768
+fastcdc_normalization_level=2
+fastcdc_disable_normalization=false
\ No newline at end of file
diff --git a/build/config_unaccelerated_8kb/gear_8kb.conf b/build/config_unaccelerated_8kb/gear_8kb.conf
new file mode 100644
index 0000000..1a375b2
--- /dev/null
+++ b/build/config_unaccelerated_8kb/gear_8kb.conf
@@ -0,0 +1,12 @@
+# General Parameters
+chunking_algo=gear
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/gear_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# Gear Chunking parameters
+gear_min_block_size=2048
+gear_avg_block_size=8192
+gear_max_block_size=32768
diff --git a/build/config_unaccelerated_8kb/maxp_8kb.conf b/build/config_unaccelerated_8kb/maxp_8kb.conf
new file mode 100644
index 0000000..1b01b6c
--- /dev/null
+++ b/build/config_unaccelerated_8kb/maxp_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=maxp
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/maxp_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# MAXP Parameters
+maxp_window_size = 960
+maxp_max_block_size = 32768
\ No newline at end of file
diff --git a/build/config_unaccelerated_8kb/rabins_8kb.conf b/build/config_unaccelerated_8kb/rabins_8kb.conf
new file mode 100644
index 0000000..bc7bc6e
--- /dev/null
+++ b/build/config_unaccelerated_8kb/rabins_8kb.conf
@@ -0,0 +1,13 @@
+# General Parameters
+chunking_algo=rabins
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/rabins_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# Rabin Chunking Parameters
+rabinc_min_block_size=2048
+rabinc_avg_block_size=8192
+rabinc_max_block_size=32768
+rabinc_window_size=48
diff --git a/build/config_unaccelerated_8kb/ram_8kb.conf b/build/config_unaccelerated_8kb/ram_8kb.conf
new file mode 100644
index 0000000..ffb7afa
--- /dev/null
+++ b/build/config_unaccelerated_8kb/ram_8kb.conf
@@ -0,0 +1,11 @@
+# General Parameters
+chunking_algo=ram
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/ram_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# RAM Parameters
+ram_max_block_size=32768
+ram_avg_block_size=8448
\ No newline at end of file
diff --git a/build/config_unaccelerated_8kb/tttd_8kb.conf b/build/config_unaccelerated_8kb/tttd_8kb.conf
new file mode 100644
index 0000000..7fa9420
--- /dev/null
+++ b/build/config_unaccelerated_8kb/tttd_8kb.conf
@@ -0,0 +1,18 @@
+# General Parameters
+chunking_algo=tttd
+hashing_algo=xxhash128
+output_file=./hashes_unaccelerated_8kb/tttd_8kb.out
+simd_mode=none
+
+buffer_size=32768
+
+# TTTD Parameters
+tttd_min_block_size=2048
+tttd_avg_block_size=8192
+tttd_max_block_size=32768
+
+# Rabin Chunking Parameters
+rabinc_min_block_size=2048
+rabinc_avg_block_size=8192
+rabinc_max_block_size=32768
+rabinc_window_size=48
diff --git a/build/dedup_script.sh b/build/dedup_script.sh
index 98c0e17..0552747 100755
--- a/build/dedup_script.sh
+++ b/build/dedup_script.sh
@@ -11,9 +11,7 @@ function display_help() {
     echo "Usage: $0 [OPTIONS] <DIRECTORY>"
     echo "Options:"
     echo "  -h, --help          Show this help message"
-    echo "  -c, Compress all files in the given directory"
-    echo "  -s, Silent mode (no output to console)"
-    echo "  -t, current time or suffix for output naming"
+    echo "  -c, config directory name (without config_ included) Eg: -c simd_8kb"
     exit 1
 }
 
@@ -24,14 +22,6 @@ while (( "$#" )); do
         display_help
         ;;
     -c)
-      COMPRESS=true
-      shift
-      ;;
-    -s)
-      SILENT=true
-      shift
-      ;;
-    -t)
       now="$2"
       shift 2
       ;;
@@ -53,17 +43,12 @@ if [[ -z "$DIRECTORY" ]]; then
   exit 1
 fi
 
-if [[ $COMPRESS == true ]]; then
-  # Compress all files in the given directory
-  if [[ $SILENT == false ]]; then echo "Compressing files in directory $DIRECTORY"; fi
-  gzip $DIRECTORY/*
-fi
-
 # Execute dedup.exe with each config file
 if [[ $SILENT == false ]]; then echo "Running dedup.exe and ./measure-dedup.exe for each configuration file"; fi
-rm results.txt
-rm -rf ./hashes_${now}/
-mkdir ./hashes_${now}/
+rm -f results.txt
+rm -rf "./hashes_${now}/"
+mkdir "./hashes_${now}/"
+echo "Dataset path: $DIRECTORY" >> ./results.txt
 for config_file in $(ls config_${now}); do
   echo "==================" >> ./results.txt
   echo $config_file >> ./results.txt
diff --git a/build/plot_results.py b/build/plot_results.py
new file mode 100755
index 0000000..801b9fa
--- /dev/null
+++ b/build/plot_results.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+import seaborn as sns
+import matplotlib.pyplot as plt
+import os ,sys
+
+# Dictionary to hold throughput values 
+throughput_vals = {}
+space_savings_vals = {}
+dataset_name = ""
+
+rename_dict = { "aemin":"AE-Min",
+                "aemax":"AE-Max",
+                "crc32":"CRC32",
+                "fastcdc":"FastCDC",
+                "gear":"Gear",
+                "maxp":"MAXP",
+                "rabins":"Rabin",
+                "ram":"RAM",
+                "tttd":"TTTD",
+                "vaemax128":"VAEMax-128",
+                "vaemax256":"VAEMax-256",
+                "vaemax512":"VAEMax-512",                
+                "vaemin128":"VAEMin-128",
+                "vaemin256":"VAEMin-256",
+                "vaemin512":"VAEMin-512",
+                "vmaxp128":"VMAXP-128",
+                "vmaxp256":"VMAXP-256",
+                "vmaxp512":"VMAXP-512",
+                "vram128":"VRAM-128",
+                "vram256":"VRAM-256",
+                "vram512":"VRAM-512",
+}
+
+def build_custom_order(keys):
+    custom_order = []
+    if("CRC32" in keys):
+        custom_order.append("CRC32")
+    if("FastCDC" in keys):
+        custom_order.append("FastCDC")
+    if("Gear" in keys):
+        custom_order.append("Gear")
+    if("Rabin" in keys):
+        custom_order.append("Rabin")
+    if("TTTD" in keys):
+        custom_order.append("TTTD")
+    
+    if("AE-Max" in keys):
+        custom_order.append("AE-Max")
+    if("VAEMax-128" in keys):
+        custom_order.append("VAEMax-128")
+    if("VAEMax-256" in keys):
+        custom_order.append("VAEMax-256")
+    if("VAEMax-512" in keys):
+        custom_order.append("VAEMax-512")
+    
+    if("AE-Min" in keys):
+        custom_order.append("AE-Min")
+    if("VAEMin-128" in keys):
+        custom_order.append("VAEMin-128")
+    if("VAEMin-256" in keys):
+        custom_order.append("VAEMin-256")
+    if("VAEMin-512" in keys):
+        custom_order.append("VAEMin-512")
+    
+    if("MAXP" in keys):
+        custom_order.append("MAXP")
+    if("VMAXP-128" in keys):
+        custom_order.append("VMAXP-128")
+    if("VMAXP-256" in keys):
+        custom_order.append("VMAXP-256")
+    if("VMAXP-512" in keys):
+        custom_order.append("VMAXP-512")
+    
+    if("RAM" in keys):
+        custom_order.append("RAM")
+    if("VRAM-128" in keys):
+        custom_order.append("VRAM-128")
+    if("VRAM-256" in keys):
+        custom_order.append("VRAM-256")
+    if("VRAM-512" in keys):
+        custom_order.append("VRAM-512")
+    
+    return custom_order
+
+
+# Read results.txt generated by dedup_script.sh
+def read_src_throughput(results_file_path):
+    global throughput_vals, dataset_name
+    with open(results_file_path, 'r') as results_file:
+        curr_algo = None
+        curr_throughput = None
+        for line in results_file:
+            if("Dataset path:" in line):
+                dataset_name = line.split(":")[1].strip()
+                if(dataset_name[-1] == "/"):
+                    dataset_name = dataset_name[:-1]
+                dataset_name = os.path.basename(dataset_name)
+            if(".conf" in line):
+                curr_algo = line.strip().split(".")[0].strip().split("_")[0].strip()
+                if(curr_algo in rename_dict.keys()):
+                    curr_algo = rename_dict[curr_algo]
+            elif("Chunking Throughput" in line):
+                curr_throughput = float(line.strip().split(":")[1].strip()) / 1000 # Convert throughput to GB/s
+                if(curr_algo == None):
+                    raise Exception("Throughput value detected but no chunking technique? Check results file format for errors.")
+                throughput_vals[curr_algo] = curr_throughput
+                curr_throughput = None
+                curr_algo = None
+
+def read_src_spacesavings(results_file_path):
+    global space_savings_vals
+    with open(results_file_path, 'r') as results_file:
+        curr_algo = None
+        space_savings = None
+        for line in results_file:
+            if(".conf" in line):
+                curr_algo = line.strip().split(".")[0].strip().split("_")[0].strip()
+                if(curr_algo in rename_dict.keys()):
+                    curr_algo = rename_dict[curr_algo]
+            elif("Space savings" in line):
+                space_savings = float(line.strip().split(":")[1].strip().strip("%"))
+                if(curr_algo == None):
+                    raise Exception("Space savings value detected but no chunking technique? Check results file format for errors.")
+                space_savings_vals[curr_algo] = space_savings
+                space_savings = None
+                curr_algo = None
+
+# Plot bar graph
+def plot_results():
+    global throughput_vals
+    sns.set_style('darkgrid')
+    sns.set_context("poster")
+    plt.rcParams['figure.dpi'] = 300
+    plt.tick_params(axis='x', labelsize=26)
+    plt.tick_params(axis='y', labelsize=26)
+    
+    fig, ax = plt.subplots(2, 1, figsize=(32, 24))
+
+    custom_order = build_custom_order(space_savings_vals.keys())
+
+    sns.barplot(x = space_savings_vals.keys(), y = space_savings_vals.values(), ax=ax[0], gap=0.5, hue=space_savings_vals.keys(), legend=False, palette="inferno", edgecolor='black', order=custom_order, hue_order=custom_order)
+    ax[0].set_xlabel("CDC Algorithm", fontsize=30)
+    ax[0].set_ylabel("Space Savings (%)", fontsize=30)
+    ax[0].set_title("Space Savings", fontsize=45)
+    
+    custom_order = build_custom_order(throughput_vals.keys())
+
+    sns.barplot(x = throughput_vals.keys(), y = throughput_vals.values(), ax=ax[1], gap=0.5, hue=throughput_vals.keys(), legend=False, palette="inferno", edgecolor='black', order=custom_order, hue_order=custom_order)
+    ax[1].set_xlabel("CDC Algorithm", fontsize=30)
+    ax[1].set_ylabel("Throughput (GB/s)", fontsize=30)
+    ax[1].set_title("Chunking Throughput", fontsize=45)
+
+    plt.suptitle("Dataset statistics for " + dataset_name, fontsize=60)
+    plt.tight_layout()
+    
+    plt.savefig('results_graph.png')
+
+    
+
+    
+if(__name__ == "__main__"):
+    if(len(sys.argv) != 2):
+        print("Usage: python3 plot_results.py <path_to_results>")
+        print("\t <path_to_results>: Path to results.txt file generated by dedup_script.sh")
+        sys.exit(0)
+    
+    results_path = sys.argv[1]
+    read_src_throughput(results_path)
+    read_src_spacesavings(results_path)
+    plot_results()
+
+    print("Throughput graph plotted and saved as results_graph.png.")
+
diff --git a/build/plot_throughput_graph.py b/build/plot_throughput_graph.py
deleted file mode 100644
index 5028a17..0000000
--- a/build/plot_throughput_graph.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-import seaborn as sns
-import matplotlib.pyplot as plt
-import os ,sys
-
-# Dictionary to hold throughput values 
-throughput_vals = {}
-
-# Read results.txt generated by dedup_script.sh
-def read_src(resultS_file_path):
-    global throughput_vals
-    with open(resultS_file_path, 'r') as results_file:
-        curr_algo = None
-        curr_throughput = None
-        for line in results_file:
-            if(".conf" in line):
-                curr_algo = line.strip().split(".")[0].strip().split("_")[0].strip()
-            elif("Chunking Throughput" in line):
-                curr_throughput = float(line.strip().split(":")[1].strip())
-                throughput_vals[curr_algo] = curr_throughput
-                if(curr_algo == None):
-                    raise Exception("Throughput value detected but no chunking technique? Check results file format for errors.")
-                curr_throughput = None
-                curr_algo = None
-
-# Plot bar graph
-def plot_results():
-    global throughput_vals
-    sns.barplot(x = throughput_vals.keys(), y = throughput_vals.values())
-    plt.tick_params(axis='x', labelsize=8)
-    plt.xlabel("CDC Algorithm")
-    plt.ylabel("Throughput (MB/s)")
-    plt.suptitle("Throughput of CDC Algorithms on given dataset")
-    plt.savefig('results_graph.png')
-
-    
-
-    
-if(__name__ == "__main__"):
-    if(len(sys.argv) != 2):
-        print("Usage: python3 plot_throughput_graph.py <path_to_results>")
-        print("\t <path_to_results>: Path to results.txt file generated by dedup_script.sh")
-        sys.exit(0)
-    
-    results_path = sys.argv[1]
-    read_src(results_path)
-    plot_results()
-
-    print("Throughput graph plotted and saved as results_graph.png.")
-
diff --git a/dedup/build/Makefile b/dedup/build/Makefile
index 329bb00..bd1bd99 100644
--- a/dedup/build/Makefile
+++ b/dedup/build/Makefile
@@ -20,7 +20,7 @@ INCLUDE_FLAGS += -I ${INCLUDE_PATH_HASHING}
 INCLUDE_FLAGS += -I ${INCLUDE_PATH_CONFIG}
 INCLUDE_FLAGS += -I ${INCLUDE_PATH_OPENSSL}
 
-LD_FLAGS = -L /usr/local/opt/openssl@3/lib -lcrypto
+LD_FLAGS = -L /usr/local/opt/openssl@3/lib -lcrypto -lxxhash
 
 SRC_PATH = ../src
 SRC_MAIN = $(wildcard $(SRC_PATH)/*.cpp)
@@ -41,14 +41,20 @@ SRC_CONFIG = $(wildcard $(SRC_PATH_CONFIG)/*.cpp)
 OBJS_CONFIG = $(SRC_CONFIG:$(SRC_PATH_CONFIG)/%.cpp=%.o)
 DEPS_CONFIG = $(wildcard $(INCLUDE_PATH_CONFIG)/*.hpp)
 
+# Only compiles unaccelerated code by default.
+# To enable acceleration, pass the appropriate flags as EXTRA_COMPILER_FLAGS.
+# 	For SSE-128, pass '-msse -msse2 -msse3 -msse4.1' as EXTRA_COMPILER_FLAGS
+# 	For AVX-256, pass '-mavx -mavx2' as EXTRA_COMPILER_FLAGS
+# 	For BMI2, pass '-mbmi -mbmi2' as EXTRA_COMPILER_FLAGS (used only in VSEQ)
+# 	For AVX-512, pass '-mavx512f -mavx512vl -mavx512bw' as EXTRA_COMPILER_FLAGS
 
-COMPILER_FLAGS= -std=c++17 -Wall -Wextra -Wno-format -msse -msse2 -msse3 -msse4.1 -mavx -mavx2 -O3 ${EXTRA_COMPILER_FLAGS}
+# 	For everything except AVX-512, pass '-msse -msse2 -msse3 -msse4.1 -mavx -mavx2 -mbmi -mbmi2' as EXTRA_COMPILER_FLAGS
+
+COMPILER_FLAGS= -std=c++17 -Wall -Wextra -Wno-format -O3 -Wno-implicit-fallthrough ${EXTRA_COMPILER_FLAGS}
 
 CC = g++
 RM = rm -f
 
-# For AVX-512, pass '-mavx512f -mavx512vl -mavx512bw' as EXTRA_COMPILER_FLAGS
-
 DEBUG = 1
 
 ifeq ($(DEBUG), 1)
diff --git a/dedup/include/chunking/ae_chunking.hpp b/dedup/include/chunking/ae_chunking.hpp
index bc772ba..1947736 100644
--- a/dedup/include/chunking/ae_chunking.hpp
+++ b/dedup/include/chunking/ae_chunking.hpp
@@ -4,20 +4,43 @@
 #include <math.h>
 #include <iostream>
 
-#include "chunking_common.hpp"
+#include "avx_chunking_common.hpp"
 #include "config.hpp"
 
 #include <cstring>
 
 #define DEFAULT_AE_AVG_BLOCK_SIZE 4096
 
-class AE_Chunking : public virtual Chunking_Technique {
+class AE_Chunking : public virtual AVX_Chunking_Technique {
    private:
     uint64_t avg_block_size;
     uint64_t window_size;
     uint64_t curr_pos;
     AE_Mode extreme_mode;
 
+    /**
+     * @brief SSE128, AVX256 and AVX512 arrays for SIMD operations
+     * 
+     */
+    #if defined(__SSE3__)
+    __m128i* sse_array;
+    #endif
+
+    #if defined(__AVX2__)
+    __m256i* avx256_array;
+    #endif
+
+    #if defined(__AVX512F__)
+    __m512i* avx512_array;
+    #endif
+
+    #if defined(__ARM_NEON)
+    uint8x16_t* neon_array;
+    #endif
+
+    #if defined(__ALTIVEC__)
+    __vector unsigned char* altivec_array;
+    #endif
 
     /**
      * @brief finds the next cut point in an array of bytes
@@ -26,8 +49,30 @@ class AE_Chunking : public virtual Chunking_Technique {
      * @return: cutpoint position in the buffer 
      */
     uint64_t find_cutpoint(char* buff, uint64_t size) override;
+    uint64_t find_cutpoint_native(char* buff, uint64_t size);
+   
+    #if defined(__SSE3__)
+    uint64_t find_cutpoint_sse128(char* buff, uint64_t size);
+    #endif
+    
+    #if defined(__AVX2__)
+    uint64_t find_cutpoint_avx256(char* buff, uint64_t size);
+    #endif
+    
+    #if defined(__AVX512F__)
+    uint64_t find_cutpoint_avx512(char* buff, uint64_t size);
+    #endif
+
+    #if defined(__ARM_NEON)
+    uint64_t find_cutpoint_neon(char* buff, uint64_t size);
+    #endif
+
+    #if defined(__ALTIVEC__)
+    uint64_t find_cutpoint_altivec(char* buff, uint64_t size);
+    #endif
+
 
-   public:
+    public:
     /**
      * @brief Default constructor.
      * @return: void
diff --git a/dedup/include/chunking/avx_chunking_common.hpp b/dedup/include/chunking/avx_chunking_common.hpp
index d584193..7d0752b 100644
--- a/dedup/include/chunking/avx_chunking_common.hpp
+++ b/dedup/include/chunking/avx_chunking_common.hpp
@@ -2,7 +2,16 @@
 #define _AVX_COMMON_CHUNKING_
 
 #include <chunking_common.hpp>
-#include <x86intrin.h>
+
+#if defined(__SSE3__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__BMI2__)
+    #include <x86intrin.h>
+#elif defined(__ARM_NEON)
+    #include <arm_neon.h>
+#elif defined(__ALTIVEC__)
+    #include <altivec.h>
+    #undef vector // Avoid conflict with std::vector
+    #undef bool // Avoid conflict with std::bool
+#endif
 
 #include <iostream>
 #include <cstdint>
@@ -19,27 +28,211 @@
 #define AVX512_REGISTER_SIZE_BYTES 64
 #define AVX512_REGISTER_SIZE_INT32 16
 
+#define NEON_REGISTER_SIZE_BITS 128
+#define NEON_REGISTER_SIZE_BYTES 16
+
+#define ALTIVEC_REGISTER_SIZE_BITS 128
+#define ALTIVEC_REGISTER_SIZE_BYTES 16
+
 class AVX_Chunking_Technique: public virtual Chunking_Technique{
     protected:
 
     SIMD_Mode simd_mode;
-    const __m128i K_INV_ZERO = _mm_set1_epi8(0xFF);//_mm_set1_epi8(-1);
+
+    #ifdef __SSE3__
+        const __m128i K_INV_ZERO = _mm_set1_epi8(0xFF);
+    #endif
+    
+    #ifdef __AVX2__
+        const __m256i K_INV_ZERO_256 = _mm256_set1_epi8(0xFF);
+    #endif
+
+    // For debugging only
+    uint64_t chunk_counter;
 
     public:
     /**
-     * @brief Finds maximum value in the region between <start_pos, end_pos> within the data stream *buff using 128-bit SSE instructions
+     * @brief Finds maximum value in the region between <start_pos, end_pos> within the data stream *buff using SIMD instructions
      * 
      * @param buff Data Stream
      * @param start_pos Starting position of scanned region
      * @param end_pos Ending position of scanned region
      * @return uint8_t Maximum Value
      */
-    uint8_t find_maximum_sse128(char *buff, uint64_t start_pos, uint64_t end_pos, __m128i *xmm_array);
-    uint8_t find_maximum_avx256(char *buff, uint64_t start_pos, uint64_t end_pos, __m256i *xmm_array);
+    #ifdef __SSE3__
+        uint8_t find_maximum_sse128(char *buff, uint64_t start_pos, uint64_t end_pos, __m128i *xmm_array);
+    #endif
+    
+    #ifdef __AVX2__
+        uint8_t find_maximum_avx256(char *buff, uint64_t start_pos, uint64_t end_pos, __m256i *ymm_array);
+    #endif
+
+    #if defined(__AVX512F__)
+        uint8_t find_maximum_avx512(char *buff, uint64_t start_pos, uint64_t end_pos, __m512i *zmm_array);
+    #endif
+
+    #if defined(__ARM_NEON)
+        uint8_t find_maximum_neon(char *buff, uint64_t start_pos, uint64_t end_pos, uint8x16_t *neon_array);
+    #endif
+
+    #ifdef __ALTIVEC__
+        uint8_t find_maximum_altivec(char *buff, uint64_t start_pos, uint64_t end_pos, __vector unsigned char *vec_array);
+    #endif
+
+    /**
+     * @brief Finds minimum value in the region between <start_pos, end_pos> within the data stream *buff using SIMD instructions
+     * 
+     * @param buff Data Stream
+     * @param start_pos Starting position of scanned region
+     * @param end_pos Ending position of scanned region
+     * @return uint8_t Minimum Value
+     */
+
+    #ifdef __SSE3__
+        uint8_t find_minimum_sse128(char *buff, uint64_t start_pos, uint64_t end_pos, __m128i *xmm_array);
+    #endif
+    
+    #ifdef __AVX2__
+        uint8_t find_minimum_avx256(char *buff, uint64_t start_pos, uint64_t end_pos, __m256i *ymm_array);
+    #endif  
+
+    #if defined(__AVX512F__)
+        uint8_t find_minimum_avx512(char *buff, uint64_t start_pos, uint64_t end_pos, __m512i *zmm_array);
+    #endif
+
+    #if defined(__ARM_NEON)
+        uint8_t find_minimum_neon(char *buff, uint64_t start_pos, uint64_t end_pos, uint8x16_t *neon_array);
+    #endif
+
+    #ifdef __ALTIVEC__
+        uint8_t find_minimum_altivec(char *buff, uint64_t start_pos, uint64_t end_pos, __vector unsigned char *vec_array);
+    #endif
+
+    /**
+     * @brief Executes a range scan comparing bytes serially against the target value between <start_pos, end_pos> within the data stream *buff using SIMD instructions.
+     * Comparison operator: GEQ (>=)
+     * 
+     * @param buff Data Stream
+     * @param start_pos Starting position of scanned region
+     * @param end_pos Ending position of scanned region
+     * @param target_value Target value
+     * @return uint64_t Position of first matching candidate. 
+     * @return end_position if no match found
+     */
+    #ifdef __SSE3__
+        uint64_t range_scan_geq_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #ifdef __AVX2__
+        uint64_t range_scan_geq_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__AVX512F__)
+        uint64_t range_scan_geq_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__ARM_NEON)
+        uint64_t range_scan_geq_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__ALTIVEC__)
+        uint64_t range_scan_geq_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+    
+    /**
+     * @brief Executes a range scan comparing bytes serially against the target value between <start_pos, end_pos> within the data stream *buff using SIMD instructions.
+     * Comparison operator: GT (>)
+     * 
+     * @param buff Data Stream
+     * @param start_pos Starting position of scanned region
+     * @param end_pos Ending position of scanned region
+     * @param target_value Target value
+     * @return uint64_t Position of first matching candidate. 
+     * @return end_position if no match found
+     */
+    #ifdef __SSE3__
+        uint64_t range_scan_gt_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #ifdef __AVX2__
+        uint64_t range_scan_gt_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__AVX512F__)
+        uint64_t range_scan_gt_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__ARM_NEON)
+        uint64_t range_scan_gt_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__ALTIVEC__)
+        uint64_t range_scan_gt_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+     /**
+     * @brief Executes a range scan comparing bytes serially against the target value between <start_pos, end_pos> within the data stream *buff using SIMD instructions.
+     * Comparison operator: LEQ (<=)
+     * 
+     * @param buff Data Stream
+     * @param start_pos Starting position of scanned region
+     * @param end_pos Ending position of scanned region
+     * @param target_value Target value
+     * @return uint64_t Position of first matching candidate. 
+     * @return end_position if no match found
+     */
+    #ifdef __SSE3__
+        uint64_t range_scan_leq_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #ifdef __AVX2__
+        uint64_t range_scan_leq_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__AVX512F__)
+        uint64_t range_scan_leq_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__ARM_NEON)
+        uint64_t range_scan_leq_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #ifdef __ALTIVEC__
+        uint64_t range_scan_leq_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    /**
+     * @brief Executes a range scan comparing bytes serially against the target value between <start_pos, end_pos> within the data stream *buff using SIMD instructions.
+     * Comparison operator: LT (<)
+     * 
+     * @param buff Data Stream
+     * @param start_pos Starting position of scanned region
+     * @param end_pos Ending position of scanned region
+     * @param target_value Target value
+     * @return uint64_t Position of first matching candidate. 
+     * @return end_position if no match found
+     */
+    #ifdef __SSE3__
+        uint64_t range_scan_lt_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #ifdef __AVX2__
+        uint64_t range_scan_lt_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
     #if defined(__AVX512F__)
-        uint8_t find_maximum_avx512(char *buff, uint64_t start_pos, uint64_t end_pos, __m512i *ymm_array);
+        uint64_t range_scan_lt_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+    #if defined(__ARM_NEON)
+        uint64_t range_scan_lt_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
     #endif
 
+    #ifdef __ALTIVEC__
+        uint64_t range_scan_lt_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value);
+    #endif
+
+
     /**
      * @brief Helper functions to compare 2 128-bit XMM registers
      * 
@@ -47,28 +240,42 @@ class AVX_Chunking_Technique: public virtual Chunking_Technique{
      * @param b Second XMM register containing 8-bit packed values
      * @return __m128i Result of bytewise comparison
      */
-    inline __m128i Greater8uSSE(__m128i a, __m128i b){
-       return _mm_andnot_si128(_mm_cmpeq_epi8(_mm_min_epu8(a, b), a), K_INV_ZERO);
-    }
+    #ifdef __SSE3__
+        inline __m128i Greater8uSSE(__m128i a, __m128i b){
+        return _mm_andnot_si128(_mm_cmpeq_epi8(_mm_min_epu8(a, b), a), K_INV_ZERO);
+        }
 
-    inline __m128i GreaterOrEqual8uSSE(__m128i a, __m128i b){
-        return _mm_cmpeq_epi8(_mm_max_epu8(a, b), a);
-    }
+        inline __m128i GreaterOrEqual8uSSE(__m128i a, __m128i b){
+            return _mm_cmpeq_epi8(_mm_max_epu8(a, b), a);
+        }
 
-    inline __m128i Lesser8uSSE(__m128i a, __m128i b){
-            return _mm_andnot_si128(_mm_cmpeq_epi8(_mm_max_epu8(a, b), a), K_INV_ZERO);
-    }
+        inline __m128i Lesser8uSSE(__m128i a, __m128i b){
+                return _mm_andnot_si128(_mm_cmpeq_epi8(_mm_max_epu8(a, b), a), K_INV_ZERO);
+        }
 
-    inline __m128i LesserOrEqual8uSSE(__m128i a, __m128i b){
-            return _mm_cmpeq_epi8(_mm_min_epu8(a, b), a);
-    }
-    inline __m128i NotEqual8uSSE(__m128i a, __m128i b){
-            return _mm_andnot_si128(_mm_cmpeq_epi8(a, b), K_INV_ZERO);
-    }
+        inline __m128i LesserOrEqual8uSSE(__m128i a, __m128i b){
+                return _mm_cmpeq_epi8(_mm_min_epu8(a, b), a);
+        }
+        inline __m128i NotEqual8uSSE(__m128i a, __m128i b){
+                return _mm_andnot_si128(_mm_cmpeq_epi8(a, b), K_INV_ZERO);
+        }
+    #endif
 
-    inline __m256i GreaterOrEqual8uAVX256(__m256i a, __m256i b){
-        return _mm256_cmpeq_epi8(_mm256_max_epu8(a, b), a);
-    }
+    #ifdef __AVX2__
+        inline __m256i Greater8uAVX256(__m256i a, __m256i b){
+        return _mm256_andnot_si256(_mm256_cmpeq_epi8(_mm256_min_epu8(a, b), a), K_INV_ZERO_256);
+        }
+        inline __m256i Lesser8uAVX256(__m256i a, __m256i b){
+                return _mm256_andnot_si256(_mm256_cmpeq_epi8(_mm256_max_epu8(a, b), a), K_INV_ZERO_256);
+        }
+
+        inline __m256i GreaterOrEqual8uAVX256(__m256i a, __m256i b){
+            return _mm256_cmpeq_epi8(_mm256_max_epu8(a, b), a);
+        }
+        inline __m256i LesserOrEqual8uAVX256(__m256i a, __m256i b){
+            return _mm256_cmpeq_epi8(_mm256_min_epu8(a, b), a);
+        }
+    #endif
 
 
     
@@ -108,20 +315,6 @@ class AVX_Chunking_Technique: public virtual Chunking_Technique{
     }
     #endif
 
-
-    /**
-     * @brief Check if an integer has any byte with zero in it.
-     * Found this off the Stanford bitchacks page: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
-     * @param v Integer (4 bytes)
-     * @return true If any byte is zero
-     * @return false If all bytes are > 0
-     */
-    
-    inline bool hasZeroByte(unsigned int v)
-    {
-        return ~((((v & 0x7F7F7F7F) + 0x7F7F7F7F) | v) | 0x7F7F7F7F);
-    }
-
 };
 
 #endif
\ No newline at end of file
diff --git a/dedup/include/chunking/chunking_common.hpp b/dedup/include/chunking/chunking_common.hpp
index 5e0b167..ef3b7bc 100644
--- a/dedup/include/chunking/chunking_common.hpp
+++ b/dedup/include/chunking/chunking_common.hpp
@@ -17,6 +17,9 @@
 #include <memory>
 #include <sstream>
 #include <istream>
+#include <cstdint>
+#include <iostream>
+
 #include "hash.hpp"
 #include "config.hpp"
 #include "file_chunk.hpp"
@@ -47,13 +50,18 @@ class Chunking_Technique{
         std::chrono::duration<double, std::milli> total_time_hashing =
         std::chrono::duration<double, std::milli>::zero();
         /**
-         * @brief Chunk a file using a chunking technique and return the struct File_Chunks from this operation
+         * @brief Chunk a buffer using a chunking technique and return a single chunk boundary from this operation
          * 
-         * @param file_path: String containing path to file
-         * @return: Vector of struct File_Chunk
+         * @param buffer: Data stream of bytes
+         * @param buffer_size: Size of buffer
+         * @return: uint64_t indicating boundary position
          */
-        virtual uint64_t find_cutpoint(char*, uint64_t buffer_size){
-            return buffer_size;
+        virtual uint64_t find_cutpoint(char* buffer, uint64_t buffer_size){
+            if(buffer != nullptr)
+                return buffer_size;
+            else
+                std::cout << "Null buffer received" << std::endl; 
+            return 0;
         }
 
         /**
diff --git a/dedup/include/chunking/maxp_chunking.hpp b/dedup/include/chunking/maxp_chunking.hpp
new file mode 100644
index 0000000..c203421
--- /dev/null
+++ b/dedup/include/chunking/maxp_chunking.hpp
@@ -0,0 +1,88 @@
+#ifndef _MAXP_CHUNKING_
+#define _MAXP_CHUNKING_
+
+#include <math.h>
+#include <iostream>
+
+#include "avx_chunking_common.hpp"
+#include "chunking_common.hpp"
+#include "config.hpp"
+
+#include <cstring>
+
+#define DEFAULT_MAXP_WINDOW_SIZE 128
+#define DEFAULT_MAXP_MAX_BLOCK_SIZE 65536
+
+class MAXP_Chunking : public virtual AVX_Chunking_Technique {
+   private:
+    uint64_t max_block_size;
+    uint64_t window_size;
+
+    #ifdef __SSE3__
+    __m128i *xmm_array;
+    #endif
+
+    #ifdef __AVX2__
+    __m256i *ymm_array;
+    #endif
+    
+    #if defined(__AVX512F__)
+    __m512i *zmm_array;
+    #endif
+
+    #if defined(__ARM_NEON)
+    uint8x16_t *neon_array;
+    #endif
+
+    #ifdef __ALTIVEC__
+    __vector unsigned char *altivec_array;
+    #endif
+
+    /**
+     * @brief finds the next cut point in an array of bytes
+     * @param buff: the buff to find the cutpoint in.
+     * @param size: the size of the buffer
+     * @return: cutpoint position in the buffer 
+     */
+    uint64_t find_cutpoint(char* buff, uint64_t size) override;
+    uint64_t find_cutpoint_native(char *buff, uint64_t size);
+    
+    #ifdef __SSE3__
+    uint64_t find_cutpoint_sse128(char *buff, uint64_t size);
+    #endif
+
+    #ifdef __AVX2__
+    uint64_t find_cutpoint_avx256(char *buff, uint64_t size);
+    #endif
+    
+    #if defined(__AVX512F__)
+    uint64_t find_cutpoint_avx512(char *buff, uint64_t size);
+    #endif
+
+    #if defined(__ARM_NEON)
+    uint64_t find_cutpoint_neon(char *buff, uint64_t size);
+    #endif
+
+    #ifdef __ALTIVEC__
+    uint64_t find_cutpoint_altivec(char *buff, uint64_t size);
+    #endif
+
+   public:
+    /**
+     * @brief Default constructor.
+     * @return: void
+     */
+    MAXP_Chunking();
+
+    /**
+     * @brief Constructor with custom config from a config object
+     * @param config: the config object
+     * @return: void
+     */
+    MAXP_Chunking(const Config& config);
+
+    ~MAXP_Chunking();
+
+};
+
+#endif
\ No newline at end of file
diff --git a/dedup/include/chunking/ram_chunking.hpp b/dedup/include/chunking/ram_chunking.hpp
index 912c6fd..ecf89e7 100644
--- a/dedup/include/chunking/ram_chunking.hpp
+++ b/dedup/include/chunking/ram_chunking.hpp
@@ -18,9 +18,25 @@ class RAM_Chunking : public virtual AVX_Chunking_Technique {
     uint64_t window_size;
     uint64_t curr_pos;
 
-    __m128i *sse_array;
-    __m256i *avx256_array;
-    __m512i *avx512_array;
+    #ifdef __SSE3__
+      __m128i *sse_array;
+    #endif
+
+    #ifdef __AVX2__
+      __m256i *avx256_array;
+    #endif
+
+    #if defined(__AVX512F__)
+      __m512i *avx512_array;
+    #endif
+
+    #ifdef __ARM_NEON
+      uint8x16_t *neon_array;
+    #endif
+
+    #ifdef __ALTIVEC__
+      __vector unsigned char *altivec_array;
+    #endif
 
     /**
      * @brief finds the next cut point in an array of bytes
@@ -38,12 +54,6 @@ class RAM_Chunking : public virtual AVX_Chunking_Technique {
      * @param max_value Max value within fixed size window
      * @return uint64_t Return position in stream
      */
-    uint64_t get_return_position_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t max_value);
-    uint64_t get_return_position_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t max_value);
-    #if defined(__AVX512F__)
-        uint64_t get_return_position_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t max_value);
-    #endif
-
     
    public:
     /**
diff --git a/dedup/include/config/config.hpp b/dedup/include/config/config.hpp
index 3b97c06..c2fc5c2 100644
--- a/dedup/include/config/config.hpp
+++ b/dedup/include/config/config.hpp
@@ -3,8 +3,11 @@
 #define _CONFIG_
 #include "parser.hpp"
 
+#include <cstdint>
+
 #define CHUNKING_TECH "chunking_algo"
 #define HASHING_TECH "hashing_algo"
+#define CHUNKING_MODE "chunking_mode"
 #define SIMD_MODE_STRING "simd_mode"
 
 #define FC_SIZE "fc_size"
@@ -32,6 +35,8 @@
 #define CRC_WINDOW_STEP_SIZE "crc_window_step_size"
 #define CRC_HASH_BITS "crc_hash_bits"
 #define BUFFER_SIZE "buffer_size"
+#define MAXP_WINDOW_SIZE "maxp_window_size"
+#define MAXP_MAX_BLOCK_SIZE "maxp_max_block_size"
 #define SEQ_JUMP_TRIGGER "seq_jump_trigger"
 #define SEQ_CHUNK_BOUNDARY_THRESHOLD "seq_sequence_threshold"
 #define SEQ_JUMP_SIZE "seq_jump_size"
@@ -57,6 +62,7 @@ enum class ChunkingTech {
     AE,
     GEAR,
     FASTCDC,
+    MAXP,
     RAM,
     EXPERIMENT,
     CRC,
@@ -69,13 +75,18 @@ enum class SIMD_Mode{
     SSE128_NOSLIDE,
     SSE128,
     AVX256,
-    AVX512
+    AVX512,
+    NEON,
+    ALTIVEC
 };
 
 // define the possible hashing algorithms
-enum class HashingTech { MD5, SHA1, SHA256, SHA512 };
+enum class HashingTech { MD5, SHA1, SHA256, SHA512, XXHASH128, MURMURHASH3 };
+
 // define the the extreme value type of AE algorithm
 enum AE_Mode { MAX, MIN };
+
+// define SeqCDC operating modes
 enum Seq_Op_Mode { INCREASING, DECREASING };
 
 
@@ -93,27 +104,28 @@ class Config {
      */
     ChunkingTech get_chunking_tech() const;
 
-    /**
-     * @brief Get the hashing algorithm specified in the config file.
+     /**
+     * @brief Get the chunking mode specified in the config file.
      * throws ConfigError if the key does not exist or if the value is invalid
      *
-     * @return HashingTech
+     * @return ChunkingMode
      */
     HashingTech get_hashing_tech() const;
 
+    
     /**
      * @brief Get the SIMD mode for chunking technique
      * 
      * @return SIMD_Mode
      */
     SIMD_Mode get_simd_mode() const;
-
+    
     /**
      * @brief Get the size (in number of bytes) of a chunk when using fixed-size
      * chunking throws ConfigError if the key does not exist or if the value is
      * invalid
      *
-     * @return HashingTech
+     * @return uint64_t
      */
 
     uint64_t get_fc_size() const;
@@ -408,6 +420,15 @@ class Config {
      *
      * @return uint64_t
      */
+
+    uint64_t get_maxp_window_size() const;
+    uint64_t get_maxp_max_block_size() const;
+     
+     /**
+      * @brief: Get MAXP window size
+      * 
+      * @return uint64_t
+      */
 };
 
 #endif
diff --git a/dedup/include/hashing/murmurhash3_hashing.hpp b/dedup/include/hashing/murmurhash3_hashing.hpp
new file mode 100644
index 0000000..b560bbb
--- /dev/null
+++ b/dedup/include/hashing/murmurhash3_hashing.hpp
@@ -0,0 +1,57 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
+
+// Additions for dedup-bench
+//-----------------------------------------------------------------------------
+
+#include "hashing_common.hpp"
+
+#define MURMURHASH3_DIGEST_LENGTH 16
+
+class MurmurHash3_Hashing : public Hashing_Technique {
+    /**
+     * @brief Class to implement MurmurHash3 Hashing
+     * This class implements the MurmurHash3 hashing technique.
+     */
+    public:
+        // Function to hash a given chunk
+        void hash_chunk(File_Chunk& file_chunk) override;
+        MurmurHash3_Hashing() {
+            technique_name = "MurmurHash3-Hashing";
+        }
+};
diff --git a/dedup/include/hashing/xxhash_hashing.hpp b/dedup/include/hashing/xxhash_hashing.hpp
new file mode 100644
index 0000000..9eed996
--- /dev/null
+++ b/dedup/include/hashing/xxhash_hashing.hpp
@@ -0,0 +1,37 @@
+/**
+ * @author: WASL
+ * @date: 2025-07-05
+ * @brief: Header file for xxHash Hashing Technique
+ * This file defines the XXHash_Hashing class which implements the Hashing_Technique interface.
+ * It provides the functionality to hash file chunks using the xxHash algorithm.
+ * Note: I could only find xxHash128 on GitHub (Cyan4973/xxHash).)
+ */
+
+#ifndef _XXHASH_HASHING_
+#define _XXHASH_HASHING_
+
+#define XXHASH_INLINE_ALL /* enable inlining for all functions */
+
+#include "hashing_common.hpp"
+#include "xxhash.h"
+
+// Define the digest length for xxHash128
+// This is the size of the hash output in bytes
+#define XXH128_DIGEST_LENGTH 16
+
+class XXHash_Hashing: public virtual Hashing_Technique{
+    /**
+     * @brief Class to implement xxHash Hashing
+     *
+     */
+    public:
+        // Function to hash a given chunk
+        void hash_chunk(File_Chunk& file_chunk) override;
+
+        XXHash_Hashing() {
+            technique_name = "xxHash-Hashing";
+        }
+};
+
+#endif
+
diff --git a/dedup/src/chunking/ae_chunking.cpp b/dedup/src/chunking/ae_chunking.cpp
index 0536eac..06a843e 100644
--- a/dedup/src/chunking/ae_chunking.cpp
+++ b/dedup/src/chunking/ae_chunking.cpp
@@ -16,22 +16,506 @@ AE_Chunking::AE_Chunking() {
     avg_block_size = DEFAULT_AE_AVG_BLOCK_SIZE;
     extreme_mode = MAX;
     technique_name = "AE Chunking";
+
+	chunk_counter = 0;
+
+	#if defined(__SSE3__)
+	sse_array = nullptr;
+	#endif
+	
+	#if defined(__AVX2__)
+	avx256_array = nullptr;
+	#endif
+	
+	#ifdef __AVX512F__
+	avx512_array = nullptr;
+	#endif
+
+	#ifdef __ARM_NEON
+	neon_array = nullptr;
+	#endif
+
+	#ifdef __ALTIVEC__
+	altivec_array = nullptr;
+	#endif
+
+
+	simd_mode = SIMD_Mode::NONE;
 }
 
 AE_Chunking::AE_Chunking(const Config& config) {
     extreme_mode = config.get_ae_extreme_mode();
     avg_block_size = config.get_ae_avg_block_size();
     window_size = avg_block_size - 256;
+	// window_size = avg_block_size / (exp(1) - 1);  // avg_block size / e-1
+
 
-    // window_size = avg_block_size / (exp(1) - 1);  // avg_block size / e-1
+	// Initialize SIMD arrays based on the chosen SIMD mode
+
+	#if defined(__SSE3__)
+	sse_array = nullptr;
+	#endif
+	
+	#if defined(__AVX2__)
+	avx256_array = nullptr;
+	#endif
+	
+	#ifdef __AVX512F__
+	avx512_array = nullptr;
+	#endif
+
+	#ifdef __ARM_NEON
+	neon_array = nullptr;
+	#endif
+
+	#ifdef __ALTIVEC__
+	altivec_array = nullptr;
+	#endif
+
+	simd_mode = config.get_simd_mode();
+	if (simd_mode == SIMD_Mode::NONE) {
+
+	} 
+	#if defined(__SSE3__)
+	else if (simd_mode == SIMD_Mode::SSE128) {
+		if(window_size % SSE_REGISTER_SIZE_BYTES != 0 || (window_size / SSE_REGISTER_SIZE_BYTES) % 2 != 0) {
+            // Check if window size is a multiple of 16
+            // Check if window size makes an even number of vectors for find_maximum_sse128
+            std::cout << "AE window size currently unsupported by SSE128. Please use an even multiple of SSE128_REGISTER_SIZE_BYTES (default 16)." << std::endl;
+            exit(1);
+        }
+		sse_array = new __m128i[avg_block_size / SSE_REGISTER_SIZE_BYTES];
+		if (sse_array == nullptr) {
+			std::cout << "Error allocating memory for 128-bit vectors(__m128i)" << std::endl;
+			exit(1);
+		}
+	} 
+	#endif
+
+	#if defined(__AVX2__)
+	else if (simd_mode == SIMD_Mode::AVX256) {
+		if(window_size % AVX256_REGISTER_SIZE_BYTES != 0 || (window_size / AVX256_REGISTER_SIZE_BYTES % 2 != 0)) {
+			// Check if window size is a multiple of 32
+			// Check if window size makes an even number of vectors for find_maximum_AVX256
+			std::cout << "AE window size currently unsupported by AVX256. Please use an even multiple of AVX_REGISTER_SIZE_BYTES (default 32)." << std::endl;
+			exit(1);
+		}
+		avx256_array = new __m256i[avg_block_size / AVX256_REGISTER_SIZE_BYTES];
+		if (avx256_array == nullptr) {
+			std::cout << "Error allocating memory for 256-bit vectors(__m256i)" << std::endl;
+			exit(1);
+		}
+	}
+	#endif
 
+	#ifdef __AVX512F__
+	else if (simd_mode == SIMD_Mode::AVX512) {
+		if(window_size % AVX512_REGISTER_SIZE_BYTES != 0 || (window_size / AVX512_REGISTER_SIZE_BYTES) % 2 != 0) {
+			// Check if window size is a multiple of 64
+			// Check if window size makes an even number of vectors for find_maximum_AVX512
+			std::cout << "AE window size currently unsupported by AVX512. Please use an even multiple of AVX_REGISTER_SIZE_BYTES (default 64)." << std::endl;
+			exit(1);
+		}
+		avx512_array = new __m512i[avg_block_size / AVX512_REGISTER_SIZE_BYTES];
+		if (avx512_array == nullptr) {
+			std::cout << "Error allocating memory for 512-bit vectors(__m512i)" << std::endl;
+			exit(1);
+		}
+	}
+	#endif
+
+	#ifdef __ARM_NEON
+	else if (simd_mode == SIMD_Mode::NEON) {
+		if(window_size % NEON_REGISTER_SIZE_BYTES != 0 || (window_size / NEON_REGISTER_SIZE_BYTES) % 2 != 0) {
+			// Check if window size is a multiple of 16
+			// Check if window size makes an even number of vectors for find_maximum_neon
+			std::cout << "AE window size currently unsupported by NEON. Please use an even multiple of NEON_REGISTER_SIZE_BYTES (default 16)." << std::endl;
+			exit(1);
+		}
+		neon_array = new uint8x16_t[avg_block_size / NEON_REGISTER_SIZE_BYTES];
+		if (neon_array == nullptr) {
+			std::cout << "Error allocating memory for NEON vectors(uint8x16_t)" << std::endl;
+			exit(1);
+		}
+	}
+	#endif
+
+	#ifdef __ALTIVEC__
+	else if (simd_mode == SIMD_Mode::ALTIVEC) {
+		if(window_size % ALTIVEC_REGISTER_SIZE_BYTES != 0 || (window_size / ALTIVEC_REGISTER_SIZE_BYTES) % 2 != 0) {
+			// Check if window size is a multiple of 16
+			// Check if window size makes an even number of vectors for find_maximum_altivec
+			std::cout << "AE window size currently unsupported by ALTIVEC. Please use an even multiple of ALTIVEC_REGISTER_SIZE_BYTES (default 16)." << std::endl;
+			exit(1);
+		}
+		altivec_array = new __vector unsigned char[avg_block_size / ALTIVEC_REGISTER_SIZE_BYTES];
+		if (altivec_array == nullptr) {
+			std::cout << "Error allocating memory for ALTIVEC vectors(__vector unsigned char)" << std::endl;
+			exit(1);
+		}
+	}
+	#endif
+
+	else {
+		std::cerr << "Error: Unsupported SIMD mode" << std::endl;
+		exit(1);
+	}
+    
     technique_name = "AE Chunking";
 }
 
 AE_Chunking::~AE_Chunking() {
+	#ifdef __SSE3__
+	if (sse_array != nullptr) {
+		delete[] sse_array;
+	}
+	#endif
+
+	#ifdef __AVX2__
+	if (avx256_array != nullptr) {
+		delete[] avx256_array;
+	}
+	#endif
+
+	#ifdef __AVX512F__
+	if (avx512_array != nullptr) {
+		delete[] avx512_array;
+	}
+	#endif
+
+	#ifdef __ARM_NEON
+	if (neon_array != nullptr) {
+		delete[] neon_array;
+	}
+	#endif
+
+	#ifdef __ALTIVEC__
+	if (altivec_array != nullptr) {
+		delete[] altivec_array;	
+	}
+	#endif
+
+}
+
+uint64_t AE_Chunking::find_cutpoint(char *buff, uint64_t size){
+
+	// chunk_counter++;
+
+	if(simd_mode == SIMD_Mode::NONE){
+		return find_cutpoint_native(buff, size);
+	}
+	
+	#ifdef __SSE3__
+	else if(simd_mode == SIMD_Mode::SSE128){
+		return find_cutpoint_sse128(buff, size);
+	}
+	#endif
+	
+	#ifdef __AVX2__
+	else if(simd_mode == SIMD_Mode::AVX256){
+		return find_cutpoint_avx256(buff, size);
+	}
+	#endif
+
+	#ifdef __AVX512F__
+	else if(simd_mode == SIMD_Mode::AVX512){
+		return find_cutpoint_avx512(buff, size);
+	}
+	#endif
+
+	#ifdef __ARM_NEON
+	else if(simd_mode == SIMD_Mode::NEON){
+		return find_cutpoint_neon(buff, size);
+	}
+	#endif
+
+	#ifdef __ALTIVEC__
+	else if(simd_mode == SIMD_Mode::ALTIVEC){
+		return find_cutpoint_altivec(buff, size);
+	}
+	#endif
+
+	else{
+		std::cerr << "Error: Unsupported SIMD mode" << std::endl;
+		return 0;
+	}
+}
+
+#ifdef __SSE3__
+uint64_t AE_Chunking::find_cutpoint_sse128(char* buff, uint64_t size) {
+	uint64_t target_pos = 0;
+	uint64_t return_pos_range_scan;
+	uint8_t find_max_result;
+
+	uint8_t target_value = (uint8_t) buff[target_pos];
+	if(extreme_mode == AE_Mode::MAX){
+		while(target_pos < (size - window_size - 1)){
+
+			return_pos_range_scan = range_scan_gt_sse128(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+			if(return_pos_range_scan != target_pos + 1 + window_size){
+				target_pos = return_pos_range_scan;
+				target_value = (uint8_t) buff[target_pos];
+
+				find_max_result = find_maximum_sse128(buff, target_pos + 1, target_pos + 1 + window_size, sse_array);
+
+				if(find_max_result < target_value)
+					return std::min(size, target_pos + window_size);
+			}
+			else {
+				return std::min(size, target_pos + window_size);
+			}
+		}
+
+		return size;
+	}
+	else if(extreme_mode == AE_Mode::MIN){
+			uint64_t target_pos = 0;
+			uint64_t return_pos_range_scan;
+			uint8_t find_min_result;
+			uint8_t target_value = (uint8_t) buff[target_pos];
+
+			while(target_pos < (size - window_size - 1)){
+				return_pos_range_scan = range_scan_lt_sse128(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+				if(return_pos_range_scan != target_pos + 1 + window_size){
+					target_pos = return_pos_range_scan;
+					target_value = (uint8_t) buff[target_pos];
+
+					find_min_result = find_minimum_sse128(buff, target_pos + 1, target_pos + 1 + window_size, sse_array);
+
+					if(find_min_result > target_value)
+						return std::min(size, target_pos + window_size);
+				}
+				else {
+					return std::min(size, target_pos + window_size);
+				}
+			}
+	}
+	
+	return size;
+}
+#endif
+
+#ifdef __AVX2__
+uint64_t AE_Chunking::find_cutpoint_avx256(char* buff, uint64_t size) {
+	uint64_t target_pos = 0;
+	uint64_t return_pos_range_scan;
+	uint8_t find_max_result;
+
+	uint8_t target_value = (uint8_t) buff[target_pos];
+	if(extreme_mode == AE_Mode::MAX){
+		while(target_pos < (size - window_size - 1)){
+
+			return_pos_range_scan = range_scan_gt_avx256(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+			if(return_pos_range_scan != target_pos + 1 + window_size){
+				target_pos = return_pos_range_scan;
+				target_value = (uint8_t) buff[target_pos];
+
+				find_max_result = find_maximum_avx256(buff, target_pos + 1, target_pos + 1 + window_size, avx256_array);
+
+				if(find_max_result < target_value)
+					return std::min(size, target_pos + window_size);
+			}
+			else {
+				return std::min(size, target_pos + window_size);
+			}
+		}
+
+		return size;
+	}
+	else if(extreme_mode == AE_Mode::MIN){
+			uint64_t target_pos = 0;
+			uint64_t return_pos_range_scan;
+			uint8_t find_min_result;
+			uint8_t target_value = (uint8_t) buff[target_pos];
+
+			while(target_pos < (size - window_size - 1)){
+				return_pos_range_scan = range_scan_lt_avx256(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+				if(return_pos_range_scan != target_pos + 1 + window_size){
+					target_pos = return_pos_range_scan;
+					target_value = (uint8_t) buff[target_pos];
+
+					find_min_result = find_minimum_avx256(buff, target_pos + 1, target_pos + 1 + window_size, avx256_array);
+
+					if(find_min_result > target_value)
+						return std::min(size, target_pos + window_size);
+				}
+				else {
+					return std::min(size, target_pos + window_size);
+				}
+			}
+	}
+	
+	return size;
+}
+#endif
+
+#if defined(__AVX512F__)
+uint64_t AE_Chunking::find_cutpoint_avx512(char* buff, uint64_t size) {
+	uint64_t target_pos = 0;
+	uint64_t return_pos_range_scan;
+	uint8_t find_max_result;
+
+	uint8_t target_value = (uint8_t) buff[target_pos];
+	if(extreme_mode == AE_Mode::MAX){
+		while(target_pos < (size - window_size - 1)){
+
+			return_pos_range_scan = range_scan_gt_avx512(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+			if(return_pos_range_scan != target_pos + 1 + window_size){
+				target_pos = return_pos_range_scan;
+				target_value = (uint8_t) buff[target_pos];
+
+				find_max_result = find_maximum_avx512(buff, target_pos + 1, target_pos + 1 + window_size, avx512_array);
+
+				if(find_max_result < target_value)
+					return std::min(size, target_pos + window_size);
+			}
+			else {
+				return std::min(size, target_pos + window_size);
+			}
+		}
+
+		return size;
+	}
+	else if(extreme_mode == AE_Mode::MIN){
+			uint64_t target_pos = 0;
+			uint64_t return_pos_range_scan;
+			uint8_t find_min_result;
+			uint8_t target_value = (uint8_t) buff[target_pos];
+
+			while(target_pos < (size - window_size - 1)){
+				return_pos_range_scan = range_scan_lt_avx512(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+				if(return_pos_range_scan != target_pos + 1 + window_size){
+					target_pos = return_pos_range_scan;
+					target_value = (uint8_t) buff[target_pos];
+
+					find_min_result = find_minimum_avx512(buff, target_pos + 1, target_pos + 1 + window_size, avx512_array);
+
+					if(find_min_result > target_value)
+						return std::min(size, target_pos + window_size);
+				}
+				else {
+					return std::min(size, target_pos + window_size);
+				}
+			}
+	}
+	
+	return size;
+}
+#endif
+
+#ifdef __ARM_NEON
+uint64_t AE_Chunking::find_cutpoint_neon(char* buff, uint64_t size) {
+	uint64_t target_pos = 0;
+	uint64_t return_pos_range_scan;
+	uint8_t find_max_result;
+
+	uint8_t target_value = (uint8_t) buff[target_pos];
+	if(extreme_mode == AE_Mode::MAX){
+		while(target_pos < (size - window_size - 1)){
+
+			return_pos_range_scan = range_scan_gt_neon(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+			if(return_pos_range_scan != target_pos + 1 + window_size){
+				target_pos = return_pos_range_scan;
+				target_value = (uint8_t) buff[target_pos];
+
+				find_max_result = find_maximum_neon(buff, target_pos + 1, target_pos + 1 + window_size, neon_array);
+
+				if(find_max_result < target_value)
+					{
+						return std::min(size, target_pos + window_size);
+					}
+			}
+			else {
+				return std::min(size, target_pos + window_size);
+			}
+		}
+
+		return size;
+	}
+	else if(extreme_mode == AE_Mode::MIN){
+			uint64_t target_pos = 0;
+			uint64_t return_pos_range_scan;
+			uint8_t find_min_result;
+			uint8_t target_value = (uint8_t) buff[target_pos];
+
+			while(target_pos < (size - window_size - 1)){
+				return_pos_range_scan = range_scan_lt_neon(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+				if(return_pos_range_scan != target_pos + 1 + window_size){
+					target_pos = return_pos_range_scan;
+					target_value = (uint8_t) buff[target_pos];
+
+					find_min_result = find_minimum_neon(buff, target_pos + 1, target_pos + 1 + window_size, neon_array);
+
+					if(find_min_result > target_value)
+						return std::min(size, target_pos + window_size);
+				}
+				else {
+					return std::min(size, target_pos + window_size);
+				}
+			}
+	}
+	
+	return size;
+}
+#endif
+
+#ifdef __ALTIVEC__
+uint64_t AE_Chunking::find_cutpoint_altivec(char* buff, uint64_t size) {
+	uint64_t target_pos = 0;
+	uint64_t return_pos_range_scan;
+	uint8_t find_max_result;
+
+	uint8_t target_value = (uint8_t) buff[target_pos];
+	if(extreme_mode == AE_Mode::MAX){
+		while(target_pos < (size - window_size - 1)){
+
+			return_pos_range_scan = range_scan_gt_altivec(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+			if(return_pos_range_scan != target_pos + 1 + window_size){
+				target_pos = return_pos_range_scan;
+				target_value = (uint8_t) buff[target_pos];
+
+				find_max_result = find_maximum_altivec(buff, target_pos + 1, target_pos + 1 + window_size, altivec_array);
+
+				if(find_max_result < target_value)
+				{
+					return std::min(size, target_pos + window_size);
+				}
+			}
+			else {
+				return std::min(size, target_pos + window_size);
+			}
+		}
+
+		return size;
+	}
+	else if(extreme_mode == AE_Mode::MIN){
+			uint64_t target_pos = 0;
+			uint64_t return_pos_range_scan;
+			uint8_t find_min_result;
+			uint8_t target_value = (uint8_t) buff[target_pos];
+
+			while(target_pos < (size - window_size - 1)){
+				return_pos_range_scan = range_scan_lt_altivec(buff, target_pos + 1, target_pos + 1 + window_size, target_value);
+				if(return_pos_range_scan != target_pos + 1 + window_size){
+					target_pos = return_pos_range_scan;
+					target_value = (uint8_t) buff[target_pos];
+
+					find_min_result = find_minimum_altivec(buff, target_pos + 1, target_pos + 1 + window_size, altivec_array);
+
+					if(find_min_result > target_value)
+						return std::min(size, target_pos + window_size);
+				}
+				else {
+					return std::min(size, target_pos + window_size);
+				}
+			}
+	}
+	
+	return size;
 }
+#endif
 
-uint64_t AE_Chunking::find_cutpoint(char* buff, uint64_t size) {
+uint64_t AE_Chunking::find_cutpoint_native(char* buff, uint64_t size) {
     uint32_t i = 0;
     uint64_t max_value = buff[i];
     uint64_t max_pos = i;
diff --git a/dedup/src/chunking/avx_chunking_common.cpp b/dedup/src/chunking/avx_chunking_common.cpp
index 4e47512..a273f8e 100644
--- a/dedup/src/chunking/avx_chunking_common.cpp
+++ b/dedup/src/chunking/avx_chunking_common.cpp
@@ -9,6 +9,7 @@
  *
  */
 
+
 #include "avx_chunking_common.hpp"
 #include "chunking_common.hpp"
 #include "hashing_common.hpp"
@@ -21,6 +22,16 @@
 #include <memory>
 #include <string>
 
+/**
+ * * @brief: Helper functions to find maximum value in a region of the data stream
+ * * @param buff Data Stream
+ * * @param start_pos Starting position of scanned region
+ * * @param end_pos Ending position of scanned region
+ * * @param xmm_array Array of XMM registers to store intermediate results
+ * * @return uint8_t Maximum Value
+ */
+
+#if defined(__SSE3__)
 uint8_t AVX_Chunking_Technique::find_maximum_sse128(char *buff, uint64_t start_pos, uint64_t end_pos, __m128i *xmm_array){
 
     // Assume window_size is a multiple of SSE_REGISTER_SIZE_BYTES for now
@@ -33,7 +44,7 @@ uint8_t AVX_Chunking_Technique::find_maximum_sse128(char *buff, uint64_t start_p
 
     // Load contents into __m128i structures
     // Could be optimized later as only 16 xmm registers are avaiable per CPU in 64-bit
-    for(uint64_t i = start_pos; i < num_vectors; i++)
+    for(uint64_t i = 0; i < num_vectors; i++)
         xmm_array[i] = _mm_loadu_si128((__m128i const *)(buff + start_pos + (SSE_REGISTER_SIZE_BYTES * i)));
     
     // Repeat vmaxu until a single register is remaining with maximum values
@@ -65,7 +76,9 @@ uint8_t AVX_Chunking_Technique::find_maximum_sse128(char *buff, uint64_t start_p
     // Return maximum value
     return max_val;
 }
+#endif
 
+#if defined(__AVX2__)
 uint8_t AVX_Chunking_Technique::find_maximum_avx256(char *buff, uint64_t start_pos, uint64_t end_pos, __m256i *xmm_array){
 
     // Assume window_size is a multiple of AVX256_REGISTER_SIZE_BYTES for now
@@ -78,7 +91,7 @@ uint8_t AVX_Chunking_Technique::find_maximum_avx256(char *buff, uint64_t start_p
 
     // Load contents into __m256i structures
     // Could be optimized later as only 16 xmm registers are avaiable per CPU in 64-bit
-    for(uint64_t i = start_pos; i < num_vectors; i++)
+    for(uint64_t i = 0; i < num_vectors; i++)
         xmm_array[i] = _mm256_loadu_si256((__m256i const *)(buff + start_pos + (AVX256_REGISTER_SIZE_BYTES * i)));
     
     // Repeat vmaxu until a single register is remaining with maximum values
@@ -110,6 +123,7 @@ uint8_t AVX_Chunking_Technique::find_maximum_avx256(char *buff, uint64_t start_p
     // Return maximum value
     return max_val;
 }
+#endif
 
 #if defined(__AVX512F__)
 uint8_t AVX_Chunking_Technique::find_maximum_avx512(char *buff, uint64_t start_pos, uint64_t end_pos, __m512i *xmm_array){
@@ -126,7 +140,7 @@ uint8_t AVX_Chunking_Technique::find_maximum_avx512(char *buff, uint64_t start_p
 
     // Load contents into __m512i structures
     // Could be optimized later as only 16 xmm registers are avaiable per CPU in 64-bit
-    for(uint64_t i = start_pos; i < num_vectors; i++)
+    for(uint64_t i = 0; i < num_vectors; i++)
         xmm_array[i] = _mm512_loadu_si512((__m512i const *)(buff + start_pos + (AVX512_REGISTER_SIZE_BYTES * i)));
     
     // Repeat vmaxu until a single register is remaining with maximum values
@@ -159,3 +173,1236 @@ uint8_t AVX_Chunking_Technique::find_maximum_avx512(char *buff, uint64_t start_p
     return max_val;
 }
 #endif
+
+#if defined(__ARM_NEON)
+uint8_t AVX_Chunking_Technique::find_maximum_neon(char *buff, uint64_t start_pos, uint64_t end_pos, uint8x16_t *neon_array){
+
+    // Assume window_size is a multiple of NEON_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / NEON_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into uint8x16_t structures
+    // Could be optimized later as only 16 mm registers are avaiable per CPU in 64-bit
+    for(uint64_t i = 0; i < num_vectors; i++)
+        neon_array[i] = vld1q_u8((uint8_t const *)(buff + start_pos + (NEON_REGISTER_SIZE_BYTES * i)));
+
+    // Repeat vmaxu until a single register is remaining with maximum values
+    // Each iteration calculates maximums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the maximum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            neon_array[i] = vmaxq_u8(neon_array[i], neon_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the NEON into local memory
+    uint8_t result_store[NEON_REGISTER_SIZE_BYTES] = {0};
+    vst1q_u8((uint8_t *)&result_store, neon_array[0]);
+    
+    // Sequentially scan the last remaining bytes (16 in this case) to find the max value
+    uint8_t max_val = 0;
+    for(uint64_t i = 0; i < NEON_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] > max_val)
+            max_val = result_store[i];
+    }
+    // Return maximum value
+    return max_val;
+
+}
+#endif
+
+#if defined(__ALTIVEC__)
+uint8_t AVX_Chunking_Technique::find_maximum_altivec(char *buff, uint64_t start_pos, uint64_t end_pos, __vector unsigned char *vec_array){
+
+    // Assume window_size is a multiple of ALTIVEC_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / ALTIVEC_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into vector unsigned char structures
+    // Could be optimized later as only 32 vec registers are available per CPU
+    for(uint64_t i = 0; i < num_vectors; i++)
+        vec_array[i] = vec_xl(0, (unsigned char const *)(buff + start_pos + (ALTIVEC_REGISTER_SIZE_BYTES * i)));
+
+    // Repeat vmaxu until a single register is remaining with maximum values
+    // Each iteration calculates maximums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the maximum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            vec_array[i] = vec_max(vec_array[i], vec_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the vec into local memory    
+    unsigned char result_store[ALTIVEC_REGISTER_SIZE_BYTES] = {0};
+
+    vec_st(vec_array[0], 0, (unsigned char *)&result_store);
+
+    // Sequentially scan the last remaining bytes (16 in this case) to find the max value
+    uint8_t max_val = 0;
+    for(uint64_t i = 0; i < ALTIVEC_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] > max_val)
+            max_val = result_store[i];
+    }
+
+    // Return maximum value
+    return max_val;
+}
+#endif
+
+/**
+ * @brief: Helper functions to find minimum value in data stream
+ * * @param buff Data Stream
+ * * @param start_pos Starting position of scanned region
+ * * @param end_pos Ending position of scanned region
+ * * @param xmm_array Array of SIMD registers to store intermediate results
+ * * @return uint8_t Minimum Value
+ */
+
+ #if defined(__SSE3__)
+ uint8_t AVX_Chunking_Technique::find_minimum_sse128(char *buff, uint64_t start_pos, uint64_t end_pos, __m128i *xmm_array){
+
+    // Assume window_size is a multiple of SSE_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / SSE_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into __m128i structures
+    // Could be optimized later as only 16 xmm registers are avaiable per CPU in 64-bit
+    for(uint64_t i = 0; i < num_vectors; i++)
+        xmm_array[i] = _mm_loadu_si128((__m128i const *)(buff + start_pos + (SSE_REGISTER_SIZE_BYTES * i)));
+    
+    // Repeat vminu until a single register is remaining with minimum values
+    // Each iteration calculates minimums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the minimum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            xmm_array[i] = _mm_min_epu8(xmm_array[i], xmm_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the xmm into local memory    
+    uint8_t result_store[SSE_REGISTER_SIZE_BYTES] = {0};
+
+    _mm_storeu_si128((__m128i *)&result_store, xmm_array[0]);
+
+    // Sequentially scan the last remaining bytes (128 in this case) to find the min value
+    uint8_t min_val = UINT8_MAX;
+    for(uint64_t i = 0; i < SSE_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] < min_val)
+            min_val = result_store[i];
+    }
+
+    // Return minimum value
+    return min_val;
+}
+#endif
+
+#if defined(__AVX2__)
+uint8_t AVX_Chunking_Technique::find_minimum_avx256(char *buff, uint64_t start_pos, uint64_t end_pos, __m256i *xmm_array){
+
+    // Assume window_size is a multiple of AVX256_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / AVX256_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into __m256i structures
+    // Could be optimized later as only 16 xmm registers are avaiable per CPU in 64-bit
+    for(uint64_t i = 0; i < num_vectors; i++)
+        xmm_array[i] = _mm256_loadu_si256((__m256i const *)(buff + start_pos + (AVX256_REGISTER_SIZE_BYTES * i)));
+    
+    // Repeat vminu until a single register is remaining with minimum values
+    // Each iteration calculates minimums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the minimum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            xmm_array[i] = _mm256_min_epu8(xmm_array[i], xmm_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the xmm into local memory    
+    uint8_t result_store[AVX256_REGISTER_SIZE_BYTES] = {0};
+
+    _mm256_storeu_si256((__m256i *)&result_store, xmm_array[0]);
+
+    // Sequentially scan the last remaining bytes (256 in this case) to find the min value
+    uint8_t min_val = UINT8_MAX;
+    for(uint64_t i = 0; i < AVX256_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] < min_val)
+            min_val = result_store[i];
+    }
+
+    // Return minimum value
+    return min_val;
+}
+#endif
+
+#if defined(__AVX512F__)
+uint8_t AVX_Chunking_Technique::find_minimum_avx512(char *buff, uint64_t start_pos, uint64_t end_pos, __m512i *xmm_array){
+
+    // Assume window_size is a multiple of AVX512_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / AVX512_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into __m512i structures
+    // Could be optimized later as only 16 xmm registers are avaiable per CPU in 64-bit
+    for(uint64_t i = 0; i < num_vectors; i++)
+        xmm_array[i] = _mm512_loadu_si512((__m512i const *)(buff + start_pos + (AVX512_REGISTER_SIZE_BYTES * i)));
+    
+    // Repeat vminu until a single register is remaining with minimum values
+    // Each iteration calculates minimums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the minimum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            xmm_array[i] = _mm512_maskz_min_epu8(UINT64_MAX, xmm_array[i], xmm_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the xmm into local memory    
+    uint8_t result_store[AVX512_REGISTER_SIZE_BYTES] = {0};
+
+    _mm512_storeu_si512((__m512i *)&result_store, xmm_array[0]);
+
+    // Sequentially scan the last remaining bytes (512 in this case) to find the min value
+    uint8_t min_val = UINT8_MAX;
+    for(uint64_t i = 0; i < AVX512_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] < min_val)
+            min_val = result_store[i];
+    }
+
+    // Return minimum value
+    return min_val;
+}
+#endif
+
+#if defined(__ARM_NEON)
+uint8_t AVX_Chunking_Technique::find_minimum_neon(char *buff, uint64_t start_pos, uint64_t end_pos, uint8x16_t *neon_array){
+
+    // Assume window_size is a multiple of NEON_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / NEON_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into uint8x16_t structures
+    // Could be optimized later as only 16 mm registers are avaiable per CPU in 64-bit
+    for(uint64_t i = 0; i < num_vectors; i++)
+        neon_array[i] = vld1q_u8((uint8_t const *)(buff + start_pos + (NEON_REGISTER_SIZE_BYTES * i)));
+
+    // Repeat vminu until a single register is remaining with minimum values
+    // Each iteration calculates minimums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the minimum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            neon_array[i] = vminq_u8(neon_array[i], neon_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the NEON into local memory
+    uint8_t result_store[NEON_REGISTER_SIZE_BYTES] = {0};
+    vst1q_u8((uint8_t *)&result_store, neon_array[0]);
+    
+    // Sequentially scan the last remaining bytes (16 in this case) to find the min value
+    uint8_t min_val = UINT8_MAX;
+    for(uint64_t i = 0; i < NEON_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] < min_val)
+            min_val = result_store[i];
+    }
+    
+    // Return minimum value
+    return min_val;
+
+}
+#endif
+
+#if defined(__ALTIVEC__)
+uint8_t AVX_Chunking_Technique::find_minimum_altivec(char *buff, uint64_t start_pos, uint64_t end_pos, __vector unsigned char *vec_array){
+    // Assume window_size is a multiple of ALTIVEC_REGISTER_SIZE_BYTES for now
+    // Assume num_vectors is even for now - True for most common window sizes. Can fix later via specific check
+
+    uint64_t num_vectors = (end_pos - start_pos) / ALTIVEC_REGISTER_SIZE_BYTES;
+
+    uint64_t step = 2;
+    uint64_t half_step = 1;
+
+    // Load contents into vector unsigned char structures
+    // Could be optimized later as only 32 vec registers are available per CPU
+    for(uint64_t i = 0; i < num_vectors; i++)
+        vec_array[i] = vec_xl(0, (unsigned char const *)(buff + start_pos + (ALTIVEC_REGISTER_SIZE_BYTES * i)));
+
+    // Repeat vminu until a single register is remaining with minimum values
+    // Each iteration calculates minimums between a pair of registers and moves it into the first register in the pair
+    // Finally, only one will be left with the minimum values from all pairs
+    while(step <= num_vectors){
+        
+        for(uint64_t i = 0; i < num_vectors; i+=step)
+            vec_array[i] = vec_min(vec_array[i], vec_array[i+half_step]);
+        
+        // Multiply step by 2
+        half_step = step;
+        step = step << 1;
+    
+    }
+
+    // Move the final set of values from the vec into local memory    
+    unsigned char result_store[ALTIVEC_REGISTER_SIZE_BYTES] = {0};
+
+    vec_st(vec_array[0], 0, (unsigned char *)&result_store);
+
+    // Sequentially scan the last remaining bytes (16 in this case) to find the min value
+    uint8_t min_val = UINT8_MAX;
+    for(uint64_t i = 0; i < ALTIVEC_REGISTER_SIZE_BYTES; i++){
+        if(result_store[i] < min_val)
+            min_val = result_store[i];
+    }
+
+    // Return minimum value
+    return min_val;
+}
+#endif
+
+/**
+ * Range scan functions for greater-than-or-equal-to (geq) comparisons
+ * These functions are used to find the first position in the data stream matching a target comparison
+ * The functions have been implemented for SSE, AVX, AVX512, NEON and AltiVec instruction sets
+ */
+
+#if defined(__SSE3__)
+uint64_t AVX_Chunking_Technique::range_scan_geq_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / SSE_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit SSE format
+    __m128i xmm_array, cmp_array;
+    int cmp_mask;
+
+    // Load max_value into xmm-format
+    __m128i max_val_xmm = _mm_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * SSE_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm_loadu_si128((__m128i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm_cmpge_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = GreaterOrEqual8uSSE(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm_movemask_epi8(cmp_array);
+        #endif
+        
+
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffs(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX2__)
+uint64_t AVX_Chunking_Technique::range_scan_geq_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX256_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 256-bit AVX format
+    __m256i xmm_array, cmp_array;
+    uint32_t cmp_mask;
+
+    // Load max_value into xmm-format
+    __m256i max_val_xmm = _mm256_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX256_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm256_loadu_si256((__m256i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+      
+        #if defined(__AVX512F__)
+            cmp_mask = _mm256_cmpge_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = GreaterOrEqual8uAVX256(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm256_movemask_epi8(cmp_array);
+        #endif
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + (__builtin_ffs(cmp_mask) - 1);            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX512F__)
+uint64_t AVX_Chunking_Technique::range_scan_geq_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX512_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 512-bit AVX format
+    __m512i xmm_array;
+    __mmask64 cmp_mask;
+
+    // Load max_value into xmm-format
+    __m512i max_val_xmm = _mm512_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX512_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm512_loadu_si512((__m512i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_mask = _mm512_cmpge_epu8_mask(xmm_array, max_val_xmm);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffsll(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__ARM_NEON)
+uint64_t AVX_Chunking_Technique::range_scan_geq_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / NEON_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit NEON format
+    uint8x16_t xmm_array, cmp_array;
+    uint8x16_t max_val_xmm = vdupq_n_u8((uint8_t)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * NEON_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = vld1q_u8((uint8_t const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vcgeq_u8(xmm_array, max_val_xmm);
+
+        // ARM doesn't have native MOVEMASK instructions like x86 does.
+        // Following advice from an ARM community blog post to implement mask creation instead
+        // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+        
+        // Create a mask using the most-significant bit of each byte value in cmp_array
+        
+        uint64_t cmp_mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp_array), 4)), 0);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + ((__builtin_ffsll(cmp_mask) - 1) >> 2); // Shift right by 2 to account for 4 bits per byte            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#ifdef __ALTIVEC__
+uint64_t AVX_Chunking_Technique::range_scan_geq_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){ 
+    
+    uint64_t num_vectors = (end_position - start_position) / ALTIVEC_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit AltiVec format
+    __vector unsigned char vec_array;
+    __vector unsigned char perm_array;
+    __vector __bool char cmp_array;
+    __vector unsigned char max_val_vec = vec_splats((unsigned char)target_value);
+    __vector unsigned char bit_selector = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120}; // select most significant bit from each byte                    
+
+    uint8_t cond_mask_1, cond_mask_2;    
+
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * ALTIVEC_REGISTER_SIZE_BYTES);
+        
+        // Load data into vec register
+        // Use vec_xl instead of vec_ld because vec_ld truncates the load address to a 16 byte boundary
+        // vec_xl allows loading from any address, which is necessary for unaligned data
+        vec_array = vec_xl(0, (unsigned char const *)(buff + curr_scan_start));
+                
+        /* 
+         Compare values with max_value. If a byte in vec_array is geq max_val_vec,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vec_cmpge(vec_array, max_val_vec);
+        
+        // Create masks using the most-significant bit of each byte value in cmp_array
+        perm_array = vec_bperm((__vector unsigned char)cmp_array, bit_selector);
+        
+        // bperm stores masks in elements 8 and 9 of the vector
+        // Extract the masks from the perm_array
+        // cond_mask_2 corresponds to the first 8 bytes, cond_mask_1 corresponds to the next 8 bytes
+        // This is because vec_bperm rearranges the bits in little endian order
+        cond_mask_1 = vec_extract(perm_array, 8);
+        cond_mask_2 = vec_extract(perm_array, 9);
+ 
+       // If cond_mask is not 0, it means at least one byte in cmp_array is non-zero
+       // Subtract 24 because clz implicitly converts to int with a 4-byte size
+       if (cond_mask_2){
+            return curr_scan_start + __builtin_clz(cond_mask_2) - 24 ; 
+        } 
+       else if(cond_mask_1){
+            return curr_scan_start + __builtin_clz(cond_mask_1) - 16; // Add 8 to the index to account for the first 8 bytes in the vector
+        }
+        
+    }
+    
+    return end_position;
+}
+#endif
+
+/**
+ * @brief: Range scan functions for strictly greater than comparison
+ * These functions are used to find the first position in the data stream matching a target comparison
+ * The functions are imlemented for SSE, AVX2, AVX512, ARM NEON and AltiVec instruction sets.
+ */
+
+
+#if defined(__SSE3__)
+uint64_t AVX_Chunking_Technique::range_scan_gt_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / SSE_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit SSE format
+    __m128i xmm_array, cmp_array;
+    int cmp_mask;
+
+    // Load max_value into xmm-format
+    __m128i max_val_xmm = _mm_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * SSE_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm_loadu_si128((__m128i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is gt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm_cmpgt_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = Greater8uSSE(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm_movemask_epi8(cmp_array);
+        #endif
+        
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffs(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX2__)
+uint64_t AVX_Chunking_Technique::range_scan_gt_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX256_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 256-bit AVX format
+    __m256i xmm_array, cmp_array;
+    uint32_t cmp_mask;
+
+    // Load max_value into xmm-format
+    __m256i max_val_xmm = _mm256_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX256_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm256_loadu_si256((__m256i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is gt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm256_cmpgt_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = Greater8uAVX256(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm256_movemask_epi8(cmp_array);
+        #endif
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + (__builtin_ffs(cmp_mask) - 1);            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX512F__)
+uint64_t AVX_Chunking_Technique::range_scan_gt_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX512_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 512-bit AVX format
+    __m512i xmm_array;
+    __mmask64 cmp_mask;
+
+    // Load max_value into xmm-format
+    __m512i max_val_xmm = _mm512_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX512_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm512_loadu_si512((__m512i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is gt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_mask = _mm512_cmpgt_epu8_mask(xmm_array, max_val_xmm);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffsll(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__ARM_NEON)
+uint64_t AVX_Chunking_Technique::range_scan_gt_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / NEON_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit NEON format
+    uint8x16_t xmm_array, cmp_array;
+    uint8x16_t max_val_xmm = vdupq_n_u8((uint8_t)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * NEON_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = vld1q_u8((uint8_t const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is gt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vcgtq_u8(xmm_array, max_val_xmm);
+
+        // ARM doesn't have native MOVEMASK instructions like x86 does.
+        // Following advice from an ARM community blog post to implement mask creation instead
+        // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+        
+        // Create a mask using the most-significant bit of each byte value in cmp_array
+        
+        uint64_t cmp_mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp_array), 4)), 0);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + ((__builtin_ffsll(cmp_mask) - 1) >> 2); // Shift right by 2 to account for 4 bits per byte            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#ifdef __ALTIVEC__
+uint64_t AVX_Chunking_Technique::range_scan_gt_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){ 
+    
+    uint64_t num_vectors = (end_position - start_position) / ALTIVEC_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit AltiVec format
+    __vector unsigned char vec_array;
+    __vector unsigned char perm_array;
+    __vector __bool char cmp_array;
+    __vector unsigned char max_val_vec = vec_splats((unsigned char)target_value);
+    __vector unsigned char bit_selector = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120}; // select most significant bit from each byte                    
+
+    uint8_t cond_mask_1, cond_mask_2;    
+
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * ALTIVEC_REGISTER_SIZE_BYTES);
+        
+        // Load data into vec register
+        // Use vec_xl instead of vec_ld because vec_ld truncates the load address to a 16 byte boundary
+        // vec_xl allows loading from any address, which is necessary for unaligned data
+        vec_array = vec_xl(0, (unsigned char const *)(buff + curr_scan_start));
+                
+        /* 
+         Compare values with max_value. If a byte in vec_array is gt max_val_vec,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vec_cmpgt(vec_array, max_val_vec);
+        
+        // Create masks using the most-significant bit of each byte value in cmp_array
+        perm_array = vec_bperm((__vector unsigned char)cmp_array, bit_selector);
+        cond_mask_1 = vec_extract(perm_array, 8);
+        cond_mask_2 = vec_extract(perm_array, 9);
+ 
+       // If cond_mask is not 0, it means at least one byte in cmp_array is non-zero
+       if (cond_mask_2){
+            return curr_scan_start + __builtin_clz(cond_mask_2) - 24 ; 
+        } 
+       else if(cond_mask_1){
+            return curr_scan_start + __builtin_clz(cond_mask_1) - 16; // Add 8
+       }
+    }
+    return end_position;
+}
+#endif
+
+
+/**
+ * @brief: Range scan functions for less than or equal to comparison
+ * These functions are used to find the first position in the data stream matching a target comparison
+ * The functions are implemented for SSE, AVX2, AVX512, NEON and AltiVec instruction sets.
+ */
+
+#if defined(__SSE3__)
+uint64_t AVX_Chunking_Technique::range_scan_leq_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / SSE_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit SSE format
+    __m128i xmm_array, cmp_array;
+    int cmp_mask;
+
+    // Load max_value into xmm-format
+    __m128i max_val_xmm = _mm_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * SSE_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm_loadu_si128((__m128i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is leq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm_cmple_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = LesserOrEqual8uSSE(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm_movemask_epi8(cmp_array);
+        #endif
+        
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffs(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX2__)
+uint64_t AVX_Chunking_Technique::range_scan_leq_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX256_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 256-bit AVX format
+    __m256i xmm_array, cmp_array;
+    uint32_t cmp_mask;
+
+    // Load max_value into xmm-format
+    __m256i max_val_xmm = _mm256_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX256_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm256_loadu_si256((__m256i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is leq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm256_cmple_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = LesserOrEqual8uAVX256(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm256_movemask_epi8(cmp_array);
+        #endif
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + (__builtin_ffs(cmp_mask) - 1);            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX512F__)
+uint64_t AVX_Chunking_Technique::range_scan_leq_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX512_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 512-bit AVX format
+    __m512i xmm_array;
+    __mmask64 cmp_mask;
+
+    // Load max_value into xmm-format
+    __m512i max_val_xmm = _mm512_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX512_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm512_loadu_si512((__m512i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is leq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_mask = _mm512_cmple_epu8_mask(xmm_array, max_val_xmm);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffsll(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__ARM_NEON)
+uint64_t AVX_Chunking_Technique::range_scan_leq_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / NEON_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit NEON format
+    uint8x16_t xmm_array, cmp_array;
+    uint8x16_t max_val_xmm = vdupq_n_u8((uint8_t)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * NEON_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = vld1q_u8((uint8_t const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is leq max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vcleq_u8(xmm_array, max_val_xmm);
+
+        // ARM doesn't have native MOVEMASK instructions like x86 does.
+        // Following advice from an ARM community blog post to implement mask creation instead
+        // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+        
+        // Create a mask using the most-significant bit of each byte value in cmp_array
+        
+        uint64_t cmp_mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp_array), 4)), 0);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + ((__builtin_ffsll(cmp_mask) - 1) >> 2); // Shift right by 2 to account for 4 bits per byte            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#ifdef __ALTIVEC__
+uint64_t AVX_Chunking_Technique::range_scan_leq_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / ALTIVEC_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit AltiVec format
+    __vector unsigned char vec_array;
+    __vector unsigned char perm_array;
+    __vector __bool char cmp_array;
+    __vector unsigned char max_val_vec = vec_splats((unsigned char)target_value);
+    __vector unsigned char bit_selector = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120}; // select most significant bit from each byte                    
+
+    uint8_t cond_mask_1, cond_mask_2;    
+
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * ALTIVEC_REGISTER_SIZE_BYTES);
+        
+        // Load data into vec register
+        // Use vec_xl instead of vec_ld because vec_ld truncates the load address to a 16 byte boundary
+        // vec_xl allows loading from any address, which is necessary for unaligned data
+        vec_array = vec_xl(0, (unsigned char const *)(buff + curr_scan_start));
+                
+        /* 
+         Compare values with max_value. If a byte in vec_array is leq max_val_vec,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vec_cmple(vec_array, max_val_vec);
+        
+        // Create masks using the most-significant bit of each byte value in cmp_array
+        perm_array = vec_bperm((__vector unsigned char)cmp_array, bit_selector);
+        cond_mask_1 = vec_extract(perm_array, 8);
+        cond_mask_2 = vec_extract(perm_array, 9);
+ 
+       // If cond_mask is not 0, it means at least one byte in cmp_array is non-zero
+       if (cond_mask_2){
+            return curr_scan_start + __builtin_clz(cond_mask_2) - 24 ; 
+        } 
+       else if(cond_mask_1){
+            return curr_scan_start + __builtin_clz(cond_mask_1) - 16; // Add 8
+        }
+    }
+    return end_position;
+}
+#endif
+
+/**
+ * @brief: Range scan functions for strictly less than comparison
+ * These functions are used to find the first position in the data stream matching a target comparison
+ * The functions are implemented for SSE, AVX2, AVX512, NEON and AltiVec instruction sets.
+ */
+
+#if defined(__SSE3__)
+uint64_t AVX_Chunking_Technique::range_scan_lt_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / SSE_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit SSE format
+    __m128i xmm_array, cmp_array;
+    int cmp_mask;
+
+    // Load max_value into xmm-format
+    __m128i max_val_xmm = _mm_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * SSE_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm_loadu_si128((__m128i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is lt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm_cmplt_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = Lesser8uSSE(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm_movemask_epi8(cmp_array);
+        #endif
+        
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffs(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX2__)
+uint64_t AVX_Chunking_Technique::range_scan_lt_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX256_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 256-bit AVX format
+    __m256i xmm_array, cmp_array;
+    uint32_t cmp_mask;
+
+    // Load max_value into xmm-format
+    __m256i max_val_xmm = _mm256_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX256_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm256_loadu_si256((__m256i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is lt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        #if defined(__AVX512F__)
+            cmp_mask = _mm256_cmplt_epu8_mask(xmm_array, max_val_xmm);
+        #else
+            cmp_array = Lesser8uAVX256(xmm_array, max_val_xmm);
+        
+            // Create a mask using the most-significant bit of each byte value in cmp_array
+            cmp_mask = _mm256_movemask_epi8(cmp_array);
+        #endif
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + (__builtin_ffs(cmp_mask) - 1);            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__AVX512F__)
+uint64_t AVX_Chunking_Technique::range_scan_lt_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / AVX512_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+    uint64_t return_pos;
+
+    // Structures to store bytes from data stream and comparison results in 512-bit AVX format
+    __m512i xmm_array;
+    __mmask64 cmp_mask;
+
+    // Load max_value into xmm-format
+    __m512i max_val_xmm = _mm512_set1_epi8((char)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * AVX512_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = _mm512_loadu_si512((__m512i const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is lt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_mask = _mm512_cmplt_epu8_mask(xmm_array, max_val_xmm);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return_pos = curr_scan_start + (__builtin_ffsll(cmp_mask) - 1);
+            return return_pos;            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#if defined(__ARM_NEON)
+uint64_t AVX_Chunking_Technique::range_scan_lt_neon(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    
+    uint64_t num_vectors = (end_position - start_position) / NEON_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit NEON format
+    uint8x16_t xmm_array, cmp_array;
+    uint8x16_t max_val_xmm = vdupq_n_u8((uint8_t)target_value);
+    
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * NEON_REGISTER_SIZE_BYTES);
+        // Load data into xmm register
+        xmm_array = vld1q_u8((uint8_t const *)(buff + curr_scan_start));
+        
+        /* 
+         Compare values with max_value. If a byte in xmm_array is lt max_val_xmm,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vcltq_u8(xmm_array, max_val_xmm);
+
+        // ARM doesn't have native MOVEMASK instructions like x86 does.
+        // Following advice from an ARM community blog post to implement mask creation instead
+        // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+        
+        // Create a mask using the most-significant bit of each byte value in cmp_array
+        
+        uint64_t cmp_mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(cmp_array), 4)), 0);
+
+        // Return index of first non-zero bit in mask
+        // This corresponds to the first non-zero byte in cmp_array 
+        if(cmp_mask){
+            return curr_scan_start + ((__builtin_ffsll(cmp_mask) - 1) >> 2); // Shift right by 2 to account for 4 bits per byte            
+        }
+    }
+    
+    return end_position;
+}
+#endif
+
+#ifdef __ALTIVEC__
+uint64_t AVX_Chunking_Technique::range_scan_lt_altivec(char *buff, uint64_t start_position, uint64_t end_position, uint8_t target_value){
+    uint64_t num_vectors = (end_position - start_position) / ALTIVEC_REGISTER_SIZE_BYTES;
+    uint64_t curr_scan_start;
+
+    // Structures to store bytes from data stream and comparison results in 128-bit AltiVec format
+    __vector unsigned char vec_array;
+    __vector unsigned char perm_array;
+    __vector __bool char cmp_array;
+    __vector unsigned char max_val_vec = vec_splats((unsigned char)target_value);
+    __vector unsigned char bit_selector = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120}; // select most significant bit from each byte                    
+
+    uint8_t cond_mask_1, cond_mask_2;    
+
+    for(uint64_t i = 0; i < num_vectors; i++){
+        curr_scan_start = start_position + (i * ALTIVEC_REGISTER_SIZE_BYTES);
+        
+        // Load data into vec register
+        // Use vec_xl instead of vec_ld because vec_ld truncates the load address to a 16 byte boundary
+        // vec_xl allows loading from any address, which is necessary for unaligned data
+        vec_array = vec_xl(0, (unsigned char const *)(buff + curr_scan_start));
+                
+        /* 
+         Compare values with max_value. If a byte in vec_array is lt max_val_vec,  
+         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
+        */
+        
+        cmp_array = vec_cmplt(vec_array, max_val_vec);
+        
+        // Create masks using the most-significant bit of each byte value in cmp_array
+        perm_array = vec_bperm((__vector unsigned char)cmp_array, bit_selector);
+        
+        // bperm stores masks in elements 8 and 9 of the vector
+        // Extract the masks from the perm_array
+        // cond_mask_2 corresponds to the first 8 bytes, cond_mask_1 corresponds to the next 8 bytes
+        // This is because vec_bperm rearranges the bits in little endian order
+        cond_mask_1 = vec_extract(perm_array, 8);
+        cond_mask_2 = vec_extract(perm_array, 9);
+ 
+       // If cond_mask is not 0, it means at least one byte in cmp_array is non-zero
+       // Subtract 24 because clz implicitly converts to int with a 4-byte size
+       if (cond_mask_2){
+            return curr_scan_start + __builtin_clz(cond_mask_2) - 24 ; 
+        } 
+       else if(cond_mask_1){
+            return curr_scan_start + __builtin_clz(cond_mask_1) - 16; // Add 8 to the index to account for the first 8 bytes in the vector
+        }
+    }
+    return end_position;
+}
+#endif
diff --git a/dedup/src/chunking/chunking_common.cpp b/dedup/src/chunking/chunking_common.cpp
index 4ee5a02..0cad79b 100644
--- a/dedup/src/chunking/chunking_common.cpp
+++ b/dedup/src/chunking/chunking_common.cpp
@@ -15,7 +15,6 @@
 #include <cstring>
 #include <filesystem>
 #include <fstream>
-#include <iostream>
 #include <memory>
 #include <string>
 
diff --git a/dedup/src/chunking/maxp_chunking.cpp b/dedup/src/chunking/maxp_chunking.cpp
new file mode 100644
index 0000000..76f3892
--- /dev/null
+++ b/dedup/src/chunking/maxp_chunking.cpp
@@ -0,0 +1,519 @@
+/**
+ * @file maxp_chunking.cpp
+ * @author WASL
+ * @brief Implementations for MAXP chunking technique
+ * @version 0.1
+ * @date 2025-3-26
+ *
+ * @copyright Copyright (c) 2023
+ *
+ */
+
+ #include <fstream>
+ #include "maxp_chunking.hpp"
+ 
+ MAXP_Chunking::MAXP_Chunking(){
+    window_size = DEFAULT_MAXP_WINDOW_SIZE;
+    max_block_size = DEFAULT_MAXP_MAX_BLOCK_SIZE;
+    simd_mode = SIMD_Mode::NONE;
+
+    
+    #ifdef __SSE3__
+    xmm_array = nullptr;
+    #endif
+
+    #ifdef __AVX2__
+    ymm_array = nullptr;
+    #endif
+
+    #if defined(__AVX512F__)
+    zmm_array = nullptr;
+    #endif
+
+    #if defined(__ARM_NEON)
+    neon_array = nullptr;
+    #endif
+
+    #ifdef __ALTIVEC__
+    altivec_array = nullptr;
+    #endif
+
+    chunk_counter = 0;
+ }
+ 
+ MAXP_Chunking::MAXP_Chunking(const Config &config){
+    window_size = config.get_maxp_window_size();
+    max_block_size = config.get_maxp_max_block_size();
+    simd_mode = config.get_simd_mode();
+
+    #ifdef __SSE3__
+    xmm_array = nullptr;
+    #endif
+
+    #ifdef __AVX2__
+    ymm_array = nullptr;
+    #endif
+    
+    #if defined(__AVX512F__)
+    zmm_array = nullptr;
+    #endif
+
+    #if defined(__ARM_NEON)
+    neon_array = nullptr;
+    #endif
+
+    #ifdef __ALTIVEC__
+    altivec_array = nullptr;
+    #endif
+
+    chunk_counter = 0;
+
+    if(simd_mode == SIMD_Mode::NONE) {
+        // No SIMD mode selected
+        // Pass
+    }
+
+    #ifdef __SSE3__
+    else if(simd_mode == SIMD_Mode::SSE128) {
+        if(window_size % SSE_REGISTER_SIZE_BYTES != 0 || (window_size / 16) % 2 != 0) {
+            // Check if window size is a multiple of 16
+            // Check if window size makes an even number of vectors for find_maximum_sse128
+            std::cout << "MAXP window size currently unsupported by SSE128. Please use an even multiple of SSE128_REGISTER_SIZE_BYTES (default 16)." << std::endl;
+            exit(1);
+        }
+        uint64_t num_vectors = window_size / SSE_REGISTER_SIZE_BYTES;
+        xmm_array = new __m128i[num_vectors]();
+        if(xmm_array == nullptr) {
+            std::cout << "Error allocating memory for 128-bit vectors (__m128i)" << std::endl;
+            exit(1);
+        }
+    }
+    #endif
+
+    #ifdef __AVX2__
+    else if(simd_mode == SIMD_Mode::AVX256) {
+        if(window_size % AVX256_REGISTER_SIZE_BYTES != 0 || (window_size / 32) % 2 != 0) {
+            // Check if window size is a multiple of 32
+            // Check if window size makes an even number of vectors for find_maximum_AVX256
+            std::cout << "MAXP window size currently unsupported by AVX256. Please use an even multiple of AVX_REGISTER_SIZE_BYTES (default 32)." << std::endl;
+            exit(1);
+        }
+        uint64_t num_vectors = window_size / AVX256_REGISTER_SIZE_BYTES;
+        ymm_array = new __m256i[num_vectors]();
+        if(ymm_array == nullptr) {
+            std::cout << "Error allocating memory for 256-bit vectors(__m256i)" << std::endl;
+            exit(1);
+        }
+    }
+    #endif
+
+    #if defined(__AVX512F__)
+    else if(simd_mode == SIMD_Mode::AVX512) {
+        if(window_size % AVX512_REGISTER_SIZE_BYTES != 0 || (window_size / 64) % 2 != 0) {
+            // Check if window size is a multiple of 64
+            // Check if window size makes an even number of vectors for find_maximum_AVX512
+            std::cout << "MAXP window size currently unsupported by AVX512. Please use an even multiple of AVX_REGISTER_SIZE_BYTES (default 64)." << std::endl;
+            exit(1);
+        }
+        uint64_t num_vectors = window_size / AVX512_REGISTER_SIZE_BYTES;
+        zmm_array = new __m512i[num_vectors]();
+        if(zmm_array == nullptr) {
+            std::cout << "Error allocating memory for 512-bit vectors(__m512i)" << std::endl;
+            exit(1);
+        }
+    }
+    #endif
+
+    #if defined(__ARM_NEON)
+    else if(simd_mode == SIMD_Mode::NEON) {
+        if(window_size % NEON_REGISTER_SIZE_BYTES != 0 || (window_size / 16) % 2 != 0) {
+            // Check if window size is a multiple of 16
+            // Check if window size makes an even number of vectors for find_maximum_neon
+            std::cout << "MAXP window size currently unsupported by NEON. Please use an even multiple of NEON_REGISTER_SIZE_BYTES (default 16)." << std::endl;
+            exit(1);
+        }
+        uint64_t num_vectors = window_size / NEON_REGISTER_SIZE_BYTES;
+        neon_array = new uint8x16_t[num_vectors]();
+        if(neon_array == nullptr) {
+            std::cout << "Error allocating memory for NEON vectors(uint8x16_t)" << std::endl;
+            exit(1);
+        }
+    }
+    #endif
+
+    #ifdef __ALTIVEC__
+    else if(simd_mode == SIMD_Mode::ALTIVEC) {
+        if(window_size % ALTIVEC_REGISTER_SIZE_BYTES != 0 || (window_size / 16) % 2 != 0) {
+            // Check if window size is a multiple of 16
+            // Check if window size makes an even number of vectors for find_maximum_altivec
+            std::cout << "MAXP window size currently unsupported by ALTIVEC. Please use an even multiple of ALTIVEC_REGISTER_SIZE_BYTES (default 16)." << std::endl;
+            exit(1);
+        }
+        uint64_t num_vectors = window_size / ALTIVEC_REGISTER_SIZE_BYTES;
+        altivec_array = new __vector unsigned char[num_vectors]();
+        if(altivec_array == nullptr) {
+            std::cout << "Error allocating memory for ALTIVEC vectors(__vector unsigned char)" << std::endl;
+            exit(1);
+        }
+    }
+    #endif
+    
+    else {
+        std::cout << "Unsupported SIMD Mode for MAXP" << std::endl;
+        exit(1);
+    }
+ }
+
+ #ifdef __SSE3__
+ uint64_t MAXP_Chunking::find_cutpoint_sse128(char *buff, uint64_t size){
+    if(size < (2 * window_size) + 1)
+        return size;
+    
+    // Cap out max size
+    size = std::min(size, max_block_size);
+
+    uint8_t max_value;
+    uint8_t backward_max = 0;
+    uint64_t max_pos = window_size;
+
+    uint64_t return_pos_range_scan;
+
+    while(max_pos < (size - window_size)){
+
+        max_value = (uint8_t)buff[max_pos];
+
+        return_pos_range_scan = range_scan_geq_sse128(buff, max_pos + 1, max_pos + 1 + window_size, max_value);
+        if(return_pos_range_scan == max_pos + window_size + 1){
+            // No match found i.e. all bytes in range are less than max_value
+            // Start backward scan to verify if target is a local max
+
+            backward_max = find_maximum_sse128(buff, max_pos - window_size, max_pos, xmm_array);
+            
+            // No bytes > max value in the backward region i.e. max_value is a local max
+            // Chunk boundary found
+            if(backward_max <= max_value){
+                return max_pos;
+            }
+            else {
+                // Nothing until max_pos + window_size + 1 can be a chunk boundary now
+                max_pos += window_size + 1;
+            }
+
+        }
+        else {
+            // Update maximum value
+            max_pos = return_pos_range_scan;
+        }
+    }
+
+    // No chunk boundary found
+    return size;
+
+}
+#endif
+
+#ifdef __AVX2__
+uint64_t MAXP_Chunking::find_cutpoint_avx256(char *buff, uint64_t size){
+    if(size < (2 * window_size) + 1)
+        return size;
+
+    // Cap out max size
+    size = std::min(size, max_block_size);
+
+    uint8_t max_value;
+    uint8_t backward_max = 0;
+    uint64_t max_pos = window_size;
+
+    uint64_t return_pos_range_scan;
+
+    while(max_pos < (size - window_size)){
+
+        max_value = (uint8_t)buff[max_pos];
+
+        return_pos_range_scan = range_scan_geq_avx256(buff, max_pos + 1, max_pos + 1 + window_size, max_value);
+        if(return_pos_range_scan == max_pos + window_size + 1){
+            // No match found i.e. all bytes in range are less than max_value
+            // Start backward scan to verify if target is a local max
+
+            backward_max = find_maximum_avx256(buff, max_pos - window_size, max_pos, ymm_array);
+            
+            // No bytes > max value in the backward region i.e. max_value is a local max
+            // Chunk boundary found
+            if(backward_max <= max_value){
+                return max_pos;
+            }
+            else {
+                // Nothing until max_pos + window_size + 1 can be a chunk boundary now
+                max_pos += window_size + 1;
+            }
+
+        }
+        else {
+            // Update maximum value
+            max_pos = return_pos_range_scan;
+        }
+    }
+
+    // No chunk boundary found
+    return size;
+}
+#endif
+    
+#if defined(__AVX512F__)
+uint64_t MAXP_Chunking::find_cutpoint_avx512(char *buff, uint64_t size){
+    if(size < (2 * window_size) + 1)
+        return size;
+
+    // Cap out max size
+    size = std::min(size, max_block_size);
+
+    uint8_t max_value;
+    uint8_t backward_max = 0;
+    uint64_t max_pos = window_size;
+
+    uint64_t return_pos_range_scan;
+
+    while(max_pos < (size - window_size)){
+
+        max_value = (uint8_t)buff[max_pos];
+
+        return_pos_range_scan = range_scan_geq_avx512(buff, max_pos + 1, max_pos + 1 + window_size, max_value);
+        if(return_pos_range_scan == max_pos + window_size + 1){
+            // No match found i.e. all bytes in range are less than max_value
+            // Start backward scan to verify if target is a local max
+
+            backward_max = find_maximum_avx512(buff, max_pos - window_size, max_pos, zmm_array);
+            
+            // No bytes > max value in the backward region i.e. max_value is a local max
+            // Chunk boundary found
+            if(backward_max <= max_value){
+                return max_pos;
+            }
+            else {
+                // Nothing until max_pos + window_size + 1 can be a chunk boundary now
+                max_pos += window_size + 1;
+            }
+
+        }
+        else {
+            // Update maximum value
+            max_pos = return_pos_range_scan;
+        }
+    }
+
+    // No chunk boundary found
+    return size;
+
+}
+#endif
+
+#if defined(__ARM_NEON)
+uint64_t MAXP_Chunking::find_cutpoint_neon(char *buff, uint64_t size){
+    if(size < (2 * window_size) + 1)
+        return size;
+
+    // Cap out max size
+    size = std::min(size, max_block_size);
+
+    uint8_t max_value;
+    uint8_t backward_max = 0;
+    uint64_t max_pos = window_size;
+
+    uint64_t return_pos_range_scan;
+
+    while(max_pos < (size - window_size)){
+
+        max_value = (uint8_t)buff[max_pos];
+
+        return_pos_range_scan = range_scan_geq_neon(buff, max_pos + 1, max_pos + 1 + window_size, max_value);
+        if(return_pos_range_scan == max_pos + window_size + 1){
+            // No match found i.e. all bytes in range are less than max_value
+            // Start backward scan to verify if target is a local max
+
+            backward_max = find_maximum_neon(buff, max_pos - window_size, max_pos, neon_array);
+            
+            // No bytes > max value in the backward region i.e. max_value is a local max
+            // Chunk boundary found
+            if(backward_max <= max_value){
+                return max_pos;
+            }
+            else {
+                // Nothing until max_pos + window_size + 1 can be a chunk boundary now
+                max_pos += window_size + 1;
+            }
+
+        }
+        else {
+            // Update maximum value
+            max_pos = return_pos_range_scan;
+        }
+    }
+
+    // No chunk boundary found
+    return size;
+
+}
+#endif
+
+#ifdef __ALTIVEC__
+uint64_t MAXP_Chunking::find_cutpoint_altivec(char *buff, uint64_t size){
+    if(size < (2 * window_size) + 1){
+        return size;
+    }
+    
+    // Cap out max size
+    size = std::min(size, max_block_size);
+    uint8_t max_value;
+    uint8_t backward_max = 0;
+    uint64_t max_pos = window_size;
+    uint64_t return_pos_range_scan;
+
+    while(max_pos < (size - window_size)){
+
+        max_value = (uint8_t)buff[max_pos];
+        
+        return_pos_range_scan = range_scan_geq_altivec(buff, max_pos + 1, max_pos + 1 + window_size, max_value);
+        
+        if(return_pos_range_scan == max_pos + window_size + 1){
+            // No match found i.e. all bytes in range are less than max_value
+            // Start backward scan to verify if target is a local max
+        
+            backward_max = find_maximum_altivec(buff, max_pos - window_size, max_pos, altivec_array);
+        
+            // No bytes > max value in the backward region i.e. max_value is a local max
+            // Chunk boundary found
+            if(backward_max <= max_value){
+                return max_pos;
+            }
+            else {
+                // Nothing until max_pos + window_size + 1 can be a chunk boundary now
+                max_pos += window_size + 1;
+            }
+
+        }
+        else {
+            // Update maximum value
+            max_pos = return_pos_range_scan;
+        }
+    }
+
+    // N:wqo chunk boundary found
+    return size;
+}
+#endif
+
+uint64_t MAXP_Chunking::find_cutpoint_native(char *buff, uint64_t size){
+ 
+     if(size < (2 * window_size) + 1){
+        return size;
+     }
+
+
+     size = std::min(size, max_block_size);
+
+     uint64_t max_position = window_size;
+     uint8_t max_value = (uint8_t)buff[max_position];
+     uint64_t j;
+     bool local_max_found = false;
+ 
+     // Scan window from left to right looking for a byte with a value > all bytes in a "window_size" after it
+     for(uint64_t i = window_size; i < size - 1; i++){
+ 
+         if((uint8_t)buff[i] >= max_value){
+            max_position = i;
+            max_value = (uint8_t)buff[i];
+         }
+         else if(i == max_position + window_size){ 
+            // Target byte found
+            // Initiate backward scanning to see if i is local maximum
+            local_max_found = true;
+                
+            for(j = max_position - window_size; j < max_position; j++){
+                if((uint8_t)buff[j] > max_value){
+                    max_position = i+1;
+                    max_value = (uint8_t)buff[i+1];
+                    local_max_found = false;
+                    break;
+                }
+            }
+ 
+            // Insert chunk boundary at max_position (not i) i.e. chunk boundary does not include right window
+            if(local_max_found == true){
+                return max_position;
+            }
+        }
+    }
+    
+    // Returning maximum chunk size
+    return size;
+ }
+
+ uint64_t MAXP_Chunking::find_cutpoint(char *buff, uint64_t size){
+    chunk_counter++;
+    if(simd_mode == SIMD_Mode::NONE) {
+        return find_cutpoint_native(buff, size);
+    }
+    
+    #ifdef __SSE3__
+    else if(simd_mode == SIMD_Mode::SSE128) {
+        return find_cutpoint_sse128(buff, size);
+    }
+    #endif
+    
+    #ifdef __AVX2__
+    else if(simd_mode == SIMD_Mode::AVX256) {
+        return find_cutpoint_avx256(buff, size);
+    }
+    #endif
+
+    #if defined(__AVX512F__)
+    else if(simd_mode == SIMD_Mode::AVX512) {
+        return find_cutpoint_avx512(buff, size);
+    }
+    #endif
+
+    #if defined(__ARM_NEON)
+    else if(simd_mode == SIMD_Mode::NEON) {
+        return find_cutpoint_neon(buff, size);
+    }
+    #endif
+
+    #ifdef __ALTIVEC__
+    else if(simd_mode == SIMD_Mode::ALTIVEC) {
+        return find_cutpoint_altivec(buff, size);
+    }
+    #endif
+
+    else {
+        std::cout << "Unsupported SIMD Mode for MAXP" << std::endl;
+        exit(1);
+    }
+ }
+ 
+ MAXP_Chunking::~MAXP_Chunking(){
+    
+    #ifdef __SSE3__
+    if(xmm_array != nullptr)
+        delete xmm_array;
+    #endif
+
+    #ifdef __AVX2__
+    if(ymm_array != nullptr)
+        delete ymm_array;
+    #endif 
+
+    #if defined(__AVX512F__)
+    if(zmm_array != nullptr)
+        delete zmm_array;
+    #endif
+
+    #if defined(__ARM_NEON__)
+    if(neon_array != nullptr)
+        delete neon_array;
+    #endif  
+
+    #ifdef __ALTIVEC__
+    if(altivec_array != nullptr)
+        delete altivec_array; 
+    #endif
+ }
\ No newline at end of file
diff --git a/dedup/src/chunking/ram_chunking.cpp b/dedup/src/chunking/ram_chunking.cpp
index 0ac0a86..834ee55 100644
--- a/dedup/src/chunking/ram_chunking.cpp
+++ b/dedup/src/chunking/ram_chunking.cpp
@@ -18,9 +18,25 @@ RAM_Chunking::RAM_Chunking() {
 
     window_size = avg_block_size - 256;
     
+    #ifdef __SSE3__
     sse_array = nullptr;
+    #endif
+    
+    #ifdef __AVX2__
     avx256_array = nullptr;
-    avx512_array = nullptr;
+    #endif
+    
+    #if defined(__AVX512F__)
+        avx512_array = nullptr;
+    #endif
+    
+    #ifdef __ARM_NEON
+        neon_array = nullptr;
+    #endif
+
+    #ifdef __ALTIVEC__
+        altivec_array = nullptr;
+    #endif
    
     simd_mode = SIMD_Mode::NONE;
 
@@ -32,40 +48,106 @@ RAM_Chunking::RAM_Chunking(const Config& config) {
     window_size = avg_block_size - 256;
     // window_size = avg_block_size / (exp(1) - 1);  // avg_block size / e-1
 
-    sse_array = nullptr;
-    avx256_array = nullptr;
-    avx512_array = nullptr;
+    #ifdef __SSE3__
+        sse_array = nullptr;
+    #endif
+    
+    #ifdef __AVX2__
+        avx256_array = nullptr;
+    #endif
+    
+    #if defined(__AVX512F__)
+        avx512_array = nullptr;
+    #endif
 
+    #ifdef __ARM_NEON
+        neon_array = nullptr;
+    #endif
+
+    #ifdef __ALTIVEC__
+        altivec_array = nullptr;
+    #endif
+    
     technique_name = "RAM Chunking";
 
     simd_mode = config.get_simd_mode();
 
-    if(simd_mode == SIMD_Mode::SSE128 || simd_mode == SIMD_Mode::SSE128_NOSLIDE){
+    if(simd_mode == SIMD_Mode::NONE){
+
+    }
+
+    #ifdef __SSE3__
+    else if(simd_mode == SIMD_Mode::SSE128){
          uint64_t num_vectors = window_size / SSE_REGISTER_SIZE_BYTES;
          sse_array = new __m128i[num_vectors]();
     }
+    #endif
+    
+    #ifdef __AVX2__
     else if(simd_mode == SIMD_Mode::AVX256){
         uint64_t num_vectors = window_size / AVX256_REGISTER_SIZE_BYTES;
         avx256_array = new __m256i[num_vectors]();
     }
+    #endif
     
     #if defined(__AVX512F__)
     else if(simd_mode == SIMD_Mode::AVX512){
         uint64_t num_vectors = window_size / AVX512_REGISTER_SIZE_BYTES;
         avx512_array = new __m512i[num_vectors]();
     }
-    #endif    
+    #endif
+    
+    #ifdef __ARM_NEON
+    else if(simd_mode == SIMD_Mode::NEON){
+        uint64_t num_vectors = window_size / NEON_REGISTER_SIZE_BYTES;
+        neon_array = new uint8x16_t[num_vectors]();
+    }
+    #endif
+
+    #ifdef __ALTIVEC__
+    else if(simd_mode == SIMD_Mode::ALTIVEC){
+        uint64_t num_vectors = window_size / ALTIVEC_REGISTER_SIZE_BYTES;
+        altivec_array = new __vector unsigned char[num_vectors]();
+    }
+    #endif
+
+    else {
+        std::cout << "Error: Unsupported SIMD mode" << std::endl;
+        exit(EXIT_FAILURE);
+    }
 }
 
 RAM_Chunking::~RAM_Chunking() {
-    if(simd_mode == SIMD_Mode::SSE128 || simd_mode == SIMD_Mode::SSE128_NOSLIDE)
+
+    if(simd_mode == SIMD_Mode::NONE){
+        // No SIMD mode, nothing to delete
+        return;
+    }
+
+    #ifdef __SSE3__
+    else if(simd_mode == SIMD_Mode::SSE128)
         delete sse_array;
+    #endif
+
+    #ifdef __AVX2__
     else if(simd_mode == SIMD_Mode::AVX256)
         delete avx256_array;
+    #endif
+
    #if defined(__AVX512F__)
     else if(simd_mode == SIMD_Mode::AVX512)
         delete avx512_array;
     #endif
+
+    #ifdef __ARM_NEON
+    else if(simd_mode == SIMD_Mode::NEON)
+        delete neon_array;
+    #endif
+
+    #ifdef __ALTIVEC__
+    else if(simd_mode == SIMD_Mode::ALTIVEC)
+        delete altivec_array;
+    #endif
 }
 
 uint64_t RAM_Chunking::find_cutpoint(char* buff, uint64_t size) {
@@ -77,159 +159,61 @@ uint64_t RAM_Chunking::find_cutpoint(char* buff, uint64_t size) {
     else if(size < window_size)
         return size;
 
-    // If SSE128 with normal sliding
-    if (simd_mode == SIMD_Mode::SSE128_NOSLIDE){
-        max_value = find_maximum_sse128(buff, 0, window_size, sse_array);
+    if(simd_mode == SIMD_Mode::NONE){
+        // If no SIMD enabled, execute basic find_maximum
+        for(i = 0; i < window_size; i++){
+            if ((uint8_t)buff[i] >= max_value)
+	    	    max_value = (uint8_t)buff[i];
+        }
+
+        for (i = window_size; i < size; i++) {
+            if ((uint8_t)buff[i] >= max_value)
+                return i;
+        }
+
     }
+
+    #ifdef __SSE3__
     // If SIMD enabled, accelerate find_maximum() and slide depending on chosen SIMD mode 
     else if(simd_mode == SIMD_Mode::SSE128){
         max_value = find_maximum_sse128(buff, 0, window_size, sse_array);
-        return get_return_position_sse128(buff, window_size, size, max_value);
+        return range_scan_geq_sse128(buff, window_size, size, max_value);
     }
+    #endif
+
+    #ifdef __AVX2__
     else if(simd_mode == SIMD_Mode::AVX256){
         max_value = find_maximum_avx256(buff, 0, window_size, avx256_array);
-        return get_return_position_avx256(buff, window_size, size, max_value);
+        return range_scan_geq_avx256(buff, window_size, size, max_value);
     }
+    #endif
+
     #if defined(__AVX512F__)
     else if(simd_mode == SIMD_Mode::AVX512){
         max_value = find_maximum_avx512(buff, 0, window_size, avx512_array);
-        return get_return_position_avx512(buff, window_size, size, max_value);
+        return range_scan_geq_avx512(buff, window_size, size, max_value);
     }
     #endif
-    
-    // Else execute basic find_maximum, same as native RAM
-    else {
-        for(i = 0; i < window_size; i++){
-            if ((uint8_t)buff[i] >= max_value)
-	    	    max_value = (uint8_t)buff[i];
-        }
-    }
 
-    for (i = window_size; i < size; i++) {
-        if ((uint8_t)buff[i] >= max_value)
-            return i;
+    #if defined(__ARM_NEON)
+    else if(simd_mode == SIMD_Mode::NEON){
+        max_value = find_maximum_neon(buff, 0, window_size, neon_array);
+        return range_scan_geq_neon(buff, window_size, size, max_value);
     }
+    #endif
 
-    return size;
-}
-
-uint64_t RAM_Chunking::get_return_position_sse128(char *buff, uint64_t start_position, uint64_t end_position, uint8_t max_value){
-    
-    uint64_t num_vectors = (end_position - start_position) / SSE_REGISTER_SIZE_BYTES;
-    uint64_t curr_scan_start;
-
-    // Structures to store bytes from data stream and comparison results in 128-bit SSE format
-    __m128i xmm_array, cmp_array;
-    int cmp_mask;
-
-    // Load max_value into xmm-format
-    __m128i max_val_xmm = _mm_set1_epi8((char)max_value);
-    
-    for(uint64_t i = 0; i < num_vectors; i++){
-        curr_scan_start = start_position + (i * SSE_REGISTER_SIZE_BYTES);
-        // Load data into xmm register
-        xmm_array = _mm_loadu_si128((__m128i const *)(buff + curr_scan_start));
-        
-        /* 
-
-         Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
-         ALL the corresponding bits of the corresponding byte in cmp_array are set to 1.
-        */
-        
-        #if defined(__AVX512F__)
-            cmp_mask = _mm_cmpge_epu8_mask(xmm_array, max_val_xmm);
-        #else
-            cmp_array = GreaterOrEqual8uSSE(xmm_array, max_val_xmm);
-        
-            // Create a mask using the most-significant bit of each byte value in cmp_array
-            cmp_mask = _mm_movemask_epi8(cmp_array);
-        #endif
-        
-
-
-        // Return index of first non-zero bit in mask
-        // This corresponds to the first non-zero byte in cmp_array 
-        if(cmp_mask)
-            return curr_scan_start + (__builtin_ffs(cmp_mask) - 1);
+    #ifdef __ALTIVEC__
+    else if(simd_mode == SIMD_Mode::ALTIVEC){
+        max_value = find_maximum_altivec(buff, 0, window_size, altivec_array);
+        return range_scan_geq_altivec(buff, window_size, size, max_value);
     }
- 
-    return end_position;
-}
-
-uint64_t RAM_Chunking::get_return_position_avx256(char *buff, uint64_t start_position, uint64_t end_position, uint8_t max_value){
-    
-    uint64_t num_vectors = (end_position - start_position) / AVX256_REGISTER_SIZE_BYTES;
-    uint64_t curr_scan_start;
-
-    // Structures to store bytes from data stream and comparison results in 128-bit SSE format
-    __m256i xmm_array, cmp_array;
-    uint32_t cmp_mask;
-
-    // Load max_value into xmm-format
-    __m256i max_val_xmm = _mm256_set1_epi8((char)max_value);
+    #endif
     
-    for(uint64_t i = 0; i < num_vectors; i++){
-        curr_scan_start = start_position + (i * AVX256_REGISTER_SIZE_BYTES);
-        // Load data into xmm register
-        xmm_array = _mm256_loadu_si256((__m256i const *)(buff + curr_scan_start));
-        
-        /* 
-
-         Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
-         the corresponding bit of cmp_mask is set to 1.
-        */
-        #if defined(__AVX512F__)
-             __mmask32 all_ones = UINT32_MAX;
-            cmp_mask = _mm256_mask_cmpge_epu8_mask(all_ones, xmm_array, max_val_xmm);
-        #else
-            cmp_array = GreaterOrEqual8uAVX256(xmm_array, max_val_xmm);
-        
-            // Create a mask using the most-significant bit of each byte value in cmp_array
-            cmp_mask = _mm256_movemask_epi8(cmp_array);
-        #endif
-        
-        // Return index of first non-zero bit in mask
-        // This corresponds to the first non-zero byte in cmp_array 
-        if(cmp_mask)
-            return curr_scan_start + (__builtin_ffs(cmp_mask) - 1);
+    else {
+        std::cout << "Error: Unsupported SIMD mode" << std::endl;
+        exit(EXIT_FAILURE);
     }
 
-    return end_position;
-
+    return size;
 }
 
-#if defined(__AVX512F__)
-    uint64_t RAM_Chunking::get_return_position_avx512(char *buff, uint64_t start_position, uint64_t end_position, uint8_t max_value){
-        
-        uint64_t num_vectors = (end_position - start_position) / AVX512_REGISTER_SIZE_BYTES;
-        uint64_t curr_scan_start;
-
-        // Structures to store bytes from data stream and comparison results in 128-bit SSE format
-        __m512i xmm_array;
-        uint64_t cmp_mask;
-
-        // Load max_value into xmm-format
-        __m512i max_val_xmm = _mm512_set1_epi8((char)max_value);
-        
-        for(uint64_t i = 0; i < num_vectors; i++){
-            curr_scan_start = start_position + (i * AVX512_REGISTER_SIZE_BYTES);
-            // Load data into xmm register
-            xmm_array = _mm512_loadu_si512((__m512i const *)(buff + curr_scan_start));
-            
-            /* 
-            Compare values with max_value. If a byte in xmm_array is geq max_val_xmm,  
-            the corresponding bit of cmp_mask is set to 1.
-            */
-
-            // Return a mask with the most significant bit of GEQ comparison byte-wise
-            cmp_mask = _mm512_cmpge_epu8_mask(xmm_array, max_val_xmm);
-
-            // Return index of first non-zero bit in mask
-            // This corresponds to the first non-zero byte in cmp_array 
-            if(cmp_mask)
-                return curr_scan_start + (__builtin_ffsll(cmp_mask) - 1);
-        }
-
-        return end_position;
-    }
-#endif
diff --git a/dedup/src/config/config.cpp b/dedup/src/config/config.cpp
index 1463ef0..1f0bb31 100644
--- a/dedup/src/config/config.cpp
+++ b/dedup/src/config/config.cpp
@@ -23,8 +23,8 @@ ChunkingTech Config::get_chunking_tech() const {
             return ChunkingTech::FASTCDC;
         } else if (value == "ram") {
             return ChunkingTech::RAM;
-        } else if (value == "experiment") {
-            return ChunkingTech::EXPERIMENT;
+        } else if(value == "maxp"){
+            return ChunkingTech::MAXP;
         } else if (value == "crc") {
             return ChunkingTech::CRC;
         } else if (value == "seq") {
@@ -49,7 +49,11 @@ HashingTech Config::get_hashing_tech() const {
             return HashingTech::SHA512;
         } else if (value == "md5") {
             return HashingTech::MD5;
-        }
+        } else if (value == "xxhash128") {
+            return HashingTech::XXHASH128;
+        } else if (value == "murmurhash3") {
+            return HashingTech::MURMURHASH3;
+        } 
     } catch (...) {
     }
     throw ConfigError(
@@ -61,23 +65,44 @@ SIMD_Mode Config::get_simd_mode() const {
         std::string value = parser.get_property(SIMD_MODE_STRING);
         if (value == "none") {
             return SIMD_Mode::NONE;
-        } else if (value == "sse128") {
-            return SIMD_Mode::SSE128;
-        } else if (value == "avx256") {
-            return SIMD_Mode::AVX256;
-        } else if (value == "avx512") {
-            if(__builtin_cpu_supports("avx512f"))
+        } 
+        #ifdef __SSE3__
+            else if (value == "sse128") {
+                return SIMD_Mode::SSE128;
+            } 
+            else if (value == "sse128_noslide"){
+                return SIMD_Mode::SSE128_NOSLIDE;
+            }
+        #endif
+        
+        #ifdef __AVX2__
+            else if (value == "avx256") {
+                return SIMD_Mode::AVX256;
+            }
+        #endif
+        
+        #ifdef __AVX512F__
+            else if (value == "avx512") {
                 return SIMD_Mode::AVX512;
-            else
-                throw ConfigError(
-                    "Invalid SIMD Mode in configuration file: AVX-512 not supported by CPU");
-        } else if (value == "sse128_noslide"){
-            return SIMD_Mode::SSE128_NOSLIDE;
+            }
+        #endif
+        #ifdef __ARM_NEON
+            else if (value == "neon128") {
+                return SIMD_Mode::NEON;
+            }
+        #endif
+        #ifdef __ALTIVEC__
+            else if (value == "altivec128") {
+                return SIMD_Mode::ALTIVEC;
+            }
+        #endif
+        else {
+            throw ConfigError("Unsupported SIMD mode");
         }
     } catch (...) {
     }
     throw ConfigError(
-        "The configuration file does not specify a valid SIMD mode");
+        "Unsupported SIMD mode. Please check compilation flags and configuration file.");
 }
 
 uint64_t Config::get_fc_size() const {
@@ -109,7 +134,7 @@ uint64_t Config::get_rabinc_min_block_size() const {
     } catch (...) {
     }
     throw ConfigError(
-        "The configuration file does not specify a valid rabins minimum block size");
+        "The configuration file does not specify a valid minimum block size");
 }
 
 uint64_t Config::get_rabinc_avg_block_size() const {
@@ -508,3 +533,25 @@ uint64_t Config::get_tttd_max_block_size() const {
         "The configuration file does not specify a valid TTTD max block "
         "size");
 }
+
+uint64_t Config::get_maxp_window_size() const {
+    try {
+        std::string value = parser.get_property(MAXP_WINDOW_SIZE);
+        return std::stoull(value);
+    } catch (...) {
+    }
+    throw ConfigError(
+        "The configuration file does not specify a valid MAXP window "
+        "size");
+}
+
+uint64_t Config::get_maxp_max_block_size() const {
+    try {
+        std::string value = parser.get_property(MAXP_MAX_BLOCK_SIZE);
+        return std::stoull(value);
+    } catch (...) {
+    }
+    throw ConfigError(
+        "The configuration file does not specify a valid MAXP maximum chunk "
+        "size");
+}
\ No newline at end of file
diff --git a/dedup/src/driver.cpp b/dedup/src/driver.cpp
index de92fe6..07ad3b9 100644
--- a/dedup/src/driver.cpp
+++ b/dedup/src/driver.cpp
@@ -32,24 +32,25 @@
 #include "crc_chunking.hpp"
 #include "seq_chunking.hpp"
 #include "tttd_chunking.hpp"
+#include "maxp_chunking.hpp"
 
 #include "md5_hashing.hpp"
 #include "sha1_hashing.hpp"
 #include "sha256_hashing.hpp"
 #include "sha512_hashing.hpp"
+#include "xxhash_hashing.hpp"
+#include "murmurhash3_hashing.hpp"
 
 bool disable_hashing = false;
 
 static void driver_function(const std::filesystem::path& dir_path,
-                            std::unique_ptr<Chunking_Technique>& chunk_method,
-                            const std::string& output_file) {
+                            std::unique_ptr<Chunking_Technique>& chunk_method, const std::string& output_file) {
     /**
      * @brief Uses the specified chunking technique to chunk the file, hash it
      * using the specified hashing technique and print the hashes
      * @param chunk_method: Chunking Technique Object. Object from a class
      * inheriting the Chunking_Technique interface.
-     * @param hash_method: Hash Technique Object. Object from a class inheriting
-     * the Hashing_Technique interface.
+     * @param output_file: Output file path for writing hashes to
      * @return: void
      *
      */
@@ -109,8 +110,7 @@ static void driver_function(const std::filesystem::path& dir_path,
     std::cout << "Avg Chunk size: " << total_bytes / chunk_count
               << std::endl;
 
-    std::cout << "Chunking Throughput (MB/sec): "
-              << total_mb / total_seconds_chunking << std::endl;
+    std::cout << "Chunking Throughput (MB/sec): " << total_mb / total_seconds_chunking << std::endl;
     std::cout << "Hashing Throughput (MB/sec): "
               << total_mb / total_seconds_hashing << std::endl;
 }
@@ -122,10 +122,10 @@ int main(int argc, char* argv[]) {
      * @todo: Add Config class which takes in parameters
      */
     if (argc > 4 || argc < 3) {
-        std::cout << "Usage: ./dedup.exe <directory_path> <config_file_path> [bool]"
+        std::cout << "Usage: ./dedup.exe <file_path> <config_file_path> [bool]"
                   << std::endl;
         std::cout
-            << "\t  <directory_path>: Path to directory to run chunking and hashing on."
+            << "\t  <file_path>: Path to file to run chunking and hashing on."
             << std::endl;
         std::cout << "\t  <config_file_path>: Path to the config file."
                   << std::endl;
@@ -181,6 +181,9 @@ int main(int argc, char* argv[]) {
             case ChunkingTech::CRC:
                 chunk_method = std::make_unique<SS_CRC_Chunking>(config);
                 break;
+            case ChunkingTech::MAXP:
+                chunk_method = std::make_unique<MAXP_Chunking>(config);
+                break;
             case ChunkingTech::SEQ:
                 chunk_method = std::make_unique<Seq_Chunking>(config);
                 break;
@@ -205,6 +208,12 @@ int main(int argc, char* argv[]) {
                 case HashingTech::SHA512:
                     chunk_method -> hash_method = std::make_unique<SHA512_Hashing>();
                     break;
+                case HashingTech::XXHASH128:
+                    chunk_method -> hash_method = std::make_unique<XXHash_Hashing>();
+                    break;
+                case HashingTech::MURMURHASH3:
+                    chunk_method -> hash_method = std::make_unique<MurmurHash3_Hashing>();
+                    break;
                 default:
                     std::cerr << "Unimplemented hashing technique" << std::endl;
                     exit(EXIT_FAILURE);
diff --git a/dedup/src/hashing/murmurhash3_hashing.cpp b/dedup/src/hashing/murmurhash3_hashing.cpp
new file mode 100644
index 0000000..b801fe8
--- /dev/null
+++ b/dedup/src/hashing/murmurhash3_hashing.cpp
@@ -0,0 +1,345 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "murmurhash3_hashing.hpp"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE inline __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
+{
+  return p[i];
+}
+
+FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix32 ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  const uint32_t c1 = 0xcc9e2d51;
+  const uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock32(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix32(h1);
+
+  *(uint32_t*)out = h1;
+} 
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  const uint32_t c1 = 0x239b961b; 
+  const uint32_t c2 = 0xab0e9789;
+  const uint32_t c3 = 0x38b34ae5; 
+  const uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock32(blocks,i*4+0);
+    uint32_t k2 = getblock32(blocks,i*4+1);
+    uint32_t k3 = getblock32(blocks,i*4+2);
+    uint32_t k4 = getblock32(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix32(h1);
+  h2 = fmix32(h2);
+  h3 = fmix32(h3);
+  h4 = fmix32(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock64(blocks,i*2+0);
+    uint64_t k2 = getblock64(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= ((uint64_t)tail[14]) << 48;
+  case 14: k2 ^= ((uint64_t)tail[13]) << 40;
+  case 13: k2 ^= ((uint64_t)tail[12]) << 32;
+  case 12: k2 ^= ((uint64_t)tail[11]) << 24;
+  case 11: k2 ^= ((uint64_t)tail[10]) << 16;
+  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
+  case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
+  case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
+  case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
+  case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
+  case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
+  case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
+  case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
+  case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
+// Additions for dedup-bench
+
+void MurmurHash3_Hashing::hash_chunk(File_Chunk& file_chunk) {
+
+    // Allocate output buffer for the hash
+    file_chunk.init_hash(HashingTech::MURMURHASH3, MURMURHASH3_DIGEST_LENGTH);
+    
+    // Call the appropriate MurmurHash3 function based on the chunk size
+    MurmurHash3_x64_128(file_chunk.get_data(), file_chunk.get_size(), 0, file_chunk.get_hash());
+}
diff --git a/dedup/src/hashing/xxhash_hashing.cpp b/dedup/src/hashing/xxhash_hashing.cpp
new file mode 100644
index 0000000..3cd672c
--- /dev/null
+++ b/dedup/src/hashing/xxhash_hashing.cpp
@@ -0,0 +1,15 @@
+#include "xxhash_hashing.hpp"
+#include "hash.hpp"
+#include <cstring>
+
+#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
+#define XXH_IMPLEMENTATION      /* access definitions */
+
+void XXHash_Hashing::hash_chunk(File_Chunk& file_chunk) {
+    file_chunk.init_hash(HashingTech::XXHASH128, XXH128_DIGEST_LENGTH);
+
+    XXH128_hash_t hash_value =  XXH3_128bits((const unsigned char*)file_chunk.get_data(), file_chunk.get_size());
+    BYTE* hash_ptr = file_chunk.get_hash();
+    std::memcpy(hash_ptr, &hash_value, XXH128_DIGEST_LENGTH);
+    return;
+}