feat(data structures, streams): variations of moving average

BrianLusina · BrianLusina · commit 6168b4a9ad2e · 2025-12-27T07:44:44.000+03:00
diff --git a/datastructures/circular_buffer/__init__.py b/datastructures/circular_buffer/__init__.py
@@ -1,45 +1,11 @@
-class BufferFullException(Exception):
-    pass
-
-
-class BufferEmptyException(Exception):
-    pass
-
-
-class CircularBuffer:
-    def __init__(self, size):
-        self.buffer = bytearray(size)
-        self.read_point = 0
-        self.write_point = 0
-
-    def _update_buffer(self, data):
-        """
-        Protected helper method for Python 2/3
-        """
-        try:
-            self.buffer[self.write_point] = data
-        except TypeError:
-            self.buffer[self.write_point] = ord(data)
-
-    def clear(self):
-        self.buffer = bytearray(len(self.buffer))
-
-    def write(self, data):
-        if all(self.buffer):
-            raise BufferFullException
-        self._update_buffer(data)
-        self.write_point = (self.write_point + 1) % len(self.buffer)
-
-    def read(self):
-        if not any(self.buffer):
-            raise BufferEmptyException
-        data = chr(self.buffer[self.read_point])
-        self.buffer[self.read_point] = 0
-        self.read_point = (self.read_point + 1) % len(self.buffer)
-        return data
-
-    def overwrite(self, data):
-        self._update_buffer(data)
-        if all(self.buffer) and self.read_point == self.write_point:
-            self.read_point = (self.read_point + 1) % len(self.buffer)
-        self.write_point = (self.write_point + 1) % len(self.buffer)
+from datastructures.circular_buffer.circular_buffer import CircularBuffer
+from datastructures.circular_buffer.exceptions import (
+    BufferFullException,
+    BufferEmptyException,
+)
+
+__all__ = [
+    "CircularBuffer",
+    "BufferFullException",
+    "BufferEmptyException",
+]
diff --git a/datastructures/circular_buffer/circular_buffer.py b/datastructures/circular_buffer/circular_buffer.py
@@ -0,0 +1,43 @@
+from datastructures.circular_buffer.exceptions import (
+    BufferFullException,
+    BufferEmptyException,
+)
+
+
+class CircularBuffer:
+    def __init__(self, size):
+        self.buffer = bytearray(size)
+        self.read_point = 0
+        self.write_point = 0
+
+    def _update_buffer(self, data):
+        """
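+        Store data at the write position; if direct assignment raises TypeError (e.g. a one-character string), store ord(data) instead (Python 2/3 compatibility).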
+        """
+        try:
+            self.buffer[self.write_point] = data
+        except TypeError:
+            self.buffer[self.write_point] = ord(data)
+
+    def clear(self):
+        self.buffer = bytearray(len(self.buffer))
+
+    def write(self, data):
+        if all(self.buffer):
+            raise BufferFullException
+        self._update_buffer(data)
+        self.write_point = (self.write_point + 1) % len(self.buffer)
+
+    def read(self):
+        if not any(self.buffer):
+            raise BufferEmptyException
+        data = chr(self.buffer[self.read_point])
+        self.buffer[self.read_point] = 0
+        self.read_point = (self.read_point + 1) % len(self.buffer)
+        return data
+
+    def overwrite(self, data):
+        self._update_buffer(data)
+        if all(self.buffer) and self.read_point == self.write_point:
+            self.read_point = (self.read_point + 1) % len(self.buffer)
+        self.write_point = (self.write_point + 1) % len(self.buffer)
diff --git a/datastructures/circular_buffer/exceptions.py b/datastructures/circular_buffer/exceptions.py
@@ -0,0 +1,6 @@
+class BufferFullException(Exception):
+    pass
+
+
+class BufferEmptyException(Exception):
+    pass
diff --git a/datastructures/circular_buffer/test_circular_buffer.py b/datastructures/circular_buffer/test_circular_buffer.py
diff --git a/datastructures/streams/moving_average/__init__.py b/datastructures/streams/moving_average/__init__.py
@@ -1,34 +1,6 @@
-from typing import Deque
-from collections import deque
+from datastructures.streams.moving_average.moving_average_with_buffer import (
+    MovingAverageWithBuffer,
+)
+from datastructures.streams.moving_average.moving_average import MovingAverage
 
-
-class MovingAverage:
-    def __init__(self, size):
-        """
-        Initializes the moving average object
-        Args:
-            size (int): The size of the moving average
-        """
-        self.queue: Deque[int] = deque()
-        self.size: int = size
-        self.window_sum: float = 0.0
-
-    def next(self, val: int) -> float:
-        """
-        Adds a value to the stream and returns the moving average of the stream
-        Args:
-            val (int): The value to add to the stream
-        Returns:
-            float: The moving average of the stream
-        """
-        if len(self.queue) == self.size:
-            # remove oldest value
-            oldest_value = self.queue.popleft()
-            self.window_sum -= oldest_value
-
-        # add new value to queue
-        self.queue.append(val)
-        self.window_sum += val
-
-        # calculate average
-        return self.window_sum / len(self.queue)
+__all__ = ["MovingAverage", "MovingAverageWithBuffer"]
diff --git a/datastructures/streams/moving_average/exponential_moving_average.py b/datastructures/streams/moving_average/exponential_moving_average.py
@@ -0,0 +1,28 @@
+class ExponentialMovingAverage:
+    """
+    The Exponential Moving Average (EMA) is widely used in finance and signal processing because it reacts faster to
+    recent price changes than a Simple Moving Average (SMA).
+
+    The beauty of the EMA is its efficiency: it does not require a buffer or a window of previous values. You only need
+    to store the previous EMA result. This makes the time and space complexity both O(1).
+
+    Why use EMA?
+    1. Reduced Lag: Because it weights the most recent data most heavily, it catches trend reversals much sooner than an SMA.
+    2. Memory Efficiency: You don't need to store a list of numbers; you only need to store one variable (self.ema).
+    3. Smoothness: It creates a smooth curve that isn't as sensitive to an old "outlier" dropping out of the window (a common issue with SMA).
+    """
+
+    def __init__(self, size: int):
+        self.size = size
+        self.alpha = 2 / (size + 1)
+        self.ema = None  # Initialized with the first value received
+
+    def next(self, val: int) -> float:
+        if self.ema is None:
+            # The first value acts as the starting point
+            self.ema = float(val)
+        else:
+            # Apply the EMA formula: EMA_new = alpha * val + (1 - alpha) * EMA_previous
+            self.ema = (val * self.alpha) + (self.ema * (1 - self.alpha))
+
+        return self.ema
diff --git a/datastructures/streams/moving_average/moving_average.py b/datastructures/streams/moving_average/moving_average.py
@@ -0,0 +1,34 @@
+from typing import Deque
+from collections import deque
+
+
+class MovingAverage:
+    def __init__(self, size: int):
+        """
+        Initializes the moving average object
+        Args:
+            size (int): The size of the moving average
+        """
+        self.queue: Deque[int] = deque()
+        self.size: int = size
+        self.window_sum: float = 0.0
+
+    def next(self, val: int) -> float:
+        """
+        Adds a value to the stream and returns the moving average of the stream
+        Args:
+            val (int): The value to add to the stream
+        Returns:
+            float: The moving average of the stream
+        """
+        if len(self.queue) == self.size:
+            # remove oldest value
+            oldest_value = self.queue.popleft()
+            self.window_sum -= oldest_value
+
+        # add new value to queue
+        self.queue.append(val)
+        self.window_sum += val
+
+        # calculate average
+        return self.window_sum / len(self.queue)
diff --git a/datastructures/streams/moving_average/moving_average_with_buffer.py b/datastructures/streams/moving_average/moving_average_with_buffer.py
@@ -0,0 +1,38 @@
+class MovingAverageWithBuffer:
+    """
+    Using a Circular Buffer (implemented with a fixed-size list) is an excellent way to optimize memory. Instead of
+    dynamically resizing or shifting elements, we use a fixed array and a pointer that "wraps around" using the modulo
+    operator (index % size).
+
+    This approach is often preferred in embedded systems or high-performance scenarios because it avoids the overhead of
+    frequent memory allocations.
+
+    The expression self.head = (self.head + 1) % self.size ensures that if our size is 3, the pointer sequence will be:
+    0 → 1 → 2 → 0 → 1...
+
+    This effectively "recycles" the array positions, making it behave like a continuous loop.
+    """
+
+    def __init__(self, size: int):
+        # Pre-allocate a list of zeros
+        self.size = size
+        self.buffer = [0] * size
+        self.head = 0  # Pointer to the next position to overwrite
+        self.count = 0  # Track how many elements we've actually added
+        self.current_sum = 0.0
+
+    def next(self, val: int) -> float:
+        # If the buffer is full, subtract the value we are about to overwrite
+        if self.count == self.size:
+            self.current_sum -= self.buffer[self.head]
+        else:
+            self.count += 1
+
+        # Overwrite the old value at the head pointer
+        self.buffer[self.head] = val
+        self.current_sum += val
+
+        # Move the pointer to the next index, wrapping around if at the end
+        self.head = (self.head + 1) % self.size
+
+        return self.current_sum / self.count
diff --git a/datastructures/streams/moving_average/test_moving_average.py b/datastructures/streams/moving_average/test_moving_average.py
@@ -2,6 +2,9 @@
 from typing import List, Tuple
 from parameterized import parameterized
 from datastructures.streams.moving_average import MovingAverage
+from datastructures.streams.moving_average.moving_average_with_buffer import (
+    MovingAverageWithBuffer,
+)
 
 
 TEST_CASES = [
@@ -22,6 +25,16 @@ def test_moving_average(self, size: int, data_to_expected: List[Tuple[int, float
             round(actual, 5)
             self.assertEqual(expected, round(actual, 5))
 
+    @parameterized.expand(TEST_CASES)
+    def test_moving_average_with_buffer(
+        self, size: int, data_to_expected: List[Tuple[int, float]]
+    ):
+        moving_average = MovingAverageWithBuffer(size)
+        for data, expected in data_to_expected:
+            actual = moving_average.next(data)
+            round(actual, 5)
+            self.assertEqual(expected, round(actual, 5))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/datastructures/streams/moving_average/weighted_moving_average.py b/datastructures/streams/moving_average/weighted_moving_average.py
@@ -0,0 +1,46 @@
+class WeightedMovingAverage:
+    """
+    Weight Assignment: The most recent value always gets the maximum weight (self.size), and weights
+    decrease as we look further back in time.
+
+    The Denominator: Unlike the simple moving average where you divide by the count, here you divide by the sum of the
+    weights applied.
+
+    For a full window of size 3, weights are 3, 2, and 1. Sum = 6.
+    Before the window is full, we use a current_denominator to ensure accuracy.
+
+    The WMA is slightly more expensive (O(size) per next() call) because we have to re-sum the weighted values each time.
+    If you need O(1) performance for a weighted average, you might look into an Exponential Moving Average (EMA), which
+    uses a smoothing factor (α) to give more weight to recent data without needing to store the full history.
+    """
+
+    def __init__(self, size: int):
+        self.size = size
+        self.buffer = [0] * size
+        self.head = 0
+        self.count = 0
+        # Full-window denominator, i.e. the sum of weights 1 + 2 + ... + size (next() recomputes its own running denominator)
+        # Formula: (n * (n + 1)) / 2, with n = size
+        self.denominator = (size * (size + 1)) / 2
+
+    def next(self, val: int) -> float:
+        # 1. Update the buffer
+        self.buffer[self.head] = val
+        self.head = (self.head + 1) % self.size
+        if self.count < self.size:
+            self.count += 1
+
+        # 2. Calculate Weighted Sum
+        weighted_sum = 0.0
+        current_denominator = 0
+
+        # Iterate backward from the most recent element
+        for i in range(self.count):
+            # Find the index of the i-th element counting back from the newest one
+            # (self.head - 1 - i) steps backward; the % self.size handles the circular wrap-around
+            idx = (self.head - 1 - i) % self.size
+            weight = self.size - i
+            weighted_sum += self.buffer[idx] * weight
+            current_denominator += weight
+
+        return weighted_sum / current_denominator

-Original file line number
+Diff line change
@@ -0,0 +1,6 @@
+class BufferFullException(Exception):
+    pass
+
+
+class BufferEmptyException(Exception):
+    pass