Add tests and debug tensor operators (in-place ops, comparisons, minimum); move sigmoid/tanh into core.tensor

WeltXing · WeltXing · commit e6db71bfa319 · 2025-08-21T10:38:41.000+08:00
diff --git a/README.md b/README.md
@@ -6,14 +6,15 @@
 
 [![Downloads](https://pepy.tech/badge/pydynet)](https://pepy.tech/project/pydynet)
 [![Downloads](https://static.pepy.tech/personalized-badge/pydynet?period=month&units=international_system&left_color=grey&right_color=orange&left_text=downloads/month)](https://pepy.tech/project/pydynet)
-![](https://img.shields.io/pypi/l/pydynet)
-![](https://img.shields.io/pypi/implementation/numpy)
-![](https://img.shields.io/github/stars/Kaslanarian/PyDyNet?style=social)
-![](https://img.shields.io/github/forks/Kaslanarian/PyDyNet?style=social)
+![x](https://img.shields.io/pypi/l/pydynet)
+![x](https://img.shields.io/pypi/implementation/numpy)
+![x](https://img.shields.io/github/stars/Kaslanarian/PyDyNet?style=social)
+![x](https://img.shields.io/github/forks/Kaslanarian/PyDyNet?style=social)
 
 ## Towards Large Language Model
 
 **2025.8.12**: 实现了纯推理的llama3 (6-layer Transformer, vocab-size=32000). 参考了[这里](https://github.com/likejazz/llama3.np)的NumPy实现和数据集. 将数据集下载到`llama`文件夹即可运行:
+
 ```bash
 >>> python .\llama\infer.py
 There was a boy named Timmy. He loved to play with hi toy and run around outside. One day, Timmy' mom asked him to help her with the laundry. Timmy didn't want to help because he wanted to play. But hi mom said, "Timmy, you need to help me. It' important to help out."
@@ -53,7 +54,7 @@ python setup.py install
 
 ## Example
 
-[tests](./tests)中是一些例子。运行`python examples/XXX.py`即可:
+[examples](./examples/)中是一些例子。运行`python examples/XXX.py`即可:
 
 ### AutoDiff
 
@@ -65,7 +66,6 @@ python setup.py install
 
 <img src="imgs/ad2d.png" alt="ad2" style="zoom:67%;" />
 
-
 ### MLP & LeNet
 
 [mlp_cnn.py](examples/mlp_cnn.py)使用全连接网络(三层+残差)和LeNet对MNIST进行分类. 训练准确率和测试准确率：
diff --git a/pydynet/core/__init__.py b/pydynet/core/__init__.py
@@ -1,4 +1,4 @@
 from .tensor import (Tensor, add, sub, mul, div, pow, matmul, abs, sum, mean,
                      min, max, min, argmax, argmin, maximum, minimum, exp, log,
-                     sign, reshape, transpose, swapaxes, concat)
+                     sign, reshape, transpose, swapaxes, concat, sigmoid, tanh)
 from .function import sqrt, square, vsplit, hsplit, dsplit, split, unsqueeze, squeeze
diff --git a/pydynet/core/tensor.py b/pydynet/core/tensor.py
@@ -11,13 +11,13 @@ class Graph:
     size = 0
 
     @classmethod
-    def add_node(cls, node):
+    def _add_node(cls, node):
         '''添加图节点'''
         cls.node_list.append(node)
         cls.size += 1
 
     @classmethod
-    def free_node(cls, node):
+    def _free_node(cls, node):
         node.last.clear()
 
         index = cls.node_list.index(node)
@@ -87,7 +87,7 @@ def __init__(
             with self.device:
                 self.grad = self.xp.zeros(self.shape, dtype=dtype)
             self.last: list[Tensor] = list()
-            Graph.add_node(self)
+            Graph._add_node(self)
         else:
             self.grad = None
 
@@ -249,72 +249,66 @@ def __abs__(self):
     def __getitem__(self, key):
         return _get_slice(self, key)
 
-    def __setitem__(self, key, value):
-        if is_grad_enable() and self.requires_grad:
+    def _inplace(self, *others: Tensor, func):
+        if self.requires_grad and is_grad_enable():
             raise ValueError(
                 "In-place operation is forbidden in node requires grad.")
-        if isinstance(key, Tensor):
-            key = key.data
 
-        with self.device:
-            self.data[key] = value.data if isinstance(value, Tensor) else value
+        others = tuple(other.data if isinstance(other, Tensor) else other
+                       for other in others)
 
-    def __inplace(self, other, func):
-        if self.requires_grad:
-            raise ValueError(
-                "In-place operation is forbidden in node requires grad.")
-        if isinstance(other, Tensor):
-            other = other.data
         with self.device:
-            self.data[...] = func(self.data, other)
+            func(*others)
         return self
 
+    def __setitem__(self, key, value):
+        return self._inplace(key, value, func=self.data.__setitem__)
+
     def __iadd__(self, other):
-        return self.__inplace(other, lambda x, y: x + y)
+        return self._inplace(other, func=self.data.__iadd__)
 
     def __isub__(self, other):
-        return self.__inplace(other, lambda x, y: x - y)
+        return self._inplace(other, func=self.data.__isub__)
 
     def __imul__(self, other):
-        return self.__inplace(other, lambda x, y: x * y)
+        return self._inplace(other, func=self.data.__imul__)
 
     def __itruediv__(self, other):
-        return self.__inplace(other, lambda x, y: x / y)
+        return self._inplace(other, func=self.data.__itruediv__)
 
     def __imatmul__(self, other):
-        return self.__inplace(other, lambda x, y: x @ y)
-
-    def __compare(self, other, func):
-        if isinstance(other, Tensor):
-            other = other.data
+        return self._inplace(other, func=self.data.__imatmul__)
 
+    def _compare(self, other, func):
         with self.device:
-            return Tensor(func(self.data, other), self.xp.bool_, None,
-                          self.device, False)
+            return Tensor(
+                func(self.data,
+                     other.data if isinstance(other, Tensor) else other),
+                self.xp.bool_, None, self.device, False)
 
     @no_grad()
     def eq(self, other):
-        return self.__compare(other, lambda x, y: x == y)
+        return self._compare(other, lambda x, y: x == y)
 
     @no_grad()
     def ne(self, other):
-        return self.__compare(other, lambda x, y: x != y)
+        return self._compare(other, lambda x, y: x != y)
 
     @no_grad()
     def __lt__(self, other):
-        return self.__compare(other, lambda x, y: x < y)
+        return self._compare(other, lambda x, y: x < y)
 
     @no_grad()
     def __le__(self, other):
-        return self.__compare(other, lambda x, y: x <= y)
+        return self._compare(other, lambda x, y: x <= y)
 
     @no_grad()
     def __gt__(self, other):
-        return not self.__le__(other)
+        return self._compare(other, lambda x, y: x > y)
 
     @no_grad()
     def __ge__(self, other):
-        return not self.__lt__(other)
+        return self._compare(other, lambda x, y: x >= y)
 
     def backward(self, retain_graph: bool = False):
         '''
@@ -364,7 +358,7 @@ def backward(self, retain_graph: bool = False):
 
                 # if not retain graph and node is not leaf, free it
                 if not retain_graph and not node.is_leaf:
-                    Graph.free_node(node)
+                    Graph._free_node(node)
 
     def _build_edge(self, node: Tensor):
         node.last.append(self)
@@ -397,8 +391,8 @@ def to(self, device):
     def cpu(self):
         return self.to("cpu")
 
-    def cuda(self):
-        return self.to("cuda:0")
+    def cuda(self, id: int = 0):
+        return self.to(f"cuda:{id}")
 
     @property
     def xp(self):
@@ -809,7 +803,7 @@ def grad_fn(self, x: Tensor, grad) -> np.ndarray:
 class minimum(_BinaryOperator):
 
     def forward_(self, x: Tensor, y: Tensor) -> np.ndarray:
-        return self.xp.minimum(x, y)
+        return self.xp.minimum(x.data, y.data)
 
     def grad_fn(self, x: Tensor, grad) -> np.ndarray:
         return (self.data == x) * grad
@@ -983,3 +977,29 @@ def grad_fn(self, x, grad: np.ndarray):
         slc = [slice(None)] * grad.ndim
         slc[self.axis] = slice(start, end)
         return grad[tuple(slc)]
+
+
+class sigmoid(_UnaryOperator):
+    '''Sigmoid运算, 我们前向传播避免了溢出问题'''
+
+    def forward_(self, x: Tensor) -> np.ndarray:
+        sigmoid = self.xp.zeros(x.shape, dtype=x.dtype)
+        sigmoid[x.data > 0] = 1 / (1 + self.xp.exp(-x.data[x.data > 0]))
+        sigmoid[x.data <= 0] = 1 - 1 / (1 + self.xp.exp(x.data[x.data <= 0]))
+        return sigmoid
+
+    def grad_fn(self, x: Tensor, grad: np.ndarray) -> np.ndarray:
+        return self.data * (1 - self.data) * grad
+
+
+class tanh(_UnaryOperator):
+    '''Tanh运算, 我们前向传播避免了溢出问题'''
+
+    def forward_(self, x: Tensor) -> np.ndarray:
+        tanh = self.xp.zeros(x.shape, dtype=x.dtype)
+        tanh[x.data > 0] = 2 / (1 + self.xp.exp(-2 * x.data[x.data > 0])) - 1
+        tanh[x.data <= 0] = 1 - 2 / (1 + self.xp.exp(2 * x.data[x.data <= 0]))
+        return tanh
+
+    def grad_fn(self, x: Tensor, grad: np.ndarray) -> np.ndarray:
+        return (1 - self.data**2) * grad
diff --git a/pydynet/nn/functional.py b/pydynet/nn/functional.py
@@ -20,30 +20,12 @@ def embedding(x: tensor.Tensor, weight: tensor.Tensor, padding_idx: int):
     return query
 
 
-class sigmoid(tensor._UnaryOperator):
-    '''Sigmoid运算, 我们前向传播避免了溢出问题'''
-
-    def forward_(self, x: tensor.Tensor) -> np.ndarray:
-        sigmoid = self.xp.zeros(x.shape, dtype=x.dtype)
-        sigmoid[x.data > 0] = 1 / (1 + self.xp.exp(-x.data[x.data > 0]))
-        sigmoid[x.data <= 0] = 1 - 1 / (1 + self.xp.exp(x.data[x.data <= 0]))
-        return sigmoid
-
-    def grad_fn(self, x: tensor.Tensor, grad: np.ndarray) -> np.ndarray:
-        return self.data * (1 - self.data) * grad
+def sigmoid(x: tensor.Tensor):
+    return tensor.sigmoid(x)
 
 
-class tanh(tensor._UnaryOperator):
-    '''Tanh运算, 我们前向传播避免了溢出问题'''
-
-    def forward_(self, x: tensor.Tensor) -> np.ndarray:
-        tanh = self.xp.zeros(x.shape, dtype=x.dtype)
-        tanh[x.data > 0] = 2 / (1 + self.xp.exp(-2 * x.data[x.data > 0])) - 1
-        tanh[x.data <= 0] = 1 - 2 / (1 + self.xp.exp(2 * x.data[x.data <= 0]))
-        return tanh
-
-    def grad_fn(self, x: tensor.Tensor, grad: np.ndarray) -> np.ndarray:
-        return (1 - self.data**2) * grad
+def tanh(x: tensor.Tensor):
+    return tensor.tanh(x)
 
 
 def relu(x: tensor.Tensor):
diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+filterwarnings = 
+    ignore::UserWarning:pydynet
diff --git a/tests/test_backward.py b/tests/test_backward.py
@@ -0,0 +1,9 @@
+import sys, pytest, random
+import numpy as np
+
+sys.path.append('../pydynet')
+
+np.random.seed(0)
+random.seed(0)
+
+type_list = [np.float16, np.float32, np.float64]
diff --git a/tests/test_tensor_basic.py b/tests/test_tensor_basic.py
@@ -0,0 +1,114 @@
+import sys, pytest, random
+import numpy as np
+from itertools import product
+
+sys.path.append('../pydynet')
+
+import pydynet as pdn
+
+np.random.seed(0)
+random.seed(0)
+
+type_list = [np.float16, np.float32, np.float64]
+
+
+def matmul_shape_pair(max_dim=4, max_size=5):
+    ndim = random.randint(0, max_dim)
+
+    shape1 = []
+    shape2 = []
+    for _ in range(ndim):
+        if random.random() < 0.5:
+            # 50% 概率设置为 1, 确保广播可能
+            s1, s2 = random.choice([(1, random.randint(1, max_size)),
+                                    (random.randint(1, max_size), 1)])
+        else:
+            # 否则两边相同
+            val = random.randint(1, max_size)
+            s1, s2 = val, val
+        shape1.append(s1)
+        shape2.append(s2)
+    shape1, shape2 = tuple(shape1), tuple(shape2)
+
+    m = random.randint(1, max_size)
+    n = random.randint(1, max_size)
+    p = random.randint(1, max_size)
+
+    shape1 = shape1 + (m, n)
+    shape2 = shape2 + (n, p)
+
+    shape1 = shape1[random.randint(0, len(shape1) - 2):]
+
+    return shape1, shape2
+
+
+def broadcastable_shape_pair(max_dim=4, max_size=5):
+    ndim = random.randint(0, max_dim)  # 随机维数
+    shape1 = []
+    shape2 = []
+    for _ in range(ndim):
+        if random.random() < 0.5:
+            # 50% 概率设置为 1, 确保广播可能
+            s1, s2 = random.choice([(1, random.randint(1, max_size)),
+                                    (random.randint(1, max_size), 1)])
+        else:
+            # 否则两边相同
+            val = random.randint(1, max_size)
+            s1, s2 = val, val
+        shape1.append(s1)
+        shape2.append(s2)
+    shape1, shape2 = tuple(shape1), tuple(shape2)
+
+    # 随机缺失维度
+    shape1 = shape1[random.randint(0, len(shape1)):]
+    return shape1, shape2
+
+
+def array_pair_generator(pair_gen_func,
+                         max_dim=4,
+                         max_size=5,
+                         n_iter=4,
+                         seed=None):
+    rng = np.random.default_rng(seed)
+    count = 0
+    while n_iter is None or count < n_iter:
+        shape1, shape2 = pair_gen_func(max_dim, max_size)
+        a = rng.standard_normal(size=shape1).astype(rng.choice(type_list))
+        b = rng.standard_normal(size=shape2).astype(rng.choice(type_list))
+        yield a, b
+        count += 1
+
+
+test_list = array_pair_generator(broadcastable_shape_pair, 4, 5, 8, seed=42)
+func_list = [(pdn.add, np.add), (pdn.sub, np.subtract), (pdn.mul, np.multiply),
+             (pdn.div, np.divide), (pdn.pow, np.power),
+             (pdn.maximum, np.maximum), (pdn.minimum, np.minimum)]
+test_list = [(*array, *funcs)
+             for (array, funcs) in product(test_list, func_list)]
+
+
+@pytest.mark.parametrize("operand1, operand2, pdn_func, np_func", test_list)
+@pytest.mark.filterwarnings("ignore:invalid value")
+@pytest.mark.filterwarnings("ignore:divide by zero")
+def test_binary_operator(operand1: np.ndarray, operand2: np.ndarray,
+                         pdn_func: callable, np_func: callable):
+    pdn_operand1, pdn_operand2 = pdn.Tensor(operand1), pdn.Tensor(operand2)
+    pdn_output: pdn.Tensor = pdn_func(pdn_operand1, pdn_operand2)
+    np_output: np.ndarray = np_func(operand1, operand2)
+    assert pdn_output.shape == np_output.shape
+    assert pdn_output.dtype == np_output.dtype
+    assert np.allclose(pdn_output.data, np_output, equal_nan=True)
+
+
+test_list = array_pair_generator(matmul_shape_pair, 4, 5, 8, seed=42)
+
+
+@pytest.mark.parametrize("operand1, operand2", test_list)
+def test_matmul(operand1: np.ndarray, operand2: np.ndarray):
+    pdn_operand1, pdn_operand2 = pdn.Tensor(operand1), pdn.Tensor(operand2)
+    pdn_output: pdn.Tensor = pdn.matmul(pdn_operand1, pdn_operand2)
+    np_output: np.ndarray = np.matmul(operand1, operand2)
+    assert pdn_output.shape == np_output.shape
+    assert pdn_output.dtype == np_output.dtype
+    assert np.allclose(pdn_output.data, np_output, equal_nan=True)
+

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -0,0 +1,3 @@` |
| | `1` | `+[pytest]` |
| | `2` | `+filterwarnings =` |
| | `3` | `+    ignore::UserWarning:pydynet` |