|
| 1 | +# Plan: NumPy-compatible `.base` Property (Storage-Level Tracking) |
| 2 | + |
| 3 | +## Overview |
| 4 | + |
| 5 | +Implement NumPy's `.base` property by tracking view ownership at the `UnmanagedStorage` level, not `NDArray` level. This is architecturally cleaner because `UnmanagedStorage` is where data sharing actually occurs. |
| 6 | + |
| 7 | +## Design |
| 8 | + |
| 9 | +### Core Concept |
| 10 | + |
| 11 | +``` |
| 12 | +NumPy: ndarray.base → the ndarray that owns the data (or None) |
| 13 | +NumSharp: UnmanagedStorage._baseStorage → the storage that owns the data |
| 14 | + NDArray.@base → computed property that wraps _baseStorage |
| 15 | +``` |
| 16 | + |
| 17 | +### Architecture |
| 18 | + |
| 19 | +``` |
| 20 | +Original Array (owns data): |
| 21 | + NDArray a |
| 22 | + └── Storage A (_baseStorage = null, InternalArray = DATA) |
| 23 | +
|
| 24 | +View (shares data): |
| 25 | + NDArray b = a[2:5] |
| 26 | + └── Storage B (_baseStorage = A, InternalArray = DATA) ← same DATA |
| 27 | +
|
| 28 | +View of View (chains to original): |
| 29 | + NDArray c = b[1:2] |
| 30 | + └── Storage C (_baseStorage = A, InternalArray = DATA) ← still points to A |
| 31 | +``` |
| 32 | + |
| 33 | +## Implementation |
| 34 | + |
| 35 | +### Step 1: Add `_baseStorage` field to UnmanagedStorage |
| 36 | + |
| 37 | +**File:** `src/NumSharp.Core/Backends/Unmanaged/UnmanagedStorage.cs` |
| 38 | + |
| 39 | +```csharp |
| 40 | +public partial class UnmanagedStorage : ICloneable |
| 41 | +{ |
| 42 | + // ... existing fields ... |
| 43 | +
|
| 44 | + /// <summary> |
| 45 | + /// The original storage this is a view of, or null if this storage owns its data. |
| 46 | + /// Always points to the ultimate owner (not intermediate views). |
| 47 | + /// </summary> |
| 48 | + internal UnmanagedStorage? _baseStorage; |
| 49 | + |
| 50 | + // ... rest of class ... |
| 51 | +} |
| 52 | +``` |
| 53 | + |
| 54 | +### Step 2: Update Alias methods to set `_baseStorage` |
| 55 | + |
| 56 | +**File:** `src/NumSharp.Core/Backends/Unmanaged/UnmanagedStorage.Cloning.cs` |
| 57 | + |
| 58 | +```csharp |
| 59 | +public UnmanagedStorage Alias() |
| 60 | +{ |
| 61 | + var r = new UnmanagedStorage(); |
| 62 | + r._shape = _shape; |
| 63 | + r._typecode = _typecode; |
| 64 | + r._dtype = _dtype; |
| 65 | + if (InternalArray != null) |
| 66 | + r.SetInternalArray(InternalArray); |
| 67 | + r.Count = _shape.size; |
| 68 | + r._baseStorage = _baseStorage ?? this; // ← ADD: chain to original |
| 69 | + return r; |
| 70 | +} |
| 71 | + |
| 72 | +public UnmanagedStorage Alias(Shape shape) |
| 73 | +{ |
| 74 | + var r = new UnmanagedStorage(); |
| 75 | + r._typecode = _typecode; |
| 76 | + r._dtype = _dtype; |
| 77 | + if (InternalArray != null) |
| 78 | + r.SetInternalArray(InternalArray); |
| 79 | + r._shape = shape; |
| 80 | + r.Count = shape.size; |
| 81 | + r._baseStorage = _baseStorage ?? this; // ← ADD: chain to original |
| 82 | + return r; |
| 83 | +} |
| 84 | + |
| 85 | +public UnmanagedStorage Alias(ref Shape shape) |
| 86 | +{ |
| 87 | + var r = new UnmanagedStorage(); |
| 88 | + r._shape = shape; |
| 89 | + r._typecode = _typecode; |
| 90 | + r._dtype = _dtype; |
| 91 | + if (InternalArray != null) |
| 92 | + r.SetInternalArray(InternalArray); |
| 93 | + r.Count = shape.size; |
| 94 | + r._baseStorage = _baseStorage ?? this; // ← ADD: chain to original |
| 95 | + return r; |
| 96 | +} |
| 97 | +``` |
| 98 | + |
| 99 | +### Step 3: Update GetView to propagate `_baseStorage` |
| 100 | + |
| 101 | +**File:** `src/NumSharp.Core/Backends/Unmanaged/UnmanagedStorage.Slicing.cs` |
| 102 | + |
| 103 | +Review `GetView()` - it calls `Alias()` internally, so should automatically work. Need to verify. |
| 104 | + |
| 105 | +### Step 4: Update CreateBroadcastedUnsafe |
| 106 | + |
| 107 | +**File:** `src/NumSharp.Core/Backends/Unmanaged/UnmanagedStorage.cs` lines 148-153 |
| 108 | + |
| 109 | +Only the overload that takes `UnmanagedStorage` needs updating (the one taking `IArraySlice` creates owned data): |
| 110 | + |
| 111 | +```csharp |
| 112 | +public static UnmanagedStorage CreateBroadcastedUnsafe(UnmanagedStorage storage, Shape shape) |
| 113 | +{ |
| 114 | + var ret = new UnmanagedStorage(); |
| 115 | + ret._Allocate(shape, storage.InternalArray); |
| 116 | + ret._baseStorage = storage._baseStorage ?? storage; // ← ADD: track original |
| 117 | + return ret; |
| 118 | +} |
| 119 | +``` |
| 120 | + |
| 121 | +The `IArraySlice` overload (line 135) stays unchanged - it creates owned data. |
| 122 | + |
| 123 | +### Step 5: Add computed `@base` property to NDArray |
| 124 | + |
| 125 | +**File:** `src/NumSharp.Core/Backends/NDArray.cs` |
| 126 | + |
| 127 | +```csharp |
| 128 | +/// <summary> |
| 129 | +/// NumPy-compatible: The array owning the memory, or null if this array owns its data. |
| 130 | +/// </summary> |
| 131 | +/// <remarks> |
| 132 | +/// https://numpy.org/doc/stable/reference/generated/numpy.ndarray.base.html |
| 133 | +/// Note: Unlike NumPy, this returns a new NDArray wrapper each time. |
| 134 | +/// For checking view status, use: arr.@base != null |
| 135 | +/// </remarks> |
| 136 | +public NDArray? @base => Storage._baseStorage is { } bs |
| 137 | + ? WrapOwned(bs) |
| 138 | + : null; |
| 139 | +``` |
| 140 | + |
| 141 | +### Step 6: Add WrapOwned factory (if not exists) |
| 142 | + |
| 143 | +**File:** `src/NumSharp.Core/Backends/NDArray.cs` |
| 144 | + |
| 145 | +```csharp |
| 146 | +/// <summary> |
| 147 | +/// Wrap an UnmanagedStorage in an NDArray. Used for Clone, Scalar, computed .base, etc. |
| 148 | +/// </summary> |
| 149 | +internal static NDArray WrapOwned(UnmanagedStorage storage) |
| 150 | + => new NDArray(storage); |
| 151 | +``` |
| 152 | + |
| 153 | +### Step 7: Remove manual @base assignments |
| 154 | + |
| 155 | +Remove all the manual `view.@base = source.@base ?? source` assignments we added earlier: |
| 156 | + |
| 157 | +- `NDArray.Indexing.cs` |
| 158 | +- `NDArray.Indexing.Selection.Getter.cs` |
| 159 | +- `Default.Transpose.cs` |
| 160 | +- `np.expand_dims.cs` |
| 161 | +- `np.broadcast_to.cs` |
| 162 | +- `NdArray.ReShape.cs` |
| 163 | +- `NDArray.cs` (view method) |
| 164 | +- `Default.Reduction.*.cs` |
| 165 | + |
| 166 | +These are now handled automatically by `Alias()`. |
| 167 | + |
| 168 | +### Step 8: Remove `@base` field from NDArray |
| 169 | + |
| 170 | +The field we added: |
| 171 | +```csharp |
| 172 | +public NDArray? @base; // ← REMOVE this field |
| 173 | +``` |
| 174 | + |
| 175 | +Replace with computed property from Step 5. |
| 176 | + |
| 177 | +## Files to Modify |
| 178 | + |
| 179 | +| File | Change | |
| 180 | +|------|--------| |
| 181 | +| `Backends/Unmanaged/UnmanagedStorage.cs` | Add `_baseStorage` field, update `CreateBroadcastedUnsafe(storage, shape)` | |
| 182 | +| `Backends/Unmanaged/UnmanagedStorage.Cloning.cs` | Set `_baseStorage` in all 3 `Alias()` methods | |
| 183 | +| `Backends/NDArray.cs` | Replace `@base` field with computed property | |
| 184 | + |
| 185 | +### Files to Revert (remove manual @base assignments) |
| 186 | + |
| 187 | +| File | Lines to Revert | |
| 188 | +|------|-----------------| |
| 189 | +| `Selection/NDArray.Indexing.cs` | Lines 63-66, 80-83 | |
| 190 | +| `Selection/NDArray.Indexing.Selection.Getter.cs` | Lines 52-56, 117-121 | |
| 191 | +| `Backends/Default/ArrayManipulation/Default.Transpose.cs` | Lines 192-194 | |
| 192 | +| `Manipulation/np.expand_dims.cs` | Lines 11-13 | |
| 193 | +| `Creation/np.broadcast_to.cs` | Lines 70-73, 111-114, 153-156 | |
| 194 | +| `Creation/NdArray.ReShape.cs` | Lines 37-39, 63-65, 89-91, 107-109 | |
| 195 | +| `Backends/NDArray.cs` | Lines 476-479 (view method) | |
| 196 | +| `Backends/Default/Math/Reduction/*.cs` | Various (7 files) | |
| 197 | + |
| 198 | +## Investigation Results |
| 199 | + |
| 200 | +### 1. CreateBroadcastedUnsafe Location |
| 201 | + |
| 202 | +**File:** `UnmanagedStorage.cs` lines 135-153 |
| 203 | + |
| 204 | +Two overloads: |
| 205 | +```csharp |
| 206 | +// Creates new storage from raw slice - OWNS data, no _baseStorage |
| 207 | +public static UnmanagedStorage CreateBroadcastedUnsafe(IArraySlice arraySlice, Shape shape) |
| 208 | + |
| 209 | +// Creates view of existing storage - SHARES data, needs _baseStorage |
| 210 | +public static UnmanagedStorage CreateBroadcastedUnsafe(UnmanagedStorage storage, Shape shape) |
| 211 | +``` |
| 212 | + |
| 213 | +**Action:** Only update the second overload (line 148) to propagate `_baseStorage`. |
| 214 | + |
| 215 | +### 2. GetView() Path Analysis |
| 216 | + |
| 217 | +**File:** `UnmanagedStorage.Slicing.cs` |
| 218 | + |
| 219 | +All paths go through `Alias()`: |
| 220 | +- Line 49: `view.Alias(view.Shape.ExpandDimension(axis))` |
| 221 | +- Line 98: `return Alias()` |
| 222 | +- Line 116: `return Alias(slicedShape)` |
| 223 | + |
| 224 | +**Exception:** Line 108 creates `new UnmanagedStorage(clonedData, cleanShape)` for broadcast materialization - this is a COPY (owns data), correctly should NOT set `_baseStorage`. |
| 225 | + |
| 226 | +**Action:** No changes needed - Alias() handles it. |
| 227 | + |
| 228 | +### 3. Clone() Behavior |
| 229 | + |
| 230 | +**File:** `UnmanagedStorage.Cloning.cs` line 200 |
| 231 | + |
| 232 | +```csharp |
| 233 | +public UnmanagedStorage Clone() => new UnmanagedStorage(CloneData(), _shape.Clone(...)); |
| 234 | +``` |
| 235 | + |
| 236 | +Uses constructor, not Alias(). Creates owned data. |
| 237 | + |
| 238 | +**Action:** No changes needed - Clone() should NOT set `_baseStorage`. |
| 239 | + |
| 240 | +### 4. np.* Functions |
| 241 | + |
| 242 | +All use constructors like `new NDArray(dtype, shape)` or `new UnmanagedStorage(...)`. |
| 243 | +These create owned data, `_baseStorage` stays null by default. |
| 244 | + |
| 245 | +**Action:** No changes needed. |
| 246 | + |
| 247 | +### 5. NDArray<T> Generic Class |
| 248 | + |
| 249 | +Inherits from NDArray. Uses `base(storage)` constructors. |
| 250 | + |
| 251 | +**Action:** No changes needed - inherits behavior from NDArray. |
| 252 | + |
| 253 | +## Behavior Comparison |
| 254 | + |
| 255 | +| Scenario | NumPy | NumSharp (this plan) | |
| 256 | +|----------|-------|---------------------| |
| 257 | +| `a = np.arange(10)` | `a.base is None` | `a.@base == null` | |
| 258 | +| `b = a[2:5]` | `b.base is a` | `b.@base != null` (wraps a's storage) | |
| 259 | +| `c = b[1:2]` | `c.base is a` | `c.@base` wraps a's storage | |
| 260 | +| `d = a.copy()` | `d.base is None` | `d.@base == null` | |
| 261 | +| `c.base is a` | `True` | `False` (different wrapper) | |
| 262 | + |
| 263 | +**Note:** Object identity (`is`) won't match, but semantic equivalence does. |
| 264 | + |
| 265 | +## Testing |
| 266 | + |
| 267 | +```csharp |
| 268 | +[Test] |
| 269 | +public void Base_OriginalArray_IsNull() |
| 270 | +{ |
| 271 | + var a = np.arange(10); |
| 272 | + Assert.That(a.@base, Is.Null); |
| 273 | +} |
| 274 | + |
| 275 | +[Test] |
| 276 | +public void Base_SliceOfOriginal_PointsToOriginalStorage() |
| 277 | +{ |
| 278 | + var a = np.arange(10); |
| 279 | + var b = a["2:5"]; |
| 280 | + Assert.That(b.@base, Is.Not.Null); |
| 281 | + Assert.That(b.@base!.Storage, Is.SameAs(a.Storage)); |
| 282 | +} |
| 283 | + |
| 284 | +[Test] |
| 285 | +public void Base_SliceOfSlice_PointsToOriginalStorage() |
| 286 | +{ |
| 287 | + var a = np.arange(10); |
| 288 | + var b = a["2:7"]; |
| 289 | + var c = b["1:3"]; |
| 290 | + Assert.That(c.@base, Is.Not.Null); |
| 291 | + Assert.That(c.@base!.Storage, Is.SameAs(a.Storage)); |
| 292 | +} |
| 293 | + |
| 294 | +[Test] |
| 295 | +public void Base_Copy_IsNull() |
| 296 | +{ |
| 297 | + var a = np.arange(10); |
| 298 | + var b = np.copy(a); |
| 299 | + Assert.That(b.@base, Is.Null); |
| 300 | +} |
| 301 | + |
| 302 | +[Test] |
| 303 | +public void Base_Reshape_PointsToOriginalStorage() |
| 304 | +{ |
| 305 | + var a = np.arange(12); |
| 306 | + var b = a.reshape(3, 4); |
| 307 | + Assert.That(b.@base, Is.Not.Null); |
| 308 | + Assert.That(b.@base!.Storage, Is.SameAs(a.Storage)); |
| 309 | +} |
| 310 | + |
| 311 | +[Test] |
| 312 | +public void Base_Transpose_PointsToOriginalStorage() |
| 313 | +{ |
| 314 | + var a = np.arange(12).reshape(3, 4); |
| 315 | + var b = a.T; |
| 316 | + // b.@base points to reshape's storage, which points to a's storage |
| 317 | + Assert.That(b.@base, Is.Not.Null); |
| 318 | +} |
| 319 | +``` |
| 320 | + |
| 321 | +## Rollback Plan |
| 322 | + |
| 323 | +If issues arise: |
| 324 | +1. Remove `_baseStorage` field from UnmanagedStorage |
| 325 | +2. Revert Alias() methods |
| 326 | +3. Go back to NDArray-level tracking with manual assignments |
| 327 | + |
| 328 | +## Open Questions |
| 329 | + |
| 330 | +1. Should `@base` cache the created NDArray to avoid repeated allocations? |
| 331 | +2. Should we add a `bool OwnsData` property for cleaner checks? |
| 332 | +3. Impact on memory/GC - does `_baseStorage` reference prevent collection? |
0 commit comments