Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 52 additions & 49 deletions chapter_13_chapter_computer-vision/boundingbox.ipynb

Large diffs are not rendered by default.

801 changes: 659 additions & 142 deletions chapter_13_chapter_computer-vision/fcn.ipynb

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions chapter_13_chapter_computer-vision/ge_check_op.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"graph_id": 14,
"op": [
{
"error_type": "infer_shape_error",
"input0": {
"data_type": "DT_INT32",
"layout": "NCHW",
"shape": [
4
]
},
"input1": {
"data_type": "DT_FLOAT",
"layout": "NCHW",
"shape": [
21,
21,
64,
64
]
},
"input2": {
"data_type": "DT_FLOAT",
"layout": "NCHW",
"shape": [
32,
21,
10,
15
]
},
"name": "Conv2DBackpropInput15",
"output0": {
"data_type": "DT_FLOAT",
"layout": "NCHW",
"shape": [
32,
21,
288,
448
]
},
"reason": "InferShapeFailed!",
"type": "Conv2DBackpropInput"
}
],
"session_id": 14
}
284 changes: 162 additions & 122 deletions chapter_13_chapter_computer-vision/multiscale-object-detection.ipynb

Large diffs are not rendered by default.

121 changes: 80 additions & 41 deletions chapter_13_chapter_computer-vision/ssd.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,22 @@
"execution_count": 1,
"id": "e6901d6d",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[WARNING] CORE(503527,ffff9d2cb640,python):2025-12-22-00:20:08.955.715 [mindspore/core/utils/ms_context.cc:533] GetJitLevel] Set jit level to O2 for rank table startup method.\n"
]
}
],
"source": [
"%matplotlib inline\n",
"import mindspore\n",
"from mindspore import nn\n",
"from mindspore import mint\n",
"import sys\n",
"sys.path.insert(0, \"..\")\n",
"from d2l import mindspore as d2l\n",
"\n",
"\n",
Expand Down Expand Up @@ -90,8 +101,8 @@
"def forward(x, block):\n",
" return block(x)\n",
"\n",
"Y1 = forward(d2l.zeros((2, 8, 20, 20)), cls_predictor(8, 5, 10))\n",
"Y2 = forward(d2l.zeros((2, 16, 10, 10)), cls_predictor(16, 3, 10))\n",
"Y1 = forward(mint.zeros((2, 8, 20, 20)), cls_predictor(8, 5, 10))\n",
"Y2 = forward(mint.zeros((2, 16, 10, 10)), cls_predictor(16, 3, 10))\n",
"Y1.shape, Y2.shape"
]
},
Expand All @@ -103,10 +114,10 @@
"outputs": [],
"source": [
"def flatten_pred(pred):\n",
" return d2l.flatten(pred.permute(0, 2, 3, 1)) # flatten不改变0轴的size\n",
" return mint.flatten(pred.permute(0, 2, 3, 1), start_dim=1) # flatten不改变0轴的size\n",
"\n",
"def concat_preds(preds):\n",
" return d2l.concat([flatten_pred(p) for p in preds], axis=1)"
" return mint.concat([flatten_pred(p) for p in preds], dim=1)"
]
},
{
Expand Down Expand Up @@ -175,7 +186,7 @@
}
],
"source": [
"forward(d2l.zeros((2, 3, 20, 20)), down_sample_blk(3, 10)).shape"
"forward(mint.zeros((2, 3, 20, 20)), down_sample_blk(3, 10)).shape"
]
},
{
Expand Down Expand Up @@ -211,7 +222,7 @@
" blk.append(down_sample_blk(num_filters[i], num_filters[i+1]))\n",
" return nn.SequentialCell(*blk)\n",
"\n",
"forward(d2l.zeros((2, 3, 256, 256)), base_net()).shape"
"forward(mint.zeros((2, 3, 256, 256)), base_net()).shape"
]
},
{
Expand Down Expand Up @@ -296,7 +307,7 @@
" X, anchors[i], cls_preds[i], bbox_preds[i] = blk_forward(\n",
" X, getattr(self, f'blk_{i}'), sizes[i], ratios[i],\n",
" getattr(self, f'cls_{i}'), getattr(self, f'bbox_{i}'))\n",
" anchors = d2l.concat(anchors, axis=1)\n",
" anchors = mint.concat(anchors, dim=1)\n",
" cls_preds = concat_preds(cls_preds)\n",
" cls_preds = cls_preds.reshape(\n",
" cls_preds.shape[0], -1, self.num_classes + 1)\n",
Expand All @@ -322,7 +333,7 @@
],
"source": [
"net = TinySSD(num_classes=1)\n",
"X = d2l.zeros((32, 3, 256, 256))\n",
"X = mint.zeros((32, 3, 256, 256))\n",
"anchors, cls_preds, bbox_preds = net(X)\n",
"\n",
"print('output anchors:', anchors.shape)\n",
Expand Down Expand Up @@ -357,13 +368,18 @@
"output_type": "stream",
"text": [
"read 1000 training examples\n",
"read 100 validation examples\n"
"read 100 validation examples\n",
"250\n"
]
}
],
"source": [
"batch_size = 32\n",
"train_iter, _ = d2l.load_data_bananas(batch_size) # "
"# 将 Batch Size 设为 4\n",
"# 后续的 multibox_target 函数包含大量基于 CPU 的 Python 循环操作。如果 Batch Size 过大,CPU 计算压力过大导致流水线阻塞,NPU 长期空闲,程序会表现为卡死。\n",
"# 改小 Batch Size 可显著减少单次迭代的 CPU 负担,快速跑通代码。\n",
"batch_size = 4\n",
"train_iter, _ = d2l.load_data_bananas(batch_size) # \n",
"print(train_iter.get_dataset_size())"
]
},
{
Expand Down Expand Up @@ -398,7 +414,7 @@
"def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):\n",
" batch_size, num_classes = cls_preds.shape[0], cls_preds.shape[2]\n",
" cls = cls_loss(cls_preds.reshape(-1, num_classes),\n",
" cls_labels.reshape(-1)).reshape(batch_size, -1).mean(axis=1)\n",
" cls_labels.reshape(-1).astype(mindspore.int32)).reshape(batch_size, -1).mean(axis=1)\n",
" bbox = bbox_loss(bbox_preds * bbox_masks,\n",
" bbox_labels * bbox_masks).mean(axis=1)\n",
" return cls + bbox"
Expand All @@ -417,7 +433,7 @@
" cls_labels.dtype) == cls_labels).sum())\n",
"\n",
"def bbox_eval(bbox_preds, bbox_labels, bbox_masks):\n",
"# return float((d2l.abs((bbox_labels - bbox_preds) * bbox_masks)).sum())\n",
"# return float((mint.abs((bbox_labels - bbox_preds) * bbox_masks)).sum())\n",
" return float(((bbox_labels - bbox_preds) * bbox_masks).abs().sum())"
]
},
Expand Down Expand Up @@ -493,7 +509,7 @@
" label[:, 1:], anchors)\n",
" bbox_mask = ops.tile((anchors_bbox_map >= 0).float().unsqueeze(-1), (1, 4))\n",
" # 将类标签和分配的边界框坐标初始化为零\n",
" class_labels = ops.zeros(num_anchors, dtype=mindspore.int32)\n",
" class_labels = ops.zeros(num_anchors, dtype=mindspore.int64)\n",
" assigned_bb = ops.zeros((num_anchors, 4), dtype=mindspore.float32)\n",
" # 使用真实边界框来标记锚框的类别。\n",
" # 如果一个锚框没有被分配,标记其为背景(值为零)\n",
Expand Down Expand Up @@ -525,7 +541,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"(32, 3, 256, 256) (32, 1, 5)\n"
"(4, 3, 256, 256) (4, 1, 5)\n"
]
}
],
Expand All @@ -547,8 +563,8 @@
"output_type": "stream",
"text": [
"output anchors: (1, 5444, 4)\n",
"output class preds: (32, 5444, 2)\n",
"output bbox preds: (32, 21776)\n"
"output class preds: (4, 5444, 2)\n",
"output bbox preds: (4, 21776)\n"
]
}
],
Expand All @@ -567,21 +583,13 @@
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[WARNING] KERNEL(269803,2aecc71afd80,python):2023-02-23-20:56:28.763.896 [mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h:51] CalShapesSizeInBytes] For 'Argmax', the shapes[0] is ( )\n",
"[WARNING] KERNEL(269803,2aecc71afd80,python):2023-02-23-20:56:28.763.965 [mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h:51] CalShapesSizeInBytes] For 'Argmax', the shapes[0] is ( )\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"bbox_labels: (32, 21776) (32, 21776)\n",
"bbox_masks: (32, 21776) Float32\n",
"cls_labels: (32, 5444) Int32\n"
".bbox_labels: (4, 21776) (4, 21776)\n",
"bbox_masks: (4, 21776) Float32\n",
"cls_labels: (4, 5444) Int64\n"
]
}
],
Expand All @@ -601,7 +609,7 @@
{
"data": {
"text/plain": [
"Tensor(shape=[], dtype=Float32, value= 0.69856)"
"Tensor(shape=[], dtype=Float32, value= 0.705361)"
]
},
"execution_count": 22,
Expand All @@ -616,18 +624,34 @@
},
{
"cell_type": "markdown",
"id": "9c78354a",
"id": "11899268-b5c4-4b96-bfbb-3a372813b9e4",
"metadata": {},
"source": [
"# BUG 直接运行的话,这里会卡住"
"#### 13.7.2.3. 单批次试运行\n",
"从数据集中取出一批数据,完整走一遍全部过程,旨在验证各环节的张量形状和计算逻辑是否正确,只跑一轮就退出,不进行参数更新。"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 26,
"id": "5e0d0317",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 3, 256, 256) (4, 1, 5)\n",
"output anchors: (1, 5444, 4)\n",
"output class preds: (4, 5444, 2)\n",
"output bbox preds: (4, 21776)\n",
"bbox_labels: (4, 21776) (4, 21776)\n",
"bbox_masks: (4, 21776) Float32\n",
"cls_labels: (4, 5444) Int64\n",
"[0.713037 0.70012754 0.6992287 0.707439 ]\n"
]
}
],
"source": [
"#forward\n",
"for X, Y in train_iter:\n",
Expand All @@ -650,12 +674,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"id": "6ef20694",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 3, 256, 256) (4, 1, 5)\n",
"output anchors: (1, 5444, 4)\n",
"output class preds: (4, 5444, 2)\n",
"output bbox preds: (4, 21776)\n",
"bbox_labels: (4, 21776) (4, 21776)\n",
"bbox_masks: (4, 21776) Float32\n",
"cls_labels: (4, 5444) Int64\n",
"[0.7040471 0.70199287 0.7137333 0.69965523]\n"
]
}
],
"source": [
"X, Y = next(train_iter)\n",
"X, Y = next(train_iter.create_tuple_iterator())\n",
"print(X.shape, Y.shape)\n",
"anchors, cls_preds, bbox_preds = net(X)\n",
"print('output anchors:', anchors.shape)\n",
Expand All @@ -677,7 +716,7 @@
"id": "6912bb2d",
"metadata": {},
"source": [
"#### 13.7.2.3. 训练模型"
"#### 13.7.2.4. 训练模型"
]
},
{
Expand Down Expand Up @@ -730,9 +769,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10",
"language": "python",
"name": "python3"
"name": "py310"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -744,7 +783,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.10.14"
},
"toc": {
"base_numbering": 1,
Expand Down
Loading