From 044c6ef5527f7318b3592e6a717caccb2275e866 Mon Sep 17 00:00:00 2001 From: AIWintermuteAI Date: Wed, 8 Dec 2021 22:09:43 +0100 Subject: [PATCH 1/3] yolov3 temp (working, not finished) --- .../kendryte_sdk/include/sipeed_yolo2.h | 11 +- components/kendryte_sdk/src/sipeed_yolo2.c | 133 +++++++--- .../micropython/port/src/Maix/Maix_kpu.c | 246 +++++++++++++++++- 3 files changed, 352 insertions(+), 38 deletions(-) diff --git a/components/kendryte_sdk/include/sipeed_yolo2.h b/components/kendryte_sdk/include/sipeed_yolo2.h index a9454286d..6d1627c6e 100644 --- a/components/kendryte_sdk/include/sipeed_yolo2.h +++ b/components/kendryte_sdk/include/sipeed_yolo2.h @@ -25,14 +25,17 @@ typedef struct float nms_value; uint32_t coords; uint32_t anchor_number; + uint8_t branch_number; + int wh[2]; + uint8_t ver; float *anchor; uint32_t image_width; uint32_t image_height; uint32_t classes; uint32_t net_width; uint32_t net_height; - uint32_t layer_width; - uint32_t layer_height; + uint32_t layer_width[2]; + uint32_t layer_height[2]; uint32_t boxes_number; uint32_t output_number; float scale; @@ -40,6 +43,8 @@ typedef struct void *boxes; //uint8_t *input; float *output; + float *output0; + float *output1; float *probs_buf; float **probs; float *activate; @@ -48,6 +53,8 @@ typedef struct typedef void (*callback_draw_box)(uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2, uint32_t class, float prob);; +//uint8_t _branch; + int region_layer_init(region_layer_t *rl, void* ctx); void region_layer_deinit(region_layer_t *rl); void region_layer_run(region_layer_t *rl, obj_info_t *obj_info); diff --git a/components/kendryte_sdk/src/sipeed_yolo2.c b/components/kendryte_sdk/src/sipeed_yolo2.c index 0370b62de..a7ba39551 100644 --- a/components/kendryte_sdk/src/sipeed_yolo2.c +++ b/components/kendryte_sdk/src/sipeed_yolo2.c @@ -2,6 +2,7 @@ #include "sipeed_kpu.h" #include #include +#include #include "printf.h" // #include "lcd.h" @@ -24,11 +25,13 @@ typedef struct } 
__attribute__((aligned(8))) sortable_box_t; +uint8_t _branch; + int region_layer_init(region_layer_t *rl, void* ctx) { int flag = 0; - uint16_t wi,hi,chi; - uint16_t wo,ho,cho; + uint16_t wi, hi, chi; + uint16_t wo[2], ho[2], cho[2]; size_t size; int kmodel_type=sipeed_kpu_model_get_type(ctx); @@ -39,26 +42,57 @@ int region_layer_init(region_layer_t *rl, void* ctx) return -1; } - if(sipeed_kpu_model_get_output_shape(ctx, &wo, &ho, &cho) != SIPEED_KPU_ERR_NONE) - { - // mp_printf(&mp_plat_print, "[MAIXPY]rl: can't fetch last layer!\r\n"); - return -1; - } + for (uint8_t i = 0; i < rl->branch_number; i++) + { + if(sipeed_kpu_get_outputs_shape(ctx, i, &wo[i], &ho[i], &cho[i]) != SIPEED_KPU_ERR_NONE) + { + // mp_printf(&mp_plat_print, "[MAIXPY]rl: can't fetch last layer!\r\n"); + return -1; + } + } + + //printf("%d %d %d \r\n", wo[0], ho[0], cho[0]); + //printf("%d %d %d \r\n", wo[1], ho[1], cho[1]); rl->coords = 4; rl->image_width = wi; rl->image_height = hi; - rl->classes = cho / 5 - 5; + rl->classes = cho[0] / rl->anchor_number - 5; rl->net_width = wi; rl->net_height = hi; - rl->layer_width = wo; - rl->layer_height = ho; - rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number); - rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); + + for (uint8_t i = 0; i < rl->branch_number; i++) + { + rl->layer_width[i] = wo[i]; + rl->layer_height[i] = ho[i]; + rl->boxes_number += (rl->layer_width[i] * rl->layer_height[i] * rl->anchor_number); + rl->wh[i] = rl->layer_width[i] * rl->layer_height[i]; + //sipeed_kpu_get_output(ctx, i, &(rl->output[i]), &size); + //printf("%d size \r\n", size); + } + + sipeed_kpu_get_output(ctx, 0, &(rl->output0), &size); + + + //for (uint32_t i = 0; i < size; i++) + //{ + //printf("%f output1 \r\n", rl->output[0][i]); + //} + + sipeed_kpu_get_output(ctx, 1, &(rl->output1), &size); + + rl->output = rl->output0; + _branch = 0; + + //printf("%d anchor num \r\n", rl->anchor_number); + //printf("%d box num \r\n", 
rl->boxes_number); + + //_branch = 0; + + //rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); - sipeed_kpu_get_output(ctx, 0, &(rl->output), &size); - + //module output -> rl output //mp_printf(&mp_plat_print, "size=%ld\r\n",size); //rl->scale = output_scale; @@ -109,8 +143,10 @@ int region_layer_init(region_layer_t *rl, void* ctx) rl->activate[i] = 1.0 / (1.0 + expf(-(i * rl->scale + rl->bias))); rl->softmax[i] = expf(rl->scale * (i - 255)); }*/ - for (uint32_t i = 0; i < rl->boxes_number; i++){ - rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]);} + for (uint32_t i = 0; i < rl->boxes_number; i++) + { + rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); + } return 0; @@ -145,11 +181,11 @@ static void activate_array(region_layer_t *rl, int index, int n) static int entry_index(region_layer_t *rl, int location, int entry) { - int wh = rl->layer_width * rl->layer_height; - int n = location / wh; - int loc = location % wh; - return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc; + int n = location / rl->wh[_branch]; + int loc = location % rl->wh[_branch]; + + return n * rl->wh[_branch] * (rl->coords + rl->classes + 1) + entry * rl->wh[_branch] + loc; } static void softmax(float *data, int n, int stride) @@ -195,19 +231,24 @@ static void forward_region_layer(region_layer_t *rl) //for (index = 0; index < rl->output_number; index++) // rl->output[index] = rl->input[index] * rl->scale + rl->bias; + + for (int n = 0; n < rl->anchor_number; ++n) { - index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0); - activate_array(rl, index, 2 * rl->layer_width * rl->layer_height); - index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4); - activate_array(rl, index, rl->layer_width * rl->layer_height); - } + index = entry_index(rl, n * rl->layer_width[_branch] * rl->layer_height[_branch], 0); + activate_array(rl, index, 2 * rl->layer_width[_branch] * rl->layer_height[_branch]); + index = entry_index(rl, n * 
rl->layer_width[_branch] * rl->layer_height[_branch], 4); + activate_array(rl, index, rl->layer_width[_branch] * rl->layer_height[_branch]); - index = entry_index(rl, 0, rl->coords + 1); + index = entry_index(rl, n * rl->layer_width[_branch] * rl->layer_height[_branch], 5); + activate_array(rl, index, rl->classes * rl->layer_width[_branch] * rl->layer_height[_branch]); + } + /* + index = entry_index(rl, 0, rl->coords + 1, i); softmax_cpu(rl->output + index, rl->classes, rl->anchor_number,\ - rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height,\ - rl->layer_width * rl->layer_height); - + rl->output_number / rl->anchor_number, rl->layer_width[i] * rl->layer_height[i],\ + rl->layer_width[i] * rl->layer_height[i]); + */ } static void correct_region_boxes(region_layer_t *rl, box_t *boxes) @@ -251,20 +292,22 @@ static box_t get_region_box(float *x, float *biases, int n, int index, int i, in b.x = (i + x[index + 0 * stride]) / w; b.y = (j + x[index + 1 * stride]) / h; - b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w; - b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h; + b.w = expf(x[index + 2 * stride]) * biases[2 * n + 6 * _branch]; + b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1 + 6 * _branch]; return b; } static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes) { - uint32_t layer_width = rl->layer_width; - uint32_t layer_height = rl->layer_height; uint32_t anchor_number = rl->anchor_number; + uint8_t branch_num = rl->branch_number; uint32_t classes = rl->classes; uint32_t coords = rl->coords; float threshold = rl->threshold; + uint32_t layer_width = rl->layer_width[_branch]; + uint32_t layer_height = rl->layer_height[_branch]; + for (int i = 0; i < layer_width * layer_height; ++i) { int row = i / layer_width; @@ -272,13 +315,18 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro for (int n = 0; n < anchor_number; ++n) { - int index = n * layer_width * 
layer_height + i; + + int index = n * layer_width * layer_height + i + 240 * _branch; + //printf("i %d n %d b %d \r\n", i, n, _branch); + //printf("index %d \r\n", index); for (int j = 0; j < classes; ++j) probs[index][j] = 0; + int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords); int box_index = entry_index(rl, n * layer_width * layer_height + i, 0); float scale = predictions[obj_index]; + //printf("scale %f \r\n", scale); boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, layer_width, layer_height, layer_width * layer_height); @@ -287,13 +335,16 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro for (int j = 0; j < classes; ++j) { + int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j); + //(rl->coords + rl->classes + 1) * i + 5 float prob = scale * predictions[class_index]; probs[index][j] = (prob > threshold) ? prob : 0; if (prob > max) max = prob; } + //printf("anchor loop 6 \r\n" ); probs[index][classes] = max; } } @@ -430,11 +481,23 @@ static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info) } void region_layer_run(region_layer_t *rl, obj_info_t *obj_info) -{ +{ + //printf("run \r\n"); + + for (uint8_t i = 0; i < rl->branch_number; i++) + { forward_region_layer(rl); + //printf("run1 \r\n"); get_region_boxes(rl, rl->output, rl->probs, rl->boxes); + _branch = 1; + rl->output = rl->output1; + } + + //printf("run2 \r\n"); do_nms_sort(rl, rl->boxes, rl->probs); + //printf("run3 \r\n"); region_layer_output(rl, obj_info); + //printf("run4 \r\n"); } void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback) diff --git a/components/micropython/port/src/Maix/Maix_kpu.c b/components/micropython/port/src/Maix/Maix_kpu.c index dad3aae86..86666f34e 100644 --- a/components/micropython/port/src/Maix/Maix_kpu.c +++ b/components/micropython/port/src/Maix/Maix_kpu.c @@ -541,7 +541,7 @@ STATIC 
MP_DEFINE_CONST_FUN_OBJ_KW(py_kpu_class_set_outputs_obj, 4, py_kpu_class_ typedef struct py_kpu_class_yolo_args_obj { mp_obj_base_t base; - mp_obj_t threshold, nms_value, anchor_number, anchor, rl_args; + mp_obj_t threshold, nms_value, anchor_number, branch_number, anchor, rl_args; } __attribute__((aligned(8))) py_kpu_class_yolo_args_obj_t; typedef struct py_kpu_class_region_layer_arg @@ -549,6 +549,7 @@ typedef struct py_kpu_class_region_layer_arg float threshold; float nms_value; int anchor_number; + int branch_number; float *anchor; }__attribute__((aligned(8))) py_kpu_class_yolo_region_layer_arg_t; @@ -796,6 +797,117 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_KW(py_kpu_class_init_yolo2_obj, 5, py_kpu_class_i /////////////////////////////////////////////////////////////////////////////// +STATIC mp_obj_t py_kpu_class_init_yolo3(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) +{ + enum { ARG_kpu_net, ARG_threshold, ARG_nms_value, ARG_anchor_number, ARG_branch_number, ARG_anchor, ARG_dma}; + static const mp_arg_t allowed_args[] = { + { MP_QSTR_kpu_net, MP_ARG_OBJ, {.u_obj = mp_const_none} }, + { MP_QSTR_threshold, MP_ARG_OBJ, {.u_obj = mp_const_none} }, + { MP_QSTR_nms_value, MP_ARG_OBJ, {.u_obj = mp_const_none} }, + { MP_QSTR_anchor_number, MP_ARG_INT, {.u_int = 0x0} }, + { MP_QSTR_branch_number, MP_ARG_INT, {.u_int = 0x0} }, + { MP_QSTR_anchor, MP_ARG_OBJ, {.u_obj = mp_const_none} }, + { MP_QSTR_dma, MP_ARG_INT, {.u_int = -1} }, + }; + mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; + mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args); + + mp_printf(&mp_plat_print, "YOLO v3\n"); + + if(mp_obj_get_type(args[ARG_kpu_net].u_obj) == &py_kpu_net_obj_type) + { + float threshold, nms_value, *anchor = NULL; + int anchor_number, branch_number; + + sipeed_kpu_use_dma(args[ARG_dma].u_int); + threshold = mp_obj_get_float(args[ARG_threshold].u_obj); + if(!(threshold >= 0.0 && threshold <= 1.0)) + { + 
mp_raise_ValueError("[MAIXPY]kpu: threshold only support 0 to 1"); + return mp_const_false; + } + + nms_value = mp_obj_get_float(args[ARG_nms_value].u_obj); + if(!(nms_value >= 0.0 && nms_value <= 1.0)) + { + mp_raise_ValueError("[MAIXPY]kpu: nms_value only support 0 to 1"); + return mp_const_false; + } + + + branch_number = args[ARG_branch_number].u_int; + anchor_number = args[ARG_anchor_number].u_int; + + if(anchor_number > 0) + { + //need free + anchor = (float*)malloc(anchor_number * 2 * branch_number * sizeof(float)); + + mp_obj_t *items; + mp_obj_get_array_fixed_n(args[ARG_anchor].u_obj, + args[ARG_anchor_number].u_int * 2 * branch_number, &items); + + for(uint8_t index = 0; index < args[ARG_anchor_number].u_int * 2 * branch_number; index++) + anchor[index] = mp_obj_get_float(items[index]); + } + else + { + mp_raise_ValueError("[MAIXPY]kpu: anchor_number should > 0"); + return mp_const_false; + } + + py_kpu_class_yolo_args_obj_t *yolo_args = m_new_obj(py_kpu_class_yolo_args_obj_t); + + yolo_args->base.type = &py_kpu_class_yolo_args_obj_type; + + yolo_args->threshold = mp_obj_new_float(threshold); + yolo_args->nms_value = mp_obj_new_float(nms_value); + yolo_args->anchor_number = mp_obj_new_int(anchor_number); + yolo_args->branch_number = mp_obj_new_int(branch_number); + + mp_obj_t *tuple, *tmp; + + tmp = (mp_obj_t *)malloc(anchor_number * 2 * branch_number * sizeof(mp_obj_t)); + + for (uint8_t index = 0; index < anchor_number * 2 * branch_number; index++) + tmp[index] = mp_obj_new_float(anchor[index]); + + tuple = mp_obj_new_tuple(anchor_number * 2 * branch_number, tmp); + + free(tmp); + + yolo_args->anchor = tuple; + + //need free + py_kpu_class_yolo_region_layer_arg_t *rl_arg = malloc(sizeof(py_kpu_class_yolo_region_layer_arg_t)); + + rl_arg->threshold = threshold; + rl_arg->nms_value = nms_value; + rl_arg->anchor_number = anchor_number; + rl_arg->branch_number = branch_number; + rl_arg->anchor = anchor; + + yolo_args->rl_args = MP_OBJ_FROM_PTR(rl_arg); + + 
py_kpu_net_obj_t *kpu_net = MP_OBJ_TO_PTR(args[ARG_kpu_net].u_obj); + + kpu_net->net_args = MP_OBJ_FROM_PTR(yolo_args); + + kpu_net->net_deinit = MP_OBJ_FROM_PTR(py_kpu_calss_yolo2_deinit); + + return mp_const_true; + } + else + { + mp_raise_TypeError("[MAIXPY]kpu: kpu_net type error"); + return mp_const_false; + } +} + +STATIC MP_DEFINE_CONST_FUN_OBJ_KW(py_kpu_class_init_yolo3_obj, 5, py_kpu_class_init_yolo3); + +/////////////////////////////////////////////////////////////////////////////// + typedef struct py_kpu_class_yolo2_find_obj { mp_obj_base_t base; @@ -996,6 +1108,136 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_KW(py_kpu_class_run_yolo2_obj, 2, py_kpu_class_ru /////////////////////////////////////////////////////////////////////////////// +STATIC mp_obj_t py_kpu_class_run_yolo3(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) +{ + if(mp_obj_get_type(pos_args[0]) == &py_kpu_net_obj_type) + { + py_kpu_net_obj_t *kpu_net = MP_OBJ_TO_PTR(pos_args[0]); + image_t *arg_img = py_image_cobj(pos_args[1]); + //PY_ASSERT_TRUE_MSG(IM_IS_MUTABLE(arg_img), "Image format is not supported."); + + uint16_t w0=0; + uint16_t h0=0; + uint16_t ch0=0; + //mp_printf(&mp_plat_print, "1\n"); + + int kmodel_type=sipeed_kpu_model_get_type(kpu_net->kmodel_ctx); + if(abs(kmodel_type)==3 || abs(kmodel_type)==4){ + if(sipeed_kpu_model_get_input_shape(kpu_net->kmodel_ctx, &w0, &h0, &ch0) != SIPEED_KPU_ERR_NONE) + { + mp_raise_ValueError("[MAIXPY]kpu: first layer not conv layer!\r\n"); + return mp_const_none; + } + } + if(check_img_format(arg_img, w0, h0, ch0, kmodel_type)) + { + mp_raise_ValueError("[MAIXPY]kpu: check img format err!\r\n"); + return mp_const_none; + } + /*****************************region prepare*************************************************/ + //mp_printf(&mp_plat_print, "2\n"); + + py_kpu_class_yolo_args_obj_t *net_args = MP_OBJ_TO_PTR(kpu_net->net_args); + py_kpu_class_yolo_region_layer_arg_t *rl_arg = net_args->rl_args; + region_layer_t kpu_detect_rl; + 
kpu_detect_rl.anchor_number = rl_arg->anchor_number; + kpu_detect_rl.branch_number = rl_arg->branch_number; + kpu_detect_rl.anchor = rl_arg->anchor; + kpu_detect_rl.threshold = rl_arg->threshold; + kpu_detect_rl.nms_value = rl_arg->nms_value; + int er = region_layer_init(&kpu_detect_rl, kpu_net->kmodel_ctx); + if(er) + { + //mp_printf(&mp_plat_print, "flag %d \n", er); + mp_raise_ValueError("[MAIXPY]kpu: region_layer_init err!\r\n"); + return mp_const_none; + } + /*************************************************************************************/ + //mp_printf(&mp_plat_print, "3\n"); + + g_ai_done_flag = 0; + sipeed_kpu_err_t ret = sipeed_kpu_model_run(kpu_net->kmodel_ctx, arg_img->pix_ai, K210_DMA_CH_KPU, ai_done, NULL); + //mp_printf(&mp_plat_print, "4\n"); + if(ret != SIPEED_KPU_ERR_NONE) + { + char* msg = get_kpu_err_str(ret); + mp_raise_msg(&mp_type_OSError, msg); + } + while (!g_ai_done_flag) + ; + g_ai_done_flag = 0; + //mp_printf(&mp_plat_print, "5\n"); + /****************************start region layer***************************************/ + static obj_info_t mpy_kpu_detect_info; + region_layer_run(&kpu_detect_rl, &mpy_kpu_detect_info); + //mp_printf(&mp_plat_print, "6\n"); + uint8_t obj_num = 0; + obj_num = mpy_kpu_detect_info.obj_number; + + if (obj_num > 0) + { + list_t out; + list_init(&out, sizeof(py_kpu_class_yolo2__list_link_data_t)); + + for (uint8_t index = 0; index < obj_num; index++) + { + py_kpu_class_yolo2__list_link_data_t lnk_data; + lnk_data.rect.x = mpy_kpu_detect_info.obj[index].x1; + lnk_data.rect.y = mpy_kpu_detect_info.obj[index].y1; + lnk_data.rect.w = mpy_kpu_detect_info.obj[index].x2 - mpy_kpu_detect_info.obj[index].x1; + lnk_data.rect.h = mpy_kpu_detect_info.obj[index].y2 - mpy_kpu_detect_info.obj[index].y1; + lnk_data.classid = mpy_kpu_detect_info.obj[index].classid; + lnk_data.value = mpy_kpu_detect_info.obj[index].prob; + + lnk_data.index = index; + lnk_data.objnum = obj_num; + list_push_back(&out, &lnk_data); + } + + 
mp_obj_list_t *objects_list = mp_obj_new_list(list_size(&out), NULL); + + for (size_t i = 0; list_size(&out); i++) + { + py_kpu_class_yolo2__list_link_data_t lnk_data; + list_pop_front(&out, &lnk_data); + + py_kpu_class_yolo2_find_obj_t *o = m_new_obj(py_kpu_class_yolo2_find_obj_t); + + o->base.type = &py_kpu_class_yolo2_find_type; + + o->x = mp_obj_new_int(lnk_data.rect.x); + o->y = mp_obj_new_int(lnk_data.rect.y); + o->w = mp_obj_new_int(lnk_data.rect.w); + o->h = mp_obj_new_int(lnk_data.rect.h); + o->classid = mp_obj_new_int(lnk_data.classid); + o->index = mp_obj_new_int(lnk_data.index); + o->value = mp_obj_new_float(lnk_data.value); + o->objnum = mp_obj_new_int(lnk_data.objnum); + + objects_list->items[i] = o; + } + region_layer_deinit(&kpu_detect_rl); + return objects_list; + } + else + { + region_layer_deinit(&kpu_detect_rl); + + return mp_const_none; + } + } + else + { + mp_raise_TypeError("[MAIXPY]kpu: kpu_net type error"); + return mp_const_false; + } + +} + +STATIC MP_DEFINE_CONST_FUN_OBJ_KW(py_kpu_class_run_yolo3_obj, 2, py_kpu_class_run_yolo3); + +/////////////////////////////////////////////////////////////////////////////// + typedef void (*call_net_arg_deinit)(mp_obj_t o); void call_deinit(call_net_arg_deinit call_back, mp_obj_t o) @@ -1818,7 +2060,9 @@ static const mp_map_elem_t globals_dict_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR_load_flash), (mp_obj_t)&py_kpu_class_load_flash_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_set_outputs), (mp_obj_t)&py_kpu_class_set_outputs_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_init_yolo2), (mp_obj_t)&py_kpu_class_init_yolo2_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_init_yolo3), (mp_obj_t)&py_kpu_class_init_yolo3_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_run_yolo2), (mp_obj_t)&py_kpu_class_run_yolo2_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_run_yolo3), (mp_obj_t)&py_kpu_class_run_yolo3_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_deinit), (mp_obj_t)&py_kpu_deinit_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_set_layers), (mp_obj_t)&py_kpu_set_layers_obj }, { 
MP_OBJ_NEW_QSTR(MP_QSTR_forward), (mp_obj_t)&py_kpu_forward_obj }, From 73615d2dd1c66dad1c033a6594c83bc436c4d095 Mon Sep 17 00:00:00 2001 From: AIWintermuteAI Date: Thu, 9 Dec 2021 21:02:23 +0100 Subject: [PATCH 2/3] added support for yolov3 --- .../kendryte_sdk/include/sipeed_yolo2.h | 7 +- components/kendryte_sdk/src/sipeed_yolo2.c | 114 ++++++++---------- .../micropython/port/src/Maix/Maix_kpu.c | 43 ++++--- 3 files changed, 75 insertions(+), 89 deletions(-) diff --git a/components/kendryte_sdk/include/sipeed_yolo2.h b/components/kendryte_sdk/include/sipeed_yolo2.h index 6d1627c6e..716b638bf 100644 --- a/components/kendryte_sdk/include/sipeed_yolo2.h +++ b/components/kendryte_sdk/include/sipeed_yolo2.h @@ -42,9 +42,7 @@ typedef struct float bias; void *boxes; //uint8_t *input; - float *output; - float *output0; - float *output1; + float *output[2]; float *probs_buf; float **probs; float *activate; @@ -53,12 +51,9 @@ typedef struct typedef void (*callback_draw_box)(uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2, uint32_t class, float prob);; -//uint8_t _branch; - int region_layer_init(region_layer_t *rl, void* ctx); void region_layer_deinit(region_layer_t *rl); void region_layer_run(region_layer_t *rl, obj_info_t *obj_info); void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback); - #endif diff --git a/components/kendryte_sdk/src/sipeed_yolo2.c b/components/kendryte_sdk/src/sipeed_yolo2.c index a7ba39551..c9bd8be7b 100644 --- a/components/kendryte_sdk/src/sipeed_yolo2.c +++ b/components/kendryte_sdk/src/sipeed_yolo2.c @@ -2,11 +2,8 @@ #include "sipeed_kpu.h" #include #include -#include #include "printf.h" -// #include "lcd.h" - /* start of region_layer.c*/ typedef struct @@ -24,7 +21,6 @@ typedef struct float **probs; } __attribute__((aligned(8))) sortable_box_t; - uint8_t _branch; int region_layer_init(region_layer_t *rl, void* ctx) @@ -35,7 +31,6 @@ int region_layer_init(region_layer_t *rl, void* ctx) size_t size; int 
kmodel_type=sipeed_kpu_model_get_type(ctx); - if(sipeed_kpu_model_get_input_shape(ctx, &wi, &hi, &chi) != SIPEED_KPU_ERR_NONE) { // mp_printf(&mp_plat_print, "[MAIXPY]rl: first layer not conv layer!\r\n"); @@ -51,9 +46,6 @@ int region_layer_init(region_layer_t *rl, void* ctx) } } - //printf("%d %d %d \r\n", wo[0], ho[0], cho[0]); - //printf("%d %d %d \r\n", wo[1], ho[1], cho[1]); - rl->coords = 4; rl->image_width = wi; rl->image_height = hi; @@ -64,34 +56,14 @@ int region_layer_init(region_layer_t *rl, void* ctx) for (uint8_t i = 0; i < rl->branch_number; i++) { - rl->layer_width[i] = wo[i]; - rl->layer_height[i] = ho[i]; - rl->boxes_number += (rl->layer_width[i] * rl->layer_height[i] * rl->anchor_number); - rl->wh[i] = rl->layer_width[i] * rl->layer_height[i]; - //sipeed_kpu_get_output(ctx, i, &(rl->output[i]), &size); - //printf("%d size \r\n", size); + rl->layer_width[i] = wo[i]; + rl->layer_height[i] = ho[i]; + rl->boxes_number = (i == 0 ? 0 : rl->boxes_number) + (rl->layer_width[i] * rl->layer_height[i] * rl->anchor_number); + rl->wh[i] = rl->layer_width[i] * rl->layer_height[i]; + sipeed_kpu_get_output(ctx, i, &rl->output[i], &size); } - sipeed_kpu_get_output(ctx, 0, &(rl->output0), &size); - - - //for (uint32_t i = 0; i < size; i++) - //{ - //printf("%f output1 \r\n", rl->output[0][i]); - //} - - sipeed_kpu_get_output(ctx, 1, &(rl->output1), &size); - - rl->output = rl->output0; - _branch = 0; - - //printf("%d anchor num \r\n", rl->anchor_number); - //printf("%d box num \r\n", rl->boxes_number); - - //_branch = 0; - - //rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); - + rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1)); //module output -> rl output //mp_printf(&mp_plat_print, "size=%ld\r\n",size); @@ -108,6 +80,7 @@ int region_layer_init(region_layer_t *rl, void* ctx) flag = -1; goto malloc_error; }*/ + rl->boxes = malloc(rl->boxes_number * sizeof(box_t)); if (rl->boxes == NULL) { @@ -126,6 +99,7 @@ int region_layer_init(region_layer_t *rl,
void* ctx) flag = -4; goto malloc_error; } + /*rl->activate = malloc(256 * sizeof(float)); if (rl->activate == NULL) { @@ -143,12 +117,12 @@ int region_layer_init(region_layer_t *rl, void* ctx) rl->activate[i] = 1.0 / (1.0 + expf(-(i * rl->scale + rl->bias))); rl->softmax[i] = expf(rl->scale * (i - 255)); }*/ + for (uint32_t i = 0; i < rl->boxes_number; i++) { rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]); } - return 0; malloc_error: //free(rl->output); @@ -172,11 +146,13 @@ void region_layer_deinit(region_layer_t *rl) static void activate_array(region_layer_t *rl, int index, int n) { - float *output = &rl->output[index]; + float *output = &rl->output[_branch][index]; //uint8_t *input = &rl->input[index]; for (int i = 0; i < n; ++i) + { output[i] = 1.0 / (1.0 + expf(-output[i]));//rl->activate[input[i]]; + } } static int entry_index(region_layer_t *rl, int location, int entry) @@ -231,8 +207,6 @@ static void forward_region_layer(region_layer_t *rl) //for (index = 0; index < rl->output_number; index++) // rl->output[index] = rl->input[index] * rl->scale + rl->bias; - - for (int n = 0; n < rl->anchor_number; ++n) { index = entry_index(rl, n * rl->layer_width[_branch] * rl->layer_height[_branch], 0); @@ -240,15 +214,20 @@ static void forward_region_layer(region_layer_t *rl) index = entry_index(rl, n * rl->layer_width[_branch] * rl->layer_height[_branch], 4); activate_array(rl, index, rl->layer_width[_branch] * rl->layer_height[_branch]); + if (rl->ver == 3) + { index = entry_index(rl, n * rl->layer_width[_branch] * rl->layer_height[_branch], 5); activate_array(rl, index, rl->classes * rl->layer_width[_branch] * rl->layer_height[_branch]); + } + } + + if (rl->ver == 2) + { + index = entry_index(rl, 0, rl->coords + 1); + softmax_cpu(rl->output[_branch] + index, rl->classes, rl->anchor_number,\ + rl->output_number / rl->anchor_number, rl->layer_width[_branch] * rl->layer_height[_branch],\ + rl->layer_width[_branch] * rl->layer_height[_branch]); } - /* - index = 
entry_index(rl, 0, rl->coords + 1, i); - softmax_cpu(rl->output + index, rl->classes, rl->anchor_number,\ - rl->output_number / rl->anchor_number, rl->layer_width[i] * rl->layer_height[i],\ - rl->layer_width[i] * rl->layer_height[i]); - */ } static void correct_region_boxes(region_layer_t *rl, box_t *boxes) @@ -286,14 +265,27 @@ static void correct_region_boxes(region_layer_t *rl, box_t *boxes) } } -static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride) +static box_t get_region_box(float *x, float *biases, int n, int index, + int i, int j, int w, int h, + int stride, region_layer_t *rl) { volatile box_t b; b.x = (i + x[index + 0 * stride]) / w; b.y = (j + x[index + 1 * stride]) / h; - b.w = expf(x[index + 2 * stride]) * biases[2 * n + 6 * _branch]; - b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1 + 6 * _branch]; + + if (rl->ver == 2) + { + b.w = expf(x[index + 2 * stride]) * biases[2 * n + rl->anchor_number * 2 * _branch] / w; + b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1 + rl->anchor_number * 2 * _branch] / h; + } + + if (rl->ver == 3) + { + b.w = expf(x[index + 2 * stride]) * biases[2 * n + rl->anchor_number * 2 * _branch]; + b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1 + rl->anchor_number * 2 * _branch]; + } + return b; } @@ -315,10 +307,8 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro for (int n = 0; n < anchor_number; ++n) { - - int index = n * layer_width * layer_height + i + 240 * _branch; - //printf("i %d n %d b %d \r\n", i, n, _branch); - //printf("index %d \r\n", index); + int index = n * layer_width * layer_height + i + + (_branch ? rl->layer_width[_branch - 1] * rl->layer_height[_branch - 1] * rl->anchor_number : 0); for (int j = 0; j < classes; ++j) probs[index][j] = 0; @@ -326,10 +316,10 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro int obj_index = entry_index(rl, n * layer_width * layer_height + i,
coords); int box_index = entry_index(rl, n * layer_width * layer_height + i, 0); float scale = predictions[obj_index]; - //printf("scale %f \r\n", scale); - boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row, - layer_width, layer_height, layer_width * layer_height); + boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, + col, row, layer_width, layer_height, + layer_width * layer_height, rl); float max = 0; @@ -337,14 +327,12 @@ static void get_region_boxes(region_layer_t *rl, float *predictions, float **pro { int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j); - //(rl->coords + rl->classes + 1) * i + 5 float prob = scale * predictions[class_index]; probs[index][j] = (prob > threshold) ? prob : 0; if (prob > max) max = prob; } - //printf("anchor loop 6 \r\n" ); probs[index][classes] = max; } } @@ -482,22 +470,14 @@ static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info) void region_layer_run(region_layer_t *rl, obj_info_t *obj_info) { - //printf("run \r\n"); - - for (uint8_t i = 0; i < rl->branch_number; i++) + for (_branch = 0; _branch < rl->branch_number; _branch++) { - forward_region_layer(rl); - //printf("run1 \r\n"); - get_region_boxes(rl, rl->output, rl->probs, rl->boxes); - _branch = 1; - rl->output = rl->output1; + forward_region_layer(rl); + get_region_boxes(rl, rl->output[_branch], rl->probs, rl->boxes); } - //printf("run2 \r\n"); do_nms_sort(rl, rl->boxes, rl->probs); - //printf("run3 \r\n"); region_layer_output(rl, obj_info); - //printf("run4 \r\n"); } void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback) diff --git a/components/micropython/port/src/Maix/Maix_kpu.c b/components/micropython/port/src/Maix/Maix_kpu.c index 86666f34e..789685396 100644 --- a/components/micropython/port/src/Maix/Maix_kpu.c +++ b/components/micropython/port/src/Maix/Maix_kpu.c @@ -541,7 +541,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_KW(py_kpu_class_set_outputs_obj, 
4, py_kpu_class_ typedef struct py_kpu_class_yolo_args_obj { mp_obj_base_t base; - mp_obj_t threshold, nms_value, anchor_number, branch_number, anchor, rl_args; + mp_obj_t threshold, nms_value, anchor_number, branch_number, ver, anchor, rl_args; } __attribute__((aligned(8))) py_kpu_class_yolo_args_obj_t; typedef struct py_kpu_class_region_layer_arg @@ -550,6 +550,7 @@ typedef struct py_kpu_class_region_layer_arg float nms_value; int anchor_number; int branch_number; + int ver; float *anchor; }__attribute__((aligned(8))) py_kpu_class_yolo_region_layer_arg_t; @@ -754,6 +755,7 @@ STATIC mp_obj_t py_kpu_class_init_yolo2(size_t n_args, const mp_obj_t *pos_args, yolo_args->threshold = mp_obj_new_float(threshold); yolo_args->nms_value = mp_obj_new_float(nms_value); yolo_args->anchor_number = mp_obj_new_int(anchor_number); + yolo_args->branch_number = mp_obj_new_int(1); mp_obj_t *tuple, *tmp; @@ -774,7 +776,9 @@ STATIC mp_obj_t py_kpu_class_init_yolo2(size_t n_args, const mp_obj_t *pos_args, rl_arg->threshold = threshold; rl_arg->nms_value = nms_value; rl_arg->anchor_number = anchor_number; + rl_arg->branch_number = 1; rl_arg->anchor = anchor; + rl_arg->ver = 2; yolo_args->rl_args = MP_OBJ_FROM_PTR(rl_arg); @@ -834,10 +838,15 @@ STATIC mp_obj_t py_kpu_class_init_yolo3(size_t n_args, const mp_obj_t *pos_args, return mp_const_false; } - branch_number = args[ARG_branch_number].u_int; anchor_number = args[ARG_anchor_number].u_int; + if(!(branch_number >= 1 && branch_number <= 2)) + { + mp_raise_ValueError("[MAIXPY]kpu: branch_number only supports 1 to 2"); + return mp_const_false; + } + if(anchor_number > 0) { //need free @@ -886,6 +895,7 @@ STATIC mp_obj_t py_kpu_class_init_yolo3(size_t n_args, const mp_obj_t *pos_args, rl_arg->anchor_number = anchor_number; rl_arg->branch_number = branch_number; rl_arg->anchor = anchor; + rl_arg->ver = 3; yolo_args->rl_args = MP_OBJ_FROM_PTR(rl_arg); @@ -1019,9 +1029,12 @@ STATIC mp_obj_t py_kpu_class_run_yolo2(size_t n_args, const mp_obj_t *pos_args,
py_kpu_class_yolo_region_layer_arg_t *rl_arg = net_args->rl_args; region_layer_t kpu_detect_rl; kpu_detect_rl.anchor_number = rl_arg->anchor_number; + kpu_detect_rl.branch_number = rl_arg->branch_number; + kpu_detect_rl.ver = rl_arg->ver; kpu_detect_rl.anchor = rl_arg->anchor; kpu_detect_rl.threshold = rl_arg->threshold; kpu_detect_rl.nms_value = rl_arg->nms_value; + if(region_layer_init(&kpu_detect_rl, kpu_net->kmodel_ctx)) { mp_raise_ValueError("[MAIXPY]kpu: region_layer_init err!\r\n"); @@ -1116,10 +1129,9 @@ STATIC mp_obj_t py_kpu_class_run_yolo3(size_t n_args, const mp_obj_t *pos_args, image_t *arg_img = py_image_cobj(pos_args[1]); //PY_ASSERT_TRUE_MSG(IM_IS_MUTABLE(arg_img), "Image format is not supported."); - uint16_t w0=0; - uint16_t h0=0; - uint16_t ch0=0; - //mp_printf(&mp_plat_print, "1\n"); + uint16_t w0 = 0; + uint16_t h0 = 0; + uint16_t ch0 = 0; int kmodel_type=sipeed_kpu_model_get_type(kpu_net->kmodel_ctx); if(abs(kmodel_type)==3 || abs(kmodel_type)==4){ @@ -1135,29 +1147,28 @@ STATIC mp_obj_t py_kpu_class_run_yolo3(size_t n_args, const mp_obj_t *pos_args, return mp_const_none; } /*****************************region prepare*************************************************/ - //mp_printf(&mp_plat_print, "2\n"); py_kpu_class_yolo_args_obj_t *net_args = MP_OBJ_TO_PTR(kpu_net->net_args); py_kpu_class_yolo_region_layer_arg_t *rl_arg = net_args->rl_args; region_layer_t kpu_detect_rl; kpu_detect_rl.anchor_number = rl_arg->anchor_number; kpu_detect_rl.branch_number = rl_arg->branch_number; + kpu_detect_rl.ver = rl_arg->ver; kpu_detect_rl.anchor = rl_arg->anchor; kpu_detect_rl.threshold = rl_arg->threshold; kpu_detect_rl.nms_value = rl_arg->nms_value; + int er = region_layer_init(&kpu_detect_rl, kpu_net->kmodel_ctx); if(er) { - //mp_printf(&mp_plat_print, "flag %d \n", er); - mp_raise_ValueError("[MAIXPY]kpu: region_layer_init err!\r\n"); + mp_raise_ValueError("[MAIXPY]kpu: region_layer_init err! 
\r\n"); return mp_const_none; } /*************************************************************************************/ - //mp_printf(&mp_plat_print, "3\n"); g_ai_done_flag = 0; sipeed_kpu_err_t ret = sipeed_kpu_model_run(kpu_net->kmodel_ctx, arg_img->pix_ai, K210_DMA_CH_KPU, ai_done, NULL); - //mp_printf(&mp_plat_print, "4\n"); + if(ret != SIPEED_KPU_ERR_NONE) { char* msg = get_kpu_err_str(ret); @@ -1166,11 +1177,11 @@ STATIC mp_obj_t py_kpu_class_run_yolo3(size_t n_args, const mp_obj_t *pos_args, while (!g_ai_done_flag) ; g_ai_done_flag = 0; - //mp_printf(&mp_plat_print, "5\n"); + /****************************start region layer***************************************/ static obj_info_t mpy_kpu_detect_info; region_layer_run(&kpu_detect_rl, &mpy_kpu_detect_info); - //mp_printf(&mp_plat_print, "6\n"); + uint8_t obj_num = 0; obj_num = mpy_kpu_detect_info.obj_number; @@ -2058,11 +2069,11 @@ static const mp_map_elem_t globals_dict_table[] = { { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_kpu) }, { MP_OBJ_NEW_QSTR(MP_QSTR_load), (mp_obj_t)&py_kpu_class_load_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_load_flash), (mp_obj_t)&py_kpu_class_load_flash_obj }, - { MP_OBJ_NEW_QSTR(MP_QSTR_set_outputs), (mp_obj_t)&py_kpu_class_set_outputs_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_set_outputs), (mp_obj_t)&py_kpu_class_set_outputs_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_init_yolo2), (mp_obj_t)&py_kpu_class_init_yolo2_obj }, - { MP_OBJ_NEW_QSTR(MP_QSTR_init_yolo3), (mp_obj_t)&py_kpu_class_init_yolo3_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_init_yolo3), (mp_obj_t)&py_kpu_class_init_yolo3_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_run_yolo2), (mp_obj_t)&py_kpu_class_run_yolo2_obj }, - { MP_OBJ_NEW_QSTR(MP_QSTR_run_yolo3), (mp_obj_t)&py_kpu_class_run_yolo3_obj }, + { MP_OBJ_NEW_QSTR(MP_QSTR_run_yolo3), (mp_obj_t)&py_kpu_class_run_yolo3_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_deinit), (mp_obj_t)&py_kpu_deinit_obj }, { MP_OBJ_NEW_QSTR(MP_QSTR_set_layers), (mp_obj_t)&py_kpu_set_layers_obj }, { 
MP_OBJ_NEW_QSTR(MP_QSTR_forward), (mp_obj_t)&py_kpu_forward_obj }, From c019d44a0a4d2d1b081f061f6778159ce4bcba4d Mon Sep 17 00:00:00 2001 From: Dmitry Maslov Date: Thu, 3 Apr 2025 15:37:47 +0200 Subject: [PATCH 3/3] update submodule path --- tools/flash/kflash_py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/flash/kflash_py b/tools/flash/kflash_py index 1ef6f4c0b..550828c76 160000 --- a/tools/flash/kflash_py +++ b/tools/flash/kflash_py @@ -1 +1 @@ -Subproject commit 1ef6f4c0b2cb8b1872b6ffe9337f4e02d5487fa6 +Subproject commit 550828c768b16ef329695d3f5eace3f6bcf14af2