/***************************************************************************//**
 * @file    memx_c_example2.cpp
 * @brief   Example 2: Multiple Models Coexist
 *
 * Two models are compiled into the same DFP file and coexist, meaning the two
 * models share no overlapping hardware resources.
 *
 * @note
 * Copyright (C) 2019-2022 MemryX Limited. All rights reserved.
 ******************************************************************************/

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include "memx/memx.h"

using namespace cv;

// Model: MobileNet v1 + MobileNet v2
#define MODEL_ID (0)
// MPU device group 0
#define GROUP_ID (0)

// Since the two models share the same procedure to run inference, we factor
// it out into a common sub-routine driven by a parameter structure.
typedef struct {
  uint8_t model_id; // model ID
  uint8_t group_id; // MPU device group ID
  uint8_t iport;    // input port ID
  void*   ifmap;    // input feature map
  uint8_t oport;    // output port ID
  void*   ofmap;    // output feature map
} RunInferenceConfig;

// Simple frame-in frame-out inference. The two models can run inference
// simultaneously because they use different ports.
memx_status run_inference(RunInferenceConfig* config) {
  memx_status status = MEMX_STATUS_OK;
  const int timeout = 200; // 200 ms

  // 1. Enable data transfer of both models to the device.
  if (memx_status_no_error(status)) {
    status = memx_set_stream_enable(config->model_id, 0);
  }

  // 2. Write the input feature map to the device to run inference.
  if (memx_status_no_error(status)) {
    status = memx_stream_ifmap(config->model_id, config->iport,
      config->ifmap, timeout);
  }

  // 3. Read the output feature map from the device after inference.
  if (memx_status_no_error(status)) {
    status = memx_stream_ofmap(config->model_id, config->oport,
      config->ofmap, timeout);
  }

  // 4. Disable data transfer of this model to the device.
  if (memx_status_no_error(status)) {
    status = memx_set_stream_disable(config->model_id, 0);
  }

  return status;
}

// Model 0 inference; this sub-routine runs in a background thread.
void* run_inference_model_0(void* arg) {
  // Assumes the input feature map uses only flow 0, format float32(224,224,3)
  float* ifmap;
  // Assumes the output feature map uses only flow 0, format float32(1,1,1000)
  float ofmap[1*1*1000]; // allocate memory space

  // 1. Pre-process the input feature map.
  Mat img = imread("image.png", IMREAD_COLOR);
  if (img.empty()) { // guard against a missing or unreadable image file
    printf(" - Model 0 failed to read input image\n");
    return NULL;
  }
  cv::resize(img, img, cv::Size(224, 224), 0, 0, cv::INTER_LINEAR);
  img.convertTo(img, CV_32F, 1.0/127.5, -1); // normalize to [-1, 1]
  ifmap = (float*)img.data;

  // 2. Run inference setup.
  RunInferenceConfig config;
  config.model_id = MODEL_ID; // model 0
  config.group_id = GROUP_ID; // device 0
  config.iport = 0;           // input port 0 (flow 0)
  config.ifmap = ifmap;       // input feature map
  config.oport = 0;           // output port 0 (flow 0)
  config.ofmap = ofmap;       // output feature map

  // 3. Run the common inference sub-routine.
  memx_status status = run_inference(&config);

  // 4. Post-process the output feature map.
  if (memx_status_no_error(status)) {
    int argmax = 0;
    for (int i = 1; i < 1000; ++i) {
      argmax = (ofmap[i] > ofmap[argmax]) ? i : argmax;
    }
    printf(" - Model 0 argmax = %d\n", argmax);
  } else {
    printf(" - Model 0 failed to run inference = %d\n", status);
  }

  return NULL;
}

// Model 1 inference; this sub-routine runs in a background thread.
void* run_inference_model_1(void* arg) {
  // Assumes the input feature map uses only flow 1, format float32(224,224,3)
  float* ifmap;
  // Assumes the output feature map uses only flow 1, format float32(1,1,1000)
  float ofmap[1*1*1000]; // allocate memory space

  // 1. Pre-process the input feature map.
  Mat img = imread("images/siamese_cat.jpg", IMREAD_COLOR);
  if (img.empty()) { // guard against a missing or unreadable image file
    printf(" - Model 1 failed to read input image\n");
    return NULL;
  }
  cv::resize(img, img, cv::Size(224, 224), 0, 0, cv::INTER_LINEAR);
  img.convertTo(img, CV_32F, 1.0/127.5, -1); // normalize to [-1, 1]
  ifmap = (float*)img.data;

  // 2. Run inference setup.
  RunInferenceConfig config;
  config.model_id = MODEL_ID; // shared model context 0 (same DFP)
  config.group_id = GROUP_ID; // device 0
  config.iport = 1;           // input port 1 (flow 1)
  config.ifmap = ifmap;       // input feature map
  config.oport = 1;           // output port 1 (flow 1)
  config.ofmap = ofmap;       // output feature map

  // 3. Run the common inference sub-routine.
  memx_status status = run_inference(&config);

  // 4. Post-process the output feature map.
  if (memx_status_no_error(status)) {
    int argmax = 0;
    for (int i = 1; i < 1000; ++i) {
      argmax = (ofmap[i] > ofmap[argmax]) ? i : argmax;
    }
    printf(" - Model 1 argmax = %d\n", argmax);
  } else {
    printf(" - Model 1 failed to run inference = %d\n", status);
  }

  return NULL;
}

// Main process: create two threads to run the inferences in parallel.
int main(void) {
  memx_status status = MEMX_STATUS_OK;
  pthread_t t0, t1;

  // 1. Bind MPU device group 0 as MX3:Cascade to the model.
  if (memx_status_no_error(status)) {
    status = memx_open(MODEL_ID, GROUP_ID, MEMX_DEVICE_CASCADE);
  }

  // 2. Download weight memory and the model to the device. Because the two
  // models are compiled together into one DFP file and coexist with no
  // overlapping hardware resources, we only need to download to the device
  // once.
  if (memx_status_no_error(status)) {
    status = memx_download_model(MODEL_ID, "models/mobilenet_v1_v2.dfp",
      0, // model_idx = 0
      MEMX_DOWNLOAD_TYPE_WTMEM_AND_MODEL);
  }

  // 3. Run the two models simultaneously using POSIX threads (Linux only).
  if (memx_status_no_error(status)) {
    if ((pthread_create(&t0, NULL, &run_inference_model_0, NULL) != 0)
      || (pthread_create(&t1, NULL, &run_inference_model_1, NULL) != 0)) {
      status = MEMX_STATUS_OTHERS;
    }
  }
  if (memx_status_no_error(status)) {
    pthread_join(t0, NULL);
    pthread_join(t1, NULL);
  }

  // 4. Always remember to clean up resources before leaving.
  memx_close(MODEL_ID);

  // End of process
  if (memx_status_no_error(status)) {
    printf("success.\n");
  } else {
    printf("failure.\n");
  }
  return 0;
}
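
// Build sketch (assumptions: the MemryX runtime links as -lmemx and OpenCV 4
// is discoverable via pkg-config; adjust library names and paths to match
// your SDK installation):
//
//   g++ memx_c_example2.cpp -o memx_c_example2 \
//       $(pkg-config --cflags --libs opencv4) -lmemx -lpthread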