/***************************************************************************//**
 * @file    memx_c_example2.cpp
 * @brief   Example 2: Multiple Models Coexist
 *
 * Two models are compiled into the same DFP file and coexist, meaning the two
 * models share no overlapping hardware resources.
 *
 * @note
 * Copyright (C) 2019-2022 MemryX Limited. All rights reserved.
 ******************************************************************************/

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include "memx/memx.h"

using namespace cv;

// Model: MobileNet v1 + MobileNet v2
#define MODEL_ID (0)
// MPU device group 0
#define GROUP_ID (0)

// Since the two models share the same procedure to run inference, we factor
// it out into a common sub-routine driven by a parameter structure.
typedef struct {
  uint8_t model_id; // model ID
  uint8_t group_id; // MPU device group ID
  uint8_t iport;    // input port ID
  void*   ifmap;    // input feature map
  uint8_t oport;    // output port ID
  void*   ofmap;    // output feature map
} RunInferenceConfig;

// Simple frame-in frame-out inference. The two models can run inference
// simultaneously because they use different ports.
memx_status run_inference(RunInferenceConfig* config) {
  memx_status status = MEMX_STATUS_OK;
  const int timeout = 200; // 200 ms

  // 1. Enable data transfer of both models to the device.
  if (memx_status_no_error(status)) {
    status = memx_set_stream_enable(config->model_id, 0);
  }

  // 2. Write the input feature map to the device to run inference.
  if (memx_status_no_error(status)) {
    status = memx_stream_ifmap(config->model_id, config->iport,
      config->ifmap, timeout);
  }

  // 3. Read the output feature map from the device after inference.
  if (memx_status_no_error(status)) {
    status = memx_stream_ofmap(config->model_id, config->oport,
      config->ofmap, timeout);
  }

  // 4. Disable data transfer of this model to the device.
  if (memx_status_no_error(status)) {
    status = memx_set_stream_disable(config->model_id, 0);
  }

  return status;
}

// Model 0 inference; this sub-routine runs in a background thread.
void* run_inference_model_0(void* arg) {
  // Assumes the input feature map uses only flow 0, format float32(224,224,3)
  float* ifmap;
  // Assumes the output feature map uses only flow 0, format float32(1,1,1000)
  float ofmap[1*1*1000]; // allocate memory space

  // 1. Pre-process the input feature map.
  Mat img = imread("image.png", IMREAD_COLOR);
  if (img.empty()) { // guard against a missing or unreadable image file
    printf(" - Model 0 failed to read input image\n");
    return NULL;
  }
  cv::resize(img, img, cv::Size(224, 224), 0, 0, cv::INTER_LINEAR);
  img.convertTo(img, CV_32F, 1.0/127.5, -1); // normalize to [-1, 1]
  ifmap = (float*)img.data;

  // 2. Run inference setup.
  RunInferenceConfig config;
  config.model_id = MODEL_ID; // model 0
  config.group_id = GROUP_ID; // device 0
  config.iport = 0;           // input port 0 (flow 0)
  config.ifmap = ifmap;       // input feature map
  config.oport = 0;           // output port 0 (flow 0)
  config.ofmap = ofmap;       // output feature map

  // 3. Run the common inference sub-routine.
  memx_status status = run_inference(&config);

  // 4. Post-process the output feature map.
  if (memx_status_no_error(status)) {
    int argmax = 0;
    for (int i = 1; i < 1000; ++i) {
      argmax = (ofmap[i] > ofmap[argmax]) ? i : argmax;
    }
    printf(" - Model 0 argmax = %d\n", argmax);
  } else {
    printf(" - Model 0 failed to run inference = %d\n", status);
  }

  return NULL;
}

// Model 1 inference; this sub-routine runs in a background thread.
void* run_inference_model_1(void* arg) {
  // Assumes the input feature map uses only flow 1, format float32(224,224,3)
  float* ifmap;
  // Assumes the output feature map uses only flow 1, format float32(1,1,1000)
  float ofmap[1*1*1000]; // allocate memory space

  // 1. Pre-process the input feature map.
  Mat img = imread("images/siamese_cat.jpg", IMREAD_COLOR);
  if (img.empty()) { // guard against a missing or unreadable image file
    printf(" - Model 1 failed to read input image\n");
    return NULL;
  }
  cv::resize(img, img, cv::Size(224, 224), 0, 0, cv::INTER_LINEAR);
  img.convertTo(img, CV_32F, 1.0/127.5, -1); // normalize to [-1, 1]
  ifmap = (float*)img.data;

  // 2. Run inference setup.
  RunInferenceConfig config;
  config.model_id = MODEL_ID; // shared model context 0 (same DFP)
  config.group_id = GROUP_ID; // device 0
  config.iport = 1;           // input port 1 (flow 1)
  config.ifmap = ifmap;       // input feature map
  config.oport = 1;           // output port 1 (flow 1)
  config.ofmap = ofmap;       // output feature map

  // 3. Run the common inference sub-routine.
  memx_status status = run_inference(&config);

  // 4. Post-process the output feature map.
  if (memx_status_no_error(status)) {
    int argmax = 0;
    for (int i = 1; i < 1000; ++i) {
      argmax = (ofmap[i] > ofmap[argmax]) ? i : argmax;
    }
    printf(" - Model 1 argmax = %d\n", argmax);
  } else {
    printf(" - Model 1 failed to run inference = %d\n", status);
  }

  return NULL;
}

// Main process: create two threads to run the inferences in parallel.
int main(void) {
  memx_status status = MEMX_STATUS_OK;
  pthread_t t0, t1;

  // 1. Bind MPU device group 0 as MX3:Cascade to the model.
  if (memx_status_no_error(status)) {
    status = memx_open(MODEL_ID, GROUP_ID, MEMX_DEVICE_CASCADE);
  }

  // 2. Download weight memory and the model to the device. Because the two
  // models are compiled together into one DFP file and coexist with no
  // overlapping hardware resources, we only need to download to the device
  // once.
  if (memx_status_no_error(status)) {
    status = memx_download_model(MODEL_ID, "models/mobilenet_v1_v2.dfp",
      0, // model_idx = 0
      MEMX_DOWNLOAD_TYPE_WTMEM_AND_MODEL);
  }

  // 3. Run the two models simultaneously using POSIX threads (Linux only).
  if (memx_status_no_error(status)) {
    if ((pthread_create(&t0, NULL, &run_inference_model_0, NULL) != 0)
      || (pthread_create(&t1, NULL, &run_inference_model_1, NULL) != 0)) {
      status = MEMX_STATUS_OTHERS;
    }
  }
  if (memx_status_no_error(status)) {
    pthread_join(t0, NULL);
    pthread_join(t1, NULL);
  }

  // 4. Always remember to clean up resources before leaving.
  memx_close(MODEL_ID);

  // End of process
  if (memx_status_no_error(status)) {
    printf("success.\n");
  } else {
    printf("failure.\n");
  }
  return 0;
}
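
// Build sketch (assumptions: the MemryX runtime links as -lmemx and OpenCV 4
// is discoverable via pkg-config; adjust library names and paths to match
// your SDK installation):
//
//   g++ memx_c_example2.cpp -o memx_c_example2 \
//       $(pkg-config --cflags --libs opencv4) -lmemx -lpthread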