TensorRT打印网络推理时间

  1. 实现profiler类

    继承 nvinfer1::IProfiler,在 reportLayerTime 中按层名累计耗时,并通过 printLayerTimes 打印各层的平均耗时:
    class Profiler : public nvinfer1::IProfiler
    {
    public:
    void printLayerTimes(int itrationsTimes)
    {
    float totalTime = 0;
    for (size_t i = 0; i < mProfile.size(); i++)
    {
    printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
    totalTime += mProfile[i].second;
    }
    printf("+++++++++++++iteration: %d", itrationsTimes);
    printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
    }
    private:
    typedef std::pair<std::string, float> Record;
    std::vector<Record> mProfile;

    virtual void reportLayerTime(const char* layerName, float ms)
    {
    auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
    if (record == mProfile.end())
    mProfile.push_back(std::make_pair(layerName, ms));
    else
    record->second += ms;
    }
    };
  2. 注册profiler类

    实例化步骤 1 中的 Profiler 类(即下面的 mTrtProfiler),并通过 setProfiler 将其注册到执行上下文 context 中:
    // Create an execution context from the engine and keep it in cuda_context_.
    IExecutionContext* context = engine->createExecutionContext();
    cuda_context_ = context;
    // Register the profiler instance on the context. It is passed by address,
    // so mTrtProfiler presumably has to stay alive for as long as the context
    // may use it — TODO confirm.
    cuda_context_->setProfiler(&mTrtProfiler);
  3. 调用打印时间

    先执行推理,再调用 printTime 打印各层耗时:
    // Run p_Net on `image` (run() is not shown here; presumably one inference pass).
    p_Net->run(image);
    // Print the timings. NOTE(review): printTime() is not shown here;
    // presumably it forwards to Profiler::printLayerTimes — confirm.
    p_Net->printTime();