使用C++调用TensorFlow模型简单说明

之前都是使用python训练模型，使用python加载模型预测结果。正好遇到了一个需要使用C++加载模型预测结果的需求，趁这个机会学习一下相关的流程。

对于C++的TensorFlow API，官方文档介绍只能通过bazel编译使用。所以本文先介绍如何使用bazel来编译调用TensorFlow模型的C++代码。

一、准备工作

bazel 安装

bazel官方文档写的很清楚，可以根据自己的平台选择安装方法，我是Ubuntu系统，直接输入下列命令即可：

chmod +x bazel-<version>-installer-linux-x86_64.sh
./bazel-<version>-installer-linux-x86_64.sh --user

可以设置一下环境变量（并不是必须的，因为我执行完上面两个命令就可以了）：

export PATH="$PATH:$HOME/bin"

下载TensorFlow的源代码

因为使用bazel编译的方式需要TensorFlow的源代码，所以第一步需要下载好源代码：

git clone --recursive https://github.com/tensorflow/tensorflow

二、准备好模型部分

我们准备一个简单的实现 x * w的tf代码，不需要训练，只需要能输出结果，能够保存模型即可。

# train.py
# Builds a tiny graph that computes x * w (w is a 2x1 matrix of ones),
# evaluates it once on a fixed input and saves the model as a checkpoint
# with the prefix demo_model/demo.
import os
import sys
import numpy as np
import tensorflow as tf

if __name__ == '__main__':

    # Session settings: soft device placement on, device-placement logging off,
    # GPU memory growth enabled.
    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = True

    # Checkpoint prefix passed to the saver.
    checkpoint_prefix = os.path.join('demo_model/', "demo")

    # Graph definition: res = x * w. The names 'x' and 'res' are the handles
    # used to look these tensors up again after the model is reloaded.
    x = tf.placeholder(tf.int32, [None, 2], name='x')
    w = tf.Variable(tf.ones([2, 1], tf.int32), dtype=tf.int32, name='w')
    res = tf.matmul(x, w, name='res')

    with tf.Session(config=session_config) as sess:

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        # Evaluate the graph on a sample input, then save the model.
        outputs = sess.run([res], feed_dict={x: [[1, 2], [3, 4]]})
        saver.save(sess, checkpoint_prefix)

        print("result: ", outputs[0])

运行上面的代码，可以得到 [[3],[7]]的输出结果，且在demo_model/下存好了对应的模型：

demo_model
├── checkpoint
├── demo.data-00000-of-00001
├── demo.index
└── demo.meta

接下来，我们写一个简单的python代码调用模型，保证模型没有问题：

import os
import tensorflow as tf

if __name__ == '__main__':

    # Session settings: soft device placement on, device-placement logging off,
    # GPU memory growth enabled.
    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:

        # Rebuild the graph from the .meta file, then restore the variable
        # values from the latest checkpoint found in demo_model/.
        restorer = tf.train.import_meta_graph('demo_model/demo.meta')
        restorer.restore(sess, tf.train.latest_checkpoint('demo_model/'))

        # Look up the input placeholder and the output tensor by name.
        default_graph = tf.get_default_graph()
        input_tensor = default_graph.get_tensor_by_name("x:0")
        output_tensor = default_graph.get_tensor_by_name("res:0")

        # Run the restored model on a sample input and print the result.
        print(sess.run(output_tensor, feed_dict={input_tensor: [[1, 2], [3, 4]]}))

我们可以看到输出了与训练阶段相同的模型输出，证明model调起没什么问题。接下来就是比较困难的C++调用部分。

三、C++ 调用

C++ 代码

首先定义好graph和checkpoint的路径

// Path of the serialized MetaGraphDef (.meta file) and the checkpoint prefix.
// Replace "yourpath" with the directory that contains demo_model/.
const string pathToGraph = "yourpath/demo_model/demo.meta";
const string checkpointPath = "yourpath/demo_model/demo";

新建session，如果新建没有成功则退出。

// Create a session with default options; a null result is treated as failure.
auto session = NewSession(SessionOptions());
if (session == nullptr)
{
    cout << "Could not create Tensorflow session." << endl;
    // main() returns int: report the failure with a non-zero exit code.
    return 1;
}

加载存储的graph结构，如果加载不成功则退出。

MetaGraphDef graph_def;
status = ReadBinaryProto(Env::Default(), pathToGraph, &graph_def);
if (!status.ok())
{
    // throw runtime_error("Error reading graph definition from " + pathToGraph + ": " + status.ToString());
    cout << "Error reading graph" << endl;
    return 0;
}

根据载入的图结构，在之前创建的session对象中创建计算图：

// Load the GraphDef contained in the MetaGraphDef into the session.
status = session->Create(graph_def.graph_def());
if (!status.ok())
{
    // Include the Status text and exit with a non-zero code on failure.
    cout << "error creating graph: " << status.ToString() << endl;
    return 1;
}

载入存储的模型参数，通过session->Run的方式重新加载模型参数

// Scalar string tensor holding the checkpoint prefix.
Tensor checkpointPathTensor(DT_STRING, TensorShape());
checkpointPathTensor.scalar<std::string>()() = checkpointPath;
// Restore the variables: feed the checkpoint path to the saver's filename
// tensor and run the saver's restore op as a target node. No outputs are
// fetched, hence the empty fetch list and the null output vector.
status = session->Run(
    {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
    {},
    {graph_def.saver_def().restore_op_name()},
    nullptr);
if (!status.ok())
{
    // Include the Status text and exit with a non-zero code on failure.
    cout << "error loading checkpoint: " << status.ToString() << endl;
    return 1;
}

构造模型的输入数据，此处演示了一个batch_size为2的二维矩阵的输入。input相当于python版本中的feed_dict。

// input相当于python版本中的feed_dict。
std::vector<std::pair<string, Tensor>> input;
// 输入Tensor的shape
tensorflow::TensorShape inputshape({2, 2});
// 根据类型和shape新建Tensor
Tensor a(tensorflow::DT_INT32,inputshape);
// 得到类型为int，维度为2的模板实例，类似于Eigen中矩阵的用法
auto a_map = a.tensor<int,2>();
int count = 1;
for (int i=0; i<2; ++i){
    for (int j=0; j<2; ++j){
        a_map(i,j) = count++;

    }
}
// emplace_back用法类似于push_back,只是免去了构造结构体或类的麻烦
input.emplace_back(std::string("x"), a);

调用模型，获取输出结果。

// 结果是Tensor的向量
std::vector<tensorflow::Tensor> answer;
status = session->Run(input, {"res"}, {}, &answer);

Tensor result = answer[0];
// 获取类似矩阵的实例
auto result_map = result.tensor<int,2>();
cout<<"result: "<<result_map(0,0)<<endl;
cout<<"result: "<<result_map(1,0)<<endl;

其实大部分代码都是参考网友实现的，官方的API真的很晦涩难以阅读。目前来看，想用c++构造输入输出都很不灵活。整个c++文件如下：

#include <iostream>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/protobuf/meta_graph.pb.h"
#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"

using namespace std;
using namespace tensorflow;

int main()
{
    const string pathToGraph = "yourpath/demo_model/demo.meta";
    const string checkpointPath = "yourpath/demo_model/demo";
    auto session = NewSession(SessionOptions());
    if (session == nullptr)
    {
        throw runtime_error("Could not create Tensorflow session.");
        //cout << "Could not create Tensorflow session." << endl;
    }

    Status status;

// 读入我们预先定义好的模型的计算图的拓扑结构
    MetaGraphDef graph_def;
    status = ReadBinaryProto(Env::Default(), pathToGraph, &graph_def);
    if (!status.ok())
    {
        // throw runtime_error("Error reading graph definition from " + pathToGraph + ": " + status.ToString());
        cout << "Error reading graph" << endl;
        return 0;
    }

// 利用读入的模型的图的拓扑结构构建一个session
    status = session->Create(graph_def.graph_def());
    if (!status.ok())
    {
        // throw runtime_error("Error creating graph: " + status.ToString());
        cout << "error creating graph" << endl;
        return 0;
    }

// 读入预先训练好的模型的权重
    Tensor checkpointPathTensor(DT_STRING, TensorShape());
    checkpointPathTensor.scalar<std::string>()() = checkpointPath;
    status = session->Run(
            {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
            {},
            {graph_def.saver_def().restore_op_name()},
            nullptr);
    if (!status.ok())
    {
        // throw runtime_error("Error loading checkpoint from " + checkpointPath + ": " + status.ToString());
        cout << "error loading checkpoint" << endl;
        return 0;
    }

    // input相当于python版本中的feed_dict。
    std::vector<std::pair<string, Tensor>> input;
    // 输入Tensor的shape
    tensorflow::TensorShape inputshape({2, 2});
    // 根据类型和shape新建Tensor
    Tensor a(tensorflow::DT_INT32,inputshape);
    // 得到类型为int，维度为2的模板实例，类似于Eigen中矩阵的用法
    auto a_map = a.tensor<int,2>();
    int count = 1;
    for (int i=0; i<2; ++i){
        for (int j=0; j<2; ++j){
            a_map(i,j) = count++;

        }
    }
    // emplace_back用法类似于push_back,只是免去了构造结构体或类的麻烦
    input.emplace_back(std::string("x"), a);

//   运行模型，并获取输出
    std::vector<tensorflow::Tensor> answer;
    status = session->Run(input, {"res"}, {}, &answer);

    Tensor result = answer[0];
    auto result_map = result.tensor<int,2>();
    cout<<"result: "<<result_map(0,0)<<endl;
    cout<<"result: "<<result_map(1,0)<<endl;

    return 0;
}

编译

可以在tensorflow/tensorflow文件夹下新建demo文件夹，将上面的C++代码保存为call.cc并放入该文件夹。同时新建一个BUILD文件，里面内容如下。BUILD文件是bazel中类似于makefile的编译文件，主要定义目标的名字、源文件是什么，以及依赖。具体可以参考教程Introduction to Bazel: Building a C++ Project

# Bazel build rule for the demo binary.
load("//tensorflow:tensorflow.bzl", "tf_cc_binary")

tf_cc_binary(
    name = "demo",  # name of the build target
    srcs = ["call.cc"],  # C++ source file
    deps = [
        "//tensorflow/cc:cc_ops",
        "//tensorflow/cc:client_session",
        "//tensorflow/core:tensorflow",
    ],
)

回到仓库的根目录，执行下面的编译语句。//tensorflow/demo是BUILD文件的位置，demo表示目标文件的名字。

bazel build //tensorflow/demo:demo

第一次编译需要很久，之后就很快了。在bazel-bin/tensorflow/demo文件夹下会有编译好的demo可执行文件。在该目录下执行命令

./demo

即可得到模型计算出的答案[[3],[7]]。

使用GPU运行

首先，如果希望编译出的代码在运行时可以调用GPU运行，那么必须重新配置TensorFlow（在仓库根目录下运行命令./configure），并且需要enable cuda，如下所示：

Do you wish to build TensorFlow with CUDA support? [y/N]: y

之后需要按照命令行提示填写cuda版本和路径，cudnn的版本和路径。配置完毕后，在使用bazel编译代码时，加上--config=cuda的参数，编译命令如下：

bazel build -c opt --config=cuda //tensorflow/demo:demo

再运行demo，则发现预测过程是在GPU上计算了。

四、指定GPU运行

上面阐述的方法主要是对metaGraphDef进行加载，然后再载入checkpoint的参数。这种方法有一个局限性，即无法在同一个程序中指定多个GPU。

一般来说，我们可以通过设置"CUDA_VISIBLE_DEVICES"环境变量的方式来指定模型需要使用的GPU，但是在C++部署的生产环境中，如果需要使用多个模型进行集成，可以并行使用多个GPU进行预测是较为理想的方案。所以，此时不能再通过设置"CUDA_VISIBLE_DEVICES"环境变量的方法来指定GPU了。

一般来说，C++中指定GPU有两种方法，一种是通过tensorflow::SessionOptions()进行设置，代码如下：

1 2	auto options = tensorflow::SessionOptions(); options.config.mutable_gpu_options()->set_visible_device_list("0");

这种方法的问题是这个设置目前是进程级别的设置，所以在单个进程中无法对多个模型指定不同的设备。相关可以参考https://github.com/tensorflow/tensorflow/issues/18861。

第二种方法是通过遍历图中节点，将图中的节点手动移动到指定的device中。实际上，TensorFlow也提供了相关的函数，如下所示

// Assigns `device` to every node of `graph_def` whose device field is empty;
// nodes that already name a device are left unchanged.
inline void SetDefaultDevice(const string& device, GraphDef* graph_def) {
   for (int i = 0; i < graph_def->node_size(); ++i) {
    auto node = graph_def->mutable_node(i);
    // Only fill in the device when the node does not specify one.
    if (node->device().empty()) {
      node->set_device(device);
    }
 }
}

很遗憾，通过metaGraphDef的graph_def()获取的GraphDef是const的，所以无法直接对其进行修改。

最终，我们不采用读取metaGraphDef的方式，而是采用读取GraphDef的方式加载模型，而GraphDef对应的是pb格式的模型。

metaGraphDef和GraphDef的区别

待总结。先贴个博客Tensorflow框架实现中的“三”种图

如何存储pb格式模型

存储pb格式模型有两种方法：

使用convert_variables_to_constants，但是这个接口未来会被弃用

from tensorflow.python.framework.graph_util import convert_variables_to_constants

# Replace the variables in the session's graph with constants;
# "output/pred_y" is the output node of the model.
output_graph_def = convert_variables_to_constants(sess, sess.graph_def, output_node_names=["output/pred_y"])
# Write the serialized GraphDef to a binary .pb file.
# tf.gfile.GFile is used instead of the deprecated tf.gfile.FastGFile.
with tf.gfile.GFile('models/graph_wh.pb', mode='wb') as f:
    f.write(output_graph_def.SerializeToString())

使用 freeze_graph.py

使用 freeze_graph.py可以将没有保存参数的GraphDef和离线存储的参数整合到一起，形成可以直接使用的模型。

# Save the checkpoint files.
saver.save(sess, "models/test")
# Write the session's GraphDef to models/graph.pb in binary form.
tf.train.write_graph(sess.graph.as_graph_def(), 'models/', 'graph.pb', as_text=False)

通过上面的代码保存了pb文件和checkpoint文件，然后通过 freeze_graph.py则可以固化模型。

python freeze_graph.py --input_graph "graph.pb" --input_checkpoint "test" --output_graph "graph_scrpit.pb" --output_node_names "output/pred_y" --input_binary=true

C++代码

#include <iostream>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/protobuf/meta_graph.pb.h"
#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"

using namespace std;
using namespace tensorflow;

int main()
{
    const string graph_path = "yourpath/demo_model/demo.pb";
    tensorflow::GraphDef graph_def;
    SessionOptions session_options;
    session_options.config.mutable_gpu_options()->set_allow_growth(allow_growth);
    tensorflow::Session* session = NewSession(session_options);
    if (session == nullptr)
    {
        cout << "Could not create Tensorflow session." << endl;
    }

    Status status;
    // 读入我们预先定义好的模型的计算图的拓扑结构
    status = ReadBinaryProto(Env::Default(), graph_path, &graph_def);
    if (!status.ok())
    {
        cout << "Error reading graph" << endl;
    }
    string my_device = "/gpu:" + device;
    for (int i = 0; i < graph_def.node_size(); ++i) {
        auto node = graph_def.mutable_node(i);
        node->set_device(device);
    }

    // 利用读入的模型的图的拓扑结构构建一个session
    status = session->Create(graph_def);
    if (!status.ok())
    {
        cout << "Error creating graph" << endl;

    }

    // input相当于python版本中的feed_dict。
    std::vector<std::pair<string, Tensor>> input;
    // 输入Tensor的shape
    tensorflow::TensorShape inputshape({2, 2});
    // 根据类型和shape新建Tensor
    Tensor a(tensorflow::DT_INT32,inputshape);
    // 得到类型为int，维度为2的模板实例，类似于Eigen中矩阵的用法
    auto a_map = a.tensor<int,2>();
    int count = 1;
    for (int i=0; i<2; ++i){
        for (int j=0; j<2; ++j){
            a_map(i,j) = count++;

        }
    }
    // emplace_back用法类似于push_back,只是免去了构造结构体或类的麻烦
    input.emplace_back(std::string("x"), a);

//   运行模型，并获取输出
    std::vector<tensorflow::Tensor> answer;
    status = session->Run(input, {"res"}, {}, &answer);

    Tensor result = answer[0];
    auto result_map = result.tensor<int,2>();
    cout<<"result: "<<result_map(0,0)<<endl;
    cout<<"result: "<<result_map(1,0)<<endl;

    return 0;
}