升级 TF1 为 TF2#
import tensorflow as tf
try:
tf1 = tf.compat.v1
except (ImportError, AttributeError):
tf1 = tf
tf.get_logger().setLevel('ERROR')
2024-01-06 16:23:07.117814: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-06 16:23:07.160960: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-06 16:23:07.161006: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-06 16:23:07.161048: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-06 16:23:07.169897: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-06 16:23:07.170731: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-06 16:23:08.330236: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
temp_dir = ".temp" # 缓存目录
# log_dir = "/media/pc/data/lxw/ai/tasks/logs/tf1-tf2"
检查点迁移#
定义辅助函数:
def print_checkpoint(save_path, print_value=True, tab_size=10):
"""打印检查点信息"""
shape_size = max(tab_size, 20)
dtype_size = max(tab_size, 10)
reader = tf.train.load_checkpoint(save_path)
shapes = reader.get_variable_to_shape_map()
dtypes = reader.get_variable_to_dtype_map()
print(f"检查点: {save_path}")
tt = "key".ljust(tab_size)
tt += "\tshape".ljust(shape_size)
tt += "\tdtype".ljust(dtype_size)
tt += "\tvalue".ljust(tab_size)
print(tt)
print("="*tab_size*7)
for key in shapes:
tt = f"{key}".ljust(tab_size)
tt += f"\t{shapes[key]}".ljust(shape_size)
tt += f"\t{dtypes[key].name}".ljust(dtype_size)
if print_value:
tt += f"\t{reader.get_tensor(key)}".ljust(max(tab_size, 10))
print(tt)
先看 TF1 的例子:
with tf.Graph().as_default() as g:
a = tf1.get_variable('a', shape=[], dtype=tf.float32,
initializer=tf1.zeros_initializer())
b = tf1.get_variable('b', shape=[], dtype=tf.uint8,
initializer=tf1.zeros_initializer())
c = tf1.get_variable('scoped/c', shape=[], dtype=tf.uint8,
initializer=tf1.zeros_initializer())
with tf1.Session() as sess:
saver = tf1.train.Saver()
sess.run(a.assign(1))
sess.run(b.assign(2))
sess.run(c.assign(3))
saver.save(sess, f'{temp_dir}/tf1-ckpt')
print_checkpoint(f'{temp_dir}/tf1-ckpt')
检查点: .temp/tf1-ckpt
key shape dtype value
======================================================================
scoped/c [] uint8 3
b [] uint8 2
a [] float32 1.0
2024-01-06 16:23:09.741226: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2024-01-06 16:23:09.762581: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled
TF2 的例子:
a = tf.Variable(5.0, name='a')
b = tf.Variable(6.0, name='b')
with tf.name_scope('scoped2'):
c = tf.Variable(7.0, name='c')
ckpt = tf.train.Checkpoint(variables=[a, b, c])
save_path_v2 = ckpt.save(f'{temp_dir}/tf2-ckpt')
print_checkpoint(save_path_v2, tab_size=32)
检查点: .temp/tf2-ckpt-1
key shape dtype value
================================================================================================================================================================================================================================
variables/2/.ATTRIBUTES/VARIABLE_VALUE [] float32 7.0
variables/1/.ATTRIBUTES/VARIABLE_VALUE [] float32 6.0
variables/0/.ATTRIBUTES/VARIABLE_VALUE [] float32 5.0
save_counter/.ATTRIBUTES/VARIABLE_VALUE [] int64 1
_CHECKPOINTABLE_OBJECT_GRAPH [] string b"\n%\n\r\x08\x01\x12\tvariables\n\x10\x08\x02\x12\x0csave_counter*\x02\x08\x01\n\x19\n\x05\x08\x03\x12\x010\n\x05\x08\x04\x12\x011\n\x05\x08\x05\x12\x012*\x02\x08\x01\nM\x12G\n\x0eVARIABLE_VALUE\x12\x0csave_counter\x1a'save_counter/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\nA\x12;\n\x0eVARIABLE_VALUE\x12\x01a\x1a&variables/0/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\nA\x12;\n\x0eVARIABLE_VALUE\x12\x01b\x1a&variables/1/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\nI\x12C\n\x0eVARIABLE_VALUE\x12\tscoped2/c\x1a&variables/2/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01"
备注
基于名称的检查点中的键是变量的名称。基于对象的检查点中的键指向从根对象到变量的路径。
查看 tf2-ckpt
中的键,可以看出它们全部指向每个变量的对象路径。
仔细研究下面的打印信息:
a = tf.Variable(0.)
b = tf.Variable(0.)
c = tf.Variable(0.)
root = ckpt = tf.train.Checkpoint(variables=[a, b, c])
print("root type =", type(root).__name__)
print("root.variables =", root.variables)
print("root.variables[0] =", root.variables[0])
root type = Checkpoint
root.variables = ListWrapper([<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>, <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>, <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>])
root.variables[0] = <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>
尝试使用下面的代码段,看看检查点键如何随对象结构变化:
module = tf.Module()
module.d = tf.Variable(0.)
test_ckpt = tf.train.Checkpoint(v={'a': a, 'b': b},
c=c,
module=module)
test_ckpt_path = test_ckpt.save(f'{temp_dir}/root-tf2-ckpt')
print_checkpoint(test_ckpt_path, tab_size=25)
检查点: .temp/root-tf2-ckpt-1
key shape dtype value
===============================================================================================================================================================================
v/b/.ATTRIBUTES/VARIABLE_VALUE [] float32 0.0
v/a/.ATTRIBUTES/VARIABLE_VALUE [] float32 0.0
module/d/.ATTRIBUTES/VARIABLE_VALUE [] float32 0.0
save_counter/.ATTRIBUTES/VARIABLE_VALUE [] int64 1
c/.ATTRIBUTES/VARIABLE_VALUE [] float32 0.0
_CHECKPOINTABLE_OBJECT_GRAPH [] string b"\n0\n\x05\x08\x01\x12\x01c\n\n\x08\x02\x12\x06module\n\x05\x08\x03\x12\x01v\n\x10\x08\x04\x12\x0csave_counter*\x02\x08\x01\n>\x128\n\x0eVARIABLE_VALUE\x12\x08Variable\x1a\x1cc/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\n\x0b\n\x05\x08\x05\x12\x01d*\x02\x08\x01\n\x12\n\x05\x08\x06\x12\x01a\n\x05\x08\x07\x12\x01b*\x02\x08\x01\nM\x12G\n\x0eVARIABLE_VALUE\x12\x0csave_counter\x1a'save_counter/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\nE\x12?\n\x0eVARIABLE_VALUE\x12\x08Variable\x1a#module/d/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\n@\x12:\n\x0eVARIABLE_VALUE\x12\x08Variable\x1a\x1ev/a/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01\n@\x12:\n\x0eVARIABLE_VALUE\x12\x08Variable\x1a\x1ev/b/.ATTRIBUTES/VARIABLE_VALUE*\x02\x08\x01"
下面是对不同模型使用相同检查点的示例。
使用
tf1.train.Saver
保存 TF1 检查点:
with tf.Graph().as_default() as g:
a = tf1.get_variable('a', shape=[], dtype=tf.float32,
initializer=tf1.zeros_initializer())
b = tf1.get_variable('b', shape=[], dtype=tf.uint8,
initializer=tf1.zeros_initializer())
c = tf1.get_variable('scoped/c', shape=[], dtype=tf.uint8,
initializer=tf1.zeros_initializer())
with tf1.Session() as sess:
saver = tf1.train.Saver()
sess.run(a.assign(1))
sess.run(b.assign(2))
sess.run(c.assign(3))
saver.save(sess, f'{temp_dir}/tf1-ckpt')
print_checkpoint(f'{temp_dir}/tf1-ckpt')
检查点: .temp/tf1-ckpt
key shape dtype value
======================================================================
scoped/c [] uint8 3
b [] uint8 2
a [] float32 1.0
使用
tf.compat.v1.Saver
在 Eager 模式下加载检查点:
a = tf.Variable(0, name="a", dtype=tf.float32)
b = tf.Variable(0, name="b", dtype=tf.uint8)
with tf.name_scope('scoped'):
c = tf.Variable(0, name='c', dtype=tf.uint8)
# 在 TF2 中删除集合后,必须将变量列表传递给 Saver 对象:
saver = tf1.train.Saver(var_list=[a, b, c])
saver.restore(sess=None, save_path=f'{temp_dir}/tf1-ckpt')
print(f"加载后的值 [a, b, c]: [{a.numpy()}, {b.numpy()}, {c.numpy()}]")
# Saving 也可以立即执行(sess 必须为 None)。
path = saver.save(sess=None, save_path=f'{temp_dir}/tf1-ckpt-saved-in-eager')
print_checkpoint(path)
加载后的值 [a, b, c]: [1.0, 2, 3]
检查点: .temp/tf1-ckpt-saved-in-eager
key shape dtype value
======================================================================
scoped/c [] uint8 3
b [] uint8 2
a [] float32 1.0
使用 TF2 API tf.train.Checkpoint
加载检查点:
a = tf.Variable(0, name="a", dtype=tf.float32)
b = tf.Variable(0, name="b", dtype=tf.uint8)
with tf.name_scope('scoped'):
c = tf.Variable(0, name='c', dtype=tf.uint8)
# Without the name_scope, name="scoped/c" works too:
c_2 = tf.Variable(0, name='scoped/c', dtype=tf.uint8)
print("变量名称: ")
print(f"\ta.name = {a.name}")
print(f"\tb.name = {b.name}")
print(f"\tc.name = {c.name}")
print(f"\tc_2.name = {c_2.name}")
# Restore the values with tf.train.Checkpoint
ckpt = tf.train.Checkpoint(variables=[a, b, c, c_2])
ckpt.restore(f'{temp_dir}/tf1-ckpt')
print(f"加载后的值 [a, b, c, c_2]: [{a.numpy()}, {b.numpy()}, {c.numpy()}, {c_2.numpy()}]")
变量名称:
a.name = a:0
b.name = b:0
c.name = scoped/c:0
c_2.name = scoped/c:0
加载后的值 [a, b, c, c_2]: [1.0, 2, 3, 3]
SavedModel#
import shutil
def remove_dir(path):
try:
shutil.rmtree(path)
except:
...
定义简单运算:
def add_two(x):
return x + 2
TensorFlow 1:保存和导出 SavedModel#
在 TensorFlow 1 中,使用 tf.compat.v1.saved_model.Builder
、tf.compat.v1.saved_model.simple_save
和 tf.estimator.Estimator.export_saved_model
API 来构建、保存及导出 TensorFlow 计算图和会话。
使用 SavedModelBuilder 将计算图保存为 SavedModel
model_dir = f"{temp_dir}/saved-model-builder"
remove_dir(model_dir)
with tf.Graph().as_default() as g:
x = tf1.placeholder(tf.float32, shape=[])
y = add_two(x)
with tf1.Session() as sess:
print(f"结果为 {sess.run(y, {x: 3.})}")
# 使用 SavedModelBuilder 保持
builder = tf1.saved_model.Builder(model_dir)
sig_def = tf1.saved_model.predict_signature_def(
inputs={'input': x},
outputs={'output': y}
)
builder.add_meta_graph_and_variables(
sess=sess,
tags=[tf1.saved_model.tag_constants.SERVING],
signature_def_map={
tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: sig_def
})
builder.save()
结果为 5.0
为应用构建 SavedModel
remove_dir(f"{temp_dir}/simple-save")
with tf.Graph().as_default() as g:
x = tf1.placeholder(tf.float32, shape=[])
y = add_two(x)
with tf1.Session() as sess:
print(f"结果为 {sess.run(y, {x: 3.})}")
tf1.saved_model.simple_save(
sess, f"{temp_dir}/simple-save",
inputs={'input': x},
outputs={'output': y}
)
结果为 5.0
TensorFlow 2:保存和导出 SavedModel#
保存并导出使用 tf.Module
定义的 SavedModel
要在 TensorFlow 2 中导出模型,必须定义 tf.Module
或 tf.keras.Model
来保存模型的所有变量和函数。随后,可以调用 tf.saved_model.save
来创建 SavedModel。
class MyModel(tf.Module):
@tf.function
def __call__(self, x):
return add_two(x)
model = MyModel()
@tf.function
def serving_default(x):
return {"output": model(x)}
signature_function = serving_default.get_concrete_function(
tf.TensorSpec(shape=[], dtype=tf.float32)
)
tf.saved_model.save(
model, f"{temp_dir}/tf2-save",
signatures={
tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_function
}
)
保存并导出使用 Keras 定义的 SavedModel#
用于保存和导出的 Keras API(Model.save
或 tf.keras.models.save_model
)可以从 tf.keras.Model
导出 SavedModel。
inp = tf.keras.Input(3)
out = add_two(inp)
model = tf.keras.Model(inputs=inp, outputs=out)
@tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.float32)])
def serving_default(input):
return {'output': model(input)}
model.save(
f"{temp_dir}/keras-model", save_format='tf',
signatures={tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: serving_default}
)
加载 SavedModel#
TensorFlow 1:使用 tf.saved_model.load
加载 SavedModel#
在 TensorFlow 1 中,可以使用 tf.saved_model.load
将 SavedModel 直接导入当前计算图和会话。可以在张量输入和输出名称上调用 Session.run
:
def load_tf1(path, x):
print(f"加载 {path}")
with tf.Graph().as_default() as g:
with tf1.Session() as sess:
meta_graph = tf1.saved_model.load(sess, ["serve"], path)
sig_def = meta_graph.signature_def[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
input_name = sig_def.inputs['input'].name
output_name = sig_def.outputs['output'].name
print(x, '=>', sess.run(output_name, feed_dict={input_name: x}))
load_tf1(f'{temp_dir}/saved-model-builder', 5.)
load_tf1(f'{temp_dir}/simple-save', 5.)
load_tf1(f'{temp_dir}/keras-model', 5.)
加载 .temp/saved-model-builder
5.0 => 7.0
加载 .temp/simple-save
5.0 => 7.0
加载 .temp/keras-model
5.0 => 7.0
TensorFlow 2:加载使用 tf.saved_model 保存的模型#
在 TensorFlow 2 中,对象会加载到存储变量和函数的 Python 对象中。这与从 TensorFlow 1 保存的模型兼容。
def load_tf2(path, x):
print(f"加载 {path}")
loaded = tf.saved_model.load(path)
sig_key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
out = loaded.signatures[sig_key](tf.constant(x))['output']
print(x, '=>', out)
load_tf2(f'{temp_dir}/saved-model-builder', 5.)
load_tf2(f'{temp_dir}/simple-save', 5.)
load_tf2(f'{temp_dir}/tf2-save', 5.)
load_tf2(f'{temp_dir}/keras-model', 5.)
加载 .temp/saved-model-builder
5.0 => tf.Tensor(7.0, shape=(), dtype=float32)
加载 .temp/simple-save
5.0 => tf.Tensor(7.0, shape=(), dtype=float32)
加载 .temp/tf2-save
5.0 => tf.Tensor(7.0, shape=(), dtype=float32)
加载 .temp/keras-model
5.0 => tf.Tensor(7.0, shape=(), dtype=float32)