# In the case where we have other sequences, we just concatenate
# them to the self-attention head before the projection.
attention_output = tf.concat(attention_heads, axis=-1)
tf.concat() is TensorFlow's function for concatenating tensors along a given axis: axis=0 concatenates along dimension 0, axis=1 along dimension 1, and axis=-1 along the last dimension (so for rank-3 tensors, axis=-1 is equivalent to axis=2). For a 2-D matrix, dimension 0 corresponds to the subsets enclosed by the outermost brackets and dimension 1 to those enclosed by the inner brackets; the higher the dimension, the more deeply nested the brackets.
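As a quick illustration (a minimal sketch, not part of the BERT source; the shapes are invented for demonstration), concatenating two [8, 64] attention-head outputs along axis=-1 produces an [8, 128] tensor:

import tensorflow as tf           # TensorFlow 1.x, matching the code above

a = tf.zeros([8, 64])             # one attention head: [batch*seq, size_per_head]
b = tf.zeros([8, 64])             # another attention head

c = tf.concat([a, b], axis=-1)    # concatenate on the last dimension:  [8, 128]
d = tf.concat([a, b], axis=0)     # concatenate on the first dimension: [16, 64]
print(c.shape, d.shape)           # (8, 128) (16, 64)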
# Run a linear projection of `hidden_size` then add a residual
# with `layer_input`.
with tf.variable_scope("output"):
  attention_output = tf.layers.dense(     # dense (fully connected) layer
      attention_output,
      hidden_size,
      kernel_initializer=create_initializer(initializer_range))
  attention_output = dropout(attention_output, hidden_dropout_prob)  # dropout
  attention_output = layer_norm(attention_output + layer_input)      # residual connection, then layer normalization
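Both sub-layers of the block follow this same "post-layer-norm" pattern: project, apply dropout, add the sub-layer input back as a residual, then layer-normalize. A minimal standalone sketch of the pattern (hypothetical shapes; tf.contrib.layers.layer_norm stands in for the layer_norm helper used above):

x = tf.zeros([1024, 768])                      # sub-layer input: [batch*seq, hidden_size]
sub = tf.layers.dense(x, 768)                  # linear projection back to hidden_size
sub = tf.nn.dropout(sub, keep_prob=0.9)        # dropout with hidden_dropout_prob = 0.1
out = tf.contrib.layers.layer_norm(sub + x)    # residual connection, then LayerNorm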
# The activation is only applied to the "intermediate" hidden layer.
with tf.variable_scope("intermediate"):
  intermediate_output = tf.layers.dense(  # dense (fully connected) layer
      attention_output,                   # the previous sub-layer's output is this layer's input
      intermediate_size,                  # intermediate (feed-forward) size
      activation=intermediate_act_fn,
      kernel_initializer=create_initializer(initializer_range))
# Down-project back to `hidden_size` then add the residual.
with tf.variable_scope("output"):
  layer_output = tf.layers.dense(         # dense layer projecting back down to hidden_size
      intermediate_output,
      hidden_size,
      kernel_initializer=create_initializer(initializer_range))
  layer_output = dropout(layer_output, hidden_dropout_prob)     # dropout
  layer_output = layer_norm(layer_output + attention_output)    # residual connection, then layer normalization
  prev_output = layer_output
  all_layer_outputs.append(layer_output)  # record this layer's output
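With the standard BERT-Base sizes (hidden_size = 768, intermediate_size = 3072), the two dense layers of this feed-forward sub-layer account for a large share of each layer's parameters; a quick back-of-the-envelope count:

hidden_size = 768          # BERT-Base
intermediate_size = 3072   # BERT-Base (4 * hidden_size)

expand  = hidden_size * intermediate_size + intermediate_size   # 768 -> 3072, plus bias
project = intermediate_size * hidden_size + hidden_size         # 3072 -> 768, plus bias
print(expand + project)    # 4722432 parameters in the feed-forward block of one layer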
if do_return_all_layers:                  # caller asked for every layer's output
  final_outputs = []
  for layer_output in all_layer_outputs:  # reshape each layer's output back to the input shape
    final_output = reshape_from_matrix(layer_output, input_shape)
    final_outputs.append(final_output)
  return final_outputs
else:                                     # otherwise return only the last layer's output
  final_output = reshape_from_matrix(prev_output, input_shape)
  return final_output
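reshape_from_matrix is the counterpart of reshape_to_matrix shown further below: it restores the [batch_size, seq_length, hidden_size] shape recorded in input_shape from the flattened [batch_size * seq_length, hidden_size] matrix. Roughly (a sketch inferred from its usage here, not a quote of the source):

# input_shape comes from get_shape_list(input_tensor) earlier in the function,
# e.g. [batch_size, seq_length, hidden_size]; prev_output is [batch_size * seq_length, hidden_size].
final_output = tf.reshape(prev_output, input_shape[0:-1] + [hidden_size])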
Related helper functions
def get_shape_list(tensor, expected_rank=None, name=None):
  """Returns a list of the shape of tensor, preferring static dimensions.

  Args:
    tensor: A tf.Tensor object to find the shape of.
    expected_rank: (optional) int. The expected rank of `tensor`. If this is
      specified and the `tensor` has a different rank, an exception will be
      thrown.
    name: Optional name of the tensor for the error message.

  Returns:
    A list of dimensions of the shape of tensor. All static dimensions will
    be returned as python integers, and dynamic dimensions will be returned
    as tf.Tensor scalars.
  """
  if name is None:                    # if no name was given, use the tensor's own name
    name = tensor.name

  if expected_rank is not None:       # if an expected rank was given, check the tensor against it
    assert_rank(tensor, expected_rank, name)

  shape = tensor.shape.as_list()      # static shape as a Python list

  non_static_indexes = []
  for (index, dim) in enumerate(shape):
    if dim is None:                   # a None entry means this dimension is dynamic
      non_static_indexes.append(index)

  if not non_static_indexes:          # no dynamic dimensions: the static shape is complete
    return shape

  dyn_shape = tf.shape(tensor)        # runtime shape, which covers the dynamic dimensions
  for index in non_static_indexes:    # fill in each dynamic dimension with a scalar tensor
    shape[index] = dyn_shape[index]
  return shape
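For example (a sketch; the placeholder shape is invented), a tensor with a dynamic batch dimension comes back as a mix of Python ints and scalar tensors:

x = tf.placeholder(tf.float32, shape=[None, 128, 768])   # batch size unknown until run time
shape = get_shape_list(x)
# shape[1] == 128 and shape[2] == 768 are plain Python ints (static),
# while shape[0] is a scalar tf.Tensor taken from tf.shape(x) (dynamic).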
■ Reshaping a higher-rank tensor to 2-D
def reshape_to_matrix(input_tensor):
  """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix)."""
  ndims = input_tensor.shape.ndims
  if ndims < 2:                       # a tensor of rank < 2 cannot be flattened to a matrix
    raise ValueError("Input tensor must have at least rank 2. Shape = %s" %
                     (input_tensor.shape))
  if ndims == 2:                      # already a matrix, return it unchanged
    return input_tensor

  width = input_tensor.shape[-1]      # keep the last dimension, flatten everything before it
  output_tensor = tf.reshape(input_tensor, [-1, width])
  return output_tensor
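For example (illustrative values), a [batch_size, seq_length, hidden_size] activation is flattened so the dense layers in the transformer block above can treat every token position as a row:

x = tf.zeros([8, 128, 768])    # [batch_size, seq_length, hidden_size]
m = reshape_to_matrix(x)       # shape [8 * 128, 768] = [1024, 768]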