# In the case where we have other sequences, we just concatenate
# them to the self-attention head before the projection.
attention_output = tf.concat(attention_heads, axis=-1)
tf.concat() is TensorFlow's function for concatenating tensors along a given axis: axis=0 concatenates along dimension 0, axis=1 along dimension 1, and axis=-1 along the last dimension (so for rank-3 tensors, axis=-1 is equivalent to axis=2). For a 2-D matrix, dimension 0 corresponds to the subsets enclosed by the outermost brackets and dimension 1 to those enclosed by the inner brackets; the higher the dimension, the more deeply nested the brackets.
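As a quick illustration (a minimal sketch, not part of the BERT source; the shapes are invented for demonstration), concatenating two [8, 64] attention-head outputs along axis=-1 produces an [8, 128] tensor:

import tensorflow as tf           # TensorFlow 1.x, matching the code above

a = tf.zeros([8, 64])             # one attention head: [batch*seq, size_per_head]
b = tf.zeros([8, 64])             # another attention head

c = tf.concat([a, b], axis=-1)    # concatenate on the last dimension:  [8, 128]
d = tf.concat([a, b], axis=0)     # concatenate on the first dimension: [16, 64]
print(c.shape, d.shape)           # (8, 128) (16, 64)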
# Run a linear projection of `hidden_size` then add a residual
# with `layer_input`.
with tf.variable_scope("output"):
  attention_output = tf.layers.dense(     # dense (fully connected) layer
      attention_output,
      hidden_size,
      kernel_initializer=create_initializer(initializer_range))
  attention_output = dropout(attention_output, hidden_dropout_prob)  # dropout
  attention_output = layer_norm(attention_output + layer_input)      # residual connection, then layer normalization
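Both sub-layers of the block follow this same "post-layer-norm" pattern: project, apply dropout, add the sub-layer input back as a residual, then layer-normalize. A minimal standalone sketch of the pattern (hypothetical shapes; tf.contrib.layers.layer_norm stands in for the layer_norm helper used above):

x = tf.zeros([1024, 768])                      # sub-layer input: [batch*seq, hidden_size]
sub = tf.layers.dense(x, 768)                  # linear projection back to hidden_size
sub = tf.nn.dropout(sub, keep_prob=0.9)        # dropout with hidden_dropout_prob = 0.1
out = tf.contrib.layers.layer_norm(sub + x)    # residual connection, then LayerNorm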
# The activation is only applied to the "intermediate" hidden layer.
with tf.variable_scope("intermediate"):
  intermediate_output = tf.layers.dense(  # dense (fully connected) layer
      attention_output,                   # the previous sub-layer's output is this layer's input
      intermediate_size,                  # intermediate (feed-forward) size
      activation=intermediate_act_fn,
      kernel_initializer=create_initializer(initializer_range))
# Down-project back to `hidden_size` then add the residual.
with tf.variable_scope("output"):
  layer_output = tf.layers.dense(         # dense layer projecting back down to hidden_size
      intermediate_output,
      hidden_size,
      kernel_initializer=create_initializer(initializer_range))
  layer_output = dropout(layer_output, hidden_dropout_prob)     # dropout
  layer_output = layer_norm(layer_output + attention_output)    # residual connection, then layer normalization
  prev_output = layer_output
  all_layer_outputs.append(layer_output)  # record this layer's output
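With the standard BERT-Base sizes (hidden_size = 768, intermediate_size = 3072), the two dense layers of this feed-forward sub-layer account for a large share of each layer's parameters; a quick back-of-the-envelope count:

hidden_size = 768          # BERT-Base
intermediate_size = 3072   # BERT-Base (4 * hidden_size)

expand  = hidden_size * intermediate_size + intermediate_size   # 768 -> 3072, plus bias
project = intermediate_size * hidden_size + hidden_size         # 3072 -> 768, plus bias
print(expand + project)    # 4722432 parameters in the feed-forward block of one layer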
if do_return_all_layers:                  # caller asked for every layer's output
  final_outputs = []
  for layer_output in all_layer_outputs:  # reshape each layer's output back to the input shape
    final_output = reshape_from_matrix(layer_output, input_shape)
    final_outputs.append(final_output)
  return final_outputs
else:                                     # otherwise return only the last layer's output
  final_output = reshape_from_matrix(prev_output, input_shape)
  return final_output
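reshape_from_matrix is the counterpart of reshape_to_matrix shown further below: it restores the [batch_size, seq_length, hidden_size] shape recorded in input_shape from the flattened [batch_size * seq_length, hidden_size] matrix. Roughly (a sketch inferred from its usage here, not a quote of the source):

# input_shape comes from get_shape_list(input_tensor) earlier in the function,
# e.g. [batch_size, seq_length, hidden_size]; prev_output is [batch_size * seq_length, hidden_size].
final_output = tf.reshape(prev_output, input_shape[0:-1] + [hidden_size])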
Related helper functions
def get_shape_list(tensor, expected_rank=None, name=None):
  """Returns a list of the shape of tensor, preferring static dimensions.

  Args:
    tensor: A tf.Tensor object to find the shape of.
    expected_rank: (optional) int. The expected rank of `tensor`. If this is
      specified and the `tensor` has a different rank, an exception will be
      thrown.
    name: Optional name of the tensor for the error message.

  Returns:
    A list of dimensions of the shape of tensor. All static dimensions will
    be returned as python integers, and dynamic dimensions will be returned
    as tf.Tensor scalars.
  """
  if name is None:                    # if no name was given, use the tensor's own name
    name = tensor.name

  if expected_rank is not None:       # if an expected rank was given, check the tensor against it
    assert_rank(tensor, expected_rank, name)

  shape = tensor.shape.as_list()      # static shape as a Python list

  non_static_indexes = []
  for (index, dim) in enumerate(shape):
    if dim is None:                   # a None entry means this dimension is dynamic
      non_static_indexes.append(index)

  if not non_static_indexes:          # no dynamic dimensions: the static shape is complete
    return shape

  dyn_shape = tf.shape(tensor)        # runtime shape, which covers the dynamic dimensions
  for index in non_static_indexes:    # fill in each dynamic dimension with a scalar tensor
    shape[index] = dyn_shape[index]
  return shape
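For example (a sketch; the placeholder shape is invented), a tensor with a dynamic batch dimension comes back as a mix of Python ints and scalar tensors:

x = tf.placeholder(tf.float32, shape=[None, 128, 768])   # batch size unknown until run time
shape = get_shape_list(x)
# shape[1] == 128 and shape[2] == 768 are plain Python ints (static),
# while shape[0] is a scalar tf.Tensor taken from tf.shape(x) (dynamic).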
■ Reshaping a higher-rank tensor to 2-D
def reshape_to_matrix(input_tensor):
  """Reshapes a >= rank 2 tensor to a rank 2 tensor (i.e., a matrix)."""
  ndims = input_tensor.shape.ndims
  if ndims < 2:                       # a tensor of rank < 2 cannot be flattened to a matrix
    raise ValueError("Input tensor must have at least rank 2. Shape = %s" %
                     (input_tensor.shape))
  if ndims == 2:                      # already a matrix, return it unchanged
    return input_tensor

  width = input_tensor.shape[-1]      # keep the last dimension, flatten everything before it
  output_tensor = tf.reshape(input_tensor, [-1, width])
  return output_tensor
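For example (illustrative values), a [batch_size, seq_length, hidden_size] activation is flattened so the dense layers in the transformer block above can treat every token position as a row:

x = tf.zeros([8, 128, 768])    # [batch_size, seq_length, hidden_size]
m = reshape_to_matrix(x)       # shape [8 * 128, 768] = [1024, 768]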