BERT Model Source Code Analysis (Part 4)


"""
return self.embedding_output
def get_embedding_table(self):  获取嵌入表
return self.embedding_table
■ The GELU activation function
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the RELU.
  Original paper: https://arxiv.org/abs/1606.08415

  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  # tf.tanh is the hyperbolic tangent (not the arctangent); this tanh-based
  # expression approximates the Gaussian CDF used in the exact GELU.
  cdf = 0.5 * (1.0 + tf.tanh(
      (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
  return x * cdf
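To see what the tanh-based formula computes, here is a minimal NumPy sketch (not part of the BERT source; `gelu_tanh_approx` and `gelu_exact` are illustration helpers) comparing it with the exact definition GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))); the two stay very close over typical activation ranges.

import math
import numpy as np

def gelu_tanh_approx(x):
    # Same tanh-based formula as the gelu() above, written with NumPy only.
    return 0.5 * x * (1.0 + np.tanh(
        np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3))))

def gelu_exact(x):
    # Exact GELU: x times the standard Gaussian CDF.
    return np.array([0.5 * v * (1.0 + math.erf(v / math.sqrt(2.0))) for v in x])

xs = np.linspace(-4.0, 4.0, 81)
print(np.max(np.abs(gelu_tanh_approx(xs) - gelu_exact(xs))))  # small, well below 1e-2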
■ Getting an activation function from its string name
def get_activation(activation_string):
  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

  Args:
    activation_string: String name of the activation function.

  Returns:
    A Python function corresponding to the activation function. If
    `activation_string` is None, empty, or "linear", this will return None.
    If `activation_string` is not a string, it will return `activation_string`.

  Raises:
    ValueError: The `activation_string` does not correspond to a known
      activation.
  """

  # We assume that anything that's not a string is already an activation
  # function, so we just return it.
  if not isinstance(activation_string, six.string_types):
    return activation_string

  if not activation_string:  # None or an empty string means no activation
    return None

  act = activation_string.lower()  # normalize the name to lower case
  if act == "linear":
    return None
  elif act == "relu":   # ReLU activation
    return tf.nn.relu
  elif act == "gelu":   # GELU activation (defined above)
    return gelu
  elif act == "tanh":   # hyperbolic tangent activation
    return tf.tanh
  else:                 # unknown name: raise an error
    raise ValueError("Unsupported activation: %s" % act)
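As a quick illustration (an assumed usage, not code from modeling.py), this is how the activation named in a config could be resolved and applied; note that non-strings pass through unchanged, while "linear", None, and the empty string all map to None. The dense layer and `input_tensor` below are hypothetical.

hidden_act = get_activation("gelu")              # -> the gelu() function defined above
assert get_activation("linear") is None          # "linear" means no activation
assert get_activation(tf.nn.relu) is tf.nn.relu  # non-strings are returned as-is

# Hypothetical dense layer of assumed size 768 using the resolved activation.
output = tf.layers.dense(input_tensor, 768, activation=hidden_act)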
■ Getting the assignment map from a checkpoint
def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
  """Compute the union of the current variables and checkpoint variables."""
  # Collect the current graph variables and the variables stored in the checkpoint.
  assignment_map = {}
  initialized_variable_names = {}

  # An OrderedDict keeps keys in insertion order; it does not sort the keys themselves.
  name_to_variable = collections.OrderedDict()
  for var in tvars:
    name = var.name
    # Strip the ":0" output suffix, e.g. "bert/embeddings/word_embeddings:0" -> "...word_embeddings".
    m = re.match("^(.*):\\d+$", name)
    if m is not None:
      name = m.group(1)
    name_to_variable[name] = var

  # List the (name, shape) pairs stored in the checkpoint.
  init_vars = tf.train.list_variables(init_checkpoint)

  assignment_map = collections.OrderedDict()
  for x in init_vars:
    (name, var) = (x[0], x[1])
    if name not in name_to_variable:
      continue
    assignment_map[name] = name
    initialized_variable_names[name] = 1
    initialized_variable_names[name + ":0"] = 1

  return (assignment_map, initialized_variable_names)
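A minimal usage sketch (assumed here, but it mirrors how the BERT training scripts typically consume this function, with `init_checkpoint` pointing to a pre-trained checkpoint): the returned assignment_map is handed to tf.train.init_from_checkpoint so that matching variables start from the pre-trained weights, while initialized_variable_names is only used for logging.

tvars = tf.trainable_variables()
(assignment_map, initialized_variable_names) = get_assignment_map_from_checkpoint(
    tvars, init_checkpoint)
tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

# Log which variables were loaded from the checkpoint.
for var in tvars:
  init_string = ", *INIT_FROM_CKPT*" if var.name in initialized_variable_names else ""
  tf.logging.info("name = %s, shape = %s%s", var.name, var.shape, init_string)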
■ The dropout function
def dropout(input_tensor, dropout_prob):
  """Perform dropout.

  Args:
    input_tensor: float Tensor.
    dropout_prob: Python float. The probability of dropping out a value (NOT of
      *keeping* a dimension as in `tf.nn.dropout`).

  Returns:
    A version of `input_tensor` with dropout applied.
  """
  # If the dropout probability is None or 0, return the input unchanged.
  if dropout_prob is None or dropout_prob == 0.0:
    return input_tensor

  # 1. tf.nn.dropout takes keep_prob: the probability that each element is kept.
  #    Its signature is: def dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
  # 2. tf.layers.dropout takes rate: the probability that each element is dropped,
  #    so keep_prob = 1 - rate.
  # An element is either kept or dropped, so keep_prob + dropout_prob = 1.
  output = tf.nn.dropout(input_tensor, 1.0 - dropout_prob)
  return output
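The keep_prob / rate distinction can be seen directly in a small sketch (assumed values, TF 1.x APIs): tf.nn.dropout keeps each element with probability keep_prob and scales the survivors by 1/keep_prob, while tf.layers.dropout expresses the same behaviour through the drop rate.

dropout_prob = 0.1                  # probability of dropping an element
keep_prob = 1.0 - dropout_prob      # probability of keeping an element

x = tf.ones([2, 4])
out_nn = tf.nn.dropout(x, keep_prob)                                  # kept values become 1/keep_prob
out_layers = tf.layers.dropout(x, rate=dropout_prob, training=True)   # equivalent, using the drop rate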
■ Layer normalization
def layer_norm(input_tensor, name=None):
  """Run layer normalization on the last dimension of the tensor."""
