-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtransformer_helper_dc.py
75 lines (57 loc) · 2.34 KB
/
transformer_helper_dc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from os import environ

# Silence TensorFlow's native (C++) INFO and WARNING log output.
# This has to happen before TensorFlow is imported: messages emitted while the
# library is loading are not filtered if the variable is only set afterwards.
environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np
import tensorflow as tf
def get_angles(pos, k, d: int):
    """
    Get angles to be used in the positional encoding vectors

    Arguments:
        pos -- Column vector containing the positions [[0], [1], ...,[N-1]]
               (np.array or nested list)
        k --   Row vector containing the dimension span [[0, 1, 2, ..., d-1]]
               (np.array or nested list)
        d --   Encoding size

    Returns:
        angles -- np.array with the broadcast shape of pos and k,
                  i.e. (N, d) for the inputs described above
    """
    # Accept plain Python sequences as well as arrays: floor division and
    # broadcasting below are only defined for array operands.
    pos = np.asarray(pos)
    k = np.asarray(k)

    # Get i from dimension span k: columns 2i and 2i+1 share the same frequency
    i = k // 2

    # Calculate the angles using pos, i and d
    angles = pos / (10000 ** (2 * i / d))

    return angles
def positional_encoding(positions: int, d: int):
    """
    Build the table holding the positional encoding of every position

    Arguments:
        positions - Number of positions to encode (rows 0 .. positions-1)
        d - Encoding size

    Returns:
        pos_encoding - (1, positions, d) float32 tensor with the positional encodings
    """
    position_column = np.arange(positions)[:, np.newaxis]
    dimension_row = np.arange(d)[np.newaxis, :]
    encoding = get_angles(position_column, dimension_row, d)

    # even columns (2i) hold the sine of the angle
    encoding[:, 0::2] = np.sin(encoding[:, 0::2])

    # odd columns (2i+1) hold the cosine of the angle
    encoding[:, 1::2] = np.cos(encoding[:, 1::2])

    # add a leading axis of size 1 in front of the (positions, d) table
    batched = encoding.reshape(1, positions, d)

    # convert the NumPy table into a float32 tensor
    return tf.cast(batched, dtype=tf.float32)
def create_look_ahead_mask(dim1, dim2=None):
    """
    Returns an additive look-ahead mask: entries on and below the main
    diagonal are 0 and entries strictly above it are -1e11.

    Row r therefore leaves columns 0..r untouched and pushes every later
    column to a very large negative value.
    NOTE(review): presumably the mask is added to attention scores before a
    softmax so that later time steps receive ~0 weight - confirm against caller.

    Arguments:
        dim1 -- number of rows of the mask
        dim2 -- number of columns of the mask; defaults to dim1 (square mask)

    Returns:
        mask -- (dim1, dim2) float32 tensor

    >>> create_look_ahead_mask(5)
    <tf.Tensor: shape=(5, 5), dtype=float32, numpy=
    array([[-0.e+00, -1.e+11, -1.e+11, -1.e+11, -1.e+11],
           [-0.e+00, -0.e+00, -1.e+11, -1.e+11, -1.e+11],
           [-0.e+00, -0.e+00, -0.e+00, -1.e+11, -1.e+11],
           [-0.e+00, -0.e+00, -0.e+00, -0.e+00, -1.e+11],
           [-0.e+00, -0.e+00, -0.e+00, -0.e+00, -0.e+00]], dtype=float32)>
    """
    if dim2 is None:
        # single-argument call: build a square mask
        dim2 = dim1

    # band_part(x, -1, 0) keeps the whole lower triangle (diagonal included)
    # and zeroes everything above the diagonal
    lower_triangle = tf.linalg.band_part(tf.ones((dim1, dim2)), -1, 0)

    # flip to "ones strictly above the diagonal", then scale to the penalty
    mask = (1 - lower_triangle) * -1e11

    return mask