import tensorflow as tf


class CrossLayer(tf.keras.layers.Layer):
    """One cross layer: x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l."""

    def __init__(self, **kwargs):
        super(CrossLayer, self).__init__(**kwargs)
        # The kernel (w_l) and bias (b_l) are created in build().

    def build(self, input_shape):
        # input_shape is a list: [x_0_shape, x_l_shape].
        # In DCN, x_0 and x_l share the same dimension d.
        dim = input_shape[0][-1]
        self.kernel = self.add_weight(name='kernel',
                                      shape=(dim, 1),  # w_l is a vector
                                      initializer='glorot_uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(dim,),  # b_l is a vector
                                    initializer='zeros',
                                    trainable=True)

    def call(self, inputs):
        x_0, x_l = inputs
        # In the paper, x_0, x_l, w_l, and b_l are column vectors of size d,
        # so x_l^T w_l is a scalar. The cross term x_0 x_l^T w_l is therefore
        # x_0 scaled by that per-example scalar, which keeps the parameter
        # count at O(d) per cross layer.
        x_l_T_w_l = tf.matmul(x_l, self.kernel)   # (batch_size, 1)
        x_0_x_l_T_w_l = x_0 * x_l_T_w_l           # (batch_size, dim), broadcast
        return x_0_x_l_T_w_l + self.bias + x_l
class DCN(tf.keras.Model):
    """Deep & Cross Network: a cross network and a deep network in parallel."""

    def __init__(self, num_cross_layers, deep_hidden_units, output_dim=1, **kwargs):
        super(DCN, self).__init__(**kwargs)
        self.num_cross_layers = num_cross_layers
        self.cross_layers = [CrossLayer() for _ in range(num_cross_layers)]
        self.deep_network = tf.keras.Sequential([
            tf.keras.layers.Dense(units, activation='relu') for units in deep_hidden_units
        ])
        self.combination_layer = tf.keras.layers.Dense(output_dim, activation='sigmoid')

    def call(self, inputs):  # inputs is the stacked feature vector x_0
        # Cross network path: every layer receives the original x_0 and the
        # previous layer's output x_l.
        x_cross = inputs  # x_0
        for i in range(self.num_cross_layers):
            x_cross = self.cross_layers[i]([inputs, x_cross])
        # Deep network path.
        x_deep = self.deep_network(inputs)
        # Combination: concatenate both paths and apply the sigmoid output layer.
        concatenated_output = tf.concat([x_cross, x_deep], axis=-1)
        final_output = self.combination_layer(concatenated_output)
        return final_output
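
A minimal usage sketch follows, assuming the stacked feature vector x_0 is already prepared; the feature dimension, number of cross layers, hidden-unit sizes, and training settings below are illustrative assumptions, not values from the original text.

# Minimal usage sketch with assumed hyperparameters; random tensors stand in
# for the stacked (embedded + dense) feature vector x_0 and binary labels.
if __name__ == "__main__":
    batch_size, feature_dim = 32, 16
    x_0 = tf.random.normal((batch_size, feature_dim))
    labels = tf.cast(tf.random.uniform((batch_size, 1)) > 0.5, tf.float32)

    model = DCN(num_cross_layers=3, deep_hidden_units=[64, 32])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=[tf.keras.metrics.AUC()])
    model.fit(x_0, labels, epochs=1, verbose=0)

    preds = model(x_0)  # (32, 1) probabilities from the sigmoid output layer
    print(preds.shape)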