Commit 88e50fc3 authored by Amelie Royer

improving data-dependent initialization for TF 1.3+

parent 44d0a230
...@@ -68,8 +68,10 @@ def conv(inp, name, filter_size, out_channels, stride=1, ...@@ -68,8 +68,10 @@ def conv(inp, name, filter_size, out_channels, stride=1,
out = tf.nn.atrous_conv2d(inp, V_norm, dilation, padding) out = tf.nn.atrous_conv2d(inp, V_norm, dilation, padding)
m_init, v_init = tf.nn.moments(out, [0, 1, 2]) m_init, v_init = tf.nn.moments(out, [0, 1, 2])
scale_init = init_scale / tf.sqrt(v_init + 1e-8) scale_init = init_scale / tf.sqrt(v_init + 1e-8)
g = get_variable('g', shape=None, dtype=tf.float32, initializer=scale_init, trainable=True, regularizer=tf.contrib.layers.l2_regularizer(tf.GLOBAL['reg'])) g = get_variable('g', shape=(out_channels,), dtype=tf.float32, initializer=tf.constant_initializer(1.), trainable=True)
b = get_variable('b', shape=None, dtype=tf.float32, initializer=-m_init * scale_init, trainable=True, regularizer=tf.contrib.layers.l2_regularizer(tf.GLOBAL['reg'])) b = get_variable('b', shape=(out_channels,), dtype=tf.float32, initializer=tf.constant_initializer(0.), trainable=True)
g = tf.assign(g, scale_init)
b = tf.assign(b, -m_init * scale_init)
out = tf.reshape(scale_init, [1, 1, 1, out_channels]) * (out - tf.reshape(m_init, [1, 1, 1, out_channels])) out = tf.reshape(scale_init, [1, 1, 1, out_channels]) * (out - tf.reshape(m_init, [1, 1, 1, out_channels]))
if nonlinearity is not None: if nonlinearity is not None:
out = nonlinearity(out) out = nonlinearity(out)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment