Projekt_AI-Automatyczny_saper/venv/Lib/site-packages/caffe2/python/attention.py

## @package attention
# Module caffe2.python.attention


from caffe2.python import brew


class AttentionType:
    """Integer tags naming the attention variants built by this module."""

    # Consecutive integers starting at zero, one per attention flavour.
    Regular = 0
    Recurrent = 1
    Dot = 2
    SoftCoverage = 3


def s(scope, name):
    """Return ``"<scope>/<name>"`` with both parts converted via ``str``.

    Blob names are prefixed by hand here (rather than relying on an ambient
    name scope) because of this module's internal/external blob
    relationships.
    """
    return "/".join((str(scope), str(name)))


# c_i = \sum_j w_{ij}\textbf{s}_j
def _calc_weighted_context(
    model,
    encoder_outputs_transposed,
    encoder_output_dim,
    attention_weights_3d,
    scope,
):
    """Add the ops computing the attention-weighted encoder context.

    A batched matrix multiply of ``encoder_outputs_transposed`` by
    ``attention_weights_3d`` is followed by an in-place Reshape to
    ``[1, -1, encoder_output_dim]``.

    Args:
        model: model helper whose net receives the operators.
        encoder_outputs_transposed: left operand of the batched matmul.
        encoder_output_dim: size of the last dimension after the reshape.
        attention_weights_3d: right operand of the batched matmul.
        scope: prefix used for the names of the created blobs.

    Returns:
        The reshaped context blob (first output of the Reshape op).
    """
    context_name = s(scope, 'attention_weighted_encoder_context')
    old_shape_name = s(scope, 'attention_weighted_encoder_context_old_shape')

    # Annotated upstream as [batch_size, encoder_output_dim, 1].
    context = brew.batch_mat_mul(
        model,
        [encoder_outputs_transposed, attention_weights_3d],
        context_name,
    )

    # Reshape writes back into the same blob; the second output receives the
    # previous shape and is not used further here.
    # Result shape: [1, -1, encoder_output_dim].
    reshaped_context, _old_shape = model.net.Reshape(
        context,
        [context, old_shape_name],
        shape=[1, -1, encoder_output_dim],
    )
    return reshaped_context


# Calculate a softmax over the passed in attention energy logits
def _calc_attention_weights(
    model,
    attention_logits_transposed,
    scope,
    encoder_lengths=None,
):
    """Add a softmax (axis 1) over the attention logits.

    When ``encoder_lengths`` is given, the logits first pass through a
    SequenceMask op in ``'sequence'`` mode.

    Args:
        model: model helper whose net receives the operators.
        attention_logits_transposed: logits blob fed to the softmax.
        scope: prefix used for the name of the softmax output blob.
        encoder_lengths: optional lengths blob; ``None`` disables masking.

    Returns:
        The softmax output blob, annotated upstream as
        [batch_size, encoder_length, 1].
    """
    logits = attention_logits_transposed
    if encoder_lengths is not None:
        # NOTE(review): unlike every other blob in this module, this output
        # name is not passed through s(scope, ...) -- confirm that is intended.
        logits = model.net.SequenceMask(
            [logits, encoder_lengths],
            ['masked_attention_logits'],
            mode='sequence',
        )

    weights_name = s(scope, 'attention_weights_3d')
    return brew.softmax(
        model,
        logits,
        weights_name,
        engine='CUDNN',
        axis=1,
    )


# e_{ij} = \textbf{v}^T tanh \alpha(\textbf{h}_{i-1}, \textbf{s}_j)
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Turn the summed match input into transposed attention logits.

    Applies Tanh in place, projects to a single output unit with an FC layer
    (``axis=2``, ``freeze_bias=True``) and swaps the first two axes.

    Args:
        model: model helper whose net receives the operators.
        decoder_hidden_encoder_outputs_sum: blob to squash and project;
            annotated upstream as
            [encoder_length, batch_size, encoder_output_dim].
        encoder_output_dim: input dimension of the FC layer.
        scope: prefix used for the names of the created blobs.

    Returns:
        The transposed logits blob, annotated upstream as
        [batch_size, encoder_length, 1].
    """
    # Input and output are the same blob, so the Tanh overwrites its input.
    squashed = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )

    logits_name = s(scope, 'attention_logits')
    logits = brew.fc(
        model,
        squashed,
        logits_name,
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )

    transposed_name = s(scope, 'attention_logits_transposed')
    return brew.transpose(
        model,
        logits,
        transposed_name,
        axes=[1, 0, 2],
    )


# \textbf{W}^\alpha used in the context of \alpha_{sum}(a,b)
def _apply_fc_weight_for_sum_match(
    model,
    input,
    dim_in,
    dim_out,
    scope,
    name,
):
    """Project ``input`` with an FC layer and squeeze away dimension 0.

    Args:
        model: model helper whose net receives the operators.
        input: blob fed to the FC layer (applied with ``axis=2``).
        dim_in: input dimension of the FC layer.
        dim_out: output dimension of the FC layer.
        scope: prefix used for the FC output blob name.
        name: unscoped name of the FC output blob.

    Returns:
        The FC output blob after an in-place Squeeze of ``dims=[0]``.
    """
    projected = brew.fc(
        model,
        input,
        s(scope, name),
        dim_in=dim_in,
        dim_out=dim_out,
        axis=2,
    )
    # Same blob as input and output: the squeeze happens in place.
    return model.net.Squeeze(projected, projected, dims=[0])


# Implement RecAtt due to section 4.1 in http://arxiv.org/abs/1601.03317
def apply_recurrent_attention(
    model,
    encoder_output_dim,
    encoder_outputs_transposed,
    weighted_encoder_outputs,
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    attention_weighted_encoder_context_t_prev,
    scope,
    encoder_lengths=None,
):
    """Add the ops for one step of recurrent attention.

    The previous attention context and the current decoder hidden state are
    each projected to ``encoder_output_dim``, added together, and then
    broadcast-added to ``weighted_encoder_outputs``. That sum feeds the
    logits, softmax and weighted-context helpers.

    Args:
        model: model helper whose net receives the operators.
        encoder_output_dim: encoder output size; target size of both
            projections.
        encoder_outputs_transposed: encoder outputs used to form the context.
        weighted_encoder_outputs: already-projected encoder outputs.
        decoder_hidden_state_t: current decoder hidden state blob.
        decoder_hidden_state_dim: size of the decoder hidden state.
        attention_weighted_encoder_context_t_prev: context from the previous
            step.
        scope: prefix used for the names of the created blobs.
        encoder_lengths: optional lengths blob used to mask the logits.

    Returns:
        A 3-tuple ``(context, attention_weights_3d, [match_sum])`` where
        ``match_sum`` is the ``decoder_hidden_encoder_outputs_sum`` blob.
    """
    prev_context_proj = _apply_fc_weight_for_sum_match(
        model,
        attention_weighted_encoder_context_t_prev,
        encoder_output_dim,
        encoder_output_dim,
        scope,
        'weighted_prev_attention_context',
    )
    hidden_state_proj = _apply_fc_weight_for_sum_match(
        model,
        decoder_hidden_state_t,
        decoder_hidden_state_dim,
        encoder_output_dim,
        scope,
        'weighted_decoder_hidden_state',
    )

    # Sum of the two per-step projections (no broadcasting requested).
    step_terms = model.net.Add(
        [prev_context_proj, hidden_state_proj],
        s(scope, 'decoder_hidden_encoder_outputs_sum_tmp'),
    )

    # Annotated upstream as [encoder_length, batch_size, encoder_output_dim].
    match_sum = model.net.Add(
        [weighted_encoder_outputs, step_terms],
        s(scope, 'decoder_hidden_encoder_outputs_sum'),
        broadcast=1,
    )

    logits_transposed = _calc_attention_logits_from_sum_match(
        model,
        match_sum,
        encoder_output_dim,
        scope,
    )

    # Annotated upstream as [batch_size, encoder_length, 1].
    weights_3d = _calc_attention_weights(
        model,
        logits_transposed,
        scope,
        encoder_lengths,
    )

    # The helper reshapes its result to [1, -1, encoder_output_dim].
    context = _calc_weighted_context(
        model,
        encoder_outputs_transposed,
        encoder_output_dim,
        weights_3d,
        scope,
    )
    return context, weights_3d, [match_sum]


def apply_regular_attention(
    model,
    encoder_output_dim,
    encoder_outputs_transposed,
    weighted_encoder_outputs,
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths=None,
):
    """Add the ops for one step of regular (sum-match) attention.

    The decoder hidden state is projected to ``encoder_output_dim`` and
    broadcast-added to ``weighted_encoder_outputs``. That sum feeds the
    logits, softmax and weighted-context helpers.

    Args:
        model: model helper whose net receives the operators.
        encoder_output_dim: encoder output size; target size of the
            projection.
        encoder_outputs_transposed: encoder outputs used to form the context.
        weighted_encoder_outputs: already-projected encoder outputs.
        decoder_hidden_state_t: current decoder hidden state blob.
        decoder_hidden_state_dim: size of the decoder hidden state.
        scope: prefix used for the names of the created blobs.
        encoder_lengths: optional lengths blob used to mask the logits.

    Returns:
        A 3-tuple ``(context, attention_weights_3d, [match_sum])`` where
        ``match_sum`` is the ``decoder_hidden_encoder_outputs_sum`` blob.
    """
    hidden_state_proj = _apply_fc_weight_for_sum_match(
        model,
        decoder_hidden_state_t,
        decoder_hidden_state_dim,
        encoder_output_dim,
        scope,
        'weighted_decoder_hidden_state',
    )

    # Annotated upstream as [encoder_length, batch_size, encoder_output_dim].
    match_sum = model.net.Add(
        [weighted_encoder_outputs, hidden_state_proj],
        s(scope, 'decoder_hidden_encoder_outputs_sum'),
        broadcast=1,
        use_grad_hack=1,
    )

    logits_transposed = _calc_attention_logits_from_sum_match(
        model,
        match_sum,
        encoder_output_dim,
        scope,
    )

    # Annotated upstream as [batch_size, encoder_length, 1].
    weights_3d = _calc_attention_weights(
        model,
        logits_transposed,
        scope,
        encoder_lengths,
    )

    # The helper reshapes its result to [1, -1, encoder_output_dim].
    context = _calc_weighted_context(
        model,
        encoder_outputs_transposed,
        encoder_output_dim,
        weights_3d,
        scope,
    )
    return context, weights_3d, [match_sum]


def apply_dot_attention(
    model,
    encoder_output_dim,
    # [batch_size, encoder_output_dim, encoder_length]
    encoder_outputs_transposed,
    # [1, batch_size, decoder_state_dim]
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths=None,
):
    """Add the ops for one step of dot-product attention.

    The decoder hidden state is projected with an FC layer only when its
    size differs from ``encoder_output_dim``. It is then squeezed, expanded
    on axis 2 and multiplied (BatchMatMul with ``trans_a=1``) against
    ``encoder_outputs_transposed`` to obtain the logits.

    Args:
        model: model helper whose net receives the operators.
        encoder_output_dim: encoder output size.
        encoder_outputs_transposed: encoder outputs blob.
        decoder_hidden_state_t: current decoder hidden state blob.
        decoder_hidden_state_dim: size of the decoder hidden state.
        scope: prefix used for the names of the created blobs.
        encoder_lengths: optional lengths blob used to mask the logits.

    Returns:
        A 3-tuple ``(context, attention_weights_3d, [])``; the trailing list
        is always empty for this attention type.
    """
    needs_projection = decoder_hidden_state_dim != encoder_output_dim
    if needs_projection:
        query = brew.fc(
            model,
            decoder_hidden_state_t,
            s(scope, 'weighted_decoder_hidden_state'),
            dim_in=decoder_hidden_state_dim,
            dim_out=encoder_output_dim,
            axis=2,
        )
    else:
        # Sizes already agree, so the hidden state is used unprojected.
        query = decoder_hidden_state_t

    # Drop the leading axis; annotated upstream as
    # [batch_size, decoder_state_dim].
    query_2d = model.net.Squeeze(
        query,
        s(scope, 'squeezed_weighted_decoder_hidden_state'),
        dims=[0],
    )

    # Append a trailing axis in place; annotated upstream as
    # [batch_size, decoder_state_dim, 1].
    query_3d = model.net.ExpandDims(query_2d, query_2d, dims=[2])

    # NOTE(review): upstream annotates this as
    # [batch_size, encoder_output_dim, 1]; given trans_a=1 and the parameter
    # shape notes it is presumably [batch_size, encoder_length, 1] -- confirm.
    logits_transposed = model.net.BatchMatMul(
        [encoder_outputs_transposed, query_3d],
        s(scope, 'attention_logits'),
        trans_a=1,
    )

    # Annotated upstream as [batch_size, encoder_length, 1].
    weights_3d = _calc_attention_weights(
        model,
        logits_transposed,
        scope,
        encoder_lengths,
    )

    # The helper reshapes its result to [1, -1, encoder_output_dim].
    context = _calc_weighted_context(
        model,
        encoder_outputs_transposed,
        encoder_output_dim,
        weights_3d,
        scope,
    )
    return context, weights_3d, []


def apply_soft_coverage_attention(
    model,
    encoder_output_dim,
    encoder_outputs_transposed,
    weighted_encoder_outputs,
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths,
    coverage_t_prev,
    coverage_weights,
):
    """Add the ops for one step of soft-coverage attention.

    On top of the regular sum-match input (projected decoder state plus
    ``weighted_encoder_outputs``), a coverage term is added:
    ``coverage_weights`` multiplied by the squeezed and transposed previous
    coverage. After the attention weights are computed they are accumulated
    onto ``coverage_t_prev`` to produce the new coverage blob.

    Args:
        model: model helper whose net receives the operators.
        encoder_output_dim: encoder output size; target size of the
            projection.
        encoder_outputs_transposed: encoder outputs used to form the context.
        weighted_encoder_outputs: already-projected encoder outputs.
        decoder_hidden_state_t: current decoder hidden state blob.
        decoder_hidden_state_dim: size of the decoder hidden state.
        scope: prefix used for the names of the created blobs.
        encoder_lengths: lengths blob used to mask the logits (``None``
            skips masking in the helper).
        coverage_t_prev: coverage blob from the previous step.
        coverage_weights: blob multiplied by the previous coverage.

    Returns:
        A 4-tuple ``(context, attention_weights_3d, [match_sum],
        coverage_t)`` where ``match_sum`` is the
        ``decoder_hidden_encoder_outputs_sum`` blob.
    """
    hidden_state_proj = _apply_fc_weight_for_sum_match(
        model,
        decoder_hidden_state_t,
        decoder_hidden_state_dim,
        encoder_output_dim,
        scope,
        'weighted_decoder_hidden_state',
    )

    # Annotated upstream as [encoder_length, batch_size, encoder_output_dim].
    base_sum = model.net.Add(
        [weighted_encoder_outputs, hidden_state_proj],
        s(scope, 'decoder_hidden_encoder_outputs_sum_tmp'),
        broadcast=1,
    )

    # Drop the leading axis; annotated upstream as
    # [batch_size, encoder_length].
    prev_coverage_2d = model.net.Squeeze(
        coverage_t_prev,
        s(scope, 'coverage_t_prev_2d'),
        dims=[0],
    )

    # Annotated upstream as [encoder_length, batch_size].
    prev_coverage_transposed = brew.transpose(
        model,
        prev_coverage_2d,
        s(scope, 'coverage_t_prev_transposed'),
    )

    # Annotated upstream as [encoder_length, batch_size, encoder_output_dim].
    coverage_term = model.net.Mul(
        [coverage_weights, prev_coverage_transposed],
        s(scope, 'scaled_coverage_weights'),
        broadcast=1,
        axis=0,
    )

    # Annotated upstream as [encoder_length, batch_size, encoder_output_dim].
    match_sum = model.net.Add(
        [base_sum, coverage_term],
        s(scope, 'decoder_hidden_encoder_outputs_sum'),
    )

    # Annotated upstream as [batch_size, encoder_length, 1].
    logits_transposed = _calc_attention_logits_from_sum_match(
        model,
        match_sum,
        encoder_output_dim,
        scope,
    )

    # Annotated upstream as [batch_size, encoder_length, 1].
    weights_3d = _calc_attention_weights(
        model,
        logits_transposed,
        scope,
        encoder_lengths,
    )

    # The helper reshapes its result to [1, -1, encoder_output_dim].
    context = _calc_weighted_context(
        model,
        encoder_outputs_transposed,
        encoder_output_dim,
        weights_3d,
        scope,
    )

    # Drop the trailing axis; annotated upstream as
    # [batch_size, encoder_length].
    weights_2d = model.net.Squeeze(
        weights_3d,
        s(scope, 'attention_weights_2d'),
        dims=[2],
    )

    # Accumulate this step's weights onto the running coverage.
    coverage_t = model.net.Add(
        [coverage_t_prev, weights_2d],
        s(scope, 'coverage_t'),
        broadcast=1,
    )

    return (
        context,
        weights_3d,
        [match_sum],
        coverage_t,
    )
network 2021-06-01 17:38:31 +02:00			`## @package attention`
			`# Module caffe2.python.attention`





			`from caffe2.python import brew`


			`class AttentionType:`
			`Regular, Recurrent, Dot, SoftCoverage = tuple(range(4))`


			`def s(scope, name):`
			`# We have to manually scope due to our internal/external blob`
			`# relationships.`
			`return "{}/{}".format(str(scope), str(name))`


			`# c_i = \sum_j w_{ij}\textbf{s}_j`
			`def _calc_weighted_context(`
			`model,`
			`encoder_outputs_transposed,`
			`encoder_output_dim,`
			`attention_weights_3d,`
			`scope,`
			`):`
			`# [batch_size, encoder_output_dim, 1]`
			`attention_weighted_encoder_context = brew.batch_mat_mul(`
			`model,`
			`[encoder_outputs_transposed, attention_weights_3d],`
			`s(scope, 'attention_weighted_encoder_context'),`
			`)`
			`# [batch_size, encoder_output_dim]`
			`attention_weighted_encoder_context, _ = model.net.Reshape(`
			`attention_weighted_encoder_context,`
			`[`
			`attention_weighted_encoder_context,`
			`s(scope, 'attention_weighted_encoder_context_old_shape'),`
			`],`
			`shape=[1, -1, encoder_output_dim],`
			`)`
			`return attention_weighted_encoder_context`


			`# Calculate a softmax over the passed in attention energy logits`
			`def _calc_attention_weights(`
			`model,`
			`attention_logits_transposed,`
			`scope,`
			`encoder_lengths=None,`
			`):`
			`if encoder_lengths is not None:`
			`attention_logits_transposed = model.net.SequenceMask(`
			`[attention_logits_transposed, encoder_lengths],`
			`['masked_attention_logits'],`
			`mode='sequence',`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_weights_3d = brew.softmax(`
			`model,`
			`attention_logits_transposed,`
			`s(scope, 'attention_weights_3d'),`
			`engine='CUDNN',`
			`axis=1,`
			`)`
			`return attention_weights_3d`


			`# e_{ij} = \textbf{v}^T tanh \alpha(\textbf{h}_{i-1}, \textbf{s}_j)`
			`def _calc_attention_logits_from_sum_match(`
			`model,`
			`decoder_hidden_encoder_outputs_sum,`
			`encoder_output_dim,`
			`scope,`
			`):`
			`# [encoder_length, batch_size, encoder_output_dim]`
			`decoder_hidden_encoder_outputs_sum = model.net.Tanh(`
			`decoder_hidden_encoder_outputs_sum,`
			`decoder_hidden_encoder_outputs_sum,`
			`)`

			`# [encoder_length, batch_size, 1]`
			`attention_logits = brew.fc(`
			`model,`
			`decoder_hidden_encoder_outputs_sum,`
			`s(scope, 'attention_logits'),`
			`dim_in=encoder_output_dim,`
			`dim_out=1,`
			`axis=2,`
			`freeze_bias=True,`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_logits_transposed = brew.transpose(`
			`model,`
			`attention_logits,`
			`s(scope, 'attention_logits_transposed'),`
			`axes=[1, 0, 2],`
			`)`
			`return attention_logits_transposed`


			`# \textbf{W}^\alpha used in the context of \alpha_{sum}(a,b)`
			`def _apply_fc_weight_for_sum_match(`
			`model,`
			`input,`
			`dim_in,`
			`dim_out,`
			`scope,`
			`name,`
			`):`
			`output = brew.fc(`
			`model,`
			`input,`
			`s(scope, name),`
			`dim_in=dim_in,`
			`dim_out=dim_out,`
			`axis=2,`
			`)`
			`output = model.net.Squeeze(`
			`output,`
			`output,`
			`dims=[0],`
			`)`
			`return output`


			`# Implement RecAtt due to section 4.1 in http://arxiv.org/abs/1601.03317`
			`def apply_recurrent_attention(`
			`model,`
			`encoder_output_dim,`
			`encoder_outputs_transposed,`
			`weighted_encoder_outputs,`
			`decoder_hidden_state_t,`
			`decoder_hidden_state_dim,`
			`attention_weighted_encoder_context_t_prev,`
			`scope,`
			`encoder_lengths=None,`
			`):`
			`weighted_prev_attention_context = _apply_fc_weight_for_sum_match(`
			`model=model,`
			`input=attention_weighted_encoder_context_t_prev,`
			`dim_in=encoder_output_dim,`
			`dim_out=encoder_output_dim,`
			`scope=scope,`
			`name='weighted_prev_attention_context',`
			`)`

			`weighted_decoder_hidden_state = _apply_fc_weight_for_sum_match(`
			`model=model,`
			`input=decoder_hidden_state_t,`
			`dim_in=decoder_hidden_state_dim,`
			`dim_out=encoder_output_dim,`
			`scope=scope,`
			`name='weighted_decoder_hidden_state',`
			`)`
			`# [1, batch_size, encoder_output_dim]`
			`decoder_hidden_encoder_outputs_sum_tmp = model.net.Add(`
			`[`
			`weighted_prev_attention_context,`
			`weighted_decoder_hidden_state,`
			`],`
			`s(scope, 'decoder_hidden_encoder_outputs_sum_tmp'),`
			`)`
			`# [encoder_length, batch_size, encoder_output_dim]`
			`decoder_hidden_encoder_outputs_sum = model.net.Add(`
			`[`
			`weighted_encoder_outputs,`
			`decoder_hidden_encoder_outputs_sum_tmp,`
			`],`
			`s(scope, 'decoder_hidden_encoder_outputs_sum'),`
			`broadcast=1,`
			`)`
			`attention_logits_transposed = _calc_attention_logits_from_sum_match(`
			`model=model,`
			`decoder_hidden_encoder_outputs_sum=decoder_hidden_encoder_outputs_sum,`
			`encoder_output_dim=encoder_output_dim,`
			`scope=scope,`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_weights_3d = _calc_attention_weights(`
			`model=model,`
			`attention_logits_transposed=attention_logits_transposed,`
			`scope=scope,`
			`encoder_lengths=encoder_lengths,`
			`)`

			`# [batch_size, encoder_output_dim, 1]`
			`attention_weighted_encoder_context = _calc_weighted_context(`
			`model=model,`
			`encoder_outputs_transposed=encoder_outputs_transposed,`
			`encoder_output_dim=encoder_output_dim,`
			`attention_weights_3d=attention_weights_3d,`
			`scope=scope,`
			`)`
			`return attention_weighted_encoder_context, attention_weights_3d, [`
			`decoder_hidden_encoder_outputs_sum,`
			`]`


			`def apply_regular_attention(`
			`model,`
			`encoder_output_dim,`
			`encoder_outputs_transposed,`
			`weighted_encoder_outputs,`
			`decoder_hidden_state_t,`
			`decoder_hidden_state_dim,`
			`scope,`
			`encoder_lengths=None,`
			`):`
			`weighted_decoder_hidden_state = _apply_fc_weight_for_sum_match(`
			`model=model,`
			`input=decoder_hidden_state_t,`
			`dim_in=decoder_hidden_state_dim,`
			`dim_out=encoder_output_dim,`
			`scope=scope,`
			`name='weighted_decoder_hidden_state',`
			`)`

			`# [encoder_length, batch_size, encoder_output_dim]`
			`decoder_hidden_encoder_outputs_sum = model.net.Add(`
			`[weighted_encoder_outputs, weighted_decoder_hidden_state],`
			`s(scope, 'decoder_hidden_encoder_outputs_sum'),`
			`broadcast=1,`
			`use_grad_hack=1,`
			`)`

			`attention_logits_transposed = _calc_attention_logits_from_sum_match(`
			`model=model,`
			`decoder_hidden_encoder_outputs_sum=decoder_hidden_encoder_outputs_sum,`
			`encoder_output_dim=encoder_output_dim,`
			`scope=scope,`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_weights_3d = _calc_attention_weights(`
			`model=model,`
			`attention_logits_transposed=attention_logits_transposed,`
			`scope=scope,`
			`encoder_lengths=encoder_lengths,`
			`)`

			`# [batch_size, encoder_output_dim, 1]`
			`attention_weighted_encoder_context = _calc_weighted_context(`
			`model=model,`
			`encoder_outputs_transposed=encoder_outputs_transposed,`
			`encoder_output_dim=encoder_output_dim,`
			`attention_weights_3d=attention_weights_3d,`
			`scope=scope,`
			`)`
			`return attention_weighted_encoder_context, attention_weights_3d, [`
			`decoder_hidden_encoder_outputs_sum,`
			`]`


			`def apply_dot_attention(`
			`model,`
			`encoder_output_dim,`
			`# [batch_size, encoder_output_dim, encoder_length]`
			`encoder_outputs_transposed,`
			`# [1, batch_size, decoder_state_dim]`
			`decoder_hidden_state_t,`
			`decoder_hidden_state_dim,`
			`scope,`
			`encoder_lengths=None,`
			`):`
			`if decoder_hidden_state_dim != encoder_output_dim:`
			`weighted_decoder_hidden_state = brew.fc(`
			`model,`
			`decoder_hidden_state_t,`
			`s(scope, 'weighted_decoder_hidden_state'),`
			`dim_in=decoder_hidden_state_dim,`
			`dim_out=encoder_output_dim,`
			`axis=2,`
			`)`
			`else:`
			`weighted_decoder_hidden_state = decoder_hidden_state_t`

			`# [batch_size, decoder_state_dim]`
			`squeezed_weighted_decoder_hidden_state = model.net.Squeeze(`
			`weighted_decoder_hidden_state,`
			`s(scope, 'squeezed_weighted_decoder_hidden_state'),`
			`dims=[0],`
			`)`

			`# [batch_size, decoder_state_dim, 1]`
			`expanddims_squeezed_weighted_decoder_hidden_state = model.net.ExpandDims(`
			`squeezed_weighted_decoder_hidden_state,`
			`squeezed_weighted_decoder_hidden_state,`
			`dims=[2],`
			`)`

			`# [batch_size, encoder_output_dim, 1]`
			`attention_logits_transposed = model.net.BatchMatMul(`
			`[`
			`encoder_outputs_transposed,`
			`expanddims_squeezed_weighted_decoder_hidden_state,`
			`],`
			`s(scope, 'attention_logits'),`
			`trans_a=1,`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_weights_3d = _calc_attention_weights(`
			`model=model,`
			`attention_logits_transposed=attention_logits_transposed,`
			`scope=scope,`
			`encoder_lengths=encoder_lengths,`
			`)`

			`# [batch_size, encoder_output_dim, 1]`
			`attention_weighted_encoder_context = _calc_weighted_context(`
			`model=model,`
			`encoder_outputs_transposed=encoder_outputs_transposed,`
			`encoder_output_dim=encoder_output_dim,`
			`attention_weights_3d=attention_weights_3d,`
			`scope=scope,`
			`)`
			`return attention_weighted_encoder_context, attention_weights_3d, []`


			`def apply_soft_coverage_attention(`
			`model,`
			`encoder_output_dim,`
			`encoder_outputs_transposed,`
			`weighted_encoder_outputs,`
			`decoder_hidden_state_t,`
			`decoder_hidden_state_dim,`
			`scope,`
			`encoder_lengths,`
			`coverage_t_prev,`
			`coverage_weights,`
			`):`

			`weighted_decoder_hidden_state = _apply_fc_weight_for_sum_match(`
			`model=model,`
			`input=decoder_hidden_state_t,`
			`dim_in=decoder_hidden_state_dim,`
			`dim_out=encoder_output_dim,`
			`scope=scope,`
			`name='weighted_decoder_hidden_state',`
			`)`

			`# [encoder_length, batch_size, encoder_output_dim]`
			`decoder_hidden_encoder_outputs_sum_tmp = model.net.Add(`
			`[weighted_encoder_outputs, weighted_decoder_hidden_state],`
			`s(scope, 'decoder_hidden_encoder_outputs_sum_tmp'),`
			`broadcast=1,`
			`)`
			`# [batch_size, encoder_length]`
			`coverage_t_prev_2d = model.net.Squeeze(`
			`coverage_t_prev,`
			`s(scope, 'coverage_t_prev_2d'),`
			`dims=[0],`
			`)`
			`# [encoder_length, batch_size]`
			`coverage_t_prev_transposed = brew.transpose(`
			`model,`
			`coverage_t_prev_2d,`
			`s(scope, 'coverage_t_prev_transposed'),`
			`)`

			`# [encoder_length, batch_size, encoder_output_dim]`
			`scaled_coverage_weights = model.net.Mul(`
			`[coverage_weights, coverage_t_prev_transposed],`
			`s(scope, 'scaled_coverage_weights'),`
			`broadcast=1,`
			`axis=0,`
			`)`

			`# [encoder_length, batch_size, encoder_output_dim]`
			`decoder_hidden_encoder_outputs_sum = model.net.Add(`
			`[decoder_hidden_encoder_outputs_sum_tmp, scaled_coverage_weights],`
			`s(scope, 'decoder_hidden_encoder_outputs_sum'),`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_logits_transposed = _calc_attention_logits_from_sum_match(`
			`model=model,`
			`decoder_hidden_encoder_outputs_sum=decoder_hidden_encoder_outputs_sum,`
			`encoder_output_dim=encoder_output_dim,`
			`scope=scope,`
			`)`

			`# [batch_size, encoder_length, 1]`
			`attention_weights_3d = _calc_attention_weights(`
			`model=model,`
			`attention_logits_transposed=attention_logits_transposed,`
			`scope=scope,`
			`encoder_lengths=encoder_lengths,`
			`)`

			`# [batch_size, encoder_output_dim, 1]`
			`attention_weighted_encoder_context = _calc_weighted_context(`
			`model=model,`
			`encoder_outputs_transposed=encoder_outputs_transposed,`
			`encoder_output_dim=encoder_output_dim,`
			`attention_weights_3d=attention_weights_3d,`
			`scope=scope,`
			`)`

			`# [batch_size, encoder_length]`
			`attention_weights_2d = model.net.Squeeze(`
			`attention_weights_3d,`
			`s(scope, 'attention_weights_2d'),`
			`dims=[2],`
			`)`

			`coverage_t = model.net.Add(`
			`[coverage_t_prev, attention_weights_2d],`
			`s(scope, 'coverage_t'),`
			`broadcast=1,`
			`)`

			`return (`
			`attention_weighted_encoder_context,`
			`attention_weights_3d,`
			`[decoder_hidden_encoder_outputs_sum],`
			`coverage_t,`
			`)`