Source code for deeppavlov.models.ranking.mpm_siamese_network

# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from logging import getLogger

from keras import backend as K
from keras.initializers import glorot_uniform, Orthogonal
from keras.layers import Input, LSTM, Lambda, Dense, Dropout
from keras.layers.wrappers import Bidirectional
from keras.models import Model

from deeppavlov.core.common.registry import register
from deeppavlov.core.layers.keras_layers import AttentiveMatchingLayer, MaxattentiveMatchingLayer
from deeppavlov.core.layers.keras_layers import FullMatchingLayer, MaxpoolingMatchingLayer
from deeppavlov.models.ranking.bilstm_siamese_network import BiLSTMSiameseNetwork

log = getLogger(__name__)


[docs]@register('mpm_nn')
class MPMSiameseNetwork(BiLSTMSiameseNetwork):
    """The class implementing a siamese neural network with bilateral multi-Perspective matching.

    The network architecture is based on https://arxiv.org/abs/1702.03814.
    
    Args:
        dense_dim: Dimensionality of the dense layer.
        perspective_num: Number of perspectives in multi-perspective matching layers.
        aggregation dim: Dimensionality of the hidden state in the second BiLSTM layer.
        inpdrop_val: Float between 0 and 1. A dropout value for the linear transformation of the inputs.
        recdrop_val: Float between 0 and 1. A dropout value for the linear transformation of the recurrent state.
        ldrop_val: A dropout value of the dropout layer before the second BiLSTM layer.
        dropout_val:  A dropout value of the dropout layer after the second BiLSTM layer.
    """

    def __init__(self,
                 dense_dim: int = 50,
                 perspective_num: int = 20,
                 aggregation_dim: int = 200,
                 recdrop_val: float = 0.0,
                 inpdrop_val: float = 0.0,
                 ldrop_val: float = 0.0,
                 dropout_val: float = 0.0,
                 *args,
                 **kwargs) -> None:

        self.dense_dim = dense_dim
        self.perspective_num = perspective_num
        self.aggregation_dim = aggregation_dim
        self.ldrop_val = ldrop_val
        self.recdrop_val = recdrop_val
        self.inpdrop_val = inpdrop_val
        self.dropout_val = dropout_val
        self.seed = kwargs.get("triplet_loss")
        self.triplet_mode = kwargs.get("triplet_loss")

        super(MPMSiameseNetwork, self).__init__(*args, **kwargs)

    def create_lstm_layer_1(self):
        ker_in = glorot_uniform(seed=self.seed)
        rec_in = Orthogonal(seed=self.seed)
        bioutp = Bidirectional(LSTM(self.hidden_dim,
                                    input_shape=(self.max_sequence_length, self.embedding_dim,),
                                    kernel_regularizer=None,
                                    recurrent_regularizer=None,
                                    bias_regularizer=None,
                                    activity_regularizer=None,
                                    recurrent_dropout=self.recdrop_val,
                                    dropout=self.inpdrop_val,
                                    kernel_initializer=ker_in,
                                    recurrent_initializer=rec_in,
                                    return_sequences=True), merge_mode=None)
        return bioutp

    def create_lstm_layer_2(self):
        ker_in = glorot_uniform(seed=self.seed)
        rec_in = Orthogonal(seed=self.seed)
        bioutp = Bidirectional(LSTM(self.aggregation_dim,
                                    input_shape=(self.max_sequence_length, 8 * self.perspective_num,),
                                    kernel_regularizer=None,
                                    recurrent_regularizer=None,
                                    bias_regularizer=None,
                                    activity_regularizer=None,
                                    recurrent_dropout=self.recdrop_val,
                                    dropout=self.inpdrop_val,
                                    kernel_initializer=ker_in,
                                    recurrent_initializer=rec_in,
                                    return_sequences=False),
                               merge_mode='concat',
                               name="sentence_embedding")
        return bioutp

    def create_model(self) -> Model:
        if self.use_matrix:
            context = Input(shape=(self.max_sequence_length,))
            response = Input(shape=(self.max_sequence_length,))
            emb_layer = self.embedding_layer()
            emb_c = emb_layer(context)
            emb_r = emb_layer(response)
        else:
            context = Input(shape=(self.max_sequence_length, self.embedding_dim,))
            response = Input(shape=(self.max_sequence_length, self.embedding_dim,))
            emb_c = context
            emb_r = response
        lstm_layer = self.create_lstm_layer_1()
        lstm_a = lstm_layer(emb_c)
        lstm_b = lstm_layer(emb_r)

        f_layer_f = FullMatchingLayer(self.perspective_num)
        f_layer_b = FullMatchingLayer(self.perspective_num)
        f_a_forw = f_layer_f([lstm_a[0], lstm_b[0]])[0]
        f_a_back = f_layer_b([Lambda(lambda x: K.reverse(x, 1))(lstm_a[1]),
                              Lambda(lambda x: K.reverse(x, 1))(lstm_b[1])])[0]
        f_a_back = Lambda(lambda x: K.reverse(x, 1))(f_a_back)
        f_b_forw = f_layer_f([lstm_b[0], lstm_a[0]])[0]
        f_b_back = f_layer_b([Lambda(lambda x: K.reverse(x, 1))(lstm_b[1]),
                              Lambda(lambda x: K.reverse(x, 1))(lstm_a[1])])[0]
        f_b_back = Lambda(lambda x: K.reverse(x, 1))(f_b_back)

        mp_layer_f = MaxpoolingMatchingLayer(self.perspective_num)
        mp_layer_b = MaxpoolingMatchingLayer(self.perspective_num)
        mp_a_forw = mp_layer_f([lstm_a[0], lstm_b[0]])[0]
        mp_a_back = mp_layer_b([lstm_a[1], lstm_b[1]])[0]
        mp_b_forw = mp_layer_f([lstm_b[0], lstm_a[0]])[0]
        mp_b_back = mp_layer_b([lstm_b[1], lstm_a[1]])[0]

        at_layer_f = AttentiveMatchingLayer(self.perspective_num)
        at_layer_b = AttentiveMatchingLayer(self.perspective_num)
        at_a_forw = at_layer_f([lstm_a[0], lstm_b[0]])[0]
        at_a_back = at_layer_b([lstm_a[1], lstm_b[1]])[0]
        at_b_forw = at_layer_f([lstm_b[0], lstm_a[0]])[0]
        at_b_back = at_layer_b([lstm_b[1], lstm_a[1]])[0]

        ma_layer_f = MaxattentiveMatchingLayer(self.perspective_num)
        ma_layer_b = MaxattentiveMatchingLayer(self.perspective_num)
        ma_a_forw = ma_layer_f([lstm_a[0], lstm_b[0]])[0]
        ma_a_back = ma_layer_b([lstm_a[1], lstm_b[1]])[0]
        ma_b_forw = ma_layer_f([lstm_b[0], lstm_a[0]])[0]
        ma_b_back = ma_layer_b([lstm_b[1], lstm_a[1]])[0]

        concat_a = Lambda(lambda x: K.concatenate(x, axis=-1))([f_a_forw, f_a_back,
                                                                mp_a_forw, mp_a_back,
                                                                at_a_forw, at_a_back,
                                                                ma_a_forw, ma_a_back])
        concat_b = Lambda(lambda x: K.concatenate(x, axis=-1))([f_b_forw, f_b_back,
                                                                mp_b_forw, mp_b_back,
                                                                at_b_forw, at_b_back,
                                                                ma_b_forw, ma_b_back])

        concat_a = Dropout(self.ldrop_val)(concat_a)
        concat_b = Dropout(self.ldrop_val)(concat_b)

        lstm_layer_agg = self.create_lstm_layer_2()
        agg_a = lstm_layer_agg(concat_a)
        agg_b = lstm_layer_agg(concat_b)

        agg_a = Dropout(self.dropout_val)(agg_a)
        agg_b = Dropout(self.dropout_val)(agg_b)

        reduced = Lambda(lambda x: K.concatenate(x, axis=-1))([agg_a, agg_b])

        if self.triplet_mode:
            dist = Lambda(self._pairwise_distances)([agg_a, agg_b])
        else:
            ker_in = glorot_uniform(seed=self.seed)
            dense = Dense(self.dense_dim, kernel_initializer=ker_in)(reduced)
            dist = Dense(1, activation='sigmoid', name="score_model")(dense)
        model = Model([context, response], dist)
        return model