# Source code for deeppavlov.models.ranking.keras_siamese_model

# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import abstractmethod
from logging import getLogger
from typing import List

import numpy as np
from tensorflow.keras import losses
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from deeppavlov.core.models.keras_model import KerasModel
from deeppavlov.models.ranking.siamese_model import SiameseModel

# Module-level logger, named after this module via ``__name__``.
log = getLogger(__name__)


class KerasSiameseModel(SiameseModel, KerasModel):
    """The class implementing base functionality for siamese neural networks in keras.

    Subclasses must implement :meth:`create_model`. On construction the model is
    built and compiled; its weights are then loaded from ``load_path`` when that
    path exists, otherwise the embedding layer is initialized from ``emb_matrix``.

    Args:
        learning_rate: Learning rate.
        use_matrix: Whether to use a trainable matrix with token (word) embeddings.
        emb_matrix: An embeddings matrix to initialize an embeddings layer of a model.
            Only used if ``use_matrix`` is set to ``True``.
        max_sequence_length: A maximum length of text sequences in tokens.
            Longer sequences will be truncated and shorter ones will be padded.
        dynamic_batch:  Whether to use dynamic batching. If ``True``, the maximum length of a sequence for a batch
            will be equal to the maximum of all sequences lengths from this batch,
            but not higher than ``max_sequence_length``.
            When enabled, ``self.max_sequence_length`` is stored as ``None``.
        attention: Whether any attention mechanism is used in the siamese network.
        *args: Other parameters.
        **kwargs: Other parameters.
    """

    def __init__(self,
                 learning_rate: float = 1e-3,
                 use_matrix: bool = True,
                 emb_matrix: np.ndarray = None,
                 max_sequence_length: int = None,
                 dynamic_batch: bool = False,
                 attention: bool = False,
                 *args,
                 **kwargs) -> None:

        super().__init__(*args, **kwargs)

        self.learning_rate = learning_rate
        self.attention = attention
        self.use_matrix = use_matrix
        self.emb_matrix = emb_matrix
        # With dynamic batching no fixed sequence length is stored on the model.
        self.max_sequence_length = None if dynamic_batch else max_sequence_length

        # The model has to exist (and be compiled) before weights can be
        # loaded into it or its embedding layer can be initialized.
        self.model = self.create_model()
        self.compile()
        if self.load_path.exists():
            self.load()
        else:
            self.load_initial_emb_matrix()

        # Separate context/response encoders are only built when no attention
        # mechanism is used.
        if not self.attention:
            self.context_model = self.create_context_model()
            self.response_model = self.create_response_model()

    def compile(self) -> None:
        """Compile ``self.model`` with the Adam optimizer and binary cross-entropy loss."""
        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias
        # that newer Keras releases no longer accept.
        optimizer = Adam(learning_rate=self.learning_rate)
        loss = losses.binary_crossentropy
        self.model.compile(loss=loss, optimizer=optimizer)

    def load(self) -> None:
        """Load the weights of ``self.model`` from ``self.load_path``."""
        log.info("[initializing `%s` from saved]", self.__class__.__name__)
        self.model.load_weights(str(self.load_path))

    def save(self) -> None:
        """Save the weights of ``self.model`` to ``self.save_path``."""
        log.info("[saving `%s`]", self.__class__.__name__)
        self.model.save_weights(str(self.save_path))

    def load_initial_emb_matrix(self) -> None:
        """Initialize the layer named ``"embedding"`` with ``self.emb_matrix``.

        Does nothing apart from logging when ``use_matrix`` is ``False``.

        Raises:
            ValueError: If ``use_matrix`` is ``True`` but no ``emb_matrix`` was provided.
        """
        log.info("[initializing new `%s`]", self.__class__.__name__)
        if not self.use_matrix:
            return
        if self.emb_matrix is None:
            # Fail early with a clear message instead of an opaque error
            # raised from inside ``set_weights``.
            raise ValueError("`emb_matrix` must be provided to initialize the embedding layer "
                             "when `use_matrix` is True.")
        self.model.get_layer(name="embedding").set_weights([self.emb_matrix])

    @abstractmethod
    def create_model(self) -> Model:
        """Build and return the full siamese keras model.

        The methods of this class look up layers named ``"embedding"``
        (when ``use_matrix`` is ``True``) and ``"sentence_embedding"``
        (when ``attention`` is ``False``) in the returned model.
        """
        pass

    def create_context_model(self) -> Model:
        """Build a model mapping all inputs but the last one to the output
        of the ``"sentence_embedding"`` layer at node index 0."""
        m = Model(self.model.inputs[:-1],
                  self.model.get_layer("sentence_embedding").get_output_at(0))
        return m

    def create_response_model(self) -> Model:
        """Build a model mapping the last input to the output
        of the ``"sentence_embedding"`` layer at node index 1."""
        m = Model(self.model.inputs[-1],
                  self.model.get_layer("sentence_embedding").get_output_at(1))
        return m

    def _train_on_batch(self, batch: List[np.ndarray], y: List[int]) -> float:
        """Run a single training step on ``batch`` with labels ``y`` and return the loss."""
        loss = self.model.train_on_batch(batch, np.asarray(y))
        return loss

    def _predict_on_batch(self, batch: List[np.ndarray]) -> np.ndarray:
        """Return the predictions of the full model for ``batch``."""
        y_pred = self.model.predict_on_batch(batch)
        return y_pred

    def _predict_context_on_batch(self, batch: List[np.ndarray]) -> np.ndarray:
        """Return the output of ``self.context_model`` for ``batch``.

        ``self.context_model`` is only created when ``attention`` is ``False``.
        """
        return self.context_model.predict_on_batch(batch)

    def _predict_response_on_batch(self, batch: List[np.ndarray]) -> np.ndarray:
        """Return the output of ``self.response_model`` for ``batch``.

        ``self.response_model`` is only created when ``attention`` is ``False``.
        """
        return self.response_model.predict_on_batch(batch)