diff --git a/README.md b/README.md
index 4a168e275..3527aa4ce 100644
--- a/README.md
+++ b/README.md
@@ -112,6 +112,7 @@ The recommender models supported by Cornac are listed below. Why don't you join
 |      | [Causal Inference for Visual Debiasing in Visually-Aware Recommendation (CausalRec)](cornac/models/causalrec), [paper](https://arxiv.org/abs/2107.02390) | [requirements.txt](cornac/models/causalrec/requirements.txt) | [causalrec_clothing.py](examples/causalrec_clothing.py)
 |      | [Explainable Recommendation with Comparative Constraints on Product Aspects (ComparER)](cornac/models/comparer), [paper](https://dl.acm.org/doi/pdf/10.1145/3437963.3441754) | N/A | [PreferredAI/ComparER](https://github.com/PreferredAI/ComparER)
 | 2020 | [Adversarial Training Towards Robust Multimedia Recommender System (AMR)](cornac/models/amr), [paper](https://ieeexplore.ieee.org/document/8618394) | [requirements.txt](cornac/models/amr/requirements.txt) | [amr_clothing.py](examples/amr_clothing.py)
+|      | [Hybrid neural recommendation with joint deep representation learning of ratings and reviews (HRDR)](cornac/models/hrdr), [paper](https://www.sciencedirect.com/science/article/abs/pii/S0925231219313207) | [requirements.txt](cornac/models/hrdr/requirements.txt) | [hrdr_example.py](examples/hrdr_example.py)
 | 2019 | [Embarrassingly Shallow Autoencoders for Sparse Data (EASEᴿ)](cornac/models/ease), [paper](https://arxiv.org/pdf/1905.03375.pdf) | N/A | [ease_movielens.py](examples/ease_movielens.py)
 | 2018 | [Collaborative Context Poisson Factorization (C2PF)](cornac/models/c2pf), [paper](https://www.ijcai.org/proceedings/2018/0370.pdf) | N/A | [c2pf_exp.py](examples/c2pf_example.py)
 |      | [Multi-Task Explainable Recommendation (MTER)](cornac/models/mter), [paper](https://arxiv.org/pdf/1806.03568.pdf) | N/A | [mter_exp.py](examples/mter_example.py)
diff --git a/cornac/models/__init__.py b/cornac/models/__init__.py
index 63e7315e7..94634c843 100644
--- a/cornac/models/__init__.py
+++ b/cornac/models/__init__.py
@@ -36,6 +36,7 @@ from .global_avg import GlobalAvg
 from .hft import HFT
 from .hpf import HPF
+from .hrdr import HRDR
 from .ibpr import IBPR
 from .knn import ItemKNN
 from .knn import UserKNN
diff --git a/cornac/models/hrdr/__init__.py b/cornac/models/hrdr/__init__.py
new file mode 100644
index 000000000..750eb4b90
--- /dev/null
+++ b/cornac/models/hrdr/__init__.py
@@ -0,0 +1 @@
+from .recom_hrdr import HRDR
diff --git a/cornac/models/hrdr/hrdr.py b/cornac/models/hrdr/hrdr.py
new file mode 100644
index 000000000..f57960bc9
--- /dev/null
+++ b/cornac/models/hrdr/hrdr.py
@@ -0,0 +1,239 @@
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers, initializers
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+
+from ...utils import get_rng
+from ...utils.init_utils import uniform
+from ..narre.narre import TextProcessor, AddGlobalBias
+
+
+def get_data(batch_ids, train_set, max_text_length, by="user", max_num_review=32):
+    batch_reviews, batch_num_reviews = [], []
+    review_group = (
+        train_set.review_text.user_review
+        if by == "user"
+        else train_set.review_text.item_review
+    )
+    for idx in batch_ids:
+        review_ids = []
+        for inc, (jdx, review_idx) in enumerate(review_group[idx].items()):
+            if max_num_review is not None and inc == max_num_review:
+                break
+            review_ids.append(review_idx)
+        reviews = train_set.review_text.batch_seq(
+            review_ids, max_length=max_text_length
+        )
+        batch_reviews.append(reviews)
+        batch_num_reviews.append(len(reviews))
+    batch_reviews = pad_sequences(batch_reviews, maxlen=max_num_review, padding="post")
+    batch_num_reviews = np.array(batch_num_reviews).astype(np.int32)
+    batch_ratings = (
+        np.zeros((len(batch_ids), train_set.num_items), dtype=np.float32)
+        if by == "user"
+        else np.zeros((len(batch_ids), train_set.num_users), dtype=np.float32)
+    )
+    rating_group = train_set.user_data if by == "user" else train_set.item_data
+    for batch_inc, idx in enumerate(batch_ids):
+        jds, ratings = rating_group[idx]
+        for jdx, rating in zip(jds, ratings):
+            batch_ratings[batch_inc, jdx] = rating
+    return batch_reviews, batch_num_reviews, batch_ratings
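Editor's note: the padding step above turns a ragged list of per-user (or per-item) review matrices into one dense batch tensor. A minimal sketch of that step in isolation, with dummy token ids (illustrative only, not part of the patch):

```python
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_text_length, max_num_review = 5, 3
# two "users": one with 2 reviews, one with 1 review; each review is a row of token ids
batch_reviews = [
    np.ones((2, max_text_length), dtype=np.int32),
    2 * np.ones((1, max_text_length), dtype=np.int32),
]
padded = pad_sequences(batch_reviews, maxlen=max_num_review, padding="post")
print(padded.shape)  # (2, 3, 5): users x zero-padded review slots x tokens
```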
+
+class Model(keras.Model):
+    def __init__(self, n_users, n_items, n_vocab, global_mean, embedding_matrix,
+                 n_factors=32, embedding_size=100, id_embedding_size=32,
+                 attention_size=16, kernel_sizes=[3], n_filters=64,
+                 n_user_mlp_factors=128, n_item_mlp_factors=128,
+                 dropout_rate=0.5, max_text_length=50):
+        super().__init__()
+        self.l_user_review_embedding = layers.Embedding(n_vocab, embedding_size, embeddings_initializer=embedding_matrix, mask_zero=True, name="user_review_embedding")
+        self.l_item_review_embedding = layers.Embedding(n_vocab, embedding_size, embeddings_initializer=embedding_matrix, mask_zero=True, name="item_review_embedding")
+        self.l_user_embedding = layers.Embedding(n_users, id_embedding_size, embeddings_initializer="uniform", name="user_embedding")
+        self.l_item_embedding = layers.Embedding(n_items, id_embedding_size, embeddings_initializer="uniform", name="item_embedding")
+        self.user_bias = layers.Embedding(n_users, 1, embeddings_initializer=tf.initializers.Constant(0.1), name="user_bias")
+        self.item_bias = layers.Embedding(n_items, 1, embeddings_initializer=tf.initializers.Constant(0.1), name="item_bias")
+        self.user_text_processor = TextProcessor(max_text_length, filters=n_filters, kernel_sizes=kernel_sizes, dropout_rate=dropout_rate, name='user_text_processor')
+        self.item_text_processor = TextProcessor(max_text_length, filters=n_filters, kernel_sizes=kernel_sizes, dropout_rate=dropout_rate, name='item_text_processor')
+
+        self.l_user_mlp = keras.models.Sequential([
+            layers.Dense(n_user_mlp_factors, input_dim=n_items, activation="relu"),
+            layers.Dense(n_user_mlp_factors // 2, activation="relu"),
+            layers.Dense(n_filters, activation="relu"),
+            layers.BatchNormalization(),
+        ])
+        self.l_item_mlp = keras.models.Sequential([
+            layers.Dense(n_item_mlp_factors, input_dim=n_users, activation="relu"),
+            layers.Dense(n_item_mlp_factors // 2, activation="relu"),
+            layers.Dense(n_filters, activation="relu"),
+            layers.BatchNormalization(),
+        ])
+        self.a_user = keras.models.Sequential([
+            layers.Dense(attention_size, activation="relu", use_bias=True),
+            layers.Dense(1, activation=None, use_bias=True)
+        ])
+        self.user_attention = layers.Softmax(axis=1, name="user_attention")
+        self.a_item = keras.models.Sequential([
+            layers.Dense(attention_size, activation="relu", use_bias=True),
+            layers.Dense(1, activation=None, use_bias=True)
+        ])
+        self.item_attention = layers.Softmax(axis=1, name="item_attention")
+        self.ou_dropout = layers.Dropout(rate=dropout_rate)
+        self.oi_dropout = layers.Dropout(rate=dropout_rate)
+        self.ou = layers.Dense(n_factors, use_bias=True, name="ou")
+        self.oi = layers.Dense(n_factors, use_bias=True, name="oi")
+        self.W1 = layers.Dense(1, activation=None, use_bias=False, name="W1")
+        self.add_global_bias = AddGlobalBias(init_value=global_mean, name="global_bias")
+
+    def call(self, inputs, training=False):
+        i_user_id, i_item_id, i_user_rating, i_user_review, i_user_num_reviews, i_item_rating, i_item_review, i_item_num_reviews = inputs
+        user_review_h = self.user_text_processor(self.l_user_review_embedding(i_user_review), training=training)
+        item_review_h = self.item_text_processor(self.l_item_review_embedding(i_item_review), training=training)
+        user_rating_h = self.l_user_mlp(i_user_rating)
+        item_rating_h = self.l_item_mlp(i_item_rating)
+        a_user = self.a_user(
+            tf.multiply(
+                user_review_h,
+                tf.expand_dims(user_rating_h, 1)
+            )
+        )
+        a_user_masking = tf.expand_dims(tf.sequence_mask(tf.reshape(i_user_num_reviews, [-1]), maxlen=i_user_review.shape[1]), -1)
+        user_attention = self.user_attention(a_user, a_user_masking)
+        a_item = self.a_item(
+            tf.multiply(
+                item_review_h,
+                tf.expand_dims(item_rating_h, 1)
+            )
+        )
+        a_item_masking = tf.expand_dims(tf.sequence_mask(tf.reshape(i_item_num_reviews, [-1]), maxlen=i_item_review.shape[1]), -1)
+        item_attention = self.item_attention(a_item, a_item_masking)
+        ou = tf.multiply(user_attention, user_review_h)
+        ou = tf.reduce_sum(ou, 1)
+        if training:
+            ou = self.ou_dropout(ou, training=training)
+        ou = self.ou(ou)
+        oi = tf.multiply(item_attention, item_review_h)
+        oi = tf.reduce_sum(oi, 1)
+        if training:
+            oi = self.oi_dropout(oi, training=training)
+        oi = self.oi(oi)
+        pu = tf.concat([
+            user_rating_h,
+            ou,
+            self.l_user_embedding(i_user_id)
+        ], axis=-1)
+        qi = tf.concat([
+            item_rating_h,
+            oi,
+            self.l_item_embedding(i_item_id)
+        ], axis=-1)
+        h0 = tf.multiply(pu, qi)
+        r = self.add_global_bias(
+            tf.add_n([
+                self.W1(h0),
+                self.user_bias(i_user_id),
+                self.item_bias(i_item_id)
+            ])
+        )
+        return r
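Editor's note: the core of `call` is the masked review-level attention: raw scores from the small MLP (`a_user`/`a_item`) are normalized with a `Softmax` whose boolean mask, built via `tf.sequence_mask`, zeroes out padded review slots. The same mechanics in isolation (a sketch with random logits):

```python
import tensorflow as tf
from tensorflow.keras import layers

scores = tf.random.normal((2, 4, 1))  # (batch, max_num_review, 1) raw attention logits
num_reviews = tf.constant([3, 2])     # actual number of reviews per example
mask = tf.expand_dims(tf.sequence_mask(num_reviews, maxlen=4), -1)
attention = layers.Softmax(axis=1)(scores, mask)  # padded slots get ~zero weight
print(tf.reduce_sum(attention, axis=1))  # weights of each example sum to 1
```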
+
+class HRDRModel:
+    def __init__(self, n_users, n_items, vocab, global_mean,
+                 n_factors=32, embedding_size=100, id_embedding_size=32,
+                 attention_size=16, kernel_sizes=[3], n_filters=64,
+                 n_user_mlp_factors=128, n_item_mlp_factors=128,
+                 dropout_rate=0.5, max_text_length=50, max_num_review=32,
+                 pretrained_word_embeddings=None, verbose=False, seed=None):
+        self.n_users = n_users
+        self.n_items = n_items
+        self.n_vocab = vocab.size
+        self.global_mean = global_mean
+        self.n_factors = n_factors
+        self.embedding_size = embedding_size
+        self.id_embedding_size = id_embedding_size
+        self.attention_size = attention_size
+        self.kernel_sizes = kernel_sizes
+        self.n_filters = n_filters
+        self.n_user_mlp_factors = n_user_mlp_factors
+        self.n_item_mlp_factors = n_item_mlp_factors
+        self.dropout_rate = dropout_rate
+        self.max_text_length = max_text_length
+        self.max_num_review = max_num_review
+        self.verbose = verbose
+        self.rng = get_rng(seed)  # always set, since it is used below regardless of seed
+        if seed is not None:
+            tf.random.set_seed(seed)
+
+        embedding_matrix = uniform(shape=(self.n_vocab, self.embedding_size), low=-0.5, high=0.5, random_state=self.rng)
+        embedding_matrix[:4, :] = np.zeros((4, self.embedding_size))  # zero vectors for the special tokens (e.g., padding)
+        if pretrained_word_embeddings is not None:
+            oov_count = 0
+            for word, idx in vocab.tok2idx.items():
+                embedding_vector = pretrained_word_embeddings.get(word)
+                if embedding_vector is not None:
+                    embedding_matrix[idx] = embedding_vector
+                else:
+                    oov_count += 1
+            if self.verbose:
+                print("Number of OOV words: %d" % oov_count)
+
+        embedding_matrix = initializers.Constant(embedding_matrix)
+        self.graph = Model(
+            self.n_users, self.n_items, self.n_vocab, self.global_mean, embedding_matrix,
+            self.n_factors, self.embedding_size, self.id_embedding_size,
+            self.attention_size, self.kernel_sizes, self.n_filters,
+            self.n_user_mlp_factors, self.n_item_mlp_factors,
+            self.dropout_rate, self.max_text_length
+        )
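Editor's note: from the lookup above, `pretrained_word_embeddings` is expected to be a plain dict mapping a token to a 1-D vector of length `embedding_size`. A hedged sketch of building such a dict from a GloVe-style text file (the file name and format are assumptions, not part of the patch):

```python
import numpy as np

def load_glove(path="glove.6B.100d.txt", embedding_size=100):  # hypothetical file
    embeddings = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            if len(parts) == embedding_size + 1:  # token followed by its vector
                embeddings[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return embeddings
```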
+
+    def get_weights(self, train_set, batch_size=64):
+        P = np.zeros((self.n_users, self.n_filters + self.n_factors + self.id_embedding_size))
+        Q = np.zeros((self.n_items, self.n_filters + self.n_factors + self.id_embedding_size))
+        A = np.zeros((self.n_items, self.max_num_review))
+        for batch_users in train_set.user_iter(batch_size, shuffle=False):
+            i_user_review, i_user_num_reviews, i_user_rating = get_data(batch_users, train_set, self.max_text_length, by='user', max_num_review=self.max_num_review)
+            user_review_embedding = self.graph.l_user_review_embedding(i_user_review)
+            user_review_h = self.graph.user_text_processor(user_review_embedding, training=False)
+            user_rating_h = self.graph.l_user_mlp(i_user_rating)
+            a_user = self.graph.a_user(
+                tf.multiply(
+                    user_review_h,
+                    tf.expand_dims(user_rating_h, 1)
+                )
+            )
+            a_user_masking = tf.expand_dims(tf.sequence_mask(tf.reshape(i_user_num_reviews, [-1]), maxlen=i_user_review.shape[1]), -1)
+            user_attention = self.graph.user_attention(a_user, a_user_masking)
+            ou = self.graph.ou(tf.reduce_sum(tf.multiply(user_attention, user_review_h), 1))
+            pu = tf.concat([
+                user_rating_h,
+                ou,
+                self.graph.l_user_embedding(batch_users)
+            ], axis=-1)
+            P[batch_users] = pu.numpy()
+        for batch_items in train_set.item_iter(batch_size, shuffle=False):
+            i_item_review, i_item_num_reviews, i_item_rating = get_data(batch_items, train_set, self.max_text_length, by='item', max_num_review=self.max_num_review)
+            item_review_embedding = self.graph.l_item_review_embedding(i_item_review)
+            item_review_h = self.graph.item_text_processor(item_review_embedding, training=False)
+            item_rating_h = self.graph.l_item_mlp(i_item_rating)
+            a_item = self.graph.a_item(
+                tf.multiply(
+                    item_review_h,
+                    tf.expand_dims(item_rating_h, 1)
+                )
+            )
+            a_item_masking = tf.expand_dims(tf.sequence_mask(tf.reshape(i_item_num_reviews, [-1]), maxlen=i_item_review.shape[1]), -1)
+            item_attention = self.graph.item_attention(a_item, a_item_masking)
+            oi = self.graph.oi(tf.reduce_sum(tf.multiply(item_attention, item_review_h), 1))
+            qi = tf.concat([
+                item_rating_h,
+                oi,
+                self.graph.l_item_embedding(batch_items)
+            ], axis=-1)
+            Q[batch_items] = qi.numpy()
+            A[batch_items, :item_attention.shape[1]] = item_attention.numpy().reshape(item_attention.shape[:2])
+        W1 = self.graph.W1.get_weights()[0]
+        bu = self.graph.user_bias.get_weights()[0]
+        bi = self.graph.item_bias.get_weights()[0]
+        mu = self.graph.add_global_bias.get_weights()[0][0]
+        return P, Q, W1, bu, bi, mu, A
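Editor's note: `get_weights` exports everything needed for inference, so a prediction can be reproduced with plain numpy; this mirrors the computation `HRDR.score()` performs further below (a sketch, assuming a trained model):

```python
import numpy as np

def predict(P, Q, W1, bu, bi, mu, u, i):
    h0 = P[u] * Q[i]  # element-wise user-item interaction
    return float(h0.dot(W1) + bu[u] + bi[i] + mu)
```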
diff --git a/cornac/models/hrdr/recom_hrdr.py b/cornac/models/hrdr/recom_hrdr.py
new file mode 100644
index 000000000..52369e850
--- /dev/null
+++ b/cornac/models/hrdr/recom_hrdr.py
@@ -0,0 +1,340 @@
+# Copyright 2018 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+import pickle
+from tqdm.auto import trange
+
+from ..recommender import Recommender
+from ...exception import ScoreException
+
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+class HRDR(Recommender):
+    """Hybrid neural recommendation with joint deep representation learning of ratings and reviews (HRDR).
+
+    Parameters
+    ----------
+    name: string, default: 'HRDR'
+        The name of the recommender model.
+
+    embedding_size: int, default: 100
+        Word embedding size.
+
+    id_embedding_size: int, default: 32
+        User/item ID embedding size.
+
+    n_factors: int, default: 32
+        The dimension of the user/item latent factors.
+
+    attention_size: int, default: 16
+        Size of the attention layer.
+
+    kernel_sizes: list, default: [3]
+        List of kernel sizes for the conv2d layers.
+
+    n_filters: int, default: 64
+        Number of convolutional filters.
+
+    n_user_mlp_factors: int, default: 128
+        Dimension of the first layer of the 3-layer MLP (followed by batch
+        normalization) in the user network, which encodes the user's rating vector.
+
+    n_item_mlp_factors: int, default: 128
+        Dimension of the first layer of the 3-layer MLP (followed by batch
+        normalization) in the item network, which encodes the item's rating vector.
+
+    dropout_rate: float, default: 0.5
+        Dropout rate of the dense layers.
+
+    max_text_length: int, default: 50
+        Maximum number of tokens in a review instance.
+
+    max_num_review: int, default: 32
+        Maximum number of reviews per user/item fed into training.
+        Set it to None to use all available reviews (memory permitting).
+
+    batch_size: int, default: 64
+        Batch size.
+
+    max_iter: int, default: 20
+        Maximum number of training epochs.
+
+    optimizer: string, optional, default: 'adam'
+        Optimizer for training, either 'adam' or 'rmsprop'.
+
+    learning_rate: float, optional, default: 0.001
+        Initial learning rate for the optimizer.
+
+    model_selection: string, optional, default: 'last'
+        Model selection strategy, either 'last' (keep the weights of the final
+        epoch) or 'best' (keep the weights of the epoch with the lowest
+        validation loss; requires a validation set).
+
+    user_based: boolean, optional, default: True
+        Whether the validation metric is averaged per user (True) or computed
+        over all ratings (False).
+
+    trainable: boolean, optional, default: True
+        When False, the model will not be re-trained, and pre-trained parameters
+        are required as input.
+
+    verbose: boolean, optional, default: True
+        When True, running logs are displayed.
+
+    init_params: dictionary, optional, default: None
+        Initial parameters; e.g., pretrained word embeddings can be provided as
+        init_params={'pretrained_word_embeddings': pretrained_word_embeddings}.
+
+    seed: int, optional, default: None
+        Random seed for weight initialization.
+        If specified, training may be slower due to single-threaded execution (no parallelization).
+
+    References
+    ----------
+    Liu, H., Wang, Y., Peng, Q., Wu, F., Gan, L., Pan, L., & Jiao, P. (2020).
+    Hybrid neural recommendation with joint deep representation learning of
+    ratings and reviews. Neurocomputing, 374, 77-85.
+    """
+ """ + + def __init__( + self, + name="HRDR", + embedding_size=100, + id_embedding_size=32, + n_factors=32, + attention_size=16, + kernel_sizes=[3], + n_filters=64, + n_user_mlp_factors=128, + n_item_mlp_factors=128, + dropout_rate=0.5, + max_text_length=50, + max_num_review=32, + batch_size=64, + max_iter=20, + optimizer='adam', + learning_rate=0.001, + model_selection='last', # last or best + user_based=True, + trainable=True, + verbose=True, + init_params=None, + seed=None, + ): + super().__init__(name=name, trainable=trainable, verbose=verbose) + self.seed = seed + self.embedding_size = embedding_size + self.id_embedding_size = id_embedding_size + self.n_factors = n_factors + self.attention_size = attention_size + self.n_filters = n_filters + self.kernel_sizes = kernel_sizes + self.n_user_mlp_factors = n_user_mlp_factors + self.n_item_mlp_factors = n_item_mlp_factors + self.dropout_rate = dropout_rate + self.max_text_length = max_text_length + self.max_num_review = max_num_review + self.batch_size = batch_size + self.max_iter = max_iter + self.optimizer = optimizer + self.learning_rate = learning_rate + self.model_selection = model_selection + self.user_based = user_based + # Init params if provided + self.init_params = {} if init_params is None else init_params + self.losses = {"train_losses": [], "val_losses": []} + + def fit(self, train_set, val_set=None): + """Fit the model to observations. + + Parameters + ---------- + train_set: :obj:`cornac.data.Dataset`, required + User-Item preference data as well as additional modalities. + + val_set: :obj:`cornac.data.Dataset`, optional, default: None + User-Item preference data for model selection purposes (e.g., early stopping). + + Returns + ------- + self : object + """ + Recommender.fit(self, train_set, val_set) + + if self.trainable: + if not hasattr(self, "model"): + from .hrdr import HRDRModel + self.model = HRDRModel( + self.train_set.num_users, + self.train_set.num_items, + self.train_set.review_text.vocab, + self.train_set.global_mean, + n_factors=self.n_factors, + embedding_size=self.embedding_size, + id_embedding_size=self.id_embedding_size, + attention_size=self.attention_size, + kernel_sizes=self.kernel_sizes, + n_filters=self.n_filters, + n_user_mlp_factors=self.n_user_mlp_factors, + n_item_mlp_factors=self.n_item_mlp_factors, + dropout_rate=self.dropout_rate, + max_text_length=self.max_text_length, + max_num_review=self.max_num_review, + pretrained_word_embeddings=self.init_params.get('pretrained_word_embeddings'), + verbose=self.verbose, + seed=self.seed, + ) + self._fit() + + return self + + def _fit(self): + import tensorflow as tf + from tensorflow import keras + from .hrdr import get_data + from ...eval_methods.base_method import rating_eval + from ...metrics import MSE + if not hasattr(self, '_optimizer'): + from tensorflow import keras + if self.optimizer == 'rmsprop': + self._optimizer = keras.optimizers.RMSprop(learning_rate=self.learning_rate) + else: + self._optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate) + loss = keras.losses.MeanSquaredError() + train_loss = keras.metrics.Mean(name="loss") + val_loss = float('inf') + best_val_loss = float('inf') + self.best_epoch = None + loop = trange(self.max_iter, disable=not self.verbose, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') + for i_epoch, _ in enumerate(loop): + train_loss.reset_states() + for i, (batch_users, batch_items, batch_ratings) in enumerate(self.train_set.uir_iter(self.batch_size, shuffle=True)): + user_reviews, user_num_reviews, 
+
+    def _fit(self):
+        import tensorflow as tf
+        from tensorflow import keras
+        from .hrdr import get_data
+        from ...eval_methods.base_method import rating_eval
+        from ...metrics import MSE
+        if not hasattr(self, '_optimizer'):
+            if self.optimizer == 'rmsprop':
+                self._optimizer = keras.optimizers.RMSprop(learning_rate=self.learning_rate)
+            else:
+                self._optimizer = keras.optimizers.Adam(learning_rate=self.learning_rate)
+        loss = keras.losses.MeanSquaredError()
+        train_loss = keras.metrics.Mean(name="loss")
+        val_loss = float('inf')
+        best_val_loss = float('inf')
+        self.best_epoch = None
+        loop = trange(self.max_iter, disable=not self.verbose, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
+        for i_epoch, _ in enumerate(loop):
+            train_loss.reset_states()
+            for i, (batch_users, batch_items, batch_ratings) in enumerate(self.train_set.uir_iter(self.batch_size, shuffle=True)):
+                user_reviews, user_num_reviews, user_ratings = get_data(batch_users, self.train_set, self.max_text_length, by='user', max_num_review=self.max_num_review)
+                item_reviews, item_num_reviews, item_ratings = get_data(batch_items, self.train_set, self.max_text_length, by='item', max_num_review=self.max_num_review)
+                with tf.GradientTape() as tape:
+                    predictions = self.model.graph(
+                        [batch_users, batch_items, user_ratings, user_reviews, user_num_reviews, item_ratings, item_reviews, item_num_reviews],
+                        training=True,
+                    )
+                    _loss = loss(batch_ratings, predictions)
+                gradients = tape.gradient(_loss, self.model.graph.trainable_variables)
+                self._optimizer.apply_gradients(zip(gradients, self.model.graph.trainable_variables))
+                train_loss(_loss)
+                if i % 10 == 0:
+                    loop.set_postfix(loss=train_loss.result().numpy(), val_loss=val_loss, best_val_loss=best_val_loss, best_epoch=self.best_epoch)
+            current_weights = self.model.get_weights(self.train_set, self.batch_size)
+            if self.val_set is not None:
+                self.P, self.Q, self.W1, self.bu, self.bi, self.mu, self.A = current_weights
+                [current_val_mse], _ = rating_eval(
+                    model=self,
+                    metrics=[MSE()],
+                    test_set=self.val_set,
+                    user_based=self.user_based
+                )
+                val_loss = current_val_mse
+                if best_val_loss > val_loss:
+                    best_val_loss = val_loss
+                    self.best_epoch = i_epoch + 1
+                    best_weights = current_weights
+                loop.set_postfix(loss=train_loss.result().numpy(), val_loss=val_loss, best_val_loss=best_val_loss, best_epoch=self.best_epoch)
+            self.losses["train_losses"].append(train_loss.result().numpy())
+            self.losses["val_losses"].append(val_loss)
+        loop.close()
+
+        # save weights for predictions
+        self.P, self.Q, self.W1, self.bu, self.bi, self.mu, self.A = best_weights if self.val_set is not None and self.model_selection == 'best' else current_weights
+        if self.verbose:
+            print("Learning completed!")
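Editor's note: since `_fit` records per-epoch losses in `self.losses`, the training curve can be inspected after fitting; a usage sketch (assumes matplotlib is installed and `model` is a fitted HRDR instance with a validation set):

```python
import matplotlib.pyplot as plt

plt.plot(model.losses["train_losses"], label="train MSE")
plt.plot(model.losses["val_losses"], label="validation MSE")
plt.xlabel("epoch")
plt.legend()
plt.show()
```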
+
+    def save(self, save_dir=None):
+        """Save a recommender model to the filesystem.
+
+        Parameters
+        ----------
+        save_dir: str, default: None
+            Path to a directory for the model to be stored.
+
+        """
+        if save_dir is None:
+            return
+        graph = self.model.graph
+        del self.model.graph
+        _optimizer = self._optimizer
+        del self._optimizer
+        model_file = Recommender.save(self, save_dir)
+
+        self._optimizer = _optimizer
+        self.model.graph = graph
+        self.model.graph.save(model_file.replace(".pkl", ".cpt"))
+        with open(model_file.replace(".pkl", ".opt"), 'wb') as f:
+            pickle.dump(self._optimizer.get_weights(), f)
+        return model_file
+
+    @staticmethod
+    def load(model_path, trainable=False):
+        """Load a recommender model from the filesystem.
+
+        Parameters
+        ----------
+        model_path: str, required
+            Path to a file or directory where the model is stored. If a directory is
+            provided, the latest model will be loaded.
+
+        trainable: boolean, optional, default: False
+            Set it to True if you would like to fine-tune the model. By default,
+            the model parameters are assumed to be fixed after being loaded.
+
+        Returns
+        -------
+        self : object
+        """
+        import tensorflow as tf
+        from tensorflow import keras
+        import absl.logging
+        absl.logging.set_verbosity(absl.logging.ERROR)
+
+        model = Recommender.load(model_path, trainable)
+        model.model.graph = keras.models.load_model(model.load_from.replace(".pkl", ".cpt"), compile=False)
+        if model.optimizer == 'rmsprop':
+            model._optimizer = keras.optimizers.RMSprop(learning_rate=model.learning_rate)
+        else:
+            model._optimizer = keras.optimizers.Adam(learning_rate=model.learning_rate)
+        # apply a zero-gradient step to create the optimizer slots before restoring their weights
+        zero_grads = [tf.zeros_like(w) for w in model.model.graph.trainable_variables]
+        model._optimizer.apply_gradients(zip(zero_grads, model.model.graph.trainable_variables))
+        with open(model.load_from.replace(".pkl", ".opt"), 'rb') as f:
+            optimizer_weights = pickle.load(f)
+        model._optimizer.set_weights(optimizer_weights)
+
+        return model
+
+    def score(self, user_idx, item_idx=None):
+        """Predict the scores/ratings of a user for an item.
+
+        Parameters
+        ----------
+        user_idx: int, required
+            The index of the user for whom to perform score prediction.
+
+        item_idx: int, optional, default: None
+            The index of the item for which to perform score prediction.
+            If None, scores for all known items will be returned.
+
+        Returns
+        -------
+        res : A scalar or a Numpy array
+            Relative scores that the user gives to the item or to all known items
+        """
+        if item_idx is None:
+            if self.train_set.is_unk_user(user_idx):
+                raise ScoreException(
+                    "Can't make score prediction for (user_id=%d)" % user_idx
+                )
+            h0 = self.P[user_idx] * self.Q
+            known_item_scores = h0.dot(self.W1) + self.bu[user_idx] + self.bi + self.mu
+            return known_item_scores.ravel()
+        else:
+            if self.train_set.is_unk_user(user_idx) or self.train_set.is_unk_item(item_idx):
+                raise ScoreException(
+                    "Can't make score prediction for (user_id=%d, item_id=%d)"
+                    % (user_idx, item_idx)
+                )
+            h0 = self.P[user_idx] * self.Q[item_idx]
+            known_item_score = h0.dot(self.W1) + self.bu[user_idx] + self.bi[item_idx] + self.mu
+            return known_item_score
diff --git a/cornac/models/hrdr/requirements.txt b/cornac/models/hrdr/requirements.txt
new file mode 100644
index 000000000..14e2508bc
--- /dev/null
+++ b/cornac/models/hrdr/requirements.txt
@@ -0,0 +1 @@
+tensorflow==2.6.0
diff --git a/docs/source/models.rst b/docs/source/models.rst
index eb9665b89..7796c6ac5 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -48,6 +48,11 @@ Multi-Task Explainable Recommendation (MTER)
 .. automodule:: cornac.models.mter.recom_mter
    :members:
 
+Hybrid neural recommendation with joint deep representation learning of ratings and reviews (HRDR)
+---------------------------------------------------------------------------------------------------
+.. automodule:: cornac.models.hrdr.recom_hrdr
+   :members:
+
 Neural Attention Rating Regression with Review-level Explanations (NARRE)
 ---------------------------------------------------------------------------
 .. automodule:: cornac.models.narre.recom_narre
    :members:
diff --git a/examples/hrdr_example.py b/examples/hrdr_example.py
new file mode 100644
index 000000000..f72019b65
--- /dev/null
+++ b/examples/hrdr_example.py
@@ -0,0 +1,67 @@
+# Copyright 2018 The Cornac Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Example for HRDR model on the Amazon Digital Music dataset"""
+
+import cornac
+from cornac.datasets import amazon_digital_music
+from cornac.eval_methods import RatioSplit
+from cornac.data import ReviewModality
+from cornac.data.text import BaseTokenizer
+
+
+feedback = amazon_digital_music.load_feedback()
+reviews = amazon_digital_music.load_review()
+
+
+review_modality = ReviewModality(
+    data=reviews,
+    tokenizer=BaseTokenizer(stop_words="english"),
+    max_vocab=4000,
+    max_doc_freq=0.5,
+)
+
+ratio_split = RatioSplit(
+    data=feedback,
+    test_size=0.1,
+    val_size=0.1,
+    exclude_unknowns=True,
+    review_text=review_modality,
+    verbose=True,
+    seed=123,
+)
+
+pretrained_word_embeddings = {}  # You can load pretrained word embeddings here
+
+model = cornac.models.HRDR(
+    embedding_size=100,
+    id_embedding_size=32,
+    n_factors=32,
+    attention_size=16,
+    kernel_sizes=[3],
+    n_filters=64,
+    n_user_mlp_factors=128,
+    n_item_mlp_factors=128,
+    dropout_rate=0.5,
+    max_text_length=50,
+    batch_size=64,
+    max_iter=10,
+    init_params={'pretrained_word_embeddings': pretrained_word_embeddings},
+    verbose=True,
+    seed=123,
+)
+
+cornac.Experiment(
+    eval_method=ratio_split, models=[model], metrics=[cornac.metrics.RMSE()]
+).run()
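Editor's note: after the experiment has run, the fitted model can also be queried directly; a sketch using cornac's internal indices (illustrative, not part of the example file):

```python
scores = model.score(user_idx=0)    # one score per known item for this user
top5 = scores.argsort()[-5:][::-1]  # internal ids of the five highest-scored items
print(top5)
```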