diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 00000000..ef0dcf8a
Binary files /dev/null and b/.DS_Store differ
diff --git a/examples/coldStartExample/.DS_Store b/examples/coldStartExample/.DS_Store
new file mode 100644
index 00000000..5008ddfc
Binary files /dev/null and b/examples/coldStartExample/.DS_Store differ
diff --git a/examples/coldStartExample/LightFM Worked example.ipynb b/examples/coldStartExample/LightFM Worked example.ipynb
new file mode 100644
index 00000000..8b8e54d6
--- /dev/null
+++ b/examples/coldStartExample/LightFM Worked example.ipynb
@@ -0,0 +1,1218 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Importing necessary libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/root947/opt/anaconda3/lib/python3.7/site-packages/lightfm/_lightfm_fast.py:9: UserWarning: LightFM was compiled without OpenMP support. Only a single thread will be used.\n",
+ " warnings.warn('LightFM was compiled without OpenMP support. '\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from lightfm import LightFM"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Creating dummy datasets "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " item | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " u1 | \n",
+ " i1 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " u1 | \n",
+ " i3 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " u2 | \n",
+ " i2 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " u2 | \n",
+ " i3 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " u3 | \n",
+ " i1 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " u3 | \n",
+ " i4 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " u3 | \n",
+ " i2 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user item r\n",
+ "0 u1 i1 0.1\n",
+ "1 u1 i3 0.2\n",
+ "2 u2 i2 0.1\n",
+ "3 u2 i3 0.3\n",
+ "4 u3 i1 0.4\n",
+ "5 u3 i4 0.5\n",
+ "6 u3 i2 0.2"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# create dummy dataset\n",
+ "data = {'user': ['u1','u1','u2','u2', 'u3', 'u3', 'u3'], \n",
+ " 'item': ['i1', 'i3', 'i2', 'i3', 'i1', 'i4', 'i2'], \n",
+ " 'r': [.1,.2,.1,.3,.4,.5,.2]\n",
+ " }\n",
+ "df = pd.DataFrame(data, columns = ['user', 'item', 'r'])\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " f1 | \n",
+ " f2 | \n",
+ " f3 | \n",
+ " loc | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " u1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " del | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " u2 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " mum | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " u3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " del | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user f1 f2 f3 loc\n",
+ "0 u1 1 1 0 del\n",
+ "1 u2 0 1 0 mum\n",
+ "2 u3 1 1 1 del"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#dummy item features\n",
+ "data = {'user': ['u1','u2','u3'], \n",
+ " 'f1': [1, 0, 1], \n",
+ " 'f2': [1, 1, 1],\n",
+ " 'f3': [0, 0, 1],\n",
+ " 'loc': ['del', 'mum', 'del']\n",
+ " }\n",
+ "features = pd.DataFrame(data, columns = ['user', 'f1', 'f2', 'f3', 'loc'])\n",
+ "features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Creating user features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "f1:1\n",
+ "f1:0\n",
+ "f2:1\n",
+ "f3:0\n",
+ "f3:1\n",
+ "loc:del\n",
+ "loc:mum\n"
+ ]
+ }
+ ],
+ "source": [
+ "uf = []\n",
+ "col = ['f1']*len(features.f1.unique()) + ['f2']*len(features.f2.unique()) + ['f3']*len(features.f3.unique()) + ['loc']*len(features['loc'].unique())\n",
+ "unique_f1 = list(features.f1.unique()) + list(features.f2.unique()) + list(features.f3.unique()) + list(features['loc'].unique())\n",
+ "#print('f1:', unique_f1)\n",
+ "for x,y in zip(col, unique_f1):\n",
+ " res = str(x)+ \":\" +str(y)\n",
+ " uf.append(res)\n",
+ " print(res)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Fitting the dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from lightfm.data import Dataset\n",
+ "# we call fit to supply userid, item id and user/item features\n",
+ "dataset1 = Dataset()\n",
+ "dataset1.fit(\n",
+ " df['user'].unique(), # all the users\n",
+ " df['item'].unique(), # all the items\n",
+ " #user_features = ['f1:1', 'f1:0', 'f2:1', 'f2:0', 'f3:1', 'f3:0', 'loc:mum', 'loc:del']\n",
+ " user_features = uf\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# plugging in the interactions and their weights\n",
+ "(interactions, weights) = dataset1.build_interactions([(x[0], x[1], x[2]) for x in df.values ])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[1, 1, 0, 0],\n",
+ " [0, 1, 1, 0],\n",
+ " [1, 0, 1, 1]], dtype=int32)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "interactions.todense()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[0.1, 0.2, 0. , 0. ],\n",
+ " [0. , 0.3, 0.1, 0. ],\n",
+ " [0.4, 0. , 0.2, 0.5]], dtype=float32)"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "weights.todense()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Building user features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def feature_colon_value(my_list):\n",
+ " \"\"\"\n",
+ " Takes as input a list and prepends the columns names to respective values in the list.\n",
+ " For example: if my_list = [1,1,0,'del'],\n",
+ " resultant output = ['f1:1', 'f2:1', 'f3:0', 'loc:del']\n",
+ " \n",
+ " \"\"\"\n",
+ " result = []\n",
+ " ll = ['f1:','f2:', 'f3:', 'loc:']\n",
+ " aa = my_list\n",
+ " for x,y in zip(ll,aa):\n",
+ " res = str(x) +\"\"+ str(y)\n",
+ " result.append(res)\n",
+ " return result\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['f1:1', 'f2:1', 'f3:0', 'loc:del']\n",
+ "['f1:0', 'f2:1', 'f3:0', 'loc:mum']\n",
+ "['f1:1', 'f2:1', 'f3:1', 'loc:del']\n",
+ "Final output: [['f1:1', 'f2:1', 'f3:0', 'loc:del'], ['f1:0', 'f2:1', 'f3:0', 'loc:mum'], ['f1:1', 'f2:1', 'f3:1', 'loc:del']]\n"
+ ]
+ }
+ ],
+ "source": [
+ "ad_subset = features[[\"f1\", 'f2','f3', 'loc']] \n",
+ "ad_list = [list(x) for x in ad_subset.values]\n",
+ "feature_list = []\n",
+ "for item in ad_list:\n",
+ " feature_list.append(feature_colon_value(item))\n",
+ " print(feature_colon_value(item))\n",
+ "print(f'Final output: {feature_list}') "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('u1', ['f1:1', 'f2:1', 'f3:0', 'loc:del']),\n",
+ " ('u2', ['f1:0', 'f2:1', 'f3:0', 'loc:mum']),\n",
+ " ('u3', ['f1:1', 'f2:1', 'f3:1', 'loc:del'])]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_tuple = list(zip(features.user, feature_list))\n",
+ "user_tuple\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],\n",
+ " [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],\n",
+ " [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_features = dataset1.build_user_features(user_tuple, normalize= False)\n",
+ "user_features.todense()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],\n",
+ " [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],\n",
+ " [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_features.todense() # rows are the users and columns are the user features : total 10 features. WHY 10 see below\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'u1': 0, 'u2': 1, 'u3': 2},\n",
+ " {'u1': 0,\n",
+ " 'u2': 1,\n",
+ " 'u3': 2,\n",
+ " 'f1:1': 3,\n",
+ " 'f1:0': 4,\n",
+ " 'f2:1': 5,\n",
+ " 'f3:0': 6,\n",
+ " 'f3:1': 7,\n",
+ " 'loc:del': 8,\n",
+ " 'loc:mum': 9},\n",
+ " {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3},\n",
+ " {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3})"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()\n",
+ "dataset1.mapping()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'u1': 0,\n",
+ " 'u2': 1,\n",
+ " 'u3': 2,\n",
+ " 'f1:1': 3,\n",
+ " 'f1:0': 4,\n",
+ " 'f2:1': 5,\n",
+ " 'f3:0': 6,\n",
+ " 'f3:1': 7,\n",
+ " 'loc:del': 8,\n",
+ " 'loc:mum': 9}"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_feature_map\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Training the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "model = LightFM(loss='warp')\n",
+ "model.fit(interactions, # spase matrix representing whether user u and item i interacted\n",
+ " user_features= user_features, # we have built the sparse matrix above\n",
+ " sample_weight= weights, # spase matrix representing how much value to give to user u and item i inetraction: i.e ratings\n",
+ " epochs=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Evaluating the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hybrid training set AUC: 0.6111111\n"
+ ]
+ }
+ ],
+ "source": [
+ "from lightfm.evaluation import auc_score\n",
+ "train_auc = auc_score(model,\n",
+ " interactions,\n",
+ " user_features=user_features\n",
+ " ).mean()\n",
+ "print('Hybrid training set AUC: %s' % train_auc)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Prediction for KNOWN user"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-0.0981927 , -0.04535889, -0.01995798, -0.20764589])"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "# predict for existing user\n",
+ "user_x = user_id_map['u3']\n",
+ "n_users, n_items = interactions.shape # no of users * no of items\n",
+ "model.predict(user_x, np.arange(n_items)) # means predict for all\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Prediction for NEW user"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# predict for new user\n",
+ "user_feature_list = ['f1:1', 'f2:1', 'f3:0', 'loc:del']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from scipy import sparse\n",
+ "def format_newuser_input(user_feature_map, user_feature_list):\n",
+ " #user_feature_map = user_feature_map \n",
+ " num_features = len(user_feature_list)\n",
+ " normalised_val = 1.0 \n",
+ " target_indices = []\n",
+ " for feature in user_feature_list:\n",
+ " try:\n",
+ " target_indices.append(user_feature_map[feature])\n",
+ " except KeyError:\n",
+ " print(\"new user feature encountered '{}'\".format(feature))\n",
+ " pass\n",
+ " #print(\"target indices: {}\".format(target_indices))\n",
+ " new_user_features = np.zeros(len(user_feature_map.keys()))\n",
+ " for i in target_indices:\n",
+ " new_user_features[i] = normalised_val\n",
+ " new_user_features = sparse.csr_matrix(new_user_features)\n",
+ " return(new_user_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_user_features = format_newuser_input(user_feature_map, user_feature_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[0., 0., 0., 1., 0., 1., 1., 0., 1., 0.]])"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_user_features.todense()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-1.64527702, -1.36543167, -1.10002422, -1.63429642])"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "model.predict(0, np.arange(n_items), user_features=new_user_features) # Here 0 means pick the first row of the user_features sparse matrix"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[3., 0., 0., 0.],\n",
+ " [0., 1., 0., 0.],\n",
+ " [3., 0., 0., 2.]], dtype=float32)"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.predict_rank(test, user_features = user_features).todense()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you notice, for user 0, i.e. first row, the rank for item 0 is 3, thats because it has the lowest predict score in output [78]. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[1., 0., 0., 0.],\n",
+ " [0., 1., 0., 0.],\n",
+ " [1., 0., 0., 1.]], dtype=float32)"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test.todense()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the above matrix, whereever there is a 1, only those interactions (u,i) pair will hv a ranking."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " (0, 0)\t1.0\n",
+ " (2, 3)\t1.0\n",
+ " (2, 0)\t1.0\n",
+ " (1, 1)\t1.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-1.86071086, -1.56120646, -1.30620074, -1.85903156])"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.predict(0, np.arange(n_items), user_features = user_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-1.63636696, -1.35404575, -1.03478122, -1.60591865])"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.predict(1, np.arange(n_items), user_features = user_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-1.79751194, -1.5171963 , -1.21272945, -1.77229905])"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.predict(2, np.arange(n_items), user_features = user_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<3x4 sparse matrix of type ''\n",
+ "\twith 4 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.predict_rank(test,user_features = user_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1., 3., 4., 2.])"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import scipy.stats as ss\n",
+ "\n",
+ "res = model.predict(0, np.arange(n_items), user_features=new_user_features)\n",
+ "\n",
+ "ss.rankdata(res)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([-1.64527702, -1.36543167, -1.10002422, -1.63429642])"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Higher the rank the better"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],\n",
+ " [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],\n",
+ " [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "user_features.todense()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " episode | \n",
+ " progress | \n",
+ " r_est_SVD | \n",
+ " r_est_KNN | \n",
+ " rank_lightfm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " cGILP7WVa4XwVILmjxp0ZVI7gfC3 | \n",
+ " 7d69c61b-2937-4f1e-a1f3-dd53ba863130 | \n",
+ " 98.33 | \n",
+ " 999.000000 | \n",
+ " 999.000000 | \n",
+ " 999.000000 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " cGILP7WVa4XwVILmjxp0ZVI7gfC3 | \n",
+ " f1be1f0b-6359-41a6-b921-c5a6307f90bb | \n",
+ " 98.33 | \n",
+ " 92.794784 | \n",
+ " 94.245662 | \n",
+ " 0.328915 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " cGILP7WVa4XwVILmjxp0ZVI7gfC3 | \n",
+ " 949243f9-09eb-4110-950c-f9a15182c6b7 | \n",
+ " 98.33 | \n",
+ " 999.000000 | \n",
+ " 999.000000 | \n",
+ " 999.000000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " cGILP7WVa4XwVILmjxp0ZVI7gfC3 | \n",
+ " tag:soundcloud,2010:tracks/772779739 | \n",
+ " 98.33 | \n",
+ " 92.624375 | \n",
+ " 94.245662 | \n",
+ " 0.329188 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " cGILP7WVa4XwVILmjxp0ZVI7gfC3 | \n",
+ " 2890ec96-8327-11ea-a096-b747bcb91e95 | \n",
+ " 98.33 | \n",
+ " 92.531928 | \n",
+ " 94.245662 | \n",
+ " 0.329029 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 2IU3mIjL5uNgOnrEM128PEPgPLz1 | \n",
+ " aca62eec-c140-415e-8d27-e06e808de84a | \n",
+ " 94.23 | \n",
+ " 93.752177 | \n",
+ " 93.976973 | \n",
+ " 0.328142 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 2IU3mIjL5uNgOnrEM128PEPgPLz1 | \n",
+ " cc344c6e-84d3-11ea-bddc-af6bd1d75ec3 | \n",
+ " 94.23 | \n",
+ " 93.466062 | \n",
+ " 93.976973 | \n",
+ " 0.329994 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " lAKy9OUT97RaTqJ7XicGHDUDouT2 | \n",
+ " 516c3808-aaa7-11ea-88c6-43ea29ee4991 | \n",
+ " 99.98 | \n",
+ " 94.414046 | \n",
+ " 999.000000 | \n",
+ " 0.328714 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " lAKy9OUT97RaTqJ7XicGHDUDouT2 | \n",
+ " ff99c9f6-948e-11ea-b40e-b79c47374336 | \n",
+ " 99.98 | \n",
+ " 93.858811 | \n",
+ " 999.000000 | \n",
+ " 0.322251 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user episode \\\n",
+ "0 cGILP7WVa4XwVILmjxp0ZVI7gfC3 7d69c61b-2937-4f1e-a1f3-dd53ba863130 \n",
+ "1 cGILP7WVa4XwVILmjxp0ZVI7gfC3 f1be1f0b-6359-41a6-b921-c5a6307f90bb \n",
+ "2 cGILP7WVa4XwVILmjxp0ZVI7gfC3 949243f9-09eb-4110-950c-f9a15182c6b7 \n",
+ "3 cGILP7WVa4XwVILmjxp0ZVI7gfC3 tag:soundcloud,2010:tracks/772779739 \n",
+ "4 cGILP7WVa4XwVILmjxp0ZVI7gfC3 2890ec96-8327-11ea-a096-b747bcb91e95 \n",
+ "5 2IU3mIjL5uNgOnrEM128PEPgPLz1 aca62eec-c140-415e-8d27-e06e808de84a \n",
+ "6 2IU3mIjL5uNgOnrEM128PEPgPLz1 cc344c6e-84d3-11ea-bddc-af6bd1d75ec3 \n",
+ "7 lAKy9OUT97RaTqJ7XicGHDUDouT2 516c3808-aaa7-11ea-88c6-43ea29ee4991 \n",
+ "8 lAKy9OUT97RaTqJ7XicGHDUDouT2 ff99c9f6-948e-11ea-b40e-b79c47374336 \n",
+ "\n",
+ " progress r_est_SVD r_est_KNN rank_lightfm \n",
+ "0 98.33 999.000000 999.000000 999.000000 \n",
+ "1 98.33 92.794784 94.245662 0.328915 \n",
+ "2 98.33 999.000000 999.000000 999.000000 \n",
+ "3 98.33 92.624375 94.245662 0.329188 \n",
+ "4 98.33 92.531928 94.245662 0.329029 \n",
+ "5 94.23 93.752177 93.976973 0.328142 \n",
+ "6 94.23 93.466062 93.976973 0.329994 \n",
+ "7 99.98 94.414046 999.000000 0.328714 \n",
+ "8 99.98 93.858811 999.000000 0.322251 "
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from requests import put, get\n",
+ "import requests\n",
+ "from pandas.io.json import json_normalize \n",
+ "\n",
+ "url = 'http://127.0.0.1:1221/vs'\n",
+ "params = {'uid': 'BWf6M8RVnhRLbng1VUAdhtCLEG72', 'num1': 12\n",
+ " }\n",
+ "response = requests.get(url, params)\n",
+ "d = response.json()\n",
+ "json_normalize(d, 'res')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/coldStartExample/README.md b/examples/coldStartExample/README.md
new file mode 100755
index 00000000..02c4969d
--- /dev/null
+++ b/examples/coldStartExample/README.md
@@ -0,0 +1,2 @@
+# LightFm_HybridRecommenderSystem
+Creating a hybrid recommender system using LightFM. Learn how to tackle the cold start problem. For further clarity, read the associated article published in Towards Data Science: https://towardsdatascience.com/how-i-would-explain-building-lightfm-hybrid-recommenders-to-a-5-year-old-b6ee18571309