diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..ef0dcf8a Binary files /dev/null and b/.DS_Store differ diff --git a/examples/coldStartExample/.DS_Store b/examples/coldStartExample/.DS_Store new file mode 100644 index 00000000..5008ddfc Binary files /dev/null and b/examples/coldStartExample/.DS_Store differ diff --git a/examples/coldStartExample/LightFM Worked example.ipynb b/examples/coldStartExample/LightFM Worked example.ipynb new file mode 100644 index 00000000..8b8e54d6 --- /dev/null +++ b/examples/coldStartExample/LightFM Worked example.ipynb @@ -0,0 +1,1218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Importing necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/root947/opt/anaconda3/lib/python3.7/site-packages/lightfm/_lightfm_fast.py:9: UserWarning: LightFM was compiled without OpenMP support. Only a single thread will be used.\n", + " warnings.warn('LightFM was compiled without OpenMP support. '\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from lightfm import LightFM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating dummy datasets " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
useritemr
0u1i10.1
1u1i30.2
2u2i20.1
3u2i30.3
4u3i10.4
5u3i40.5
6u3i20.2
\n", + "
" + ], + "text/plain": [ + " user item r\n", + "0 u1 i1 0.1\n", + "1 u1 i3 0.2\n", + "2 u2 i2 0.1\n", + "3 u2 i3 0.3\n", + "4 u3 i1 0.4\n", + "5 u3 i4 0.5\n", + "6 u3 i2 0.2" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create dummy dataset\n", + "data = {'user': ['u1','u1','u2','u2', 'u3', 'u3', 'u3'], \n", + " 'item': ['i1', 'i3', 'i2', 'i3', 'i1', 'i4', 'i2'], \n", + " 'r': [.1,.2,.1,.3,.4,.5,.2]\n", + " }\n", + "df = pd.DataFrame(data, columns = ['user', 'item', 'r'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userf1f2f3loc
0u1110del
1u2010mum
2u3111del
\n", + "
" + ], + "text/plain": [ + " user f1 f2 f3 loc\n", + "0 u1 1 1 0 del\n", + "1 u2 0 1 0 mum\n", + "2 u3 1 1 1 del" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#dummy item features\n", + "data = {'user': ['u1','u2','u3'], \n", + " 'f1': [1, 0, 1], \n", + " 'f2': [1, 1, 1],\n", + " 'f3': [0, 0, 1],\n", + " 'loc': ['del', 'mum', 'del']\n", + " }\n", + "features = pd.DataFrame(data, columns = ['user', 'f1', 'f2', 'f3', 'loc'])\n", + "features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating user features" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f1:1\n", + "f1:0\n", + "f2:1\n", + "f3:0\n", + "f3:1\n", + "loc:del\n", + "loc:mum\n" + ] + } + ], + "source": [ + "uf = []\n", + "col = ['f1']*len(features.f1.unique()) + ['f2']*len(features.f2.unique()) + ['f3']*len(features.f3.unique()) + ['loc']*len(features['loc'].unique())\n", + "unique_f1 = list(features.f1.unique()) + list(features.f2.unique()) + list(features.f3.unique()) + list(features['loc'].unique())\n", + "#print('f1:', unique_f1)\n", + "for x,y in zip(col, unique_f1):\n", + " res = str(x)+ \":\" +str(y)\n", + " uf.append(res)\n", + " print(res)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fitting the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from lightfm.data import Dataset\n", + "# we call fit to supply userid, item id and user/item features\n", + "dataset1 = Dataset()\n", + "dataset1.fit(\n", + " df['user'].unique(), # all the users\n", + " df['item'].unique(), # all the items\n", + " #user_features = ['f1:1', 'f1:0', 'f2:1', 'f2:0', 'f3:1', 'f3:0', 'loc:mum', 'loc:del']\n", + " user_features = uf\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# plugging in the interactions and their weights\n", + "(interactions, weights) = dataset1.build_interactions([(x[0], x[1], x[2]) for x in df.values ])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[1, 1, 0, 0],\n", + " [0, 1, 1, 0],\n", + " [1, 0, 1, 1]], dtype=int32)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "interactions.todense()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[0.1, 0.2, 0. , 0. ],\n", + " [0. , 0.3, 0.1, 0. ],\n", + " [0.4, 0. , 0.2, 0.5]], dtype=float32)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weights.todense()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Building user features" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def feature_colon_value(my_list):\n", + " \"\"\"\n", + " Takes as input a list and prepends the columns names to respective values in the list.\n", + " For example: if my_list = [1,1,0,'del'],\n", + " resultant output = ['f1:1', 'f2:1', 'f3:0', 'loc:del']\n", + " \n", + " \"\"\"\n", + " result = []\n", + " ll = ['f1:','f2:', 'f3:', 'loc:']\n", + " aa = my_list\n", + " for x,y in zip(ll,aa):\n", + " res = str(x) +\"\"+ str(y)\n", + " result.append(res)\n", + " return result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['f1:1', 'f2:1', 'f3:0', 'loc:del']\n", + "['f1:0', 'f2:1', 'f3:0', 'loc:mum']\n", + "['f1:1', 'f2:1', 'f3:1', 'loc:del']\n", + "Final output: [['f1:1', 'f2:1', 'f3:0', 'loc:del'], ['f1:0', 'f2:1', 'f3:0', 'loc:mum'], ['f1:1', 'f2:1', 'f3:1', 'loc:del']]\n" + ] + } + ], + "source": [ + "ad_subset = features[[\"f1\", 'f2','f3', 'loc']] \n", + "ad_list = [list(x) for x in ad_subset.values]\n", + "feature_list = []\n", + "for item in ad_list:\n", + " feature_list.append(feature_colon_value(item))\n", + " print(feature_colon_value(item))\n", + "print(f'Final output: {feature_list}') " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('u1', ['f1:1', 'f2:1', 'f3:0', 'loc:del']),\n", + " ('u2', ['f1:0', 'f2:1', 'f3:0', 'loc:mum']),\n", + " ('u3', ['f1:1', 'f2:1', 'f3:1', 'loc:del'])]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_tuple = list(zip(features.user, feature_list))\n", + "user_tuple\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],\n", + " [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],\n", + " [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_features = dataset1.build_user_features(user_tuple, normalize= False)\n", + "user_features.todense()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],\n", + " [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],\n", + " [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_features.todense() # rows are the users and columns are the user features : total 10 features. WHY 10 see below\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'u1': 0, 'u2': 1, 'u3': 2},\n", + " {'u1': 0,\n", + " 'u2': 1,\n", + " 'u3': 2,\n", + " 'f1:1': 3,\n", + " 'f1:0': 4,\n", + " 'f2:1': 5,\n", + " 'f3:0': 6,\n", + " 'f3:1': 7,\n", + " 'loc:del': 8,\n", + " 'loc:mum': 9},\n", + " {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3},\n", + " {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3})" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()\n", + "dataset1.mapping()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'u1': 0,\n", + " 'u2': 1,\n", + " 'u3': 2,\n", + " 'f1:1': 3,\n", + " 'f1:0': 4,\n", + " 'f2:1': 5,\n", + " 'f3:0': 6,\n", + " 'f3:1': 7,\n", + " 'loc:del': 8,\n", + " 'loc:mum': 9}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_feature_map\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training the model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "model = LightFM(loss='warp')\n", + "model.fit(interactions, # spase matrix representing whether user u and item i interacted\n", + " user_features= user_features, # we have built the sparse matrix above\n", + " sample_weight= weights, # spase matrix representing how much value to give to user u and item i inetraction: i.e ratings\n", + " epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluating the model" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hybrid training set AUC: 0.6111111\n" + ] + } + ], + "source": [ + "from lightfm.evaluation import auc_score\n", + "train_auc = auc_score(model,\n", + " interactions,\n", + " user_features=user_features\n", + " ).mean()\n", + "print('Hybrid training set AUC: %s' % train_auc)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prediction for KNOWN user" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.0981927 , -0.04535889, -0.01995798, -0.20764589])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# predict for existing user\n", + "user_x = user_id_map['u3']\n", + "n_users, n_items = interactions.shape # no of users * no of items\n", + "model.predict(user_x, np.arange(n_items)) # means predict for all\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prediction for NEW user" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# predict for new user\n", + "user_feature_list = ['f1:1', 'f2:1', 'f3:0', 'loc:del']" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import sparse\n", + "def format_newuser_input(user_feature_map, user_feature_list):\n", + " #user_feature_map = user_feature_map \n", + " num_features = len(user_feature_list)\n", + " normalised_val = 1.0 \n", + " target_indices = []\n", + " for feature in user_feature_list:\n", + " try:\n", + " target_indices.append(user_feature_map[feature])\n", + " except KeyError:\n", + " print(\"new user feature encountered '{}'\".format(feature))\n", + " pass\n", + " #print(\"target indices: {}\".format(target_indices))\n", + " new_user_features = np.zeros(len(user_feature_map.keys()))\n", + " for i in target_indices:\n", + " new_user_features[i] = normalised_val\n", + " new_user_features = sparse.csr_matrix(new_user_features)\n", + " return(new_user_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "new_user_features = format_newuser_input(user_feature_map, user_feature_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[0., 0., 0., 1., 0., 1., 1., 0., 1., 0.]])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_user_features.todense()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1.64527702, -1.36543167, -1.10002422, -1.63429642])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "model.predict(0, np.arange(n_items), user_features=new_user_features) # Here 0 means pick the first row of the user_features sparse matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[3., 0., 0., 0.],\n", + " [0., 1., 0., 0.],\n", + " [3., 0., 0., 2.]], dtype=float32)" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict_rank(test, user_features = user_features).todense()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you notice, for user 0, i.e. first row, the rank for item 0 is 3, thats because it has the lowest predict score in output [78]. " + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[1., 0., 0., 0.],\n", + " [0., 1., 0., 0.],\n", + " [1., 0., 0., 1.]], dtype=float32)" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.todense()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above matrix, whereever there is a 1, only those interactions (u,i) pair will hv a ranking." + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 0)\t1.0\n", + " (2, 3)\t1.0\n", + " (2, 0)\t1.0\n", + " (1, 1)\t1.0\n" + ] + } + ], + "source": [ + "print(test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1.86071086, -1.56120646, -1.30620074, -1.85903156])" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(0, np.arange(n_items), user_features = user_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1.63636696, -1.35404575, -1.03478122, -1.60591865])" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(1, np.arange(n_items), user_features = user_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1.79751194, -1.5171963 , -1.21272945, -1.77229905])" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(2, np.arange(n_items), user_features = user_features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<3x4 sparse matrix of type ''\n", + "\twith 4 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict_rank(test,user_features = user_features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1., 3., 4., 2.])" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import scipy.stats as ss\n", + "\n", + "res = model.predict(0, np.arange(n_items), user_features=new_user_features)\n", + "\n", + "ss.rankdata(res)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-1.64527702, -1.36543167, -1.10002422, -1.63429642])" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Higher the rank the better" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],\n", + " [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],\n", + " [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_features.todense()" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userepisodeprogressr_est_SVDr_est_KNNrank_lightfm
0cGILP7WVa4XwVILmjxp0ZVI7gfC37d69c61b-2937-4f1e-a1f3-dd53ba86313098.33999.000000999.000000999.000000
1cGILP7WVa4XwVILmjxp0ZVI7gfC3f1be1f0b-6359-41a6-b921-c5a6307f90bb98.3392.79478494.2456620.328915
2cGILP7WVa4XwVILmjxp0ZVI7gfC3949243f9-09eb-4110-950c-f9a15182c6b798.33999.000000999.000000999.000000
3cGILP7WVa4XwVILmjxp0ZVI7gfC3tag:soundcloud,2010:tracks/77277973998.3392.62437594.2456620.329188
4cGILP7WVa4XwVILmjxp0ZVI7gfC32890ec96-8327-11ea-a096-b747bcb91e9598.3392.53192894.2456620.329029
52IU3mIjL5uNgOnrEM128PEPgPLz1aca62eec-c140-415e-8d27-e06e808de84a94.2393.75217793.9769730.328142
62IU3mIjL5uNgOnrEM128PEPgPLz1cc344c6e-84d3-11ea-bddc-af6bd1d75ec394.2393.46606293.9769730.329994
7lAKy9OUT97RaTqJ7XicGHDUDouT2516c3808-aaa7-11ea-88c6-43ea29ee499199.9894.414046999.0000000.328714
8lAKy9OUT97RaTqJ7XicGHDUDouT2ff99c9f6-948e-11ea-b40e-b79c4737433699.9893.858811999.0000000.322251
\n", + "
" + ], + "text/plain": [ + " user episode \\\n", + "0 cGILP7WVa4XwVILmjxp0ZVI7gfC3 7d69c61b-2937-4f1e-a1f3-dd53ba863130 \n", + "1 cGILP7WVa4XwVILmjxp0ZVI7gfC3 f1be1f0b-6359-41a6-b921-c5a6307f90bb \n", + "2 cGILP7WVa4XwVILmjxp0ZVI7gfC3 949243f9-09eb-4110-950c-f9a15182c6b7 \n", + "3 cGILP7WVa4XwVILmjxp0ZVI7gfC3 tag:soundcloud,2010:tracks/772779739 \n", + "4 cGILP7WVa4XwVILmjxp0ZVI7gfC3 2890ec96-8327-11ea-a096-b747bcb91e95 \n", + "5 2IU3mIjL5uNgOnrEM128PEPgPLz1 aca62eec-c140-415e-8d27-e06e808de84a \n", + "6 2IU3mIjL5uNgOnrEM128PEPgPLz1 cc344c6e-84d3-11ea-bddc-af6bd1d75ec3 \n", + "7 lAKy9OUT97RaTqJ7XicGHDUDouT2 516c3808-aaa7-11ea-88c6-43ea29ee4991 \n", + "8 lAKy9OUT97RaTqJ7XicGHDUDouT2 ff99c9f6-948e-11ea-b40e-b79c47374336 \n", + "\n", + " progress r_est_SVD r_est_KNN rank_lightfm \n", + "0 98.33 999.000000 999.000000 999.000000 \n", + "1 98.33 92.794784 94.245662 0.328915 \n", + "2 98.33 999.000000 999.000000 999.000000 \n", + "3 98.33 92.624375 94.245662 0.329188 \n", + "4 98.33 92.531928 94.245662 0.329029 \n", + "5 94.23 93.752177 93.976973 0.328142 \n", + "6 94.23 93.466062 93.976973 0.329994 \n", + "7 99.98 94.414046 999.000000 0.328714 \n", + "8 99.98 93.858811 999.000000 0.322251 " + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from requests import put, get\n", + "import requests\n", + "from pandas.io.json import json_normalize \n", + "\n", + "url = 'http://127.0.0.1:1221/vs'\n", + "params = {'uid': 'BWf6M8RVnhRLbng1VUAdhtCLEG72', 'num1': 12\n", + " }\n", + "response = requests.get(url, params)\n", + "d = response.json()\n", + "json_normalize(d, 'res')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/coldStartExample/README.md b/examples/coldStartExample/README.md new file mode 100755 index 00000000..02c4969d --- /dev/null +++ b/examples/coldStartExample/README.md @@ -0,0 +1,2 @@ +# LightFm_HybridRecommenderSystem +Creating a hybrid recommender system using LightFM. Learn how to tackle the cold start problem. For further clarity, read the associated article published in Towards Data Science: https://towardsdatascience.com/how-i-would-explain-building-lightfm-hybrid-recommenders-to-a-5-year-old-b6ee18571309