diff --git a/neighborhood_model.ipynb b/neighborhood_model.ipynb
new file mode 100644
index 0000000..5427cdc
--- /dev/null
+++ b/neighborhood_model.ipynb
@@ -0,0 +1,539 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Recommendation System with Neighborhood Model\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Load the module neighborhood_model. See neighborhood_model.py\n",
+    "1. Read in the user-song pair data. \n",
+    "2. Tune the hyperparameters of the neighborhood model. \n",
+    "3. Make recommendations based on the model. Recommendations can be made both for users in the read-in data and for users outside it.\n",
+    "4. Showcase the recommendation system. Randomly select 10 users. Show the songs they have listened to before and the songs we recommend to them."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Read in data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from neighborhood_model import *\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.sparse import coo_matrix\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.preprocessing import normalize\n",
+    "from scipy.sparse import lil_matrix\n",
+    "from scipy.sparse import csr_matrix\n",
+    "from scipy.sparse import load_npz\n",
+    "import scipy.sparse as sp\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "def load_data(filename):\n",
+    "    df = pd.read_table(filename, sep='\\t', names = ['user_id','song_id','playcount'])\n",
+    "    n_users = df.user_id.unique().shape[0] \n",
+    "    n_songs = df.song_id.unique().shape[0]\n",
+    "    ratings = np.zeros((n_users, n_songs))\n",
+    "    df['user_id'] = df['user_id'].astype('category')\n",
+    "    df['song_id'] = df['song_id'].astype('category')\n",
+    "    parsed_matrix = coo_matrix((df['playcount'].astype(float),(df['user_id'].cat.codes, df['song_id'].cat.codes))).tolil()\n",
+    "    return parsed_matrix\n",
+    "\n",
+    "#obtain a subset of the data\n",
+    "def data_sampling(data, num_users,num_songs):\n",
+    "    num_instances, num_features = data.shape[0], data.shape[1]\n",
+    "    #sample users from the data\n",
+    "    sample_user_index = np.random.choice(num_instances, num_users, replace=False)\n",
+    "    #sample songs from the data\n",
+    "    sample_song_index = np.random.choice(num_features, num_songs, replace=False)\n",
+    "    samples = data[sample_user_index,:]\n",
+    "    samples = samples[:, sample_song_index,]\n",
+    "    return samples\n",
+    "\n",
+    "#compute the inverse user frequency of the data to reduce the weight of commonly occurring songs\n",
+    "def ivf(x):\n",
+    "    num_user = x.shape[0]\n",
+    "    binary_data = x.copy()\n",
+    "    binary_data[x != 0] = 1\n",
+    "    nj = np.asarray(binary_data.sum(axis = 0)).squeeze()\n",
+    "\n",
+    "    fj =np.log(num_user/nj)\n",
+    "    for i in range(num_user):\n",
+    "        x[i,:] = x[i,:].multiply(fj)\n",
+    "    return x\n",
+    "\n",
+    "#produce the inverse user frequency feature if needed, and normalize the data\n",
+    "def prep2(delete_user_song, if_ivf):\n",
+    "    '''\n",
+    "    Build the model's feature matrix from the user-song matrix.\n",
+    "\n",
+    "    Inputs:\n",
+    "        - delete_user_song: sparse matrix of size (#users, #songs)\n",
+    "        - if_ivf: 1 to apply ivf() before normalizing, anything else to skip it\n",
+    "    Output:\n",
+    "        - float64 LIL matrix whose rows were scaled by sklearn's normalize(axis=1)\n",
+    "    '''\n",
+    "    features = ivf(delete_user_song) if if_ivf == 1 else delete_user_song\n",
+    "    row_scaled = normalize(features, axis=1)\n",
+    "    return lil_matrix(row_scaled,dtype = np.float64)\n",
+    "\n",
+    "def train_test_split(data, size):\n",
+    "#data should be ndarray format\n",
+    "    test = np.zeros(data.shape)\n",
+    "    train = data.copy()\n",
+    "    for user in range(data.shape[0]):\n",
+    "        test_index = np.random.choice(data[user, :].nonzero()[0], \n",
+    "                                        size=size, \n",
+    "                                        replace=False)\n",
+    "        train[user, test_index] = 0.\n",
+    "        test[user, test_index] = data[user, test_index]\n",
+    "    # Test and training are truly disjoint\n",
+    "    assert(np.all((train * test) == 0)) \n",
+    "    train = lil_matrix(train,dtype = np.float64)\n",
+    "    test = lil_matrix(test,dtype = np.float64)\n",
+    "    return train, test\n",
+    "\n",
+    "def main_2(num_users, num_songs, thres, rho, user_id, num_recommend, if_ivf = 0):\n",
+    "    user_song_matrix = load_npz('./sparse_matrix.npz')\n",
+    "    raw_data = data_sampling(user_song_matrix, num_users, num_songs)\n",
+    "    data  = prep(raw_data,thres)\n",
+    "    model = NeighborhoodModel(rho)\n",
+    "    model.fit(data)\n",
+    "    #print(model.recommend(user_id,num_recommend))\n",
+    "    return model.evaluate()\n",
+    "\n",
+    "def main(data, train, test, thres, rho, user_id, num_recommend, if_ivf = 0):\n",
+    "    '''\n",
+    "    Smoke test: fit a model, then print recommendations and the evaluation metric.\n",
+    "\n",
+    "    Inputs:\n",
+    "        - data: sparse feature matrix of size (#users, #songs); needs more than\n",
+    "          2222 columns because of the hard-coded sample user below\n",
+    "        - train, test: matrices passed to NeighborhoodModel.fit\n",
+    "        - rho: case amplification parameter\n",
+    "        - user_id: row of data to recommend for\n",
+    "        - num_recommend: number of songs to recommend to user_id\n",
+    "        - thres, if_ivf: unused, kept so existing calls keep working\n",
+    "    Output:\n",
+    "        - always 1\n",
+    "    '''\n",
+    "    # Calls follow the API in neighborhood_model.py: no-argument constructor,\n",
+    "    # fit(train, test, rho), recommend(data, user_id, rho, num_rec), recommend_out.\n",
+    "    model = NeighborhoodModel()\n",
+    "    model.fit(train, test, rho)\n",
+    "    print(model.recommend(data, user_id, rho = rho, num_rec = num_recommend))\n",
+    "    print(model.evaluate())\n",
+    "    # made-up listening history of a user who is not in the data\n",
+    "    user_pref = np.zeros((1, data.shape[1]))\n",
+    "    user_pref[0,0], user_pref[0,22], user_pref[0,222], user_pref[0,2222] = 2, 3, 5, 6\n",
+    "    print(model.recommend_out(data, user_pref, num_rec = 3))\n",
+    "    return 1\n",
+    "\n",
+    "def run(train, test, rho):\n",
+    "    '''Fit a fresh NeighborhoodModel on (train, test) with the given rho and return its evaluate() result.'''\n",
+    "    fitted = NeighborhoodModel()\n",
+    "    fitted.fit(train, test, rho)\n",
+    "    metric = fitted.evaluate()\n",
+    "    return metric\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "f = './sample_matrix.npz'\n",
+    "#read in the data produced by preprocessing.py\n",
+    "raw_data = sp.load_npz(f)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Parameter Tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import operator\n",
+    "\n",
+    "def parameter_tuning(raw_data, test_size, rho_range = [1,1.5, 1.999, 2.5, 3]):\n",
+    "    '''\n",
+    "    Grid-search two parameters: \n",
+    "    if_ivf: whether to use Inverse User Frequency or original data feature\n",
+    "    rho: case amplification parameter\n",
+    "    Inputs:\n",
+    "        - raw_data: a sparse matrix of size(#users, #songs)\n",
+    "        - test_size: scalar, number of listened songs held out per user for\n",
+    "          testing (passed unchanged to train_test_split)\n",
+    "        - rho_range: list of rhos used for parameter tuning\n",
+    "    Output:\n",
+    "        - best_para: tuple (rho, if_ivf) with the smallest evaluation metric\n",
+    "    Raises:\n",
+    "        - ValueError if rho_range is empty\n",
+    "    '''\n",
+    "    if len(rho_range) == 0:\n",
+    "        raise ValueError('rho_range must contain at least one value')\n",
+    "    result = {}\n",
+    "    for if_ivf in [0,1]:\n",
+    "        # copy so that preprocessing can never modify the caller's matrix\n",
+    "        data = prep2(raw_data.copy(), if_ivf = if_ivf)\n",
+    "        # one split per feature type, shared by every rho\n",
+    "        train, test = train_test_split(data.toarray(), test_size)\n",
+    "        # iterate over the rhos the caller asked for, not a fixed list\n",
+    "        for rho in rho_range:\n",
+    "            result[(rho,if_ivf)] = run(train, test, rho)\n",
+    "\n",
+    "    for key in result:\n",
+    "        print(str(key) + ': ' + str(result[key]))\n",
+    "    # the combination with the smallest metric wins; ties go to the first one evaluated\n",
+    "    best_para = min(result, key=result.get)\n",
+    "    print('best parameter is rho = ' + str(best_para[0]) + ' if_ivf = ' + str(best_para[1]))\n",
+    "    return best_para"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/hp/anaconda/lib/python3.5/site-packages/scipy/sparse/data.py:111: RuntimeWarning: invalid value encountered in power\n",
+      "  return self._with_data(data ** n)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(2.5, 0): 16.3655613746\n",
+      "(3, 1): 15.7523197962\n",
+      "(2.5, 1): 24.4495429034\n",
+      "(1.5, 0): 16.8063741018\n",
+      "(1.999, 1): 24.3087870156\n",
+      "(1.999, 0): 16.4543514092\n",
+      "(1.5, 1): 24.3728337994\n",
+      "(1, 0): 14.865758076\n",
+      "(3, 0): 13.3545845664\n",
+      "(1, 1): 16.5995817516\n",
+      "best parameter is rho = 3 if_ivf = 0\n"
+     ]
+    }
+   ],
+   "source": [
+    "best_para = parameter_tuning(raw_data, test_size = 20,rho_range = [2.5, 3,4,5,10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Recommendation system"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def recommend(raw_data, best_para, num_rec, if_outside_user = 0, user_id = None, user_pref = None):\n",
+    "    '''\n",
+    "    Recommend songs using the neighborhood model.\n",
+    "    Inputs:\n",
+    "        - raw_data: a sparse matrix of size(#users, #songs)\n",
+    "        - best_para: tuple (rho, if_ivf), e.g. the value returned by parameter_tuning\n",
+    "        - num_rec: number of songs to recommend\n",
+    "        - if_outside_user: 0 if the user is a row of raw_data, anything else if not\n",
+    "        - user_id: row index of the user (used when if_outside_user == 0)\n",
+    "        - user_pref: if the user is not in the read-in data, this parameter is a list \n",
+    "        of # of times of each song that the user has listened to (the same format in the read-in data)\n",
+    "    Output:\n",
+    "        - rec_song: a list of song column indices generated by the neighborhood model.\n",
+    "    '''\n",
+    "    # Use the tuned rho; it is passed on below so it actually reaches the model.\n",
+    "    rho, if_ivf = best_para[0], best_para[1]\n",
+    "    # Work on a copy so preprocessing can never modify the caller's raw_data.\n",
+    "    data = prep2(raw_data.copy(), if_ivf = if_ivf)\n",
+    "    model = NeighborhoodModel()\n",
+    "    if if_outside_user == 0:\n",
+    "        rec_song = model.recommend(data, user_id = user_id, rho = rho, num_rec = num_rec)\n",
+    "    else:\n",
+    "        # recommend_out in neighborhood_model.py takes no rho argument\n",
+    "        rec_song = model.recommend_out(data, user_pref, num_rec = num_rec)\n",
+    "    return rec_song"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[6962]"
+      ]
+     },
+     "execution_count": 96,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "recommend(raw_data, best_para, if_outside_user = 0, num_rec = 1, user_id = 23)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Recommendation system showcase"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def top_k_song(raw_data, user_id, k):\n",
+    "    # get the most listened k song ids for the specific user \n",
+    "    song_arr = np.asarray(raw_data[user_id, :].todense()).squeeze()\n",
+    "    topksong = np.argsort(song_arr)[-k:]\n",
+    "    return topksong"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#select 10 distinct random users (row indices of raw_data)\n",
+    "# The row count comes from raw_data itself, so the cell runs on a fresh kernel;\n",
+    "# replace=False keeps the same user from being picked twice.\n",
+    "user_ids = np.random.choice(raw_data.shape[0], 10, replace=False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "num_user_rec = 10\n",
+    "rec_dict = {}\n",
+    "# The .npy file stores a pickled dict (matrix column index -> song id), so numpy\n",
+    "# needs allow_pickle=True to read it. Unpickling can execute code: only load\n",
+    "# files from a trusted source.\n",
+    "read_dict = np.load('song_index_dictionary.npy', allow_pickle=True).item()\n",
+    "\n",
+    "def load_track_names(path):\n",
+    "    '''\n",
+    "    Read the track list once and map each id to (song name, artist name).\n",
+    "\n",
+    "    The last two <SEP>-separated fields of a line are artist and title; every\n",
+    "    field before them is registered as a lookup key. When an id occurs on\n",
+    "    several lines the last line wins.\n",
+    "    '''\n",
+    "    names = {}\n",
+    "    # the with-block closes the file even if parsing fails\n",
+    "    with open(path, 'r') as track_file:\n",
+    "        for line in track_file:\n",
+    "            fields = line.rsplit('<SEP>', 2)\n",
+    "            if len(fields) < 3:\n",
+    "                # not a track line\n",
+    "                continue\n",
+    "            entry = (fields[2].rstrip(), fields[1].rstrip())\n",
+    "            for key in fields[0].split('<SEP>'):\n",
+    "                names[key] = entry\n",
+    "    return names\n",
+    "\n",
+    "def song_names(song_indices, track_names):\n",
+    "    # Translate matrix column indices into (song name, artist name) tuples;\n",
+    "    # a song missing from the track list gives (None, None).\n",
+    "    return [track_names.get(read_dict[x], (None, None)) for x in song_indices]\n",
+    "\n",
+    "# Parse the track list once instead of re-reading it for every song.\n",
+    "track_names = load_track_names('unique_tracks.txt')\n",
+    "\n",
+    "for user_id in user_ids:\n",
+    "    # names of the 3 songs generated by the recommendation system\n",
+    "    rec_song_name = song_names(recommend(raw_data, best_para, if_outside_user = 0, num_rec = 3, user_id = user_id), track_names)\n",
+    "    # names of the 10 songs that the user listened to most frequently\n",
+    "    topksong_name = song_names(top_k_song(raw_data, user_id, 10), track_names)\n",
+    "    # key is the user_id, value is (listening history, recommendations)\n",
+    "    rec_dict[user_id] = (topksong_name, rec_song_name)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{279: ([('Prête A Porter', 'Paris Combo'),\n",
+       "   ('Invocation: Attica Blues', 'Archie Shepp / William Kunstler'),\n",
+       "   (\"Where You'll Find Me Now\", 'Neutral Milk Hotel'),\n",
+       "   ('Under The Gun', 'The Killers'),\n",
+       "   ('Anthems For a Seventeen Year-Old Girl', 'Broken Social Scene'),\n",
+       "   ('Angry Chair', 'Alice In Chains'),\n",
+       "   ('Would You Go With Me', 'Josh Turner'),\n",
+       "   ('Spilt Needles (Album)', 'The Shins'),\n",
+       "   ('Comet Course', 'Flying Lotus'),\n",
+       "   ('Jeane', 'The Smiths')],\n",
+       "  [('Scream', 'Michael Jackson'),\n",
+       "   ('Window Blues', 'Lykke Li'),\n",
+       "   ('I Believe In A Thing Called Love', 'The Darkness')]),\n",
+       " 854: ([(\"It's My Party\", 'Lesley Gore'),\n",
+       "   ('People', 'Journey'),\n",
+       "   ('When A Man Loves A Woman', 'Percy Sledge'),\n",
+       "   ('Window Blues', 'Lykke Li'),\n",
+       "   ('Ride For You (Album Version)', 'Danity Kane'),\n",
+       "   (\"Un-thinkable (I'm Ready)\", 'Alicia Keys'),\n",
+       "   ('The Wild Boys', 'Duran Duran'),\n",
+       "   ('Secret Hell', 'dEUS'),\n",
+       "   (\"Things I Don't Understand\", 'Coldplay'),\n",
+       "   ('So Glad To See You', 'Hot Chip')],\n",
+       "  [('Jerry Was A Race Car Driver', 'Primus'),\n",
+       "   ('Neon', 'John Mayer'),\n",
+       "   ('More Than Everything', 'Gareth Emery')]),\n",
+       " 986: ([('Ego', 'Beyoncé'),\n",
+       "   ('California One / Youth and Beauty Brigade', 'The Decemberists'),\n",
+       "   ('The Mask (Featuring Ghostface Killah) (Album Version)', 'Danger Doom'),\n",
+       "   ('Sei Lá Mangueira', 'Elizeth Cardoso'),\n",
+       "   ('Times Like These', 'Jack Johnson'),\n",
+       "   ('Never Ending Math Equation', 'Modest Mouse'),\n",
+       "   ('Hallowed Be My Name', 'HAMMERFALL'),\n",
+       "   ('Sit Down. Stand Up', 'Radiohead'),\n",
+       "   ('But Tonight We Dance', 'Rise Against'),\n",
+       "   ('Last Night On Earth [feat. Green Day & The Cast Of American Idiot] (Album Version)',\n",
+       "    'Green Day')],\n",
+       "  [('Wait', 'Alexi Murdoch'),\n",
+       "   ('Country Road', 'James Taylor'),\n",
+       "   ('Tabaco Y Chanel', 'Bacilos')]),\n",
+       " 992: ([('Do You Wanna', 'The Kooks'),\n",
+       "   ('The Fake Headlines', 'The New Pornographers'),\n",
+       "   ('Murder The Government', 'NOFX'),\n",
+       "   ('Let Me', 'Rihanna'),\n",
+       "   ('Middle Man', 'Jack Johnson'),\n",
+       "   ('Stilettos', 'Holy Fuck'),\n",
+       "   ('Made For You', 'OneRepublic'),\n",
+       "   ('Sex In Secret', 'Cabaret Voltaire'),\n",
+       "   ('Jazz Street', 'Jaco Pastorius_ Brian Melvin'),\n",
+       "   ('This Is Nowhere', 'The Airborne Toxic Event')],\n",
+       "  [('Jerry Was A Race Car Driver', 'Primus'),\n",
+       "   ('Neon', 'John Mayer'),\n",
+       "   ('Ego', 'Beyoncé')]),\n",
+       " 1722: ([('Contra La Corriente', 'Marc Anthony'),\n",
+       "   ('Hallowed Be My Name', 'HAMMERFALL'),\n",
+       "   ('Ego', 'Beyoncé'),\n",
+       "   ('Daughter', 'Bassholes'),\n",
+       "   (\"Soon We'll Be Found\", 'Sia'),\n",
+       "   ('(iii)', 'The Gerbils'),\n",
+       "   ('The General Specific (Album)', 'Band Of Horses'),\n",
+       "   (\"I'm Done\", 'The Pussycat Dolls'),\n",
+       "   ('Diamonds From Sierra Leone', 'Kanye West / Jay-Z'),\n",
+       "   ('God Put A Smile Upon Your Face', 'Coldplay')],\n",
+       "  [('New Direction (Original Version)', 'Echo And The Bunnymen'),\n",
+       "   ('Welcome To Hollywood', 'Beyoncé feat. Jay-Z'),\n",
+       "   (\"Can't Help But Wait (Album Version)\", 'Trey Songz')]),\n",
+       " 1997: ([(\"Everything's Magic\", 'Angels and Airwaves'),\n",
+       "   ('A Beautiful Mine', 'RJD2'),\n",
+       "   ('Esisti Tu', 'Valerio Scanu'),\n",
+       "   ('Where The White Boys Dance', 'The Killers'),\n",
+       "   ('Itkupilli (2001 Digital Remaster)', 'Neljä Ruusua'),\n",
+       "   ('22', 'Lily Allen'),\n",
+       "   ('Puto', 'Molotov'),\n",
+       "   ('Proud Mary', 'Creedence Clearwater Revival'),\n",
+       "   ('The KKK Took My Baby Away (LP Version )', 'Ramones'),\n",
+       "   ('The Slow Descent Into Alcoholism', 'The New Pornographers')],\n",
+       "  [('Winter Song', 'Sara Bareilles'),\n",
+       "   ('More Than Everything', 'Gareth Emery'),\n",
+       "   ('New Direction (Original Version)', 'Echo And The Bunnymen')]),\n",
+       " 2937: ([('Sincerité Et Jalousie', 'Alliance Ethnik'),\n",
+       "   ('A Rush Of Blood To The Head', 'Coldplay'),\n",
+       "   ('Clint Eastwood (Ed Case/Sweetie Irie Refix) (Edit)', 'Gorillaz'),\n",
+       "   ('The Caterpillar', 'The Cure'),\n",
+       "   ('De Weg', 'Guus Meeuwis'),\n",
+       "   ('Ballad Of A Comeback Kid', 'The New Pornographers'),\n",
+       "   ('I Found A Whistle', 'MGMT'),\n",
+       "   ('Ragoo', 'Kings Of Leon'),\n",
+       "   ('One Thing', 'Finger Eleven'),\n",
+       "   ('Winter Song', 'Sara Bareilles')],\n",
+       "  [('Rebirth of Slick (Cool Like Dat) (2005 Digital Remaster) (Explicit)',\n",
+       "    'Digable Planets'),\n",
+       "   (\"Still Don't Give A Fuck\", 'Eminem'),\n",
+       "   ('Slam', 'Pendulum')]),\n",
+       " 3038: ([('Zero', 'Yeah Yeah Yeahs'),\n",
+       "   ('I Thought I Saw Your Face Today', 'She & Him'),\n",
+       "   ('Crossfire', 'Rick Cua'),\n",
+       "   ('So Long', 'Rilo Kiley'),\n",
+       "   ('Rayando el sol', 'Maná'),\n",
+       "   ('Escapémonos', 'Marc Anthony;Jennifer Lopez'),\n",
+       "   ('If It Means A Lot To You', 'A Day To Remember'),\n",
+       "   (\"Everything's Ruined\", 'Faith No More'),\n",
+       "   ('Pop Champagne', 'Jim Jones & Ron Browz featuring Juelz Santana'),\n",
+       "   ('Firestarter', 'The Prodigy')],\n",
+       "  [('What Else Is There?', 'Röyksopp'),\n",
+       "   ('Circling', 'Four Tet'),\n",
+       "   ('Le Corps de Notre Seigneur', 'Choeur Arménien de Sofia')]),\n",
+       " 3099: ([('Tiger Feet', 'Mud'),\n",
+       "   ('Girl Money', 'Kix'),\n",
+       "   ('Zebra', 'Beach House'),\n",
+       "   ('A Day Without Me', 'U2'),\n",
+       "   ('Lady Picture Show (LP Version)', 'Stone Temple Pilots'),\n",
+       "   ('Black Horse And The Cherry Tree (Radio Version)', 'KT Tunstall'),\n",
+       "   ('Making Time', 'The Creation'),\n",
+       "   ('Seven Nation Army (Album Version)', 'The White Stripes'),\n",
+       "   ('Breadfan', 'Metallica'),\n",
+       "   (\"Don't Look Back\", 'Boston')],\n",
+       "  [('More Than Everything', 'Gareth Emery'),\n",
+       "   ('Hallowed Be My Name', 'HAMMERFALL'),\n",
+       "   ('Ego', 'Beyoncé')]),\n",
+       " 3117: ([('Hasta Ayer', 'Marc Anthony'),\n",
+       "   ('I Kissed A Girl', 'Katy Perry'),\n",
+       "   ('Breathe . Something/Stellar STar', 'Flying Lotus'),\n",
+       "   ('Saturdays', 'Cut Copy'),\n",
+       "   ('Crackers and Cheese', 'Tea Leaf Green'),\n",
+       "   ('California One / Youth and Beauty Brigade', 'The Decemberists'),\n",
+       "   ('Times Like These', 'Foo Fighters'),\n",
+       "   (\"Don't Stop The Music\", 'Jamie Cullum'),\n",
+       "   ('Again I Go Unnoticed', 'Dashboard Confessional'),\n",
+       "   (\"I'm Just A Man\", 'Jason Aldean')],\n",
+       "  [('Hallowed Be My Name', 'HAMMERFALL'),\n",
+       "   ('Ego', 'Beyoncé'),\n",
+       "   ('Kalopsia', 'The Blizzard')])}"
+      ]
+     },
+     "execution_count": 103,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# key is the user_id, \n",
+    "# value is a pair of lists of (song name, artist) tuples\n",
+    "# the first list holds the songs from the user's past listening history\n",
+    "# the second list holds the songs recommended to the user\n",
+    "rec_dict"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:anaconda]",
+   "language": "python",
+   "name": "conda-env-anaconda-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/neighborhood_model.py b/neighborhood_model.py
new file mode 100644
index 0000000..9d61561
--- /dev/null
+++ b/neighborhood_model.py
@@ -0,0 +1,108 @@
+import numpy as np
+import pandas as pd
+from scipy.sparse import coo_matrix
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import normalize
+from scipy.sparse import lil_matrix
+
+
+class NeighborhoodModel(object):
+
+	def __init__(self):
+		
+		self.pred_ranks_percentile = None
+		self.pred_ranks = None
+		self.num_instances_train, self.num_features_train = None, None
+		#case amplification factor
+
+	def fit(self, train, test, rho):
+		self.train = train
+		self.test = test
+		pred = self.predict(self.train, rho)
+		self.pred_ranks, self.pred_ranks_percentile = self.ranking(pred)
+
+	#output the predicted score for each item for each user
+	def predict(self, x, rho):
+		v_bar = lil_matrix(x.sum(axis = 1))
+		weight = (x.dot(x.T)).multiply(v_bar.dot(v_bar.T).power(-1/2)).power(rho)
+
+		for i in range(weight.shape[0]):
+			weight[i,i] = 0
+		pred = weight.dot(x).todense()
+		return pred
+
+	def predict_ind(self, x, user_pref_sparse):
+		v_bar = lil_matrix(x.sum(axis = 1))
+		weight = (user_pref_sparse.dot(x.T)).multiply(v_bar.power(-1/2))
+		for i in range(weight.shape[0]):
+			weight[i,i] = 0
+		pred = weight.dot(x).todense()
+		return pred
+
+	#produce the ranking percentile for each item for each user
+	def ranking(self, pred):
+		num_instances_train, num_features_train = pred.shape[0], pred.shape[1]
+		temp = pred.argsort(axis = 1)
+		#produce the abosulte ranks for each item for each user
+		pred_ranks = np.empty_like(temp)
+		for i in range(num_instances_train):
+			pred_ranks[i,temp[i,:]] = np.arange(num_features_train - 1, -1, -1)
+		#convert the ranks to rank percentile
+		pred_ranks_percentile = pred_ranks / np.max(pred_ranks) * 100
+		return pred_ranks, pred_ranks_percentile
+	
+	#output expected percentile ranking of a watching unit
+	def evaluate(self):
+		test = self.test
+
+		num_instances_train, num_features_train = self.num_instances_train, self.num_features_train
+		pred_ranks_percentile = self.pred_ranks_percentile
+		test = test.todense()
+		metrics = np.sum(np.multiply(test, pred_ranks_percentile))/np.sum(test)
+		return metrics
+
+	#recommend the top "num_rec" songs to user "user_id"
+	def recommend(self, data, user_id,rho = 1, num_rec = 3):
+		pred = self.predict(data, rho)
+		pred_ranks, pred_ranks_percentile = self.ranking(pred)
+		song_rank_list = np.asarray(pred_ranks[user_id,:]).squeeze()
+		#produce the song list sorted by their scores
+		rank_index = np.argsort(song_rank_list)
+		rec_list = []
+		num = 0
+		song_arr = np.asarray(data[user_id,:].todense()).squeeze()
+		#songs that the user has already listened
+		song_in_bucket = np.nonzero(song_arr)[0]
+		for item in rank_index:
+			if num >= num_rec:
+				break
+			#exclude the songs that the user has already listened
+			if item not in song_in_bucket:
+				rec_list.append(item)
+				num += 1
+
+		return rec_list
+
+#recommend songs for a user not in the data
+#input a array of the times of the songs that the user has listened
+	def recommend_out(self, data, user_pref, num_rec = 3):
+		user_pref_sparse = lil_matrix(user_pref, dtype = np.float64)
+		#similarity_ind = user_pref_sparse.dot(data.T)
+		#pred = similarity_ind.dot(data).todense()
+		pred = self.predict_ind(data, user_pref_sparse)[0]
+		song_rank_list, _ = np.asarray(self.ranking(pred)).squeeze()
+		rank_index = np.argsort(song_rank_list)
+		rec_list = []
+		num = 0
+		song_arr = np.asarray(user_pref).squeeze()
+		#songs that the user has already listened
+		song_in_bucket = np.nonzero(song_arr)[0]
+		for item in rank_index:
+			if num >= num_rec:
+				break
+			#exclude the songs that the user has already listened
+			if item not in song_in_bucket:
+				rec_list.append(item)
+				num += 1
+		return rec_list
+
diff --git a/song_index_dictionary.npy b/song_index_dictionary.npy
new file mode 100755
index 0000000..2e1171e
Binary files /dev/null and b/song_index_dictionary.npy differ