Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions sensible_raw/loaders/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,44 @@ def get_raw_value(index_name, indexed_value):
return data[0][0]


def load_data(data_type, month, config=None, as_dataframe=False):
def load_data(data_type, month, config=None, as_dataframe=True):
"""Main function for loading data.

Input
-----
data_type : str
Should be one of the following:
'bluetooth',
'calllog',
'cell',
'context',
'facebook_friends',
'grades',
'indices',
'local',
'location',
'location_raw',
'questionnaires',
'screen',
'sms',
'stop_locations',
'transport_minute',
'transport_segment',
'user_metadata',
'vectors',
'weather'

month : str
Month of year. Example value: "january_2014". Typically, you want to query months between
September 2013 and January 2016. Refer to https://ulfaslak.com/files/sensible_dtu_data.png
for overview of data volume for each month.

config : config object/None-type
Configuration to use. If not provided (or otherwise falsy), it is obtained by calling `load_config()`.

as_dataframe : bool
Whether to return the data as a `pandas.DataFrame` object. If False, the tuple `(columns, data)` is returned instead.
"""
if not config:
config = load_config()
columns, data = load_from_db(data_type,
Expand All @@ -49,11 +86,11 @@ def load_data(data_type, month, config=None, as_dataframe=False):

if not as_dataframe:
return columns, data

dict = {}
for column, array in zip(columns, data):
dict[column] = array
return pandas.DataFrame(dict)
else:
dict = {}
for column, array in zip(columns, data):
dict[column] = array
return pandas.DataFrame(dict)


def load_from_db(db, collection, field_names, field_types, db_host, query_spec={}):
Expand Down