diff --git a/sensible_raw/loaders/loader.py b/sensible_raw/loaders/loader.py index 78041f6..cd4d0a8 100644 --- a/sensible_raw/loaders/loader.py +++ b/sensible_raw/loaders/loader.py @@ -38,7 +38,44 @@ def get_raw_value(index_name, indexed_value): return data[0][0] -def load_data(data_type, month, config=None, as_dataframe=False): +def load_data(data_type, month, config=None, as_dataframe=True): + """Main function for loading data. + + Input + ----- + data_type : str + Should be one of the following: + 'bluetooth', + 'calllog', + 'cell', + 'context', + 'facebook_friends', + 'grades', + 'indices', + 'local', + 'location', + 'location_raw', + 'questionnaires', + 'screen', + 'sms', + 'stop_locations', + 'transport_minute', + 'transport_segment', + 'user_metadata', + 'vectors', + 'weather' + + month : str + Month of year. Example value: "january_2014". Typically, you want to query months between + September 2013 and January 2016. Refer to https://ulfaslak.com/files/sensible_dtu_data.png + for an overview of the data volume for each month. + + config : config object/None-type + Configuration to use. If omitted (or falsy), it is loaded by calling `load_config()`. + + as_dataframe : bool + Whether to return the data as a `pandas.DataFrame` object. + """ if not config: config = load_config() columns, data = load_from_db(data_type, @@ -49,11 +86,11 @@ def load_data(data_type, month, config=None, as_dataframe=False): if not as_dataframe: return columns, data - - dict = {} - for column, array in zip(columns, data): - dict[column] = array - return pandas.DataFrame(dict) + else: + dict = {} + for column, array in zip(columns, data): + dict[column] = array + return pandas.DataFrame(dict) def load_from_db(db, collection, field_names, field_types, db_host, query_spec={}):