From cd78a7ffe74a523ac5cb2a4f427e0bb1a5c7805f Mon Sep 17 00:00:00 2001 From: Oisin Date: Sat, 3 Jan 2026 13:47:08 +0000 Subject: [PATCH 01/16] Revised object classes using github copilot --- generator/objects/Application.py | 54 +++++++++-------- generator/objects/Card.py | 51 ++++++++-------- generator/objects/Device.py | 50 ++++++++-------- generator/objects/Ip.py | 25 ++++---- generator/objects/Transaction.py | 72 +++++++++++------------ generator/objects/User.py | 70 +++++++++++----------- generator/utilities/round_trans_amount.py | 19 ++++-- 7 files changed, 182 insertions(+), 159 deletions(-) diff --git a/generator/objects/Application.py b/generator/objects/Application.py index 013bf2e..992cf5d 100644 --- a/generator/objects/Application.py +++ b/generator/objects/Application.py @@ -1,34 +1,42 @@ -import numpy as np import cons from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict from utilities.cnt2prop_dict import cnt2prop_dict + +import numpy as np from beartype import beartype +from typing import List, Dict class Application: - + @beartype def __init__( - self, - n_application_hashes:int + self, + n_application_hashes:int, ): """ - The randomly generated application data model object. - + Initialize the Application object with randomly generated data model. + Parameters ---------- n_application_hashes : int The number of application hashes to generate. - + Attributes ---------- n_application_hashes : int The number of application hashes generated. lam : float - The lambda parameter of the squared poisson distribution used to generate the application hash counts. - application_hashes_cnts_dict : dict - The application hash counts dictionary. - application_hashes_props_dict : dict - The application hash proportions dictionary. + The lambda parameter for the Poisson distribution used to generate application hash counts. + power : float + The power parameter for the Poisson distribution. + payment_channels : Dict[str, float] + The population proportions of available payment channels. + application_hashes_cnts_dict : Dict[str, int] + Mapping of application hashes to their occurrence counts. + application_hashes_props_dict : Dict[str, float] + Mapping of application hashes to their proportions. + application_hashes_payment_channel_dict : Dict[str, str] + Mapping of application hashes to randomly assigned payment channels. """ self.n_application_hashes = n_application_hashes self.lam = cons.data_model_poisson_params["application"]["lambda"] @@ -37,30 +45,30 @@ def __init__( self.application_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_application_hashes, lam=self.lam) self.application_hashes_props_dict = cnt2prop_dict(self.application_hashes_cnts_dict) self.application_hashes_payment_channel_dict = self.gen_transaction_payment_channel(list(self.application_hashes_cnts_dict.keys()), self.payment_channels) - + @beartype def gen_transaction_payment_channel( self, - application_hashes:list, - payment_channels:dict - ) -> dict: + application_hashes:List[str], + payment_channels:Dict[str, float], + ) -> Dict[str, str]: """ Generates a dictionary of random application payment channels. - + Parameters ---------- - application_hashes : list + application_hashes : List[str] The application hashes. - payment_channels : dict + payment_channels : Dict[str, float] The population proportion of payment channels. - + Returns ------- - dict + Dict[str, str] A dictionary of transaction payment channels. """ # randomly sample payment channels based on population proportions - transactoin_payment_channels = list( + transaction_payment_channels = list( np.random.choice( a=list(payment_channels.keys()), p=list(payment_channels.values()), @@ -69,5 +77,5 @@ def gen_transaction_payment_channel( ) ) # return payment channels and application hashes - application_hashes_payment_channels_dict = dict(zip(application_hashes, transactoin_payment_channels)) + application_hashes_payment_channels_dict = dict(zip(application_hashes, transaction_payment_channels)) return application_hashes_payment_channels_dict diff --git a/generator/objects/Card.py b/generator/objects/Card.py index c76f7ac..44de111 100644 --- a/generator/objects/Card.py +++ b/generator/objects/Card.py @@ -1,50 +1,53 @@ -import numpy as np import cons from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict from utilities.cnt2prop_dict import cnt2prop_dict from utilities.gen_country_codes_dict import gen_country_codes_dict from utilities.gen_shared_idhashes import gen_shared_idhashes + +import numpy as np from beartype import beartype -from typing import Union +from typing import List, Dict, Union class Card: - + @beartype def __init__( self, n_card_hashes:Union[int,np.int64], - fpath_countrieseurope:str=cons.fpath_countrieseurope + fpath_countrieseurope:str=cons.fpath_countrieseurope, ): """ The randomly generated card data model object. - + Parameters ---------- n_card_hashes : int The number of card hashes to generate. fpath_countrieseurope : str The file path to the european countries reference file, default is cons.fpath_countrieseurope. - + Attributes ---------- n_card_hashes : int The number of card hashes generated. - card_types_dict : dict + card_types_dict : Dict[str, float] The population proportions of card types. lam : float The lambda parameter of the squared poisson distribution used to generate the card hash counts. + power : float + The power parameter of the squared poisson distribution used to generate the card hash counts. prop_shared_card_hashes : float The population proportion of shared card hashes. - card_hashes_cnts_dict : dict + card_hashes_cnts_dict : Dict[str, int] The card hash counts dictionary. - card_hashes_props_dict : dict + card_hashes_props_dict : Dict[str, float] The card hash proportions dictionary. - card_hashes_type_dict : dict + card_hashes_type_dict : Dict[str, str] The card hash types dictionary. - card_hashes_country_code_dict : dict + card_hashes_country_code_dict : Dict[str, str] The card hash country codes dictionary. - card_hashes_shared_props_dict : dict - The shared card hash proportions dictionary. + card_shared_idhash_map_dict : Dict[str, str] + The card shared idhash mapping dictionary. """ self.n_card_hashes = n_card_hashes self.fpath_countrieseurope = fpath_countrieseurope @@ -52,32 +55,32 @@ def __init__( self.lam = cons.data_model_poisson_params["card"]["lambda"] self.power = cons.data_model_poisson_params["card"]["power"] self.prop_shared_card_hashes = cons.data_model_shared_entities_dict["card"] - self.card_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_card_hashes, lam=self.lam) + self.card_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_card_hashes, lam=self.lam, power=self.power) self.card_hashes_props_dict = cnt2prop_dict(self.card_hashes_cnts_dict) self.card_hashes_type_dict = self.gen_card_type(list(self.card_hashes_cnts_dict.keys()), self.card_types_dict) self.card_hashes_country_code_dict = gen_country_codes_dict(self.card_hashes_cnts_dict, self.fpath_countrieseurope) self.card_shared_idhash_map_dict = gen_shared_idhashes(self.card_hashes_cnts_dict, self.prop_shared_card_hashes) - + @beartype def gen_card_type( self, - card_hashes:list, - card_types_dict:dict - ) -> dict: + card_hashes:List[str], + card_types_dict:Dict[str, float], + ) -> Dict[str, str]: """ Generates a dictionary of random card types. - + Parameters ---------- - card_hashes : list + card_hashes : List[str] The card hashes. - card_types_dict : dict + card_types_dict : Dict[str, float] The population proportions of card types. - + Returns ------- - dict - A dictionary of card hash prices. + Dict[str, str] + A dictionary of card types. """ # randomly choose card types based on the population proportions of card types card_types = np.random.choice( diff --git a/generator/objects/Device.py b/generator/objects/Device.py index 4f6ad4d..0b3d923 100644 --- a/generator/objects/Device.py +++ b/generator/objects/Device.py @@ -1,77 +1,79 @@ -import string -import numpy as np -import pandas as pd import cons from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict from utilities.cnt2prop_dict import cnt2prop_dict from utilities.gen_shared_idhashes import gen_shared_idhashes + +import numpy as np +import pandas as pd from beartype import beartype -from typing import Union +from typing import List, Dict, Union class Device: - + @beartype def __init__( self, n_device_hashes:Union[int,np.int64], - fpath_smartphones:str=cons.fpath_smartphones + fpath_smartphones:str=cons.fpath_smartphones, ): """ The randomly generated device data model object. - + Parameters ---------- n_device_hashes : int The number of device hashes to generate. fpath_smartphones : str The file path to the smart phones reference file, default is cons.fpath_smartphones. - + Attributes ---------- n_device_hashes : int The number of device hashes generated. lam : float The lambda parameter of the squared poisson distribution used to generate the device hash counts. + power : float + The power parameter of the squared poisson distribution used to generate the device hash counts. prop_shared_device_hashes : float The population proportion of shared device hashes. - device_hashes_cnts_dict : dict + device_hashes_cnts_dict : Dict[str, int] The device hash counts dictionary. - device_hashes_props_dict : dict + device_hashes_props_dict : Dict[str, float] The device hash proportions dictionary. - device_hashes_type_dict : dict + device_hashes_type_dict : Dict[str, str] The device hash types dictionary. - device_hashes_shared_props_dict : dict - The shared device hash proportions dictionary. + device_shared_idhash_map_dict : Dict[str, str] + The device shared idhash mapping dictionary. """ self.n_device_hashes = n_device_hashes self.fpath_smartphones = fpath_smartphones self.lam = cons.data_model_poisson_params["device"]["lambda"] self.power = cons.data_model_poisson_params["device"]["power"] self.prop_shared_device_hashes = cons.data_model_shared_entities_dict["device"] - self.device_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_device_hashes, lam=self.lam) + self.device_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_device_hashes, lam=self.lam, power=self.power) self.device_hashes_props_dict = cnt2prop_dict(self.device_hashes_cnts_dict) - self.device_hashes_type_dict = self.gen_device_type(list(self.device_hashes_cnts_dict.keys()), self.fpath_smartphones) + self.device_hashes_type_dict = self.gen_device_types(list(self.device_hashes_cnts_dict.keys()), self.fpath_smartphones) self.device_shared_idhash_map_dict = gen_shared_idhashes(self.device_hashes_cnts_dict, self.prop_shared_device_hashes) - + @beartype - def gen_device_type( + def gen_device_types( self, - device_hashes:list, - fpath_smartphones:str - ) -> dict: + device_hashes:List[str], + fpath_smartphones:str, + ) -> Dict[str, str]: """ Generates a dictionary of random device types - + Parameters ---------- - device_hashes : list + device_hashes : List[str] The device hashes. fpath_smartphones : str The file path to the smart phones reference file. - + Returns ------- - dict + Dict[str, str] A dictionary of device hash types. """ # load in smartphone data diff --git a/generator/objects/Ip.py b/generator/objects/Ip.py index ad28150..4d3e73e 100644 --- a/generator/objects/Ip.py +++ b/generator/objects/Ip.py @@ -1,53 +1,56 @@ import cons -import numpy as np from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict from utilities.cnt2prop_dict import cnt2prop_dict from utilities.gen_country_codes_dict import gen_country_codes_dict from utilities.gen_shared_idhashes import gen_shared_idhashes + +import numpy as np from beartype import beartype from typing import Union class Ip: - + @beartype def __init__( self, n_ip_hashes:Union[int,np.int64], - fpath_countrieseurope:str=cons.fpath_countrieseurope + fpath_countrieseurope:str=cons.fpath_countrieseurope, ): """ The randomly generated ip data model object. - + Parameters ---------- n_ip_hashes : int The number of ip hashes to generate. fpath_countrieseurope : str The file path to the european countries reference file, default is cons.fpath_countrieseurope. - + Attributes ---------- n_ip_hashes : int The number of ip hashes generated. lam : float The lambda parameter of the squared poisson distribution used to generate the ip hash counts. + power : float + The power parameter of the squared poisson distribution used to generate the ip hash counts. prop_shared_ip_hashes : float The population proportion of shared ip hashes. - ip_hashes_cnts_dict : dict + ip_hashes_cnts_dict : Dict[str, int] The ip hash counts dictionary. - ip_hashes_props_dict : dict + ip_hashes_props_dict : Dict[str, float] The ip hash proportions dictionary. - ip_hashes_country_code_dict : dict + ip_hashes_country_code_dict : Dict[str, str] The ip hash country codes dictionary. - ip_hashes_shared_props_dict : dict - The shared ip hash proportions dictionary. + ip_shared_idhash_map_dict : Dict[str, str] + The shared ip hash mapping dictionary. """ self.n_ip_hashes = n_ip_hashes self.fpath_countrieseurope = fpath_countrieseurope self.lam = cons.data_model_poisson_params["ip"]["lambda"] self.power = cons.data_model_poisson_params["ip"]["power"] self.prop_shared_ip_hashes = cons.data_model_shared_entities_dict["ip"] - self.ip_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_ip_hashes, lam=self.lam) + self.ip_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_ip_hashes, lam=self.lam, power=self.power) self.ip_hashes_props_dict = cnt2prop_dict(self.ip_hashes_cnts_dict) self.ip_hashes_country_code_dict = gen_country_codes_dict(self.ip_hashes_cnts_dict, self.fpath_countrieseurope) self.ip_shared_idhash_map_dict = gen_shared_idhashes(self.ip_hashes_cnts_dict, self.prop_shared_ip_hashes) diff --git a/generator/objects/Transaction.py b/generator/objects/Transaction.py index 1ef0911..0398ba2 100644 --- a/generator/objects/Transaction.py +++ b/generator/objects/Transaction.py @@ -1,25 +1,25 @@ -import numpy as np -import pandas as pd -from datetime import datetime import cons from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict from utilities.cnt2prop_dict import cnt2prop_dict from utilities.gen_dates_dict import gen_dates_dict from utilities.round_trans_amount import round_trans_amount + +import numpy as np from beartype import beartype +from typing import List, Dict, Union class Transaction: - + @beartype def __init__( self, - n_transaction_hashes, - start_date, - end_date + n_transaction_hashes:Union[int,np.int64], + start_date:str, + end_date:str, ): """ The randomly generated transaction data model object. - + Parameters ---------- n_transaction_hashes : int @@ -28,7 +28,7 @@ def __init__( The start date to generate transactions from. end_date : str The end date to generate transaction till. - + Attributes ---------- n_transaction_hashes : int @@ -39,23 +39,19 @@ def __init__( The date transactions are generated till, must be of the form '%Y-%m-%d'. lam : float The lambda parameter of the squared poisson distribution used to generate the transaction hash counts. - payment_channels : float - The population proportion of payment channels. - transaction_status : float + power : float + The power parameter of the squared poisson distribution used to generate the transaction hash counts. + transaction_status : Dict[str, float] The population proportion of transaction statuses. - rejection_codes : float - The population proportion of rejection codes. - transaction_hashes_cnts_dict : dict + transaction_hashes_cnts_dict : Dict[str, int] The transaction hash counts dictionary. - transaction_hashes_props_dict : dict + transaction_hashes_props_dict : Dict[str, float] The transaction hash proportions dictionary. - transaction_hashes_dates_dict : dict + transaction_hashes_dates_dict : Dict[str, str] The transaction hash dates dictionary. - transaction_hashes_payment_channel_dict : dict - The transaction hash payment channels dictionary. - transaction_hashes_status_dict : dict + transaction_hashes_status_dict : Dict[str, str] The transaction hash status dictionary. - transaction_hashes_amounts_dict : dict + transaction_hashes_amounts_dict : Dict[str, float] The transaction hash amount dictionary. """ self.n_transaction_hashes = n_transaction_hashes @@ -64,31 +60,31 @@ def __init__( self.lam = cons.data_model_poisson_params["transaction"]["lambda"] self.power = cons.data_model_poisson_params["transaction"]["power"] self.transaction_status = cons.data_model_transaction_status - self.transaction_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_transaction_hashes, lam=self.lam) + self.transaction_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_transaction_hashes, lam=self.lam, power=self.power) self.transaction_hashes_props_dict = cnt2prop_dict(self.transaction_hashes_cnts_dict) self.transaction_hashes_dates_dict = gen_dates_dict(self.transaction_hashes_cnts_dict,start_date=self.start_date,end_date=self.end_date,) self.transaction_hashes_status_dict = self.gen_transaction_status(list(self.transaction_hashes_cnts_dict.keys()), self.transaction_status) self.transaction_hashes_amounts_dict = self.gen_transaction_amounts(list(self.transaction_hashes_cnts_dict.keys())) - + @beartype def gen_transaction_status( self, - transaction_hashes:list, - transaction_status:dict + transaction_hashes:List[str], + transaction_status:Dict[str, float], ): """ Generates a dictionary of random transaction statuses - + Parameters ---------- - transaction_hashes : list + transaction_hashes : List[str] The transaction hashes - transaction_status : dict + transaction_status : Dict[str, float] The population proportion of transaction statuses - + Returns ------- - dict + Dict[str, str] A dictionary of transaction statuses """ # randomly sample transaction status based on population proportions @@ -103,29 +99,29 @@ def gen_transaction_status( # return transaction hashes and statuses transaction_hashes_status_dict = dict(zip(transaction_hashes, transaction_status)) return transaction_hashes_status_dict - + @beartype def gen_transaction_amounts( self, - transaction_hashes:list, + transaction_hashes:List[str], loc:float=0, - scale:float=2 - ): + scale:float=2, + ) -> Dict[str, float]: """ Generates a dictionary of random transaction hash amounts. - + Parameters ---------- - transaction_hashes : list + transaction_hashes : List[str] The transaction hashes. loc : float The mean of the transaction amount distribution to generate, default is 0. scale : float The scale of the transaction amount distribution to generate, default is 2. - + Returns ------- - dict + Dict[str, float] A dictionary of transaction hash prices """ # randomly sample transaction prices from an absolute normal distribution with mean 0 and standard deviation 2 diff --git a/generator/objects/User.py b/generator/objects/User.py index f89aeef..e1d024b 100644 --- a/generator/objects/User.py +++ b/generator/objects/User.py @@ -1,14 +1,16 @@ import cons -import numpy as np -import pandas as pd from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict from utilities.cnt2prop_dict import cnt2prop_dict from utilities.gen_country_codes_dict import gen_country_codes_dict from utilities.gen_dates_dict import gen_dates_dict + +import numpy as np +import pandas as pd from beartype import beartype +from typing import Dict class User: - + @beartype def __init__( self, @@ -18,11 +20,11 @@ def __init__( fpath_firstnames:str=cons.fpath_firstnames, fpath_lastnames:str=cons.fpath_lastnames, fpath_countrieseurope:str=cons.fpath_countrieseurope, - fpath_domain_email:str=cons.fpath_domain_email + fpath_domain_email:str=cons.fpath_domain_email, ): """ The randomly generated user data model object - + Parameters ---------- n_user_ids : int @@ -38,8 +40,8 @@ def __init__( fpath_countrieseurope : str The full file path to the europe countries reference data, default is cons.fpath_countrieseurope. fpath_domain_email : str - The full file path to the email domain reference daa, default is cons.fpath_domain_email. - + The full file path to the email domain reference data, default is cons.fpath_domain_email. + Attributes ---------- n_user_ids : int @@ -50,19 +52,21 @@ def __init__( The date user ids are generated till, must be of the form '%Y-%m-%d' lam : float The lambda parameter of the squared poisson distribution used to generate the user ids counts - user_ids_cnts_dict : dict + power : float + The power parameter of the squared poisson distribution used to generate the user ids counts + user_ids_cnts_dict : Dict[str, int] The user id counts dictionary - user_ids_props_dict : dict + user_ids_props_dict : Dict[str, float] The user id proportions dictionary - user_ids_firstname_dict : dict + user_ids_firstname_dict : Dict[str, str] The user id first names dictionary - user_ids_lastname_dict : dict + user_ids_lastname_dict : Dict[str, str] The user id last names dictionary - user_ids_country_code_dict : dict + user_ids_country_code_dict : Dict[str, str] The user id country codes dictionary - user_ids_email_domain_dict : dict + user_ids_email_domain_dict : Dict[str, str] The user id email domains dictionary - user_ids_dates_dict : dict + user_ids_dates_dict : Dict[str, str] The user id dates dictionary """ self.n_user_ids = n_user_ids @@ -74,30 +78,30 @@ def __init__( self.fpath_domain_email = fpath_domain_email self.lam = cons.data_model_poisson_params["user"]["lambda"] self.power = cons.data_model_poisson_params["user"]["power"] - self.user_ids_cnts_dict = gen_idhash_cnt_dict(idhash_type="id", n=self.n_user_ids, lam=self.lam) + self.user_ids_cnts_dict = gen_idhash_cnt_dict(idhash_type="id", n=self.n_user_ids, lam=self.lam, power=self.power) self.user_ids_props_dict = cnt2prop_dict(self.user_ids_cnts_dict) self.user_ids_country_code_dict = gen_country_codes_dict(self.user_ids_cnts_dict, self.fpath_countrieseurope) self.user_ids_firstname_dict = self.gen_user_firstname(self.fpath_firstnames) self.user_ids_lastname_dict = self.gen_user_lastname(self.fpath_lastnames) self.user_ids_email_domain_dict = self.gen_user_email_domain(self.fpath_domain_email) self.user_ids_dates_dict = gen_dates_dict(self.user_ids_cnts_dict, start_date=self.start_date, end_date=self.end_date) - + @beartype def gen_user_firstname( self, - fpath_firstnames:str - ) -> dict: + fpath_firstnames:str, + ) -> Dict[str, str]: """ Generates a dictionary of random user id first names - + Parameters ---------- fpath_firstnames : str The file path to the first names reference file - + Returns ------- - dict + Dict[str, str] A dictionary of user id first names """ # load in list of first names @@ -111,23 +115,23 @@ def gen_user_firstname( # convert key value pairs to dict user_ids_firstname_dict = pd.concat([pd.Series(d) for d in user_ids_names_pairs])[country_code_dataframe["user_ids"]].to_dict() return user_ids_firstname_dict - + @beartype def gen_user_lastname( self, - fpath_lastnames:str - ) -> dict: + fpath_lastnames:str, + ) -> Dict[str, str]: """ Generates a dictionary of random user id last names. - + Parameters ---------- fpath_lastnames : str The file path to the last names reference file. - + Returns ------- - dict + Dict[str, str] A dictionary of user id last names. """ # load in list of last names @@ -141,23 +145,23 @@ def gen_user_lastname( # convert key value pairs to dict user_ids_lastname_dict = pd.concat([pd.Series(d) for d in user_ids_names_pairs])[country_code_dataframe["user_ids"]].to_dict() return user_ids_lastname_dict - + @beartype def gen_user_email_domain( self, - fpath_domain_email:str - ) -> dict: + fpath_domain_email:str, + ) -> Dict[str, str]: """ Generates a dictionary of random user id email domains - + Parameters ---------- fpath_domain_email : str The file path to the email domains reference file - + Returns ------- - dict + Dict[str, str] A dictionary of user id email domains """ # load domain names data diff --git a/generator/utilities/round_trans_amount.py b/generator/utilities/round_trans_amount.py index d52f018..8b1002e 100644 --- a/generator/utilities/round_trans_amount.py +++ b/generator/utilities/round_trans_amount.py @@ -1,24 +1,31 @@ import numpy as np -import pandas as pd from beartype import beartype @beartype def round_trans_amount(amounts:np.ndarray) -> np.ndarray: """ Rounds transaction amounts to have store price like remainders such as 1.99, 3.45, and 2.5. - + Parameters ---------- - amounts : np.array + amounts : np.ndarray The transaction amounts to round. Returns ------- np.array - The rounded transaction amounts with store rice like remainders. + The rounded transaction amounts with store price like remainders. + + Examples + -------- + ``` + import numpy as np + amounts = np.array([2.34, 5.67, 3.21]) + round_trans_amount(amounts=amounts) + ``` """ + # a probability distribution for remainders round_dict = {0.01:0.4, 0.5:0.1, 0.45:0.1, 0.51:0.1, 0.41:0.1, 0.71:0.1, 1:0.1} remainder = np.random.choice(a=list(round_dict.keys()), size=amounts.shape[0], replace=True, p=list(round_dict.values())) - rounded_amounts = np.round(np.ceil(amounts) - remainder, 2) - rounded_amounts = pd.Series(rounded_amounts).apply(lambda x: max(0, x)).values + rounded_amounts =np.maximum(0, np.round(np.ceil(amounts) - remainder, 2)) return rounded_amounts \ No newline at end of file From a52c38ba2b05e46174c59dc44e7c307c0ac8028f Mon Sep 17 00:00:00 2001 From: Oisin Date: Sat, 3 Jan 2026 13:47:31 +0000 Subject: [PATCH 02/16] Fixed unittests given recent revisions with copilot --- .../unittests/app/test_gen_user_trans_data.py | 12 ++--- generator/unittests/objects/test_User.py | 54 +++++++++---------- .../utilities/test_gen_obj_idhash_series.py | 2 +- .../test_gen_random_entity_counts.py | 12 ++--- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/generator/unittests/app/test_gen_user_trans_data.py b/generator/unittests/app/test_gen_user_trans_data.py index 07e5df8..8f4c8f6 100644 --- a/generator/unittests/app/test_gen_user_trans_data.py +++ b/generator/unittests/app/test_gen_user_trans_data.py @@ -45,12 +45,12 @@ # generate random users user_obj = User( - n_user_ids=programmeparams.n_users, - start_date=programmeparams.registration_start_date, - end_date=programmeparams.registration_end_date, - fpath_firstnames=fpath_firstnames, - fpath_lastnames=fpath_lastnames, - fpath_countrieseurope=fpath_countrieseurope, + n_user_ids=programmeparams.n_users, + start_date=programmeparams.registration_start_date, + end_date=programmeparams.registration_end_date, + fpath_firstnames=fpath_firstnames, + fpath_lastnames=fpath_lastnames, + fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email ) diff --git a/generator/unittests/objects/test_User.py b/generator/unittests/objects/test_User.py index 80d911c..be8c197 100644 --- a/generator/unittests/objects/test_User.py +++ b/generator/unittests/objects/test_User.py @@ -10,46 +10,46 @@ from objects.User import User exp_user_ids_cnts_dict = { - "6374692674377254": 420, - "1751409580926382": 318, - "4264861381989413": 244, - "6720317315593519": 387, + "6374692674377254": 20, + "1751409580926382": 29, + "4264861381989413": 19, + "6720317315593519": 26, } exp_user_ids_props_dict = { - "6374692674377254": 0.30679327976625276, - "1751409580926382": 0.2322863403944485, - "4264861381989413": 0.17823228634039445, - "6720317315593519": 0.28268809349890434, + "6374692674377254": 0.2127659574468085, + "1751409580926382": 0.30851063829787234, + "4264861381989413": 0.20212765957446807, + "6720317315593519": 0.2765957446808511, } exp_user_ids_firstname_dict = { - "6374692674377254": "ernst", - "1751409580926382": "mykhaylo", - "4264861381989413": "hugo", - "6720317315593519": "alexandra", + "6374692674377254": "simone", + "1751409580926382": "francesca", + "4264861381989413": "igor", + "6720317315593519": "beckett", } exp_user_ids_lastname_dict = { - "6374692674377254": "buchmann", - "1751409580926382": "lyashenko", - "4264861381989413": "diaz", - "6720317315593519": "mariana", + "6374692674377254": "de filippo", + "1751409580926382": "gagliardi", + "4264861381989413": "lupu", + "6720317315593519": "leslie", } exp_user_ids_country_code_dict = { - "6374692674377254": 276, - "1751409580926382": 804, - "4264861381989413": 724, - "6720317315593519": 642, + "6374692674377254": 380, + "1751409580926382": 380, + "4264861381989413": 498, + "6720317315593519": 826, } exp_user_ids_email_domain_dict = { - "6374692674377254": "gmail.com", + "6374692674377254": "yahoo.com", "1751409580926382": "yahoo.com", - "4264861381989413": "aol.com", - "6720317315593519": "hotmail.com", + "4264861381989413": "yahoo.com", + "6720317315593519": "gmail.com", } exp_user_ids_dates_dict = { - "6374692674377254": np.datetime64("2020-06-20T00:00:00.000000000"), - "1751409580926382": np.datetime64("2020-12-25T00:00:00.000000000"), - "4264861381989413": np.datetime64("2020-08-01T00:00:00.000000000"), - "6720317315593519": np.datetime64("2020-02-04T00:00:00.000000000"), + "6374692674377254": np.datetime64("2020-03-21T00:00:00.000000000"), + "1751409580926382": np.datetime64("2020-06-11T00:00:00.000000000"), + "4264861381989413": np.datetime64("2020-10-15T00:00:00.000000000"), + "6720317315593519": np.datetime64("2020-09-17T00:00:00.000000000"), } exp_start_date = cons.unittest_registration_start_date exp_end_date = cons.unittest_registration_end_date diff --git a/generator/unittests/utilities/test_gen_obj_idhash_series.py b/generator/unittests/utilities/test_gen_obj_idhash_series.py index e94bf44..90f18f1 100644 --- a/generator/unittests/utilities/test_gen_obj_idhash_series.py +++ b/generator/unittests/utilities/test_gen_obj_idhash_series.py @@ -38,7 +38,7 @@ # generate user data and device hashes user_data = random_entity_counts.copy() obs_obj_idhash_series = gen_obj_idhash_series(idhashes_props_dict=device_obj.device_hashes_props_dict, n_counts_series=user_data['n_devices']) -exp_obj_idhash_series = pd.Series([['8c1fd1152fc83030', 'd4f37f7620f0fba2', '565dd55c257aa14d'], ['0bef04bcf232f0f0'], ['bbdcd452b847c0d4'], ['e2b03ec4f60f2f18']]) +exp_obj_idhash_series = pd.Series([['2e23f63807f6170a'], ['b8816ed926bf9f83', 'b010fdb44fa68822'], ['ff23757073a07357'], ['3d2fd828c1fd1152']]) class Test_gen_idhash_cnt_dict(unittest.TestCase): """""" diff --git a/generator/unittests/utilities/test_gen_random_entity_counts.py b/generator/unittests/utilities/test_gen_random_entity_counts.py index dffdb3d..266dd4d 100644 --- a/generator/unittests/utilities/test_gen_random_entity_counts.py +++ b/generator/unittests/utilities/test_gen_random_entity_counts.py @@ -26,12 +26,12 @@ user_object = User(n_user_ids=exp_n_user_ids, start_date=exp_start_date, end_date=exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email) exp_randomentity_counts_dict = { - 'uid': ['1751409580926382', '6720317315593519', '4264861381989413', '6374692674377254'], - 'n_devices': [3, 1, 1, 1], - 'n_cards': [1, 1, 1, 1], - 'n_ips': [5, 6, 3, 4], - 'n_transactions': [55, 69, 54, 54], - 'n_applications': [3, 10, 28, 6] + 'uid': ['6374692674377254', '6720317315593519', '4264861381989413', '1751409580926382'], + 'n_devices': [1, 2, 1, 1], + 'n_cards': [1, 1, 1, 1], + 'n_ips': [3, 5, 5, 1], + 'n_transactions': [72, 16, 13, 29], + 'n_applications': [4, 2, 3, 5] } exp_randomentity_counts_df = pd.DataFrame.from_dict(exp_randomentity_counts_dict) From e24afec068388bdf6832466d25a3655a5c03ee90 Mon Sep 17 00:00:00 2001 From: Oisin Date: Sat, 3 Jan 2026 13:47:55 +0000 Subject: [PATCH 03/16] Refreshed test data given recent copilot revisions --- data/unittest/transaction_data.parquet | Bin 31931 -> 30779 bytes data/unittest/user_data.parquet | Bin 19645 -> 19003 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index 1067bf56d9be6cf7eea5a949ac479cc8607d83f8..7418c8cb16e0d4ef1ae1185a4d8641e5ff953eed 100644 GIT binary patch literal 30779 zcmdsg34B!5`SwgQ6J|mZCt+qxAnFj4m}r2zFE*B$>ELKcL=UCBT;vp~Qttb&3D zL<9v32ndLZii+A=P*A|7xFcExrL9_9QE9b{_Iu7u0wD?L|Eu-;e*aW*bIv{YtnYc= z=Y7tdX=$LG_Sig~k)ACPqdjhqCBb4jU>)tUSS$?1P?W?_BEyLy!*d>Y2FJ*ZD6q01 z3GwbMnqqNZkXfFVk)(K*W_U{E@nDiWl@U->k~y07xKlYw5;%qtSScwZm0?(xlLU%o zliD)`N=5@5#mJ&3JC#SB5=C*cfLkIXkm@Cgk~|jAyhzW@ky)Ojc8|*w<%vobEEz7+ z#h$%L4CPQ~8sIRFz1EX^B5^%xx7sGVn1l-9!m5;xs1&CqxO{#dwenI*NH6AAR0fMv z1|=v?Yk-bYmJC{6;l4u)R1~Bymsj~_J5_V#8f%^{80DCn5a>+s6;>|cOx?R8Vb;nu z308d#owuSeeo%_f*5Xa_7gPibim8r3abCJ>dFL9ZXMDHMF0} z_j!#qd8<~hT$xwvT6v+LHVaa{gVOR>rKSdz;L4=sDZ!-WoyAj^uUccVV2$=!J-@Pc zuaWArV@OSkTGwcGPO4Mdnw0^_XlXT?YK;aP{;XGwI_scBr9p2tlosnizZPt24k`^% zxHmI6GiWr}vtxpKP_Yim)@p-FvqJ8));88FE&We-RQR#P0bO;X`2Uu1si768O>Ht)3l)4(%3K%dCiS&EiFc!(HfrY8O?3etXV~x(u@@d zO=N>nZ?x7rE!M0iy*W6o&6wlBV_2ug=3qmRG|<#I%c!gCf2s#`wVCMXegY5tZ<*}% zR?n8xlU;gpoW+t7eCKeM*J8=d&mS6%>%hlj*XH1QY2({p4Z?Nx>XXwGa6M4tc_6g1BFtGgb)+k$EK7JME;8#JpHHs@ANA46&Ais+%Fm+oZJP$^kjk6Ku;}RqC z@=@Y;>&X>eSQ1d(Gy9?b5R1Xf_+t|X2Rc`@XO48_tSOY?llky+i@k|2pRCEzh8 zOf_J8h9kMKTLVDRML?p3m3y?p30zcjZ{0|1A)4!0LGr5D_+BeUcMHn^vy` z>xOHM^#pz!+gj@y8)w8{;HYhE9e%Nenyu6}HsbE^=}iMH)+>wJhc_6laVQBO)mvKO z0_&{_6sf1XWPSg(?mT@z(RsQ_Jl4zdU4tWvw)&DfJP)N7j9Z<)W^GpWmR%Xk;}@)P zJvnMLb<&-%(S0qWC$7kQ*IHbiW6d18!@vGmbHe5W(f)T!{O=@X&WGjiTQ99Is@}L{ z!-nFLqL_CVxet_Q#l@tK^0<=rZh9wa>)g(WcQ#FWovRtjMVKp!G6&iYxs819pZBf! z{l1;f{g1>F%Rd(Bc_nhRCo#0qPlmZ0t!SJeQanYQ2BmPs%rs4BG@fQm&VUt*I{R0Au1=arotPPL~*R9+Ox9_mBNPLIbK#ZQ_~D< zMmELil8)vzR;MV@q*0Z~tD=hip|Uc5(P}>sPxxT(%CJXz)rFNj_a52S%?B?hd~h4# zgBu7Rj3#_=m;3H}!3Qr7^TCbN!+bE>u|CWPCobWp>w~-a;HID;1BijcIIcY;LSmd{*arUp`0S zj%B=3-_q7F%~z<@Debd;1r$eiP-SptqpwEfeSD$yf|H|*8XFp$ebq9OLvXh`s5I6G zedUIZ-(wqF4Ij<;N}C&fTp@XQT!T?s*Vf{rsfr7FB*!*3>%Q^%1LLRoE9iEVg-f;+ zH<%zi?aU~Pb9P3#q14qIzFdZ`v5u0uZgn@%|Dp~#PhS!T_UJd-`oCP-1@6`Vjiv24 zy|hLBE^Wp+mv*b&v%o&ulM-6mHzGpen89iaL-DjI!>6gVhFxLOil}Hjhs}fy#FL$g z{lcrJCMpuAt2)CW3%4jvQe~Z{bXikETN>L!RcW0=Ia+1N#^+^Sl_-ssSecPD+Q4R| zHCa~>gfWJoD~1YpCSq5zhGB@jE~|nr+A}zrXBfjobf+RJr0h$Nh%`$X=#r$Mn84^H zlw=K_Hbjw9nXu<2c^tJV_2&sF zapTh;e2u(@bZg`doYmfNiQDSD;ZHY?w3qo74~@utZo&KoMbqh_gJ=9DGv}x;x8vbO zO$%a! zf7`A+-{B=MEEqaaX1q05I4*lu-Y0(%k^173{>4p;7JR*+W8N<1t(GNOIX7Q@ixjaa z`qs|0YR4<`ACJu$BRxFtYJK(LRf9^XQ>!93#1385(wIs|2UAx5K6&VinT5&MEZDmL z;%m}7#J5t^h4BZP^IXR(n{JW5UfVR>Hel_gO;7%Q?R@x~EDtpr7lJz83KD6~47MoI zxFT?DHJT9v>6q{nR{VGNgxg5L#OO1{@J#df&+eG-n_cULmwWy=?a5tVz5VU0k0w}$ zeD}ru%ii@oT{w2;hhLs@SudLN>u0_yURU*ts~W#~|A`yX_5T4eF0KgEV~pru5uWeP zBE}Ol&Jg2gtlvM7HQzUo$TCpsT)&e~wUi9X8uW+agQ?>yckRpR)EB*(7CmVC*qj%Z zrx$hBgG?VNP|`XhhHhbAxOJ0d`AXla6-7k}&giJZ#e)vrI*%{ON?nmEb)Gno6fw$^ zIFQh2E-HFhTBRqycv+r<`C0R->ax_rTiwMaw#=PbPE5+jF`g3xM|;Xc3%e^SggF{E zBiU3mr>cz36HiRZ95{k*(kib|3`dwSD6Y=Y3YfH_Y7!IX44fph5+{Q*u#&8xl*noZ z&oP=VE3_mNHUKqA_cV zl~NQ_(i9C%0I5LMRg)Gp5i%20Cp-t8isv}q5TH=8GNpl5sD>f~Yz$L21xkKYNbL>kDdQM@%HhSC+y`fMW2Ngyhe{baVO=>h{_)KQiKtpWzA z45x-j5~hm+RAQbG0Z0_f^QwVv2>>ci;(0}9Wm(f`fm1SVlFq3T#$rgaE*Sy~_5 zbrXbmuko&4h59M?v-)m~j72CQHzHzd${byH|f38 zmfbfuV{B>my2q{>@&;$$R2Na6SH!1Q9Oi2``Ib87T2(nPEB=t7?0<92hnDiVn{2*E z>R(@w{o3vIYxo;tBJ6)Oy`5z-j(HV#yymVdYd-q+)HOxd3`y|CugD+g%8vM&t8`7h z#bd;kXTPlFUmTJz`v=|ALPSWH_)pY`qLoWM5$R3KZT`CsB<;#5eN!J_zNKbkYS5*Z zEl)3DhA+Kq+2q^vViRn^r&ISbn-!tUNgG>XsUB(YH?@!tE;9iS>c<1IJH{b zGC68*%_Dr#ywt#u)ZlB*fp=|+TOBnwAuT@Pnxf~@i$BU5^@o&!^YZJT;G&=7UtF=} zlNG7Occm6BN*@zJVzQlao|UnqJ(EIUx_UqeOc99)AX|zlU^hz)HfgrTY8uF#gm6mL zffd*SjINq6OI725Hw+6Tkth>eh7}B;oGu$Gpp4V7RZK*2f+|6oCoGTAWLo14Q56|U zHaUc3h@o^-kipyp&>BUymu3TOOkKw=P+1C^go3INm(ivSwB`gJ2oK(u4OpRA5JXck zB%Kurz>-*xMpt>-#I85&2hasL0#4UuS>_1zkhqPJ6&XPzubYZS=>%XAlis4P0iDuT(IhH3zGfYO?-D69-jL^0=5P&iSQWx+5_Ry8=x zi6RmRV1S7xN1<+Odo~NuQaM9GtVs!yDcRjwoX!Gt&`pVDV@o9!L)T>9zy!f7NNN5o zNr6vMIEm3{+Uf(?0#pq}K%OpXj7X(bWE-5wfk*3{1~}nW2IB?RKa3M} zlLZl05o!a1bjV7Yq+v9IU}_xjNJ`6+Xq^Sv!Yxu1fSlwQxKhMVGBnVNw`WVL1UTi; zZ{08jg+&>_9UzZ27!_WaHSGr^8PwQN(X=V4Sdz5ecwAw5ti1qt$1-MqwPfg+K^i9~ z3Q*bPij$#P!5Nxh{IbCaa8MG=MK-a%IBn5z%7Pro0Gb9#i*S3B;_bWO*J%dPfXV>$ zAddlBRhcocKBCIN)L7o$zF3C@rlWDuP~ilO5Z{+{jfGk8@R%f8Pdk=v$f~Filt%wV zmZ4z@@Lw1QVfYLn7=wqe0XYy^j)IZOYFbt{rK*~0(7o2=^ z)ad#90&kbKy})wosxEqYp(DW->$N!!53t(Q{5K*4>pz;j@++U$715Dq+c+RQ;$xR) zT;%#BHvYr>WqH|oWvebe<)3FpOt4*bs3~dR_{yv4)e%;Qhg$0W+uDu`lb*NoFaNdT z70ZIPUr(x7=(u&!&-Z-g?|i*F+2@~kM@|{Pb%G^tU#>6acydXkJeYFqvjqNNso2*v z$)XewhzPvz3H&KH5Y<{%7rFk;d*|J{AvQiC>EgLfxiejbZ)V@)kMG<(^n#o~>7Ib? z3$FC-9LuTpv9JI9<*%}#l>xUFH+%k|jV6a|gx)WZL%@Ya)HRu+c|!dpj<7&Qz+#K6 zL^dyCQXCTVKV|>vt?D`Ie;SJl{|Qq)t#uZBB+#U{~$);LhO~MX-H27OLL5& ziU=~XH$(xv2UJJ{HxU6Y*`jVx*c9N1FjtKSFmMvO5B{e@|KcRt35q2OFl|m?L6}e< zD3%I=7`V+M5(e#50Hg}$K`_Bq39G_jtfi75N*Yngcs#2!p{;@9CIt|HX8>ixXrYM7 zEcUAbsSf)_2YBfe4Tr)4j#-?F5@66oosoG;02KpWmC+h6YNn|JsX!HFi8To3X^f5) z)`^_Q>KK|Ns}v>-m;y*4l(Q4i0I?Yu$T659i6;Yrzzv0w19#L|R!zH3;z1E*43LJx zsIrFrq)P^pQPziD0Be;ic3@559~g##gAhUcz$P`xq!}=KLx!p#2x-S8*-(hiC(>|Q zSShSDLwu=;QiG`a;;hxllqGcs~ih!p4=)Oi}Xr=fqqOz?IXrD)24vHWj{ z?FkB;hRJHY3?v+y2rr>Q=L9XqjNoQk1ttR#gr-oU&I=M}AP0s56fzk@Q4By++9rtt zq=D|6oXCS+@d@tD?wFK)%r;mCh&eT z#3hQvb(t?Y3|0^LZ6Y@})}||=8CNJ?gNIfSnrTMbOOk;NY0v_qJC#rg`vH!{2qlBW zGa|Mr<^X{Wt01O87a3YFUWru#5$B*F(I(bSNjrsLk3vlV76apDw6t2-1CW50RmfO0 zEo%$+Vdk)hNw5HX5Lnu1;^4Ee0I+gafmh0WiM%C%%>!4(nJR1w6;p}?565c22oAg< z|5p+Xivk|$CTr-3H(?@h@v5egXae?xFaZldfu1mkgLq2iSurh<6E%g_brq;#0LY40 zrXt!gpmB4WvB6_8u{FSuICx!fCipu9aoG}R429)UF;q|!(SDLdER;g2f`Sl6Vw~B5 zv3OfsKS;3ejnzy@cs$pH^ zgM!D3IJe^L2eMSKBkUCh*kNwiV{L~43nrvF1F;y)FY_4as-)9!2pr2wiU>4-S#ls+ z49*k4d!%hk5qX=YN*N-!EHu5$Q9li zGXkEt2TB2~V6cjbP*j$UZD1qN4M2MY;JtxumzHJIG<#+;rbne@fy+OEUJ=@+!Pn5N zqA2#07RvfSG7JkRa1vj?8sLi~5l}9K>83_+17;%5hAsgm0pT!?mx>*V)Vp zkZXnq(u3~E*y7vtY#FqeCj6YnU|7W%x(TO(3I>AphJql*mSRFgn%6iJQ;F5wRtlKW zv7>N4gJJ5tlwaop+m`vj>Y-m*^k9*siZV_y6+yrj#mw5@gGYq5!+C+;^Jp;529ZVw zNrOgAY1pL#aGNqTxN^{38SI+ZFaW*zOg{rXaA2Wa1$c)nIG1g1#fx0gKfXtQ(9E8U~TK0JMf_96ynaC4uZiGte*~ zmxF|WcrXZ0xPuk&6^OI|1_(;vkwWZPg?HxxI>0+oU=~irrpGz5X@Xg>U@rvHv3U*n zZb&m|i;_0C1H=?YflEUd6cbUI+orGqDw&6CheKC~9pfO=z}3OQY8p1x!kvI}_(nMt z7vOEHgS%meELga}!z+sbM!+WaKU@|Lr&+iZh5>+p`QXgi;51Q$egiLH5+M(_RiLL1 z{i)W?g1oR){wRkm()m;jVi3vN=GL*D1Sz<{t)qIuuwDy44vWL`vAT}%6dqKEWTP!{ zWkI}w=nk%;Gs=cz2&Az45H@kJCK*zevKes@2>=+yFce(@Us{+70;yp_R30pdM!=MR z7$@6A0K{xC=>Cp&hQukiR7^B z;DoWURLXw9LM051^&v**I2<-p&WvREL`gySH#}lv__T^t2HO^ALGW2a8!Z|v5Xg?@ z6T!v7rWM`(UJMST&AFGkk<=yL=>~15;4ml}f+LU+f-E!;gOJ(3AM1GJyKiKtLGm3j}JO`&l@BqMI;A#2#P6TXX1g>H#m2H6ip7@igy zA+`#FUa)K`xFA)-D#|#flFhk;j7bh~l(>I8TpA*LF>Z`QDMF;GXut&QIfkP+tTO`@ zm;&pBR4c%H*|WicAdDg&#?IDg#Vf^08Ws=Eid9t}RLEH3qCjp05wDJbj+lIX6VbdC zFe60NtPW?oK9$f@SRbZ7?%HSzm3E!Opjc~LvH)@m`l6?u@(C0TID!5UU~IfroQ(5y zsGx|)RMS+r;uAP`hx^2-AI6WkySUCxnUK}x4U1w-L~7oq*bIOez>0w%RIJ(dr5s`( z$kT>u;>sHbU&7WE;Gg2Yi;2O8lf4}=@cSqYVAhF>1$`LO6wn?t2fI?l|Ae&NZk!+r zEOY$Z7`Rl;JIR3%5&yyGND}t9NvEB1!?|zS32r41G^DO9_3Mf}Z>`D!xt1hM4e6kF# zqry{dxWWd%Tmh*|B0?I6a}}1?mb7LA%yC|W_=^R|!)wfSxn(dflaF)8+h{TFSe(^u z(c{V;$qdd7aG+$4If2E-3o9mQCNzK9z=0{)FAl1xxE9CIT1aFnD{eRinV3eHoDa^0y@J?h?opf03|>2k z;~*{qaBx_pfc+o5E)kPxV9Ui74RY`zzoZo$94A`p`qf(?X+|-B5qw#iG+$bBkhAjq%ah)^HA-^9bC0VcsX;FCc(>EGA6v4 zDDMSsC_~U00uXrtfH-J_7OTO#cy~CIU@Bhvf-VGS#0eo@u)uCiXmZ3ry%IKj>oyUD7>xHN z4WBY2_Ig{|a6m}Jw zWUcGL-w|t*V`zbf&=HJ*Uxssm1A;yVHX7vI5YfO5_b!j2aYF0u9B2bG%p91Evsl=M zNe4ep1_DEuVvM~}5QSKfDRPpF*cqx692%<^SHn-C1-#n_T>)oF_KZ{&+Z5^?uN!e} zf<;1@Ly)m@=q{|g%B5Xrp@%-{;vv^Daa!VFtSgUI-tf5#l9D21ZUXp0;#a|WuyC}3 zXc7_AO6Y9c0KNbm^ujX<8JG!S}4gy#cx>zWCjQzz;NpOETI2AF}p^HiYHrf6PA zdP?1(|op~QXbbU{;4 zxWN`TXd7f72#Amec$)s?u#KnU*0_`L$`4K-7(iGYxGU_3oS!HN2)98!BX14hTpTnM z`^x#An}<-zX@Ny99COJg02%=iYZ{2u4Xlsg&4%>M!MqF_Q9Jk+7(ya)u!Ru|L#~rK zav+rm^%id!t8sNM8_SP5;SgzgXfTlmq-%`g)h|G73_Mg2SwJoT4y2v3C>wsA0cjS% zh&MDvHP8Xu$FYwD*$TEz%U+igCtILIh_Mv3ZOTv)@Lm=?1#k*btgOb6Zv76Oe2eS>Zf5)DKU1aDr94-^P^4i3GeOZWG|j;f=r}z`A(I zus9{)yocjsc!bSyPsf9$LN%ff9?Qn-C}53qGdy*=8uo;z4xZuF7eNEB;{;c zy!_Il6&W?pZd!8tWu+U7*5quy`Hm~9AE|xk+0D1yIb!Owi<5G1? z7MngiuqOAuTi5-p{mYNvdG5a3emSxu!Z|o^Yv+dC#R-`c_ibH%Z((Oj@m+b_R%{-% znh74=x9#@(#;wm;GB|(x%5CGfmT#N5fBPNxSM5{}-j#pv#sjPcti9eiZ-i+57mp|!=2Zh84`spZr6 z4nBI{Z-42ZAAM2DuC1@$TXg&7lMn6M_WFIL_m-?Hd2IWe_g6nU{db2RyZ`NnrapJ` zMWY_y@y?^QuWz6Hq8Dq0v#!Gzd5#XjSyyD}tn1^r3XA2s_H|!Q!0i{EOrR3i!FNA@ zo7E8$%)0u0t0QaouRqVKLhgWZ3$8_O<*~#=$lZL+1BunhJr$8N4Y|21-+BzW#~=T9 zc@1)@qLmwvd*eG9Cz1Qg^k4scB62VKSJ^Toqjz2~`)VX>0-Ihya_ei7cM_6cYcK0~ zrr@v2qsYBu(ci3-k$d98TS}43ZoA}{xXin9*Pn6o)fbDhe}S96$-lc9xs89h<{jid zb7NG-6y(On-#rt#lQxdnkKFs&eS@YV_aFPJRphSzv0 zuyLzz2CiQ2sz#LBRcXf5AsPAcKPIB84biFJAot|my9?@(J95~VJMir9#?53JkW9Mc zKwTrQwds?eMyV&B_`=zQWK{8@YCJpP=-~U1`_;d4^R7W|&aFpQAbD)3m1;(EM9!)k zQ0Ru&FMk;|CjV{x^cLhgx5rnu;#&Q)Et_!nzL&223eWs)@^1re$j$#;>qPRS_+`Up zA~~0PoS4Jm{Uu4Wki4Yo17Z#hAGbV?++WXj*xQjCGjQE_;x zN6yC0qSDWaRs5kj@rpS}^0o(NqtJpo0tb-0=jiPb*CF?Ux{NZbqi)p0b8fjFN$8=o zZou`en-uqrxTYoj4U3zl{Gw=G2a-EKfA3Ei(7Jo?dV4OCpK&A7=iz#DRrPZyl`&`T z)cHux8t8opr98t&j$VLd#E1V97vfshvV0Lbu-=ybCJKLk$5WRsLhjEm8`q57WxGGZ zI*xv?xc!2gkehhhq2k53PV-G!jlvagBrjNk+PHcp0vLt$Ow1TXDU8&OgXPemL;9=-ZH_*IinJjpb+NAfk_5iDKZ&`0WW zRw6n1>u0XJ1J{2heVnuk*OL3Mt3~OLU*2rL6G?H(l@;jTqLZ(XReE!-_65d}I_R5` zt8p{_g~zT~gX`4lk~z5h&?oPcFz-U{+I4GpA^E4_bmZMgUb$yKRx9hW*7;Yh zMRKM+aVehJziP)(J;?pbZNIYo64$GSmW@N< z%ioOq*?J^roA)k3;R9cbN04i~>gI%v$Q}R6#R_saPI;6p{3JYEkoQ&L{YbLk>|}S~`s(1L zhf&J6qv4VVkZimvvH?{+^U9sSLT>ETtbq?AchVPou&gon&zrCnmwycU^l!LXy!nRw zhj8H}=`RFP<^x#LwHskVbYuhP2J31;l z_fg!u`_6loBlpt`_84;Ir(PJo3%QF&O}ZY*^oE6lA49V3-$_9v)koj1cpOQ~Tb3`8 z95?L_{}V{w^G)V$sN&B{p7|Ymam$WB&e@IJ_NklACvi30TKQAB?%MqIdX$P3mwt|C z%MbW|{xoto)I3Jk`sAm@?;>~Pb?;~X3c0t&wzMPnclQh+h@VBR_52#ShsyUqg5(oX z^Y3{E$*9#A{u#*$nLB>=8zeV9g;VyEPe^`g_x#J=gXNIV zMTT#Zzrv2t8~TJ~?_A8%-y=@7cQYNwWQ#Q-(q@ku;BZDq4;(l!CMGs6?gIR{T(0=| zgh7Mc9l4S7c>n4I4fJ+7zUC z@}ekS5)*N5btQg&mVMP#KmYm2kt6;7Ku%6>UVeT-VNp?WaY@OjQKQF<89T1Dw5)u5 z#e@l!l~vW%H8m5jo-}FFS_SKPjG z<;pu&-FfGot5>hN>+ZGd?)l~V4IA-u@4cHgZQi`)zWcUr-M0Py9XobB@Zdu`AAb0e zM<3nw*kg}B{>1JlpL+V~U;X---|X45cki>$?c2Bi`4+r6h^6KyuXwZM@v>4R7om}|6E zjwzx1EIu@UvEMl|CfR@28mrZDeB@Qup|)?i*bc`a|26h_+uui8?{O3!|1h@5ax50+ zKJv3N>s^)$VkwI!w$zs3w~MLY(*A4R=-@xnoI7oI^07WJ6UuT)tiv)SHp7w<>#&XR zJF6{`$aFc^+mfz8Uk3QCGaY5SV`6;{uRqiNNqmRpo7mNsfli;TgB>PWMhtUhB=}!` z2|r5(f4RNb7VWp*=a{oHJ+{MgX>6h;BKClVaTeJuj#!5y#c$fJ78@!U5$m_4;&zO4 zs_mt3hGkfM=s1f^cdR3Rk7aPC&++!xBQq&WX6#P+{lLj* zIQr(|-ir~ZO1k&xfXN*lQ~eoZ%NT4qVR@=L=fZdkm6DW^aLe50<(8;HCB>@GGJ4U96M9bkr4YLm#M}bw zYvVuJd9Bs5GU%k%Z}A@9UT@v`3#;oQuYXcvOk~bm7HMh6%K5^q^_j7ohgq6Rzp>n6 z{&BC}5_ji{y%C>Wbonh09`{>E$3z`)`o6rlWXUCa$y6g^NcCiNfAS?Ek9_ileAb10 zlz)~|hpz&_ioIp^ym!{8UWAOO@XbUUDU7l+PEIG^N)h@%(Jw*W@EWQT-5F2=v%7Q5Bqvkb6y zgS^`j_zj@?V(aA{o{NF=0c0B*s?q= zl8-dCv_WETX))VE)75NDH?&qmCtnKDWHji(hH03bLsrk>v)0^_5q;*l!~tC;@r-d& z@A{NL#N^Tnx4SIhvX%ur6?qjle`#raUa32Of+yfAbC(7tOsMd?{efzGATLnr^5jpb zi1&DG`AKEg3VVgsUEy-)O~`ju*z?OO$~<{x@iv>S%$;X(=>dB_8OguM$FI7V{f1Da zl@EFJ%G}0=28KHPEr_=I7ma^@%!ez2FFG6_tyC^jPwB0FVmFdv-E~F?&6Odr&F?Cf2)9VxJrgZeT{2 zSy+$S(NxmE5_wBvREt(?)GMCh7EerCL}FAMmMEyFSw<)0iBOi)4C3oX8kBmYXRf0T zAH(aJX~Wa;&BnAKJ_fIlm7k(3tw!C_gyd}0)Kyo~Sp~GlwuaW`*;BMe93J&(F|zIK z9>k!7!MZ6rzB4A+(6it`-Iy8Fj48EBOKpGot+SiXs@O@uu}|r6Ztt@3p|a=d;(%aN zpL#Ap-nq)V0DN_@^3cTe?yd_n*3hDa_RCqdk~ux=X0IuY#{Gnz4=eR#O2g|mxaSR^ zZ#{O+6!L8`jb|;dJ2cMj`&zAnom4bKySo8IjMya%GvUx`R}BopiY0pGurl!d)x@!(T2 z7JP9Nu-ekI@-rqJu79`PyW5_=!;yYC^aW3SzH#XMwVgKoaAj}U(dG1cap{X=NN@Z3 zPM!X%vvrNU@1_aY`>fr2*q*+1Q2K}Q$%Kggz%Q2Rzp?IVKKBL;H~Lnj_h@ALJ&Ea$ zTu27f{mIkwS4#G4_;D=U+5zcXE>2%MB(!or;M+vbU-emAKitNrHt!Kz`rj^1-MBE&R=U!+z40tj{)A31JeI;d3wiXp^pE6uXy-jO%PZKH*t7?_wfPgtC{qZ!$}iA z{0j%p-_RLM3pev#wD+m#^g};O-y$RnKkVb@Kcs;(x--5e($h(x7*SW=cS*>CDr~g%j<^Wa1~D_c=yDouc%1BwJ5ZRKj2f9=kIH8L=QI; z-Xl*ze2Go=ix8Z+iOH$>A=4|7VQO-{rI11#vXtc23Ur9+{E8 z{tavT>ZWi*-+$E9e?#4{byh>fFNGUAk?Q>>CH=Fnt?4JCbmYr0=;6^-j#_69JvS>a!*)?5)EU-yG>(8JXU3 zM@0Inm8kd}(Zl&GKZmamH?Yg@-DFRHa7{$|_B+wQ{{;^}f8(cFM7X(Etlqb*>8sX7 zq`$H@G@d?`w;vMRw*~Kl#|h;~cX-#{6XAyFl9d?Mgl`^HS`uGNx;6!0>twWp(A2fn zH?*W&>AU*clo`mwUn$^h(~OiXUkYKt$Y^M*t3wKN-rR~L75cle34h}KgiyjD3HtiU zNQ7&g5~|+ls`%`B%%D-HPdPg;+_5PHi&0Zb8{#HV=TINo>YHYtzR{?X2$JN3@8O-p zRt3ecO)(l!9ML3sc0xtLFe&Bw9-TS67b)RDfYfnTPkQA4N4+7lb$u2()ipx2(iLU& zZSt&JKW?ktQB2=Pd*1nR+dL}IHeS711?$y5X$l@4h2&S2wgwCii=o zL7#~~tF4~#%lCAvchO$m`=1$SpI(7d_HDl_^T%zeXK>lK$=-K=+_rnfuYH^Db@RW{ zatKQL?D4LayKnxuEuW6T`;PdmTmOYty9abOqJ>)RzV%;dwf9Epr}f@Ne%xMkX%&6v zyT^?my~W-`I(yW;q!QA#AGZg+^_ISa@2|v<+lQXY2%+MA`_NyB|6U*Z_ccBH&|RWe zAG#e18Nh$yYI-&mCh_N=k~3v`b;zu1R9XeDZ-08+IG@k!*^zGPD=dwjze9b?_UhPw z==J*3;`;XJT%~^8zVw`oMtRtL!acC(UdDb6jKt}^YkT7}OIppA*uwRPHTlrLyS zfBM!xvA(WOEi5HQr zB~?>ZUaJ@8pSBkh_JBDK_l4M2eS7Wq^-aYb>4o9GhV1Jnnm4V!vO%dHOKJ55lu{+P zRWoEP;j#C#wzDTz=}j2RY}is0KImnVb?I8i{_K?49Bi+S*z`HpnzUDmw&}XD)5}%0 zVXWSt&0-x|)FSMwqO)wVPk&%nwAxTU8}`&=fAqD5zT+M{6Lv{y4RvM@+w5;l|Iyw% z$2jQo?WgX0KiOWYqHC3^cC4-3T2ocm7G96Ob2zcTazELgf?WXvCsxXhz{bXN_OY;% z!G5l*W@>9SW?E=Z|77#n-F6o)p93%98zuq|E4wk~zk>Pau)FCs)g{2lGjr3=p6^rU zr<%VSreOBOf-YkCq7t5Pk!~OJQ_ZJR zontESGfLR6)+<$&EtuaPeyVO(Ph0pY`}ITTk&L0=vOn3n7tGQM+p%A}{pET0POVmy z(*pc~kE@%h*7d-upQ^v`h2W2KYQP1<{{6iDB|M-_V+yAK|I^>|@{!*C4e^bTFX(T6 z!S%YiM%Z6~WB20CAwEO6P1OWuY(Z-qzO5k41Ahq5>oeB{t)cyXuKjlY^8Im-dbLol z)$?k)czr+puWz4OQ(eZ4{U2~=;887jS8WX&=5*NiJa}!JULX_h-Ka9>^HA7;K+L;r>{pfEj{|z1$_El_7tpPp>9tf)N`#teHh1kjCndRvQ4e8 zY&oYL{WoHS-nPJwrNEtO#EgUw^u_A)m;b5eyZ8FnX$>VawYu|j@80${vASfo%9i}U z;N4B=%lG+23gbD$G5f=nAHyEHGy}vXz$kfkSDX&tQG!^jgb&5V4LLmi#$0J3Ow#Ks zXKM`nN|40x#qHyAx%Sc113?mh7N6-SgZZ69us>AC^y0QLL2gcQ0|kMkhA1UnwnC&5 z4e8ujL_5)lz>-h2m#~&HD}ehYL|Vz20l6hGmZSq^Nasx;vP%<@UkYdChDsxyN77@< zP*rZIsv1&NE=lLjAa!0teie~l0n(AMUNd@*IAVl^RDMCY;coqhlpsGu-^n?%W}&>? zSyw<>5=v)|7(t4cO$aGN5SK_I^qc%DAdeRH_Jx?gdU0v6W$B#r_lHzccy8<=w*v(g(ufgPjOo>% z7ILdce-x7L)1ST=j`hn!eItsy>kG7#Cx6)ZpsR%9W3+6HFhlc$wa$~f)@xK~y>dhI z9r`m=bEuO!y)h!>qeJ&g)UM~p1jd!JXU$hhsKA-^3su@@{X$$ORB|kNo|)6#pRoP( z-=BUtS*Vi2>3IH%X0uJsFTMd$*j^;PodY|NhAHF_!q;`k b;0qr=;I(dia^OGz&$5BCSW5AK8~lF&?4U2Q literal 31931 zcmdsg34B!5-S(YiCd`CMn1swYVAM%W#!v&?eW9^rvL}Q%2}wvIDt9G;$wC%3x3Dj6 zAfTvJ!=_+S5vfvJ6>tN^wQjZ6y0+T(t+cON>uc@zoJk_tu)c4V?|Z-eG&kqmbMHCl zfBwt!f6l#^aHxv$+P&*zy?4cwdsDnrGDRJ=m3t_PVriD9Ia+1~niV;j^`>N@ij!$h z5oOVvk|8l1OLLOI(&MtS1csMck>(Xq@n&U-EHAMp!ukoZYDX}xoO_>o6i%F8tmXu+o*Ua|V z(hK5B40~;XwYpN!imIr*N=L`0G%LpPIbs*2%(kiFpd*Hg%S=fR*bAyG#%Yf778)~x z=@T5)!JD+Cbh|ktW$8jsnwqp;4KhV)ZDv|cnxjJtt7(qp5-XgWl@8l9Ki!|sb2Rufr|5t^T%~{nC(r5H^z6H z!?$jfZ*N@irI?`e+lpXZHrAN(#!&d>!GWSse!=E_&ep%?jh%MXE>>GwoH(jo?PjFO zY>C6)&1!pRQ;Y3vcbnQ3u{jIocBw7#s9(_3qMI#Cnp#v_;#R#Cg*LsZwPh4eS&`QE zCbh+ZKRX+n=5~eH!(N6mHJNtV@>;IiAwk6@@in*+&_KwaLwb|@lwCxVx zZb$GfSMbCrvT8%8JmLzyZ}YzS^$N9T5i!nb)gVcyExw?sD*_oe5qUSZMoitDXFDUc zy;W;!>1b`So#Ag$7pU#+t&TJNU32C%neDB%f}PE+?Pg0WS(kaOElqRW_+x?E5-`m6 zzyfnlC%IS0Lep$97mvJ9?t!iS&N+Q!z-9kC7wn6%Cl|~aQ3tNqlMdgLj^I8|@IWG2 zuT#x@#C7YjdGGjo-NGj>q$ug|8RviDrKq{@Jr(&F$Hv#538wid>iwgC|9K*gS0_GE zc_xlOfBvJ*ZX8EHkgzom$3Lv|Jm|x5E?aN=5snXNTGbTa2#e!~+4f#U{_5?{hDb(S#o6A5a(GJ; z5$P1-GmENharV+`{K{{@ui`xXDx6`9;|mJO>56jlyR;Cib6t%0#vzd^rlQ;*liz?A zt8FJ%^+luDnvzY-V=OazSzGB-c2{AUYmGg*bW6tJ^~R5l^gWc8y!Y~g_>8gc?AW-p zJV#7!!8P`zoq1MKT}Il^!Y_I0>dC?6q)Fv(rdMuxJ8koZw6TSyDSOkCi%A$#!4b50t-xIr@2Tf161+ZSJ| zw;Fx2eLB{6JT`d8S;2jK49G@|T)Cq5sFCm0DBfmQ?=@t)ubTS|^7aw3c1F7rYZ zRM=8vS(GJ)7Db6?39@iy$+AQP8~`$~QFLL;%Asjl#tlS4lA{pAmO}B4b23C(!JP@p zz^xgDlW;4Z;iV|jz^z42;zhKOc#fbAM^+Bcio7f`oQxq5%;CtA(G$l=9B{}t!HyxL zWc{9@W5PoJ@AQe=vrei+Y}WwH@kLzlP)2ZfS`>7A3jvI%7!}@Z*W!06AWhsT;!jNrgZA0GD z$|}kU_?72blax!@efEO1a0=zL**s|(!8fu`%EoPyAzW&-;?U9?4!yHj8J#pSGapm5us9@H|_Tq0*urx+s7{)MF)?{ocCtfo(olyjqH7!}>HAN&i z9mrZx3{9qWPUaL=CUr0;i%}(27kHWG1)kJ#GH|%58888w*ExmMiH5}JilIuZ$eN;| zk~&FdEmqYnoM3_B8B)hv0spt&}iP|1(p|io>vsdq8tr@QHdmdHl*-xFDAj~d1ciLIL zvT;Y!(&MQ;E2|F4?>p`b$2@s+_z?BG_0%ltaQC}aHW$^MTbhALVOz0t*(|?%Q&+BQ zzK~}NekPx^hS7$(ph-6`ZBzmBqgl$I<9&aQ4<6!!YbHfK#KJ%J-<*A7N+8_c>Pjkb*Rq0XyJ<>zH>hc4leJ{9z&rJOJd=55sw&nPsYG?8sM5{ zF(NvH+m;-QWJD>|M6BQxRpmJqcf$B2P1Q8c;&_RdOeo~0Snt+Bel?us8G3j4)gfYl zEu~%l;Mt$|@ogSO()dm0cakee?D;kyQwxll} z{o)Ug{^R4T?9)6+Y+AEEwDEiCa;+p6U)OH?NQ7O4n@5E#a3}-FVEk;wj&T{4619iD!eFbZF_8G*K1w+%z=HF;e zAuc-e#^xOxcmH~P&$O6{zggGp;wSl8BO!iv$!psmdo{xBH*VekS>cnDKVr?bYj@T! zn=|i#IqjB=gvS$)z0~s;(LSx)IJT^7%BEe`E1F<3I&0Cgq%o!jmtv{9OlyKHsRAp> zv;%pe#HyMuvofOrIZ6&A2j`JZ(t;wXfXJ3<%gK=>6<`u@(XarTP2$v8RWvx=pml@S zWJ_{|Wi++m8#!75P}FGnG}V%IbjT|l+@mIv^pQ3+4uIK`HB%4(f^jEP1Q*UJw5}>F zI&|d7x}m5HfGi=f(Fl%@S5-#BC6dZpK#`7cj;QM@XE7$6w5qbSVGCnOAjuhn(K#Si zS#%xc8O#i+sZJ}rW+-r%lEeVK>$D)TmZZ@tnH#663aq#_BB7**75Pdi1HapdG!s-zi6GT^*bEVXFMLUd3Jh=t>6g1ucmZV>iz+Vy@; z5ky81(U+nbjL2fLxRcIUmMup%6vku}QKKA~0`E8~o0ejUw5afcA{r(Se2!A;~9@PmkY;Nna~L#i z6&`kF%qMnRU+NH7lk0KDR{BTXkoEWx|Js+by5CJ5lahIZZF;roy6J#(-7@7@W2sxh z^lMLgZ`8Z4tD>(X8rVt68(5a>CP_F86)zeC?-!8{Tu5m8^NK@vQCn)_Q)z zO?S+mwq|q1Zl^CJd&}&u!fg|-D@eX=x4dDudtJ?}74J6QV#UWlnRT6K=`+ICH;zQs zbz~J%^WR%ze=Kjx<`;Wbzwaz+_rB0IVO{Rrb?wfwn+k4Ok@do-&a(5|Te~h^6GOkS zk?Ed)$*2{~x(^#)+t9M&NY*VKa@{XCl-`svy-r0gh3{M!{3A6NVwGD`kZzTI(PYNC|gCExQB^duwvLi-^ zMczpNE=bN`o{uVbL}@;EFX2bHK6_1QB*Jfmt*MxrP#@tpga_ir#*WSRj`es1Rr1DF zX4YgR&GgtZW;o;;hj&KJ>ak2!4G1l-t)nKb#$F@XVof1ztMwGlw~un9nR(|qxRe^k zSviWSoH4erATZyXIkS3ffunNl*j1J%ztX#YMkcZdZ(1nnTw6!Ar#8*T*422zzObiK zpJ^);Tw}e}Ge%+iDLiBTj6xyPTUohs;by6(#v@eCs1~UDRW`vE+~0Om@M9qO;Q`|T zyKlcexTZPS9U%+QKj9ID{6f6u^`_o+|vMtTDW4q6)O30(wLhMcrUP zM9{h|OSTZ0Z~@{0qYA2q5Msg`upFbQ7Q?ER1)r89u`(xG#4CUoF=!Q`$23f0)PiJ! zTHr}gfk%cI)*6ou5b>h1_5xK<^ZnY z*b%~HgkAVKOIH!;bcI830(+tBCiqZ=)i7AhmW6);az_xdXa*BTP~j{vAG#$VqUowB zB4+y$!O?`Z{m3mUCY@O=bE+&bI>R$67DX~LkINc3A5A8{+F(_VgP*5$!c}pi&Vl&R z1rp^zz{2&*DuOgp78()6fXl}CG;mcQf;7#RLD-QtK;Xj!2l zA)Z%xAOKS^B#X80Ef%02r;YWH8l3I6cL^@|JJ>q9Tf&y8AEbLRysJ1!pGesRpFvr02BIJDo}aFp8k z3Ke|4+ji0?c+|GGNR!^X19Ou0`Rf?pbz_3ZFAF|Bzt=1OYZ;H|FkWx;9=WKTZ1$6+ zWiKD~7(9<|65&P!00n?Ekd7FvPTYtnnt}=T$j|^> zb=hIaENv>hNz=fF0`QrmCx^9wRTK*@&jf~)h|vb*0__-vWyIC2h4^H z0M{@vkvCNdK#J#J$iRlOE(q8gK^L@UnYyEgS3rY;leHAVv@FeaLe?>BUNj*q3jiKI z1IfRsBaMg50JL}^^K#h$-~-hxh~SGsL^$4*X^00>)vz+M!6=#jfF@ch=7!Ne=LFEV z9mr8tQe5sv=%LUSKdXbK(pab)1i@M)tmwLc zG{e;cv@Br^EFvPp3Y(dYZ4H<<0+azO!o~#@2?0TsH35bVjkio>0vgHgc+JuPHn1sB z*o`em=q(aD1QR4%=E|+GA4cU^on|#`Rmu`N*LfY`h}My=D2PHNqu{X!n1~4hThUYv zQiF!@3VetKU64aC9Hfb`Szr%rUC>7U29e1=6u- zJT7H&mStfXSQUl^D1l^$#9ufVj)nHK*j6zF1K0vtC5w#HZSjh}^lCqLdURgKMcD8} z;;Kpl%?J>3(;=V^#=?kIm?$uUXu4$;V2P1%9|#+G025`P!O#&+(~yO#gu=txL-QEu zC5=CcD}ZM~7KE7~qeou4a;u%Q(w+$zNWy4r`_LGkh30U(fE1zzxPZ{i3J@kR7XXqd z2u%SjgMp!A#|rKVlZ6g20?B%19)LlnRdO56;Nd%<{?Jn7a#$=)gWED-Gdl8Pq<;rs z$}(fY_#i555DBhpVCp(nLPlc8GZGIQ;uxNV*WqXybb~2lQb3!60NoZ9N#toAB9=4( z+f`r(2zz0Xgjs}7loTELzm5$!1vZ2|G_)6CwXg!VsYJF!0|Qn$1#X6BiRxg-ixo9Q z8xR55v%^J<0Qic09)Dn@pdFCl!U#1M+hxE>5PCK?-waU|bdeY77yWRooQ|}YL4s-^ zbUCg`!FIX8aOx2c3{T()uL@Ad%UBrfhJjiII0Kgf*UhMaY7!&LrFAKYRPgv|qumMs zAqFB^7Bg7RKzm6%`tbM-!FQEJ&LHSq(f2uu0?rH-T^057N@tpgEKr zs&hl;W@5-i(Gwc8P2?t^=n$C%@tyQE$OIG#!VGva;3L5@OLzbmKnsiJu@0hj%#L`e zS%4_8C5%M2RszpUP=DS;8idFryB1|=@Jhh^2wmG6nm$a0J%fWnb}+{-a=sJsvI?3> z7IY}DVX%f)Sb!%(5ZyrrNx*>yv*%50PXLg>-@?annz?p%wx+^NSlBEcE;&5Pc9c9s zqQdt=8xdC^#U5US!5WZ?C1B@8_hqOo#O)YdH?LbZhjfecV(07Hn|NL+n9#(~@rK0=4AIGzVigykZ_fNTWAhE1iyO_ceLGXwkg zz*nH;2-<~nu)xA z4V}g#!oU(N3--Vv0ocSg zK7tLXSj`P&k@QY+3Xq3t3De)l<4kaT4D1E8DywBSB9e*{_yWU%U$Im; zDli97SV1Cr29oK*x)eNH1;A=+bR$@B$FlJ(gn^s^xB-SK$-eSLI9iM!i3%Rgvfv1x z@WZ(xpo6|pL3kRx?@&VGbqRKwPpox&us5ivnT>W{TzXL|2fCU;at6tQ4uRtaI<>$K zGiG5V3s^=6*Q!E^cI{aTBW192G-R$ zmL0&;D1gbP#vdLbi+ zFt+6KF#(N%%0W^_ragm0Uc%C%su+ovjcJ;NNCY@~2FMg9d;fiwc13ijl) zk9LE5(p6WAVkp4I<4=qUs0d7|%vo$=%V>C977!HZ6Pz}@tH>2b-~eH&h?T4R8a?@(`3QLnYL zdSPmcWJ$8jY%BLbqYXLpxCe=;Y!&_l%8677wy7gg6Ok4mOj@8`R=y5C2F$gD{SYM8 z;LbDcsi3v6!NN(lx9y5n+6@PbG!%}F22RE89S5?o9fIfo0oruq@L7%=yCEiaC8h#M z>BvHPU*l+?P$Z8G)>>gEKEBL56CA$O+4mu!Xu@vN^A7P-AM{9CH}?Z#*R7fI048A)c{BbzXv2NXRe3C z2cHE-5jJ89tcbvY4JSew@_-zSDSY%kcshkeMhB{oM}o;^9!$YwMaX#43Oo!*D+$z> z&I2{`ngmFT%)(G@*YF|twiop5m)n1@|Rk8lkLAhOj1$A7Fk z6*&ZKO=Hs*W468(2^bT3RDuu~WZ#l$a??O6kgfsN7f9H5+=IO!08wCb39HQUrFE&u zDnWVha_9~_#~iyn10qEejGjH*M$ws#vLS*)uz0{Q1-1rU$6Fv%@xUsO5`dTDd*7W3 z7|7$PJ#Jf-U7~$QTEKpl1rH0`;W=Ol^Gl>1EO<~=1XS0A@%vnE5#iEiz=LDg5Y%=cv3qO^GG&;L zd9mrv;5npxiUKgGUf`>9BR(O|XArl5gb}7*PH-c3aEUDmep&;05#49wRqzDJb`)?$ zqM~BM3=-pMInkHs1_v)U9r8=C4a9=OH@Gof(rbKj2J&yjG997V6-Fi?D}XI5xEzZG zF2u&00N=uL*s>#BpZPKZ2>g}~9E`<8{<5;uFC(Ag3@P#G7`KFF+64o!3_%8jU;*h3 zeNPW1Csw%wNSjq;la|?p2_Zb{XarmbQG8SfA_^Xn7qJ}*-V1oQgfbI9^#l+(F?C1- zbGCC*;_+lR(59AnV-f}GBJpxp2KE;Kel>6|zE4I2*MiMJ;!E~|IcaUVA6^?Nn3nkL z7&jOWZdaH$fvy20po}U4x{o4ky9QJ%CN}!q5FL^}p2m(PwsS$ALrv9$MOa4;&<)`i z=tJf*{T_6|35geYQb3m>BE#xIh?&@{5NTjGMmY8f&zL%Z7eEUhkz|DzKS=|q9H{>~57Km*d5C2oKgCUI&Kazw18vUavCz;A*@ z#)AoPhXS2>+>N~?CAljF+X=|?T-YebGp^v3I7|s^>f7f6ZLI$9HeoQM^9A z^p=7>H|(0Wg>8D{@jV;wys#&4?fAmGHtm_QyJ}Cv6L)R?$;^G)v0Dm%a^u|>9c=&T zjVFF`)4i7-TIM>lXz!N$^rJUSxai5fH{U2N$6jzi-T9G^!8ne9Go5|C|1-F$=YuSAAx?7)Nf;Ik%(ni_etgT!LcY;@_-8 z@sX?IvMxn2A?elyD1PyHoi-cAEx%v-PZWQ@$XB7FxboJie?;-+?3Wb{#d9BNT!Z4_ zw>MB1xf#al}f{(<5jezk2H{3W&K%cnm?@wIC&l`Is4=X|mfgY4S3 z=4I4A9y;sXIj9}`%lqb|Rx^qYp_W?RdT}FaFZA2@qV~2uTfVS4BRR(p-_(TKMM)_y z&Bf8jU(h-aNArmnAI3$oAAZpgL2+Ar#^;#+iCYgAHKX{O3m330IA*^SzZCbnto`=q z(fH|D{_IxNHU=;LIf^xJjK8xD#hvr+{S?Lj{4&2_K8hR8X?_RAxA)oTb`&S)-FPK# zapf<6@GNSdH6A^)1GTB2&6tbYd8h(SK&zhYD;YBzO%{#F-`?UKhaC?rMD4?iosLB~UbL0jj9Wr}QB!;k zYUeKf!)6qnJI8X@qBv97jTOxPbmog^Ux#ABM`kmw*|_8jvV!l9zCQkX)OJrgcqeL~ zf3*BF)Sg}WPQiNAHox1o5ygfZxvMsyxaYj1Si$no56()x0mTm@ zb~au1r}kYae^U9&|GERmcN`mje9PfGW?Oo*fkIt@7$WeLdsYwTLTr+Lf6$h3cDrs3c{--F8P0x>e z0LS|ZB0He`7tYz7`yh&MTyXiZgE%glz03L;j!*qX`6(vrTmD?^Lnt2D^*1bpJ65{p zW7JN){H+PUK<#gC8wFpRolS4`K8)hiRZkGfH+KE`!bebiFmCy6xF&AP8UKOWnhAS< z_$X?3K9su-#k}7%op%Vu4J}`FVLeg{Klth}iVw|6+GI-BpCIp+?ss^9;TQl!$cvP{C&^#EF~%2tw{&PNz{sy+ zKB?;mBhDI%ve|4gF|l@sBW~0vr_1Gvk00$$NJt#x@gyaWO-b?kd}sK_jUPY$%rjHZ zI{TbJTKc&enVG@!vL;OU!TI>f&Yn04Ka(euCssf{;@L+D^n)_xhdCGgXliaKFE78K zu&AiGq@=WL+O%oq)A3VLQGs_UDl4n1X3VIruCA%6omp4kaM4AxF1`dmm(B)1h>b36 z`2d#9nbX+V)HHWqB+}f{(%RNGe?H#Y?(FPZuyE1hC6`@(#g$iHdDT@{buV4Ee8q~D zD_5>swR-iMwd=0F=GtqoyY9N{*KfFCdnhkpKx zhaY+L(MJy*I(+1@#~y#;$)}DUJ$mfvXP!BJ{P?raKKK0dFTC*LOE3NMWh(gAs+f~+ z!FmQljFT_H?o9Lj`JCYP?wH_--`S$C!46%gQ>@;IFr#O*_r_7>-UGHM%y_|(^Uji# zdy3zAcj})i51zg9kE3&4PUo0h%3*WcbMGm(rBfd#1gO{qe*l(#D=blXI&;`B%m#`0Z~`b@<&kzcckR z+iK?*fm~aWyX_xruFGQ&=DJF#u?bG<478ecmt>t8>7m6p&*B`55o#=F|=H!%|%sTfq`Ow6K^5_)jb zNlnJxx-XdMr_Ms_Y?t4T?)Fh4&W=aL#000^mz#OWJ}%SM=)OYCouBD){(UOtvL(p& z)0X((KF-)GL$F{6y+%IZlRb!a)eFUeW{xW;VvIIj=B6Po{{(g}^{ zkDr$7F1vlqrvZP8{m9|G^%Z+OG232M)w}FJ*>-1cZg_OeyZgqEcXluR>&r>AE{t2Y zbYJ~!@0wAL{|fu@%q!WHc706nku@=};BUPuBU-TR&FO#VHZ?+C_aH9-kdH|$R3j0yll;}z+SJn7 z0gZr}KQ!nS5I!rex0&BgE<@Xf6ZKg*;5{4i`puPi$0h^s+W2j*FIt-)U138xXdC_2 z$B%y1nqqT&H7X`Hm&$>ApmHgZicO*#sMr{L2C1NI*;K5Jx&-=6Zb#WDnhN&Z0s-}Z z_GJ7(0kx@%o6VNaOPkHk#@4=1p}ZX9dn+b*%a)knk(**t#i4^8Q5?18y?$rZo1MR# zd<#Q07WaPY1UXJua`pv`jul@a%&xGIass2m-g5PfV^7;Gkoq6p=S9C-o z|5*6)KMq`R)mN1>&;HX>3uDiC_|q?Ie(Re)|HhA7Z@Tq2tsid|&k63keSjQOqOW}P z3s7&w=T^)}uc|#B>w6(K`0Cb};M=!Cg5j)T6B<#7W!{Q@oyuHKq||QCHT6!@ATJkn zbm5Vnjt;9ULSE%=Gg}P23kmW4!sb0PNV%x2_>*n@qxxFnnoCYx-1qo<%z3$mj_TSH zdpH!f+dMT%DWNds2!$O^56rN?|oL(ovn8QeY3B*fBspgAB2wQ|;{P=#|cwF}_u?!I!ti1pm+j>6~J` zBTDE?oA-TNxi>zlXjhU9lq$qSLQ_gxI=U>YNyq1PI*YnmjE-@x@zv~CyQVutz2Hyk9SMkvuSgeA3f{isb@c3_a7Cd8=gzO$s5b$Zdy9}XeddknwFkD zJ1=*()28MnzPF`0w0-4KzU57M#rl2MUVU)QHhF#^YsLF3GGE`f-d!{|ePu>$p7Hk4 zNXDI|?j!3r9$|uq?i#R8zJdP0#Mf=w2YjDmr^9!HBe?(0nBYCTuvY(r$&R>=S>Eh& zZ%lL*Q=$){V$TV$;cIx<3r}+55q3!0=N&%)FGEc{LmT;r)j+1Bv-*_WcAJ9=Zn|f{ zOr1@g?E~I*c+Tc~)fQZJHv+=ms2&cx-3T3wG@Wr@`UT1#*P%D!^99~Xl-HdZ)Bkm; zOsYH;ql#9Y{Cv^C+ONNn(_d-F)x*EV61gThH3xUHux{w95d+T+dqjHB$$`2|_`&{@ zM5e=#{Q!~1a;QLrjX2iI3 za9!`fE+udlcS`R1GR>BS$CK)>Icw zl+&*mFqpxkNbmg=ExE^_kqoH+w?;#xbwxG?)i*=*()S_N;av_o_1*T`|25Fzoen(n z-S#=?1B`^vAKv+hO@@quT-gb-fLz@&X!-lki2Ow@9T$`H1Jq!c;0N_JFemyK!y4Lj z$ngGq$~kOQNE3(m->F_B8MVf5W>RdiJ~Rd%)+^hIc*WPky&O zM{{qI2@jv~pi|#Nul*Am?9rmV_MiG5dL3#gBlJGB$#*-7zVB%am-m1Z-+7Nir!;uh zL%uXYhW6c#VCZY>!zVvdi|=*}L#dzPV;HH$_cw--W6i)Z^tTu?hW>y;Ch+fsnt@&Q z(pldul9O$Q3<#f|S35<1_;?1K_@JA`)BexK1aq1{xmDH%oY zktVHOZC^~ld)s32&HsgD1Pjde4s7I*T1J{gPomR-s{4KEfRpSbb`qC<1?6{#s;VxW zIi+`Nz`vKCOnw&4nGp(wD)B%0nHCE1p}aYvqEKi~D1T10E-!D;`Gusi@Z|Y96}@Hr zj@~7DzBCkC7{YZplA9FFDZp=(L!pY&>G(@3ZH{y`6xZ@~Y{detq_$IQncj3^Q=Von zE^Ak33iC?mw$3SSDQnc4jYw(J!a1eQLZddbhP3H2W-jU|&kr@}TvemqQi;0I!oKnX zJ-<{eEttNfVg{~hsh}I`Lgf4+tCsF4%`cm)v5OdOreKji8tNJ&`eL*%>cn`4-@l6$^}3ypY^yVLh&|udA?(lE}j7x%@IlYmVUlRsG{-w7NW@j;j*% zVlwVflg5_KZ@^b~d*{PMjN&5PqpFe274wd0%@qq8iXj&=ru?$n+QRzI3-imAA!A=c z+Rx-Vn!X+v`qtUn#p=w;CL)Jn;~sKeFD`=YiLDzpRmqki>K3Ug(IY!5^7;Ds3Ka=T`Nkd!1%tiC+ z*l%dll7}_Nx-tX$ZmI23XA;@eMCYfq)OPA6k%6*bP~Y4Zspopt|Lq8pVZQ z>x%__fQ);`LUgNnQRBZFn+6#f#l2&V>ep$Ko8Da8qSjT=dUFx2&Q!YUSTdL1xsR~6 ziyLMdZJ5hq=u#WL`C*ZD>08H<^pxlvbT2}5`V?zT`m06XOr&CNm8P{+7$fN{)}ccy zfnAjh(#2upfnG6MOVwiN(*XMzt_#EGU9kXqN$V{UYk+Q!G^c;d_D(Sm=9}!P|J-S_ zrI}2lI&%@$R_Uyt8R_a>kKrhHTIykJ&Y16-oM!mU;_=<1kZ~DeLSL15CjFK{eV3Gb9bGqcI*5z3$;*8oGSDV$D zwH=V}fH)OdI8YZ(=eWMD9LXF;Yj|2yOTMm#cfyivRnjRhmg7g(_gyMiY1vI1s*~5g1>HXs23(=3YNOV9s;lBfGTb+KyA|(f5%f4Alj$ zf(CYGkTViIFx*#vbNkaJ@1g4-(Ob$E=#g*6-b3}Rp{{JP#+Ciwz};;a%fE_=H0E;> zV~*rkz6*Wm;|!3Oz(*;I`|@C%LuiSHxG16sDhM3l_(d~5Za}U&@aUc^P{a%E+FNKO57?xdaHVJt9()} zm`Cn8pZqEzzd~dny?jmi5^3^e8KuIa-j4hEAJT&S5Pm1`DYDo+)m^_&@ zudInOLm-z(A^MyADk7Jb42^}5U$e5f4~vCD7tSU9&zT&aJQ=O?@~X&{q3C#{d{VR+ zA{S!VzBat%o5w?LQhaLPLr#Z^s^!U(&!0YIJRRiJfbpoLJZwC} zeK^*y0QZ|*+JC>$B68)on;&?UXnu^I&B<0&K3MAlrEk5aMb|4oD(~o@(K|;6nK#r& zL}PUHe3{mF{q)de*NoDG-sf^!=yYC4h@x&>nB$Zxt4?&g+YjQLd0&I%hIWXJ#0xKfr!^P4lGuqoVBVKbk|3D+@_~=NFRe zQRUm%1$G!dL&m;YDL&ghlgP)V*B^+?5 zxR&r29pJPUXfuo2wCG9})R5~7V50-)3lbVXC$A?yE>u$6)(|S-N&D$T+D9ifpnWm9 zw4^N3z($r3TkONnL-Re-35}j7YhT~5HQMP=e^og0{rYUDJ_O?{?)Bf~meWJ?zrJ6u z{~NY`q*?Jv9+DJ8W$;mxC)bc)D4lBkqNWI5;A?)BT)LR_2mb-NLG<;ABGG_AynMe9 zPjx?x8~&=UBBD1(h(bm$8Q{+@p!@;+(_{0y%+85=TiZmHY(fP3HW>oF+Yfm38*dl< Q`~Oi-;N7Y){@L>X1EjHz9RL6T diff --git a/data/unittest/user_data.parquet b/data/unittest/user_data.parquet index af449759e52c8ce1d40c2abfac37b138aceb4a1e..b04806ab966d78e7057cfaa693d6fdaf13936586 100644 GIT binary patch literal 19003 zcmc(n3wT{ub?5K3b!}Z+zEK?Ma%~LA#u74!c%SzIj{&Z(!E|&}a zFtCL02d*7>wy@1LD{=xZyOtY7VJJd#O^7gbLfiL(&=qzRH0Imgz_VNtd7+4G-w6wy zojfFhz!rgR3petk{MJt2cYNEkqR{o6$PfKOQ|JV)&l7>`FxGSPO`WdoyP$&~ za`H`~&oqt|IF{!{j%O9NGRVy7hY`;T$8no;%`MHVJJ?TS^Md9DEBsu0)k2l=F>z^Mm;_%88HEadlAY+5-NL4-5~=u>zlJRaKP>`H)mc zMyqmYL4I-5Kvj;)LjL9Ta$>YlY#fnsZ8-m`iX2j-T8`xxUotkXM+WqenQ3ozZ&eS? zA^3vHJC%h~k1=*mDWA+YKYmuqp@GGW8y%h)8`CvCKDorm@IGCu6?6Gz<7%|JdqQ8E z$ty<@L)J4MF^5O1L)H9CD*LKK*VOcAK61z8(D?9}+4MD|6T9?Q>jxHi<`?w{K}KWq}gb{VG??*S&6jvA*uHdBytNyOz|~y**!S>}<;O zI6rTm`imGVnp@5}WTpOf^Cw4C>VGx)A!o#xeU+OfchV{67Y=>%9xHRnllkWRP}`&! zs?!+zfE*YezBn1ac+W`jWf$ZI^GgPH56WtdOM@luV-htsj^hnpJfz3-udfVGjMs*T zugR~h7&q8Gxc|cC?E}N(XP%iUGBH#$|QCf^tc?DpbgwI7)~hwYU`P zz>gvz*Opc+ycqYmxY_Pt)8`;|<_3Y-855zwm-HLYFErL0r>@MOLfWroUURI_{O-b* z=2e*@ADfqX4f+yp7)QzyDoUhLWE8Ru9Vn^OL_}B^6(OyowGKQL#l8x-WF?W3IGC+{ zM#f2j2R+-?DQ*>$Z-n2NS7D^$P$W+1M0S{1#wL#TEFAck9)SlrZR}IKQ{MF2spNbhs9b%3d<{Lu4yNc zw(zVVBKO5s;epP;a)f5FP_iH2R{73OjHTk(Qf?qaE4DKu1*sRO2^YAf>?~Qjwp5XF z!Xyf0vji=wPoS6mhu?HG9pNP-%jydm4yb3 zB~C4$X)sOT7Y=tar59?j1kDFTNWMJ`REqzk00L&vP>(~FANc~nNfVoZvMjSrtl-E< zTQ*ZVu3OmOnPO8nww;KO#E1b=CY7J2!m@>eE+ru$6^5QCl@motV)?H0U?{1JS^zOO zWXC?C*xpIxhuV{Y6(=EiJowLJ8r#+h_R6l>J=lFa2VrpO{7T_ zE6cYdtFS-h*~CgyTl&gLlqj@^%8kP`h&90%YTsduFleI!WjVNbnDz*KT8E%pKtM%q z;vF8U`1yUsz4&)S?RtHX^J(mZgl`qdhGF{Sw=enNu0dXfP?(dY5ZCQ%^klJ1X z8s|@ka8>165?_)i-fpe9Rt%uPU8S@Ts`tm8(oI1+m*9$BpfbpJh6zq+vQA(D1c~Tb z-^rXoETl}sL}I{*SQ}~{?I0ee#o7#SA=YO2IZhTlZu0MvYyak7|L?!eGvT&<{(>c! zv^1=jD>mKMvE}bOx30c)yS}RbuE96|gr3&IdB4R4Oa9jAMnEM@!3(J;!G!I96wB;fMh&-47D52;>7(4c(61fp>v zQenr$F7RLnN=23mZ8r*1ywF465xLDGiN%hVOqKc`VJ%H2lYXkf3W7LvB!TZHo^4(D zB*;qy=-3X@$WfF72G65VNnf*QqWvXLiYSQ!$8kL06LC;D*%@h>_#RjuhuA&}Od`Pj zgCG@=bSW3+C3EyH5c5F6A%m@$Z?AT9<7EXLv5u|ZKGNn)gk<3#`WP4y`Yeo~WPuQer&fYlC>Qj!3QxLzN@B;*R)XF9UJ+WM@ppu>9R;;%y(ILp zlhkP2iA*kP{VHB!tAIre?{HMux;=D>bj!AsW2LAeESwBI;bhP8==A|z!r%cS&2(IniLMZ4h$}qJeq>AYE2y! zDaMpx;v`<=MoG`3IIIXzg+*+O$corQ9AF2ZVDSQ>MT{ka#72QwI!J=hmyAj5z(zg? z4#w_e>vuXeydt)NCRceWj+cLc=t+4>2Pq3!+G_m{`Anh9Ff>4FMXp&4xc3q_WhkPV zskI0!AK;e6Y-o7~9UYLz2DHHbRQZLYLd2nFhsp&ZgC&r^RHkH@(2GGBF2aT z$dd=VCfi_{2#>5oDno)$enh~>z5O9x=7a2Z0Bjmy-XE&KQkJbrK0+(JyD5YMM}Ulz zl3(${*0;O9u*ltT0xf~|;>nQGz)3Z3sTBqZTQ8}k2$4kAb>C{hF^z$R#5BOLiflJY z*SsA*7Nz(-aqs(NV3rOYFZ7^FsLTmMoE@b>9zER9k8$zd2Qd_}PUI&}0Bl7Ef98gS zJTdEOZKZ)MKAF>9hgU~&K(=>jIshru1TYr79?uAqkVI$Up=49fhaqEzgP2+JRS3&d zBzj|R*YZ-hxJiDXO?>}p@P>?(LcLR5)FG*9Tm&i`xY~1KXzF9QLKH_rTTm36xW*wI z+oMwwLZaBb42t_HcGw?;1oo+QdhiyA2gQLGTRtm6&G7DKaWJ?X=#3KiVvWYw_C+tO9f-#*nESgB8NH ztB*B6^sJRDDrpK!k0l1skXa{5EsJCs!>m9FCl-MZcms$qo`Z)dH&CQYaB#;DPt~nR z;8=pnr0XB+$r=}bX!iaDO z^mpw?wuf+Z{Fp+_3KQjpJvCQrA7Uv|Mf9escLluAO&#_n+yuiqYdU~5rbx(`u@@^1 z@P&2+f)I}$%-VJiHrDy^%G+BK8;u)o#ID((=Xs$e5)>+f!qyIq7{U}taS+b?XqQ*b2ei6oWECZj2$Rs^vf2SMUul2jyI@nbxZjroey zCNzF$Sr&Dgb}W~W3y42g#E}iiS~`Ii+P%j>Ly}C$v$g|1oO)^nI8Ts@aAilm@RQcM zTi@C)Ori+@71!&zIkX&nmd+f>J|?*pPrD$A_Fl@NN(04a>D zW=pN5l2rN!A2&{uI_MGUL}Ix;_1IFfHz$(HlVs3>s#qR-b=3*g;6|~FrG0XjDasNF z*osqkP$c8)T!3P@(k~uuC!}Cwltel{aWLnsyonwF)IPECBZ=Sw55hDav1H0xgcOif z*onr`E$P7)Jh$hUjnd~%*E^abB(!aYq;c%PY)k_{Vk&s3HVlul!=_qeb0kKs9Y3*C z4L41M5UqFArNN2U)=78e={oI~e^zWsJy6&NOiOZ6U2OH#fa8FQTEobL2(`6#kO3)p zTndbY;Ez+nT*DA3Jbm({qDaH3n1u2NkWa0Iu*MTpi{gk_r}DF`C>eB{hf6aTxO zT@_B&1FNN$VkL2biZHA`-2yh_W^`>&wB)SiN9qOXg+f)Cs?!gStBS8-dx3RgTZ6#B zL=jvGcIUwhExf_)5sjpWSeC9XHUb5)bK{{f!W^jTZ--Y z62%@M5vR942~7Krw!>7SS{9~%OB>n-3@j=$9TRUo4?;HR8U(3h(Q}Bs5H(5YGc9RQ zAJb5ve5Vr7fPksWch-A>*$(lftVob3oKON#oume^l;X%$u$cRgVrjNPh9tX@xh04T zJC+HH#4M6JHqijPfRLk57c%Ofs!&BnsRI?x#5`C;i>i<*cmSF3}hCd7jl&~kz1`G56QVJbN7pcc?x zgHLYaQ#mM#MMa5xz67A~h@O7d6>w$tODh04mEdCX#(E+QZjCvG6tOREnM5#vwCu58)^fSh(U0O4=5^-C{y3-f~ITMGv z+Og@l0k0I@JVn-zz*D&1ktSR*jP-TDZXh)5>NCgzC9)9JhuLcou3o45)~zcfAcpVS zU_yjpER|wO4{{(|w>KuF%jniuHA)5Y-S{{Q!*hKRqZ|C6(haVk;D2OU4uE1={%T_* zP^i7qTQ6urNhytrb`0FrVH7XA*hNAjN zQrghi7>Nj9IF=Kop5kf`sPmzeE>#kF6@o?2f+h!@k)PfSg~59gpppv2i`Q(=c}p)S zvO_~rBV9gxL4!kQru4)jV3sXkDAo~>9cg>Hqmi|CYN4)L4&Q`xO~%M z`UIqSn!F$h2)O*ArGm)uXlJC5ESXd{p-S4XGz&moxESRC%^hlAh(pR>N{!&wlNB0P ztfWidFE-%w?!j9cNhnUTa-xYGOE!UZQCiVVfi8ff^wRXxEe@_vt)b*2*Pj}IGy2kn z3(>MAmzCdnVUvwQ98voA;zruZFt_z218QkOy#-729JTbB`kY<*c>^UrT^L%&UQ~FX zA&3L{qmxaBU#Cv_7mLGG(aFG26qb^1e&Hz1ZyJAi22?i0fet>7slgY@XR-%)P<;&4 zKvp7?GtwYRkGHpzhrzZ0N*?6@k?;-Zw$UA-@`hbFZtER6uXI=aYDy6p2m(`f*2Su{ zWYKB@4XOx~#VfZrDXJY5^}|vFB-AQzS_=H977P`vrTgxms%sJ{eioJfd2wAD{^&%$ zvz}JTtxg3?IxcV513H3Zk!2G~NT59tK@^mKR%#@oi8a3=A$*p9r?Ecut(A|| zDZljPMLBhWKloXucSRTz~T2Nr#ohNBV zI8n+v)TDM4ulWUMeh-UJjGzAcbF+FXxo!Wub z(@(_-N0kXZ>!kFP2CyWdXAuI`rW#Q)ZtZA-`_N>T--Ba7PKt-AcLQztH2taQyz=Ku zBu>aP4KO}%XmtEE1s#{X2qRJWB=0|Dn#klNcnmB!D0h-JDL3F?Rs_AHnMLQGGLrgF z(d@q0auvoU6A%L`IPz0Vq%%Mgu zb3CG}tKs%^L@4SL3KfFMaDP8>V(Le@WQgUdj_CSOMnUVyz;G&>6e-!uA$jq7A}`bQNk!o(a2Zj{kg8E>U=>f-C3e2l0ul;y0HX8}li|!!LPVCM z6ro#}bFaIfr{a^5TX-nOOGv~5e@mEF5-AifFb+qyt>70_lnTmJ;*m#)U4-c%ECNbE zPdc$+s~|768sX}2sON~{1Dr9_H%$On%aOADU;_!r!c%E~kpdl;It=gRn486@i{iL< zeG6Su4WH6c=|E%7D-SHE)y2s)9G-{aWzf9_k9=7AMq?A>q;i*sC3Bt&1qB1B>LX!} zra!T;`Idzp=3HdjVa)lUka8GfdOqg}9&aw>{&N1vD*nv>^{V{*U+>K?{Pn;6aU-90 z@NYN&k_3KnEWaynjALNP8!u|#cC7!zZ3o}?5)q#~(sld0-~ZBZ^y$Y>-2R>qzT%R1 z)i3M5usAp-hJnLKl+;fFAn_R;XB{=v9)h`_|UR-cOCw? zGw{7*JHLL{`#%|s{N2fquekchlv+Enj zK6LxWn^yIHV*T+C-}#0kcJ+yG9RJAOmmlrEae2=@Hy?lFv2Dk9J#x=S@9q11eCiWD z_x{QKZ~E%!PoH??-j99m>WAJ{eCdY!j(j0G`QbHh`sRHf|I+S9Z|nW!hWkJ9F8(+Hdv$y=0v8TUx{7v8b?5F;0-|4?Q^~sH&`}BW(+b@6q(;t58 zbD#Ndf1I1YXvL<_A3bq>{Y9^S^U2TO`t|oTzH;*|o4#<{Bk!v?d!9V`h1m6bwmVD&mN|L)VrJ3B}Id7@=a9kbj$G$Dt4JVQ^ARBavdp zyyifXrzHYKAWS)ejcLSttjDoOM6shK@05^EQ?77G0z#T|JQ9H)@wy01o$h1cxa1t> za`LK^3YT&q2F~a~2*ptwjW3|YHF<_}OFM-=(`i%Spn2PYW>#!oGod=;yvNEqsk|r1 zc_SYKm>h^hb-~LbWUJ6(23~TY%9MZ-a#LiZR@zf=3mTPN;joE5Fa(DtIwt@$S!m}` zb5hjO8H2AEIuCKeM;AVU1Ne{$$2Z6HWN?nZ=%OPR^wdyfg*7d@MI0BUsY|1sb2{n> zj^9jw-HJK#A`xh?X|x9Y>uc9~)h#95ryV$-5df4Q+Za=*8kpjyFgmcFz{l zFs1_$0#UmX<&o|yg&xD&CK|mtu@0-I;^G1)wMVKuuF>TI*=+WYK@DOBh z*c<1VG$JWva6sD6Mb26|Of(%;4huLVvgsh5pm9nDQA;L|+GQHA*xo z#^==+jubhx;t)5cl@Mg@cGK!`bpB{3CvSejPS`oMV`yG70tzA7F_l?Y zn7r6x1Zv9Xn?Rmr&s4lzWx-f7pa*61@?3L6>%7YRiK=SNZCPO?G2e@6mA5a48oXJ zWI#@?*nl+E+SQ8pR;xqP7hNzhrbo|8=gq4uR(fwW(N_=1u>th8V2fGHtE<%25TkkC zw6yjZe+QW5q;{su%mv!s^J2eWVdp42{VoQ^$*xu~CkU)T+s(DqqaAnbGR2 z&joNYrAiLTm1`t2YFcsNh@d6 zne2uxDocyiU}FFBrP)tNvwbJe)~wF}F4;C#pPZ4Z7jKe_7j zr_$$KwQ?5VGLJl&FF%@ZJ9b~b?Wubw7kB~5aP|V_XZ}22ZfdJ9AG@FYm|NCSSunzX z(jdnwpS|F1t9UtG?bLu4cGXz3Yg!mS9P?kGHH?vLld z@hU!teTL36{Fytx`=+6>tIYjr4lqmc=dCrHmpzYgb3ZUc)auZ<@6ML(1oqvjv!I#P33qnaOmvBy)}UT}X1Kf|xDA^gAk< zi=9>P$W*6{7nn~7$-CX87E4YmB4tfV`6T3FzHS#8@X7U*MxL`cXqFL zXvc)?yKL{SjaOEUEQ}pbHq}zKskSehcirZ_I|oNg@AvHxJNvc`sEs{lPv~RE z#7;Z9hP@AEd#~-da$8lny+hXdvY*q_S-DtyWY@ahIc1)9Z{JSzy0ibX=`vaIU~Nxq zZ#$!7EAH?1`kifF!amSzHMTdubyweIV$udyO>Nx39NPxY>SL1_>)kX>e@1Wpo3B+H zC+%f)v1_nai#Kjd&+2%NG0Daa_``;4r?0L0yb+@l^k?>l-K~9k$26S}MpZeuV~?8L z`}i*8Vh*wMtR449ThtDu~FM)*REBYHseFJy>TrvcA2!}T>go_s*O8@G`>6;PqS>Dt!LKmyVAaF!}tVm zs?FuAS^TBM6R|qY2WQE%Cw6vBAfIPqoLEnH_U+#%@%Lx(iCO!{_EtQ!?Ep5Ie3{r1m+hG|{^s7(;EwT~eYLTEd)wZ)Z^KAjeU=!T zd))8KZ}^Wub{vp>llz(@*3P^C`-(H{ZG(rketTdbv3Eb4&9B+jzl#{(kxJrHPR8+E zdK;6(WFP)CiLpD>v-$9EfLD8V_HVxCdE}fmW-dL(8;Qpa<7Y9o|F?)+vcG$}j%MS* zU~S*@J^a4qGdL6vT{f`OnZ&MX_M9y%F-Te!TmNIz0)7e}@hkKJM9Ln&4jdbb< zHyqF=7n4JE--ZLpVB}0<_pVwpbQw9$%{a!T+jmb~`bKwO?+{#KdcGapaUikDZD!7O zy$81J>7BUrz~%@RxzSL`Gd|A8%d+oXmv(in=YQr0zPZoP&yD-m@y#_fbiMh$#n901 zD-Bm&YsPKPNUks+Gk$v6_HgamkZ(O3Ah}#T!*|UW^JB($pVMu2ZT+Qdx2|2=wbfa> z)(pR5-<9SvWSXCh6lYVj`qU!b1IecDF|h1R?jDA744d0sUE3qpch{!O$LQ{D*~Oja zcQ#4()$s4GE_|RCPcrv0Gk)g_UteIGiNxNpXNo(VTYsZj|1~>b-1_S-HRJcqSl<)| z)AvtN!B4`!-6P3{XmZUhdsuI6CcnAy!uGp;G|ziypP%xtYc6a(4A$82 zI@gR3X6$d8ezK{vKw+KpI8G+c`eF0@CVYC*pL;W${PTkm*$w;K-G%FCxN*g~MbF^L zOnak}u5JtdW`;7wIsN7;EhBRDtrL3uwaLiHYqP??WvZ}mnJoF2J;|Z;3;8Ga!thk!Z9Q_PHMZDKkG`n=!o>*jKL(xz={T1qW#T5$1spZ9s2 zQ%*k2M?Y}fEhs_}M@Df)MJ(SfA|j%)BePNjE$XhU>*X@q%Yl(=7QA*H*70&#*LC;z zoTN>fgaI7E0Lgj3o{#&v@BjV(-_Lv4THaIye3EWEg&uh3k`oSVs<$o4g5G8x|q zTtBk?z;%Ma_S{@^7}&zG0>|>a$nmTw+uUUZp5=Y349J;n}Bi|1ro)eDKkc7VD+cX-wo)h_@pX~@kp0R|_2<*VKMJ`#% z3Bh1QVCQ-=QMRMYwS71AEhpr`$hDk8rqEhg*~v_r3X2MhTKr5$)2wFfQq$}qGTWxE z1^jN#*5#V)?fY)xbNg58S_+NJyv(AeWu=7^m87ts*j#+Hu6SQv`;mq1^{=$;Wt|u} zQ&^DUR}+6S8IGUK7BKUa!3Rdp;%r(S9<9isMSRqiYGrJoj!y^W&``F1&E5$)w3tIR zJf!r9sto7yTlZD?Jak26NY-;jPK@emMd!Mk2j%E^Whnc0F(N0bIWCP4RQ8U|W0*SA zc&UEs$sz8X#|V$qPaB~jt&EP156MAYGsTY=6o0g!{Rd~Y|2$`GF}t~W=lyK9@Z;0- z9vwHsyL@=4vd0);Y`@k+`k*nup5Yi9WDBJsxlfLc4rdE%28T!W(6E{6=)|5q6+N13 zY}r$psA958_WaVoa8)aPdG0O!!*OM3YkY{7h*e72bBnCI;LR`_XCOQCKmCe9hvdn#k&qjJ15 zJakzi$4|S(vkTfEIlKKgOIuz=ck`O(P__{F5j6&u*3>s=Gr4SLd})2=H@Q3z^}_|` z_%D{0>b{$4$acMbA!9bfHZjHzb8jiMPGdo-{?r|%`tMHOk}-3idzOoNcM=a4=C1W0 zcn4_kgXZa(zxj-rtKo^E@zH~qso_LlCaWU@vKAf>E+~F_LHk2*Xn*Kj>@c^Jm*{$4 zJ8UUz>;un?8Q$8rgy9Vgj}OX9^#V0K*udeQbC!0P5R${SQ_Q?nXqlclUb$q(6Q#4J z?&FEZ&dGT$$Zc)NW!i5kO?!ExHeFnn4D)BGHl?Ss#UEwc?|ozYiK6lJ`P-a#0wkCR zW+xb1*h2Uuj=--am~)ytEkP~_>{@o=P&gvAJyFXnh2{?5b_3h3C6(H}4tc<>C6sfT zJA@|!C!{^cDI}dVZF5x1B!!NZf#s08OcGg^bda$~PbU3D=XB5|>C6saGJBX&S`6e= zSEZ)fxM#GF73aZ=2mWN*l8YwB^yqW~d2V6*%`NTUSysy+ub};TE&AM9g?rBGE7&!s z_|?K%%63!Fm9gvC%-zzVAoV6%3zrF|ekkK8O*s{KaS)S`(^#kl|rCLbWq<@l#>V%q?YMW2YwV8%S$U3UheuXH%VkDQmuXMx``#p|8R&jwmi$S5-n|= z>w;IX$~sQqCBjus?t1t@Bx0C~<++whWUi|#<<`*mQ!TwTZ~`a0wJQki)Rro>RAl2o zwE;xRb3NgPagplTl-1w zK&WG7MV@WzH1yp#YAZd34|-7&heA1_6WO8RQp(bf_ADI)BDSG8<{UI9wPYX}2UhnQ znnTY|okS){f^kBhl`U*4Z0Tf)r@74R$-!l28JCvZZZLVZ;kS2h$e7@^7v>sTuPt8} zbsT&A)9(}C>HYlQlsKJffAOuST{K*h=zSHXFB^~$57@`}S-PqR_0TwhZx?@pCbJ`Css5vbLlp2q9A}L)*COAwKcwXRZ3>`-* zNLa6~Q0O3*aqOgy8@PcscZpa>p=YZkzzu!8-A!=0#PNLY_I$emeyBs2bqs+CTRWkl zjMB4wOIlK-QpVEuXeoevdTt}$Ow^{onO_WdyH%`*P z#(QjSS@^w`JHc=?UZZ^9PH}$xsw;-U#;N7g3X2-}Eb9dYyAvZ!FYBy!5|(vVXROy7 z`%Qw&b!W|@p0{v{ss43irv19r(+k<4>hLp(_HjiFHAgA|ZT(Dz@duv7LBUY`ZZiMC)`E`T~4l zSptn>>GEAKMTl4cj*LQO)?cSl>I*+|y~t0r%gTnpLtsgtC`}L{R_;g``GP34W1Q1A z=C!=&qeRfh)zHw&-4jL2T(b zn~)!ZI02mj5y?WB^|w_@LX0EZVLg%rzY$67kZe-?2B#!rbYKygDnX`6Qu2{*yCY07 z2SX17>DfVuJ`Di`odOw-h{$ts+d~oSmj3$x4*%2N6%8cpUbtxKn%2g3vSs6Koqa#++PZS{ zcKz=Dy9O`)Z|{Fk$6qFuTL$jAuM^yV?$4*;;;hQZykN1rcx_YhsT5}5L1|c z#%$*s)VyHv)Ij0LC4Gfkay7KL*R7#Ng4=q;p-RZz!ZGnocw-LVQ36gvL~|;2123>_ zFD7q$AOxr21H!flGFVLHxPWk%(&YOzN+=eehKUQ9TY*p1 zrLhMPI1mO$_!fcYS~4_M34zbyrjF%%wkPqrF4$BQYrGRDN&+yfqbo6N$|0s*n>+$s zG`o(KMu3d+grgIIv!-66fUpp-aIL`Qj1y|yIZo3wR=^C^6oqf43LlIi6Smt{GDQr) z#8Nt7Ms~=s;U`IA`#c1%g?wcX0I0E}90#DVwCm`G1Fo`ta3k@FDdI4fbR`nXrwzdk zd>2nQSY;{P*j2!-En-Rj3|%J$t`lMqmKE6auRyoR1+RgoAPjQ{T!I%+wW2f*SQ4r< z_pmeaYH1c%z>D={hkkUMH z04YzLWYrzyXt3CdTwpM;yoM8Xsa}3zizOp10?P*v1DSwdmdE{WoHAIKP_&~o@qG~k zGQI0q;3VK5lWiqI2ssS#9AzR4zG_{qFHh~9_4DB?F z!L&$mQYX@^yT<~;{j}%A@PZKS67}9l)CGx6hKd~9qlBniyWn=10F;k{fcYT+P$DpD ztyAod>}hx*w0t^4d&s~O5VYKZl`IW@<2s3V<&I29N|p&wp%pTQRO}qS-;K2Nxx@Ge zdF~_Ex~{FrtW-ihh(g%*y%Gr~4&5MkZhaD^J^h_AfFWGlG4g2nQ1%D_m7+OfY4i+K z0vEL`n^EFLp+idWQ5~rU8_g1fFQM!999qfT8DnV2O5R*?UFn+yp5R+9Q?HD;I*|zF zAi#rAOotx}6Wd03022fivd<)@7+#uyhAgH?LOM2T8Jw_T8VR0E$8i9HC8RSn82P*H zX`DZ{WtvzBIyZHWb%kg<#i}BvtpJG7_quSR*o(=BA*x+fF70q_&k2?75QfWdH)-1g zTp(9;=zE@r&s##pUIdShBlw)k9cZ$dN1DCAQQ^N|c!5}Fz7d8ODV&E#a6+;~c;yw1 zl9KV^kj$H)=QK1ti0wG&sY0Dti&Q8wD=xU|djMgg;78g)J~~P7A)ME>bsRWCkmz;q zi{Ot6Ba3!WfVLOr9_%1tIG}^2JiGkC+=H2Ld1s^WmA-YbPUx#wQjkVz#I7ePCn!Jc z5hUd#j;++j0bp`|?np_395VEJmepCo;r}k+eb83utU_0O;RH@7VMf9uwOOeX^gM~8 zCCrsY1O}49w0+NwWa7!xa5|4k_VyE6u;nNwygbK>pBQie4FU*D8*m%p$Q zkFrwWN!rJ{fTM(9BXC*wFrln(+g1?aABiu2o^gRZ?W%x;lR}Oi+tO_}xIURNK^~(l zNHfO{;5f*6!=O?Nd1f*CIK|a01f$k5yu6|7`2Q?1As^_;w=`VYL@EXsND3M}#96Bu@{Vi^MJk`S_B zi0@K;h&PPdY%<5ft4n99L9dA z{>PKRrj;^Q(k-MyRYp)9gt+fmHt_~Of?9iA0Zd@Sf*l#R=O}R+C5r3=Npg~?_}hAb z2Y~aOfa(TBCTck8I`l_YjiXq4Zq)X4MnCi9N=gsp5>%1Dd0kzc-g>g6eA0J~uVY?% zX6;CeM1R{Fqy%n4ppoxP`iD69v-Ff#lWmK{((L7y#uNFG|rWJox0 zY^&1PL~3xtVy5 zZh*D9llm9RRe`b!4}|;&KI~4QdVFCsX>z1(3!hE}8bpyQU2uTfU=)%0{b*Gk5|tGd zkW9?vxaSWL8i1Tln((#nHyi=Z5tvW~%TaKqG92XYi z+WA9E>Qk+VnN+aLryYnb^uU&FYuK$zB7sn22N555;^~mo&+J@S83Hp+@E6ukBFNGC zh{XWjf$50<0QH>$V-}^oE|R*46IUXNDLT4g8G(m`1fUP0!H%Ttd3I%jvPNEczP|D& znUocN9Z<(gY!j6(nf&wQhv0$N)gWes*{2?{{a(q8Ygl)kVJQVS?? zV%>YR(G*Rt{4dRJ3L-_%AMFAxRjuHH5{n_=%kMxs;7giqTf9F>i`Oq+2{VdtbvRMJ z+_36bMs&a-xe+aZ6E?xiNHMNY1%c`K#ioW1Hf}8!Oiny6e|E#weJv>!u_)4U{))x* zz>jWh_I-$soqum*6WLFNJulXY4lH5q1EGw3M}D9wL12YbF+Q701tP}kd9o!1Z>c!N&)i?H zW2fig><2pFA2>4A6lK?#3w0z@&ymz3A`Ntsx>5gvq5$3yfctcO9?r?Gh=c$QYauZP}@qLf07af*om`{Lk;{I*5ifRreLB3 zF!-yc<6rBL4n<|(CB>%tw~c5HC*OW{NyGugcCY6}=nqoFjj5HUSA-KBBBPb0(uMyH5 zJ+r1RwadRa@5yuMo`>Doa&UL2a&KKCAHOE!=kGbEPP)AhleQ(LD31!cKr@@zf-X`B zr$A}647Fq?b z*^NlSE8B5V90JyAs4MFWrnqg}U6V9`0Gma#{J7YcCyXr*Y2%=qAuVApX@q+R*`JrF zP#FQVw5x?lQelp~{B8A5bW$RMv|1W@P0iOm*6dQ&Ng%E%TM+DB9UiZX!u*T*7Ay1= zwN1PDNWCj`1QqgqMd}EgW6e#Fvow6>c)cC_x2~4Y0r|coL)yXk!X5l!quGPV|0GXr zhbpBLN^>#ECN>)|DRL9c^I=L}l0xfKS$wF8`4ejiHHDD85IMjN_@#(gwyGdsUhj+W z7T9qF^;UlVe=PxRG@o3=vPykx`SI+Ly2Q_aWpN7&p@<4rs|lEPbbh#@i8xN3%5!xg z`Eqd!A^^Zmb;I?5n~5#7xS$S8-TS6`U+U#|u|S8;t|yudD?xN3XhT?|#64lS?0X<8 zaYM>6)NgVp>c}6rpVLY@CUr)sxVotYRszDscJ}eQSgiULwNvW20Yx+iF&OGQPr|BL zj6|XJd8q`c`a%nOHVyF4{Qh(6WQw=Jp4fxYl#(rcVAUF;FBKll5m6pGFt2-!n4_qV zp(uRlqF?d)`7iK8>z@MkO2HI&2L<~5~jmujdDW?F}95IV1FWp z?og8^s}M_+q$omZ1c57y9-6(!<2~(iZ1+Hi!#}>zuMh6~{-`re2%4%3W zEp;3nY?g0o(ujnh?dcF@OH~?hq{7YQ(EU6bQt!sEVvtm|Jrt%uGiX7m`>`cWj`pe9 z1d*Z=iY!l~o|Z<6Bx(Nk#m(RsTQ;y*Hc?376rtI947}pKOEcn`y{#1Ojr@%L-Wxk$ zV#p={N4vM~&m`q1GvV=kD`6WY5#pklp_+lpu$|P3Y=DT3u{byg5Rsw;=%Is;G_|615&Kk~waz%_KC-kqKxQ~`{zSeBDM%IPP{~&g zd!dDufqY8m_+Qrrf|?mcDqxcGCY207Vjt6`q9|lc zc>xLFTg5w;bU1)Nr7uY)%Pyw6MQOzBl~DCCN@G=tJuv}g^l%1>hExl(C!4t&(8*u( z#-=y`L*QZ&m6ViFf{3vx>h-M4SoyatZG^46<%Udf?1jbzYV*vpCRAPEomXwFrwWw+ z**PUx93<8{*42pgm-!zqE#ZUI^b>t=E8~O&@b8tM%7n*G*2k9n%#C$HnvdU*5ycw{ zyvk+vVX!h8&9RG}OIpdJR4^#Ko%{dt8Ie0tf+N5@J^4selu}~No~)G-krhhLH-N5(ARLSH5K` zeK;mqZt9YWog90xkvej6{DNk-7;P^)ZyZChZws5EV97?fFy;IIw5V`mQKuwC2d6D;L(E+tL5Xkt;uNR?~`&H?6t#s;ifj?185qx%C6roZZ@eZK-_Q2M@oY zW82aG|sb^P{gKKkZ0SJf};zT?`zddtR-b?kcdj>8{+ z>(<*geyaP<>ptOF!J{&KDGAl zzx?dKI`GTq{`Q-9fAn*IerQ3%dFzheaO>J@&t0|qv7>)=`-U4<^nQBXv5(#P&YSGY zj~_es@w+cP(tY^6o_lUQ_O7Gbj_rQ@o=<#!=l$^$pYHkmzx%=^4~;(mT>8WVcYgPi zLVf4v-Y?(vA3qb0{ruz;U%vbMpIh^R#ecHt!K45A_KhF^vv+^z!DH{rv_E=Bc6zs@ ziMO4G#-ysf@igC7`7OEPBO}GDRQpw*&9*;Mp4>Qjd0&`!60lmTn9#3=1Jobj z9=uZ%A$KkG3ELgiz}Q(~V}e*s`G{6FI!73VY%~ zGnxQoHM`U#W8R2iGq(fOp`u282gMEA1i6!J<2Vp~l0M+Wsg#?jIY3x=yCI?enmX7_ zvoB4BIffokt4-M#Mc)FQAuvs`(N(#vE@}#%NC})8ZR(%DhT@Wj2clppqdE?o%iJi* z|7LHB(}{t#_wX7O&Nl-i$vCwQvu3NZh$xCiZnzuPK z?H_tzW`xO+k!nRvzEe0~gsm$++F0ae%lq%owm)!Jb~3_Veq{4TSVy6=udtvNU(L1f zf->_$PfSUUyn~lxhI0$WGhU=?nte}c&KV}v&}t^%oXs|8+Q0Rc>G_rFE90ZJ4cFQW zbtkgLC$sJU;lXVCufIGwl~>UEd{bHW+me?lEgDk;ytG$1FH>l2TTpvxxh>Pz!bobz z)9>%jJbU_O)>9|zcy#W&oz?5lZRw&V+A=r1+$)fiJ)ZFj;i>k`(5Bvqn>yt!D9wET zh$UInXZrH$O0{>9%*)f}sZ;MHO+PmMw#%z{=+vI~t9WD%@V}a8XS?((eYQ5|(+)E= z>0y}J%Q{PoM&wW;$4d8|{k|2Ip+q0x9scUX;LzBL zx3^yMz7>~qj!!G#{Co6@&ej#?t1p}wny6Me8XM=EHXMrD=iw1PT9co3?eqKJ&4tMZ zFRQh#r`kQUnY5y-$z^Arn;h3=;|E7fe=8=2D(1@twINIljvPFFVL0Ba)wnr7sK=>J zk4tWU-wHj%?UkWD=Glw4uRlLr@%|ZuIdc>%_-2!7;EZ9MI{7aehcVOpI~mT@+~{KJ z>qB$0 zYN}S^Q8{|hWTugW=Bu6i%?S4C(J{V6XwF)}1>yp+g2b_6s`nWe?F;M+oE7iSlp4$1 zwq3aM?UQ=9bmYye%}-G7E0@a`@t^s5XSwW_yZ4mWm&<#~YxmU7b$6d}eZM)`bNc#) z7j2w8=69`?+V$RYd4HMb_-h(j8+LP=V|sA8zdPt1tWNCSu*2=Qx9p2I?ii1UHdijJ zbjSMOrct@myS#Vr@SfhGO#|^@Qthqm-_twj4a7SyHup>Rpj7~y~*U<$9FR>+7P== zuj7HJZ>_QIDc@l}^zkxd8^cCoYOJQ;*UN4PckfI_@acotcZ63kQZtvSxzF)MVxzj- zu3nLB+=LHR_r=x7tjpv&&gY-_YqDX7kj9rM<7v)X*ZMPk@4VRFvVMGmH^S!g)mr?e z#1pYP!w2V#XK(E6m|%RKiE&~*-L><;eu=-ogip+wKWlHrL)#9r<~1Mfx7UxU4FdzU z@y|QQ>O`_`*Oadk$FqEy*b-az&KrO8&uMVS_^zGRv3`5ozIf;Qk+|{_F*g5wub1EO zC5s&g<<7}@%@b>9Tz|ddjPAK-`l3-7{@87Y_!j`)AJK z^&X$Wp?GM^z%FMJyJoEC+_4hVE_Z5tLaQ%L@;Q;=+ z{*OLx*}v`JOuNk!_am&yOrM!Au1@>v?;lTAV(@&Iv#E*=_o)gT%J6{=Y)B2RKd4PE zCWq>s>kq2I$eG0M-BmTTg&gPBIL7AfYGU(RcYp5?Tw-Rv9o%tH+2l6U=GxwaeS3Q+ zHXqy+!6G*pDtXGs`M9NaT;5zRuj3!{1K-?l=;wz0YdLZa4P9rB`wR{3zSwZp)#koU zHIggL#~MG~vOQeAI^?KlJtUWtr}(ZpFhA!0?rGcB&aKXm@^gh)|_kh~iJqDJY%H6|oj$w1TT;3khzq>l6J?`$_Ry(=N{H`@pJ2d>e zT*e2g@g#E}GxzU$<^2oRWHasE{@=Fi)%Cx2v$=o&tp1HKm^pt$1wV@a zx<}OdXtK|~z4W)bHhy#AmCbkE9@F3Yt-%@guGx2ny=zWVzT7lB6TdkwU$lDP>ebs$ zKYww}ij((G8r0lWv-gIo*uAsn9_0(o{q_}aoAQs%wfPwDs_7wqn@15({1{DDo|Oa3 z(md~-etycoYWLUH(>ycP$Ov3*ymq*09sRBDG2`X!nScD#OlB+p Date: Sun, 4 Jan 2026 17:26:42 +0000 Subject: [PATCH 04/16] Revised generator app scripts using copilot --- generator/app/ProgrammeParams.py | 47 ++++++++++++++++++++++- generator/app/gen_random_telecom_data.py | 49 +++++++++++++----------- generator/app/gen_trans_data.py | 35 +++++++++-------- generator/app/gen_user_data.py | 25 +++++++----- 4 files changed, 106 insertions(+), 50 deletions(-) diff --git a/generator/app/ProgrammeParams.py b/generator/app/ProgrammeParams.py index 963db5b..01de6d5 100644 --- a/generator/app/ProgrammeParams.py +++ b/generator/app/ProgrammeParams.py @@ -1,8 +1,51 @@ -import cons from datetime import datetime from beartype import beartype +import cons + class ProgrammeParams(): + """ + Class to manage and store programme parameters for the telecom payment generator. + This class validates and initializes all configuration parameters needed for the + payment generation process, including user counts, application volumes, and date ranges + for registration and transaction periods. + + Parameters + ---------- + n_users : int, optional + Number of users. Defaults to 100. + random_seed : int, optional + Seed for reproducible randomization. Defaults to None. + n_applications : int, optional + Number of applications. Defaults to 20000. + registration_start_date : str, optional + Registration period start date. Defaults to cons.default_registration_start_date. + registration_end_date : str, optional + Registration period end date. Defaults to cons.default_registration_end_date. + transaction_start_date : str, optional + Transaction period start date. Defaults to cons.default_transaction_start_date. + transaction_end_date : str, optional + Transaction period end date. Defaults to cons.default_transaction_end_date. + + Attributes + ---------- + random_seed : int, optional + Seed for random number generation for reproducibility. + n_users : int + Number of users to generate. Defaults to 100. + n_applications : int + Number of applications to generate. Defaults to 20000. + registration_start_date : str + Start date for user registration (format: YYYY-MM-DD). + registration_end_date : str + End date for user registration (format: YYYY-MM-DD). + transaction_start_date : str + Start date for transactions (format: YYYY-MM-DD). + transaction_end_date : str + End date for transactions (format: YYYY-MM-DD). + transaction_timescale : float + The transaction period duration in years. + """ @beartype def __init__( @@ -13,7 +56,7 @@ def __init__( registration_start_date:str=cons.default_registration_start_date, registration_end_date:str=cons.default_registration_end_date, transaction_start_date:str=cons.default_transaction_start_date, - transaction_end_date:str=cons.default_transaction_end_date + transaction_end_date:str=cons.default_transaction_end_date, ): # take programme parameters from class parameters self.random_seed = random_seed diff --git a/generator/app/gen_random_telecom_data.py b/generator/app/gen_random_telecom_data.py index 9a0be63..cc56a8a 100644 --- a/generator/app/gen_random_telecom_data.py +++ b/generator/app/gen_random_telecom_data.py @@ -1,4 +1,6 @@ import numpy as np +import pandas as pd +from typing import Dict import random from beartype import beartype @@ -16,22 +18,25 @@ @beartype def gen_random_telecom_data( - n_users=1, - random_seed=None, - registration_start_date=cons.default_registration_start_date, - registration_end_date=cons.default_registration_end_date, - transaction_start_date=cons.default_transaction_start_date, - transaction_end_date=cons.default_transaction_end_date - ): + n_users:int=1, + random_seed:int=None, + n_applications:int=20000, + registration_start_date:str=cons.default_registration_start_date, + registration_end_date:str=cons.default_registration_end_date, + transaction_start_date:str=cons.default_transaction_start_date, + transaction_end_date:str=cons.default_transaction_end_date, + ) -> Dict[str, pd.DataFrame]: """ Generates random telecommunications data. - + Parameters ---------- - n_users : float + n_users : int The number of users to generate random telecom payments data for, default is 1. random_seed : int A set random seed for reproducible results, default is None. + n_applications : int + The number of applications to generate, default is 20000. registration_start_date : str The user registration start date, default is cons.default_registration_start_date. registration_end_date : str @@ -40,28 +45,28 @@ def gen_random_telecom_data( The user transaction start date, default is cons.default_transaction_start_date. transaction_end_date : str The user transaction end date, default is cons.default_transaction_end_date. - + Returns ------- - pandas.DataFrame + Dict[str, pandas.DataFrame] A random telecommunication payments dataset. """ - + # initalise programme parameters programmeparams = ProgrammeParams( - n_users=n_users, + n_users=n_users, random_seed=random_seed, - n_applications=20000, - registration_start_date=registration_start_date, + n_applications=n_applications, + registration_start_date=registration_start_date, registration_end_date=registration_end_date, transaction_start_date=transaction_start_date, transaction_end_date=transaction_end_date ) - + # set random seed random.seed(programmeparams.random_seed) np.random.seed(seed=programmeparams.random_seed) - + # generate random users user_obj = User( n_user_ids=programmeparams.n_users, @@ -72,20 +77,20 @@ def gen_random_telecom_data( fpath_countrieseurope=cons.fpath_countrieseurope, fpath_domain_email=cons.fpath_domain_email ) - + # generate random entity counts for each user random_entity_counts = gen_random_entity_counts( user_obj=user_obj, transaction_timescale=programmeparams.transaction_timescale ) - + # generate random entity values device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum()) card_obj = Card(n_card_hashes=random_entity_counts['n_cards'].sum()) ip_obj = Ip(n_ip_hashes=random_entity_counts['n_ips'].sum()) transaction_obj = Transaction(n_transaction_hashes=random_entity_counts['n_transactions'].sum(), start_date=programmeparams.transaction_start_date, end_date=programmeparams.transaction_end_date) application_obj = Application(n_application_hashes=programmeparams.n_applications) - + # generate user level data user_data = gen_user_data( random_entity_counts=random_entity_counts, @@ -96,7 +101,7 @@ def gen_random_telecom_data( transaction_obj=transaction_obj, application_obj=application_obj, ) - + # generate transaction level data trans_data = gen_trans_data( user_data=user_data, @@ -108,5 +113,5 @@ def gen_random_telecom_data( application_obj=application_obj, fpath_countrycrimeindex=cons.fpath_countrycrimeindex ) - + return {"user_data":user_data, "trans_data":trans_data} diff --git a/generator/app/gen_trans_data.py b/generator/app/gen_trans_data.py index 69cfc8a..651f251 100644 --- a/generator/app/gen_trans_data.py +++ b/generator/app/gen_trans_data.py @@ -1,8 +1,9 @@ import random import pandas as pd import numpy as np -import cons from datetime import datetime +from beartype import beartype + from objects.User import User from objects.Device import Device from objects.Card import Card @@ -14,7 +15,7 @@ from utilities.gen_trans_rejection_rates import gen_trans_rejection_rates from utilities.gen_trans_status import gen_trans_status from utilities.join_idhashes_dict import join_idhashes_dict -from beartype import beartype +import cons @beartype def gen_trans_data( @@ -25,11 +26,11 @@ def gen_trans_data( ip_obj:Ip, transaction_obj:Transaction, application_obj:Application, - fpath_countrycrimeindex:str=cons.fpath_countrycrimeindex + fpath_countrycrimeindex:str=cons.fpath_countrycrimeindex, ): """ Generates random transaction level telecom payments data. - + Parameters ---------- user_data : pandas.DataFrame @@ -48,22 +49,23 @@ def gen_trans_data( The random application data model object. fpath_countrycrimeindex : str The full file path to the country crime index reference data, default is cons.fpath_countrycrimeindex. - + Returns ------- pandas.DataFrame The random transaction level telecom payments data. """ - + # explode user data to transaction level trans_data = user_data.explode('transaction_hash').dropna(subset = ['transaction_hash']).reset_index(drop = True) # select uid entity hashes for each transaction - trans_data['device_hash'] = trans_data['device_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if x != [] else np.nan) - trans_data['card_hash'] = trans_data['card_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if x != [] else np.nan) - trans_data['ip_hash'] = trans_data['ip_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if x != [] else np.nan) - trans_data['application_hash'] = trans_data['application_hash'].apply(lambda x: np.random.choice(x, size = 1)[0]) + trans_data['device_hash'] = trans_data['device_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if isinstance(x, list) and x != [] else np.nan) + trans_data['card_hash'] = trans_data['card_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if isinstance(x, list) and x != [] else np.nan) + trans_data['ip_hash'] = trans_data['ip_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if isinstance(x, list) and x != [] else np.nan) + trans_data['application_hash'] = trans_data['application_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if isinstance(x, list) and x != [] else np.nan) # add null values card hashes - trans_data['card_hash'] = trans_data['card_hash'].apply(lambda x: np.nan if random.uniform(0, 1) <= cons.data_model_null_rates['card'] else x) + trans_null_mask = np.random.uniform(size=len(trans_data.shape[0])) <= cons.data_model_null_rates['card'] + trans_data.loc[trans_null_mask, 'card_hash'] = np.nan # add shared hashed entities between users trans_data['ip_hash'] = trans_data['ip_hash'].apply(lambda x: ip_obj.ip_shared_idhash_map_dict[x] if x in ip_obj.ip_shared_idhash_map_dict.keys() else x) trans_data['card_hash'] = trans_data['card_hash'].apply(lambda x: card_obj.card_shared_idhash_map_dict[x] if x in card_obj.card_shared_idhash_map_dict.keys() else x) @@ -79,7 +81,7 @@ def gen_trans_data( trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=transaction_obj.transaction_hashes_dates_dict, idhash_key_name='transaction_hash', idhash_val_name='transaction_date') # add application data trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=application_obj.application_hashes_payment_channel_dict, idhash_key_name='application_hash', idhash_val_name='card_payment_channel') - + # TODO: wrap this logic up into a separate function # align payment channel with missing card hashes and 0 transaction amounts zero_transaction_amount_filter = (trans_data['transaction_amount'] == 0.0) @@ -90,7 +92,8 @@ def gen_trans_data( trans_data['transaction_payment_method'] = 'card' zero_transaction_amount_filter = (trans_data['transaction_amount'] == 0.0) missing_card_hash_filter = (trans_data['card_hash'].isnull()) - trans_data.loc[missing_card_hash_filter, 'transaction_payment_method'] = missing_card_hash_filter.apply(lambda x: np.random.choice(a = list(cons.data_model_non_card_trans_methods.keys()), size = 1, p = list(cons.data_model_non_card_trans_methods.values()))[0]) + # trans_data.loc[missing_card_hash_filter, 'transaction_payment_method'] = missing_card_hash_filter.apply(lambda x: np.random.choice(a = list(cons.data_model_non_card_trans_methods.keys()), size = 1, p = list(cons.data_model_non_card_trans_methods.values()))[0]) + trans_data.loc[missing_card_hash_filter, 'transaction_payment_method'] = pd.Series(np.random.choice(a = list(cons.data_model_non_card_trans_methods.keys()), size = missing_card_hash_filter.sum(), p = list(cons.data_model_non_card_trans_methods.values()))[0]) trans_data.loc[zero_transaction_amount_filter, 'transaction_payment_method'] = np.nan # align country codes for user, ip and card country_code_columns = ['registration_country_code_alpha', 'ip_country_code_alpha', 'card_country_code_alpha'] @@ -109,11 +112,11 @@ def gen_trans_data( trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=country_codes_map, idhash_key_name='registration_country_code_alpha', idhash_val_name='registration_country_code') trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=country_codes_map, idhash_key_name='card_country_code_alpha', idhash_val_name='card_country_code') trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=country_codes_map, idhash_key_name='ip_country_code_alpha', idhash_val_name='ip_country_code') - + # generate transaction status and error code rejection_rates_dict = gen_trans_rejection_rates(trans_data=trans_data, fpath_countrieseurope=user_obj.fpath_countrieseurope, fpath_countrycrimeindex=fpath_countrycrimeindex, fpath_domain_email=user_obj.fpath_domain_email) trans_data[['transaction_status', 'transaction_error_code']] = trans_data.apply(lambda series: gen_trans_status(series = series, rejection_rates_dict = rejection_rates_dict), result_type = 'expand', axis = 1) - + # order columns and sort rows by transaction date user_cols = ['userid', 'firstname', 'lastname', 'registration_date', 'registration_country_code', 'uid', 'email_domain'] device_cols = ['device_hash', 'device_type'] @@ -124,5 +127,5 @@ def gen_trans_data( itr_cols = ['itr_hash'] col_order = user_cols + device_cols + card_cols + ip_cols + app_cols + trans_cols + itr_cols trans_data = trans_data[col_order].sort_values(by = 'transaction_date').reset_index(drop = True) - + return trans_data \ No newline at end of file diff --git a/generator/app/gen_user_data.py b/generator/app/gen_user_data.py index 7d52dd5..f6ed7cf 100644 --- a/generator/app/gen_user_data.py +++ b/generator/app/gen_user_data.py @@ -1,5 +1,7 @@ import pandas as pd import numpy as np +from beartype import beartype + from objects.User import User from objects.Device import Device from objects.Card import Card @@ -9,7 +11,6 @@ from utilities.gen_obj_idhash_series import gen_obj_idhash_series from utilities.join_idhashes_dict import join_idhashes_dict from utilities.gen_random_hash import gen_random_hash -from beartype import beartype @beartype def gen_user_data( @@ -28,17 +29,17 @@ def gen_user_data( ---------- random_entity_counts : pd.DataFrame The randomly generated entities count data - user_obj : class + user_obj : User The random user data model object - device_obj : class + device_obj : Device The random device data model object - card_obj : class + card_obj : Card The random card data model object - ip_obj : class + ip_obj : Ip The random ip data model object - transaction_obj : class + transaction_obj : Transaction The random transaction data model object - application_obj : class + application_obj : Application The random application data model object Returns @@ -62,10 +63,14 @@ def gen_user_data( user_data['card_hash'] = gen_obj_idhash_series(idhashes_props_dict=card_obj.card_hashes_props_dict, n_counts_series=user_data['n_cards']) user_data['ip_hash'] = gen_obj_idhash_series(idhashes_props_dict=ip_obj.ip_hashes_props_dict, n_counts_series=user_data['n_ips']) user_data['transaction_hash'] = gen_obj_idhash_series(idhashes_props_dict=transaction_obj.transaction_hashes_props_dict, n_counts_series=user_data['n_transactions']) - user_data['application_hash'] = user_data['n_applications'].apply(lambda x: list(np.random.choice(a = list(application_obj.application_hashes_props_dict.keys()), p = list(application_obj.application_hashes_props_dict.values()), replace = True, size = x))) + # generate application hashes per user + #user_data['application_hash'] = user_data['n_applications'].apply(lambda x: list(np.random.choice(a = list(application_obj.application_hashes_props_dict.keys()), p = list(application_obj.application_hashes_props_dict.values()), replace = True, size = x))) + total_application_hashes = user_data['n_applications'].sum() + split_indices = user_data['n_applications'].cumsum()[:-1].values + application_hashes = np.random.choice(a = list(application_obj.application_hashes_props_dict.keys()), p=list(application_obj.application_hashes_props_dict.values()), replace=True, size=total_application_hashes) + user_data['application_hash'] = pd.Series(np.split(application_hashes, split_indices)) # drop excess columns - drop_columns = ['n_devices', 'n_cards', 'n_ips', 'n_applications', 'n_transactions'] - user_data = user_data.drop(columns = drop_columns) + user_data = user_data.drop(columns = ['n_devices', 'n_cards', 'n_ips', 'n_applications', 'n_transactions']) # create a hash value for the dataset (to distinguish between different iterations) user_data['itr_hash'] = gen_random_hash(size=1)[0] return user_data \ No newline at end of file From 325da635c6c54658a9cbb41df6f52f7e08f41f79 Mon Sep 17 00:00:00 2001 From: Oisin Date: Sun, 4 Jan 2026 17:37:38 +0000 Subject: [PATCH 05/16] Fixed revision bugs --- generator/app/gen_trans_data.py | 2 +- generator/app/gen_user_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/generator/app/gen_trans_data.py b/generator/app/gen_trans_data.py index 651f251..80d770a 100644 --- a/generator/app/gen_trans_data.py +++ b/generator/app/gen_trans_data.py @@ -64,7 +64,7 @@ def gen_trans_data( trans_data['ip_hash'] = trans_data['ip_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if isinstance(x, list) and x != [] else np.nan) trans_data['application_hash'] = trans_data['application_hash'].apply(lambda x: np.random.choice(x, size = 1)[0] if isinstance(x, list) and x != [] else np.nan) # add null values card hashes - trans_null_mask = np.random.uniform(size=len(trans_data.shape[0])) <= cons.data_model_null_rates['card'] + trans_null_mask = np.random.uniform(size=trans_data.shape[0]) <= cons.data_model_null_rates['card'] trans_data.loc[trans_null_mask, 'card_hash'] = np.nan # add shared hashed entities between users trans_data['ip_hash'] = trans_data['ip_hash'].apply(lambda x: ip_obj.ip_shared_idhash_map_dict[x] if x in ip_obj.ip_shared_idhash_map_dict.keys() else x) diff --git a/generator/app/gen_user_data.py b/generator/app/gen_user_data.py index f6ed7cf..ed69139 100644 --- a/generator/app/gen_user_data.py +++ b/generator/app/gen_user_data.py @@ -68,7 +68,7 @@ def gen_user_data( total_application_hashes = user_data['n_applications'].sum() split_indices = user_data['n_applications'].cumsum()[:-1].values application_hashes = np.random.choice(a = list(application_obj.application_hashes_props_dict.keys()), p=list(application_obj.application_hashes_props_dict.values()), replace=True, size=total_application_hashes) - user_data['application_hash'] = pd.Series(np.split(application_hashes, split_indices)) + user_data['application_hash'] = pd.Series(np.split(application_hashes, split_indices)).apply(lambda x: x.tolist()) # drop excess columns user_data = user_data.drop(columns = ['n_devices', 'n_cards', 'n_ips', 'n_applications', 'n_transactions']) # create a hash value for the dataset (to distinguish between different iterations) From c19c561ea0450d56e95a26796b1042777c59ca6e Mon Sep 17 00:00:00 2001 From: Oisin Date: Sun, 4 Jan 2026 17:38:07 +0000 Subject: [PATCH 06/16] Updated transaction test data for unittests given revisions to app logic --- data/unittest/transaction_data.parquet | Bin 30779 -> 30737 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index 7418c8cb16e0d4ef1ae1185a4d8641e5ff953eed..88658a117e2ab4509394a90919b944e474826d94 100644 GIT binary patch delta 1925 zcmb`IZBP?O7{_;)y>JPbxrAIq#5QO+pp6a!B7)jna)F=`Lco?6twxfLvP|;t8DmJs*J$tR+ zt!mKd)|ISJLn3o+nyyc-!PncJRlDNa3y)1sGA%vb@B!9W&Y%XVC20>cqdFVtQ;ufe z&l@H+r|B*(X-SJXI{ppWo=mMrlcmJ63gR=bGl#sct2#OZZ7S(!g<|lDRDx>&(nIj6 zipAsQWln3PWSP}jZFiJf9X8?J@{u>5wSF-9g^j;|sQLBwt-y`TS0CRi8UH8?>kdqSQ@Lzdh~#2Eyj!*Jx#vl4~md%NoN z+8y;+n-gLCN)yYQiq5~+OZv}rAKcQFW=f9FTF-a=BKE7ZJJiN{Cb!2j_WKuI6s<7P=*d>d zXe>|$y+Z@q3+d*6TP!F=;(yG=jp(2CC}BOpU)5**muJYgDjnsvD(i|!!fIP#uMoig z7xRn-R5tl8&iBx_7%pk$zCTbHwgV4eNE389fdJdgq z5}I7lIk7%r5^&gGkFb|k+ienI;H)VvwL6`!)s*wo44jMNSW4)A^Em}a5R{pa0j&YB ztc=wfi1{?oQU;cj6%mAj%EKH*8O#M3n`IOP1~ZLiQ07*cDOQ-I#Wcp8f`yPZD-?MO zOmRV$N01(dCR*}@X7z#y0l+XLacX*gjk79n(C$Yin3B`by;OarnRiR#$i{ACqyTv>nkfN?4>X&vY>34y|SiyOcbxr zjE4vK^2}MTlKn@d8^1nud^D@FYk9BRHTPx7nJ+Hvn;Y@N`?qq}?ApDlr^|I|U}4#v z)GhwQbjUPsq8y2#niS_H$u=umV*n=8P_%)fw4!ls+2Pp7s^nKvJOh#86FF-{k{Cf< zG^pL&Ctj>{go||EOi+Lbup_)A1rR0h+P`uBYk%XiG1BE654 z0mSTQnVZ^RdT?I0^kb~tjhTxab3-3YP0h)`TML*rzA9&-^v4k8a0qjznE9?KIu!_7 z9FKA*5O-c@4lLx4M#b}{+zjH=i;(%4Z_KrY1|CAja|QC`Yz;rjJS+4>Jz_=!kl)E_ zc;5Vy@)1-DnYJdxTx&$a$Ks#-Yy3^Kmb!qIgP8fP1u?gqgA$*fmn-VtfS40J-;!4% z8QP4P!8Sn&);Ni0EXkr1?T87y!!NST3~S$ql!F9AD$i`|>9RD%lG_snf3lXTe*vje BZe;)f delta 1959 zcmb_dZBQH48a`*U8#dv>Y?kacd{_f+*3e>w6e`eGH?Ro_g*1UsX_-1k5-kWcP2hHP zv`N#Dp;~BQE2SN()It2}C>GSI9X~3S*5cIaw5YVxPi+0@wWyup-iwW%)ES-5+?oEl zKi+fBp7WeJ&v~EsIomx056-~h?~>_$oOvG4VWtR%V&sfGW62uHQK<_6(1o=M000Um z#y;G6?iveaztB(?n-3Gt{`UqN{iMKWzE6nFUZW8raPZtLnrq>qY*RiYTJBUs#ikzv z4-gJ*M<$j%7TyxJH0EWdJ^oX6$&|J<_*TRl{>Oz?8zxkFXY1Z*zi!biyyxZd#4a`c zO!|+i5F1$i`GE_4{>puC_xty%N(Sm)e|1#Up4fUMoS9f;U{^0B@BX;xaM8~gH*rAd z@J7P3;b3UgHR<1)Q}RsRvxQhh*%j5*lE;g#oPV;i@U2jtyYEDAN_p<}UfJHH%!q$I zm#1vV+VO2h=0tW`#)j}KpUmID2k)EAdZb--)F-A;7gl+nEu8K5F2WP~@A4k_wttIM zKxNa@keP;HZB-2j6#PR3rrBr|{p(q$quV0B_hFG~#rnBOoahTXzak5#SI zV4?{jK7h9;yX$sjWsZ)aaH*gu;6QmwPReELr|Y=tRilSXqVC8koie4-R&u19w?vz* zM*67PrHf*jLxv+=gP?ndwztP(p%qGbSx3sTu24aF4%fpKMyHRaVk;O;;-NunYQ9`~ zk8TxXvUZB3VX3dT*1?r^sjcOB_Hd43B$2Miv;Mw3SfGwy+*A$acv%T*%vGonO;U`w zKVaFhjlp#2M4MhA$O~y9bl|?*MShIM{TPV*@mgT<1JD3uZ~-7>!9p~ck75xZAgKijFes|gCyRnZQ4C}<4eHg> z+offJ|4~rn#`V4?x7YQAhH$x`@U$RC|DVd52Pi8!fCD5dFOUGFX)OT=$PhXJ7lOC~ za43C6MlAsHObP=SrANq)EC2+O8X!X*0E?1A4n%=f14KGV0)s#SVl&!~1qH2^0w5}f zGRg{op5n+_ij*M?fD{LjGUbe|0=>a;WXbni{8|7L2uoQ8d~q##G}}F$``>bwDTK zglZ?H7HdqDlQ21)4n}mSa2$82ML@ZYgelJDxA)ymMt!>@q2uwu9kWasv>u)g4G*}jcu*#JS`3NF1R?u>}qIg^LbJfcPz^* zu5R;tZ{ES|bG5mX+)}z(4?mWUISlC4-vVbs_o9Q=UVU_rde4;~LZ7|VG`I8Q_s$8fr>%1jZQOtA!X=k&=)>uee?2{=%W75*7NyGyI`RoO?yDA()9|YE!|h|^4=b`N zx3Q|)!|h`s>mzK7#N%HVZY_OBoPKfhH)NOHtb{_Odc5*tPs0RBUK%h_5oJ>PaCW`f zL@U6^?`Bb+d`8K>r{s?<<%gF1`D3b14$9cDjNfPE&kOOX{gu|_eK`9X&YuwZ>C*Vr z_7!(axi&S?Zef z78dBG&Xr3OuFgV!<}{R4_vc7+E0+_a5sd#%T5fOFT*TO`7~j{4@h7)o8B6}Mru30r zpibiKB+idVG5(|NXmO21IabPp&tZJ;4$0%FQ@*ha<6ql}R^qw#$b0u-{PBLNplVrS b_#KR$B6yPRe}3ets!kobHx~fAu`2Hm1mtVj From c9f41b4142835eb325159ed1bd319019624dbc52 Mon Sep 17 00:00:00 2001 From: Oisin Date: Mon, 5 Jan 2026 10:58:16 +0000 Subject: [PATCH 07/16] Revised utitity functions using copilot --- generator/utilities/Bedrock.py | 78 +++++++++++++++++++--- generator/utilities/align_country_codes.py | 20 +++--- generator/utilities/gen_user_names_file.py | 75 +++++++++++++++++---- 3 files changed, 138 insertions(+), 35 deletions(-) diff --git a/generator/utilities/Bedrock.py b/generator/utilities/Bedrock.py index ab2fa79..b6223cb 100644 --- a/generator/utilities/Bedrock.py +++ b/generator/utilities/Bedrock.py @@ -4,14 +4,36 @@ class Bedrock(): """ + Bedrock AWS API client wrapper for invoking language models. + This class provides a simplified interface to interact with AWS Bedrock runtime, + enabling prompt-based interactions with language models like Llama 3. + + Parameters + ---------- + session : boto3.Session + A Boto3 session object configured with appropriate AWS credentials. + model_region: str + The AWS region where the Bedrock model is hosted. + model_id: str + The identifier of the Bedrock model to use. + + Attributes + ---------- + client: boto3.Session.client + Boto3 Bedrock runtime client for model invocation. + model_id: str + The identifier of the Bedrock model to use. + + References + ---------- https://docs.aws.amazon.com/general/latest/gr/bedrock.html """ @beartype def __init__( - self, + self, session:boto3.Session, model_region="us-east-1", - model_id:str="meta.llama3-8b-instruct-v1:0" + model_id:str="meta.llama3-8b-instruct-v1:0", ): self.client = session.client("bedrock-runtime", region_name=model_region) self.model_id = model_id @@ -19,14 +41,49 @@ def __init__( @beartype def prompt( self, - prompt:str, - system:str="", + user_prompt:str, + system_prompt:str="", top_p:float=0.5, temperature:float=0.5, - max_gen_len:int=512 + max_gen_len:int=512, ) -> str: - # generate bedrock request - formatted_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system}<|eot_id|><|start_header_id|>user<|end_header_id|>{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" + """ + Invoke the Bedrock model with the provided prompts and generation parameters. + + Formats the user and system prompts according to the Llama 2 chat template, + sends a request to the configured Bedrock model, and returns the generated response. + + Parameters + ---------- + user_prompt : str + The main prompt or query to send to the model. + system_prompt : str, optional + System-level instructions for the model behavior. Defaults to "". + top_p : float, optional + Nucleus sampling parameter controlling diversity. Defaults to 0.5. + temperature : float, optional + Temperature parameter controlling randomness. Defaults to 0.5. + max_gen_len : int, optional + Maximum length of the generated response. Defaults to 512. + + Returns + ------- + str: + The generated text response from the Bedrock model. + + Raises + ------ + Exception: If the model invocation fails. + + Examples + -------- + ``` + bedrockModel = Bedrock(session=boto3.Session(...), model_region="us-east-1") + bedrockModel.prompt(user_prompt="Who was the first president of the United States?", system_prompt="You are a helpful assistant.", max_gen_len=100) + ``` + """ + # generate bedrock request payload + formatted_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>""" native_request = {"prompt": formatted_prompt, "max_gen_len": max_gen_len, "temperature": temperature, "top_p":top_p} request = json.dumps(native_request) # call bedrock model @@ -34,14 +91,13 @@ def prompt( # Invoke the model with the request. response = self.client.invoke_model(modelId=self.model_id, body=request) except Exception as e: - print(f"ERROR: Can't invoke '{self.model_id}'. Reason: {e}") - exit(1) + raise Exception(f"ERROR: Can't invoke '{self.model_id}'. Reason: {e}") # Decode and extract the response model_response = json.loads(response["body"].read()) response_text = model_response["generation"] - return(response_text) + return response_text -system = """# Task +system_prompt = """# Task You are a name generator for people from different countries in Europe. Your task is to generate an arbitrary N number of distinct and varied first names and last names for people from a given European country of origin. diff --git a/generator/utilities/align_country_codes.py b/generator/utilities/align_country_codes.py index 3c1646e..a45ae42 100644 --- a/generator/utilities/align_country_codes.py +++ b/generator/utilities/align_country_codes.py @@ -6,21 +6,21 @@ @beartype def align_country_codes( series:pd.Series, - proba_comm_ip:float=0.95, - proba_comm_card:float=0.99 + proba_comm_ip:float=0.05, + proba_comm_card:float=0.01 ) -> pd.Series: """ Aligns inconsistent registration, ip and card country codes to have mostly common values; with a random chance of inconsistencies. - + Parameters ---------- series : pandas.Series A series from the random transaction dataframe with inconsistent country codes to align. proba_comm_ip : float - The probability of a common / shared registration country code and ip country code. + The probability of a common / shared registration country code and ip country code, default is 0.05. proba_comm_card : float - The probability of a common / shared registration country code and card country code. - + The probability of a common / shared registration country code and card country code, default is 0.01. + Returns ------- pandas.Series @@ -33,16 +33,16 @@ def align_country_codes( ip_country_code = series["ip_country_code_alpha"] card_country_code = series["card_country_code_alpha"] # determine shared or new ip country code - if ip_country_code == ip_country_code: - if random_unif >= proba_comm_ip: + if pd.notna(ip_country_code): + if random_unif <= proba_comm_ip: new_ip_country_code = ip_country_code else: new_ip_country_code = registration_country_code else: new_ip_country_code = np.nan # determine shared or new card country code - if card_country_code == card_country_code: - if random_unif >= proba_comm_card: + if pd.notna(card_country_code): + if random_unif <= proba_comm_card: new_card_country_code = card_country_code else: new_card_country_code = registration_country_code diff --git a/generator/utilities/gen_user_names_file.py b/generator/utilities/gen_user_names_file.py index 8f3dfe6..22671d3 100644 --- a/generator/utilities/gen_user_names_file.py +++ b/generator/utilities/gen_user_names_file.py @@ -11,20 +11,66 @@ sys.path.append("E:\\GitHub\\RandomTelecomPayments\\generator") import cons -from utilities.Bedrock import Bedrock, prompt, system +from utilities.Bedrock import Bedrock, prompt, system_prompt -def invoke_bedrock(model, n_user_names, country): +def invoke_bedrock( + model:Bedrock, + n_user_names:int, + country:str, + countrieseurope:pd.DataFrame, + ) -> tuple[pd.DataFrame, pd.DataFrame]: """ + Invokes the Bedrock model to generate user names for a specified country. + + This function calls the Bedrock model with a formatted prompt to generate first names + and last names for a given country. It processes the model's response, parses the JSON + output, and merges the results with country data. The function deduplicates and standardizes + the name formatting, then persists the data to temporary CSV files. + + Parameters + ---------- + model : Bedrock + The Bedrock model instance used to generate names. + n_user_names : int + The number of user names to generate. + country : str + The country for which to generate names. + countrieseurope : pd.DataFrame + A DataFrame containing country information for merging. + + Returns + ------- + tuple: + A tuple containing two pandas DataFrames: + - tmp_firstname_country_data (pd.DataFrame): DataFrame with deduplicated and standardized first names along with country information. + - tmp_lastname_country_data (pd.DataFrame): DataFrame with deduplicated and standardized last names along with country information. + + Raises + ------ + json.JSONDecodeError: If the model response cannot be parsed as JSON. + KeyError: If the expected keys ("firstnames", "lastnames") are missing from the JSON response. + Exception: If the merge with country data fails or file I/O operations encounter errors. + + Notes + ----- + - Names are standardized by converting to lowercase, removing extra whitespace, and applying Unicode normalization using unidecode. + - Duplicate names are removed after each processing step. + - Results are concatenated with any previously generated data for the same country and saved to temporary CSV files if the new data increases the dataset size. + - CSV files are encoded in latin1 format. + """ logging.info("Calling Bedrock ...") # call bedrock model formatted_prompt = prompt.format(n_user_names=n_user_names, country=country) logging.info(formatted_prompt) - model_response = model.prompt(prompt=formatted_prompt, system=system, max_gen_len=2048) + model_response = model.prompt(user_prompt=formatted_prompt, system_prompt=system_prompt, max_gen_len=2048) # split out answer text = model_response.split("")[1].split("")[0] # parse json - record_set = json.loads(text) + try: + record_set = json.loads(text) + except json.JSONDecodeError as e: + raise Exception(f"Error parsing JSON: {e}") logging.info("Processing results ...") # generate pandas dataframe user_firstname_data = pd.Series(record_set["firstnames"], name="firstnames").to_frame().drop_duplicates(subset=["firstnames"]) @@ -52,7 +98,7 @@ def invoke_bedrock(model, n_user_names, country): tmp_firstname_country_data = pd.concat(objs=[tmp_firstname_country_data, llama_firstname_country_data], axis=0, ignore_index=True) tmp_lastname_country_data = pd.concat(objs=[tmp_lastname_country_data, llama_lastname_country_data], axis=0, ignore_index=True) # standardise names formatting - standardise_text_lambda = lambda x: unidecode.unidecode(" ".join(x.lower().strip().split())) if x not in [None, "", np.nan] else x + standardise_text_lambda = lambda x: unidecode.unidecode(" ".join(x.lower().strip().split())) if pd.isna(x) else x tmp_firstname_country_data["firstnames"] = tmp_firstname_country_data["firstnames"].apply(lambda x: standardise_text_lambda(x)) tmp_lastname_country_data["lastnames"] = tmp_lastname_country_data["lastnames"].apply(lambda x: standardise_text_lambda(x)) # deduplicate data @@ -61,22 +107,22 @@ def invoke_bedrock(model, n_user_names, country): # print shapes logging.info(f"tmp_firstname_country_data.shape: {tmp_firstname_country_data.shape}") logging.info(f"tmp_lastname_country_data.shape: {tmp_lastname_country_data.shape}") - # save firstnames names data to temp directory + # save firstnames names data to temp directory (if pairwise firstnames have been created) if tmp_firstname_country_data.shape[0] >= llama_firstname_country_data.shape[0]: tmp_firstname_country_data.to_csv(fpath_temp_llama_firstnames, index=False, encoding="latin1") logging.info(f"Wrote {fpath_temp_llama_firstnames} ...") - # save lastnames data to temp directory + # save lastnames data to temp directory (if pairwise lastnames have been created) if tmp_lastname_country_data.shape[0] >= llama_lastname_country_data.shape[0]: tmp_lastname_country_data.to_csv(fpath_temp_llama_lastnames, index=False, encoding="latin1") logging.info(f"Wrote {fpath_temp_llama_lastnames} ...") return (tmp_firstname_country_data, tmp_lastname_country_data) if __name__ == "__main__": - + # set up logging lgr = logging.getLogger() lgr.setLevel(logging.INFO) - + # load aws config with open(cons.fpath_aws_session_token, "r") as j: aws_config = json.loads(j.read()) @@ -106,12 +152,13 @@ def invoke_bedrock(model, n_user_names, country): firstname_country_data = [] lastname_country_data = [] error_countries = [] + # switch to toggle bedrock calls run_bedrock = False - + # set countries list countries_list = countrieseurope['name'].to_list() #countries_list = ['Cyprus'] - + for country in countries_list: logging.info(f"{country} ...") try: @@ -119,7 +166,7 @@ def invoke_bedrock(model, n_user_names, country): # call bedrock model and generate user names data tmp_firstname_country_data, tmp_lastname_country_data = invoke_bedrock(model=bedrock, n_user_names=n_user_names, country=country) logging.info("Waiting ...") - # wait 30 seconds before retrying + # wait 20 seconds before retrying time.sleep(20) else: tmp_firstname_country_data = pd.read_csv(cons.fpath_temp_llama_firstnames.format(country=country.lower()), encoding="latin1") @@ -134,7 +181,7 @@ def invoke_bedrock(model, n_user_names, country): # log if any countries failed to generate data if len(error_countries) > 0: logging.info(f"Failed to generated data for countries: {error_countries}") - + # load existing reference data firstname_country_df = pd.read_csv(cons.fpath_llama_firstnames, encoding="latin1") lastname_country_df = pd.read_csv(cons.fpath_llama_lastnames, encoding="latin1") @@ -147,7 +194,7 @@ def invoke_bedrock(model, n_user_names, country): # sort and deduplicate output data output_firstname_country_df = output_firstname_country_df.drop_duplicates(subset=["country","firstnames"]).sort_values(by=["country","firstnames"]) output_lastname_country_df = output_lastname_country_df.drop_duplicates(subset=["country","lastnames"]).sort_values(by=["country","lastnames"]) - + # write data to disk if output_firstname_country_df['country'].nunique() == n_countries: logging.info(f"output_firstname_country_df.shape: {output_firstname_country_df.shape}") From 4ab8740bf061fc1008c181f57b68d44a7f6543aa Mon Sep 17 00:00:00 2001 From: Oisin Date: Mon, 5 Jan 2026 10:58:50 +0000 Subject: [PATCH 08/16] Updated test case to account for new random uniform prob check --- generator/unittests/utilities/test_align_country_codes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generator/unittests/utilities/test_align_country_codes.py b/generator/unittests/utilities/test_align_country_codes.py index 787d9f9..d0626b1 100644 --- a/generator/unittests/utilities/test_align_country_codes.py +++ b/generator/unittests/utilities/test_align_country_codes.py @@ -45,7 +45,7 @@ }, { "registration_country_code_alpha": 353, - "ip_country_code_alpha": 353.0, + "ip_country_code_alpha": 42.0, "card_country_code_alpha": np.nan, }, { @@ -62,7 +62,7 @@ ) obs_data_df = input_data_df.apply( lambda series: align_country_codes( - series, proba_comm_ip=0.95, proba_comm_card=0.99 + series, proba_comm_ip=0.05, proba_comm_card=0.01 ), axis=1, ) From ef6e53eff2313d789cfe31a3e2a9045cfc46f63e Mon Sep 17 00:00:00 2001 From: Oisin Date: Mon, 5 Jan 2026 10:59:34 +0000 Subject: [PATCH 09/16] Updated test case data given inversion of align country codes probabilities --- data/unittest/transaction_data.parquet | Bin 30737 -> 30792 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index 88658a117e2ab4509394a90919b944e474826d94..e10d1a56e6d1548d8a21c209fd9151323a2e2e9f 100644 GIT binary patch delta 448 zcmWm9O=uHA6bJB`Np{+z!oHpC>>d=dh7JWiblGlt=m(~0QUW2_*=_1atGI|>6d^r% zDA<7LsWxR=RM6tlqtMWUC%yIHNwkMhiboGUI##U(v)(O;pDe)eU!|RXV?`#^=U~>SXwH(ki)RzM>c3 z@Zf7kXWzityQT-_QS%1mDYQ`vSA#rr*I#nK09EOO3n|8a$4K>5W4T*?xyZq@9gNc@ z1~?c=jn$jro8WXSP*_Sh0Ac_*0WW|qSm0!aIgA&}vRU-k^ILvkn_xowQ5z(Lgxv>e zEl7;r4s^!73DaJLz_u7uvBS(suN5-x=C(OwQcB$mDS+umzya;8IJHUZFw1{r#qk|+ zc>8h@jp-lNrA9&Z8&`~@$KumHb=r79KmNkvZChOn$J9o1YUt!I7K0_IG(qEJN!mW=`?=8Ti=P*4W) zj{+v_j*~-CQwOk=0w%_j0+V4y8I!hAK?dHK0w&s)ll@U?2F9EMCjOg~Y*IW6oV5ZL P+65*E7R;Tqvr?5M)x?EY From a8de17c54838cc5e25194345c59d24b03775cdbf Mon Sep 17 00:00:00 2001 From: Oisin Date: Tue, 6 Jan 2026 09:43:03 +0000 Subject: [PATCH 10/16] Revised cnt 2 prop function with copilot revisions --- generator/utilities/cnt2prop_dict.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/generator/utilities/cnt2prop_dict.py b/generator/utilities/cnt2prop_dict.py index 45ef11f..7a5cf48 100644 --- a/generator/utilities/cnt2prop_dict.py +++ b/generator/utilities/cnt2prop_dict.py @@ -1,27 +1,35 @@ from beartype import beartype +from typing import Dict @beartype def cnt2prop_dict( - idhash_cnt_dict:dict - ) -> dict: + idhash_cnt_dict:Dict[str, int], + ) -> Dict[str, float]: """ Converts a dictionary of counts to a dictionary of proportions. - + Parameters ---------- idhash_cnt_dict : dict A dictionary of key, value pairs where the value indicates a count. - + Returns ------- dict A dictionary of key, value pairs where the value indicates a proportion. + + Examples + -------- + ``` + idhash_cnt_dict = {'7125135c8882b0f6': 2, '049dd291d9506532': 3, 'd6708d344cb6f498': 5} + prop_dict = cnt2prop_dict(idhash_cnt_dict=idhash_cnt_dict) + ``` """ # empty dictionary for proportions prop_dict = {} - # sum of dictionary counts - cnt_total = sum(idhash_cnt_dict.values()) - # iterate over input dictionary and convert counts to proportions - for idhash, cnt in idhash_cnt_dict.items(): - prop_dict[idhash] = cnt / cnt_total + if idhash_cnt_dict != {}: + # sum of dictionary counts + cnt_total = sum(idhash_cnt_dict.values()) + # iterate over input dictionary and convert counts to proportions + prop_dict = {idhash: cnt / cnt_total for idhash, cnt in idhash_cnt_dict.items()} return prop_dict From 9a7f2b2547d616eaad2c538a7f7b703e135afd93 Mon Sep 17 00:00:00 2001 From: Oisin Date: Tue, 6 Jan 2026 09:47:23 +0000 Subject: [PATCH 11/16] Fixed type hints beartpying bug --- generator/utilities/cnt2prop_dict.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/generator/utilities/cnt2prop_dict.py b/generator/utilities/cnt2prop_dict.py index 7a5cf48..b845061 100644 --- a/generator/utilities/cnt2prop_dict.py +++ b/generator/utilities/cnt2prop_dict.py @@ -1,21 +1,22 @@ from beartype import beartype -from typing import Dict +import numpy as np +from typing import Dict, Union @beartype def cnt2prop_dict( - idhash_cnt_dict:Dict[str, int], - ) -> Dict[str, float]: + idhash_cnt_dict:Dict[Union[str, int], Union[int,np.int64]], + ) -> Dict[Union[str, int], float]: """ Converts a dictionary of counts to a dictionary of proportions. Parameters ---------- - idhash_cnt_dict : dict + idhash_cnt_dict : Dict[Union[str, int], Union[int,np.int64] A dictionary of key, value pairs where the value indicates a count. Returns ------- - dict + Dict[Union[str, int], float] A dictionary of key, value pairs where the value indicates a proportion. Examples From f70b7ed2f27e03a81cf3baa360dbf27a5ac2fda1 Mon Sep 17 00:00:00 2001 From: Oisin Date: Mon, 12 Jan 2026 10:57:04 +0000 Subject: [PATCH 12/16] Reviewed utilities and objects using copilet --- data/unittest/transaction_data.parquet | Bin 30792 -> 30601 bytes data/unittest/user_data.parquet | Bin 19003 -> 18997 bytes generator/app/gen_user_data.py | 8 ++-- generator/objects/Application.py | 5 ++- generator/objects/Card.py | 9 ++-- generator/objects/Device.py | 9 ++-- generator/objects/Ip.py | 7 +-- generator/objects/Transaction.py | 13 +++--- generator/objects/User.py | 19 ++++----- .../utilities/test_gen_obj_idhash_series.py | 2 +- .../utilities/test_gen_shared_idhashes.py | 2 +- generator/utilities/cnt2prop_dict.py | 14 +++--- generator/utilities/commandline_interface.py | 34 ++++++++++----- generator/utilities/gen_country_codes_dict.py | 40 +++++++++++++----- generator/utilities/gen_country_codes_map.py | 22 +++++++--- generator/utilities/gen_dates_dict.py | 13 +++--- generator/utilities/gen_idhash_cnt_dict.py | 36 ++++++++++++---- generator/utilities/gen_obj_idhash_series.py | 17 ++++---- generator/utilities/gen_shared_idhashes.py | 6 +-- 19 files changed, 161 insertions(+), 95 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index e10d1a56e6d1548d8a21c209fd9151323a2e2e9f..e11b43d5d10553d774e54590af5114170ab3bae6 100644 GIT binary patch delta 14086 zcmdUVe|Qwtz4uJA8+OBH*$unP5`u0*c1;lN%$YO4FqTcoLIMGj5Nc3tb7sy=iA4-Z zTeQ?ofM`HeU?T#>mLCBHtCu3B)_Pk+QSj1=__N4u#Y+{d*K&K+>-Adueg@n3Y43ZV z_kExK*YG@>FqzDpIp_QR{5o^&g1F{);`)#BOLHqrQg!Pin~bL78nallhUmumJ+TSZ z(M^$T)b8G}d2~ar-AwQtui9b75dIn>09qQNb}o=jw8qK8i3V5KsF_@;jj^ z*p{YEn}>#)7Dk4~M7TYzhK=sXP20GUCpHa@-B7(HF?M}R_ofZOps(p<0_)Vzh58U6TslFwlJ0 z)~XGq^GfDcZKidr4n(Yp>3ie3HFq`zHa;?I=*sFBn>$W-ZYXc**j17(y*9Tp^61f$ z@#5d(6cz#x@IQB?aJ*?sKb=$VJ!3?I(aJbQbC5ckF3D2|^fJR7_z zud9}BQQfo@6CbiA#_A=?4K5MeR=6bTu8c#}RVlNmLWp9>jv?U?cN~eTDseS~S*GiX zF~g#U;xg5d42deX<&V`nmSpIXN>$UcUE6m2wT7c>rYYkOnQTd_XchX46NR6bn)#*8 z8%t6Xw@3Dvz|P6V+JRxZbM50Ec0P0wJ9q2_cJ@ymcM#aQ&o140Yw9PNeLH+j+kdiN zI283w6fZltX2zq_eo{4Y$@F~_qrQiB(3)Fg@;G4UkD{dqn%2(TLxr)!O$UpjrTa=B z&qQ{Pdz?<+aUhim#vh7ES5@z-@UU~`5B4=Zw0C&8>Bo`bF_D9h&b)}7Gk5Ofi9N#= zJF533Dz?w)-m{~$)aOm)zT)`%MTNs7r->&EfzZU+2|*BA7P~!*dzO<5A)rnlHLXGr zuk+s!>y($L=yY*Z?VQC<+oG@EoRXzZ{IA1di*9sMQ;9rZ%o>0G-Rzr|^3>ei#mU(j znu5vc_?%eO&xvIxh-J-QZ*sPNPAvC&lVes4|8HX1_c^ibYZLZdB$nZUmR-~K{h3&{ zeoibEn4AZq!JSQOW`J1YLrudMiDfT{rD89LWoIhmk8h1g`9BfM${kypw%!Y3+3gX_ z@U9sbCuhe0L@eIqJm`;a^tZ;V<3hP0yj0X0!Q`lfqlB1l65B#usA6ort8m6SM`aiq zA&%~yhQd=V*VdUqxyAE@M#Or9suDFUlgkn}ZDxqI^{7E^Su&?MQML&2YLybkO|BTW zX~?>+;gD>boasa(j>ef|*{D@lW;(M~ic&>+s=-=ns<^jnXw)(_L-NNid|7c++0k{$ zB6$<{R!nbT3Q;6QvK-0eXbR4}W4f$J4lZRdM^`n%y@gB1@i& zoR3#;?%uuaNWgwus!CpdN6Wx7NpZLIOvPLu=D@4LNpYza-@FP;be7`SjqK)S&QhB# z<=!dZ=((9;Hjjz%Uxn0cJP#ka zxnc>O-Rk|mV~+RP(uu+UwkZDlqQ4q^ZBfUZWKmNNXZ%pSaPV(^hJ5kd!0OgX=iith ztQ`MHQz*4$WVJBcw|)HDgB5G1wyZq?W=dtPaeb*~Y1^5NLe+tBFKl{Pm{B5Z?G5Y{ zS|9Sq?kk%4M0WW~-=YTL>5@cXXDlggd;f4_Utdvs>zL--Cj>U$BR(~4=E>kkMZ$G= z2v-Q*xqxq+uu}9EZ&ZA9D86lEYkc;_iCI^uEKgofiD^?bud7H*(@8K^&rBlAT+)e4 zR9m+l4;Kt-n2JG7gE)#|GBlA%bd^%o(IkT?rUVeEmrQB_DqPu8nM9}}#tIwCYsA{Z zq4G@LFceN@X6S@jwm~kUjjgMOLkU6u>oV~;PvH&`FTxw>)NoX$%eXYKAb18yVl&w=Wom#iCI9k#y+x?Sm<04NG|92J zSYIz`l43}Ts#=b%<3AXHYyoL>gEA)TmZSQ4J)T(7ZA~MtX%Y@fudR1=0?Z_iEUT6z zbAWC=NToUu&GFPg zI%_yw(TL1UO#*@w%fGxvS8PY6xRfcrDH#4X!*;n%F#oy?%(W%|I^Y_?8k(AB;ewKq zP8t}Z?odgis;laToX#f=6{ktAZCYH`h$(v~*<2$)a*hj1w(W=JP&M7*I^(u%Xo}+B zUeBq8i&>6ga>LOS>c2o8sxZ@$32vuQirZTbGr)lrJ`u% zv$M9(Yb%Rs(PLF7i-pSa*w$;z+@ZjwU%hL)kvem!3QVoJzdX4t-lh<$S3`Z}-#>c-r{<8HA?aq*IV|?YJ_8;^wsH!U2 zG}J%bdv0}L(^pFdze0l9DfmK{}R9ow^@{-uC{Qz^SJ1?09lpoK#plDRAJwe!X#W)ivqkpU|24jWx#$ zsju8BlCGFpVj|M?jxqFf0quQF6Se&~YV8 z1AKB_GBj03ucz17y8wJ00BR^eF711mncjTp4~EJ2~VXE=sy23!-5fFE+z;F1}o)nqo^F< z8JDT6>oyAlL8Yw|6XGe z(_xN7Tt&QY9cD_lHAz)n%RwPw)eJ|*JUY;uvPlg>{cZIM7L^#13g5;kGc8n`%`NJv zwkLDB?JE9*^|%Q1y37H{wrW$w1IPSdt+vCHk^ff7o({Rc=mQ8-{BYVYmRz4rg_>C9 zvGuh_mP(DQjJq!ceo>r#VM19*5enI|RjWcxo$W8D>R)P@KSqoAHuQ}1_s;fzeU*RF z*q7T@Jv-~!563^>{qQyM7yX`LzW(p+ul5FqZdvkntSK>Wi!m8jqUY5cEX~&yQ4d9?vDPZ_7|y0ajq`E zYHI6K%Z#VnyJGX?S0-FN+NrjO4oko)+c4Wzd=+ycnbE^jLXdC_E zvbuC&loOA=+jh^{u2%|I-!oN=Uxd@arSU%A>dAXg60SBcq7xL2#3V~&U_&sK=ggUA zQAt+}_yRRA5n|$)>j0b}kSt&m*b_3P-eWr*NCu=)WSv0i7`7_As%xvdhUP&7dw}5( z!(|W_2FgzL8kllV9uOBT0^YZPbcSY`R5u_WxD74k1luUE4HYAiJQNGFJl_xT#le`a zimbo@YomW1I3LA!Wf?L6#Rmyum=;s1$s8C$AdZV-ghaDs6`ZRmswpvlThdl^C=}QK z?;%E^!RqT}Sw=45C^97u0LPO+6#foSsOC*wku66307Fav|Xhl^u6jfoaq;Zp3;&Q{1Fi0JMlD8C$*Q#3) zqLfSG{SiEtMe87>AUsXPGcsV!QXBvrhy#j$r+UHqN z6CkWP@WWVXtPcLbONw+^(-HFkDhwiFz2|AY<%qET< zJY!(-;2&J#NUp0<#lM`oHkC|F7KVgE=~*3+2yYl)2`XYB71>GEOOUP{0-rgO?_vXl2!DHsG63G!6mbTM)xwFfVd}QnwkVmFgtjiOE2Zdj16fpeZt1!j%CBGC%~4 zDVuh#%RskU7(a0s^Tdu<1sV_m*i&Ip*8=ONKL|Lk!$OrHH3)v0=5^^GXP^$QV3KBQ$dbcP|%@5FttDy z1SfST3`;jv2q;ZML7^?uD-6OId>#xCo*dQ=f(m4&nlx`=UARqEJ$DWbt;q0?s)JdC z#(=k=y6h^bWT1nD`So8g90{ckHNiMMmaKYZY1`iOyF{ZhF;)L|!&T9<1Y*iX4HSVXtcZnDA{gSLTt(<~P&it6PA%LBky0AU=O7czs~^>oAl zC&~SfhH!J+Spb|iO(;t646LsLHZd8|IX7i)AoVl#mPH04Kb6CN0@WxaD4If33FIzd z4H_It92Ap{xinSGAmgxyz&xxE;KYPxbCETYToa~6hrXsRrW7m128@&q_pee%ajSkU za5N}VNHmfHgu|k#M&4hK)DM~(#u-rn?sGe>~fB_EmDx;1R=zTPyOjH;M%&f}j z@hpHHwjP~?;6Rt1htGg9h^)AI-I64WQ{bYZBK|ZK!&PLNDjH8OtR&FQ8hAqdIG~w; z7(f;y45YV+@!)l$?&Mwm{SaWRXiFEzz08cSGTWtx>E1 zcMBrqcxS-XtjdI37>J5b15{JNbB2k#yEdT4heo&ZNVFl{ z!O?*7+z^0GLqd+S0r%Jr2WFxB;9)34jfpB`xk>{l?WlNq!&cCW9@!$yMowmNgkCDT z9?ztL)eQtvKpe)Y0|%lY^;TiYfCf-xQaTyqn1j6LRDB@=Y?qDPgD_QqIp$z12p4iS z++BqVQy~q}B*=F(f170L$j@~w5>jug;$W$SH36UiV&Pl#yIKtyVhzP8F`{r7Ac!YG z3G#mlYm1E|JnxP(5pbTx#LFUil&Pfz(P9+cP2g=9umsv-0m6Ds1h^?ZuVcRr0ou!~>XbV&X zP(haW9z^|Xp7S+P<8b_P-3Kt@T$WLz@O<#0&QY+uu0T^kEy*%KH1R};B94>ci8Tf) z(agHxqzQD!M1<>uAr0A5`V9kq5Ix{n7W^D1s`HXhvcmDw6nchI2L0iYmJa;&{5!au zpcDLO&}51X-3Q{eFd_~hgzj(<3dpt!Awksi3PBAo4%R?Zz@0&pX>}_q9gg}3ia+^# zpD72AQC9+)NHX$Btc;131%}8bgs1DGjxCm6FTPVtKnQu+b{!}c=Tl6Sr9jPN$&i%t z#KMu_L$p1_2{2hzxr(xaJoJn<2+V*jHvMNRQ7cfqs?_kNkJ-m3Ray|h2GDrNzxpg1 zU4O!KKWqy|b5CRs+l0?nYP zM;A_lNJD%=Y;T(?{I!uH#ICmqsyY@V8`2v+J)A_SskoZ6V`hlBl;%zfQaBi_#2+CO zG1Q7TMuKVqJT#9;0&%R)HNXzg9O)+{4a(Z(GOED@4OvJ!0AK{E6~qw?7m5YBiDTo@ zTe<-HDud`FUfOE9>HwD70_V9Ds)kl2~;HXy^2<07=j`lT^~?oGyHBK ziS>sNWWnt*2cy$S_y?mR9KDO>&Br9T1WNjU8zCrBIsknO)kvT~b#6*rE~^b;n@$$* zjA1j=Hk5$~)(Z=-S{gyY>Sp4lGO#%s1$d-eP>R&f^&y>sNQPyCOV%~5bjybga(iSrX~aT;~{lFmuzH!$i3jAkn-L2S}oKnW=2Okpoc3*<|--4d)q8FTo`l; zvFkpd09C}Wh#|4GChh5sA%L}+9?p!4z=Cra56a#^Nz zL&88N5Gp~R9IM2>4sg}h!q-L%RaeIH{}HKLSYrYHAPJGmg@0CtosjyZjuwI830pxQ zCl!OEfnqAu1$L&qO%>#d+mqlEWQr_IM~Cp|)<7$?DBiEYwg}*wfu(aFPr{NhM2i?W zal)6Bip!C~!0I~Qz6at3JJIn{c>wls$K??l)uxAk7)B<6T^po83bGZb4NLbQj4||w z%d4LAC>MUkFRc!k>=XQ5(R@7XqGH>g1nZOV3aA!6{G(EcI}cw_>j;|jvWD$Eh$>az%_+dpWXP!vm_$QD0D#O^ zvx)0~lL5mLEUpgzp-6O4Sx9g_QwJFjiH(-TCzQ=VcMN?;TqArs90AqY9@fIOA~eR7 z!wW!Z;W{wiSS2$rX*To`(VUkcbPx?8%S9tH%aw9nFe?Zb@O#;&@NsJlKsuBrWZjd) zK1sXrJVt|?Vh0K|13%BJAOvg)fhz0)_s9Xeckr_SOC&{1g=Uhh^g_U+f}7yA0#ybh zscYV&3jB+Xmjf_oCIw;u6=Ax-yLedvTNeo_gm_Pb<_=#O3*ouVM0ePb(L+dAyw?jX zIN051;7Z_3Ino{E{?sP^Gt|a4k;$W~3E=$ra3#Dg?387PuL=2VlE_C!`*eH9p_GQu z33~(BU4x#m05Xsu*nLFXC)NRqpnHLAF7!T>Kw>x!d5dHYBFRSDr$}{K*fK0W!WbPZ zMxkYg>p^6=y^KiTvEWUVs;ofzA5Wobz%&TJ_MV|?9`YBVKd9ftCQZdl>s8*zbzuME^c@MEh$ zM(_x6h-?ahh61r@LNn^(_5hePT#_GCwLL@;4k3`jyg&`&B^3!%ZELxJAcq^`5fvF5 z311Vh*dGq5SnKevkr8A=OsgACK^#D0!>U((D7bY=szFt$7LWwXDYHbV3=xsHRe|jR z+k)8EVW?onVNf+xyn+Y=l|X$C6-WEH5&lLXfLc#KmQnx>aF3h~Oi66>wo;h`qlsNx zY+*>)fUSj|a#duDxLJ65D4neZU#i%Zc3}R(V@CD#LV_%bRpN~nj%_cJI9!I{7sdx8 z^b#%r%7s$wQ1Lu=#UO>zgK%j6`dVx-;XbO&h#S5n6g&tbg(SigVmn*5W<7|Rofu7F zRwZwbo#!fXA75Dt;h^fk(QCmoF$@01QNwRVBgBDPNUul&3>9=rSc>};wc_byK$p#` zc}W?RmA47oE5IouU|?_(peGm>|8{|e<6~;!|G}0nVjHXK1IQW}FayRHv?N1E1Y!hR z#}b7qi-4V(UDm3i}7VK>UCmEbN#WC{aMQ z>vAj&F4)qCf4Z#nH1NGpK;XMOzsskp#D?pl_SP?;|dG9MY z5MOv2#W{K+2Cl??fx%D6dmlIYr?)f_H$qqFu|^TNo7=mb&^5pE&xHb5{CoWW`bq( zdIhmF(OQPE1qhLdR)9;>K-2y+Ar;OzJi8Euaz;ud-?%wK9SAz{{SY=BNnI9x7yCn~ zS_h&LYWXB}F}+AEvHOJ=CNxN)dPG$CB`^TDqX>~7ZBySD#WRmi(M2)3NGpA48^il^ zOXvLI6${@V?J}PCH+PcXABY^qfBRQW|BCnhWbDktM-zDM<@f&hr~bcq?Ijub^Dn)h zRxH2&_wy17q4rNl3t##W&x=CrkU%$Z@qo~op;{( z?Yr;3``&xlu)~)5_L1>TBUmL=!<{PuYh~NuG0zzdrXb4JKq~?lj-GY0Ld{`UL`bR_- z{CdhqwPL9-zN}05kFtnwbXLqn7Tj}cN{7EaP?S~t;-re~w_>>8C#mdv?c!KrR9Tl# z$OfB*F=Z(sU6vG*Wf7sOtXD{s8N!INR49<$x*!97b;Gj@l_J%;pw)-y0ir)*|I+O|~1K*aiRbPxvKl|pCC0AE9`2KT> z|5=|>Ue+yKDC-VnvaVkgLS_4Xb=ly@zS3-Pp)jGWPq;L=-#50Z?3hp-Tp0O-l>PY$ zQ4~Hd+bra-sC&$JWnJ0FLOF(US#Y_pPYwn`!pBn*ID;ZYjY>-1H)e$e50Z)uy0JL z{I#r=9Lq{d4=+s3E_<`$waE_%?>SY=zFhozU-j|Vip0k4y(8aPDwgJ(r@!M{wD+zX zo(=3T8TG3*vGa?xe?0lj2+LO)nRfW8iOaJ6mriIL8!Z{zE&N-cyy(c2ZP`zkW$v_6 zqf^Z#;<&y@|NeFTeFI00Yl}9dmVVGO@hab%fwrzwn=2FP*Oo2XHh$3m0%+{}FOkVuNyw|_?x4y%NS9rY6{ZhQEH6FOwwwHI7;EKKMmR`2VTUFNEdvku-O-mgy zl>O#SOBZqc3Y{%5ev7jhMX|XkzWu+wUKsfEc}f4pvvBr0@xq;d>llmXwsm$UH_nR} z8e;Q=_-sL3A~wr+jp=OiWqkr`Yih6cAB`%)m?y{2iu&en$QraI)}C%_Z|^ReA5Gp{ zC(M7THIj>WmUhl-n&ofoZ1WeAxyim0V*FI$OSw$mz2N-mm4l+IvQ6=5w6kemG?LB5 zm@gZPGze`ColRmSmKCFsrl=5$BYIeZNX^FP2rQ=RI$0xKW+5oWfjfRj_F)7O__me&_yqW`i44i>}?k{5(Q@xmQ4E^^g14_5depI-OvAAe-G z|MZqeuKVQ2AGo*vAwF*KUr*n6V??Z4`RcR7?|<~`N9Wx3rug8wx=V}o!D?sSp}vkx zb5(!uDnaZYuD!Oeru?aBN2GbPPpIr)Uu-OH&DI{Ki`#rRtj^RX_Y32+0ag^68p+J3 zy+uE`@rF=IB7Uat_+_mO1xi`+1!vhk@XJ0Gpi-M$Cr4}m)YNxd3>sOC_7sn`P*pX<4ESa zb2D#FFZ9h`5Pq{bajrOXa(?D?SEYuxOlDtJh|U>9PLEEUEYF1CF}jtf~qLy@H0nyz>)ns}`& zb6}t-^T{8)Kls1n?WVSwk@I5Wf|yykt|)Wr&LZz2|E+CWVm~yNfbM?z(tz zdV5RRy5hv9;>^a4MVSXT6ct!*Y~fJ*Rgs_i6Z`#{r#BU4emaCxy)tvIJhZha^Zpk6 g_y23D3Ub@%P~yOWqQq%`CXjgIJC8ovac}y60UtcQ!Tv z7OQm=1PY28NJOyMLlhLOZLv~|R$H)80nvKFiiipxTD5vwT50Rs_h{emp#6M$Iq&(L z&!_MEN2BC2dwVXw-}m;+>fea_Pl^41C|)_Hw{%Qc=w5qkGZ{RW==E)M-SqCZF_jfO zEEeQazPfU4SK@BF&^4ict+_sZW7zTrH;TN%-(6<-qx0)RnSzlMz&ASIjt{u}oa&3Rx?6vDR3Ih*`-}+Y^c1g)o z|BA!zk$&*6IP4?AImN)Fp4xD4_4DD>lo?kxZ0vgYww>ASvAUUS4^B?hAHH|_lw^0Z zE_B_fwnD|W6@6pFTk@?9Uz&HcwJg>)sejVUF|`NA6!yHBZLNIxj%9mRuj^`++L}hS zUUxVW9=koYp={5VwvqSC9Y2}W=C*XcGbX;wnOCU|-`CZ-<-zN=G_QNOw7GxTguA83 z_LYtn{}q>TAoNf%(7G)>vN!qNx(SzWsNcA$amIr?vuoqaH^!cr+)hpgDwCEUB2ZEl@$|LUQkwn?p#H&!P0Pi-uZ z)J;ytM()}2`p9j4{ejoF%zA~+tfc{OU2DT|>0rWbN&Uy(b-yUzyCb~sdr=`NC6AXR zUl@2cyrAf~E}@#Hcsk>j>3L!zXSs%MnldhO*&>>SOVraSx2UQpDwADC#00Y_Q$4P^ zgb}LRRxp=yEy4_#zR2Y{5b&eMQUg_!skC^3pV&n8Jf?^S$o* z-Ak3pLion)RF}`4=cdOi@=S3u`~0EwWeXO&X<0e5QiG98|3@peJh9>b$4d2LrHY@e zRO^4aQrZ9ON^J`!mj~OEW0FFJARG>~$1r(UwYfqx*)XYYTC$A-@nmjrTcfBe)RLy- z5=uw2Jlo)mI+mjpDX!r$LYZYcGI30sGh9L?JC-Fo6i<{bMe(0yIu;|g%2ZQfw(O#C zW!rSPq3DXMJKVK`OLGP)RCQg~&>Yj!9V3{frlu&a$8>6$IwQd~M+{jdF2=;TYZNtI zas1J%t|q&NK`g~&R8vj=8l?(%Oh;w5$z;PoUucShrs0OE*cO^Y6LXZR241RK9Pgl! z@Y0;gT&B4O2ISeqREd~np6YPNanOuR$6$h+bE={em00d*VmfF@hbM*PH>Kn@sWEvp zn2aS$lchC!*T8}HOi>W-3Y;z%a~M!e9R8yp|J!Ttro@t4D#Vgc&lZQh^}l~#Ee`wi zXP?CIcR~z)`Wq^SKmFu4c&y|!6@&9{N{Hc0ep9?Cctv_uZJ^;N%WrL-dVBb#8H+w> zXsR9=Sd*!#9lvYZv4zcR(r;ZoZp*}HW^|j{wLM$6tIAD6HyPx%`jIdzbCxKfh^p zW7BO{-_8Q19eu26e?_D8?xsaO-#%~Cj6LI&_Bqe3-YSh>V8)KdEcV9<)mv644=%j3 z{}+Sj-pQH{TX(U_-J`x7e{@2A*%gfwrTF~d4^}O@Ec9+u&u%kt&($v!uo!6A#w2OS z55EXgT@tR{(7lM4;paTQXtBG{<_jIH>k%>emiV{p%45mm+*4ovXV+9z$KL(XLyiCO z;Rm0-_xjtP{PyZmZzO+w*QO8mZf&U&M}PI{uf6-N`>wq9;-8(mZ$S~`>`HdDC(F+a zcWh$d{ovWfwypq1+d2)S{8J#gtmJP-nGH<6yeiP#f@eG}o_gw^9c;-Js~%iFK0mZl z%3oILJkyvNXfMuu>HfW;b%%C5+02`sZ+nluEVcE|YiJF;o}HcP57c_UJhC>CURSYq z>iX8!if~!!d8;Z9-nndO^|~3Yb!|J&J+M}l*3asDG}eCaAn&WWYSJTPZ(CQrI_BJP za>Z@QmW3x<*KKL;4?Ktc!SvsYBu@@+Pfk6v06R+o8mMg>&Ez+QOf5@sG@n+8NiDRU zCtDgP3RQ85G1Z|mXNJNpn<-}yf>CCwjG9bwRc3NL%TR4sql)dATxOsPfW&oF zU6Hw|6TpjNSz?Z9rtY{@XNqUYT(>h@a@^59kH`vfm?7It4`!LH%UtHfSMPw&>i~vj>!;w|sF!O?&8DLZ8rYkGDZmE>wZLs#luytMWOjB{>;2OrM z2f;ys@fNj-9CVltG}fpAEOdRP|=ZUd9&)K1^ z8s8MVABa9`&A#_biJ4YxY2`A$uYW}`;N4P?6H?jG#zh;a z{5EmzkxiwmYVUZo^SM{$Jzu)xp}y>89l86yfAOfpG`MAMU}|%#R@e2SHfKwEO{h<_ z%)(dV2VK7J)hX`?Q_H_DrN1}tmF2mY2j*?mZixhf?|P~J&PZrk*WE8CuIyZR^tIU= zTdyBgk&drxxjdE&{EGA_;@#As{eiJ6}|{62BsB02f01ssof=b;Al{ku8Gyg$7f>sc)B!xy};vR)O2N4 z@5+>XZhS(g9IuLVQIz|F9KW?yCT7Uv^;EnzIYJcq^vbxXhIw$|m2o*1AJN|uKX>{S zsd!wdjK{?*;`0md27@aWz>{SvfFuL}kpVou*Z$suF{#%F52pPP}iN$Kx@%Bc6(%S%4ko$-!uQ za@Lt8*f8vjI)ZxEA!N9xqcfRe0IqD+w)LWEG3a*7LF1#E702>WV3tjR9*P=f30b5=a&a@*kppsg zj)Qu(RHADJ$6L8YMr6+flu}&-{6bab01`xn{PMWV9Mv%KUcRf4ZQayl4=wBl_hSq| zY3ewpX;L3Ze21Wz+_Wu6b3ASn$Hz<937c@wlDP^z^3|9MU4~W%B5JnohC7PPwNwmG z<+`VNuH^zkfV#HBxoQI9@VxLE=p(~2P2Kf8)p99Tg&Tf9D}XqU61=&%G^YZBEb4OD z8lp3g1rv?bQGp;BCsWmE2eUAF+tgev5m26G@{NoaA;36giY?1x8sG_#bh(ae4zm@5 zWV&)LHK?OdhuXjf%~CK^;0J(OHdKJ818%}hq2^QwO^Z`3f$Rhm%w{&GqwAhc0WK`l z$Ye(ac7nAC0WkYlT)D`>I6x;w2KH#d9J3hUkz(YI>*-v@lYm}8Fx6Eou%zk*_cIgH z)3xx~9<#73nO%4}S2gUv4gymZuVn^v9ju|u7~w!!kG9oNSp~g$kkqED=oUtZk}-=O z_LvY%4s?bM)2VG!tO;<^BU*4Lh*wsiw=D&fq?u|a523EO*doJHP;aUhT)N6JHBb}e zvugp>-7|ukIkt+@&_IH416JmE&NVHA`>2Zn8mc0r8WivhX5wpT1(1sAgFQgshDr%a z)wD8=IbvD1<;uWjSXYJE0Ntwe-&Jx|?C*+iV5oXj@wq}wc!e!LCEQuk^0L}Kcp~zu z8harYT`!D0oX)-U+ZiurVkPyhGf%S>4_xy`%$$0<{novO*E*LxtJ3?f9R2)?P=ypt zNud{qiIUZFxTLW8->(_`D4mK0dNb1f!*YT5VzxUv_I@<}PRkw5x#rIGXP?e5^8(YP zix2jU+*`b2`qlD=fEY@WHK{*r>K!rir=s@!@4H?QmT&swtgaQIJ6B$^`=fmSD>G`+ z`DJ%Eb!ywD3C(-Qrz0n7CY6}gB(zs3{8H%J+cQhxZNmbEw~~bq#urK#cg`)@{OSYC z?i`B7D@LB(*E9Zx*u+bQSk1%Wm1Dd zr8qX~*A1doq4iM7C_=7-Xd$!!QH~|co`%ciQ67D-Jf*@sl9c!Q7Z>5iAi90s!GF!hTkyIw9y;n<`inrukDL zs;V1=LfYA?YGrO_8Wfy~DasxNNV~yPF%!T<#7AR5UyHVdunj0CoG;h7k>b8X}5Q7+tJ^U*L|+J7A>r^VN)A3A$D*n%9Y9n zP@W-)v8;Ks1f}p+4FVbBJE?|4Ay;(@dC8!CQOPOtGmFlNW`lM)+>q^N4lx&f=*l__ zmE|jh;C`xNj?DEvhk@?ID!_cG9I_u{RAi@Z5E})hOp&_C9=48WPQzajyb}P$#f%j@ zGY2&R0FX@!*^exn_KLj|v3TfRU(W*5DF=-Ml4M=GF8;Dx$HO=;J^h$wGGz6a=1uAsqGFW z1rHqnWkW%g&^q8KSg9O?`XJZ=3zLDOFoLIi*FZS2bPgNAlyI()$pR&@BaTh6IvwC7 zSM`dz!;p6wp354JBM*|{H8mH@fsO-zQ3ImM0FuHgVWAwJ1~uA#5qgSFW%P4+O_c_y zQ8be`hai@=I(4QDYhoe0q23{?iZ-7UzM*%S<@)I^1KJtkogv7$zEBk?hQOYO9V~_h1>xzyYtK9&A^(I_004pj znBlsBCfERjdEg^tzO2Z@@By$h@bHR(m+0!e-Plzd1t)Wi9)<)m)8^PaR3K6VKOh2f zx}JODWDRx&QH!h~H9%qvDFOMQz(UXiT|j^MA#^^B14!nKe6>K?8XyGlW$4O^*-R_C zstidiBj$w&@j)7$>Vl9Fhv35q+qx8(#1#$!i!pEy<|!d@wLoe#@HJT48h4x`83B8N zG;JFeW5o_&Gx*LtBi(gVJ+KSKs6r>}8kp7q&H)C|>|h~qpQ(Z+6a{br1;Z#x+cpej zCct&i55w@V1vGRJ`pikF&?G8pnH(|=xrIdF6PUOp;h?K@*1bI8;Qe)1yb0utvcnFl zjsqJBR&jKdz^3&107gra^cNPTT3kuVc|4|%B};yYa+;hFP23#*v%FKXd+-FO~uEc zjO9!t#U+bC7(*YUW4G@Imtb9yJHl`Si2za((Q8X;Ox1ucRy{eq6a3}KvVqeCaIMQy zi836*0bn^&NQg_A|9$;PEB(YyL9s$nYF0y@0yr$DBJRSVps-q*W60-6Fe5NVgCEhOW;q3Y2gGkI01PO8A%^75m_h#3s*Lwm^}~YITjCgq#;na z4eoNvwQ|Y8!3Xm!3-gB&HPNyJ2Q7P(rcQ3KCcX zJ&7|PxL;0!IcOVXl5no*a9idnR?ci}AgD{VEDaLJT^%D3ExIAY)k0@$^LqUFm_zZv z`>Kwi1vl6E$`d7s)i1xPOdy$?LoQLpB{e#9s{>h=Ii1#t3|K){P*5bzZY$Q{pcMfm z43_12oVK0Bfh^b*M_^b1Smd_33F0B5HiuS5JXo((Pc#cS18ON?MK0!oYbdM?vSHWq z@KbXS96|@{Agl6EBN6nSnd*%oR>HG^(T*QGA)7^%1mKrx(MER~_)pL5O5i+ASCtc= zM8L3iYE}rQ5C4OJ7(<_Xa^`dbOhaAm#4jU?OReq=6e+WVj0s%kU_rYNqs@>Qz$QRu z`w8!g4*?2DygdZ6kZQ7fU<*)9bI|c9Kb;qK&#wdo^e7ZmA%}QU$-YL&6(kEjZz123 zmDJn_vos5m5T%bm3OTj=UWi+2`TS@NunaKok<4)zfB$gN(9zzGTD?3<(Livn2+cYA z)kyV(z{RjsD$WuV&$99PEpw}&id8Vyp2s4Jha?-)!7|`m5uQPpsk#L`!J(cGE9d7R z*vsHf@CAqlHIAbQh=uA%GzPJ#D<=vuN9UT2t=D8zJii7@2@MF|T%I7PLaLPCP!>~Q zHEBa0s9yA3xCDgjrmLnlM4>{#d+=*GxxlePH$Bl*2lm%drLb|~EPt^MfFS$n95GXy z%OE+?FF+;3f@q{|+rUV0(g;=z6~4nwWzT{;!=_*j3)5^nfNjA!l7t6(j;FM1d)hGz=aYW-y zIf)I|IJBnXXYQt_wbW2F09+@`qAcU%QyVIv&5`UO(gf{eDIm~^jXaIug}R@}-*E(R zi9LgZB?K1QPPKIxr?Qw3 z>?pR&w1-B3u)qiu*`nb=rvB4nRA^!0(>3sS}Hn|*EIp}Yb$=Mf5&PHl_h7CvYl#Gp`uvSp<{j)SlOfe;lSUFb^r7jqM6 zP6ZT{IxB%t2l+q&X)Ca;h!ZS?qBh7Q^+2^;uc1=bGRYoaA(4HT}zY6F;|m}DfqFakV9Lx2?vZ3wP_++=QQ zR}vNtN)&)#Bi~a=Lmu;`C|CmsW#gqraA^p-uCJ^HcRTs_k*LLeH|L<0P#o|}Xcv5@ zDsF~ShlTb}d36~PMRx?m3+@2FAl-uK?4}7sU{HX2)<$F;f2R6}OVED}!)tKRiTd#5 z?)Pc{uLzYCcTXt-9TkGfKPH7&Mxq41Qk}LLprIl@g2^TV1Dx&zvvn345aF8UxaDev zz+h>7)SF(r6MJvb%*}#a`$SAb+NPA-Rn^#A4Ld#bn;7CCt~YD}upw`^plh&qGKV`x z_6gzYKy!MqABt}kA+zD!Wnx3Q7r6mu12Q|dhosW+a}BMcE?fhkf=<^+VY&zAK$GS7UzCa(Dc_i=!5uC*3{U`1<@nMgl>h8 zB{U?B)Gn-y;pEBmfW9z4s^`L*yVxQku*&DgBvqSoGGxeRZFiLnz_t~ha?XMo2G((y2qXl+gUo4x4?Uelx(d+q5Vu%` zUUUG?VHh$eRJd%WVqqLMLH58Q-hH#k}X&>9O8IIt2{s4 zw^GQna4Li_)j!fwGP?l08ngxa07VC3is+C-XyS3)$$;^J#wp@>*b^7U0_&|CsheYh zQT|-APKTNSxu8mLfP%pxZiiR~GU$4$Ig*CV0hq)3pEx2F9kt@UPy+mi{wiM*jzJ9| z6U|GOv*%Y;aCSf!rMf1L11j9V;GZ`krP56xj}H`3ya@e4Jsf2A2!;0n^CFo?7Ov>2 zk0ZcAh$S#PmQ=^BfS8sBu0f0mVuShBomO`iZvBE*1k}T&=^$&64Wt63m4+l0={`-p z7>{W1$>l$cLyRIUk`Elu;T8^PL)Zvl1Q1(&>%Rl+K7;1d%b>nBjC{=#*$BIUkFNKnFU{%h?!NBJbLY*QzhJ?Fo}TNA3l}b0v}o~?B};Dj%F-LZdehBc z``RtH6pOum%Wl2(*5%7rto-`dSFK)s+ikb6S#!soclG!8uU$7VI5>Fs`g`uVXTye# z_kLs3ec$}I%|rL&>wyQhY}vZ?!G|8&wr%^v-+JVcN51`?@9uc?(eFL>*v{{N|MACv zuTz4o zso}|ohqWhvB%V1{`Z!qgorx#Dn)uFdE)9m}Tu__;Lw43(&-{Kw9^dfW%a`std&LKx zyQF+LojouA+57f=@T054OQl^55}ecb*cEx(8Lq(L|cX9(NN^nr5|>R z_X=l4iI9wTNEP{@QTK_QzbSzc{wWjQA>E@z)3^r-kDn6_38SJ}VN5h6U6>Eg5K3?{ z7Tzq4JRgG@mKSdbb?%Bp)1g$pA^3j0SNJ%(K^PuROTFqiCR{i!maWJ?e+XY|^!(Id zn^cw;9}0bSus+%=oExnY0@3|~5^j}*P&5=8llOw6AmI%zjOK-69UhN_XG@1Z9+wr; z7`lq9iD)RkTc~bGhhF>Rr42-Ai0;5YCK%&(2-SGOxzVM{`gWs z8#f1k9T(2WOtRsS)NA0W>F5lh7C$Nejl)yrae2(*oWfn9Tk?5v*v!z`nWkuuZ~;Cw z6W%3#?V>P~gv^wsf*DQMHRSysP}Ni(x2UZTCN~HF>K3)CkiV69dERfXGhe-abA5hw z;C7sI^v=#_t4fFMP4@}e)S90@^V{BkOK$R{lD&bx3F6CFyuafnQ5ftFlg$sN zUVM07LELez7#p3+&#H=)H2qv)YkCK7)xWa2A-Z*((9`j;aJ%>Jo}f^E&$>N<_eYPt z{W~Y}Vtb@?e>nZSvnQ=SXOAEZyxTOoxJpdk`M0+)W+ngb#;P;lvGUEQ#{aYT2Z}3$ zemh^6yskZ2cBYrLu2Q^p55KX{!{_=t{FMa@=Q{XXjMVU+o<)lnEOf;&%voSBn71H^ z$GLN(JO97DmvLan_(bvl$z6;igjic>vyjKf)rUl3PH#FW1VXqwtXBy2r^Q|gXWJJQ z(!%bv6bJ}`SQhshbxL@Om=A0YEn6i7`i0&)pja@gXjs-8>?6Wz+4Q_kmiD+R8JHRD!;m^iID z(HsoMI)ZT_-y#$W@j|{Oh?_g}v6ez}d}a`tWT(^|i?wugNm55VCJHSbNhvPIdlMb8 z0$!Atl9JS%D2QUABa!G#NU>&rPySO(o)rJGE4R$5#N1m3^p>VeYRDJHFPJ~ywZ-bH zuI_p6f+dTi)vfdkZSxl`@w{%k+nv97!jk#UB5caaK=RJOUv;nkar)-uKi`{xosHgE#E$El!?ZcIb(hzdyP1%?CcX{MK)O=bpzNEWUQ+n(klg z8`nLZEYVhxia=Pb3xzwC_=d#VT0si2Kut?5mM)WS$<)2azVS0E@9hay+gz|2@VrkPDPwkHE;N-uHd-ULW&2n?KGx&EoA zNid64vih&*AY@~I|D>vaxPsXY&wjpF;!(fKNB(D(CRCL!vgbVZ>j|Yn>C~|LUk`jd z`HP{)ht+RT>Q7xT&>y*A;EwhS%HJwW?JcW6cv1a>`e!#}Ur~NKkoq)G|9GMPS9$*u zk5vv_JVh^mSW10Isz2OZ|K522a^Dnf_>&c>-SPT+yXx1s4(ypStK#QU>Sd|^t*h#P zKC?y_vbgna;KIwkATLX#PF2(&`*QuZYyHcyzrRoGAMbAYZd-cb$;+z4N4{LY`Bkxg z!}SAiUN*bo<1zIgek|6XIwscmfc)R+9`EwzaoZ|VA4cnUemzkC-k<#!{P$S?+U1jL z{!>h?4Ad{bGf;o{jsWKP*BCUgqwSo5YdS{7UY1fvrTXLjf%>C&`A-^H)zKEYtt2&A zQvc1n1NA!x2j1+M7TXz2Z3)&N+8C(ce-EDJm!dMTxKo|7^S(g+hnsNef3~wD8&Nkx fG9chv3_oxqc>m*p)X`vlDD~twA3NQ7cjo^9$Y>5V diff --git a/data/unittest/user_data.parquet b/data/unittest/user_data.parquet index b04806ab966d78e7057cfaa693d6fdaf13936586..1e6fb636106b131c2d7a6e78523bd12e78b14cb1 100644 GIT binary patch delta 1182 zcma)5y^5tp5WOGxjWeQh-Mx35U_cg};i|jZ-PKiH4PPR-3bKK*S$9>nFJQ2X;x+IM zW&^~hE=A`LPKlny$miPU=P!V#h<0Di3W@m($R*q463c9U|j_B zv|Ew{imi13mC_(xAr<6EvGhg)L)x_8Fh<7EdnWBY7|old6%B!| za)FXu-`O+E@FC2ZQVcOQ(@D8l>>NX;3aK}kcT49?t!RNt0lJK7_bRzll%xinPF8td z79H6P3|g(^eB_$|$w(Y9U~akXtLt7yu3Xp;6pilfl~Vz&P*hP8lnm~9VM?eyRcmHx z%o~n5pb#RO_^h6N`aCFvTMteLw^2=OUh)W{nscpX#L8zUODGtvvh~J&>HNy7P*rJfmk!AeD|G&TQ_4AMifXR zO}oR8@z$MFrobYu?{dKySX(JAdf7ZDU#nB>q9yo#McG^-wPD^qRzZ)X9PRIiinV2y z)-*+Y?RQUAEn7^4mh)(L4;KX(rck?AawyoOPDVZHDA8>%fW_khZZ0r02fL#2ct#wf zh^SjfLfLO>_qvRd8bSq$^W}Ssk{3F#yW7CM$L(tS+4hU+w*SiY_F#K(N~_)adLd8N z*P4Ct&Fde&%dw8yEgu*lh*KeM@|HdZ=@yqr3A74-BKYum- yKLPpTrMF(W*rv^!kG`AEpMLw#*mdmxviaD}=Z^pSWjfxRA09t?@^Sk0<-Y;1$1o-U delta 1166 zcmaizv5Qn_ho7-(TK(yy&vI zz1jS6D&}BasE7E~eAzD%@JHHHlH`l*LP?DOX9AMjobS^C=4fph*C3B_;4j z*aQ`O7ZI$nhUt_$myodQsKz~&dH-Y`C^#?~C`3$SOc$%glwCx}A_k<@XVGZ6NYKzi zrXhUPAX;i5gao4`h&~<(Tf-n-lt#$&w>*lD-icIGpj_rBoQ8;^)xEQyXM?UK2>%;LfV+5DWX`#~df*jLPB;O-OV-jbgwGZ)dQ zySEI@bMZDUzSG~l#acMW03P?iP;Yh)$wdQbMNcO7`Er#k?%zEO)<#mM%N3wQ?$if` zUNRktYN`33%C@(lJ>~47yfThM{@8ur7u5l558EcDV5>K9%F=Hm85&lo_`7R>8k9Ypm%l~RFh55S9NNd zdvWi9fyWikEUr-sDMQ990vzSSs14EACua%WTBt)E4Pq#}-yd*wAS-&FF;3r1E9YK| zija+Qmeg?f!)hGj5M)8hJx4n{y?^J^|Ds#(Zm%JDcya&hF9aG#Vapb3M+wVjxxL(Z zu$(U2oVwhi8Y^;l|+> dEDvA)GM)eV^S`bCzwz)D+?!s$c?!Rs{R8F2GNS+h diff --git a/generator/app/gen_user_data.py b/generator/app/gen_user_data.py index ed69139..9c5fa94 100644 --- a/generator/app/gen_user_data.py +++ b/generator/app/gen_user_data.py @@ -59,10 +59,10 @@ def gen_user_data( zero_pad = (userid_date_country_code.str.len() - 11).abs().apply(lambda x: '0'*x) user_data['userid'] = userid_date_country_code + zero_pad + user_data['uid'].astype(str).str[-5:] # add hash data lists - user_data['device_hash'] = gen_obj_idhash_series(idhashes_props_dict=device_obj.device_hashes_props_dict, n_counts_series=user_data['n_devices']) - user_data['card_hash'] = gen_obj_idhash_series(idhashes_props_dict=card_obj.card_hashes_props_dict, n_counts_series=user_data['n_cards']) - user_data['ip_hash'] = gen_obj_idhash_series(idhashes_props_dict=ip_obj.ip_hashes_props_dict, n_counts_series=user_data['n_ips']) - user_data['transaction_hash'] = gen_obj_idhash_series(idhashes_props_dict=transaction_obj.transaction_hashes_props_dict, n_counts_series=user_data['n_transactions']) + user_data['device_hash'] = gen_obj_idhash_series(idhashes=device_obj.device_hashes, n_counts_series=user_data['n_devices']) + user_data['card_hash'] = gen_obj_idhash_series(idhashes=card_obj.card_hashes, n_counts_series=user_data['n_cards']) + user_data['ip_hash'] = gen_obj_idhash_series(idhashes=ip_obj.ip_hashes, n_counts_series=user_data['n_ips']) + user_data['transaction_hash'] = gen_obj_idhash_series(idhashes=transaction_obj.transaction_hashes, n_counts_series=user_data['n_transactions']) # generate application hashes per user #user_data['application_hash'] = user_data['n_applications'].apply(lambda x: list(np.random.choice(a = list(application_obj.application_hashes_props_dict.keys()), p = list(application_obj.application_hashes_props_dict.values()), replace = True, size = x))) total_application_hashes = user_data['n_applications'].sum() diff --git a/generator/objects/Application.py b/generator/objects/Application.py index 992cf5d..8d5d5cb 100644 --- a/generator/objects/Application.py +++ b/generator/objects/Application.py @@ -43,8 +43,9 @@ def __init__( self.power = cons.data_model_poisson_params["application"]["power"] self.payment_channels = cons.data_model_payment_channels self.application_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_application_hashes, lam=self.lam) - self.application_hashes_props_dict = cnt2prop_dict(self.application_hashes_cnts_dict) - self.application_hashes_payment_channel_dict = self.gen_transaction_payment_channel(list(self.application_hashes_cnts_dict.keys()), self.payment_channels) + self.application_hashes = list(self.application_hashes_cnts_dict.keys()) + self.application_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.application_hashes_cnts_dict) + self.application_hashes_payment_channel_dict = self.gen_transaction_payment_channel(application_hashes=self.application_hashes, payment_channels=self.payment_channels) @beartype def gen_transaction_payment_channel( diff --git a/generator/objects/Card.py b/generator/objects/Card.py index 44de111..d31fe67 100644 --- a/generator/objects/Card.py +++ b/generator/objects/Card.py @@ -56,10 +56,11 @@ def __init__( self.power = cons.data_model_poisson_params["card"]["power"] self.prop_shared_card_hashes = cons.data_model_shared_entities_dict["card"] self.card_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_card_hashes, lam=self.lam, power=self.power) - self.card_hashes_props_dict = cnt2prop_dict(self.card_hashes_cnts_dict) - self.card_hashes_type_dict = self.gen_card_type(list(self.card_hashes_cnts_dict.keys()), self.card_types_dict) - self.card_hashes_country_code_dict = gen_country_codes_dict(self.card_hashes_cnts_dict, self.fpath_countrieseurope) - self.card_shared_idhash_map_dict = gen_shared_idhashes(self.card_hashes_cnts_dict, self.prop_shared_card_hashes) + self.card_hashes = list(self.card_hashes_cnts_dict.keys()) + self.card_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.card_hashes_cnts_dict) + self.card_hashes_type_dict = self.gen_card_type(card_hashes=self.card_hashes, card_types_dict=self.card_types_dict) + self.card_hashes_country_code_dict = gen_country_codes_dict(idhashes_cnts_dict=self.card_hashes_cnts_dict, fpath_countrieseurope=self.fpath_countrieseurope) + self.card_shared_idhash_map_dict = gen_shared_idhashes(idhashes_cnts_dict=self.card_hashes_cnts_dict, prop_shared_idhashes=self.prop_shared_card_hashes) @beartype def gen_card_type( diff --git a/generator/objects/Device.py b/generator/objects/Device.py index 0b3d923..d8d54be 100644 --- a/generator/objects/Device.py +++ b/generator/objects/Device.py @@ -51,10 +51,11 @@ def __init__( self.power = cons.data_model_poisson_params["device"]["power"] self.prop_shared_device_hashes = cons.data_model_shared_entities_dict["device"] self.device_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_device_hashes, lam=self.lam, power=self.power) - self.device_hashes_props_dict = cnt2prop_dict(self.device_hashes_cnts_dict) - self.device_hashes_type_dict = self.gen_device_types(list(self.device_hashes_cnts_dict.keys()), self.fpath_smartphones) - self.device_shared_idhash_map_dict = gen_shared_idhashes(self.device_hashes_cnts_dict, self.prop_shared_device_hashes) - + self.device_hashes = list(self.device_hashes_cnts_dict.keys()) + self.device_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.device_hashes_cnts_dict) + self.device_hashes_type_dict = self.gen_device_types(device_hashes=self.device_hashes, fpath_smartphones=self.fpath_smartphones) + self.device_shared_idhash_map_dict = gen_shared_idhashes(idhashes_cnts_dict=self.device_hashes_cnts_dict, prop_shared_idhashes=self.prop_shared_device_hashes) + @beartype def gen_device_types( self, diff --git a/generator/objects/Ip.py b/generator/objects/Ip.py index 4d3e73e..702281f 100644 --- a/generator/objects/Ip.py +++ b/generator/objects/Ip.py @@ -51,6 +51,7 @@ def __init__( self.power = cons.data_model_poisson_params["ip"]["power"] self.prop_shared_ip_hashes = cons.data_model_shared_entities_dict["ip"] self.ip_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_ip_hashes, lam=self.lam, power=self.power) - self.ip_hashes_props_dict = cnt2prop_dict(self.ip_hashes_cnts_dict) - self.ip_hashes_country_code_dict = gen_country_codes_dict(self.ip_hashes_cnts_dict, self.fpath_countrieseurope) - self.ip_shared_idhash_map_dict = gen_shared_idhashes(self.ip_hashes_cnts_dict, self.prop_shared_ip_hashes) + self.ip_hashes = list(self.ip_hashes_cnts_dict.keys()) + self.ip_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.ip_hashes_cnts_dict) + self.ip_hashes_country_code_dict = gen_country_codes_dict(idhashes_cnts_dict=self.ip_hashes_cnts_dict, fpath_countrieseurope=self.fpath_countrieseurope) + self.ip_shared_idhash_map_dict = gen_shared_idhashes(idhashes_cnts_dict=self.ip_hashes_cnts_dict, prop_shared_idhashes=self.prop_shared_ip_hashes) \ No newline at end of file diff --git a/generator/objects/Transaction.py b/generator/objects/Transaction.py index 0398ba2..4b5f522 100644 --- a/generator/objects/Transaction.py +++ b/generator/objects/Transaction.py @@ -61,10 +61,11 @@ def __init__( self.power = cons.data_model_poisson_params["transaction"]["power"] self.transaction_status = cons.data_model_transaction_status self.transaction_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_transaction_hashes, lam=self.lam, power=self.power) - self.transaction_hashes_props_dict = cnt2prop_dict(self.transaction_hashes_cnts_dict) - self.transaction_hashes_dates_dict = gen_dates_dict(self.transaction_hashes_cnts_dict,start_date=self.start_date,end_date=self.end_date,) - self.transaction_hashes_status_dict = self.gen_transaction_status(list(self.transaction_hashes_cnts_dict.keys()), self.transaction_status) - self.transaction_hashes_amounts_dict = self.gen_transaction_amounts(list(self.transaction_hashes_cnts_dict.keys())) + self.transaction_hashes = list(self.transaction_hashes_cnts_dict.keys()) + self.transaction_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.transaction_hashes_cnts_dict) + self.transaction_hashes_dates_dict = gen_dates_dict(idhashes_cnts_dict=self.transaction_hashes_cnts_dict,start_date=self.start_date,end_date=self.end_date,) + self.transaction_hashes_status_dict = self.gen_transaction_status(transaction_hashes=self.transaction_hashes, transaction_status=self.transaction_status) + self.transaction_hashes_amounts_dict = self.gen_transaction_amounts(transaction_hashes=self.transaction_hashes, loc=0, scale=2) @beartype def gen_transaction_status( @@ -104,8 +105,8 @@ def gen_transaction_status( def gen_transaction_amounts( self, transaction_hashes:List[str], - loc:float=0, - scale:float=2, + loc:Union[int, float]=0, + scale:Union[int, float]=2, ) -> Dict[str, float]: """ Generates a dictionary of random transaction hash amounts. diff --git a/generator/objects/User.py b/generator/objects/User.py index e1d024b..28311db 100644 --- a/generator/objects/User.py +++ b/generator/objects/User.py @@ -79,12 +79,13 @@ def __init__( self.lam = cons.data_model_poisson_params["user"]["lambda"] self.power = cons.data_model_poisson_params["user"]["power"] self.user_ids_cnts_dict = gen_idhash_cnt_dict(idhash_type="id", n=self.n_user_ids, lam=self.lam, power=self.power) - self.user_ids_props_dict = cnt2prop_dict(self.user_ids_cnts_dict) - self.user_ids_country_code_dict = gen_country_codes_dict(self.user_ids_cnts_dict, self.fpath_countrieseurope) - self.user_ids_firstname_dict = self.gen_user_firstname(self.fpath_firstnames) - self.user_ids_lastname_dict = self.gen_user_lastname(self.fpath_lastnames) - self.user_ids_email_domain_dict = self.gen_user_email_domain(self.fpath_domain_email) - self.user_ids_dates_dict = gen_dates_dict(self.user_ids_cnts_dict, start_date=self.start_date, end_date=self.end_date) + self.user_ids = list(self.user_ids_cnts_dict.keys()) + self.user_ids_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.user_ids_cnts_dict) + self.user_ids_country_code_dict = gen_country_codes_dict(idhashes_cnts_dict=self.user_ids_cnts_dict, fpath_countrieseurope=self.fpath_countrieseurope) + self.user_ids_firstname_dict = self.gen_user_firstname(fpath_firstnames=self.fpath_firstnames) + self.user_ids_lastname_dict = self.gen_user_lastname(fpath_lastnames=self.fpath_lastnames) + self.user_ids_email_domain_dict = self.gen_user_email_domain(fpath_domain_email=self.fpath_domain_email) + self.user_ids_dates_dict = gen_dates_dict(idhashes_cnts_dict=self.user_ids_cnts_dict, start_date=self.start_date, end_date=self.end_date) @beartype def gen_user_firstname( @@ -170,17 +171,15 @@ def gen_user_email_domain( email_domain_data["proportion"] = email_domain_data["proportion"].divide(email_domain_data["proportion"].sum()) # convert email domain proportions to a dictionary email_domain_dict = email_domain_data.set_index("domain").to_dict()["proportion"] - # extract the user ids - user_ids_list = list(self.user_ids_cnts_dict.keys()) # randomly choose the email domains based on proportions user_email_domain_list = list( np.random.choice( a=list(email_domain_dict.keys()), p=list(email_domain_dict.values()), replace=True, - size=len(user_ids_list), + size=len(self.user_ids), ) ) # return the user ids email domains - user_ids_email_domain_dict = dict(zip(user_ids_list, user_email_domain_list)) + user_ids_email_domain_dict = dict(zip(self.user_ids, user_email_domain_list)) return user_ids_email_domain_dict diff --git a/generator/unittests/utilities/test_gen_obj_idhash_series.py b/generator/unittests/utilities/test_gen_obj_idhash_series.py index 90f18f1..03a1b75 100644 --- a/generator/unittests/utilities/test_gen_obj_idhash_series.py +++ b/generator/unittests/utilities/test_gen_obj_idhash_series.py @@ -37,7 +37,7 @@ device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum(), fpath_smartphones=fpath_smartphones) # generate user data and device hashes user_data = random_entity_counts.copy() -obs_obj_idhash_series = gen_obj_idhash_series(idhashes_props_dict=device_obj.device_hashes_props_dict, n_counts_series=user_data['n_devices']) +obs_obj_idhash_series = gen_obj_idhash_series(idhashes=device_obj.device_hashes, n_counts_series=user_data['n_devices']) exp_obj_idhash_series = pd.Series([['2e23f63807f6170a'], ['b8816ed926bf9f83', 'b010fdb44fa68822'], ['ff23757073a07357'], ['3d2fd828c1fd1152']]) class Test_gen_idhash_cnt_dict(unittest.TestCase): diff --git a/generator/unittests/utilities/test_gen_shared_idhashes.py b/generator/unittests/utilities/test_gen_shared_idhashes.py index e1e24f2..15ff768 100644 --- a/generator/unittests/utilities/test_gen_shared_idhashes.py +++ b/generator/unittests/utilities/test_gen_shared_idhashes.py @@ -15,7 +15,7 @@ obs_prop_shared_idhashes=cons.data_model_shared_entities_dict["ip"] obs_hash_cnt_dict = gen_idhash_cnt_dict(idhash_type="hash", n=4, lam=1, nbytes=16) -obs_shared_idhashes = gen_shared_idhashes(idhash_cnt_dict=obs_hash_cnt_dict, prop_shared_idhashes=obs_prop_shared_idhashes) +obs_shared_idhashes = gen_shared_idhashes(idhashes_cnts_dict=obs_hash_cnt_dict, prop_shared_idhashes=obs_prop_shared_idhashes) exp_shared_idhashes = {} class Test_gen_shared_idhashes(unittest.TestCase): diff --git a/generator/utilities/cnt2prop_dict.py b/generator/utilities/cnt2prop_dict.py index b845061..937c32a 100644 --- a/generator/utilities/cnt2prop_dict.py +++ b/generator/utilities/cnt2prop_dict.py @@ -4,14 +4,14 @@ @beartype def cnt2prop_dict( - idhash_cnt_dict:Dict[Union[str, int], Union[int,np.int64]], + idhashes_cnts_dict:Dict[Union[str, int], Union[int,np.int64]], ) -> Dict[Union[str, int], float]: """ Converts a dictionary of counts to a dictionary of proportions. Parameters ---------- - idhash_cnt_dict : Dict[Union[str, int], Union[int,np.int64] + idhashes_cnts_dict : Dict[Union[str, int], Union[int,np.int64] A dictionary of key, value pairs where the value indicates a count. Returns @@ -22,15 +22,15 @@ def cnt2prop_dict( Examples -------- ``` - idhash_cnt_dict = {'7125135c8882b0f6': 2, '049dd291d9506532': 3, 'd6708d344cb6f498': 5} - prop_dict = cnt2prop_dict(idhash_cnt_dict=idhash_cnt_dict) + idhashes_cnts_dict = {'7125135c8882b0f6': 2, '049dd291d9506532': 3, 'd6708d344cb6f498': 5} + prop_dict = cnt2prop_dict(idhashes_cnts_dict=idhashes_cnts_dict) ``` """ # empty dictionary for proportions prop_dict = {} - if idhash_cnt_dict != {}: + if idhashes_cnts_dict != {}: # sum of dictionary counts - cnt_total = sum(idhash_cnt_dict.values()) + cnt_total = sum(idhashes_cnts_dict.values()) # iterate over input dictionary and convert counts to proportions - prop_dict = {idhash: cnt / cnt_total for idhash, cnt in idhash_cnt_dict.items()} + prop_dict = {idhash: cnt / cnt_total for idhash, cnt in idhashes_cnts_dict.items()} return prop_dict diff --git a/generator/utilities/commandline_interface.py b/generator/utilities/commandline_interface.py index a8de020..0b72757 100644 --- a/generator/utilities/commandline_interface.py +++ b/generator/utilities/commandline_interface.py @@ -1,31 +1,45 @@ -import argparse -from beartype import beartype import cons -@beartype -def commandline_interface() -> dict: +import argparse +from typing import Dict + +def commandline_interface() -> Dict[str, object]: """ A commandline interface for parsing input parameters with - + Windows python RandomTeleComData\\generator\\main.py --n_users 100 --random_seed 1 --n_itr 2 - + Linux python3 RandomTeleComData/generator/main.py --n_users 100 --random_seed 1 --n_itr 2 - + Parameters ---------- - + n_users : int + The number of users to generate random telecom payments data for. + use_random_seed : int + Use a set random seed for reproducible results; must be either 0 or 1. + n_itr : int + Number of iterations to run. + registration_start_date : str + The start date for registrations. + registration_end_date : str + The end date for registrations. + transaction_start_date : str + The start date for transactions. + transaction_end_date : str + The end date for transactions. + Returns ------- - dict + Dict[str, object] A dictionary of key, value pairs where the values are parsed input parameters. """ # define argument parser object parser = argparse.ArgumentParser(description="Execute Random TeleCom Data Programme.") # add input arguments parser.add_argument("--n_users", action="store", dest="n_users", type=int, default=cons.default_n_users, help="Integer, the number of users to generate random telecom payments data for",) - parser.add_argument("--use_random_seed", action="store", dest="use_random_seed", type=int, default=cons.default_use_random_seed, help="Integer, use a set random seed for reproducible results; must be either 0 or 1",) + parser.add_argument("--use_random_seed", action="store", dest="use_random_seed", type=int, default=cons.default_use_random_seed, choices=[0, 1], help="Integer, use a set random seed for reproducible results; must be either 0 or 1",) parser.add_argument("--n_itr", action="store", dest="n_itr", type=int, default=cons.default_n_itr, help="Integer, number of iterations to run",) parser.add_argument("--registration_start_date", action="store", dest="registration_start_date", type=str, default=cons.default_registration_start_date, help="String, the start date for registrations",) parser.add_argument("--registration_end_date", action="store", dest="registration_end_date", type=str, default=cons.default_registration_end_date, help="String, the end date for registrations",) diff --git a/generator/utilities/gen_country_codes_dict.py b/generator/utilities/gen_country_codes_dict.py index c36d9fa..1dd62bf 100644 --- a/generator/utilities/gen_country_codes_dict.py +++ b/generator/utilities/gen_country_codes_dict.py @@ -1,38 +1,56 @@ import cons +from utilities.cnt2prop_dict import cnt2prop_dict + +import os import numpy as np import pandas as pd -from utilities.cnt2prop_dict import cnt2prop_dict from beartype import beartype +from typing import Dict, Union @beartype def gen_country_codes_dict( - idhashes_cnts_dict:dict, - fpath_countrieseurope:str=cons.fpath_countrieseurope - ) -> dict: + idhashes_cnts_dict:Dict[str, Union[int, np.int64]], + fpath_countrieseurope:str=cons.fpath_countrieseurope, + ) -> Dict[str, Union[int, np.int64]]: """ - Generates a dictionary of random country codes for an input dictionary of idhashes counts. - + Generates a dictionary of randomLy sampled country codes for an input dictionary of idhashes counts. + Parameters ---------- - idhashes_cnts_dict : dict + idhashes_cnts_dict : Dict[str, Union[int, np.int64]] A dictionary of idhashes counts. fpath_countrieseurope : str The file path to the european countries reference file, default is cons.fpath_countrieseurope. - + Returns ------- - dict + Dict[str, Union[int, np.int64]] A dictionary of idhashes country codes. + + Examples + -------- + ``` + import cons + idhashes_cnts_dict:{'abcd1234': 5, 'defg4567': 3, 'ghij7891': 7} + gen_country_codes_dict(idhashes_cnts_dict=idhashes_cnts_dict, + fpath_countrieseurope=cons.fpath_countrieseurope, + ) + ``` """ - + # check file path exists + if os.path.exists(fpath_countrieseurope) == False: + raise FileNotFoundError(f"File not found: {fpath_countrieseurope}") # load population data of european countries european_populations_cnt_data = pd.read_csv(filepath_or_buffer=fpath_countrieseurope, usecols=["ISO numeric", "population"],) # convert to a dictionary of ISO country codes with population counts - european_populations_cnt_dict = european_populations_cnt_data.set_index("ISO numeric").to_dict()["population"] + european_populations_cnt_dict = european_populations_cnt_data.set_index("ISO numeric")["population"].to_dict() # convert dictionary of population counts to dictionary of population proportions european_populations_props_dict = cnt2prop_dict(european_populations_cnt_dict) # extract out idhashes from idhashes counts dictionary idhashes_list = list(idhashes_cnts_dict.keys()) + # check population proportions sum to 1.0 + if np.isclose(sum(european_populations_props_dict.values()), 1.0) == False: + raise ValueError("Population proportions do not sum to 1.0") # randomly generate country codes for all idhashes based on population proportions country_codes_list = list( np.random.choice( diff --git a/generator/utilities/gen_country_codes_map.py b/generator/utilities/gen_country_codes_map.py index d6254ff..ef20723 100644 --- a/generator/utilities/gen_country_codes_map.py +++ b/generator/utilities/gen_country_codes_map.py @@ -1,26 +1,36 @@ import cons + +import numpy as np import pandas as pd from beartype import beartype +from typing import Dict, Union @beartype def gen_country_codes_map( - fpath_countrieseurope:str=cons.fpath_countrieseurope - ) -> dict: + fpath_countrieseurope:str=cons.fpath_countrieseurope, + ) -> Dict[int, Union[str, np.int64]]: """ Generates a dictionary of ISO numeric codes mapping to ISO alpha codes. - + Parameters ---------- fpath_countrieseurope : str The full file path to the european countries reference file, default is cons.fpath_countrieseurope. - + Returns ------- - dict + Dict[int, Union[str, np.int64]] A dictionary of ISO numeric codes mapping to ISO alpha codes. + + Examples + -------- + ``` + import cons + gen_country_codes_map(fpath_countrieseurope=cons.fpath_countrieseurope) + ``` """ # load european county codes data country_codes_data = pd.read_csv(filepath_or_buffer=fpath_countrieseurope, usecols=["ISO numeric", "ISO alpha 2"],) # convert data to a dictionary of ISO numeric codes mapping to ISO alpha codes - country_codes_map = country_codes_data.set_index("ISO numeric").to_dict()["ISO alpha 2"] + country_codes_map = country_codes_data.set_index("ISO numeric")["ISO alpha 2"].to_dict() return country_codes_map diff --git a/generator/utilities/gen_dates_dict.py b/generator/utilities/gen_dates_dict.py index 05f29f9..2c3737a 100644 --- a/generator/utilities/gen_dates_dict.py +++ b/generator/utilities/gen_dates_dict.py @@ -2,19 +2,20 @@ import numpy as np from datetime import datetime from beartype import beartype +from typing import Dict, Union @beartype def gen_dates_dict( - idhashes_cnts_dict:dict, + idhashes_cnts_dict:Dict[str, Union[str, int, np.int64]], start_date:str, - end_date:str - ) -> dict: + end_date:str, + ) -> Dict[str, Union[pd.Timestamp, np.datetime64]]: """ Generates a dictionary of random dates for an input dictionary of idhashes counts. Parameters ---------- - idhashes_cnts_dict : dict + idhashes_cnts_dict : Dict[str, Union[str, np.int64]] A dictionary of idhashes counts. start_date : str The start date ("%Y-%m-%d") to generate random dates from. @@ -23,11 +24,11 @@ def gen_dates_dict( Returns ------- - dict + Dict[str, Union[pd.Timestamp,int, np.datetime64]] A dictionary of idhashes dates. """ # generate a range of dates between the given input start and end dates - dates = pd.date_range(start=datetime.strptime(start_date, "%Y-%m-%d"), end=datetime.strptime(end_date, "%Y-%m-%d") - pd.Timedelta(days=1), freq="d",) + dates = pd.date_range(start=datetime.strptime(start_date, "%Y-%m-%d"), end=datetime.strptime(end_date, "%Y-%m-%d"), freq="d", inclusive="both",) # extract out the idhashes from idhashes counts dictionary idhashes_list = list(idhashes_cnts_dict.keys()) # randomly sample dates for each of the idhashes diff --git a/generator/utilities/gen_idhash_cnt_dict.py b/generator/utilities/gen_idhash_cnt_dict.py index 58e5bda..381897b 100644 --- a/generator/utilities/gen_idhash_cnt_dict.py +++ b/generator/utilities/gen_idhash_cnt_dict.py @@ -1,9 +1,10 @@ -import numpy as np from utilities.gen_random_hash import gen_random_hash from utilities.gen_random_id import gen_random_id from utilities.gen_random_poisson_power import gen_random_poisson_power + +import numpy as np from beartype import beartype -from typing import Union +from typing import Union, Dict @beartype def gen_idhash_cnt_dict( @@ -11,15 +12,15 @@ def gen_idhash_cnt_dict( n:Union[int,np.int64], lam:Union[int,float], nbytes:int=16, - power:int=2 - ) -> dict: + power:int=2, + ) -> Dict[str, Union[str, int, np.int64]]: """ Generates a dictionary of n random idhashes and associated counts. - + Parameters ---------- idhash_type : str - Whether to generate a "id2 or "hash" value. + Whether to generate a "id" or "hash" value. n : int The total number of idhash values to generate. lam : float @@ -28,11 +29,24 @@ def gen_idhash_cnt_dict( The number bytes to include in the idhash value, default is 16. power : int The power of the polynomial random poisson variable, default is 2. - + Returns ------- - dict + Dict[str, Union[str, int, np.int64]] A dictionary of idhashes counts. + + Examples + -------- + ``` + import cons + gen_idhash_cnt_dict( + idhash_type="hash", + n=10, + lam=5.0, + nbytes=16, + power=2, + ) + ``` """ # if generating a random hash value if idhash_type == "hash": @@ -40,8 +54,12 @@ def gen_idhash_cnt_dict( # else if generating a random id value elif idhash_type == "id": idhash_list = gen_random_id(size=n, nbytes=nbytes) + else: + raise ValueError("idhash_type must be either 'id' or 'hash'") # randomly sample n counts from a squared poisson distribution with given lam value - cnts_list = list(gen_random_poisson_power(lam=lam, size=n, power=power)) + cnts_list = gen_random_poisson_power(lam=lam, size=n, power=power).tolist() # return a dictionary of idhashes and counts + if len(idhash_list) != len(set(idhash_list)): + raise ValueError("Generated idhash values are not unique, please increase nbytes value") idhash_dict = dict(zip(idhash_list, cnts_list)) return idhash_dict diff --git a/generator/utilities/gen_obj_idhash_series.py b/generator/utilities/gen_obj_idhash_series.py index cb15463..c90401f 100644 --- a/generator/utilities/gen_obj_idhash_series.py +++ b/generator/utilities/gen_obj_idhash_series.py @@ -1,28 +1,29 @@ import pandas as pd from beartype import beartype +from typing import List @beartype def gen_obj_idhash_series( - idhashes_props_dict:dict, + idhashes:List[str], n_counts_series:pd.Series ) -> pd.Series: """ - Generates a series of entity idhash lists using the entity counts per user Series and idhashes proportions dictionary. - + Generates a series of entity idhash lists using the entity counts per user Series and idhashes list. + Parameters ---------- - idhashes_props_dict : dict - The idhash proportions dictionary. + idhashes : List[str] + The idhashes list. n_counts_series : pd.Series The entity counts for each uid as Series. - + Returns ------- pd.Series A Series of lists containing entity idhashes for each user. """ # create an exploded series for idhashes within the entity object - obj_idhash_series = pd.Series(data=idhashes_props_dict.keys(), index=n_counts_series.apply(lambda x: range(x)).explode().index) + obj_idhash_series = pd.Series(data=idhashes, index=n_counts_series.index.repeat(n_counts_series.values).to_list()) # group by uid index and collate idhashes as lists - obj_idhash_agg = obj_idhash_series.groupby(level=0).apply(lambda series: series.to_list()) + obj_idhash_agg = obj_idhash_series.groupby(level=0).apply(list) return obj_idhash_agg \ No newline at end of file diff --git a/generator/utilities/gen_shared_idhashes.py b/generator/utilities/gen_shared_idhashes.py index aa72f7b..a1b880c 100644 --- a/generator/utilities/gen_shared_idhashes.py +++ b/generator/utilities/gen_shared_idhashes.py @@ -4,7 +4,7 @@ @beartype def gen_shared_idhashes( - idhash_cnt_dict:dict, + idhashes_cnts_dict:dict, prop_shared_idhashes:float ) -> dict: """ @@ -23,11 +23,11 @@ def gen_shared_idhashes( A dictionary of shared idhashes proportion. """ # calculate the total number of idhashes - n_idhashes = len(idhash_cnt_dict) + n_idhashes = len(idhashes_cnts_dict) # randomly sample the idhashes based on the total proportion of shared idhashes shared_idhashes_list = list( np.random.choice( - a=list(idhash_cnt_dict.keys()), + a=list(idhashes_cnts_dict.keys()), size=int(np.round(n_idhashes * prop_shared_idhashes)), replace=False ) From 3fec8eb0fcff1212d9985dfa39520512caa43a1f Mon Sep 17 00:00:00 2001 From: Oisin Date: Fri, 16 Jan 2026 11:08:04 +0000 Subject: [PATCH 13/16] Reviewed logic using copilot --- data/unittest/transaction_data.parquet | Bin 30601 -> 30598 bytes generator/cons.py | 5 ++- generator/objects/Card.py | 2 +- generator/objects/Ip.py | 2 +- generator/objects/Transaction.py | 2 +- generator/objects/User.py | 4 +-- .../utilities/test_gen_country_codes_dict.py | 6 ++-- .../utilities/test_gen_country_codes_map.py | 2 +- .../utilities/test_gen_dates_dict.py | 6 ++-- generator/utilities/align_country_codes.py | 7 ++++ generator/utilities/gen_country_codes_dict.py | 16 ++++----- generator/utilities/gen_dates_dict.py | 23 ++++++++----- generator/utilities/gen_obj_idhash_series.py | 8 +++++ .../utilities/gen_random_entity_counts.py | 32 +++++++++++------- generator/utilities/gen_random_hash.py | 18 ++++++---- generator/utilities/gen_random_id.py | 21 ++++++++---- .../utilities/gen_random_poisson_power.py | 14 +++++--- 17 files changed, 107 insertions(+), 61 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index e11b43d5d10553d774e54590af5114170ab3bae6..37b73a4d6825b187c0dcc11698ecc88d29a8e90c 100644 GIT binary patch delta 724 zcmXBOYe*Au008j&&%3!TvAgZ=)KZ)IcPKv5Wnu+)db7>uV|w;L%$b`Rim8R@ht!#h zE*05{nUaJ~abhwzdr%T$;UmICslX2z1>%>|h>$F*WnX@uf9>0#bsIRp;XrGJi(i|+R!#yIrd=-g_4nHrnYHIN4zdm}^Lwg^#=P5BelmG;wP^!=Uoj8jI?uYh&Iaxrg&en?%1kv&T)C<1i zmU{dCMbCtWg%(lhKm45{IymcmSN0lm9 z*7Z3y1|aB#ODk|qo@7v%;`-4abp2~IX?fFI*NtAe3A2)mbzN$(rp*2bQ#OgB!Sc>e z@XoC$iR}m~KyfMrMOcf>Qb(vPO5XUqv+f5>P8bH>WBw@0h$i%XJKh3J{LM z2%fxswg&>mzzzrloo0=(ee%v z)gy|c)-ejF#FRKN(J4IAs2f_rPD`w3Sx~rmXyNyQwC?HdWn!{!Ie*Xbt0efwgMhxz zw%djEqi^R&zs&6_eCtarlQoVbUdI``Y1pO5XO-t?n;Redgy5x%bcO z4CQ>0W9dfddw42DrZ%x$hTK`igs1z9B}3Eg2a;lj+l>A)F)}MQrlN5&(p{65FGHOlYi delta 743 zcmXBOUq};i0KoCz@AkVnP3ms#PHS45oyR1tWusyh-Oal-bLehunMvTXnbulnMgJ)0 zDqF+Kb}@&Rltm~}m4Fj}qE@|ppS};D);V}-4)%W`=&cs0 zM1q)#)tQRTor<_!P9qOBtIpdCXWp*>!Q6*Jrta{9JW!5q<%SGK$FF(kvmXMsVfEg=W|v-rOD>T0036!-`eGc z%2my_>ur>}QEFjSlS0?+g!Sg??dp4BnOPLfH+LJ#6BiEOX*v*@;nlj|GBegXOe9&0 zT6a3uH{Hd+D0As0GpRO6i}xBk(Rcdn3uJhHFRip4HJoEA%(bOgOz^eN@qw{?B0b%{ z%RPm1xKK6lb2?(ySLToZFqLIQm~mORE7;>)u9MQ8$2lXAPj?yF0B6)EKu4zZ1# zK#`?qQ;El4>-G7(dzYlsJB!Wyv7_}~e?zV3 zXu~1D)zjcR*5swrbLHb=k-8nDVpt(Hg;NBjq?9@99?M&#;${FsFnMuJioUV3cPaj4 zYr&bJ`C4h@5NK9uKe8w@yE zo4%#-9Dpr%4Z8x2dW>`uwip6AE$xa8emiFvHTfNA|9RGs-4C?uE_kra5`wKVHDZqr zF3eO~62?sMv?dI!ArL=et Dict[str, Union[int, np.int64]]: """ - Generates a dictionary of randomLy sampled country codes for an input dictionary of idhashes counts. + Generates a dictionary of randomLy sampled country codes for an input list of idhashes. Parameters ---------- - idhashes_cnts_dict : Dict[str, Union[int, np.int64]] - A dictionary of idhashes counts. + idhashes : List[str] + A list of idhashes. fpath_countrieseurope : str The file path to the european countries reference file, default is cons.fpath_countrieseurope. @@ -46,8 +46,6 @@ def gen_country_codes_dict( european_populations_cnt_dict = european_populations_cnt_data.set_index("ISO numeric")["population"].to_dict() # convert dictionary of population counts to dictionary of population proportions european_populations_props_dict = cnt2prop_dict(european_populations_cnt_dict) - # extract out idhashes from idhashes counts dictionary - idhashes_list = list(idhashes_cnts_dict.keys()) # check population proportions sum to 1.0 if np.isclose(sum(european_populations_props_dict.values()), 1.0) == False: raise ValueError("Population proportions do not sum to 1.0") @@ -57,9 +55,9 @@ def gen_country_codes_dict( a=list(european_populations_props_dict.keys()), p=list(european_populations_props_dict.values()), replace=True, - size=len(idhashes_list), + size=len(idhashes), ) ) # return a dictionary of idhashes and country codes - idhashes_country_codes = dict(zip(idhashes_list, country_codes_list)) + idhashes_country_codes = dict(zip(idhashes, country_codes_list)) return idhashes_country_codes diff --git a/generator/utilities/gen_dates_dict.py b/generator/utilities/gen_dates_dict.py index 2c3737a..b02bca0 100644 --- a/generator/utilities/gen_dates_dict.py +++ b/generator/utilities/gen_dates_dict.py @@ -2,21 +2,21 @@ import numpy as np from datetime import datetime from beartype import beartype -from typing import Dict, Union +from typing import Dict, Union, List @beartype def gen_dates_dict( - idhashes_cnts_dict:Dict[str, Union[str, int, np.int64]], + idhashes:List[str], start_date:str, end_date:str, ) -> Dict[str, Union[pd.Timestamp, np.datetime64]]: """ - Generates a dictionary of random dates for an input dictionary of idhashes counts. + Generates a dictionary of random dates for an input list of idhashes. Parameters ---------- - idhashes_cnts_dict : Dict[str, Union[str, np.int64]] - A dictionary of idhashes counts. + idhashes : List[str] + A list of idhashes. start_date : str The start date ("%Y-%m-%d") to generate random dates from. end_date : str @@ -26,13 +26,18 @@ def gen_dates_dict( ------- Dict[str, Union[pd.Timestamp,int, np.datetime64]] A dictionary of idhashes dates. + + Examples + -------- + ``` + idhashes = ['2e23f63807f6170a', 'b8816ed926bf9f83', 'b010fdb44fa68822'] + gen_dates_dict(idhashes=idhashes, start_date='2020-01-01', end_date='2023-01-01') + ``` """ # generate a range of dates between the given input start and end dates dates = pd.date_range(start=datetime.strptime(start_date, "%Y-%m-%d"), end=datetime.strptime(end_date, "%Y-%m-%d"), freq="d", inclusive="both",) - # extract out the idhashes from idhashes counts dictionary - idhashes_list = list(idhashes_cnts_dict.keys()) # randomly sample dates for each of the idhashes - dates_list = list(np.random.choice(a=dates, replace=True, size=len(idhashes_list))) + dates_list = list(np.random.choice(a=dates, replace=True, size=len(idhashes))) # return a dictionary of idhashes and dates - idhashes_dates_dict = dict(zip(idhashes_list, dates_list)) + idhashes_dates_dict = dict(zip(idhashes, dates_list)) return idhashes_dates_dict diff --git a/generator/utilities/gen_obj_idhash_series.py b/generator/utilities/gen_obj_idhash_series.py index c90401f..cbefbab 100644 --- a/generator/utilities/gen_obj_idhash_series.py +++ b/generator/utilities/gen_obj_idhash_series.py @@ -21,6 +21,14 @@ def gen_obj_idhash_series( ------- pd.Series A Series of lists containing entity idhashes for each user. + + Examples + -------- + ``` + idhashes = ['2e23f63807f6170a', 'b8816ed926bf9f83', 'b010fdb44fa68822'] + n_counts_series = pd.Series(data=[2, 1, 2], index=range(3), name='n_entities') + gen_obj_idhash_series(idhashes=idhashes, n_counts_series=n_counts_series) + ``` """ # create an exploded series for idhashes within the entity object obj_idhash_series = pd.Series(data=idhashes, index=n_counts_series.index.repeat(n_counts_series.values).to_list()) diff --git a/generator/utilities/gen_random_entity_counts.py b/generator/utilities/gen_random_entity_counts.py index 4490b0e..7b18b49 100644 --- a/generator/utilities/gen_random_entity_counts.py +++ b/generator/utilities/gen_random_entity_counts.py @@ -1,40 +1,46 @@ -import numpy as np -import pandas as pd import cons from objects.User import User from utilities.gen_random_poisson_power import gen_random_poisson_power + +import numpy as np +import pandas as pd from beartype import beartype @beartype def gen_random_entity_counts( user_obj:User, - transaction_timescale:float=1.0 + transaction_timescale:float=1.0, ) -> pd.DataFrame: """ Generates a dataframe of entity counts for all users from a given user object. - + Parameters ---------- user_obj : User The User class object. transaction_timescale : float The transaction timescale where 1.0 is a single year of transactions, default is 1.0. - + Returns ------- pd.DataFrame A dataframe of entity counts for all users from the specified user object. + + Examples + -------- + ``` + from objects.User import User + user_obj=User(n_user_ids=1000, start_date='2020-01-01', end_date='2023-01-01') + gen_random_entity_counts(user_obj=user_obj, transaction_timescale=1.0) + ``` """ # create an empty pandas dataframe to hold the random aggregated data random_entity_counts = pd.DataFrame() # randomly sample from the random user uids - random_entity_counts['uid'] = np.random.choice(a = list(user_obj.user_ids_props_dict.keys()), size = user_obj.n_user_ids, replace = False) + random_entity_counts["uid"] = np.random.choice(a=user_obj.user_ids, size=user_obj.n_user_ids, replace=False) # randomly simulate the number of entities per user - random_entity_counts['n_devices'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["device"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["device"]["power"]) - random_entity_counts['n_cards'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["card"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["card"]["power"]) - random_entity_counts['n_ips'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["ip"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["ip"]["power"]) - random_entity_counts['n_transactions'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["transaction"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["transaction"]["power"]) - random_entity_counts['n_applications'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["application"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["application"]["power"]) - # scale n transactions by - random_entity_counts['n_transactions'] = (random_entity_counts['n_transactions'] * transaction_timescale).round().astype(int) + for object_type in cons.object_types: + random_entity_counts[f"n_{object_type}s"] = gen_random_poisson_power(lam = cons.data_model_poisson_params[object_type]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params[object_type]["power"]) + # scale n transactions by + random_entity_counts["n_transactions"] = (random_entity_counts["n_transactions"] * transaction_timescale).astype(int) return random_entity_counts diff --git a/generator/utilities/gen_random_hash.py b/generator/utilities/gen_random_hash.py index e7c6f98..2cec880 100644 --- a/generator/utilities/gen_random_hash.py +++ b/generator/utilities/gen_random_hash.py @@ -1,30 +1,36 @@ import string import numpy as np from beartype import beartype -from typing import Union +from typing import Union, List @beartype def gen_random_hash( size:Union[int,np.int64], - nbytes:int=16 - ) -> list: + nbytes:int=16, + ) -> List[str]: """ Generates a list of random hashes. - + Parameters ---------- size : int The total number of hashes to generate. nbytes : int The number of alphanumeric values in each hash, default is 16. - + Returns ------- list A list of random hashes. + + Examples + -------- + ``` + gen_random_hash(size=5, nbytes=16) + ``` """ # generate a list of digits and lower case letters from string library alphanumeric = list(string.digits) + list(string.ascii_lowercase)[:6] # randomly sample nbytes digits, string concatenate and convert to integers - random_hashes = ["".join(np.random.choice(a=alphanumeric, size=nbytes, replace=True)) for i in range(size)] + random_hashes = [''.join(row) for row in np.random.choice(a=alphanumeric, size=(size, nbytes), replace=True).tolist()] return random_hashes diff --git a/generator/utilities/gen_random_id.py b/generator/utilities/gen_random_id.py index a6e8c8f..43d1f5a 100644 --- a/generator/utilities/gen_random_id.py +++ b/generator/utilities/gen_random_id.py @@ -1,29 +1,36 @@ import string import numpy as np from beartype import beartype +from typing import Union, List @beartype def gen_random_id( - size:int, - nbytes:int=16 - ) -> list: + size:Union[int,np.int64], + nbytes:int=16, + ) -> List[str]: """ Generates a list of random ids. - + Parameters ---------- size : int The total number of ids to generate. nbytes : int The number of numeric values in each id, default is 16. - + Returns ------- list A list of random ids. + + Examples + -------- + ``` + gen_random_id(size=5, nbytes=16) + ``` """ # generate a list of digits from string library digits = list(string.digits) - # randomly sample nbytes digits, string concatenate and convert to integers - random_ids = ["".join(np.random.choice(a=digits, size=nbytes, replace=True))for i in range(size)] + # randomly sample nbytes digits, string concatenate + random_ids = ["".join(row) for row in np.random.choice(a=digits, size=(size, nbytes), replace=True).tolist()] return random_ids diff --git a/generator/utilities/gen_random_poisson_power.py b/generator/utilities/gen_random_poisson_power.py index e3d64ca..e915046 100644 --- a/generator/utilities/gen_random_poisson_power.py +++ b/generator/utilities/gen_random_poisson_power.py @@ -10,20 +10,26 @@ def gen_random_poisson_power( ) -> np.ndarray: """ Generates data from a polynomial random poisson variable to a given power. - + Parameters ---------- - lam : int + lam : int,float The lambda of the underlying poisson random variable. size : int The number of values to generate. power : int The power of the polynomial sum. - + Returns ------- numpy.ndarray - The random squared poisson values. + The random sum of powered poisson values. + + Examples + -------- + ``` + gen_random_poisson_power(lam=3.0, size=10, power=2) + ``` """ # randomly generate a square poisson distribution a = np.array([np.random.poisson(lam, size) ** p for p in range(1, power+1)]).sum(axis = 0) + 1 From d41275f0bf82b580c97c675e7e73fda503019273 Mon Sep 17 00:00:00 2001 From: Oisin Date: Sat, 17 Jan 2026 13:43:11 +0000 Subject: [PATCH 14/16] Updated code with copilot review --- data/unittest/transaction_data.parquet | Bin 30598 -> 30584 bytes data/unittest/user_data.parquet | Bin 18997 -> 18980 bytes generator/app/gen_random_telecom_data.py | 4 +- generator/app/gen_trans_data.py | 4 +- generator/cons.py | 4 +- generator/objects/Card.py | 12 ++--- generator/objects/Device.py | 2 +- generator/objects/Ip.py | 12 ++--- generator/objects/User.py | 26 +++++------ .../unittests/app/test_gen_user_trans_data.py | 12 ++--- generator/unittests/objects/test_Card.py | 4 +- generator/unittests/objects/test_Ip.py | 4 +- generator/unittests/objects/test_User.py | 6 +-- .../utilities/test_gen_country_codes_dict.py | 4 +- .../utilities/test_gen_country_codes_map.py | 4 +- .../utilities/test_gen_obj_idhash_series.py | 6 +-- .../test_gen_random_entity_counts.py | 6 +-- .../utilities/test_gen_shared_idhashes.py | 4 +- generator/utilities/gen_country_codes_dict.py | 14 +++--- generator/utilities/gen_country_codes_map.py | 10 ++-- generator/utilities/gen_shared_idhashes.py | 44 ++++++++++-------- .../utilities/gen_trans_rejection_rates.py | 41 ++++++++-------- generator/utilities/gen_user_names_file.py | 2 +- 23 files changed, 116 insertions(+), 109 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index 37b73a4d6825b187c0dcc11698ecc88d29a8e90c..0da3bed32bc74373ffbfeb5b45757c295f2ee073 100644 GIT binary patch delta 14234 zcmdUW4Rlo1z3(}bnJ|+~hDkCr3^CeC$ixYacs};|;*Mnk8Au>#fMBDwE&H6k4@3b& z2#T$}lOR|?q=5lM)Y}kHfvXljscnQ|MV>lRZ8Xjd6hA_ zl-?tcX^k&;XudVkXWZhuHE6s2x3=W`x0D9vZEP9kt82UVHpz8keO0UbmsW{viP3s_ zY>ZNu@-J8-verB2_xnrY(^{EtiFj-8{KjZsVm@i3qw2T#K9!s=N`2$Tlr(hBn@w+R zy=7T{Z)+&ge@~7&QyaEar}Opwm$StD{_4KETN6uqr_Gzcq<4&8i2LJ*#P~OhUlae~ zlUb*yx37mTE)@jfmY!RBc%UrSzu@LYbZKBJ>0v$8`+>8dhxSqLu8YmS`oH7benX6( zE|!-}R&x&RTiVYr-rZupi1#lL%W8XXzLhUr7!pLijrH}+U&g->#Ld%XeK+^@EbhVW zPyGLV>)QX-i5GA7Uz51W{B%P)AqYQP8+|5$-*?^d@L)v(H&tDH*Ps16y|LE4BGv?YX78FMNz*r$%}TZ0xT@{$ZFRd-r+g!&TS?QD z*oZwo9gnxHd8t*|_}I*@D`VsGvtQr!QhjB9c6CeHo~c7sUSAEB#6K#2uXLj>TdJX( z29b$jd2k|TvXW$)s;TIjVP3=(F_y(UB*Rc7Ls9++EXCcU{+^NLs2Cq9-WSYX7F|AJ zG4B`il{IaPBIu)Adls{${e7G+@?I~FE=?3GLyd*Q(d)*Tu= zE&ffVv8JwOIhAe?^_&>lymHs{=Lcui9rMi&Y@Mi;Di@kr?A0^?jFnx?qANo~t@aaIyxCX*D$QJEwe zu8fzsE#Uw)qFJ`%Iu6H6#MKD3iK-~7DRWa2W2Q|^)ukGjOo^zD9q7#3+|Vsc#`URe zOPY=1Ff`k+Q7g7;;TNB5UqsvEWq&M%;Xfsr@zWX}e&!;{JbaO4wtFOV2}ou}YfYc= z1(3|hjUSUtS^4nCB=gPNq#GalDlZGPObtk8`;O@i+p5zuKr%aP{u;^bDT)8K_-y$!-}x!U((w3}a#X=Ji+Sh5 zWlK}C)HND^ZC~sHT9;QgiJh6*oGx6%Q&SZAI&pMhac!5pJVmA@gvqiHxZbZ#6K50` zhv|PI)nDw#J^y__#$rF(|6)Hv|G6LA{vYhe#=o{7?f=L9*cFJcEv~DmFJxVr8r0DU zDgxD`>0Wsmu4cQAK~2JJrVzY@A4$ZtEhbCMa;WK59IDW^Wk&Em*;W+qT^6%V$x%(! zQcOoi5t@W3lvzwQ9m|vrBfzqj$xV$LjB?kJ3YIFNA-Nq3H?}W2;@2QN|bw z-P9F@8&MoKQ4L&QwKaCy1k(Km4dlEZL`sqxSs!{HR#+Y(gCU<}<(h{)dI% z#qgHTC&W=7J^K;<_>012Vqi{cb&IdbT{&(@Xdf#cGVjusPOWbU*B>7HVt8Dn?ydXh zM=D3|es#Ur=&w2`?YXyj&>S2Zb0TzSa>Y1s!PJVZ+uHNf+P*jIrd>MuO$`tAwWT55_GH_dkx-e({h6};nKjXt6(^^z^@#tv)iv{8-q~C>OWs^Renr)| z4eRR**5*YsT=BdhjtF%=x-7Qk2SW2~N$2-hmvWm_( zU5`{;X3E4?R13U;P7GwSj?0{aVkw3p16v$P^CX8wYyg2Pqvs@rsNN#DDjANhE3RcJ zOtx_?^sY)RE-SijtAvEI1b=X4-7zFIub~Hynuh9djbITLl}*bGtP4wwI$|U#6B86D z1&(GZ)7^qA$%@2GLv~CUDbqC+gA++7nhVt61X2* zVNK0h+=E7l#&ro;V@Zyf0eTVBQt^Hh>o={yaMpF0t^mFmZd-C3hBs-XKuxYuL&jbL ztpZtKDK|m9hH6ubI0Uay;(~bzpw=diB&WNGYy%yk$taZ#n>P)4s|MmKT$VLkl3BXh zvk2P9W~U__6{O zVKQRl`ZhN$W^!E(VATL^$#pE7$+}|6p*P?#87I1los#i2`R**W6j^3aiLRnK26RSe zR?myVLK%bp@Azas5lG9iE@-w+=;vGk z=bfsr-O^vX_3^PsTKq@O=Z>h_mq}pm7JqI1*aOA>H|P83RZliNPH^k_TeBH|JE29`mWUPl_Ao_`A2K8b}l$l zJ-PgYPTvmy*`*7AnCd859e8<5zE!+x!`S327dDTcdS}hy)Q)NL;j?vh&)8@vPY}QiIcTsh2rTzLv?%CAK|T| z%Y(Ph4hY!^AA68!Gd##5c#u2(#Dm;hIm7cHHFG}pAbT!tudM1W9lX)=AQpZ4FFeQ+ z`{)LEkg9TckiLr^JP%UUJndr- zvN{s~ZMY+T{Y6|}IqD)BDH=mfn2zI8BB4M7F_;n!(v=cw+2~CLub?z!;E|zf1_4h4 zEM%R@re@n{Cc{C0;U&YSCUZ2)(t&R>^CW^^;LsnkMNCwiC3AEcgoS~2qAEVgTs5?u zFxQlEZOLRBKqe5&qD8R=Zt4{M21S9Qc63eCP|sL|;vpoitq}atRb53=Bw1Dh%d_Z3 z$O6l98IV=gT{M2drrgn(;yNyIx$8otm>SlrIVxJll~vd;)H^{B1D_~ovW1Q{fLx{u zR5cwFKnoS_ZIKDZuj(?>xNN1Hi789C1y!-BiZ;cCDFJd(rpTJ3!^Q^ zrLIL??!mvRh|i{MRadF5Dh7a0v2DZkpc0s3a1XDtreR=p03RTy1kA_nTG$H;;e{`w zsw<_Nz5Ui*6Bp(VS9C}9K4+;+rd;Q)#}cw57GeSyKc9i0K$Dvm)jeHE!49|=O9mv$ zhDSiqavVT!>6|$_b!a-5b>KiWu#f5hTvT>(pCB!{qiVA3YNis(W-W=!43GpAfn9>tN-o4cOLn&2|FkO=b|sQB24`W5GQV26PITV91!r|(Hx>mj{F%< z^g@8j-~k4mi(ThwKfd24IBMJU@SgC%x|GJCY&qq;&L?P17qT)qR#h2Px2jmcPt+h| zgjg2*9TYIdnH9euOBVk8LUJ4r zq{>s_>91sml-8G9EAEuKrj%3+4R@~`PMuoPRNOo7yF)#v0;x+bPro6Eqr37y>b$DB zWcgQ>T&k}-Azn54!9sX-Jy~0&m#<6}M>22uqou;!alJ%%Vkop-m{GjqLI0%>`NHG+ zKkNT`=HQwwv$f1!TYQNXO{a3n&GY!?ec`u!!NkDlKD*iXGEYYL+)%97rY!A$fn4XW zCwra{=bq|{lL`Bu?utIN?v=e&r{{_*2F&vFx0US__wUU-H!`mC$Afn3?n z@wvHIA#{RRTHP0a$La92peGwwTK@@8gvzKe9Hy%2-19?VOh<7z+CXs%l45&m31wh1 z)UFFUQ*exUNwt7AI){63&=3ZA5TOcM-qleAP)UY!(brhErAaDO4VMhtg6PQ-395>k z)KNH8nF}1tLfil*5!%418?aC)DttToy`VuwXtG0Wpd`S@kfCV|2lj_+fpx$c$Sh7Q z<`B)0y;soeCY3pUC3wL@EyH5a3Y1Hd3l&HVh$9`=2zZRw9Tc|&NHmzDp!_Lr(=eEI zQNfwq(h3&g61v*~cYBL+W#;NoF&w&*JArJLFo^?64PZ6VD4-iI4O)S#wyi-hK|M7k zO&t;swoRh83w1#79;Dg0sOK=brrJ7^3B`8M;-+Tk6hhARVk^sF1@Zc*Wk&`DKz1=~tS3fgg#k+er3&^FHzqOJ(LGXte8U&I z=v$zkn-7|nMX4rJ#nqUqNmhWF*c-JVJ2ux`heEIBTLICELnVW0x`}dG)G)C(63i|mE(xpy^CIXmWZghEM5squj$#0YRavFb$B^{kC8&H?(F;UYEbvVr zmOd4J4muxK<&I=nP>W8*I#2v*Iua`AQ`^dafE#dd7H|+4V{l6f3}d-~ zJ?DX4+;!qHH!rq1&cfo(!f05TX7)gn;qlInP_0==)RvSOo-Y}EyuSp^mLjTyMj#j2Q)I5hY6gK3!?y1mr?OOH=wjoF)Ei`=pw2aAtoVMkx=(I zXiIV0QB){>8+gT4Xd=P_M+qQLhgpT9F$-vv-H<-62>}H~h8tBl4=hI|VWTCB5>-a^ zz&-b8xZy%r$#9br$`jB=m_-12mQ55L$4D(lQ6NWM!?R8_ecZ&Fb(g4s1xOQDY8paH zibGd4r5Z9aovpf`*s5k1 z9LPi2b7N*xGtpfIM zSwQ;}Hx%<@H-C^qg;N69<^hJLg6PMPSh2y47^Nbefs?lk2~`L=yIa5~A^4$65tOLZ zRXlQ#H4qcxRpuz*%D@|FH5?UkRy2oV*rD@~zlMoCg+bFuTx}{eBSVluz*wUVQYAf) z;FuM30zjJp2$8O|AQ^^K!a9*nsEC7`E)X0L?3iJ>2u@V3DcP(e;UGHp9h4!eViq_B zFa-;kUibs=b%STP06>Oqp!w0H`CV9V0j;EQzz0y#;Q58vPEd%6tDU_0_$P|QYA`ao4gHPOotI= z&{GB5W-6khB;n|EZVhTds0t9LrkSdSV+B-Lpw#wm90+!oST0g-hbM;1V0oZTZ9qHh z3tA`bPaz!A0bAIaUcblbCN(6KFM=iArV`ZRQy!8*T0mg{(QV~e;da8{r{P>Y&k1^} zSsjXkA~n{sA__HX>o_I_u|HE8$QJ?_ZYxG$G zvM|1oWy>*+T@a-0(FO;AtblH~6kyOOI)oVT!+>F}IT_4=YeBP^G&ttqTj?`I1qEwV zF;oM_UfKNy(8$Gz0QZJIA_d#bp8(vT*b%2XDrRIKZ<8tGeJ)NLDJXC10;B1uObkq* zEsUD26-l(R28O8mYA~p4vUhvB4Mac-qAT#F>3sw{3U-3H2Yx&5?sXtbZ(66JQy3u7 z;o(T3x-0{W02OO-xN5!?+XDnq@L9Y|B8l6}(cLn35D_V|M^#J2%1ne5s45GM3Vnhp z%yQF~k+SJPRKv5qYR}0Ags8Y8;8himhhl_ah~mf?48g)HR$zGy(Fc>v!r=8`s7{(- zWnowtC(;~D!pEwv>p~%43}Q5_9bvHvf%yl)q%StW*n2Z0&_{Y-Mv*aFAqG&8RDGhH z_M+A$jlz#0qhWyas=XmVA|l1es!$XSY0#!*-7&vr<(q@HY!;5)jz}0eC*;42;#G}A zM+m(E;73nO5dKI4fQBd&4P^)nFc7u$gaP3S;%RPSnxpHEw@n6`UDX(b9E64oSAvR% zk#Zp#u@%&X-~uB;O+({f49*FpQbB{+g6IZY!jh{_8jXm#td7W&BU92y2$lqa%*F7> z#V%XOpkmk`otX!=L;?I3%MU~)L8H{`dK$agDh z35o%_%t{Dl5ZI`BsDBx#Xq)GPVIVcDdpZfi04ZvLr@|`iLrXpeGX&#?7#uCBLTxE7 z^as@;%|sT1SfdJ$YzaptC{G%ALr@1FYk-M_eFml5Fb%E)P=eMr@l^mdelmkhN`hcC zF=LDTOE|x)0h$M$Op!K6(v^bER|@L(bb}85L{%#A4pR!iEcJA10d&!eJ%kri zz`2I)KxwCkV5Gp0NNEuHz<($DBPj{!^4H#>nOpubxcBHus%*-h)15$OY z2HDx}ETVnS6~o&r3>AV5A8m>NAF~w=HkiT(MLCDFJ9{CjVbpH!?nE%91C<~+HQYCv z?I~PA(E(}->w4{KjaF&Ryj^#|y7)-E?(T&Xk&VhB5XOOya zJRZWKp96&~1hzNib&y>mBXPmFG2DluNwh4AB+pXP{tSGPX6qmkW<+j^Z^nlZO~VbP1>GXxI{HuAV zqOBGjI+D!b;{Ua2d$7 z42Ta9k)-IBcf8QZ3=Vo<)J6V)@RDI(59mZrrh?`y3rTRNpX#836f_BCgv;vl$77&; z>bMa$+OLzBcGh}#`YcDD` zVa}1OGVCjYdWanTz*oZnTm(%)%mO0I!%3pRtE*DtbSR0y2ag_*NK-rrR!p3U5^w%* zq2{#$qGoEYSuH4$2Pzu~z8qm2a*`^r5IR;z8jDc|9(AAuB@4fSc$nh33S@G6`z8~e z1)l)_z=6iZ&A%h5$ieWOf(BaDw2D{*qz)`5Fhh%cDk5koKSu|^!U$t_r_+MP9(;;$ zjP|fw;0`1Mv>io@^X2GjoE#`mM{EI1ArO1W_|Wc*28NS}^%a?^&vQgIiFXo6R4f-E zhs7ZIRM`wgn+8-b(r zY=x$~HRogC1ME@V!X%O_h=02dp%~n`3Ti>#Vk3M=?`yDuTj&84aAZQ*Z+kHzOeXRJ z{H6fUEamw#1j@#n-r(>JN0Srl$}r!d$j>oL!22{jumKN@A*6=n49N+~O3Qx`#=A*m zR+%72X2t}orlA^RRZLnqmWvnqVbfFHM9TYY=7Xo;{NuZGx zTY^pnDI!2vP^+OU%z|w3$hL@Iqig2H5IA6dO%I0Q26)o~G&N(0bP)|e+5?bp$2!3m z7>&SnV1i8GSI|=qSOA+L+s^iCy88>nKRMP&$#@Dg?FZ0)i<# zv*tufYwALY+oNHO=cUN8%9sUOp=#n0_&CUaM5dAo7XS!u_gtk>06SpZ250L!O~)nh zCUzY`3g+*2&q=(~LfXu+W}T=Wcs0XNs7SuB*{VKR(()}V8h8kw0Lc&VRgo~-Pb}HHT{9F)(73c%)bp1TY}~fa3-t zq5$EpU=%UTgV2GuLk*$+5m+Uj8tw51>^hlR0mCu0 zf(3^_kXg+F42qB+$zWV>(ngz(gX9Pj2e63_4Q_zzBduY-W=GCeh9P_GH4Rx+hMxo= z;~Z3^t9D#4;xCrQPnZ7b6Es35>cS=dN!q4jD)r}izVT$G_JlZk@1^nmmvqEmEx9;x zIbD2NKX=*IcL)DN40h;c|9n5iT;^o+Nz2CXeb;>Wsu*m3@(0FRU#LTVVBB?I!K+U$ zop_fRT6o=%cEcX;uaWFzU-6nx4Hmw5`|V%)(mxgk1_uXMthi(4s#SM>`OB+UuUWhH zu61{>U%&njnU`ufw)?A!N^Z~m`uef!(bKKq^h2M!#3?$Gmxj~qF2 z^ysk{zI*(8FTVKw6ED5=a^aPer%s)E_0?Bjd+oK;r+@JJ>py(sjW>Su=8u1T=Iq(G z-hTU?pZxUQci(;Q+_`f<`x(Y^m^}@b$JdYQh(9G>+=&x`++)+uej)bQzfO8>NAFc- zul;V48W4ktoDdKjGP$oD{7vSDue>}dkO@v0mkWHtuhvwg1b-+P9*}Z7#Yqi23Ku5r zyKL3ZXYC7zk`lh*i!og9-|BPQ?iIy`vy*;{k01Qam|UpW{~wdY20Tk1tdNApil9H4 z3uXMD$c1wLaXC>BW^Sm#i$9(eidOCtUPIBI=Ll#Gl5doqcQ4`?JIm zp}e9yczJHNI6641ELR9E5b7#S|9N~;Xu{{x70HyATY_8J(fj*~eZrZFPGNNDOn7Wv zt|@S@P+76ue@QNIW$;6i3oQ?h$~{)s>z{BXJ}6am3qnOus0}59Uy^d78@+0}k#hs0 z;IEh`jH}29msJGAA58k*aZ&VtFsW{S@C%z#R}_Z*m*w!)jTJ*e9Jh8w({kaS$|0eo zVxGJgE6e!9xhBDXMK1jb|2}_ctN%kC-=X4v1@DekoEOGcoWa6Z3sqOl3;J@OXc^=G zs1;v!DgLjkSRkbE&(`xcg>j|kOqZM?zcsuIaz+~O~35jE?oZ7&&4MXu_s48>Xr{* zzy7|3k9Ws)pqN)xuqOoO^Zp_?8)61F;Xbcb2_0xw<|&`IDoE#pTj{SG;@fwxv?ru2BQ?#M~vj zswcFRMYWw3LU`|qU90D8y5k!IPt<422Z#1wMGlRsOO9x}s=FS0-Mr#+D?f9`8e6#j z5n;c*OdxAt)ehI5t^MS}#Cf6k?&M1gE5!I}@lS!KJLAW{m0-z>zoN)n&gcH7r}e}Y z0k0LWjdyp%gBSbolie5pnMH5!(*Bz-@8jM-IO(Og_0oj_F_C4r@x|VcZ@sy1A;Zgf znsCGy-%}i!QeW60o*(*~*5aGfId*Pqbyr;L%va70qMH^`CY7yo& zUT<5X}WDE90HBTJtUb%FaM87lodW1#K0_h0FSQB6Ml3G;zw!ix+c8tf}qlS;TK%)?XE>$*L3QF6Dhw=wb)| z?1i)HP$DZ&oU?3cFL(Mmo7zX0v5IMJWMcc`rORBm$LZmV`=>6$Q~WrDjlTG{KYOHe z*q2FQ5f|TGoIdOFf*`CB&#%4`Yy0fiANbv+KmXl3Gv~kg>&H+2`@Ji__njkq%-QVG z=WhM^E61kQe(jQXfAs9~7k~EFcRqFR{ROea%B|ecGIjl#!voQs`Qgm^ACh&KB&J{T zjDN<|%nrS;djFzSk=2c{uhaXI&ve}2UwM2gAGf3ZlKr8xZN3e@jZrO7lO8>*d-95{ zLBH?vnZCiM75bA)r!FkjMx;}ieTxLPlsk7SBJ@0j4f;7fc|ymaQA#7=)=m%s7p*2Wj6{K>Pg zMe&+3#o>-%@xhsu!QVz3ADPqmt7*lvu}tx&GfDZYC5hKd8c$x=_+EEWoH4$5&DAwm zzFgIKx~K8r4Yj&Q|NrAdDS!NeXRfZ1UcISt+bd$@rkg+h@c*bC@L|2;P($L)`o`nG z6&ru{x>$Vas^((vH51EDR5X6D(${$OL$P>MOLOteHIvGJD<%f;%3Z$3t;O-3Gs=Hj zk~m+|xNg1o_kqr?z^^v=u%AEZyf(T&kk}h&9Nyw2C&yV-~!l;vw!Gxa)gc>Yu^Pcm*r-|}g zXrW7;1c(iY3QR$N- zQIqtoXwYt+KXn@sDhHccQ8c(C*prEDP4ti(x7f){;QIKsh*Vn`Q`|JgKZovU-nM;k zu(>-jI6A_dsVQ19CpUHLR+iW{IA%-jj>MSFtzFx;1cN>y?u$Ps#-A?yRJ?X#*6CTg zyoW9>5d`7eJ>Tx(i~S9u9@ay}iUy~WhV+(7uEhZUiVos{&u0X zxQVnlbop&7`0(l$^F@3`cTf-~E@jJmzO|BnH-I(MmCKhb?^)dAt$*o~@9>2Shu42^ zaq9o~`nA96+zX3*v*W+t82MQuAqWqyd!Vlj|Gd?_DU~j~;%obFx~A9tZhrszakbKN zu{APh^{GhDln18nNF6+tO2^lxuCAIt{n+}O3rbJwCu>vLMd@5)(c{wTOkXT};K0K( z$F_>Kt*vdj`Ki?W@pY9u1Gm)b^GeM2t)&~1!CU%U?%G+irF34&+?wsAe%;}SJt2K> zJh$P_=D^lRW)4oOeYIuI+Z|iVqpfrHW=n6*t&Mn%wYfO{S>cx@TZtr@rV7B&409Mx z#JZGj8j@islA-uz!4hK)vZWgMC6S5o@3>MpIO4xEu$&a*j}(3oXh^lJnzWd&2%~wv z)3cb}wqiM_i^Os%h+nLx-Ak5Ca+WMAd=hBL?~hc~_8t`GOSRv*m|eXt^UD1TJ6}wt zm7+{V?9J(ck{>nob+FlYEX;0YCw#%_b;xwbcCZ_6AjZ3CPRLAzm8n`Xt=ry8Qw&OYu_t%-+&@D^G7pZJZTH#RX zj4wN|Wot=t!mh}E3v4pESU)^O_H2C0W0Qx5*<|+tuu0$Ku}8or2c6PA-%b8Jb8xq> zdDqW33r|FS6U3{IY?$`g)SuT(Sa##V2~poeyGiQ1F?lT5+!_6C~?Mn{f3Hhq{)rtjIq z68nd$cGn(ERPCD9wSRYM=`fodERKIv_}h^;il+-jp$XRmSA^EZF0gZzQY8em8%Is8 z62!0hZ;5rttCHkKaa7&x#k_6d%G;8%)Pet+!|B3BJUK;?=Zo3Gq7ePxdokxrX4SrA z);(a>4F|lv*!d;1-s|m!RW>p;Q;<60;m?iggVr zDQ4R;Blw(bD+=n25QQ>}sitF@vSH|AtRROg@}?_OgE|^Pt)afOU>&hE+{`s~Vq3Z? z`D2&Ap>j>;h9TLCg`26C_aLgGN~&aY$zmvKM!nw|vMO<0$)w!SbcK1pvrKMk++dWu zj%2Bl_dBjSRFO2nn#2r|HE0&IP03MB)ly7H=2(OwDUQlH zuFfo5XGUCz-|dTUFPsZ&`R^BfTp|)&z4)8eV(}MM;)u`R%SXiE7ZX3qr^FGT{o&7Z z#NemDxb>SUvG@r|^ndZkYB6~G|NQkIB(eB*|J>w2P0N^f1M3D_a$-&5)&~xez?i!O z`tuX|CB^65v%Y4%`M{y#lh)qeXUoUdY<{+-aj5#4x9T32-JNy&0?!xa$M;6svbP)@ zj07s43C=m=u)(Hj*4hnDl5W~i(Q@@fNg3$ce7`y^vUq!WIMN^K2#ihoj|HM1RXeM4+U+1<=4xe~jynN)ZT|>Tj?!f+~51zZ^O9l4*f(;z$+oZJ4${g6Y@yPgtx3(TU zP1AR+^$iRS)wN`+lZBSPV_zFH=J3|i=B+z>Hy&*+Zc@K}W~ga?=%#58UcGT^MRct@qhppnzLgzl`AKTSyrh396%YjwLO#Ad6u*Du zjQFhKjoDN(!)pMCg0IojGO=xiX|fn2iscY=qN_?&HH?MCiuiagIUBO{)PsZ znYcT?1D-+;8@N1fW!Ms>uIW$*cft20UAJ9XcMJ)SYv}kMLJqn^*w7&9p3PD<&E|Mg9kdG61MomO z8>luio{Gqc1+-;DKWqf zGnlLbQBBv+h$ibYm8s-XYEvc1OdXJ<5p1N&J#tTHP0mzZk!jx2B_OzB`&Xq5)!`b! zr7ZD+VEWrk$7PO!6EtK%up{|50p=7`frVYi6(u!YY2pzLP9&XZE|j2*-)Q)hem-?$Fb6>2C9*OMH_4+){^24@CkZH{c}s_Ng>z=(~D+1#|4$#s?ZFB48wYH?Y? z?NmZ=dz(`W1i@X))g|46%+M^IaRTG&8Zu5Wcd>9h_RT!4KDjye!ZGsFy6l$PnSwvw z!polT=Wl0dWpCHm@63O5SKFDIny>c-pG)|ve-}SG+E@JEvi^^IJKMTn94#g9h*d3L zUt3-m)0W1wEnh8)R-c%;YhGJfOpl(dIbAGNm&bPAWaXX+T=}iLcA3eu$7}lLE^0iy zKIxy_xNG%5N51pz`3H|aH}!{K znOna9?m}wR+SKG9nX{f>zwEK)w{E@ESf2b#A@%GXPfM3GQ_7R8w;%ZJ4^rZ`)Tz^{ zSIf=z3MTFJ{cGa(mwo0-?~Z8Q?CV*VnmyTRy_<-yCfhEgn%<4lA1%D=A5%N-oESL# zKYzP*a?P9R*3ZcF{MOW|LNfT9RAZO7l7VpiN%Tb9dIkF!nLk*SL+Qi;Sh%R?x%4`2@!1uQXC%@Fgr z7*!P4(OpxufpD7Fcihq}N&rTbDYE93wImtTW}fG9fFgEq6&ge9${>@};1QxxPJwjzj3t}U&J8+*l_hbR;i*vjvY2l$b?S1%rY3VV%hC;e$_4^a z0}qH6R-wDaI!`0%iiUQVOviC4bc^mXOR;Uk_0S0*f)C+g7_)W9B?fmjh7VabE`mm; z&`v6L!$Tgd1HcW40M^hvXIL-sTFq-mjT3^auQDBModS{z)qslhNAQB}gA8C54Q5!f88-8N+jt5OvRLH|k9 zhP9zCHV7C7bL4L$s2m^~lZk5>4h;fBiR0=Pzz=FILc*z;T*f|f2vFG~rb7H}4Jr;* zF(nO7j1p?us5Xb$glmo`e3|2_{v!>z2o$@_fXR;L5Vf$ds`id11}Y4Um0H(QiVdcY~8w0b4UB@$%fY&=a1GSzAZgt{Yz*0SFZCf z9P@hHx)U=`d@}Cku9a_od&cMs(bv@E;FPwPFGSu*L`S{e9%`F+bX{lfMIm~ImSlrf zLh0Pt6@&7^H`XuQlMRla|BYBK`eW02=U?*eZ!QfEu3q+DWTh`VKYroeUD4gQbwz(y_fj%aoU30q zWyZ5B%;(xWWAo%U#$Q*#8*Y01=J4}v@4nX^SCa3LXr&T6wroLkO2e_aH3Q#otN3I^ zeL9dI#pAIL+U_~u`9|Tod!~r-VOSkl9`9u{JZb1D%k|bU8bJj~RI+sn0t7cfKs8X^ zCX%6=a0Z&BC|*3`a$pfOl?^BYUqYBPc=TofxBx7wY$%>Kax~f1Tt_qVI+_Iy>j4B; zOqW7Sm?$&NYgocOL7=#32~fQaWHWW!B8CaUz#Ql=9&97PGE|79K%-FGGw#r9%$9L| zn0*sGjdta*KC0u&GI$@g2F+nwHr0sbp#U_Qrn@LU$U0lrbfT!LW=YiFR_Uk)G>Pl~ zmk=e8T@4MgEUQf7s!S9PfbmohksS-rr&)$7+m!f$eaH;33fbbsQnldO1_`x^U;+XF zfCaIF8rC2Tusv4ZkQ@Vy?{5I~S}v9GJQ5mKGXXq~s-n%P30(-FlMN4PEd?+pQ8YAx zv4l9pLt83AKmjCjEyjR`=p7hEh(HHF9ghA~flh{|n8bjC)0Ol&Jhj3Mi{+UYgQyH$ zjYC1tql?jMu&eb$L`NRRAYBzK-qfH2@e$2bHI=%O&Maz+t4v$MLmB`OTgBtr^=m?e zFiHGj1go-1J){$qriFY(28!7#2cCf^Ac3e$8I|+QmC|bhimf-^S?%h&N==ugeVzxk z0J=JdA;wALbnp!?+A(C^Ku`l{Fck?;E`dQbV4viqD;wZg70oaJF{bXwfWad`dydBk zRm!-VgKxP6OyqD|F>;{_Db1VP=U6s)it_(Pkfg)&3 z*>ZB7Cc4$e0u)ZECvLnd(18d5ACtH?NH+a(fV-TAs=;AU__Bg8$*37X5OYks?#NoTe1Dj01 z9FbH7B7|z-N*p-YQB|<@Kpq4u^)H&XVQJ7)x{iWF*QD2&3Z<})a75S{xJ7?H1`UDF z5F6*l9HJSyi|BV%#&c>Mn+(YTw?PcqRZ-|b4+-1vzYJ%IPJpt3Qi7Op#uCE)G-8!a@TVa6Lco`Jr=^v9lih%Z^6b(U?|vR z6+C27#bC^m8FY(fv@SFe4{8in6?jOXRnbklra%`15FyqP;z3$L1u~o!HkUGZNx&tJ z4nSl<-MNOPD7j{AivcB0T_dGSxBLoyW^f3TKCkN_Gw1TQQR-U*>2>^Dvg}4l-65)sxkdH7Oh8m}U_Ctb! z_JtSiA24+T4jMVR20mko-Je1>p@G0{+XkRO(t>Ps877xARGJQ6WcUpHG(;dr_+nUi z4uFON7RJ!l%Bvi-VHxR($SmRF!E~HM0dS7i22vu0P#vHsfzHkKniQ>UTQXH;&9GcG z{UX89L48<`Lp8|Lp4ZU47~OD#8o03R;KD>jqJsECF%W*}YR^#ulY#jt6ej3m%C7166*inUo?kK9-rB-^4kE)GQ z5()wG0a49{h`{=Cl@J&%YNRZN`2-|`3))5+ZPNDxs6Z9kU)R9ba1I`Da>Tm2s=K-? zX<$A)i4FyXp%y0&0N-|7q5c8SByZwLHG@iQAG`o~9@Pq7u%Ta0bp%{p4$lizLRGsm z@}s(#1ka^fHJOkLt5NlNfan?+(zI}Q*8vLo(CHvX1sWgh4id->0`hc(>8Kizmctp) z4BZE_Lm-DN=r5J#s!iZK*RXoiQPGMXPa_9MDu=usaT&TEi_)M6OoUeeBgzPeS5Xm| zYcOv>1Bf{(T^VE81D)4=B?3~A&D?` z-VK}*5_7xiNGT?M5Lp0A0}jWyYlbO9=AjrRs;CTB39<`Vf?0ut!^O}D3*LP;0=BfN zcvS>dYO6uC7(sI@FhLYh0#&jAF~6<>W$;@ti~W8;(DM5nFcq>f%i=vpE4iXW$kK4W zG=y??a~I(Pfut_9CVaa9~Cz+6LZ!{F(1I!nOpP$j4cpn@zPI0BvFS!B#9HADk) z{l{?ZOqNliFo!U#{5bT1p+b$BQr?kefN0|B5J7e)!#(R1ETdcXLzR{-K`+yALolQx zhf1F_;T+Ke904kV%`*jKkL3UgYjl)03KG`aK08R|CEK78c1XH5zp=yB1n#MFg zQV0l3&ys`NOc-d(f3_OM0IW@7`hIYoXl->X2vq_Ks)URb%9SVBrOJD&orh#la1>ACgER5w5Zc@-w;!v0% z(6My`5dqYSt0gX#nFuGKmUYvHykcP5%Q1{*08i&@LeNdl`-7M|sS4&qa2pKrhVjOW z1X@oA&Y8@1T}oP?PU?~j*?`ovajH*qAVN@raMLn^7&Q-}tsU>42ziH~MRB~DR`}MD zB6PAh0M)n+v4@<;NDoya0aaa{@17n~TtafE1t}a1R^v-ZPfV@q+(>XOkO$Tu5d;3# z0XqP5B&85LsA`wVs0Ir(WFry*eknH3BO8J>!1j6x3wN;e)=q%FhDaPz0iMoswSFA6 z4bF231QnBD&~m*=XfGSo2+{UK!8$c$y1IsdnW73&tSYEq2RbFZuQUKv0a^sQ z6HeiH5cq0pLMSD9$+i@@1(_pIk(ESm2sti-3{)xvAgW3MrrFp92d#mp2GkL7m121= z&(P7)088^r?S~_1^yMp3AKmk)FgW9MN}l5freh8cm!2CzB!=DR``QJCC(ojkp_3gIG#>d!jF!M zF!V0YHzvVf5T)<;5d{@W`k{=WClzQ@gIN-j%j!ZH@X6wxF~!Lv^jG^MI4~T%X6p(n z*02(@4$_?Ge0FDDdZ2RP*@R|ip0YKMEulp6a{%Bgm+bS2n`?uO)U!U$07|s zlN_XlNXX#7kO$uNRvpwTcE&)kV1%nj=Bn{b7|A%WZ;&a9Q~xmmrXsIJUWt>nNYBz3 zl3Glo!kJMK_&wVZR8z=^_$7^j|g(CHWEkt>;XXF^QVF6Ug z3}Qp4sub!4UMPly7J$9)b{~;uehQ%htcS96U7)0<1{*M%cA#LO5}=WwPfk`#c^SbB zCN$xjqXC=ov~MCuKT9AV{;6ubj-v>{aqr;L7~;rD_eh%v?MJ!GkHEO`gp0i*#IL{#`gGyFHBVhCw4 z+2ac;5tB-qD&>9}unhTky&;@24SzmrcU=HuwzMzl&7e4iB(1r!Di8%oVR!M^P=PQT z3IGQ{p(5oXiiVc}lwd<8%mm8G2>+}U@(&1`cON%1%##=^s z9|hS0;$PiSTn^R-PD`-Q1_+8EEJEoa4E78mgg!JldKN#Sefqm%XiUX5!*7QpAVbGP zV>n{u&o~5_1!yr`Cl6zfgF{~BKtL%ve+^0qsS{#hv?{e-Dc1=*gWv*xE;|GsaDxfV zhZKeGduGTd>5DGmX>e1Fv%o`$3Rn%4f+HcDg@0fkdtej@FAOwA3Pm-DD#=cF13y(e z&n3LHfcRL_Hhf4_7#ssH9$?Qb0!RUF!j*xT@gfAKAPFgioJEHy4^N7Pux=~S6*gsb z8G;+{)dd^wci&mq7MNOw;0PlG;wb*Jo-I`o@1yP&;QgtgYWQOKE!)Y5ZwmPwB@wCc z8P4t}k~;D%j2SShgW#}%LJ%n!Wdauxn@ZuLY!Erv1)|jCiJ>_3FH%3GRSv~RrTQ#< z8x9{qkAV{-P`g77pgP=M#&m$&FfVFNR#5t$N}_7OMR|m42hI|djf43C#DRr*Pt|MX zpm110OqX>TP|y>q14Kf$Vk+U#@TGvapMWoHj}7PqCD2>|`8ez)udE0dQ@&WOU805JB#QtPA@By@)qnBy6>#=K_Kp zZj47Xhhxd+*Aa0UBIV+?r1Wx4sf#pp8Zb*5Ej0nh2F4 ze)6Usm^3&@N~r**(xD!>BvlF|Mf68C#^ zS<2x=F$2alM8Y(;4uZ?o5L@DA;TuEgY#jn54I^$2cNiW$s%I%Q%CcBBUYB8*pec!? zWyq9ajqrqCcn1BSxLk_p4pf6!6ITVUp>OLjtHOOWnJRAhics*#@Pr<7BS1aG{mcik zvlF68?5gCA{8_FV_wkh_5j<)fo?j21jbX%(U@iP^G@@|GiS(LEz@v&z2}^OGs#QH# z88BokUo)>#2B|p;Js`kJBY~i>8Xzz@8vibVhU25_5D1&BxF^aA+o+F?dUc z(g?&945B3h*%kqPQwdhdLDWKVc!a?xI}9sAxCVG};eqYOL1aU!qQM}Vd5qN26gZJO z#AJA8gbWNvgA2Cyq99O)_#3nVU6tz96w(SOni(^zEg}$Ed9%_vV-{T{LR_|F)-t?Pp)k$ zDwx;z=GUzG-uG_5J>T26cJ11A>(=+*amV)u1~zQic;}s)?z;P)d+yo1Wow~O7`%7e zwr%&_w|&RXAKd@LAN_dOZv3@p&)&U5L;Lpcf8c=!4;*~xp@$y+$s<2Kbm-B;haWrg z_~TFf?8(1<>ZzYU{dd3k`@cVW^qFJNKKtD9=TDqGb?Vd$FZ}Ywe|YKT)2ClK^XjYl z*Is|)%{SkC>#etb^{ZdK{q{R&&;I&1zxmB?&z(Pi;UE9;;_rUmL=!Hf4hm!554Kgz9Y26!L0WBy~-;e!)G*KB%>xFTv?~^r?QBzA}eMh3+_2{!yJEmpeU>Q#feqfzlq^~pCz*&wu@tgQDvPzAscKF zMwcapbXlcPSr!p$%9aX=GE*2)mJ9{5J9mg;;QS4bh?jhmYRX!KvH0YLvTQ-EjuSDWxIS=WW^r`zB`xpQ`Rl)o!U#}btgoICTSdi`!uE3(}v4T20{TO^;JpTQPvVagP zYZJaw7LmJzs^E6t^jxqq)GKALY`ik?MRQjCB5>U_BbfAkFmh%9 z^wyqwt4M6xwRGgUmAKCxJi96n|2~B-^LU(pq@Aj_-LQTQgwBIAB_ zbYI1)NcDulTaFeNUT7Luyeht?@L^MRzTv~a_I*9PZ{zjbSL2;C;(_62ySiief1seN zm(qp9|69e!6yH_2Yf5c?lX&^gzxFAL z=5};cZk-n|G{)u$@mYenOl*18 zqeG0wvZ6oI92H_wztGqycKEW*kyuAO<`dgv*_hBQX8WRzzD9pzR219&S)r{%oF`@* z8$04+v@u&59fqmT#rV78jCkqr#@)W40{fCf=rDQi%G;K5X9dr*smtj~HYPZ_Q<>bj zaz!^^+~d#{Jxdl(r9BH*F6ZUJaSiI^dAIT9Q|Mv`|H}$ziD06IOm1C#+e+8%aq$25 zteCnI@8g$MwMdhvELpr5*TwfcdKU2|D_7tEHWkGO|HG@Chl=VFcna?tg=^dE@`7-O zc)9;8c($*9^JLYDFHQtk|NKwiiaxUXrLTQ<;B)ti?>#4@f-9o3 z^7OC!-tuMsSlBwfJ@C4xn9~DN;oEzTnOFZby9BR%yH3hG}7OG}lRk&|vbi}8T%#Y^c z0e(93>!lZp6Bmm!r{`zhE+l5btCwV5yeC}w)!5erXVH?d&Po?QA?S>|wm zQRcHhi-lQ@6ANc%-!Sr$n7Axv)@~}woGDbdO&@u-IPq?A=83zDGRF$b+dBMPw-#j{ zEc~+VtC6StiHH4}W7~={PY)JV>xT+*JNF;jS(Ld{_(A)1r4Ju2O5i{Oi6kDorauUsA0xMQFVywAHXjV4XLa|#6x))t(?M6}vTCvnzWN|HSsS+_4p%>!kv_i!T zWnp*u&;0-Y=l}eEzwg`g#is`^JU#gQUmaV1yWD^Bo5Pc9C)U=Ui1%-GvvQic<1*b|9zFHSd6FG9pHKTF~LUf_BjfqRj*BWIF z%0%C#&4aCAN(|Y^Zd%qY_2I2xtIR1C#WGW#NqjninG?(9AaqE$Ke}+U-4bUWVG~3=iz03 zB$bZBNxnd3IX@em5tfyy?0VuA!;8yMLJhJjXQYa@xZ(MQNY3ZVoi=t?svf)%yb)Rj zFOm&T7cpNCGMk*b@zzUabD5u8C~K*av_eT&wYP)C-hZ4frM2LCto7hc)3fN=Yef-V zR^$DJ(M#{M2-RqA_O1^WLGWvqVo)|GiyS@|j7%Yjq;z#rOE;`W+0<+{I_4&#J$QYg zs^=9}b*F2fOoQ`5CaGlAva<`J*!gzQt%}+$9i&vSfh$HXy^%UJU2^Wd58p7QaZkz3 z_Cf<5JRDS0MoAu0lPt~8UkJ1Y1rNDa<$BL!R`0wpF6*Low(jr_OWYVLE-K*|80!U{>iqB`%VV#d@x=$1MBzDJu|#crAq{blId5hm?iQ3?aJOWHdUr4{m#Di|K9eCnqw+HoRrLbRqb}%m!)o(Idv>uBz}x z#a^8|dL)F#$5m53f1Jz1*Fx&nra;paDVN1G)|+H&imod*nIin~dT3VBdoH&?zxsH1 zEp#ukQ9^swdz9($ft_f0M@sT>J7PA5`yob~MWvq&6V}!I_28^$Hg%&i(OWg#H-@S4 z>@UNfLs=fY5F9OCl{Y;)t$m$8O0A`8IV;hsvc4QX5NZTvbXUF5g~H9Z7qV%Qs8$Nu z79s3?BcpY1G-j*RBgCif-we7$`S@42Xw729{NYgOrKoCD)Z1ur=~tE^@*f*{s$aZO{Pk6v zl`f?Motm=;uY@eT1HA!7Id7u-_@H`g%Haoq*&4d=8VWEfBpy_0XPrGbR&DUBfpNW5 z(JK?&EAY-0(>3%YOrgG7iTR@jHt@wYpKY$K9ju24j4+v2(}&u*gR|f-V+ejrFKJmZ zf78G!Hb_xc1n1S=SE0#L1U$EDeJ{$T?*uErQ5ucvQU!5%W532^X&+c3JpSwRhmCW= zm7c1C)oKACv&!BVLs_~gaSSyXAE;zq!f&E!IlMKJx(+5ghQaeNHV_aM)FE35QSzV& zh(R?pX;vDTfUf6n-uL9X2zzHY1w0w~o+?&!;{g|AlVpf|mM zlju4rl!3x%w43d%lL+ zl#)I&4ArfuNCzXDDF8t3p)qb+$;MdATDz{z9zI}{R~4x5vM||p_QnSL>}1THu9IX* zvC-K`HsSBy;VT;xXiyE=P_6k(raL~DGRR(44zn;efyO8r0A{_(ehx`c9H`3NJ1yzE zMaT#y&uTOnqX5c}AO)gXX5>IEP?9|L<*|SncrGH(YF`*W6i(z8Ue|=rvF&apYhr4* zUmaQ3Pl?H{aIjXWxj+BfI>Y{8Xe#u?7V~OKsE>98h#+OATO7*z<#S9B(4b{%2(2Z; z>cj+4bP>526E!Nv`|iL5 zA`PFLK07|m8enmnET4L0B2Y1aq4S;1NVDx@EBq}{2Y4OPC;Evn4#f$jwE=5|z? zL`7+nq!!RyEJHxm#brWUQdNQcEaL^$Ve@Fa9F;0ZJDUwl;cx!s7Sq(EKD{@U0mu2) zT>`7D-{@2sED5FOALwu8o(PEN>o|g(PInRfHw#>LVz9C zKffPhz2*ULr_NJv82|g-%OPO_(`(@FP_;=#8B~|EMpHQ^^LXm{Q1+VBfpD8 z*d5+L-pbl3r$sHKR^|_%t~e`DC{;f=1NR|*oyb|x5crQ|+Q;#338#R^!zp4Mhg&1r z?DP}&ZnI5zuypWJKz|Fzc*!7WrH;t}u`rtWR9H)CvyyZf!y92d*iifN#j}o;pf9Ox zjQ54XYX*5>kys9mi4B>3d^rMHX}M+t1O{oZ4TLoJo-zl&y$ijOfuF#1j$`IYR4oK}nx6v;}h#v`2sAB91;_@TfEjlL>ga zeBnzgRUbunxIcap4NPV$Xbhu67e0mA;$B#!;(Rax_--H;AYG4c@QIy{Ol!)j*(2}R z6t(nx^Ut@Eb~a+KcN=pD40t^~bMM+xU@-;qdF87V>42~D(nGjYUy}z^KtlqF`KL@0 zmZYPjx@h}!ux=u#sZh@=b*9sW$;uQXBs%}%K>TcSVzcw2HvPNt7JW}FF}yy)knH{H zwu~hO-Dh7qZs@;?YiBqh+bm~KjX#V?NUH9qst1T(9m=gkZb=ak`B-mBE(bbEd=H zxa!zgk}F`(1kDtu|8{J>hub%g-dc0d>s_j;6VLOrYm?JL7zQk0*?7{Lo4*-=&SaZK zz9AME)-NF^Nf#h11l5u@RqerT)uGJPrhjs5L>zJS>UM?i7_Yr=O`;SUWmSYl7B}2+ zM64GCbEOU4HiAju>0Um!CilLwUfuMI8DMJ9h_)Lle`B?>u~U+abzC(9>r( zMkw5)8(U%7TNY*dv&SbZ6aV7n9V(=fOKtCuN8}Q`*D+@jV*+LQAk^X06V!#7{`j$N z3@KfSr``r8tTSr*nRkpt(z9C|1XxTOx&`RV>W6Pk&SDlASwV~V{-GO)08j>VwM0n* z91=E$^(_(v3W0mQ0kacE4}FFIZPK;F4}fntNf1q7QL36fa~z*9G|@`oZ!mB6_2aAH z1*?a_un=Xwi|kGkf?&J5^5i)4N4Jo#sr2awk5Amv_;m1!DP{%RNGw_`=h^0X!if0v zx0{wihEq?B){P(h!H&iUGP!28|+wPbsz=lVV6-I-a!Z$G`(X>I)-RP&3I%2RH?w}j{cmzH8H?g%o#ei zInJqS!fvkdp(oc>EBHOmh z1UNJl?FfLA;#>;5c#_Qg)Oz#%>v;El2oeVc_!Dq<7s%I;DFPj^3RNUS`POV*E9$e!aT3_PN z=j`f~Wb@?PPriM}t?h4;JB#+7AEdQ z@e1KQ?rXE=_#6{Vz=Qc$YNF1m5}4~1H^6|kdOj(0$`%@EfV&G ziLQk*tx%qm1G$P)R^=i*UmY0?7Rxm;_~m`#7YqqdAUPpf&{T*nRY(e~grRu&DlsGt zf@D|2r-yX`m>}rLa!E3f(P^;1AWDECtM@3F)$kG@Pf-?}*W+d?W1$Yafk~(#m zp~|J@MpwL{^%POc%F?gSimW03mV#i3hvI~I7O59|f>BKxH%fIlb*Xq}t5Kd%Q(K*% zfH1;VGyoeUEv$uqLtaLPsz54fcx#~$=xBQ!5Q)sv2Q{E!3CC!6cD{t9#;6qz1BkZ^ zvTqeCjh_=_d8kxka5&nIF4Pg%ehkwtHJYw zi~Fna6z!<6D1#5Laon{eR@(daZq{rfL+SCsKwQO^<6nS!Lb8V!y`W8qWQg$5dQEZ) z)8Qku7x@lnPPq)(F*GkG7ec{Vp*Y!B&e&i-%gKT!I&g}8m7tSBA2a!lQ<1(LK;u{j z1SI|q{P5!i)(PvykpdjURTB8durkXW%;2r1=FANFj!7bVL>r3OiEUSUIAOCiMIs)c z|B!tAI)Use5teCqeL?dDsF!lAL<@=QrFI|~s=Yiao2qQDm=z;8Hhm%0a8#DHAZ8&WO&FxG$`S&0~amOvI-DCsVS zC+I#78u@%p#VrRZ>dG&?ceeMxcAV*5P2~TWlP_F-@71@9=N>up#FfFBx8DDQq5nT; z{>tyY|6Nx;FGg4XP@a*W-aPXcFAmN;^GAd4-~Io-^S|%D=gL%_yyv^09h~{I&knA< X=Y9KEKCFJ|ZQuRVFZ`nV>v#PZhf)^I delta 7659 zcma)>Z>Zh(UB}Ond)9lBo72#$%SEV8;pid2nIW*)StbAu{T_n;5z^!3}G=$_uMu{*Hl_ozDHf< z1hpExlSjxR+fF zQt2q1~6x(Xm5m8!HZ;r z(|jVb%qFL9y!BEI-zrgAOOd1%O1e6^yy5Nf|2R=fYr*wc>-cdh*|X@`YegMhR>S(< z8tr`+p&HEtCby02!gPu)3$=4+M3BMA6xfy0)kVE}FgQw;8W*l(ZX&K;r&Cn*tYTGn zx`x%cV3bT!$*N`NBEsZGutK*gYPU3zQem>U4x!L$M(WUX$+_=89C|K|@08qZFR4x* z580`vcp6WsNtQ`9o+Byh9&)Y9P4`R|T01X{%erWN=HWdjn_;nDluOxGcYK3Jv!*&8 zVXSQ?a*1*6sYfwU7GtC-lgma6Mc4A(?o6yFH-he&a)zRdjV6};w`p79xeS?m5mCm; zUdWzOI7Y^L(L|;gz9_DcYWw1YOZ-exUa>)KeB^j(_10cfZmqIP(U@vG-8X)XA?kds z`OXTm$<zPvBC{Cm;H`tb8e1`jv41PZIaG=$k zE>(HcqtlwYS1+qnEoUWKrNZqt{Tq{%h$tQy_vD5v`zjIOv)(+YRTXW{tv=6jA_m*R| z^rFi&`k6PV6*CV46z##Uc=3G`7=1?9wkEqz)7ztV`tpgGeFx>D8K6bEXuP2J>5fps zdUBgf(mcpnCxoXG7b$h(pF&7mEl5hqjkG<;D0^4HPJZ}its)eU9j1U{Sh zrdge9YPV>In4@SicRMruAGUhO4U}A7Qw?ybe!Wp6`!u73i;(bf!PWv zyO809>Mf_HsG<&&u3-YB-aHo)4Xu;TUWQU_*nh;d=6b23SBALPGsi+RrfW_nOrf~? zK7J*=VoI?BB&G4Pi0;fIfnCI80SbtM2hdk4ITu{%sVeBLGQ7fPmAx;9s&rBK zWbHEj$kAMd=tQIWq?Edfrln!>JY#4?jtab$t%NAmnU}yt7$*eyLxU4H-#rHAGpiyT zKeZ`bS#7-V*sd5@#{(|L?q3N~)40J{$Unn$(>n%4Y0j?`AOZyG{eLu^TKa4p^%A0; zJP)D4aVoWzY7)X;d}rvKuDMFKCyF%to76LLIvrEB2|}IJZD~2Jsn_e4iP=zX2a#ns zy(Dl~+n?AqGWEB;KsV!f>IMTmr0$y|~f+_4_xHIeurOx#j!Qoo)eS1+LE0fsHKp9|4(t4YesHedOX+r{F`O zgAvUXKp=O* zuE~u}5O%gR-7W^q2;stac7SPmEoy #k4w<+Kn-)x1zQ7tdmZP!qMUS9u&EkTi) zCDap&&!QD29bxvx9ID7^_N&wN98Adi(icJlZcs*asG*_bg)LY(co&;92K9FH9Dsv* zKq(odLx#Tjo9!-ho2|=URbFiyMwq@R8vG5x4h)9mC=OVqQ=FD(HbG8wlV>+DWLm2M z>V<{cGm@YdWBosY?2sGt;;g;`ALlqJ^rA?A+Env9z9Ew|G z9nhF23UTz^g9CtS!~M~9z|YBdx3g=w3xEC3wwR{2de`WJv_c5Dnrle^@&OHs&+y43 zGGy_q^YcjmhuJp|ESN9T>P2{p{UH-9HUt{^>(|!;617cpGa!EhsZ4QLifX8xV!~FZ zHd34Y!E6VWYyqa8+UG?>1##yULEPj0-$N_Xn8_G8y9EWEghr>g?h;5vSL{E`!U6@& zp)B&moO}>NKqHkk6e?6~dimKY)B!92H@-$u<%JKC<+OpNeB6RC%gYD2)Y55n1ry^Q z^u$&>f(l86*^6HHo1}S-_}BWtuyQOX%02D3+|vasNfz=^f#! zQ=kk8+>}%#kzGzmSM&<^R0dFlb@BHcC-_=lfMXhkB zgII)>y2ae3Ou zcotzYFoUY|G1cZ)q9g>{|ywTn=g5YI_HIPX*+! z?_JpNt=+=iAfFHmve0h*yDx#f1u=&y#rB9b=4p5Eq=<dMG5x1 zBH&W6owd+eY)-*81~G*1>>w{#npe-@BQ>Ub_A8rPaEqm%1@0~K6=f@8_~?^Bebx!c zK#|aVUF-|9&W+Y!RDegk#mn3jsxo52jKT*dcWq<>LWkwTN1JzW%3MNjtmR4i5FE``TelbTm3& zJG;o(Gm8DOySG#`dHKvif&?hPCJ&^5g-5@BSTI1Az@wwPQn~O6ZZm?J3hg||Gi<^h zV$mjqMCVUDh=1Y6wiF#Z*=Bz`-JmqMKKLDu;GWyPr zW&kiDHNE-UH_sy^T{5VnkQ&UkeCnQs;!3ob{pG>Al?^rbafkt{EwEw;H>>O64>w0@ z_U~In00cEw;%_FeZTaledp8z?>h5~I{NBOPD_IA{!gVI&{N(uniYZNdAaDa+bnzX^ z4bb4Op%QT6!Y$x~%`%``S|9t%ySvabDCbe?e*A5w*q6aD@eObqQM`I_WM^O6Jb_0+ zlCWbu=|*1H*`Wg`m^btzpq7uHTU2}yclGzPEtIKTe&R4x&?UMR*lld+=Eg#Fnk~-$ z_25Pi?_D47ZIY(Eptn2zF@=8e#Z%Nm7z^Lx*m%--mtzH#ux%FYR@DUrw57OxjTCn% zDU9O|hOh&b!2OpGj`Z@w!@&3edkhyxXc5^3=@$GYFkgg^e);}v&SW!kZmTdvN8g%n zb|;U&xl!HhYX?U4=BZO9!OPtN;kwO!V>;$?$yZR=0g7yEk(s7rsFmKbD6>x;+N2F8 zyC($rfo^xH9sknujeAhOjyaPSyQ-`Yljj!-2Pe$z=MK&>q)Fg4JQB`21Jzp4@7fL0 z3LQHOVOUknC%-4yDRzO86?Dka*N>PrRxgDHTTql*PHv+HpnMBXLm^SAwnK~)6Gl%# z`#Cmw?JPM0%|H|gdErtjC=;>RLKCkPelIU$u>dc4gQ12M(_oFLjWpnh2j#rne;7L| z{2KD;>~r?dTYyR-)d;}$;8SYKt5XZqP)R=h4Wa|NGQEr&_D#B`18&bQzcPbI(bU9b z{KZDPJ7GZ-n_RBp;6d!^tld+jK6J{rui8CeUu#$;&bcSlj z-(}8-9T)?mvPfdKI<-SrA!$$yvnLj#4#H71EQL=P<4VGi+b^3U!5i>`DRo6IpF0#d z3x`0ckX%TDz^|h)NNre()MxEuIU{zFsJ2J6dhD|=9@yVRpQXmNCq@*jvwY;9W4IP* z-ir5reIu1Y3SeBbLrD!>=%Tv#{;16!y?4{qaQ(nWZ_9`7HI)=IRtI1ZB{COS!a=1J z=eU|K^P3N@p;5dtk*ZWC#Xe*#plE{uGMtPkB);J)=nh7Yo8;pdq)9vool9S3pWHZ+sT()I8nb`6XGCnU zJ&0W-()FL2-C7?awnmO%nJha6O!#&B?Wg~6h|{w)?_C*r%mjRr;3iBdvZF3PJtb2> zC>kaPhh>jGvON4>vkjqVpZxJn;OeojAB&x+me?%)&PbemainKoI=DkSqtp6_A*gV_^XCveJ|9&)%PU*EniRM4 zESTu}11pdWfr_~roO-lO+P#zy2_ujQB-llwVF6)$98;?kEh46`38aZ4ZB<(8l$LL7 z!A6pHB_x9owZWp%qTK}+!!hN2=HokZ4Q?Q@TAV#GAKB&kqsB9325rx`(P~J*Eeku& zzA<0$bE%r6VMc#tbSYSX>yIKI86EkfyN+DF>(uZjWq#x%M?P}Qjdtfh0uFt94k@9h zFuFg~d~P)Q>VKa2yYc#|*+WNo3h!WU-SvZaFofwcVQkp8XkZyHK!*7uGUHB3M@Ux* z?#qz5!)DM)r?%4tdK1BmX9C{QV1^eCWEYMO&YGd4P-=J*Iffjf7AmHp{qdS+lXzZ? zFDkvW#gkXRROuqGBmIipCu@$?(}TW##Y#nl?)$Vwz3P~{a1 z(4R@g3gM6B$$wI?u6iiS7V?pM%mg<)QOxO zaueP=LBtj7j>}1y4_*x+blE*L*e&=-{6!S*(9fClX9z9_I|GL*GGv`2uf^tu$!Vv5 zZ2>=ep(l5+Dw#lp5+R>UI<)4s1MxX&9_kWv*X*EDB(GhNmE^dF7cKiA-$T9w&Xx`a zw6V?O-AzQ+@%xYAp$I40-<>`3Y6L5gfDhBuiDThU;S4W0A3|%0gIwwmN#ey9iI(%z z_2U8pCy6M$a#Yfue2c2qW(abL(uq{l{^njCQvFH@LrcQ8ZCqK51QKr{xpuFur#xgL zY|s$tz`toI576C-)hkyA?2rcnTC_{|DgiFKkZ^xu5B!A_;ZeLQcW;^K&mH;v_!Lw5 zKj!&I8giKD|JB*%t5-gD?IYqFkKFgtv*Y_dc;Nqu0?$76z(=2bN<5ck55a&u;zn?z5-WPyE=`SHJox^%o!gZ}}E0 Ai2wiq diff --git a/generator/app/gen_random_telecom_data.py b/generator/app/gen_random_telecom_data.py index cc56a8a..1b400cd 100644 --- a/generator/app/gen_random_telecom_data.py +++ b/generator/app/gen_random_telecom_data.py @@ -74,8 +74,8 @@ def gen_random_telecom_data( end_date=programmeparams.registration_end_date, fpath_firstnames=cons.fpath_llama_firstnames, fpath_lastnames=cons.fpath_llama_lastnames, - fpath_countrieseurope=cons.fpath_countrieseurope, - fpath_domain_email=cons.fpath_domain_email + fpath_countries_europe=cons.fpath_countries_europe, + fpath_email_domain =cons.fpath_email_domain ) # generate random entity counts for each user diff --git a/generator/app/gen_trans_data.py b/generator/app/gen_trans_data.py index 80d770a..363941d 100644 --- a/generator/app/gen_trans_data.py +++ b/generator/app/gen_trans_data.py @@ -108,13 +108,13 @@ def gen_trans_data( dates_series = pd.date_range(start=datetime.strptime(transaction_obj.start_date, "%Y-%m-%d"), end=datetime.strptime(transaction_obj.end_date, "%Y-%m-%d") - pd.Timedelta(days=1), freq="d") trans_data[date_columns] = trans_data[date_columns].apply(lambda s: [s['registration_date'], np.random.choice(a=dates_series[dates_series >= max(s['registration_date'], s['transaction_date'])], size=1)[0]], result_type = 'expand', axis = 1).copy() # map iso numeric country codes to iso alpha country codes - country_codes_map = gen_country_codes_map(fpath_countrieseurope=user_obj.fpath_countrieseurope) + country_codes_map = gen_country_codes_map(fpath_countries_europe=user_obj.fpath_countries_europe) trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=country_codes_map, idhash_key_name='registration_country_code_alpha', idhash_val_name='registration_country_code') trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=country_codes_map, idhash_key_name='card_country_code_alpha', idhash_val_name='card_country_code') trans_data = join_idhashes_dict(data=trans_data, idhashes_dict=country_codes_map, idhash_key_name='ip_country_code_alpha', idhash_val_name='ip_country_code') # generate transaction status and error code - rejection_rates_dict = gen_trans_rejection_rates(trans_data=trans_data, fpath_countrieseurope=user_obj.fpath_countrieseurope, fpath_countrycrimeindex=fpath_countrycrimeindex, fpath_domain_email=user_obj.fpath_domain_email) + rejection_rates_dict = gen_trans_rejection_rates(trans_data=trans_data, fpath_countries_europe=user_obj.fpath_countries_europe, fpath_countrycrimeindex=fpath_countrycrimeindex, fpath_email_domain =user_obj.fpath_email_domain ) trans_data[['transaction_status', 'transaction_error_code']] = trans_data.apply(lambda series: gen_trans_status(series = series, rejection_rates_dict = rejection_rates_dict), result_type = 'expand', axis = 1) # order columns and sort rows by transaction date diff --git a/generator/cons.py b/generator/cons.py index e110395..25fb9e1 100644 --- a/generator/cons.py +++ b/generator/cons.py @@ -17,9 +17,9 @@ fpath_arch_randomtelecomdata = os.path.join(subdir_data, 'arch', 'RandomTelecomPayments.csv') fpath_temp_llama_firstnames = os.path.join(subdir_data, 'temp', 'llama_firstnames_{country}.csv') fpath_temp_llama_lastnames = os.path.join(subdir_data, 'temp', 'llama_lastnames_{country}.csv') -fpath_domain_email = os.path.join(subdir_data, 'ref', 'email-domains.csv') +fpath_email_domain = os.path.join(subdir_data, 'ref', 'email-domains.csv') fpath_countrycrimeindex = os.path.join(subdir_data, 'ref', 'country_crime_index.csv') -fpath_countrieseurope = os.path.join(subdir_data, 'ref', 'Countries-Europe.csv') +fpath_countries_europe = os.path.join(subdir_data, 'ref', 'Countries-Europe.csv') fpath_firstnames = os.path.join(subdir_data, 'ref', 'first-names.txt') fpath_lastnames = os.path.join(subdir_data, 'ref', 'last-names.txt') fpath_llama_firstnames = os.path.join(subdir_data, 'ref', 'llama_firstnames.csv') diff --git a/generator/objects/Card.py b/generator/objects/Card.py index 20383a9..371423f 100644 --- a/generator/objects/Card.py +++ b/generator/objects/Card.py @@ -14,7 +14,7 @@ class Card: def __init__( self, n_card_hashes:Union[int,np.int64], - fpath_countrieseurope:str=cons.fpath_countrieseurope, + fpath_countries_europe:str=cons.fpath_countries_europe, ): """ The randomly generated card data model object. @@ -23,8 +23,8 @@ def __init__( ---------- n_card_hashes : int The number of card hashes to generate. - fpath_countrieseurope : str - The file path to the european countries reference file, default is cons.fpath_countrieseurope. + fpath_countries_europe : str + The file path to the european countries reference file, default is cons.fpath_countries_europe. Attributes ---------- @@ -50,7 +50,7 @@ def __init__( The card shared idhash mapping dictionary. """ self.n_card_hashes = n_card_hashes - self.fpath_countrieseurope = fpath_countrieseurope + self.fpath_countries_europe = fpath_countries_europe self.card_types_dict = cons.data_model_card_types_dict self.lam = cons.data_model_poisson_params["card"]["lambda"] self.power = cons.data_model_poisson_params["card"]["power"] @@ -59,8 +59,8 @@ def __init__( self.card_hashes = list(self.card_hashes_cnts_dict.keys()) self.card_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.card_hashes_cnts_dict) self.card_hashes_type_dict = self.gen_card_type(card_hashes=self.card_hashes, card_types_dict=self.card_types_dict) - self.card_hashes_country_code_dict = gen_country_codes_dict(idhashes=self.card_hashes, fpath_countrieseurope=self.fpath_countrieseurope) - self.card_shared_idhash_map_dict = gen_shared_idhashes(idhashes_cnts_dict=self.card_hashes_cnts_dict, prop_shared_idhashes=self.prop_shared_card_hashes) + self.card_hashes_country_code_dict = gen_country_codes_dict(idhashes=self.card_hashes, fpath_countries_europe=self.fpath_countries_europe) + self.card_shared_idhash_map_dict = gen_shared_idhashes(idhashes=self.card_hashes, prop_shared_idhashes=self.prop_shared_card_hashes) @beartype def gen_card_type( diff --git a/generator/objects/Device.py b/generator/objects/Device.py index d8d54be..90d4a30 100644 --- a/generator/objects/Device.py +++ b/generator/objects/Device.py @@ -54,7 +54,7 @@ def __init__( self.device_hashes = list(self.device_hashes_cnts_dict.keys()) self.device_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.device_hashes_cnts_dict) self.device_hashes_type_dict = self.gen_device_types(device_hashes=self.device_hashes, fpath_smartphones=self.fpath_smartphones) - self.device_shared_idhash_map_dict = gen_shared_idhashes(idhashes_cnts_dict=self.device_hashes_cnts_dict, prop_shared_idhashes=self.prop_shared_device_hashes) + self.device_shared_idhash_map_dict = gen_shared_idhashes(idhashes=self.device_hashes, prop_shared_idhashes=self.prop_shared_device_hashes) @beartype def gen_device_types( diff --git a/generator/objects/Ip.py b/generator/objects/Ip.py index daca1fb..48644b6 100644 --- a/generator/objects/Ip.py +++ b/generator/objects/Ip.py @@ -14,7 +14,7 @@ class Ip: def __init__( self, n_ip_hashes:Union[int,np.int64], - fpath_countrieseurope:str=cons.fpath_countrieseurope, + fpath_countries_europe:str=cons.fpath_countries_europe, ): """ The randomly generated ip data model object. @@ -23,8 +23,8 @@ def __init__( ---------- n_ip_hashes : int The number of ip hashes to generate. - fpath_countrieseurope : str - The file path to the european countries reference file, default is cons.fpath_countrieseurope. + fpath_countries_europe : str + The file path to the european countries reference file, default is cons.fpath_countries_europe. Attributes ---------- @@ -46,12 +46,12 @@ def __init__( The shared ip hash mapping dictionary. """ self.n_ip_hashes = n_ip_hashes - self.fpath_countrieseurope = fpath_countrieseurope + self.fpath_countries_europe = fpath_countries_europe self.lam = cons.data_model_poisson_params["ip"]["lambda"] self.power = cons.data_model_poisson_params["ip"]["power"] self.prop_shared_ip_hashes = cons.data_model_shared_entities_dict["ip"] self.ip_hashes_cnts_dict = gen_idhash_cnt_dict(idhash_type="hash", n=self.n_ip_hashes, lam=self.lam, power=self.power) self.ip_hashes = list(self.ip_hashes_cnts_dict.keys()) self.ip_hashes_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.ip_hashes_cnts_dict) - self.ip_hashes_country_code_dict = gen_country_codes_dict(idhashes=self.ip_hashes, fpath_countrieseurope=self.fpath_countrieseurope) - self.ip_shared_idhash_map_dict = gen_shared_idhashes(idhashes_cnts_dict=self.ip_hashes_cnts_dict, prop_shared_idhashes=self.prop_shared_ip_hashes) \ No newline at end of file + self.ip_hashes_country_code_dict = gen_country_codes_dict(idhashes=self.ip_hashes, fpath_countries_europe=self.fpath_countries_europe) + self.ip_shared_idhash_map_dict = gen_shared_idhashes(idhashes=self.ip_hashes, prop_shared_idhashes=self.prop_shared_ip_hashes) \ No newline at end of file diff --git a/generator/objects/User.py b/generator/objects/User.py index b4107b8..475d79a 100644 --- a/generator/objects/User.py +++ b/generator/objects/User.py @@ -19,8 +19,8 @@ def __init__( end_date:str, fpath_firstnames:str=cons.fpath_firstnames, fpath_lastnames:str=cons.fpath_lastnames, - fpath_countrieseurope:str=cons.fpath_countrieseurope, - fpath_domain_email:str=cons.fpath_domain_email, + fpath_countries_europe:str=cons.fpath_countries_europe, + fpath_email_domain :str=cons.fpath_email_domain , ): """ The randomly generated user data model object @@ -37,10 +37,10 @@ def __init__( The full file path to the first names reference data, default is cons.fpath_firstnames. fpath_lastnames : str The full file path to the last names reference data, default is cons.fpath_lastnames. - fpath_countrieseurope : str - The full file path to the europe countries reference data, default is cons.fpath_countrieseurope. - fpath_domain_email : str - The full file path to the email domain reference data, default is cons.fpath_domain_email. + fpath_countries_europe : str + The full file path to the europe countries reference data, default is cons.fpath_countries_europe. + fpath_email_domain : str + The full file path to the email domain reference data, default is cons.fpath_email_domain . Attributes ---------- @@ -74,17 +74,17 @@ def __init__( self.end_date = end_date self.fpath_firstnames = fpath_firstnames self.fpath_lastnames = fpath_lastnames - self.fpath_countrieseurope = fpath_countrieseurope - self.fpath_domain_email = fpath_domain_email + self.fpath_countries_europe = fpath_countries_europe + self.fpath_email_domain = fpath_email_domain self.lam = cons.data_model_poisson_params["user"]["lambda"] self.power = cons.data_model_poisson_params["user"]["power"] self.user_ids_cnts_dict = gen_idhash_cnt_dict(idhash_type="id", n=self.n_user_ids, lam=self.lam, power=self.power) self.user_ids = list(self.user_ids_cnts_dict.keys()) self.user_ids_props_dict = cnt2prop_dict(idhashes_cnts_dict=self.user_ids_cnts_dict) - self.user_ids_country_code_dict = gen_country_codes_dict(idhashes=self.user_ids, fpath_countrieseurope=self.fpath_countrieseurope) + self.user_ids_country_code_dict = gen_country_codes_dict(idhashes=self.user_ids, fpath_countries_europe=self.fpath_countries_europe) self.user_ids_firstname_dict = self.gen_user_firstname(fpath_firstnames=self.fpath_firstnames) self.user_ids_lastname_dict = self.gen_user_lastname(fpath_lastnames=self.fpath_lastnames) - self.user_ids_email_domain_dict = self.gen_user_email_domain(fpath_domain_email=self.fpath_domain_email) + self.user_ids_email_domain_dict = self.gen_user_email_domain(fpath_email_domain=self.fpath_email_domain) self.user_ids_dates_dict = gen_dates_dict(idhashes=self.user_ids, start_date=self.start_date, end_date=self.end_date) @beartype @@ -150,14 +150,14 @@ def gen_user_lastname( @beartype def gen_user_email_domain( self, - fpath_domain_email:str, + fpath_email_domain:str, ) -> Dict[str, str]: """ Generates a dictionary of random user id email domains Parameters ---------- - fpath_domain_email : str + fpath_email_domain : str The file path to the email domains reference file Returns @@ -166,7 +166,7 @@ def gen_user_email_domain( A dictionary of user id email domains """ # load domain names data - email_domain_data = pd.read_csv(fpath_domain_email, index_col=0) + email_domain_data = pd.read_csv(fpath_email_domain, index_col=0) # calculate the proportion of email domains email_domain_data["proportion"] = email_domain_data["proportion"].divide(email_domain_data["proportion"].sum()) # convert email domain proportions to a dictionary diff --git a/generator/unittests/app/test_gen_user_trans_data.py b/generator/unittests/app/test_gen_user_trans_data.py index 8f4c8f6..540bb1c 100644 --- a/generator/unittests/app/test_gen_user_trans_data.py +++ b/generator/unittests/app/test_gen_user_trans_data.py @@ -36,8 +36,8 @@ # create relative file paths fpath_firstnames = '.' + cons.fpath_llama_firstnames.split(cons.fpath_repo_dir)[1] fpath_lastnames = '.' + cons.fpath_llama_lastnames.split(cons.fpath_repo_dir)[1] -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1] +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +fpath_email_domain = '.' + cons.fpath_email_domain.split(cons.fpath_repo_dir)[1] fpath_smartphones = '.' + cons.fpath_smartphones.split(cons.fpath_repo_dir)[1] fpath_countrycrimeindex = '.' + cons.fpath_countrycrimeindex.split(cons.fpath_repo_dir)[1] fpath_unittest_user_data = '.' + cons.fpath_unittest_user_data.split(cons.fpath_repo_dir)[1] @@ -50,8 +50,8 @@ end_date=programmeparams.registration_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, - fpath_countrieseurope=fpath_countrieseurope, - fpath_domain_email=fpath_domain_email + fpath_countries_europe=fpath_countries_europe, + fpath_email_domain=fpath_email_domain ) # generate random entity counts for each user @@ -59,8 +59,8 @@ # generate random entity values device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum(), fpath_smartphones=fpath_smartphones) -card_obj = Card(n_card_hashes=random_entity_counts['n_cards'].sum(), fpath_countrieseurope=fpath_countrieseurope) -ip_obj = Ip(n_ip_hashes=random_entity_counts['n_ips'].sum(), fpath_countrieseurope=fpath_countrieseurope) +card_obj = Card(n_card_hashes=random_entity_counts['n_cards'].sum(), fpath_countries_europe=fpath_countries_europe) +ip_obj = Ip(n_ip_hashes=random_entity_counts['n_ips'].sum(), fpath_countries_europe=fpath_countries_europe) transaction_obj = Transaction(n_transaction_hashes=random_entity_counts['n_transactions'].sum(), start_date=programmeparams.transaction_start_date, end_date=programmeparams.transaction_end_date) application_obj = Application(n_application_hashes=programmeparams.n_applications) diff --git a/generator/unittests/objects/test_Card.py b/generator/unittests/objects/test_Card.py index 4b9ef35..688455f 100644 --- a/generator/unittests/objects/test_Card.py +++ b/generator/unittests/objects/test_Card.py @@ -42,8 +42,8 @@ random.seed(cons.unittest_seed) np.random.seed(cons.unittest_seed) -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -card_object = Card(n_card_hashes=exp_n_card_hashes, fpath_countrieseurope=fpath_countrieseurope) +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +card_object = Card(n_card_hashes=exp_n_card_hashes, fpath_countries_europe=fpath_countries_europe) obs_card_hashes_cnts_dict = card_object.card_hashes_cnts_dict obs_card_types_dict = card_object.card_types_dict diff --git a/generator/unittests/objects/test_Ip.py b/generator/unittests/objects/test_Ip.py index b8207b8..d7490fa 100644 --- a/generator/unittests/objects/test_Ip.py +++ b/generator/unittests/objects/test_Ip.py @@ -35,8 +35,8 @@ random.seed(cons.unittest_seed) np.random.seed(cons.unittest_seed) -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -ip_object = Ip(n_ip_hashes=exp_n_ip_hashes, fpath_countrieseurope=fpath_countrieseurope) +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +ip_object = Ip(n_ip_hashes=exp_n_ip_hashes, fpath_countries_europe=fpath_countries_europe) obs_ip_hashes_cnts_dict = ip_object.ip_hashes_cnts_dict obs_ip_hashes_props_dict = ip_object.ip_hashes_props_dict diff --git a/generator/unittests/objects/test_User.py b/generator/unittests/objects/test_User.py index be8c197..92a30bd 100644 --- a/generator/unittests/objects/test_User.py +++ b/generator/unittests/objects/test_User.py @@ -61,9 +61,9 @@ fpath_firstnames = '.' + cons.fpath_llama_firstnames.split(cons.fpath_repo_dir)[1] fpath_lastnames = '.' + cons.fpath_llama_lastnames.split(cons.fpath_repo_dir)[1] -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1] -user_object = User(n_user_ids=exp_n_user_ids, start_date=exp_start_date, end_date=exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email) +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +fpath_email_domain = '.' + cons.fpath_email_domain.split(cons.fpath_repo_dir)[1] +user_object = User(n_user_ids=exp_n_user_ids, start_date=exp_start_date, end_date=exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countries_europe=fpath_countries_europe, fpath_email_domain=fpath_email_domain) obs_user_ids_cnts_dict = user_object.user_ids_cnts_dict obs_user_ids_props_dict = user_object.user_ids_props_dict diff --git a/generator/unittests/utilities/test_gen_country_codes_dict.py b/generator/unittests/utilities/test_gen_country_codes_dict.py index 0b7dd0e..f13fd23 100644 --- a/generator/unittests/utilities/test_gen_country_codes_dict.py +++ b/generator/unittests/utilities/test_gen_country_codes_dict.py @@ -12,8 +12,8 @@ idhashes = ["a", "b", "c", "d"] exp_prop_dict = {"a": 276, "b": 756, "c": 642, "d": 826} -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -obs_prop_dict = gen_country_codes_dict(idhashes=idhashes, fpath_countrieseurope=fpath_countrieseurope) +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +obs_prop_dict = gen_country_codes_dict(idhashes=idhashes, fpath_countries_europe=fpath_countries_europe) class Test_gen_country_codes_dict(unittest.TestCase): """""" diff --git a/generator/unittests/utilities/test_gen_country_codes_map.py b/generator/unittests/utilities/test_gen_country_codes_map.py index 9b4d2da..3e84fa5 100644 --- a/generator/unittests/utilities/test_gen_country_codes_map.py +++ b/generator/unittests/utilities/test_gen_country_codes_map.py @@ -18,8 +18,8 @@ 292: 'GI', 492: 'MC', 336: 'VA' } -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -obs_country_codes_map = gen_country_codes_map(fpath_countrieseurope=fpath_countrieseurope) +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +obs_country_codes_map = gen_country_codes_map(fpath_countries_europe=fpath_countries_europe) class Test_gen_country_codes_map(unittest.TestCase): """""" diff --git a/generator/unittests/utilities/test_gen_obj_idhash_series.py b/generator/unittests/utilities/test_gen_obj_idhash_series.py index 03a1b75..18faa86 100644 --- a/generator/unittests/utilities/test_gen_obj_idhash_series.py +++ b/generator/unittests/utilities/test_gen_obj_idhash_series.py @@ -22,15 +22,15 @@ n_user_ids = cons.unittest_n_entities fpath_firstnames = '.' + cons.fpath_llama_firstnames.split(cons.fpath_repo_dir)[1] fpath_lastnames = '.' + cons.fpath_llama_lastnames.split(cons.fpath_repo_dir)[1] -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1] +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +fpath_email_domain = '.' + cons.fpath_email_domain.split(cons.fpath_repo_dir)[1] fpath_smartphones = '.' + cons.fpath_smartphones.split(cons.fpath_repo_dir)[1] random.seed(cons.unittest_seed) np.random.seed(cons.unittest_seed) # create user object -user_object = User(n_user_ids=n_user_ids, start_date=start_date, end_date=end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email) +user_object = User(n_user_ids=n_user_ids, start_date=start_date, end_date=end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countries_europe=fpath_countries_europe, fpath_email_domain=fpath_email_domain) # generate random entity counts random_entity_counts = gen_random_entity_counts(user_obj=user_object) # generate random entity values diff --git a/generator/unittests/utilities/test_gen_random_entity_counts.py b/generator/unittests/utilities/test_gen_random_entity_counts.py index 266dd4d..58a5522 100644 --- a/generator/unittests/utilities/test_gen_random_entity_counts.py +++ b/generator/unittests/utilities/test_gen_random_entity_counts.py @@ -21,9 +21,9 @@ fpath_firstnames = '.' + cons.fpath_llama_firstnames.split(cons.fpath_repo_dir)[1] fpath_lastnames = '.' + cons.fpath_llama_lastnames.split(cons.fpath_repo_dir)[1] -fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1] -fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1] -user_object = User(n_user_ids=exp_n_user_ids, start_date=exp_start_date, end_date=exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email) +fpath_countries_europe = '.' + cons.fpath_countries_europe.split(cons.fpath_repo_dir)[1] +fpath_email_domain = '.' + cons.fpath_email_domain.split(cons.fpath_repo_dir)[1] +user_object = User(n_user_ids=exp_n_user_ids, start_date=exp_start_date, end_date=exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countries_europe=fpath_countries_europe, fpath_email_domain=fpath_email_domain) exp_randomentity_counts_dict = { 'uid': ['6374692674377254', '6720317315593519', '4264861381989413', '1751409580926382'], diff --git a/generator/unittests/utilities/test_gen_shared_idhashes.py b/generator/unittests/utilities/test_gen_shared_idhashes.py index 15ff768..6afc519 100644 --- a/generator/unittests/utilities/test_gen_shared_idhashes.py +++ b/generator/unittests/utilities/test_gen_shared_idhashes.py @@ -14,8 +14,8 @@ np.random.seed(cons.unittest_seed) obs_prop_shared_idhashes=cons.data_model_shared_entities_dict["ip"] -obs_hash_cnt_dict = gen_idhash_cnt_dict(idhash_type="hash", n=4, lam=1, nbytes=16) -obs_shared_idhashes = gen_shared_idhashes(idhashes_cnts_dict=obs_hash_cnt_dict, prop_shared_idhashes=obs_prop_shared_idhashes) +idhashes = list(gen_idhash_cnt_dict(idhash_type="hash", n=4, lam=1, nbytes=16).keys()) +obs_shared_idhashes = gen_shared_idhashes(idhashes=idhashes, prop_shared_idhashes=obs_prop_shared_idhashes) exp_shared_idhashes = {} class Test_gen_shared_idhashes(unittest.TestCase): diff --git a/generator/utilities/gen_country_codes_dict.py b/generator/utilities/gen_country_codes_dict.py index 96f5faa..4aeac49 100644 --- a/generator/utilities/gen_country_codes_dict.py +++ b/generator/utilities/gen_country_codes_dict.py @@ -10,7 +10,7 @@ @beartype def gen_country_codes_dict( idhashes:List[str], - fpath_countrieseurope:str=cons.fpath_countrieseurope, + fpath_countries_europe:str=cons.fpath_countries_europe, ) -> Dict[str, Union[int, np.int64]]: """ Generates a dictionary of randomLy sampled country codes for an input list of idhashes. @@ -19,8 +19,8 @@ def gen_country_codes_dict( ---------- idhashes : List[str] A list of idhashes. - fpath_countrieseurope : str - The file path to the european countries reference file, default is cons.fpath_countrieseurope. + fpath_countries_europe : str + The file path to the european countries reference file, default is cons.fpath_countries_europe. Returns ------- @@ -33,15 +33,15 @@ def gen_country_codes_dict( import cons idhashes_cnts_dict:{'abcd1234': 5, 'defg4567': 3, 'ghij7891': 7} gen_country_codes_dict(idhashes_cnts_dict=idhashes_cnts_dict, - fpath_countrieseurope=cons.fpath_countrieseurope, + fpath_countries_europe=cons.fpath_countries_europe, ) ``` """ # check file path exists - if os.path.exists(fpath_countrieseurope) == False: - raise FileNotFoundError(f"File not found: {fpath_countrieseurope}") + if os.path.exists(fpath_countries_europe) == False: + raise FileNotFoundError(f"File not found: {fpath_countries_europe}") # load population data of european countries - european_populations_cnt_data = pd.read_csv(filepath_or_buffer=fpath_countrieseurope, usecols=["ISO numeric", "population"],) + european_populations_cnt_data = pd.read_csv(filepath_or_buffer=fpath_countries_europe, usecols=["ISO numeric", "population"],) # convert to a dictionary of ISO country codes with population counts european_populations_cnt_dict = european_populations_cnt_data.set_index("ISO numeric")["population"].to_dict() # convert dictionary of population counts to dictionary of population proportions diff --git a/generator/utilities/gen_country_codes_map.py b/generator/utilities/gen_country_codes_map.py index ef20723..160a5a7 100644 --- a/generator/utilities/gen_country_codes_map.py +++ b/generator/utilities/gen_country_codes_map.py @@ -7,15 +7,15 @@ @beartype def gen_country_codes_map( - fpath_countrieseurope:str=cons.fpath_countrieseurope, + fpath_countries_europe:str=cons.fpath_countries_europe, ) -> Dict[int, Union[str, np.int64]]: """ Generates a dictionary of ISO numeric codes mapping to ISO alpha codes. Parameters ---------- - fpath_countrieseurope : str - The full file path to the european countries reference file, default is cons.fpath_countrieseurope. + fpath_countries_europe : str + The full file path to the european countries reference file, default is cons.fpath_countries_europe. Returns ------- @@ -26,11 +26,11 @@ def gen_country_codes_map( -------- ``` import cons - gen_country_codes_map(fpath_countrieseurope=cons.fpath_countrieseurope) + gen_country_codes_map(fpath_countries_europe=cons.fpath_countries_europe) ``` """ # load european county codes data - country_codes_data = pd.read_csv(filepath_or_buffer=fpath_countrieseurope, usecols=["ISO numeric", "ISO alpha 2"],) + country_codes_data = pd.read_csv(filepath_or_buffer=fpath_countries_europe, usecols=["ISO numeric", "ISO alpha 2"],) # convert data to a dictionary of ISO numeric codes mapping to ISO alpha codes country_codes_map = country_codes_data.set_index("ISO numeric")["ISO alpha 2"].to_dict() return country_codes_map diff --git a/generator/utilities/gen_shared_idhashes.py b/generator/utilities/gen_shared_idhashes.py index a1b880c..f255040 100644 --- a/generator/utilities/gen_shared_idhashes.py +++ b/generator/utilities/gen_shared_idhashes.py @@ -1,41 +1,47 @@ import numpy as np import pandas as pd from beartype import beartype +from typing import Dict, Union, List @beartype def gen_shared_idhashes( - idhashes_cnts_dict:dict, + idhashes:List[str], prop_shared_idhashes:float - ) -> dict: + ) -> Dict[str, str]: """ Generates a dictionary of shared idhashes proportions - + Parameters ---------- - idhashes_cnts_dict : dict - A dictionary of idhashes counts. + idhashes : list of str + A list of idhashes. prop_shared_idhashes : float The total proportion of shared idhashes. - + Returns ------- - dict - A dictionary of shared idhashes proportion. + Dict[str, str] + A dictionary idhashes and their shared idhashes. + + Examples + -------- + ``` + idhashes=['2e23f63807f6170a', 'b8816ed926bf9f83', 'b010fdb44fa68822'] + gen_shared_idhashes(idhashes=idhashes, prop_shared_idhashes=0.01) + ``` """ # calculate the total number of idhashes - n_idhashes = len(idhashes_cnts_dict) + n_idhashes = len(idhashes) # randomly sample the idhashes based on the total proportion of shared idhashes - shared_idhashes_list = list( - np.random.choice( - a=list(idhashes_cnts_dict.keys()), - size=int(np.round(n_idhashes * prop_shared_idhashes)), - replace=False - ) - ) + shared_idhashes_list = np.random.choice( + a=idhashes, + size=int(np.round(n_idhashes * prop_shared_idhashes)), + replace=False + ).tolist() shared_idhash_map_dict = {} - if shared_idhashes_list != []: + if (shared_idhashes_list != []): # determine how many networks - n_groups = int(np.floor(np.sqrt(len(shared_idhashes_list)))) + n_groups = int(np.ceil(np.sqrt(len(shared_idhashes_list)))) group_uniform_dict = {g:np.random.uniform() for g in range(n_groups)} group_prop_dict = {key:value/sum(group_uniform_dict.values()) for key, value in group_uniform_dict.items()} # generate groups for all shared id hashes @@ -43,7 +49,7 @@ def gen_shared_idhashes( shared_idhashes_groups_dict = dict(zip(shared_idhashes_list, shared_idhashes_groups_list)) shared_idhashes_groups_df = pd.Series(shared_idhashes_groups_dict, name="shared_idhashes_group").to_frame().reset_index().rename(columns={'index':'idhash'}) shared_entity_groups_dict = shared_idhashes_groups_df.groupby('shared_idhashes_group').agg({'idhash':list}).to_dict()['idhash'] - shared_idhashes_groups_df['shared_idhash'] = shared_idhashes_groups_df.apply(lambda series: np.random.choice(a=shared_entity_groups_dict[series['shared_idhashes_group']]), axis=1) + shared_idhashes_groups_df['shared_idhash'] = [np.random.choice(shared_entity_groups_dict[group]) for group in shared_idhashes_groups_df['shared_idhashes_group']] # create the shared idhash map dictionary shared_idhash_map_dict = shared_idhashes_groups_df.set_index('idhash')['shared_idhash'].to_dict() return shared_idhash_map_dict diff --git a/generator/utilities/gen_trans_rejection_rates.py b/generator/utilities/gen_trans_rejection_rates.py index af8ab6e..575742e 100644 --- a/generator/utilities/gen_trans_rejection_rates.py +++ b/generator/utilities/gen_trans_rejection_rates.py @@ -1,70 +1,71 @@ import pandas as pd import cons from beartype import beartype +from typing import Dict @beartype def gen_trans_rejection_rates( trans_data:pd.DataFrame, - fpath_countrieseurope=cons.fpath_countrieseurope, - fpath_countrycrimeindex=cons.fpath_countrycrimeindex, - fpath_domain_email=cons.fpath_domain_email - ) -> dict: + fpath_countries_europe:str=cons.fpath_countries_europe, + fpath_countrycrimeindex:str=cons.fpath_countrycrimeindex, + fpath_email_domain:str=cons.fpath_email_domain, + ) -> Dict[str, Dict[str, float]]: """ Generates the transaction rejection rates based on features within the transaction level telecom payments data. - + Parameters ---------- trans_data : pandas.DataFrame The transaction level telecom payments data. - fpath_countrieseurope : str - The file path to the europe countries reference data, default is cons.fpath_countrieseurope. + fpath_countries_europe : str + The file path to the europe countries reference data, default is cons.fpath_countries_europe. fpath_countrycrimeindex : str The file path to the country crime index reference data, default is cons.fpath_countrycrimeindex. - fpath_domain_email :str - The file path to the email domains reference data, default is cons.fpath_domain_email. - + fpath_email_domain :str + The file path to the email domains reference data, default is cons.fpath_email_domain. + Returns ------- dict The rejection rates based on features within the transaction level telecom payments data. """ - # create empty dictionary to hold rejection rates + # initialize dictionary to store all computed rejection rates rejection_rates_dict = {} - + # generate country code rejection based rates - countrieseurope = pd.read_csv(fpath_countrieseurope, usecols=["ISO numeric", "ISO alpha 2"]) + countrieseurope = pd.read_csv(fpath_countries_europe, usecols=["ISO alpha 2"]) countrycrimeindex = pd.read_csv(fpath_countrycrimeindex, usecols=["country_code", "crime_index"]) europecountrycrimeindex = pd.merge(left=countrieseurope, right=countrycrimeindex, left_on="ISO alpha 2", right_on="country_code", how="left",) europecountrycrimeindex["trans_reject_rate"] = europecountrycrimeindex["crime_index"].divide(europecountrycrimeindex["crime_index"].sum()) country_code_trans_reject_rate_dict = europecountrycrimeindex.set_index("ISO alpha 2")["trans_reject_rate"].to_dict() rejection_rates_dict["country_code_trans_reject_rate_dict"] = country_code_trans_reject_rate_dict - + # generate domain email rejection based rates - domain_email = pd.read_csv(fpath_domain_email, usecols=["domain", "proportion"]) + domain_email = pd.read_csv(fpath_email_domain, usecols=["domain", "proportion"]) domain_email["trans_reject_rate"] = (1 - domain_email["proportion"]) / (1 - domain_email["proportion"]).sum() domain_email_trans_reject_rate_dict = domain_email.set_index("domain")["trans_reject_rate"].to_dict() rejection_rates_dict["domain_email_trans_reject_rate_dict"] = domain_email_trans_reject_rate_dict - + # generate shared entities with rejection rates dictionary shared_devices = (trans_data.groupby(by="device_hash").agg({"userid": "nunique"}).sort_values(by="userid")) shared_ips = (trans_data.groupby(by="ip_hash").agg({"userid": "nunique"}).sort_values(by="userid")) shared_cards = (trans_data.groupby(by="card_hash").agg({"userid": "nunique"}).sort_values(by="userid")) - shared_devices_reject_rate_dict = shared_devices.divide(shared_devices["userid"].sum()).to_dict()["userid"] + shared_devices_reject_rate_dict = shared_devices.divide(shared_devices["userid"].sum())["userid"].to_dict() shared_ips_reject_rate_dict = shared_ips.divide(shared_ips["userid"].sum()).to_dict()["userid"] shared_cards_reject_rate_dict = shared_cards.divide(shared_cards["userid"].sum()).to_dict()["userid"] rejection_rates_dict["shared_devices_reject_rate_dict"] = shared_devices_reject_rate_dict rejection_rates_dict["shared_ips_reject_rate_dict"] = shared_ips_reject_rate_dict rejection_rates_dict["shared_cards_reject_rate_dict"] = shared_cards_reject_rate_dict - + # generate occurrence based rejection rates count_devices = (trans_data.groupby(by="userid").agg({"device_hash": "nunique"}).sort_values(by="device_hash")) count_ips = (trans_data.groupby(by="userid").agg({"ip_hash": "nunique"}).sort_values(by="ip_hash")) count_cards = (trans_data.groupby(by="userid").agg({"card_hash": "nunique"}).sort_values(by="card_hash")) - count_devices_reject_rate_dict = count_devices.divide(count_devices["device_hash"].sum()).to_dict()["device_hash"] + count_devices_reject_rate_dict = count_devices.divide(count_devices["device_hash"].sum())["device_hash"].to_dict() count_ips_reject_rate_dict = count_ips.divide(count_ips["ip_hash"].sum()).to_dict()["ip_hash"] count_cards_reject_rate_dict = count_cards.divide(count_cards["card_hash"].sum()).to_dict()["card_hash"] rejection_rates_dict["count_devices_reject_rate_dict"] = count_devices_reject_rate_dict rejection_rates_dict["count_ips_reject_rate_dict"] = count_ips_reject_rate_dict rejection_rates_dict["count_cards_reject_rate_dict"] = count_cards_reject_rate_dict - + return rejection_rates_dict diff --git a/generator/utilities/gen_user_names_file.py b/generator/utilities/gen_user_names_file.py index 22671d3..c8765fe 100644 --- a/generator/utilities/gen_user_names_file.py +++ b/generator/utilities/gen_user_names_file.py @@ -139,7 +139,7 @@ def invoke_bedrock( bedrock = Bedrock(session=session, model_region="us-east-1", model_id="meta.llama3-70b-instruct-v1:0") # load countries, firstnames and surnames files - countrieseurope = pd.read_csv(cons.fpath_countrieseurope, usecols=['name', 'ISO numeric']) + countrieseurope = pd.read_csv(cons.fpath_countries_europe, usecols=['name', 'ISO numeric']) orig_firstnames = pd.read_csv(cons.fpath_firstnames) orig_surnames = pd.read_csv(cons.fpath_lastnames) From 1e5aee0233bd8d0b7833bb9e5ea3b2f9bf0b0983 Mon Sep 17 00:00:00 2001 From: Oisin Date: Sat, 17 Jan 2026 16:35:32 +0000 Subject: [PATCH 15/16] Reviewed scripts using copilot --- data/unittest/transaction_data.parquet | Bin 30584 -> 30585 bytes generator/main.py | 2 +- generator/utilities/Bedrock.py | 2 +- generator/utilities/align_country_codes.py | 2 +- generator/utilities/gen_obj_idhash_series.py | 2 +- .../utilities/gen_random_poisson_power.py | 2 +- generator/utilities/gen_shared_idhashes.py | 2 +- .../utilities/gen_trans_rejection_rates.py | 3 +- generator/utilities/gen_trans_status.py | 30 ++++++++++-------- generator/utilities/input_error_handling.py | 25 +++++++++------ 10 files changed, 41 insertions(+), 29 deletions(-) diff --git a/data/unittest/transaction_data.parquet b/data/unittest/transaction_data.parquet index 0da3bed32bc74373ffbfeb5b45757c295f2ee073..47b78f12a50db976630997bc7713347cfe14e7f6 100644 GIT binary patch delta 455 zcmV;&0XY8n?g9Dk0kC{T1k3>y(vyHhCVyt|Wdi^J0NMos0wq8ZRRsiAKoI}|RS-o) z1VvE<5fBv=QBXxx6%hbHK@|i55fBytH$_B10Tfk102D$21ONdPKoCR(0R;tM5Kur= zKmZU#L_i2YK?D&*fdvH>1W`}~1rPxNQ~>}L1Qq-N73Kj<6$%vq5&!^xbQu5u0Fxv| z8EF#<9Y8TUQ+0E2Wn*+@WJYpfbz~9=7d$gMRdZ!>Lt$=XWo&F^WEKe)NHjW0ZgX{J zW@%$-Wo~pvb#7#H4H`%=Izw-6Ze?S1X>V>+X>Db1b#xT~6_5fIm;x*n+65I56$TXs z95%F*F;Oyq004*q@P{z~001SYzzwJGuvAUbx|lK)sDFn2x)uR9ROboyY2NR7TkXuV zb8){_*JrI|%-Ax`>mC9FHvob_!@@-g2v7*ZB?&VGqy$D#7AX`7F(U(en6BI%Y0TF7OmOzlg5Jr& delta 480 zcmezQj`7Dk#tk(tET#8s4ql|jIPRZvhgAkf9tkwq|o)xkl4F@Qyc)xniDgh5as#DS58 zg;7Avfss{^F_Do0C@CVsD#G9p!YIlj`iD{U38RlFmnee}14CVjB+y#d$!e|=^4@H96%`O=6J?VzTfn3t z=4xqxp?dN@SK0bb#t$9(Ko`Yso%>z(_4g;f>GwWwyz+Zy%h~gxHd9kPT0nUdWOcR+V37E1 pd.Series: """ Aligns inconsistent registration, ip and card country codes to have mostly common values; with a random chance of inconsistencies. diff --git a/generator/utilities/gen_obj_idhash_series.py b/generator/utilities/gen_obj_idhash_series.py index cbefbab..8900b04 100644 --- a/generator/utilities/gen_obj_idhash_series.py +++ b/generator/utilities/gen_obj_idhash_series.py @@ -5,7 +5,7 @@ @beartype def gen_obj_idhash_series( idhashes:List[str], - n_counts_series:pd.Series + n_counts_series:pd.Series, ) -> pd.Series: """ Generates a series of entity idhash lists using the entity counts per user Series and idhashes list. diff --git a/generator/utilities/gen_random_poisson_power.py b/generator/utilities/gen_random_poisson_power.py index e915046..383b9b6 100644 --- a/generator/utilities/gen_random_poisson_power.py +++ b/generator/utilities/gen_random_poisson_power.py @@ -6,7 +6,7 @@ def gen_random_poisson_power( lam:Union[int,float], size:Union[int,np.int64], - power:int + power:int, ) -> np.ndarray: """ Generates data from a polynomial random poisson variable to a given power. diff --git a/generator/utilities/gen_shared_idhashes.py b/generator/utilities/gen_shared_idhashes.py index f255040..e2901ed 100644 --- a/generator/utilities/gen_shared_idhashes.py +++ b/generator/utilities/gen_shared_idhashes.py @@ -6,7 +6,7 @@ @beartype def gen_shared_idhashes( idhashes:List[str], - prop_shared_idhashes:float + prop_shared_idhashes:float, ) -> Dict[str, str]: """ Generates a dictionary of shared idhashes proportions diff --git a/generator/utilities/gen_trans_rejection_rates.py b/generator/utilities/gen_trans_rejection_rates.py index 575742e..f24ce63 100644 --- a/generator/utilities/gen_trans_rejection_rates.py +++ b/generator/utilities/gen_trans_rejection_rates.py @@ -1,5 +1,6 @@ -import pandas as pd import cons + +import pandas as pd from beartype import beartype from typing import Dict diff --git a/generator/utilities/gen_trans_status.py b/generator/utilities/gen_trans_status.py index 492d88d..35fbbd3 100644 --- a/generator/utilities/gen_trans_status.py +++ b/generator/utilities/gen_trans_status.py @@ -1,36 +1,38 @@ +import cons + import random import numpy as np import pandas as pd -import cons from beartype import beartype +from typing import List, Dict, Union @beartype def gen_trans_status( series:pd.Series, - rejection_rates_dict:dict, - rejection_scaling_factor:int=2 - ) -> list: + rejection_rates_dict:Dict[str, Dict[str, float]], + rejection_scaling_factor:int=2, + ) -> List[Union[str, float]]: """ Generates the transaction status for a pandas series from the transaction level telecom payments data given the rejection rates dictionary from the same data. - + Parameters ---------- series : pandas.Series A pandas series from the transaction level telecom payments data. - rejection_rates_dict : dict + rejection_rates_dict : Dict[str, Dict[str, float]] Rejection rates generated the transaction level telecom payments data. rejection_scaling_factor : int A multiplicative scaling factor for rejection rates, default is 2. - + Returns ------- - list - The transaction status for the pandas series. + List[str] + The transaction status and error code. """ # set country code columns country_code_columns = ["registration_country_code","ip_country_code","card_country_code"] - - if series['card_hash'] == series['card_hash']: + # if card hash + if pd.notna(series['card_hash']): status = "rejected" # add rejections based on crime rates within country codes if rejection_rates_dict["country_code_trans_reject_rate_dict"][np.random.choice(a=series[country_code_columns].dropna().to_list(), size=1)[0]] >= random.uniform(0, 1)/rejection_scaling_factor: @@ -42,7 +44,7 @@ def gen_trans_status( elif cons.data_model_inconsistent_country_codes_rejection_rate[series[country_code_columns].dropna().nunique()] >= random.uniform(0, 1)/rejection_scaling_factor: error_code = np.random.choice(a=list(cons.data_model_rejection_codes_connection.keys()),p=list(cons.data_model_rejection_codes_connection.values()),size=1)[0] # add rejections based on shared ips, cards and devices - elif series["device_hash"] == series["device_hash"] and rejection_rates_dict["shared_devices_reject_rate_dict"][series["device_hash"]] >= random.uniform(0, 1)/rejection_scaling_factor: + elif pd.notna(series["device_hash"]) and rejection_rates_dict["shared_devices_reject_rate_dict"][series["device_hash"]] >= random.uniform(0, 1)/rejection_scaling_factor: error_code = np.random.choice(a=list(cons.data_model_rejection_codes_fraud.keys()),p=list(cons.data_model_rejection_codes_fraud.values()),size=1)[0] elif series["ip_hash"] == series["ip_hash"] and rejection_rates_dict["shared_ips_reject_rate_dict"][series["ip_hash"]] >= random.uniform(0, 1)/rejection_scaling_factor: error_code = np.random.choice(a=list(cons.data_model_rejection_codes_fraud.keys()),p=list(cons.data_model_rejection_codes_fraud.values()),size=1)[0] @@ -57,7 +59,9 @@ def gen_trans_status( error_code = np.random.choice(a=list(cons.data_model_rejection_codes_funds.keys()),p=list(cons.data_model_rejection_codes_funds.values()),size=1)[0] # otherwise return successful status else: - status = np.random.choice(a=['successful', 'pending'], size=1, p=[0.98, 0.02])[0] + successful_status = {key:cons.data_model_transaction_status[key] for key in ['successful', 'pending']} + successful_probs = [value/sum(successful_status.values()) for value in successful_status.values()] + status = np.random.choice(a=list(successful_status.keys()), size=1, p=successful_probs)[0] error_code = np.nan else: status = np.random.choice(a=['successful', 'pending'], size=1, p=[0.98, 0.02])[0] diff --git a/generator/utilities/input_error_handling.py b/generator/utilities/input_error_handling.py index ffce36a..4437ad6 100644 --- a/generator/utilities/input_error_handling.py +++ b/generator/utilities/input_error_handling.py @@ -1,29 +1,36 @@ from beartype import beartype +from typing import Dict @beartype def input_error_handling( - input_params_dict:dict + input_params_dict:Dict[str, object], ) -> int: """ Runs error handling on the input params dictionary. - + Parameters ---------- - input_params_dict : dict + input_params_dict : Dict[str, object] A dictionary of input parameters. - + Returns ------- int Returns 0 for successful completion, otherwise returns value errors depending on failed input parameter check. + + Examples + -------- + ``` + input_params_dict = {'n_users': 1000, 'use_random_seed': 1, 'n_itr': 10} + input_error_handling(input_params_dict=input_params_dict) + ``` """ # check if the n users parameter is positive - if not input_params_dict["n_users"] >= 1: + if not ((input_params_dict["n_users"] >= 1) and (isinstance(input_params_dict["n_users"], int))): raise ValueError(f"Invalid n_users parameter value {input_params_dict['n_users']}; must be a integer >= 1.") # check if the random seed is either 0 or 1 - if not input_params_dict["use_random_seed"] in (0, 1): - raise ValueError(f"Invalid random_seed use_random_seed value {input_params_dict['use_random_seed']}; must be either 0 or 1.") + if not ((input_params_dict["use_random_seed"] in (0, 1)) and (isinstance(input_params_dict["use_random_seed"], int))): + raise ValueError(f"Invalid use_random_seed value {input_params_dict['use_random_seed']}; must be either 0 or 1.") # check if the number of iterations is greater than or equal to 1 - if not input_params_dict["n_itr"] >= 1: + if not ((input_params_dict["n_itr"] >= 1) and (isinstance(input_params_dict["n_itr"], int))): raise ValueError(f"Invalid n_itr parameter value {input_params_dict['n_itr']}; must be an integer >= 1.") - return 0 From 4f3de2dcf24101adb5cf16a21323f0abb1f306e0 Mon Sep 17 00:00:00 2001 From: Oisin Date: Sat, 17 Jan 2026 17:59:13 +0000 Subject: [PATCH 16/16] Reviewed scripts using copilot --- generator/main.py | 1 + generator/utilities/input_error_handling.py | 7 +---- generator/utilities/join_idhashes_dict.py | 14 +++++---- generator/utilities/multiprocess.py | 31 ++++++++++--------- .../utilities/remove_duplicate_idhashes.py | 11 ++++--- 5 files changed, 33 insertions(+), 31 deletions(-) diff --git a/generator/main.py b/generator/main.py index a6d8943..6c1a101 100644 --- a/generator/main.py +++ b/generator/main.py @@ -35,6 +35,7 @@ ( input_params_dict['n_users'], None if input_params_dict['use_random_seed'] == 0 else itr, + 20000, input_params_dict['registration_start_date'], input_params_dict['registration_end_date'], input_params_dict['transaction_start_date'], diff --git a/generator/utilities/input_error_handling.py b/generator/utilities/input_error_handling.py index 4437ad6..20c93cc 100644 --- a/generator/utilities/input_error_handling.py +++ b/generator/utilities/input_error_handling.py @@ -4,7 +4,7 @@ @beartype def input_error_handling( input_params_dict:Dict[str, object], - ) -> int: + ): """ Runs error handling on the input params dictionary. @@ -13,11 +13,6 @@ def input_error_handling( input_params_dict : Dict[str, object] A dictionary of input parameters. - Returns - ------- - int - Returns 0 for successful completion, otherwise returns value errors depending on failed input parameter check. - Examples -------- ``` diff --git a/generator/utilities/join_idhashes_dict.py b/generator/utilities/join_idhashes_dict.py index 04663a8..c5c6e1b 100644 --- a/generator/utilities/join_idhashes_dict.py +++ b/generator/utilities/join_idhashes_dict.py @@ -1,32 +1,34 @@ +import numpy as np import pandas as pd from beartype import beartype +from typing import Dict, Union @beartype def join_idhashes_dict( data:pd.DataFrame, - idhashes_dict:dict, + idhashes_dict:Dict[Union[str, int], object], idhash_key_name:str, - idhash_val_name:str + idhash_val_name:str, ): """ Joins an entity attribute dictionary to either the user or transaction data. - + Parameters ---------- data : pd.DataFrame The user or transaction data. - idhashes_dict : dict + idhashes_dict : Dict[Union[str, int], object] The entity attribute dictionary with an idhash as the key for joining to the user or transaction data. idhash_key_name : str The name of the idhash key for joining to the user or transaction data. idhash_val_name : str The name to set for the idhash attribute when joining to the user or transaction data. - + Returns ------- pd.DataFrame The user or transaction data returned with the joined idhash attribute dictionary values. """ - idhashes_df = pd.Series(idhashes_dict, name=idhash_val_name).to_frame().reset_index().rename(columns={'index':idhash_key_name}) + idhashes_df = pd.DataFrame(list(idhashes_dict.items()), columns=[idhash_key_name, idhash_val_name]) idhashes_join = pd.merge(left=data, right=idhashes_df, on=idhash_key_name, how='left') return idhashes_join \ No newline at end of file diff --git a/generator/utilities/multiprocess.py b/generator/utilities/multiprocess.py index f8595bb..6a02bd2 100644 --- a/generator/utilities/multiprocess.py +++ b/generator/utilities/multiprocess.py @@ -1,34 +1,37 @@ import os from multiprocessing import Pool from beartype import beartype +from typing import List, Any @beartype def multiprocess( func, - args:list, - ncpu:int=os.cpu_count() - ) -> list: + args:List[tuple], + ncpu:int=None, + ) -> List[Any]: """ - Generates a dictionary of random dates for an input dictionary of idhashes counts + Generates a dictionary of random dates for an input dictionary of idhashes counts by utilizing multiprocessing. Parameters ---------- - func : + func : Callable[..., Any] The function to be executed in parallel - args : list - The input parameters as a list of tuples to be passed with the function in parallel + args : List[tuple] + The input parameters as a list of tuples to be passed with the function in parallel via starmap. ncpu : int - The number of cpus to execute across, default is os.cpu_count(). + The number of cpus to execute across, default is None. Returns ------- - list + List[Any] A list of output returned from the func calls ran in parallel """ + # set number of cpus + if ncpu is None: + ncpu = os.cpu_count() # initialize a pool of ncpus - pool = Pool(ncpu) - # execution given function and arguments across pool of ncpus - results = pool.starmap(func, args) - # close pool of ncpus - pool.close() + results = [] + with Pool(ncpu) as pool: + # execution given function and arguments across pool of ncpus + results = pool.starmap(func, args) return results diff --git a/generator/utilities/remove_duplicate_idhashes.py b/generator/utilities/remove_duplicate_idhashes.py index 820d77b..115f00b 100644 --- a/generator/utilities/remove_duplicate_idhashes.py +++ b/generator/utilities/remove_duplicate_idhashes.py @@ -5,17 +5,18 @@ @beartype def remove_duplicate_idhashes( user_data:pd.DataFrame, - idhash_col:str + idhash_col:str, ): - """Removes duplicate idhashes from a given idhash column. - + """ + Removes duplicate idhashes from a given idhash column. + Parameters ---------- user_data : pandas.DataFrame The user level telecom payments data. idhash_col : str The column with duplicate idhashes to be removed. - + Returns ------- pandas.DataFrame @@ -30,5 +31,5 @@ def remove_duplicate_idhashes( # overwrite series with empty lists tmp_data[idhash_col] = np.nan tmp_data[idhash_col] = tmp_deduplicate_series - tmp_data[idhash_col] = tmp_data[idhash_col].apply(lambda x: x if x == x else []) + tmp_data[idhash_col] = tmp_data[idhash_col].apply(lambda x: x if pd.notnull(x) else []) return tmp_data