diff --git a/.gitignore b/.gitignore index c864c2a..af913c2 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,4 @@ poetry.lock codegreen_core/tools/test.py codegreen_core/data/test.py +tests/test_notebook.ipynb \ No newline at end of file diff --git a/README.md b/README.md index 4a2d457..57bb523 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,16 @@ This repository contains the main functionality of the codegreen project. The complete documentation including installation and usage are available on the [documentation website](https://codegreen-framework.github.io/codegreen-core/). +# Development + +## Installation +- `git clone` +- install poetry +- install in editable mode : `poetry install` + +## Github workflows +Changes in the repo also triggers github actions + ## Development workflow - the `release` branch contains the latest stable version of the released python package - the `main` branch contains stable, tested code ready to be released. diff --git a/codegreen_core/__init__.py b/codegreen_core/__init__.py index 6c12424..7652b2d 100644 --- a/codegreen_core/__init__.py +++ b/codegreen_core/__init__.py @@ -1,3 +1,5 @@ -from .utilities.config import Config - -Config.load_config() +from . import utilities +from . import data +from . import tools +from . 
import models +utilities.config.Config.load_config() \ No newline at end of file diff --git a/codegreen_core/data/entsoe.py b/codegreen_core/data/entsoe.py index 7b9e308..54a0b78 100644 --- a/codegreen_core/data/entsoe.py +++ b/codegreen_core/data/entsoe.py @@ -117,17 +117,23 @@ def _refine_data(options, data1): data1.sort_index(inplace=True) return {"data": data1, "refine_logs": refine_logs} +def _convert_local_to_utc(dte): + # datetime obj is converted from local time zone to utc + local_timezone = datetime.now().astimezone().tzinfo + return pd.Timestamp(dte,tz=local_timezone).tz_convert('UTC') def _entsoe_get_actual_generation(options={"country": "", "start": "", "end": ""}): """Fetches the aggregated actual generation per production type data (16.1.B&C) for the given country within the given start and end date params: options = {country (2 letter country code),start,end} . Both the dates are in the YYYYMMDDhhmm format and the local time zone returns : {"data":pd.DataFrame, "duration":duration (in min) of the time series data, "refine_logs":"notes on refinements made" } """ + utc_start = _convert_local_to_utc(options["start"]) + utc_end = _convert_local_to_utc(options["end"]) client1 = entsoePandas(api_key=_get_API_token()) data1 = client1.query_generation( options["country"], - start=pd.Timestamp(options["start"], tz="UTC"), - end=pd.Timestamp(options["end"], tz="UTC"), + start = utc_start , + end = utc_end , psr_type=None, ) # drop columns with actual consumption values (we want actual aggregated generation values) @@ -159,8 +165,8 @@ def _entsoe_get_total_forecast(options={"country": "", "start": "", "end": ""}): client = entsoePandas(api_key=_get_API_token()) data = client.query_generation_forecast( options["country"], - start=pd.Timestamp(options["start"], tz="UTC"), - end=pd.Timestamp(options["end"], tz="UTC"), + start=_convert_local_to_utc(options["start"]) , + end=_convert_local_to_utc(options["end"]) ) # if the data is a series instead of a dataframe, 
it will be converted to a dataframe if isinstance(data, pd.Series): @@ -188,8 +194,8 @@ def _entsoe_get_wind_solar_forecast(options={"country": "", "start": "", "end": client = entsoePandas(api_key=_get_API_token()) data = client.query_wind_and_solar_forecast( options["country"], - start=pd.Timestamp(options["start"], tz="UTC"), - end=pd.Timestamp(options["end"], tz="UTC"), + start=_convert_local_to_utc(options["start"]) , + end=_convert_local_to_utc(options["end"]) ) durationMin = (data.index[1] - data.index[0]).total_seconds() / 60 # refining the data @@ -225,6 +231,10 @@ def _convert_to_60min_interval(rawData): # determining how many rows need to be combined to get data in 60 min format. groupingFactor = int(60 / duration) oldData = rawData["data"] + # check if there is enough data to convert to 60 min + if (len(oldData) < groupingFactor): + raise ValueError("Data cannot be converted into 60 min interval since there is inadequate number of rows in the data") + oldData["startTimeUTC"] = pd.to_datetime(oldData["startTimeUTC"]) start_time = oldData["startTimeUTC"].min() end_time = oldData["startTimeUTC"].max() @@ -246,9 +256,19 @@ def _convert_to_60min_interval(rawData): def _convert_date_to_entsoe_format(dt: datetime): + """ rounds the date to nearest hour """ return dt.replace(minute=0, second=0, microsecond=0).strftime("%Y%m%d%H%M") +def _format_energy_data(df): + start_time_column = df.pop("startTimeUTC") + df.insert(0, "startTime", start_time_column) + local_timezone = datetime.now().astimezone().tzinfo + df["startTime"] = pd.to_datetime(df["startTime"], format="%Y%m%d%H%M").dt.tz_localize("UTC").dt.tz_convert(local_timezone) + df.insert(1, "startTimeUTC", start_time_column) + return df + + # the main methods @@ -260,6 +280,7 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d :param str country: The 2 alphabet country code. :param datetime start: The start date for data retrieval. A Datetime object. 
Note that this date will be rounded to the nearest hour. :param datetime end: The end date for data retrieval. A datetime object. This date is also rounded to the nearest hour. + :param boolean interval60: To convert the data into 60 min time interval. False by default :return: A DataFrame containing the hourly energy production mix and percentage of energy generated from renewable and non renewable sources. :return: A dictionary containing: - `error`: A string with an error message, empty if no errors. @@ -269,12 +290,32 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d :rtype: dict """ try: + if not isinstance(country, str): + raise ValueError("Invalid country") + if not isinstance(start, datetime): + raise ValueError("Invalid start date") + if not isinstance(end, datetime): + raise ValueError("Invalid end date") + + if start > datetime.now(): + raise ValueError("Invalid start date. Generation data is only available for the past and not the future. Use the forecast API instead") + + if start > end : + raise ValueError("Invalid date range. End date must be greater than the start date") + + # if end date is in the future and the start date is in the past , only data till the available moment will be returned. + if end > datetime.now(): + raise ValueError("Invalid end date. Generation data is only available for the past and not the future. Use the forecast API instead") + # this is not allowed because the entsoe-py returns error if it's greater than the present + #warnings.warn("End date is in the future. 
Will fetch data only till the present") + options = { "country": country, - "start": start, - "end": end, + "start": start.replace(minute=0,second=0), + "end": end.replace(second=0,minute=0), "interval60": interval60, } + # print(options) # get actual generation data per production type and convert it into 60 min interval if required totalRaw = _entsoe_get_actual_generation(options) total = totalRaw["data"] @@ -327,18 +368,18 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d table[fieldName] = table[fieldName].astype(int) return { - "data": table, + "data": _format_energy_data(table), "data_available": True, - "time_interval": totalRaw["duration"], + "time_interval": duration, } except Exception as e: - print(e) + # print(e) print(traceback.format_exc()) return { "data": None, "data_available": False, - "error": Exception, - "time_interval": totalRaw["duration"], + "error": e, + "time_interval": 0, } @@ -364,6 +405,13 @@ def get_forecast_percent_renewable( """ try: # print(country,start,end) + if not isinstance(country, str): + raise ValueError("Invalid country") + if not isinstance(start, datetime): + raise ValueError("Invalid start date") + if not isinstance(end, datetime): + raise ValueError("Invalid end date") + start = _convert_date_to_entsoe_format(start) end = _convert_date_to_entsoe_format(end) options = {"country": country, "start": start, "end": end} @@ -390,7 +438,7 @@ def get_forecast_percent_renewable( windsolar["startTimeUTC"], format="%Y%m%d%H%M" ) windsolar["posix_timestamp"] = windsolar["startTimeUTC"].astype(int) // 10**9 - return {"data": windsolar, "data_available": True, "time_interval": 60} + return {"data": _format_energy_data(windsolar), "data_available": True, "time_interval": 60} except Exception as e: print(e) print(traceback.format_exc()) diff --git a/codegreen_core/data/main.py b/codegreen_core/data/main.py index de0fe22..edd5d64 100644 --- a/codegreen_core/data/main.py +++ b/codegreen_core/data/main.py 
@@ -3,10 +3,12 @@ from ..utilities.message import Message, CodegreenDataError from ..utilities import metadata as meta -from . import entsoe as et +from ..utilities.config import Config +from . import entsoe as et +from . import offline as off -def energy(country, start_time, end_time, type="generation", interval60=True) -> dict: +def energy(country, start_time, end_time, type="generation") -> dict: """ Returns hourly time series of energy production mix for a specified country and time range. @@ -19,8 +21,9 @@ def energy(country, start_time, end_time, type="generation", interval60=True) -> ========================== ========== ================================================================ Column type Description ========================== ========== ================================================================ - startTimeUTC datetime Start date in UTC (60 min interval) - Biomass float64 + startTimeUTC object Start date in UTC (format YYYYMMDDhhmm) + startTime datetime Start time in local timezone + Biomass float64 Fossil Hard coal float64 Geothermal float64 ....more energy sources float64 @@ -47,11 +50,13 @@ def energy(country, start_time, end_time, type="generation", interval60=True) -> :param datetime start_time: The start date for data retrieval. A Datetime object. Note that this date will be rounded to the nearest hour. :param datetime end_time: The end date for data retrieval. A datetime object. This date is also rounded to the nearest hour. :param str type: The type of data to retrieve; either 'generation' or 'forecast'. Defaults to 'generation'. + :param boolean interval60: To fix the time interval of data to 60 minutes. True by default. Only applicable for generation data + :return: A dictionary containing: - `error`: A string with an error message, empty if no errors. - `data_available`: A boolean indicating if data was successfully retrieved. - `data`: A pandas DataFrame containing the energy data if available, empty DataFrame if not. 
- - `time_interval` : the time interval of the DataFrame + - `time_interval` : the time interval of the DataFrame :rtype: dict """ if not isinstance(country, str): @@ -70,11 +75,27 @@ def energy(country, start_time, end_time, type="generation", interval60=True) -> e_source = meta.get_country_energy_source(country) if e_source == "ENTSOE": if type == "generation": - return et.get_actual_production_percentage( - country, start_time, end_time, interval60 - ) + """ + let local_found= false + see if caching is enabled, if yes, first check in the cache + if not, + check if offline data is enabled + if yes, check is data is available locally + if no, go online + """ + offline_data = off.get_offline_data(country,start_time,end_time) + if offline_data["available"] is True and offline_data["partial"] is False and offline_data["data"] is not None: + # todo fix this if partial get remaining data and merge instead of fetching the complete data + return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"source":offline_data["source"]} + else: + energy_data = et.get_actual_production_percentage(country, start_time, end_time, interval60=True) + energy_data["data"] = energy_data["data"] + energy_data["source"] = "public_data" + return energy_data elif type == "forecast": - return et.get_forecast_percent_renewable(country, start_time, end_time) + energy_data = et.get_forecast_percent_renewable(country, start_time, end_time) + energy_data["data"] = energy_data["data"] + return energy_data else: raise CodegreenDataError(Message.NO_ENERGY_SOURCE) return None diff --git a/codegreen_core/data/offline.py b/codegreen_core/data/offline.py new file mode 100644 index 0000000..e512a40 --- /dev/null +++ b/codegreen_core/data/offline.py @@ -0,0 +1,314 @@ +import os +import json +import redis +import pandas as pd +from datetime import datetime, timedelta + +from ..utilities.config import Config +from ..utilities import metadata as meta + +from . 
import entsoe as et +from ..utilities.log import log_stuff + + +def _get_redis_client(redis_url): + try: + return redis.from_url(redis_url, decode_responses=True) + except redis.RedisError as e: + print(f"Redis connection error: {e}") + return None + + +def _get_key_from_redis(redis_url, key): + client = _get_redis_client(redis_url) + if client: + try: + return client.get(key) # Returns None if key does not exist + except redis.RedisError as e: + print(f"Redis error: {e}") + return None + + +def _set_key_in_redis(redis_url, key, value, expiry=None): + client = _get_redis_client(redis_url) + if client: + try: + if expiry: + client.set(key, value, ex=expiry) # Set key with expiry + else: + client.set(key, value) # Set key without expiry + except redis.RedisError as e: + print(f"Redis error: {e}") + + +def _get_country_key(country_code): + """Returns the key name for the given country to be stored in redis cache""" + return "codegreen_generation_public_data_"+ country_code + +def _round_to_nearest_hour(dt): + """ Rounds a given datetime to the nearest hour.""" + return dt.replace(minute=0, second=0, microsecond=0) + +def _get_time_range(nHours): + """ Returns a tuple (start_date, end_date) where: start_date is current datetime minus nHours, end_date is the current datetime """ + end_date = _round_to_nearest_hour(datetime.now().replace(microsecond=0)) + start_date = end_date - timedelta(hours=nHours) + return start_date, end_date + +def _gather_energy_data(country, start_time, end_time): + """ Gets energy data form public energy sources (online) """ + energy_data = et.get_actual_production_percentage(country, start_time, end_time,interval60=True)["data"] + return energy_data + +def _get_filtered_data(dataframe, start_time, end_time): + """Function that returns a tuple (partial: True/False, data: DataFrame/None) indicating if the data is partially available and the corresponding data. 
+ """ + if dataframe.empty: + return (False, None) + + # Convert startTime column to datetime + dataframe["startTime"] = pd.to_datetime(dataframe["startTime"]) + dataframe["startTime"] = dataframe["startTime"].dt.tz_localize(None) + + # Determine the available data range + data_start = dataframe["startTime"].min() + data_end = dataframe["startTime"].max() + + start_time_1 = start_time.replace(minute=0, second=0, microsecond=0) + end_time_1 = end_time.replace(minute=0, second=0, microsecond=0) + + # Check different cases for data availability + if end_time_1 < data_start or start_time_1 > data_end: + return (False, None) # No data available + + filtered_df = dataframe[(dataframe["startTime"] >= start_time_1) & (dataframe["startTime"] <= end_time_1)] + + partial = not (start_time_1 >= data_start and end_time_1 <= data_end) + + return (partial, filtered_df if not filtered_df.empty else None) + + +def _sync_offline_file(country): + if not Config.get("enable_offline_energy_generation"): + raise Exception("This method cannot be used to get data since enable_offline_energy_generation option is not enabled") + + # print("syncs offline file for the given country") + time_config = Config.get("offline_data_start_date") + # print(time_config) + start_time = datetime.strptime(time_config,"%Y-%m-%d") + base_dir = Config.get("offline_data_dir_path") + os.makedirs(base_dir, exist_ok=True) + + json_file_path = os.path.join(base_dir, f"{country}_status.json") + csv_file_path = os.path.join(base_dir, f"{country}_generation.csv") + + current_time = datetime.now() + # storing data from 5 hours from now. + end_time = _round_to_nearest_hour(current_time) - timedelta(hours=5) + + if not (os.path.exists(json_file_path) and os.path.exists(csv_file_path)): + print("Files do not exist. 
Gathering new data.") + try: + data = _gather_energy_data(country, start_time, end_time) + + data.to_csv(csv_file_path, index=False) + metadata = { + "country": country, + "first_start_time": int(data.iloc[0]["startTime"].timestamp()), + "last_start_time": int(data.iloc[-1]["startTime"].timestamp()), + "created_on": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "updated_on": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + } + with open(json_file_path, "w") as f: + json.dump(metadata, f, indent=4) + log_stuff("Successfully created new offline file for "+country) + return data + except Exception as e: + print(e) + else: + print("Files exist. Updating data.") + with open(json_file_path, "r") as f: + metadata = json.load(f) + + current_start_time = datetime.fromtimestamp(metadata["first_start_time"]) + current_end_time = datetime.fromtimestamp(metadata["last_start_time"]) + timedelta(hours=1) + start_diff = current_start_time - start_time + end_diff = end_time - current_end_time + df = pd.read_csv(csv_file_path) + + update_required = False + if start_diff.total_seconds() > 0: + print("Gathering missing data before current start time.") + new_data = _gather_energy_data(country, start_time, current_start_time ) + df = pd.concat([new_data, df], ignore_index=True) + update_required = True + if end_diff.total_seconds() > 0: + print("Gathering missing data after current end time.") + new_data = _gather_energy_data(country, current_end_time, end_time) + #print(new_data) + df = pd.concat([df, new_data], ignore_index=True) + update_required = True + if update_required: + df["startTime"] = pd.to_datetime(df["startTime"]) + df = df.sort_values(by="startTime") + df.to_csv(csv_file_path, index=False) + metadata["first_start_time"] = int(df.iloc[0]["startTime"].timestamp()) + metadata["last_start_time"] = int(df.iloc[-1]["startTime"].timestamp()) + metadata["updated_on"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + with open(json_file_path, "w") as f: + json.dump(metadata, f, 
indent=4) + log_stuff("Successfully synced offline file for "+country) + else: + print("No update required") + #last_72_hours = end_time - timedelta(hours=72) + #recent_data = df[pd.to_datetime(df["timestamp"]) >= last_72_hours] + + +def _sync_offline_cache(country): + # print("syncs offline cache for the given country") + if not Config.get("enable_energy_caching"): + raise Exception("This method cannot be used to get data since enable_energy_caching option is not enabled") + + c_key = _get_country_key(country) + hour_count = int(Config.get("generation_cache_hour")) + quarter_time = hour_count/4 + data = _get_key_from_redis(Config.get("energy_redis_path"),c_key) + update_required = False + s,e = _get_time_range(hour_count) + if data is not None: + metadata = json.loads(data) + dataframe = pd.DataFrame.from_dict(metadata["dataframe"]) + dataframe["startTime"] = pd.to_datetime(dataframe["startTime"]) + last_start_time = pd.to_datetime(dataframe.iloc[-1]["startTime"]) + # Calculate the difference in hours + time_difference = abs((e - last_start_time).total_seconds()) / 3600 + if quarter_time <= time_difference : + update_required = True + else: + update_required = True + + if update_required : + # todo : see if offline data have the required data + dataframe = _gather_energy_data(country,s,e) + dataframe["startTime"] = pd.to_datetime(dataframe["startTime"]) + dataframe["startTime"] = dataframe["startTime"].dt.tz_localize(None) + metadata = { + "country": country, + "first_start_time": int(dataframe.iloc[0]["startTime"].timestamp()), + "last_start_time": int(dataframe.iloc[-1]["startTime"].timestamp()), + "created_on": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "updated_on": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "dataframe":dataframe.to_dict() + } + _set_key_in_redis(Config.get("energy_redis_path"),c_key,json.dumps(metadata, default=str)) + + +def _get_offline_file_data(country,start_time, end_time): + """ + Returns energy generation data stored in 
offline file for the given country for the give time range + This assumes data files already exists and synced with latest data + Returns a tuple (partial: True/False, data: DataFrame/None) indicating if the data is partially available and the corresponding data. + """ + if not Config.get("enable_offline_energy_generation"): + raise Exception("This method cannot be used to get data since enable_offline_energy_generation option is not enabled") + base_dir = Config.get("offline_data_dir_path") + os.makedirs(base_dir, exist_ok=True) + + if not Config.get("enable_offline_energy_generation"): + raise Exception("This method cannot be used to get data since enable_offline_energy_generation option is not enabled") + + json_file_path = os.path.join(base_dir, f"{country}_status.json") + csv_file_path = os.path.join(base_dir, f"{country}_generation.csv") + + if not (os.path.exists(json_file_path) and os.path.exists(csv_file_path)): + return (False, None) + + local_data = pd.read_csv(csv_file_path) + return _get_filtered_data(local_data, start_time, end_time) + + +def _get_offline_cache_data(country,start,end): + print("offline cache data") + if not Config.get("enable_energy_caching"): + raise Exception("This method cannot be used to get data since enable_energy_caching option is not enabled") + data = _get_key_from_redis(Config.get("energy_redis_path"),_get_country_key(country)) + # print(data) + if data is not None: + metadata = json.loads(data) + # print(metadata) + dataframe = pd.DataFrame.from_dict(metadata["dataframe"]) + dataframe["startTime"] = pd.to_datetime(dataframe["startTime"]) # Converts to pandas.Timestamp + return _get_filtered_data(dataframe, start, end) + else: + return False,None + + +def get_offline_data(country,start,end,sync_first=False): + """ + This method returns locally stored energy data. + Data is stored in 2 sources : one. Redis cache and second : csv files. 
+ Redis cache contains data only for the last 72 hours from when it was last synced + Offline data files can contain data for longer durations. + Both these options can be configured in the config file + returns {available:True/False, data:dataframe} + Note that this method assumes that syncing of the sources is being handled separately + """ + output = {"available":False,"data":None, "partial":False,"source":""} + offline = Config.get("enable_offline_energy_generation") + cache = Config.get("enable_energy_caching") + + if offline == False and cache == False : + # no offline data configured + return output + + if cache : + # first look in the cache + if(sync_first): + print("will first sync the cache to get the latest data") + _sync_offline_cache(country) + partial,data = _get_offline_cache_data(country,start,end) + if data is not None and partial is False: + output["partial"] = partial + output["data"] = data + output["available"] = True + output["source"] = "cache" + print("data from cache") + return output + + if offline: + # first look if data files are available, if yes, return data + if(sync_first): + print("will first sync the offline files to get the latest data") + _sync_offline_file(country) + partial,data = _get_offline_file_data(country,start,end) + output["partial"] = partial + output["data"] = data + output["available"] = True + output["source"] = "offline_file" + print("just got the data from offline file") + + return output + + +def sync_offline_data(file=False,cache=False): + """ + This method syncs offline data for offline sources enabled in the cache. + Data is synced for all available countries + You need to run this before getting offline data. 
you can even setup a CRON job to call this method on regular intervals + """ + c_keys = meta.get_country_metadata() + if Config.get("enable_offline_energy_generation") == True and file == True: + for key in c_keys: + try: + _sync_offline_file(key) + except Exception as e: + # print(e) + log_stuff("Error in syncing offline file for "+key+". Message"+ str(e)) + if Config.get("enable_energy_caching") == True and cache == True : + for key in c_keys: + try: + _sync_offline_cache(key) + except Exception as e: + # print(e) + log_stuff("Error in syncing offline file for "+key+". Message: "+ str(e)) + diff --git a/codegreen_core/utilities/__init__.py b/codegreen_core/utilities/__init__.py index 30dfd8c..6917ae6 100644 --- a/codegreen_core/utilities/__init__.py +++ b/codegreen_core/utilities/__init__.py @@ -1 +1,2 @@ from . import metadata +from . import config \ No newline at end of file diff --git a/codegreen_core/utilities/caching.py b/codegreen_core/utilities/caching.py index d89f202..431c6e6 100644 --- a/codegreen_core/utilities/caching.py +++ b/codegreen_core/utilities/caching.py @@ -51,6 +51,7 @@ def get_cache_or_update(country, start, deadline, energy_mode="public_data"): return data_object else: print("caches has no country, calling _pull_data(country, start, deadline)") + # print(energy_mode) return _pull_data(country, start, deadline, energy_mode) @@ -72,14 +73,17 @@ def _pull_data(country, start, end, energy_mode="public_data"): else: return None last_update = datetime.now().timestamp() - if forecast_data["data_available"]: - last_prediction = forecast_data["data"].iloc[-1]["posix_timestamp"] - else: - last_prediction = pd.Timestamp(datetime.now(), tz="UTC") + #if forecast_data["data_available"]: + # last_prediction = forecast_data["data"].iloc[-1]["startTimeUTC"] + #else: + # last_prediction = pd.Timestamp(datetime.now(), tz="UTC") df = forecast_data["data"] + del df["startTime"] df["startTimeUTC"] = pd.to_datetime(df["startTimeUTC"]) df["startTimeUTC"] = 
df["startTimeUTC"].dt.strftime("%Y%m%d%H%M").astype("str") + last_col = forecast_data["data"].iloc[-1]["startTimeUTC"] + last_prediction = int(datetime.strptime(last_col, "%Y%m%d%H%M").timestamp()) cached_object = { "data": df.to_dict(), "time_interval": forecast_data["time_interval"], @@ -87,6 +91,7 @@ def _pull_data(country, start, end, energy_mode="public_data"): "last_updated": int(last_update), "last_prediction": int(last_prediction), } + #print(cached_object) cache.set(_get_country_key(country, energy_mode), json.dumps(cached_object)) return cached_object diff --git a/codegreen_core/utilities/config.py b/codegreen_core/utilities/config.py index 90fc9e6..9764c1c 100644 --- a/codegreen_core/utilities/config.py +++ b/codegreen_core/utilities/config.py @@ -2,23 +2,101 @@ import configparser import redis - class ConfigError(Exception): """Custom exception for configuration errors.""" - pass - class Config: config_data = None section_name = "codegreen" - boolean_keys = {"enable_energy_caching", "enable_time_prediction_logging"} - defaults = { - "default_energy_mode": "public_data", - "enable_energy_caching": False, - "enable_time_prediction_logging": False, - "energy_redis_path": None, - } + all_keys = [ + { + "name":"ENTSOE_token", + "default": "None", + "use":"To fetch data from ENTSOE portal", + "boolean":False, + }, + { + "name":"default_energy_mode", + "default":"public_data", + "use":"Determines which type of data to use.", + "boolean":False, + }, + { + "name":"enable_energy_caching", + "default":"False", + "use":"To indicate if data used by tools must be cached", + "boolean":True, + }, + { + "name":"energy_redis_path", + "default":"None", + "boolean":False, + "use":"Path to redis server to cache data.required if enable_energy_caching is enabled " + }, + { + "name":"enable_time_prediction_logging", + "default":"False", + "boolean":True, + "use":"To indicate if logs must me saved in a log file " + }, + { + "name":"log_folder_path", + "default":" ", + 
"boolean":False, + "use":"Path of the folder where logs will be stored" + }, + { + "name":"offline_data_dir_path", + "default":"", + "boolean":False, + "use":"Path of the folder where bulk energy data will be stored" + }, + { + "name":"enable_offline_energy_generation", + "default":"False", + "boolean":True, + "use":"To enable storing energy production data for available countries locally and in cache for quick access" + }, + { + "name":"offline_data_start_date", + "default":"", + "boolean":False, + "use":"The start date for offline energy generation download,YYYY-mm-dd format" + }, + { + "name":"generation_cache_hour", + "default":"72", + "boolean":False, + "use":"Indicate the number of hours in the past the data will be stored in the cache " + }, + + { + "name":"cron_refresh_offline_files_hour", + "default":"6", + "boolean":False, + "use":"time to setup cron for updating offline energy files" + }, + { + "name":"cron_refresh_cache_hour", + "default":"6", + "boolean":False, + "use":"time to setup CRON job to update the energy generation cache" + }, + + { + "name":"enable_logging", + "default":"False", + "boolean":True, + "use":"Indicates if logging is enabled for the whole package" + }, + { + "name":"log_folder_path", + "default":"", + "boolean":False, + "use":"The folder where log files will be stored. Log files name are of the format: 'year-month' " + } + ] @classmethod def load_config(self, file_path=None): @@ -34,17 +112,23 @@ def load_config(self, file_path=None): break if file_path is None: - raise ConfigError("404 config") + raise ConfigError("Could not find the '.codegreencore.config' file. 
Please ensure that this file is created in the root folder of your project.") self.config_data = configparser.ConfigParser() self.config_data.read(file_path) if self.section_name not in self.config_data: self.config_data[self.section_name] = {} - for key, default_value in self.defaults.items(): - if not self.config_data.has_option(self.section_name, key): - self.config_data.set(self.section_name, key, str(default_value)) - + raise ConfigError("Invalid config file. The config file must have a section called codegreen") + + for ky in self.all_keys: + try : + value = self.config_data.get(self.section_name, ky["name"]) + # print(value) + except configparser.NoOptionError: + # print(ky) + self.config_data.set(self.section_name, ky["name"],ky["default"]) + if self.get("enable_energy_caching") == True: if self.get("energy_redis_path") is None: raise ConfigError( @@ -53,7 +137,16 @@ def load_config(self, file_path=None): else: r = redis.from_url(self.get("energy_redis_path")) r.ping() - # print(self.config_data["default_energy_mode"]) + # print("Connection to redis works") + + if self.get("enable_logging") == True: + if self.get("log_folder_path") is None: + raise ConfigError( + "Invalid configuration. 
If 'enable_logging' is set, 'log_folder_path' is also required " + ) + else: + base_dir = self.get("log_folder_path") + os.makedirs(base_dir, exist_ok=True) @classmethod def get(self, key): @@ -63,13 +156,11 @@ def get(self, key): ) try: value = self.config_data.get(self.section_name, key) - if value is None: - # if key not in self.defaults: - # raise KeyError(f"No default value provided for key: {key}") - value = self.defaults.get(key, None) - else: - if key in self.boolean_keys: - value = value.lower() == "true" + config = next((d for d in self.all_keys if d.get("name") == key), None) + if config["boolean"]: + return value.lower() == "true" return value - except (configparser.NoSectionError, configparser.NoOptionError): - return self.defaults.get(key) # Return default if key is missing + except (configparser.NoSectionError, configparser.NoOptionError) as e: + print("Config not found") + print(key) + raise e diff --git a/codegreen_core/utilities/country_list.json b/codegreen_core/utilities/country_list.json index 903893c..c8fb1f7 100644 --- a/codegreen_core/utilities/country_list.json +++ b/codegreen_core/utilities/country_list.json @@ -44,7 +44,7 @@ "carbon_intensity_method": "ipcc" }, "DK": { - "country": "Germany", + "country": "Denmark", "energy_source": "ENTSOE", "carbon_intensity_method": "ipcc" }, diff --git a/codegreen_core/utilities/cron_jobs.py b/codegreen_core/utilities/cron_jobs.py new file mode 100644 index 0000000..e486732 --- /dev/null +++ b/codegreen_core/utilities/cron_jobs.py @@ -0,0 +1,114 @@ +""" +to set up cron jobs +1. to update cache of energy generation data +2. to update the offline files +3. to run re-training of energy models + +1,2 can be done together and must be done more frequently +3 has to be done once every 3 month or so. 
+""" + +import sys +import getpass +import importlib.util +import os +from crontab import CronTab +from .config import Config +from .log import log_stuff + +PACKAGE_NAME = "codegreen_core" # Replace with your package's name +USER = getpass.getuser() # Get current user + +def get_package_path(): + """Returns the installed path of the package.""" + spec = importlib.util.find_spec(PACKAGE_NAME) + if not spec or not spec.origin: + raise RuntimeError(f"Package '{PACKAGE_NAME}' not found.") + return os.path.dirname(spec.origin) + +def get_script_path(script_name): + """Returns the full path of the script inside the package.""" + return os.path.join(get_package_path(), script_name) + +def get_cron(): + """Returns a cron object for the current user.""" + return CronTab(user=USER) + +def job_exists(command): + """Check if a cron job with the given command already exists.""" + cron = get_cron() + return any(job.command == command for job in cron) + +def add_cron_jobs(): + """Adds cron jobs for s1.py and s2.py if they don't already exist.""" + cron = get_cron() + + # Get full script paths + command_s1 = f"python3 {get_script_path('utilities/update_offline_files.py')}" + command_s2 = f"python3 {get_script_path('utilities/update_cache.py')}" + + # Check if jobs already exist + if job_exists(command_s1) or job_exists(command_s2): + print("Cron jobs already exist.") + return + + # Add new cron jobs + job1 = cron.new(command=command_s1, comment="codegreen_core_job1") + job2 = cron.new(command=command_s2, comment="codegreen_core_job2") + + # Set schedule (example: every minute) + job1_hour = int(Config.get("cron_refresh_offline_files_hour")) # cache + job2_hour = int(Config.get("cron_refresh_cache_hour")) # offline file + + if job1_hour < 0 or job1_hour > 24: + raise RuntimeError("Invalid cron_refresh_offline_files_hour must be between 1 and 24") + + if job2_hour < 0 or job2_hour > 24: + raise RuntimeError("Invalid cron_refresh_cache_hour must be between 1 and 24") + + 
job1.hour.every(job1_hour) + job2.hour.every(job2_hour) + + # Write the jobs to cron + cron.write() + log_stuff("Cron jobs set ") + print("Cron jobs added successfully.") + +def remove_cron_jobs(): + """Removes the cron jobs for s1.py and s2.py.""" + cron = get_cron() + cron.remove_all(comment="codegreen_core_job1") + cron.remove_all(comment="codegreen_core_job2") + cron.write() + print("Cron jobs removed successfully.") + log_stuff("Cron jobs removed ") + +def list_cron_jobs(): + """Lists all cron jobs related to the package.""" + cron = get_cron() + found = False + for job in cron: + if "codegreen_core_job1" in job.comment or "codegreen_core_job2" in job.comment: + print(f"{job}") + found = True + if not found: + print("No cron jobs found for this package.") + +# def main(action): +# """Main function to start, stop, or list cron jobs.""" +# if action == "start": +# add_cron_jobs() +# elif action == "stop": +# remove_cron_jobs() +# elif action == "list": +# list_cron_jobs() +# else: +# print("Invalid command. 
Use 'start', 'stop', or 'list'.") + + +# if __name__ == "__main__": +# if len(sys.argv) != 2: +# print("Usage: python manage_cron.py ") +# sys.exit(1) + +# main(sys.argv[1]) \ No newline at end of file diff --git a/codegreen_core/utilities/log.py b/codegreen_core/utilities/log.py index d545531..e71fc62 100644 --- a/codegreen_core/utilities/log.py +++ b/codegreen_core/utilities/log.py @@ -4,17 +4,41 @@ from datetime import datetime import os import csv +from datetime import datetime, timezone -def time_prediction(data): - if Config.get("enable_time_prediction_logging") == True: +# def time_prediction(data): +# if Config.get("enable_time_prediction_logging") == True: +# current_date = datetime.now() +# file_name = f"{current_date.strftime('%B')}_{current_date.year}.csv" +# file_location = os.path.join( +# Config.get("time_prediction_log_folder_path"), file_name +# ) +# file_exists = os.path.exists(file_location) +# # Open the file in append mode +# with open(file_location, mode="a", newline="") as file: +# writer = csv.DictWriter(file, fieldnames=data.keys()) +# # If the file doesn't exist, write the header +# if not file_exists: +# writer.writeheader() +# # Append the data to the file +# writer.writerow(data) +# else: +# print("Logging not enabled") + + +def log_stuff(text): + """To log text data into the log file if it is set up in the config file""" + if(Config.get("enable_logging")): + #print("logging is enabled") current_date = datetime.now() file_name = f"{current_date.strftime('%B')}_{current_date.year}.csv" file_location = os.path.join( - Config.get("time_prediction_log_folder_path"), file_name + Config.get("log_folder_path"), file_name ) file_exists = os.path.exists(file_location) - # Open the file in append mode + iso_timestamp = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + data = {"timestamp":iso_timestamp, "text": text } with open(file_location, mode="a", newline="") as file: writer = csv.DictWriter(file, fieldnames=data.keys()) # If 
the file doesn't exist, write the header @@ -22,5 +46,4 @@ def time_prediction(data): writer.writeheader() # Append the data to the file writer.writerow(data) - else: - print("Logging not enabled") + #print("logging done") \ No newline at end of file diff --git a/codegreen_core/utilities/update_cache.py b/codegreen_core/utilities/update_cache.py new file mode 100644 index 0000000..9fa91aa --- /dev/null +++ b/codegreen_core/utilities/update_cache.py @@ -0,0 +1,2 @@ +from codegreen_core.data.offline import sync_offline_data +sync_offline_data(file=False,cache=True) \ No newline at end of file diff --git a/codegreen_core/utilities/update_offline_files.py b/codegreen_core/utilities/update_offline_files.py new file mode 100644 index 0000000..d749933 --- /dev/null +++ b/codegreen_core/utilities/update_offline_files.py @@ -0,0 +1,2 @@ +from codegreen_core.data.offline import sync_offline_data +sync_offline_data(file=True,cache=False) \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..7b672f0 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,85 @@ +codegreen_core API +=================== + + +Package Organization +--------------------- + +.. image:: _static/modules.png + :alt: modules + :width: 400px + :align: center + + +The package is divided into two main sub packages: `data`` and `tools`. (There is also an additional module, `utilities`, which provides helper methods that support other modules.) + +The `data` sub package contains methods for fetching energy production data. This package relies on external data sources to retrieve this information, which is then processed to make it usable by other components of the package. For more details and a complete API , see the data module documentation. 
+ +The `tools` sub package provides a variety of tools, including: + +- Carbon intensity calculator +- Carbon emission calculator +- Optimal time-shifting predictor +- Optimal location-shifting predictor + + +Example : Calculating optimal time for a computational task +------------------------------------------------------------- +Assuming all the above steps are done, you can now calculate the optimal starting time for a computations. + +.. code-block:: python + + from datetime import datetime,timedelta + from codegreen_core.tools.loadshift_time import predict_now + + country_code = "DK" + est_runtime_hour = 10 + est_runtime_min = 0 + now = datetime.now() + hard_finish_date = now + timedelta(days=1) + criteria = "percent_renewable" + per_renewable = 50 + + time = predict_now(country_code, + est_runtime_hour, + est_runtime_min, + hard_finish_date, + criteria, + per_renewable) + # (1728640800.0, , 76.9090909090909) + + + +The core package contains 2 main module : +- `data` : To fetch energy data for a country +- `tools` : To calculate various quantities like Optimal computation time, carbon intensity etc. + + +`data` module +-------------- + +.. automodule:: codegreen_core.data + :members: + + +`tools` module +--------------- + +.. automodule:: codegreen_core.tools + :members: + + +.. automodule:: codegreen_core.tools.carbon_intensity + :members: + + +.. automodule:: codegreen_core.tools.carbon_emission + :members: + + +.. automodule:: codegreen_core.tools.loadshift_time + :members: + + +.. automodule:: codegreen_core.tools.loadshift_location + :members: \ No newline at end of file diff --git a/docs/data.rst b/docs/data.rst deleted file mode 100644 index 1c83530..0000000 --- a/docs/data.rst +++ /dev/null @@ -1,27 +0,0 @@ -``data`` Module -=============== - -This module provides methods to fetch energy production data for a specific country over a defined time period. 
-One of the main challenges is the variability in the availability of data for different countries, which can impact the granularity and completeness of the data. - -.. automodule:: codegreen_core.data - :members: - - -List of countries ------------------ - -The list of countries for which data is available : - -.. country_table:: - -Fetching data from ENTSOE -------------------------- - -ENTSO-E (https://www.entsoe.eu), the European Network of Transmission System Operators for Electricity, is an association that facilitates the cooperation of European transmission system operators (TSOs). Through its Transparency Portal, ENTSO-E provides real-time energy data for various countries across Europe, ensuring open access to this information. We utilize this data for countries within the European Union (EU). - - -.. automodule:: codegreen_core.data.entsoe - :members: - :show-inheritance: - diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 57ce65d..792bc9a 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -1,97 +1,159 @@ .. getting_started: -Getting Started -=============== +How to use codegreen +===================== -Welcome to the guide for getting started with `codegreen_core`. This document provides installation instructions and the initial steps required to get the package up and running. It also outlines the package structure, which will help you choose the appropriate tools based on your needs. +Welcome to the guide for getting started with the `codegreen` framework. +There are four main ways to setup and use Codegreen based on your requirements. +This document describes in detail the steps involved in each method. -Installation -------------- +The four ways to use codegreen: -Using pip : +1. Using `codegreen.world` +2. Setting up and using the `codegreen_core` package +3. Setting up your own web server +4. 
Deploying the web service using the docker container + + +Using codegreen.world +---------------------- + +The simplest and fastest way to use Codegreen is through our web service. +This is ideal for beginners and new users looking for an easy way to reduce the carbon emissions of their computations. + +1. Visit `www.codegreen.world `_ +2. Create and account and log in +3. Generate an API token for your location and server details +4. Use the "Predict Optimal Time" form to get a time prediction for starting a computation in your selected location and server. + + +Additionally, we provide `codegreen_client`, a Python package that can automatically start your Python scripts at the optimal time. Please refer to for installation and setup guide for more details. + + +The web service also includes a dashboard where you can track how much carbon emission you have saved in each registered location. + + +Installing the `codegreen_core` package +----------------------------------------- + +The `codegreen_core` Python package contains all the core functionalities of the Codegreen framework and can be used as a standalone tool. +This is ideal for researchers and developers who need to gather energy data for a country and perform calculations such as carbon intensity analysis. + +**Step 1: Installation** + +You can install the package using pip : .. code-block:: python pip install codegreen_core -You can also use clone the git repository and install the package : +Alternatively, you can clone the Git repository and install the package manually: .. code-block:: bash git clone https://github.com/bionetslab/codegreen-core.git pip install -e . +**Step 2 : Setting up the configuration file** -Setup -------- +The package requires a configuration file where all settings are defined. Create a new file named `.codegreencore.config`` in your root directory. This file will contain all the configurations required to run the package successfully. 
-After successfully installing the package, the next step is to create a configuration file: +The next section describes how to set up the package based on your requirements. -- Create a new file named `.codegreencore.config`` in your root directory. -- This file will contain all the configurations required to run the package successfully. -- Below is a template for the configuration file:" + +Configuring the `codegreen_core` package +----------------------------------------- + +The codegreen_core package offers a wide range of functionalities and can be used in many applications. + +Below is the template for the basic configuration .. code-block:: bash [codegreen] ENTSOE_token = - enable_energy_caching = false - energy_redis_path = +This configuration allows you to fetch data online and use it. +It is recommended to start with the basic setup and explore the available APIs before making advanced customizations. -Description of the fields in configuration file: +The API of the package is available :doc:`here ` -- `ENTSOE_token``: The token required to fetch data from the ENTSO-E portal. Please follow the steps at https://transparency.entsoe.eu to create a free account and obtain an API token. -- `enable_energy_caching``: (boolean) Indicates whether energy data used for optimal time predictions should be cached. -- `energy_redis_path``: The path to the Redis server where energy data will be stored. This field is required if caching is enabled using the above option. +The table below summarizes all available configs : +.. list-table:: Available Configuration Options + :header-rows: 1 + :widths: 20 50 10 20 -Package Organization ---------------------- + * - Name + - Description + - Default + - Possible Values + * - `ENTSOE_token` + - The token required to fetch data from the ENTSO-E portal. Please follow the steps at https://transparency.entsoe.eu to create a free account and obtain an API token. 
+ - None + - String + * - `default_energy_mode` + - To decide the source of energy forecasts to be used for making optimal time predictions + - public_data + - public_data / local_prediction + * - `enable_energy_caching` + - Enables or disables local caching of energy data + - false + - true/false + * - `energy_redis_path` + - Path to Redis instance for caching + - None + - String (Redis URL, redis://localhost:6379 ) + * - `enable_offline_energy_generation` + - To enable storing and periodic update of historical energy in csv files + - false + - true/false + * - `offline_data_dir_path` + - Path to the folder where historical energy data will be stored + - None + - String + * - `offline_data_start_date` + - The start date from which historical energy data must be downloaded and stored + - None + - String (`YYYY-mm-dd` format) -.. image:: _static/modules.png - :alt: modules - :width: 400px - :align: center +**Which data is used to predict optimal computation start time ?** +One of the main features of the `codegreen_core` package is the ability to calculate the optimal time for running a computation. +This calculation depends on forecasts of hourly energy generation data from renewable and non-renewable sources or time series forecasts of the carbon intensity of future energy production. -The package is divided into two main sub packages: `data`` and `tools`. (There is also an additional module, `utilities`, which provides helper methods that support other modules.) +While this data is available for some countries, it is typically only provided for short durations (usually 24 hours or less), which limits the accuracy of optimal time predictions. +To address this limitation, we have trained prediction models that generate time series forecasts for longer periods, allowing for more effective optimization. -The `data` sub package contains methods for fetching energy production data. 
This package relies on external data sources to retrieve this information, which is then processed to make it usable by other components of the package. For more details and a complete API , see the data module documentation. +This setting is controlled by the `default_energy_mode` option. **By default**, the package uses publicly available energy data. To use the trained prediction models (if available for a specific country), set `default_energy_mode` to `local_prediction`. -The `tools` sub package provides a variety of tools, including: +**How to enable caching of recent energy data?** -- Carbon intensity calculator -- Carbon emission calculator -- Optimal time-shifting predictor -- Optimal location-shifting predictor +Certain tools, such as `predict_optimal_time`, rely on recent energy forecasts / predictions. Fetching the same data multiple times can be avoided by intelligently caching it and updating it at regular intervals. +Energy data caching can be enabled by setting `enable_energy_caching` to `true`. -For more information, refer to the `tools` module documentation. +Additionally, this requires a connection to Redis, which is specified using the `energy_redis_path` setting. +When caching is enabled, the package first attempts to connect to Redis before storing or retrieving data. +Once enabled, two types of data values are stored in the cache for each available country: -Example : Calculating optimal time for a computational task -------------------------------------------------------------- -Assuming all the above steps are done, you can now calculate the optimal starting time for a computations. +1. **Hourly time series forecasts** for the upcoming hours. +2. **Actual energy generation data** for the past 72 hours. -.. 
code-block:: python - - from datetime import datetime,timedelta - from codegreen_core.tools.loadshift_time import predict_now - - country_code = "DK" - est_runtime_hour = 10 - est_runtime_min = 0 - now = datetime.now() - hard_finish_date = now + timedelta(days=1) - criteria = "percent_renewable" - per_renewable = 50 - - time = predict_now(country_code, - est_runtime_hour, - est_runtime_min, - hard_finish_date, - criteria, - per_renewable) - # (1728640800.0, , 76.9090909090909) - + +**How to download and use historical energy generation data offline?** + + + +**How to re-train prediction models ?** +TODO + + +Setting up your own web server +-------------------------------- + + + +Deploying the web server using the docker image +----------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index b6f726c..c760d53 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,9 +11,10 @@ Welcome to codegreen_core's documentation! :caption: Contents: introduction + methodology getting_started - data - tools + api + status references version_history diff --git a/docs/tools.rst b/docs/methodology.rst similarity index 91% rename from docs/tools.rst rename to docs/methodology.rst index 16116c1..fc06d5a 100644 --- a/docs/tools.rst +++ b/docs/methodology.rst @@ -1,3 +1,10 @@ +Methodology +============ + +Here we describe how we calculate these quantities + + + ``tools`` Module ================= @@ -10,8 +17,7 @@ Each tool is implemented in a separate module and must be imported individually As a convention, methods that primarily accept DataFrame as an input (along with other parameters) and return a DataFrame are prefixed with `_df`. -.. automodule:: codegreen_core.tools - :members: + Carbon Intensity of Energy --------------------------- @@ -45,8 +51,7 @@ One challenge with the carbon intensity calculation is that the values can vary When energy generation data is not available for a country, the average values of Carbon Intensity is used.
The source of this data is Carbon Footprint Ltd [8] -.. automodule:: codegreen_core.tools.carbon_intensity - :members: + Carbon emission of a job ------------------------- @@ -69,19 +74,3 @@ Carbon emission of a job depends on 2 factors : Energy consumed by the hardware - Emissions related to the production of the energy : represented by the Carbon Intensity of the energy mix during that period. Already implemented above - The result is Carbon emission in CO2e -.. automodule:: codegreen_core.tools.carbon_emission - :members: - - -Optimal time shifting ---------------------- - -.. automodule:: codegreen_core.tools.loadshift_time - :members: - - -Optimal Location shifting -------------------------- - -.. automodule:: codegreen_core.tools.loadshift_location - :members: diff --git a/docs/status.rst b/docs/status.rst new file mode 100644 index 0000000..0bbf56d --- /dev/null +++ b/docs/status.rst @@ -0,0 +1,6 @@ +List of countries +=================== + +This page displays the list of all countries for which data can be fetched using the `codegreen_core` package. + +.. 
country_table:: diff --git a/pyproject.toml b/pyproject.toml index be6f930..5a5cd5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "codegreen_core" -version = "0.0.2" +version = "0.0.3" description = "This package helps you become aware of the carbon footprint of your computation" authors = ["Anne Hartebrodt ","Shubh Vardhan Jain "] readme = "README.md" @@ -15,12 +15,14 @@ numpy = "<2.0.0" tensorflow = "^2.18.0" matplotlib = "^3.9.2" scikit-learn = "^1.5.2" +python-crontab = "^3.2.0" [tool.poetry.group.dev.dependencies] pytest = "^8.3.3" Sphinx = "^8.1.3" black = "^24.10.0" +jupyter = "^1.1.1" [build-system] requires = ["poetry-core"] diff --git a/tests/dev.py b/tests/dev.py new file mode 100644 index 0000000..465b222 --- /dev/null +++ b/tests/dev.py @@ -0,0 +1,55 @@ +# run this as : poetry run python -m codegreen_core.tools.test + +# import matplotlib +# # matplotlib.use('TkAgg') # Or 'Qt5Agg' + +# from .carbon_emission import plot_ce_jobs +# from datetime import datetime +# server1 = { +# "country":"DE", +# "number_core":16, +# "memory_gb": 254, +# } +# jobs = [ +# { +# "start_time":datetime(2024,11,10), +# "runtime_minutes" : 120 +# } +# ] + + +# plot_ce_jobs(server1,jobs) + +from codegreen_core.tools.loadshift_time import predict_now +from datetime import datetime, timedelta + +cases = [ + { + "country":"DE", + "h": 5, + "hd": datetime.now()+ timedelta(hours=20) + }, + { + "country":"DE", + "h": 4, + "hd": datetime.now()+ timedelta(hours=15) + }, + { + "country":"DK", + "h": 4, + "hd": datetime.now()+ timedelta(hours=15) + }, + { + "country":"FR", + "h": 4, + "hd": datetime.now()+ timedelta(hours=15) + }, + { + "country":"SE", + "h": 4, + "hd": datetime.now()+ timedelta(hours=15) + } +] + +for c in cases: + print(predict_now(c["country"],c["h"],0,c["hd"])) diff --git a/tests/test_data.py b/tests/test_data.py index 9256888..6f9d03d 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -59,7 +59,7 @@ def 
test_entsoe_generation_data(self): # "note":"this has issues,Hydro Pumped Storage values do not match " # }, { - "country": "GR", + "country": "FI", "start": datetime(2024, 3, 20), "end": datetime(2024, 3, 24), "dtype": "generation", @@ -67,7 +67,7 @@ def test_entsoe_generation_data(self): "interval60": True, }, { - "country": "GR", + "country": "FR", "start": datetime(2024, 1, 25), "end": datetime(2024, 1, 28), "dtype": "generation", @@ -77,15 +77,15 @@ def test_entsoe_generation_data(self): ] for case in cases: # intervals = int((case["end"].replace(minute=0, second=0, microsecond=0) - case["start"].replace(minute=0, second=0, microsecond=0)).total_seconds() // 3600) - # print(intervals) + #print(case) if case["dtype"] == "generation": d = energy( case["country"], case["start"], case["end"], - case["dtype"], - case["interval60"], + case["dtype"] ) + #print(d) data = d["data"] data_verify = pd.read_csv(case["file"]) data_verify["start_date"] = data_verify["MTU"].str.split(" - ").str[0] diff --git a/tests/test_entsoe.py b/tests/test_entsoe.py new file mode 100644 index 0000000..19f2206 --- /dev/null +++ b/tests/test_entsoe.py @@ -0,0 +1,75 @@ +import pytest +from codegreen_core.data.entsoe import * +from codegreen_core.utilities.message import CodegreenDataError +from datetime import datetime +import pandas as pd + + +class TestEntsoeData: + def test_actual_time_interval_original(self): + data = get_actual_production_percentage("DE",datetime.now()-timedelta(hours=2),datetime.now()) + assert data["time_interval"] == 15 and data["data_available"] == True + def test_actual_time_interval_60min(self): + data = get_actual_production_percentage("DE",datetime.now()-timedelta(hours=2),datetime.now(),True) + assert data["time_interval"] == 60 and data["data_available"] == True + def test_actual_invalid_country1(self): + data = get_actual_production_percentage("DE1",datetime.now()-timedelta(hours=3),datetime.now(),True) + assert data["data_available"] == False and 
isinstance(data["error"],ValueError) + def test_actual_invalid_country2(self): + data = get_actual_production_percentage(1234,datetime.now()-timedelta(hours=3),datetime.now(),True) + assert data["data_available"] == False and isinstance(data["error"],ValueError) + def test_actual_invalid_start(self): + data = get_actual_production_percentage("DE","invalid",datetime.now(),True) + assert data["data_available"] == False and isinstance(data["error"],ValueError) + def test_actual_invalid_end(self): + data = get_actual_production_percentage("DE",datetime.now(),"invalid",True) + assert data["data_available"] == False and isinstance(data["error"],ValueError) + def test_actual_invalid_date_range(self): + # start > end + data = get_actual_production_percentage("DE",datetime.now(),datetime.now()-timedelta(hours=3),True) + assert data["data_available"] == False and isinstance(data["error"],ValueError) + def test_actual_invalid_date_range2(self): + # start > now + data = get_actual_production_percentage("DE",datetime.now()+timedelta(hours=3),datetime.now()+timedelta(hours=4),True) + assert data["data_available"] == False and isinstance(data["error"],ValueError) + def test_actual_invalid_date_range3(self): + # end > now + data = get_actual_production_percentage("DE",datetime.now()-timedelta(hours=3),datetime.now()+timedelta(hours=3),True) + assert data["data_available"] == False and isinstance(data["error"],ValueError) + + + def test_forecast_time_interval_60(self): + data = get_forecast_percent_renewable("DE",datetime.now()-timedelta(hours=2),datetime.now()+timedelta(hours=5)) + assert data["time_interval"] == 60 and data["data_available"] == True + + +class TestActualDataFrame: + @classmethod + def setup_class(cls): + """Fetch data once for all tests.""" + # Simulate fetching data from an API + cls.country="DE" + cls.start1 = datetime(2024,5,1) + cls.end1 = datetime (2024,5,1,10,0,0) + cls.row_count_check_60 = int(((cls.end1- cls.start1).total_seconds()/60)/60) + 
cls.row_count_check_15 = cls.row_count_check_60*4 + # de1 is 15 min interval + # de2 is 60 min interval + cls.de1 = get_actual_production_percentage(cls.country,cls.start1,cls.end1,False)["data"] + cls.de2 = get_actual_production_percentage(cls.country,cls.start1,cls.end1,True)["data"] + def test_dataframe_nonempty(self): + """Test that the DataFrame is not empty.""" + assert not self.de1.empty, "The DataFrame should not be empty." + def test_dataframe_nonempty1(self): + """Test that the DataFrame is not empty.""" + assert not self.de2.empty, "The DataFrame should not be empty." + def test_column_presence(self): + """Test that required columns are present in the DataFrame.""" + required_columns = ["startTimeUTC", "total", "percentRenewable"] + for col in required_columns: + assert col in self.de1.columns + def test_check_row_count_1(self): + assert len(self.de2) == self.row_count_check_60 + def test_check_row_count_2(self): + assert len(self.de1) == self.row_count_check_15 + \ No newline at end of file