From a047a8fe6a55711f86274387a9c5fa589b5aaada Mon Sep 17 00:00:00 2001
From: Rafael JPD
Date: Sat, 24 May 2025 14:26:29 -0300
Subject: [PATCH 1/7] Fix BASE_URL env var

---
 config/settings/production.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/settings/production.py b/config/settings/production.py
index c23854c..0cc417c 100644
--- a/config/settings/production.py
+++ b/config/settings/production.py
@@ -107,7 +107,7 @@
 # Admin URL regex.
 DJANGO_ADMIN_URL = env("DJANGO_ADMIN_URL")
 # https://docs.wagtail.org/en/stable/reference/settings.html#wagtailadmin-base-url
-WAGTAILADMIN_BASE_URL = env("WAGTAIL_ADMIN_URL")
+WAGTAILADMIN_BASE_URL = env("WAGTAILADMIN_BASE_URL")
 
 # Anymail
 # ------------------------------------------------------------------------------

From 338dcc0cc0cb463e74f93146cbd8e098077c613d Mon Sep 17 00:00:00 2001
From: Rafael JPD
Date: Sat, 24 May 2025 14:26:52 -0300
Subject: [PATCH 2/7] Remove unnecessary logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 core/utils/date_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/utils/date_utils.py b/core/utils/date_utils.py
index 3302748..56bfa33 100644
--- a/core/utils/date_utils.py
+++ b/core/utils/date_utils.py
@@ -29,7 +29,7 @@ def get_date_obj(date_str: str, format: str = "%Y-%m-%d") -> datetime.date:
     try:
         return datetime.strptime(date_str, format).date()
     except (ValueError, TypeError):
-        logging.error("Invalid date format. Use YYYY-MM-DD.")
+        ...
 
 
 def get_date_range_str(from_date_str: str = None, until_date_str: str = None, days_to_go_back: int = None) -> tuple[str, str]:

From a68706c4125faf7e74ccede08c46d48b28a72a9f Mon Sep 17 00:00:00 2001
From: Rafael JPD
Date: Sat, 24 May 2025 14:27:12 -0300
Subject: [PATCH 3/7] Add new IGN status for log_file

---
 log_manager/choices.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/log_manager/choices.py b/log_manager/choices.py
index bf71e23..0c62e01 100644
--- a/log_manager/choices.py
+++ b/log_manager/choices.py
@@ -6,6 +6,7 @@
 LOG_FILE_STATUS_PARSING = 'PAR'
 LOG_FILE_STATUS_PROCESSED = 'PRO'
 LOG_FILE_STATUS_INVALIDATED = 'INV'
+LOG_FILE_STATUS_IGNORED = 'IGN'
 
 LOG_FILE_STATUS = [
     (LOG_FILE_STATUS_CREATED, _("Created")),
@@ -13,6 +14,7 @@
     (LOG_FILE_STATUS_PARSING, _("Parsing")),
     (LOG_FILE_STATUS_PROCESSED, _("Processed")),
     (LOG_FILE_STATUS_INVALIDATED, _("Invalidated")),
+    (LOG_FILE_STATUS_IGNORED, _("Ignored")),
 ]

From a4ae6268d46c3f7798b1cd672f31f7b7cd682268 Mon Sep 17 00:00:00 2001
From: Rafael JPD
Date: Sat, 24 May 2025 14:27:40 -0300
Subject: [PATCH 4/7] Add validation field to log_file

---
 .../migrations/0004_logfile_validation.py | 19 +++++++++++++++++++
 log_manager/models.py                     |  8 ++++++++
 log_manager/tasks.py                      |  2 ++
 log_manager/wagtail_hooks.py              |  1 +
 4 files changed, 30 insertions(+)
 create mode 100644 log_manager/migrations/0004_logfile_validation.py

diff --git a/log_manager/migrations/0004_logfile_validation.py b/log_manager/migrations/0004_logfile_validation.py
new file mode 100644
index 0000000..2cbab1a
--- /dev/null
+++ b/log_manager/migrations/0004_logfile_validation.py
@@ -0,0 +1,19 @@
+# Generated by Django 5.0.7 on 2025-05-24 15:54
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("log_manager", "0003_collectionlogfiledatecount_is_usage_metric_computed"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="logfile",
+            name="validation",
+            field=models.JSONField(
+                blank=True, default=dict, null=True, verbose_name="Validation"
+            ),
+        ),
+    ]
diff --git a/log_manager/models.py b/log_manager/models.py
index d9fe981..07346bc 100644
--- a/log_manager/models.py
+++ b/log_manager/models.py
@@ -204,6 +204,13 @@ class LogFile(CommonControlField):
         null=False,
     )
 
+    validation = models.JSONField(
+        _("Validation"),
+        null=True,
+        blank=True,
+        default=dict,
+    )
+
     collection = models.ForeignKey(
         Collection,
         verbose_name=_("Collection"),
@@ -217,6 +224,7 @@ class LogFile(CommonControlField):
         FieldPanel('path'),
         FieldPanel('stat_result'),
         FieldPanel('status'),
+        FieldPanel('validation'),
         AutocompletePanel('collection'),
     ]
 
diff --git a/log_manager/tasks.py b/log_manager/tasks.py
index 0a4e4e6..b713bd6 100644
--- a/log_manager/tasks.py
+++ b/log_manager/tasks.py
@@ -1,7 +1,9 @@
 import logging
+import json
 import os
 
 from django.conf import settings
+from django.core.serializers.json import DjangoJSONEncoder
 from django.core.mail import send_mail
 from django.contrib.auth import get_user_model
 from django.utils.translation import gettext as _
diff --git a/log_manager/wagtail_hooks.py b/log_manager/wagtail_hooks.py
index d0d8692..2f0466e 100644
--- a/log_manager/wagtail_hooks.py
+++ b/log_manager/wagtail_hooks.py
@@ -61,6 +61,7 @@ class LogFileSnippetViewSet(SnippetViewSet):
         "stat_result",
         "collection",
         "status",
+        "validation",
         "hash"
     )
     list_filter = ("status", "collection")

From 12311dc3d24771f08f738888db8b74865e72c3bf Mon Sep 17 00:00:00 2001
From: Rafael JPD
Date: Sat, 24 May 2025 14:28:29 -0300
Subject: [PATCH 5/7] Improve log file validation task

---
 log_manager/tasks.py | 63 +++++++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/log_manager/tasks.py b/log_manager/tasks.py
index b713bd6..0a93616 100644
--- a/log_manager/tasks.py
+++ b/log_manager/tasks.py
@@ -22,6 +22,8 @@
 )
 
 
+LOGFILE_STAT_RESULT_CTIME_INDEX = 9
+
 User = get_user_model()
 
 
@@ -85,37 +87,60 @@ def _add_log_file(user, collection, root, name, visible_dates):
 
 
 @celery_app.task(bind=True, name=_('Validate log files'), timelimit=-1)
-def task_validate_log_files(self, collections=[], user_id=None, username=None):
+def task_validate_log_files(self, collections=[], from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, ignore_date=False):
     """
     Task to validate log files in the database.
 
     Parameters:
         collections (list, optional): List of collection acronyms. Defaults to [].
+        from_date (str, optional): The start date for log file validation in YYYY-MM-DD format. Defaults to None.
+        until_date (str, optional): The end date for log file validation in YYYY-MM-DD format. Defaults to None.
+        days_to_go_back (int, optional): The number of days to go back from today for log file validation. Defaults to None.
         user_id (int, optional): The ID of the user initiating the task. Defaults to None.
         username (str, optional): The username of the user initiating the task. Defaults to None.
+        ignore_date (bool, optional): If True, validate log files regardless of their creation date. Defaults to False.
""" user = _get_user(self.request, username=username, user_id=user_id) + logging.info(f'Validating log files for collections: {collections}.') + + visible_dates = _get_visible_dates(from_date, until_date, days_to_go_back) + + if not ignore_date: + logging.info(f'Interval: {visible_dates[0]} to {visible_dates[-1]}.') + for col in collections or Collection.acron3_list(): for log_file in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_CREATED, collection__acron3=col): - logging.info(f'Validating log file {log_file.path} for collection {log_file.collection.acron3}.') - - buffer_size, sample_size = _fetch_validation_parameters(col) - - val_results = utils.validate_file(path=log_file.path, buffer_size=buffer_size, sample_size=sample_size) - if val_results.get('is_valid', {}).get('all', False): - models.LogFileDate.create_or_update( - user=user, - log_file=log_file, - date=val_results.get('probably_date', ''), - ) - log_file.status = choices.LOG_FILE_STATUS_QUEUED - - else: - log_file.status = choices.LOG_FILE_STATUS_INVALIDATED - - logging.info(f'Log file {log_file.path} ({log_file.collection.acron3}) has status {log_file.status}.') - log_file.save() + file_ctime = date_utils.get_date_obj_from_timestamp(log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX]) + if file_ctime in visible_dates or ignore_date: + logging.info(f'Validating log file {log_file.path} for collection {log_file.collection.acron3}.') + + buffer_size, sample_size = _fetch_validation_parameters(col) + + val_result = utils.validate_file(path=log_file.path, buffer_size=buffer_size, sample_size=sample_size) + if 'datetimes' in val_result.get('content', {}).get('summary', {}): + del val_result['content']['summary']['datetimes'] + + try: + log_file.validation['result'] = json.dumps(val_result, cls=DjangoJSONEncoder) if val_result else {} + log_file.validation['parameters'] = {'buffer_size': buffer_size, 'sample_size': sample_size} + except json.JSONDecodeError as e: + logging.error(f'Error serializing validation result: {e}') + log_file.validation = {} + + if val_result.get('is_valid', {}).get('all', False): + models.LogFileDate.create_or_update( + user=user, + log_file=log_file, + date=val_result.get('probably_date', ''), + ) + log_file.status = choices.LOG_FILE_STATUS_QUEUED + + else: + log_file.status = choices.LOG_FILE_STATUS_INVALIDATED + + logging.info(f'Log file {log_file.path} ({log_file.collection.acron3}) has status {log_file.status}.') + log_file.save() def _fetch_validation_parameters(collection, default_buffer_size=0.1, default_sample_size=2048): From 7c76acbf63e2751a66bffdad0128982aa44471b1 Mon Sep 17 00:00:00 2001 From: Rafael JPD Date: Sat, 24 May 2025 14:29:39 -0300 Subject: [PATCH 6/7] Melhora task para reportar resultados --- log_manager/tasks.py | 65 +++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/log_manager/tasks.py b/log_manager/tasks.py index 0a93616..f7552eb 100644 --- a/log_manager/tasks.py +++ b/log_manager/tasks.py @@ -197,30 +197,51 @@ def _check_missing_logs_for_date(user, collection, date): @celery_app.task(bind=True, name=_('Generate log files count report')) -def task_log_files_count_status_report(self, collection, user_id=None, username=None): - col = models.Collection.objects.get(acron3=collection) - subject = _(f'Log Files Report for {col.main_name}') - - message = _(f'Dear collection {col.main_name},\n\nThis message is to inform you of the results of the Usage Log Validation service. 
-
-    missing = models.CollectionLogFileDateCount.objects.filter(collection__acron3=collection, status=choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES)
-    extra = models.CollectionLogFileDateCount.objects.filter(collection__acron3=collection, status=choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES)
-    ok = models.CollectionLogFileDateCount.objects.filter(collection__acron3=collection, status=choices.COLLECTION_LOG_FILE_DATE_COUNT_OK)
-
-    if missing.count() > 0:
-        message += _(f'There are {missing.count()} missing log files.\n')
-    if extra.count() > 0:
-        message += _(f'There are {extra.count()} extra log files.\n')
-    if ok.count() > 0:
-        message += _(f'There are {ok.count()} dates with correct log files.\n')
-
-    if missing.count() > 0 or extra.count() > 0:
-        message += _(f'\nPlease check the script that shares the logs.\n')
+def task_log_files_count_status_report(self, collections=[], from_date=None, until_date=None, user_id=None, username=None):
+    from_date, until_date = date_utils.get_date_range_str(from_date, until_date)
+    possible_dates_n = len(date_utils.get_date_objs_from_date_range(from_date, until_date))
+
+    from_date_obj = date_utils.get_date_obj(from_date)
+    until_date_obj = date_utils.get_date_obj(until_date)
+
+    for collection in collections or Collection.acron3_list():
+        col = models.Collection.objects.get(acron3=collection)
+        subject = _(f'Usage Log Validation Results ({from_date} to {until_date})')
+        message = _(f'This message provides the results of the Usage Log Validation for the period {from_date} to {until_date}:\n\n')
 
-    message += _(f'\nYou can view the complete report results at {settings.WAGTAILADMIN_BASE_URL}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={col.pk}>.')
+        missing = models.CollectionLogFileDateCount.objects.filter(
+            collection__acron3=collection,
+            status=choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES,
+            date__gte=from_date_obj,
+            date__lte=until_date_obj,
+        )
+        extra = models.CollectionLogFileDateCount.objects.filter(
+            collection__acron3=collection,
+            status=choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES,
+            date__gte=from_date_obj,
+            date__lte=until_date_obj,
+        )
+        ok = models.CollectionLogFileDateCount.objects.filter(
+            collection__acron3=collection,
+            status=choices.COLLECTION_LOG_FILE_DATE_COUNT_OK,
+            date__gte=from_date_obj,
+            date__lte=until_date_obj,
+        )
+
+        if missing.count() > 0:
+            message += _(f'- There are {missing.count()} missing log files.\n')
+        if extra.count() > 0:
+            message += _(f'- There are {extra.count()} extra log files.\n')
+        if ok.count() > 0:
+            message += _(f'- There are {ok.count()} dates with correct log files.\n')
+
+        if missing.count() > 0 or extra.count() > 0:
+            message += _(f'\nPlease review the script responsible for sharing the log files.\n')
+
+        message += _(f'\nYou can view the full report at {settings.WAGTAILADMIN_BASE_URL}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={col.pk}.')
 
-    logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. Message: {message}')
-    _send_message(subject, message, collection)
+        logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. Message: {message}')
+        _send_message(subject, message, collection)
 
 
 def _send_message(subject, message, collection):

From 7b35e4914ec7539536303d0a00ff6f98d78d634a Mon Sep 17 00:00:00 2001
From: Rafael JPD
Date: Sat, 24 May 2025 14:29:55 -0300
Subject: [PATCH 7/7] Update version to 1.8.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index d263485..afa2b35 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.7.6
\ No newline at end of file
+1.8.0
\ No newline at end of file
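
Usage note (not part of the patches): a minimal sketch of how the validation payload added in PATCH 4/7 and populated in PATCH 5/7 could be read back. It assumes a configured Django environment (e.g. via python manage.py shell) and that validation['result'] holds the JSON string written by json.dumps in task_validate_log_files; the helper name get_validation_summary is hypothetical.

    import json

    from log_manager import choices, models


    def get_validation_summary(log_file_id):
        # Hypothetical helper: deserialize the payload that task_validate_log_files
        # stores under validation['result'] (a JSON string, or {} when empty).
        log_file = models.LogFile.objects.get(pk=log_file_id)
        raw = (log_file.validation or {}).get('result')
        return json.loads(raw) if isinstance(raw, str) else {}


    # Example: inspect why files were marked as invalidated.
    for lf in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_INVALIDATED):
        summary = get_validation_summary(lf.pk)
        print(lf.path, summary.get('is_valid', {}))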