Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7.6
1.8.0
2 changes: 1 addition & 1 deletion config/settings/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
# Admin URL regex.
DJANGO_ADMIN_URL = env("DJANGO_ADMIN_URL")
# https://docs.wagtail.org/en/stable/reference/settings.html#wagtailadmin-base-url
WAGTAILADMIN_BASE_URL = env("WAGTAIL_ADMIN_URL")
WAGTAILADMIN_BASE_URL = env("WAGTAILADMIN_BASE_URL")

# Anymail
# ------------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion core/utils/date_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_date_obj(date_str: str, format: str = "%Y-%m-%d") -> datetime.date:
try:
return datetime.strptime(date_str, format).date()
except (ValueError, TypeError):
logging.error("Invalid date format. Use YYYY-MM-DD.")
...


def get_date_range_str(from_date_str: str = None, until_date_str: str = None, days_to_go_back: int = None) -> tuple[str, str]:
Expand Down
2 changes: 2 additions & 0 deletions log_manager/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
# LogFile status codes: 3-character values persisted in the database.
LOG_FILE_STATUS_PARSING = 'PAR'
LOG_FILE_STATUS_PROCESSED = 'PRO'
LOG_FILE_STATUS_INVALIDATED = 'INV'
LOG_FILE_STATUS_IGNORED = 'IGN'

# Choices for LogFile.status: (stored value, translated human label).
LOG_FILE_STATUS = [
(LOG_FILE_STATUS_CREATED, _("Created")),
(LOG_FILE_STATUS_QUEUED, _("Queued")),
(LOG_FILE_STATUS_PARSING, _("Parsing")),
(LOG_FILE_STATUS_PROCESSED, _("Processed")),
(LOG_FILE_STATUS_INVALIDATED, _("Invalidated")),
(LOG_FILE_STATUS_IGNORED, _("Ignored")),
]


Expand Down
19 changes: 19 additions & 0 deletions log_manager/migrations/0004_logfile_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 5.0.7 on 2025-05-24 15:54

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the nullable JSON `validation` field to LogFile, matching the
    # field declared on the model (null=True, blank=True, default=dict).
    dependencies = [
        ("log_manager", "0003_collectionlogfiledatecount_is_usage_metric_computed"),
    ]

    operations = [
        migrations.AddField(
            model_name="logfile",
            name="validation",
            field=models.JSONField(
                blank=True, default=dict, null=True, verbose_name="Validation"
            ),
        ),
    ]
8 changes: 8 additions & 0 deletions log_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,13 @@ class LogFile(CommonControlField):
null=False,
)

validation = models.JSONField(
_("Validation"),
null=True,
blank=True,
default=dict,
)

collection = models.ForeignKey(
Collection,
verbose_name=_("Collection"),
Expand All @@ -217,6 +224,7 @@ class LogFile(CommonControlField):
FieldPanel('path'),
FieldPanel('stat_result'),
FieldPanel('status'),
FieldPanel('validation'),
AutocompletePanel('collection'),
]

Expand Down
128 changes: 88 additions & 40 deletions log_manager/tasks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import logging
import json
import os

from django.conf import settings
from django.core.serializers.json import DjangoJSONEncoder
from django.core.mail import send_mail
from django.contrib.auth import get_user_model
from django.utils.translation import gettext as _
Expand All @@ -20,6 +22,8 @@
)


LOGFILE_STAT_RESULT_CTIME_INDEX = 9

User = get_user_model()


Expand Down Expand Up @@ -83,37 +87,60 @@ def _add_log_file(user, collection, root, name, visible_dates):


@celery_app.task(bind=True, name=_('Validate log files'), timelimit=-1)
def task_validate_log_files(self, collections=None, from_date=None, until_date=None, days_to_go_back=None, user_id=None, username=None, ignore_date=False):
    """
    Task to validate log files in the database.

    Parameters:
        collections (list, optional): List of collection acronyms. Defaults to None (all collections).
        from_date (str, optional): The start date for log discovery in YYYY-MM-DD format. Defaults to None.
        until_date (str, optional): The end date for log discovery in YYYY-MM-DD format. Defaults to None.
        days_to_go_back (int, optional): The number of days to go back from today for log discovery. Defaults to None.
        user_id (int, optional): The ID of the user initiating the task. Defaults to None.
        username (str, optional): The username of the user initiating the task. Defaults to None.
        ignore_date (bool, optional): If True, ignore the date of the log file. Defaults to False.
    """
    user = _get_user(self.request, username=username, user_id=user_id)

    logging.info(f'Validating log files for collections: {collections}.')

    visible_dates = _get_visible_dates(from_date, until_date, days_to_go_back)

    if not ignore_date:
        logging.info(f'Interval: {visible_dates[0]} to {visible_dates[-1]}.')

    for col in collections or Collection.acron3_list():
        for log_file in models.LogFile.objects.filter(status=choices.LOG_FILE_STATUS_CREATED, collection__acron3=col):
            # os.stat() result index 9 is st_ctime — presumably stat_result
            # stores the tuple form of os.stat_result; TODO confirm.
            file_ctime = date_utils.get_date_obj_from_timestamp(log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX])
            if ignore_date or file_ctime in visible_dates:
                logging.info(f'Validating log file {log_file.path} for collection {log_file.collection.acron3}.')

                buffer_size, sample_size = _fetch_validation_parameters(col)

                val_result = utils.validate_file(path=log_file.path, buffer_size=buffer_size, sample_size=sample_size)
                # Drop the per-line datetimes listing before persisting the summary.
                if 'datetimes' in val_result.get('content', {}).get('summary', {}):
                    del val_result['content']['summary']['datetimes']

                if log_file.validation is None:
                    # The model field is null=True, so rows created before the
                    # migration may hold NULL; subscripting None would raise.
                    log_file.validation = {}

                try:
                    # NOTE(review): this stores a JSON *string* inside a JSONField
                    # (double encoding) — confirm this is intentional.
                    log_file.validation['result'] = json.dumps(val_result, cls=DjangoJSONEncoder) if val_result else {}
                    log_file.validation['parameters'] = {'buffer_size': buffer_size, 'sample_size': sample_size}
                except (TypeError, ValueError) as e:
                    # BUG FIX: json.dumps raises TypeError/ValueError on
                    # unserializable input; JSONDecodeError only occurs when
                    # *decoding*, so the previous handler could never fire.
                    logging.error(f'Error serializing validation result: {e}')
                    log_file.validation = {}

                if val_result.get('is_valid', {}).get('all', False):
                    models.LogFileDate.create_or_update(
                        user=user,
                        log_file=log_file,
                        date=val_result.get('probably_date', ''),
                    )
                    log_file.status = choices.LOG_FILE_STATUS_QUEUED

                else:
                    log_file.status = choices.LOG_FILE_STATUS_INVALIDATED

                logging.info(f'Log file {log_file.path} ({log_file.collection.acron3}) has status {log_file.status}.')
                log_file.save()


def _fetch_validation_parameters(collection, default_buffer_size=0.1, default_sample_size=2048):
Expand Down Expand Up @@ -170,30 +197,51 @@ def _check_missing_logs_for_date(user, collection, date):


@celery_app.task(bind=True, name=_('Generate log files count report'))
def task_log_files_count_status_report(self, collections=None, from_date=None, until_date=None, user_id=None, username=None):
    """
    Email each collection a report of its log-file count status
    (missing / extra / OK dates) for the given period.

    Parameters:
        collections (list, optional): List of collection acronyms. Defaults to None (all collections).
        from_date (str, optional): Start of the report period, YYYY-MM-DD. Defaults to None.
        until_date (str, optional): End of the report period, YYYY-MM-DD. Defaults to None.
        user_id (int, optional): The ID of the user initiating the task. Defaults to None.
        username (str, optional): The username of the user initiating the task. Defaults to None.
    """
    from_date, until_date = date_utils.get_date_range_str(from_date, until_date)
    # Removed unused `possible_dates_n` (computed but never read).

    from_date_obj = date_utils.get_date_obj(from_date)
    until_date_obj = date_utils.get_date_obj(until_date)

    for collection in collections or Collection.acron3_list():
        col = models.Collection.objects.get(acron3=collection)
        # NOTE(review): wrapping f-strings in _() translates the already
        # interpolated text; kept for consistency with the file's style.
        subject = _(f'Usage Log Validation Results ({from_date} to {until_date})')
        message = _(f'This message provides the results of the Usage Log Validation for the period {from_date} to {until_date}:\n\n')

        def _status_count(status):
            # One COUNT(*) query per status for this collection and period
            # (previously each queryset's .count() was issued up to twice).
            return models.CollectionLogFileDateCount.objects.filter(
                collection__acron3=collection,
                status=status,
                date__gte=from_date_obj,
                date__lte=until_date_obj,
            ).count()

        missing_count = _status_count(choices.COLLECTION_LOG_FILE_DATE_COUNT_MISSING_FILES)
        extra_count = _status_count(choices.COLLECTION_LOG_FILE_DATE_COUNT_EXTRA_FILES)
        ok_count = _status_count(choices.COLLECTION_LOG_FILE_DATE_COUNT_OK)

        if missing_count > 0:
            message += _(f'- There are {missing_count} missing log files.\n')
        if extra_count > 0:
            message += _(f'- There are {extra_count} extra log files.\n')
        if ok_count > 0:
            message += _(f'- There are {ok_count} dates with correct log files.\n')

        if missing_count > 0 or extra_count > 0:
            message += _(f'\nPlease review the script responsible for sharing the log files.\n')

        # BUG FIX: dropped the stray unmatched '>' that trailed the URL.
        message += _(f'\nYou can view the full report at {settings.WAGTAILADMIN_BASE_URL}/admin/snippets/log_manager/collectionlogfiledatecount/?collection={col.pk}.')

        logging.info(f'Sending email to collection {col.main_name}. Subject: {subject}. Message: {message}')
        _send_message(subject, message, collection)


def _send_message(subject, message, collection):
Expand Down
1 change: 1 addition & 0 deletions log_manager/wagtail_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class LogFileSnippetViewSet(SnippetViewSet):
"stat_result",
"collection",
"status",
"validation",
"hash"
)
list_filter = ("status", "collection")
Expand Down