From f38caa2d6bd53b283840a52481acb88260449070 Mon Sep 17 00:00:00 2001 From: Pitanga Innovare Date: Mon, 16 Jun 2025 20:21:04 -0300 Subject: [PATCH 1/2] =?UTF-8?q?Atualiza=20para=20vers=C3=A3o=201.12.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 32bd932..6f165bc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.12.0 \ No newline at end of file +1.12.1 \ No newline at end of file From 0294700f6d9987bfb8e431abfa355f378165351c Mon Sep 17 00:00:00 2001 From: Pitanga Innovare Date: Mon, 16 Jun 2025 20:22:25 -0300 Subject: [PATCH 2/2] =?UTF-8?q?Cria=20m=C3=A9todo=20para=20extrair=20dados?= =?UTF-8?q?,=20lidando=20com=20problemas=20de=20padroniza=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metrics/tasks.py | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/metrics/tasks.py b/metrics/tasks.py index 9154d90..78e1220 100644 --- a/metrics/tasks.py +++ b/metrics/tasks.py @@ -277,18 +277,13 @@ def _process_line(line, utm, log_file): except Exception as e: _log_discarded_line(log_file, line, tracker_choices.LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION, str(e)) return False + + try: + item_access_data = _extract_item_access_data(log_file.collection.acron3, translated_url) + except Exception as e: + _log_discarded_line(log_file, line, tracker_choices.LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION, str(e)) + return False - item_access_data = { - 'collection': log_file.collection.acron3, - 'scielo_issn': translated_url.get('scielo_issn'), - 'pid_v2': standardizer.standardize_pid_v2(translated_url.get('pid_v2')), - 'pid_v3': standardizer.standardize_pid_v3(translated_url.get('pid_v3')), - 'pid_generic': standardizer.standardize_pid_generic(translated_url.get('pid_generic')), - 'media_language': 
standardizer.standardize_language_code(translated_url.get('media_language')), - 'media_format': translated_url.get('media_format'), - 'content_type': translated_url.get('content_type'), - } - if not is_valid_item_access_data(item_access_data): _log_discarded_line( log_file, line, @@ -324,6 +319,31 @@ def _process_line(line, utm, log_file): return True +def _extract_item_access_data(collection_acron3, translated_url): + """ + Extracts item access data from the translated URL and standardizes it. + + Args: + collection_acron3 (str): The acronym of the collection. + translated_url (dict): The translated URL containing metadata. + + Returns: + dict: A dictionary containing the standardized item access data; validation is left to the caller. + """ + item_access_data = { + 'collection': collection_acron3, + 'scielo_issn': translated_url.get('scielo_issn'), + 'pid_v2': standardizer.standardize_pid_v2(translated_url.get('pid_v2')), + 'pid_v3': standardizer.standardize_pid_v3(translated_url.get('pid_v3')), + 'pid_generic': standardizer.standardize_pid_generic(translated_url.get('pid_generic')), + 'media_language': standardizer.standardize_language_code(translated_url.get('media_language')), + 'media_format': translated_url.get('media_format'), + 'content_type': translated_url.get('content_type'), + } + + return item_access_data + + def _register_item_access(item_access_data, line, jou_id, art_id): """ Registers an item access in the database, creating necessary objects if they do not exist.