From ab641ad80a6883f4795d24f9d1e45b862e234831 Mon Sep 17 00:00:00 2001 From: Fedor Timushev Date: Thu, 31 Oct 2024 13:48:38 +0100 Subject: [PATCH] All page data (headers, cookies, requests) as single variable --- .../function_handlers/webscraping_handlers.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/forloop_modules/function_handlers/webscraping_handlers.py b/forloop_modules/function_handlers/webscraping_handlers.py index 21f132b..897667a 100644 --- a/forloop_modules/function_handlers/webscraping_handlers.py +++ b/forloop_modules/function_handlers/webscraping_handlers.py @@ -24,7 +24,7 @@ from forloop_modules.globals.docs_categories import DocsCategories from forloop_modules.globals.variable_handler import variable_handler from forloop_modules.errors.errors import CriticalPipelineError, SoftPipelineError -from forloop_modules.redis.redis_connection import kv_redis +from forloop_modules.redis.redis_connection import kv_redis, redis_config import forloop_modules.queries.node_context_requests_backend as ncrb from docrawl.errors import SpiderFunctionError #from src.gui.gui_layout_context import glc @@ -302,22 +302,11 @@ def direct_execute(self, url, take_screenshot): suh.webscraping_client.load_website(url) - # get headers and cookies - headers = suh.webscraping_client.get_browser_headers() - cookies = suh.webscraping_client.get_browser_cookies() - requests = suh.webscraping_client.get_browser_requests() + # get page data (headers, cookies, requests) + page_data = suh.webscraping_client.get_page_data() - # get active pipeline uid - pipeline_uid=aet.active_pipeline_uid - - redis_key = f'pipeline:{pipeline_uid}:browser_headers' - kv_redis.set(key=redis_key, value=headers) - - redis_key = f'pipeline:{pipeline_uid}:browser_cookies' - kv_redis.set(key=redis_key, value=cookies) - - redis_key = f'pipeline:{pipeline_uid}:browser_requests' - kv_redis.set(key=redis_key, value=requests) + redis_page_data_key = redis_config.PAGE_DATA_KEY_TEMPLATE.format(pipeline_uid=aet.active_pipeline_uid) + kv_redis.set(redis_page_data_key, page_data) # Take screenshot of current page if take_screenshot: