From 4bd196d8f9bd58bc96c7d350f66c17ece3d54c5a Mon Sep 17 00:00:00 2001 From: Kaustubh BM Date: Tue, 31 Jan 2023 14:45:02 +0530 Subject: [PATCH 1/2] Update Wappalyzer.py Adding 2 functions: 1) analyze_with_cpe(self,webpage): This function adds the CPE (Common Platform Enumeration) to the output as this is also available in the analyzed data. 2) analyze_with_cpe_and_versions(self,webpage): This function matches the CPE to the corresponding version and outputs that result. --- Wappalyzer/Wappalyzer.py | 647 ++++++++++++++++++++------------------- 1 file changed, 331 insertions(+), 316 deletions(-) diff --git a/Wappalyzer/Wappalyzer.py b/Wappalyzer/Wappalyzer.py index 8169048..e1f6ded 100644 --- a/Wappalyzer/Wappalyzer.py +++ b/Wappalyzer/Wappalyzer.py @@ -1,270 +1,314 @@ - -from typing import Callable, Dict, Iterable, List, Any, Mapping, Set +import aiohttp +import asyncio import json import logging import pkg_resources import re -import os -import pathlib import requests +import warnings -from datetime import datetime, timedelta -from typing import Optional +from bs4 import BeautifulSoup +from typing import Union -from Wappalyzer.fingerprint import Fingerprint, Pattern, Technology, Category -from Wappalyzer.webpage import WebPage, IWebPage +logger = logging.getLogger(name=__name__) -logger = logging.getLogger(name="python-Wappalyzer") class WappalyzerError(Exception): - # unused for now """ Raised for fatal Wappalyzer errors. """ pass -class Wappalyzer: + +class WebPage: + """ + Simple representation of a web page, decoupled + from any particular HTTP library's API. """ - Python Wappalyzer driver. - Consider the following exemples. - - Here is how you can use the latest technologies file from AliasIO/wappalyzer repository. - - .. python:: + def __init__(self, url, html, headers): + """ + Initialize a new WebPage object. - from Wappalyzer import Wappalyzer - wappalyzer=Wappalyzer.latest(update=True) - # Create webpage - webpage=WebPage.new_from_url('http://example.com') - # analyze - results = wappalyzer.analyze_with_categories(webpage) + Parameters + ---------- + url : str + The web page URL. + html : str + The web page content (HTML) + headers : dict + The HTTP response headers + """ + self.url = url + self.html = html + self.headers = headers - Here is how you can custom request and headers arguments: - - .. python:: + try: + list(self.headers.keys()) + except AttributeError: + raise ValueError("Headers must be a dictionary-like object") - import requests - from Wappalyzer import Wappalyzer, WebPage - wappalyzer = Wappalyzer.latest() - webpage = WebPage.new_from_url('http://exemple.com', headers={'User-Agent': 'Custom user agent'}) - wappalyzer.analyze_with_categories(webpage) + self._parse_html() - """ + def _parse_html(self): + """ + Parse the HTML with BeautifulSoup to find