1 change: 1 addition & 0 deletions lib/crewai-tools/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     "python-docx~=1.2.0",
     "youtube-transcript-api~=1.2.2",
     "pymupdf~=1.26.6",
+    "playwright>=1.57.0",

Playwright added as hard dependency instead of optional

Medium Severity

playwright is added to the required dependencies list, making it mandatory for all crewai-tools users. Playwright is a heavyweight package that also requires separate browser binary installation (playwright install). The implementation already handles ImportError at runtime (lines 87–88 of the scrape tool), clearly indicating it was designed to be optional. It belongs in [project.optional-dependencies] alongside similar optional packages like selenium and browserbase.

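A minimal sketch of how the dependency could be declared as an extra instead (the group name and layout are illustrative, assuming the project keeps its existing optional-dependencies table):

    [project.optional-dependencies]
    playwright = [
        "playwright>=1.57.0",
    ]

Users who need render_js=True would then opt in with something like pip install "crewai-tools[playwright]" followed by playwright install chromium.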

]


@@ -42,6 +42,7 @@ class ScrapeWebsiteTool(BaseTool):
             "Upgrade-Insecure-Requests": "1",
         }
     )
+    render_js: bool = False

     def __init__(
         self,
@@ -72,16 +73,33 @@ def _run(
         website_url: str | None = kwargs.get("website_url", self.website_url)
         if website_url is None:
             raise ValueError("Website URL must be provided.")
+        html_content = ""
+        if self.render_js:
+            try:
+                from playwright.sync_api import sync_playwright
+
+                with sync_playwright() as p:
+                    browser = p.chromium.launch(headless=True)
+                    page = browser.new_page()
+                    page.goto(website_url, wait_until="networkidle", timeout=30000)
+                    html_content = page.content()
+                    browser.close()
+            except ImportError:
+                return "Error: please install 'playwright' to use render_js=True: pip install playwright"
+            except Exception as e:
+                return f"Error rendering page with Playwright: {e!s}"
+        else:
+            page = requests.get(
+                website_url,
+                timeout=15,
+                headers=self.headers,
+                cookies=self.cookies if self.cookies else {},
+            )

-        page = requests.get(
-            website_url,
-            timeout=15,
-            headers=self.headers,
-            cookies=self.cookies if self.cookies else {},
-        )
+            page.encoding = page.apparent_encoding
+            html_content = page.text

-        page.encoding = page.apparent_encoding
-        parsed = BeautifulSoup(page.text, "html.parser")
+        parsed = BeautifulSoup(html_content, "html.parser")

         text = "The following text is scraped website content:\n\n"
         text += parsed.get_text(" ")
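For reference, a short sketch of how the new flag would be exercised (import path mirrors the test below; assumes Playwright and a Chromium binary are installed via pip install playwright and playwright install chromium):

    from crewai_tools.tools.scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool

    # render_js=True renders the page in headless Chromium before parsing;
    # the default (False) keeps the original requests-based path.
    tool = ScrapeWebsiteTool(website_url="https://example.com", render_js=True)
    print(tool._run())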
35 changes: 35 additions & 0 deletions lib/crewai-tools/tests/tools/test_scrape_website_tool.py
@@ -0,0 +1,35 @@
import pytest
from unittest.mock import patch, MagicMock
from crewai_tools.tools.scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool

def test_scrape_website_tool_render_js_logic():
    """render_js=True routes scraping through Playwright and returns the rendered HTML."""
    tool = ScrapeWebsiteTool(website_url="https://example.com", render_js=True)

    with patch("playwright.sync_api.sync_playwright") as mock_playwright:
        # Simulate the Playwright object structure
        mock_context = mock_playwright.return_value.__enter__.return_value
        mock_browser = mock_context.chromium.launch.return_value
        mock_page = mock_browser.new_page.return_value
        mock_page.content.return_value = "<html><body>JS Content</body></html>"

        result = tool._run()

        assert "JS Content" in result
        mock_playwright.assert_called_once()

def test_scrape_website_tool_default_behavior():
    """The default (render_js=False) path keeps the original requests-based behavior."""
    tool = ScrapeWebsiteTool(website_url="https://example.com")

    with patch("requests.get") as mock_get:
        mock_response = MagicMock()
        mock_response.text = "Normal Content"
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        result = tool._run()

        assert "Normal Content" in result
        # Ensure the default path uses requests and never touches Playwright
        mock_get.assert_called_once()
16 changes: 15 additions & 1 deletion lib/crewai/src/crewai/agents/crew_agent_executor.py
@@ -1009,7 +1009,7 @@ async def ainvoke(self, inputs: dict[str, Any]) -> dict[str, Any]:
             raise

         if self.ask_for_human_input:
-            formatted_answer = self._handle_human_feedback(formatted_answer)
+            formatted_answer = await self._ahandle_human_feedback(formatted_answer)

         self._create_short_term_memory(formatted_answer)
         self._create_long_term_memory(formatted_answer)
@@ -1508,6 +1508,20 @@ def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
         provider = get_provider()
         return provider.handle_feedback(formatted_answer, self)

+    async def _ahandle_human_feedback(
+        self, formatted_answer: AgentFinish
+    ) -> AgentFinish:
+        """Process human feedback asynchronously via the configured provider.
+
+        Args:
+            formatted_answer: Initial agent result.
+
+        Returns:
+            Final answer after feedback.
+        """
+        provider = get_provider()
+        return await provider.handle_feedback_async(formatted_answer, self)
+
     def _is_training_mode(self) -> bool:
         """Check if training mode is active.

16 changes: 12 additions & 4 deletions lib/crewai/src/crewai/cli/plus_api.py
@@ -1,6 +1,8 @@
+import os
 from typing import Any
 from urllib.parse import urljoin
-import os

+import httpx
 import requests

 from crewai.cli.config import Settings
@@ -33,7 +35,11 @@ def __init__(self, api_key: str) -> None:
         if settings.org_uuid:
             self.headers["X-Crewai-Organization-Id"] = settings.org_uuid

-        self.base_url = os.getenv("CREWAI_PLUS_URL") or str(settings.enterprise_base_url) or DEFAULT_CREWAI_ENTERPRISE_URL
+        self.base_url = (
+            os.getenv("CREWAI_PLUS_URL")
+            or str(settings.enterprise_base_url)
+            or DEFAULT_CREWAI_ENTERPRISE_URL
+        )

     def _make_request(
         self, method: str, endpoint: str, **kwargs: Any
@@ -49,8 +55,10 @@ def login_to_tool_repository(self) -> requests.Response:
     def get_tool(self, handle: str) -> requests.Response:
         return self._make_request("GET", f"{self.TOOLS_RESOURCE}/{handle}")

-    def get_agent(self, handle: str) -> requests.Response:
-        return self._make_request("GET", f"{self.AGENTS_RESOURCE}/{handle}")
+    async def get_agent(self, handle: str) -> httpx.Response:
+        url = urljoin(self.base_url, f"{self.AGENTS_RESOURCE}/{handle}")
+        async with httpx.AsyncClient() as client:
+            return await client.get(url, headers=self.headers)

Async get_agent drops trust_env=False proxy setting

Low Severity

The sync _make_request explicitly sets session.trust_env = False to ignore proxy environment variables, but the new async get_agent uses httpx.AsyncClient() which defaults to trust_env=True. This means the async version will pick up HTTP_PROXY/HTTPS_PROXY environment variables that the sync version intentionally ignores, potentially causing requests to route through unintended proxies or fail in corporate/CI environments.

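A minimal sketch of one way to keep the async path aligned with the sync behaviour (assuming proxies should still be ignored):

    async def get_agent(self, handle: str) -> httpx.Response:
        url = urljoin(self.base_url, f"{self.AGENTS_RESOURCE}/{handle}")
        # trust_env=False stops httpx from honouring HTTP_PROXY/HTTPS_PROXY,
        # matching the sync session's trust_env = False
        async with httpx.AsyncClient(trust_env=False) as client:
            return await client.get(url, headers=self.headers)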


     def publish_tool(
         self,