diff --git a/bot/code_review_bot/backend.py b/bot/code_review_bot/backend.py index 17019b5b3..400a41954 100644 --- a/bot/code_review_bot/backend.py +++ b/bot/code_review_bot/backend.py @@ -9,6 +9,7 @@ from code_review_bot import taskcluster from code_review_bot.config import GetAppUserAgent, settings +from code_review_bot.revisions.github import GithubRevision from code_review_bot.tasks.lint import MozLintIssue logger = structlog.get_logger(__name__) @@ -46,6 +47,10 @@ def publish_revision(self, revision): logger.warn("Skipping revision publication on backend") return + if isinstance(revision, GithubRevision): + logger.warn("Skipping revision publication for github") + return {} + # Check the repositories are urls for url in (revision.base_repository, revision.head_repository): assert isinstance(url, str), "Repository must be a string" @@ -115,6 +120,10 @@ def publish_issues(self, issues, revision): logger.warn("Skipping issues publication on backend") return + if isinstance(revision, GithubRevision): + logger.warn("Skipping issues publication for github") + return {} + published = 0 assert ( revision.issues_url is not None diff --git a/bot/code_review_bot/github.py b/bot/code_review_bot/github.py new file mode 100644 index 000000000..e69de29bb diff --git a/bot/code_review_bot/report/lando.py b/bot/code_review_bot/report/lando.py index 330bda4ab..77262f7fb 100644 --- a/bot/code_review_bot/report/lando.py +++ b/bot/code_review_bot/report/lando.py @@ -6,6 +6,7 @@ from code_review_bot import Level from code_review_bot.report.base import Reporter +from code_review_bot.revisions.github import GithubRevision logger = structlog.get_logger(__name__) @@ -29,6 +30,10 @@ def publish(self, issues, revision, task_failures, links, reviewers): """ Send an email to administrators """ + if isinstance(revision, GithubRevision): + logger.warn("Skipping Lando publication for github") + return {} + assert ( revision.phabricator_id and revision.phabricator_phid and revision.diff ), "Revision must have a Phabricator ID, a PHID and a diff" diff --git a/bot/code_review_bot/revisions/__init__.py b/bot/code_review_bot/revisions/__init__.py new file mode 100644 index 000000000..8889c398e --- /dev/null +++ b/bot/code_review_bot/revisions/__init__.py @@ -0,0 +1,255 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import os +import random +from datetime import timedelta + +import rs_parsepatch +import structlog +from libmozdata.phabricator import PhabricatorAPI + +from code_review_bot import Issue, stats, taskcluster +from code_review_bot.config import ( + settings, +) +from code_review_bot.tasks.base import AnalysisTask + +logger = structlog.get_logger(__name__) + + +class ImprovementPatch: + """ + An improvement patch built by the bot + """ + + def __init__(self, analyzer, patch_name, content): + assert isinstance(analyzer, AnalysisTask) + + # Build name from analyzer and revision + self.analyzer = analyzer + self.name = f"{self.analyzer.name}-{patch_name}.diff" + self.content = content + self.url = None + self.path = None + + def __str__(self): + return f"{self.analyzer.name}: {self.url or self.path or self.name}" + + def write(self): + """ + Write patch on local FS, for dev & tests only + """ + self.path = os.path.join(settings.taskcluster.results_dir, self.name) + with open(self.path, "w") as f: + length = f.write(self.content) + logger.info("Improvement patch saved", path=self.path, length=length) + + def publish(self, days_ttl=30): + """ + Push through Taskcluster API to setup the content-type header + so it displays nicely in browsers + """ + assert ( + not settings.taskcluster.local + ), "Only publish on online Taskcluster tasks" + self.url = taskcluster.upload_artifact( + f"public/patch/{self.name}", + self.content.encode(), + content_type="text/plain; charset=utf-8", # Displays instead of download + ttl=timedelta(days=days_ttl - 1), + ) + logger.info("Improvement patch published", url=self.url) + + +class Revision: + """ + A generic revision to override using provider specific details + """ + + def __init__( + self, + ): + # backend's returned URL to list or create issues linked to the revision in bulk (diff is optional) + self.issues_url = None + + # Patches built later on + self.improvement_patches = [] + + # Patch analysis + self.files = [] + self.lines = {} + + @property + def namespaces(self): + raise NotImplementedError + + @property + def before_after_feature(self): + """ + Randomly run the before/after feature depending on a configured ratio. + All the diffs of a revision must be analysed with or without the feature. + """ + if getattr(self, "id", None) is None: + logger.debug( + "Backend ID must be set to determine if using the before/after feature. Skipping." + ) + return False + # Set random module pseudo-random seed based on the revision ID to + # ensure that successive calls to random.random will return deterministic values + random.seed(self.id) + return random.random() < taskcluster.secrets.get("BEFORE_AFTER_RATIO", 0) + # Reset random module seed to prevent deterministic values after calling that function + random.seed(os.urandom(128)) + + def __repr__(self): + raise NotImplementedError + + def __str__(self): + raise NotImplementedError + + @staticmethod + def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI): + """ + Load identifiers from Phabricator, using the remote task description + """ + from code_review_bot.revisions.github import GithubRevision + from code_review_bot.revisions.phabricator import PhabricatorRevision + + # Load build target phid from the task env + code_review = try_task["extra"]["code-review"] + + if "github" in code_review: + return GithubRevision(**code_review["github"]) + else: + return PhabricatorRevision.from_try_task( + code_review, decision_task, phabricator + ) + + def analyze_patch(self): + """ + Analyze loaded patch to extract modified lines + and statistics + """ + assert self.patch is not None, "Missing patch" + assert isinstance(self.patch, str), "Invalid patch type" + + # List all modified lines from current revision changes + patch_stats = rs_parsepatch.get_lines(self.patch) + assert len(patch_stats) > 0, "Empty patch" + + self.lines = {stat["filename"]: stat["added_lines"] for stat in patch_stats} + + # Shortcut to files modified + self.files = self.lines.keys() + + # Report nb of files and lines analyzed + stats.add_metric("analysis.files", len(self.files)) + stats.add_metric( + "analysis.lines", sum(len(line) for line in self.lines.values()) + ) + + def load_file(self, path): + """ + Load a file content at current revision from remote HGMO + """ + raise NotImplementedError + + def has_file(self, path): + """ + Check if the path is in this patch + """ + assert isinstance(path, str) + return path in self.files + + def contains(self, issue): + """ + Check if the issue (path+lines) is in this patch + """ + assert isinstance(issue, Issue) + + # Get modified lines for this issue + modified_lines = self.lines.get(issue.path) + if modified_lines is None: + return False + + # Empty line means full file + if issue.line is None: + return True + + # Detect if this issue is in the patch + lines = set(range(issue.line, issue.line + issue.nb_lines)) + return not lines.isdisjoint(modified_lines) + + @property + def has_clang_files(self): + """ + Check if this revision has any file that might + be a C/C++ file + """ + + def _is_clang(filename): + _, ext = os.path.splitext(filename) + return ext.lower() in settings.cpp_extensions + + return any(_is_clang(f) for f in self.files) + + @property + def has_clang_header_files(self): + """ + Check if this revision has any file that might + be a C/C++ header file + """ + + def _is_clang_header(filename): + _, ext = os.path.splitext(filename) + return ext.lower() in settings.cpp_header_extensions + + return any(_is_clang_header(f) for f in self.files) + + @property + def has_idl_files(self): + """ + Check if this revision has any idl files + """ + + def _is_idl(filename): + _, ext = os.path.splitext(filename) + return ext.lower() in settings.idl_extensions + + return any(_is_idl(f) for f in self.files) + + @property + def is_blacklisted(self): + raise NotImplementedError + + def add_improvement_patch(self, analyzer, content): + """ + Save an improvement patch, and make it available + as a Taskcluster artifact + """ + assert isinstance(content, str) + assert len(content) > 0 + self.improvement_patches.append(ImprovementPatch(analyzer, repr(self), content)) + + def reset(self): + """ + Reset temporary data in BEFORE mode + * improvement patches + """ + self.improvement_patches = [] + + @property + def bugzilla_id(self): + raise NotImplementedError + + @property + def title(self): + raise NotImplementedError + + def as_dict(self): + """ + Outputs a serializable representation of this revision + """ + raise NotImplementedError diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py new file mode 100644 index 000000000..54123a482 --- /dev/null +++ b/bot/code_review_bot/revisions/github.py @@ -0,0 +1,52 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import requests +import structlog + +from code_review_bot.revisions import Revision + +logger = structlog.get_logger(__name__) + + +class GithubRevision(Revision): + """ + A revision from a github pull-request + """ + + def __init__(self, repo_url, branch, pull_number, pull_head_sha): + super().__init__() + + self.repo_url = repo_url + self.branch = branch + self.pull_number = pull_number + self.pull_head_sha = pull_head_sha + + # Load the patch from Github + self.patch = self.load_patch() + + def __str__(self): + return f"Github pull request {self.repo_url} #{self.pull_number} ({self.pull_head_sha[:8]})" + + def __repr__(self): + return f"GithubRevision repo_url={self.repo_url} branch={self.branch} pull_number={self.pull_number} sha={self.pull_head_sha}" + + def load_patch(self): + """ + Load the patch content for the current pull request HEAD + """ + # TODO: use specific sha + url = f"{self.repo_url}/pull/{self.pull_number}.diff" + logger.info("Loading github patch", url=url) + resp = requests.get(url, allow_redirects=True) + resp.raise_for_status() + return resp.content.decode() + + def as_dict(self): + return { + "repo_url": self.repo_url, + "branch": self.branch, + "pull_number": self.pull_number, + "pull_head_sha": self.pull_head_sha, + } diff --git a/bot/code_review_bot/revisions.py b/bot/code_review_bot/revisions/phabricator.py similarity index 71% rename from bot/code_review_bot/revisions.py rename to bot/code_review_bot/revisions/phabricator.py index d6b80ef3e..05bf293b8 100644 --- a/bot/code_review_bot/revisions.py +++ b/bot/code_review_bot/revisions/phabricator.py @@ -3,74 +3,27 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. import os -import random import time import urllib.parse -from datetime import timedelta from pathlib import Path import requests -import rs_parsepatch import structlog from libmozdata.phabricator import PhabricatorAPI -from code_review_bot import InvalidRepository, InvalidTrigger, Issue, stats, taskcluster +from code_review_bot import InvalidRepository, InvalidTrigger from code_review_bot.config import ( REPO_AUTOLAND, REPO_MOZILLA_CENTRAL, GetAppUserAgent, settings, ) -from code_review_bot.tasks.base import AnalysisTask +from code_review_bot.revisions import Revision logger = structlog.get_logger(__name__) -class ImprovementPatch: - """ - An improvement patch built by the bot - """ - - def __init__(self, analyzer, patch_name, content): - assert isinstance(analyzer, AnalysisTask) - - # Build name from analyzer and revision - self.analyzer = analyzer - self.name = f"{self.analyzer.name}-{patch_name}.diff" - self.content = content - self.url = None - self.path = None - - def __str__(self): - return f"{self.analyzer.name}: {self.url or self.path or self.name}" - - def write(self): - """ - Write patch on local FS, for dev & tests only - """ - self.path = os.path.join(settings.taskcluster.results_dir, self.name) - with open(self.path, "w") as f: - length = f.write(self.content) - logger.info("Improvement patch saved", path=self.path, length=length) - - def publish(self, days_ttl=30): - """ - Push through Taskcluster API to setup the content-type header - so it displays nicely in browsers - """ - assert ( - not settings.taskcluster.local - ), "Only publish on online Taskcluster tasks" - self.url = taskcluster.upload_artifact( - f"public/patch/{self.name}", - self.content.encode(), - content_type="text/plain; charset=utf-8", # Displays instead of download - ttl=timedelta(days=days_ttl - 1), - ) - logger.info("Improvement patch published", url=self.url) - - -class Revision: +class PhabricatorRevision(Revision): """ A Phabricator revision to analyze and report on """ @@ -91,9 +44,11 @@ def __init__( base_repository=None, base_repository_conf=None, phabricator_repository=None, - url=None, patch=None, + url=None, ): + super().__init__() + # Identification self.phabricator_id = phabricator_id self.phabricator_phid = phabricator_phid @@ -121,16 +76,8 @@ def __init__( # the phabricator repository payload for later identification self.phabricator_repository = phabricator_repository - # backend's returned URL to list or create issues linked to the revision in bulk (diff is optional) - self.issues_url = None - - # Patches built later on - self.improvement_patches = [] - # Patch analysis self.patch = patch - self.files = [] - self.lines = {} @property def namespaces(self): @@ -168,24 +115,6 @@ def from_autoland(self): def from_mozilla_central(self): return self.head_repository == REPO_MOZILLA_CENTRAL - @property - def before_after_feature(self): - """ - Randomly run the before/after feature depending on a configured ratio. - All the diffs of a revision must be analysed with or without the feature. - """ - if getattr(self, "id", None) is None: - logger.debug( - "Backend ID must be set to determine if using the before/after feature. Skipping." - ) - return False - # Set random module pseudo-random seed based on the revision ID to - # ensure that successive calls to random.random will return deterministic values - random.seed(self.id) - return random.random() < taskcluster.secrets.get("BEFORE_AFTER_RATIO", 0) - # Reset random module seed to prevent deterministic values after calling that function - random.seed(os.urandom(128)) - def __repr__(self): if self.diff_phid: # Most revisions have a Diff from Phabricator @@ -201,13 +130,12 @@ def __str__(self): return f"Phabricator #{self.diff_id} - {self.diff_phid}" @staticmethod - def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI): + def from_try_task( + code_review: dict, decision_task: dict, phabricator: PhabricatorAPI + ): """ Load identifiers from Phabricator, using the remote task description """ - - # Load build target phid from the task env - code_review = try_task["extra"]["code-review"] build_target_phid = code_review.get("phabricator-diff") or code_review.get( "phabricator-build-target" ) @@ -412,29 +340,6 @@ def from_phabricator_trigger(build_target_phid: str, phabricator: PhabricatorAPI repository_try_name=repository.try_name, ) - def analyze_patch(self): - """ - Analyze loaded patch to extract modified lines - and statistics - """ - assert self.patch is not None, "Missing patch" - assert isinstance(self.patch, str), "Invalid patch type" - - # List all modified lines from current revision changes - patch_stats = rs_parsepatch.get_lines(self.patch) - assert len(patch_stats) > 0, "Empty patch" - - self.lines = {stat["filename"]: stat["added_lines"] for stat in patch_stats} - - # Shortcut to files modified - self.files = self.lines.keys() - - # Report nb of files and lines analyzed - stats.add_metric("analysis.files", len(self.files)) - stats.add_metric( - "analysis.lines", sum(len(line) for line in self.lines.values()) - ) - def load_file(self, path): """ Load a file content at current revision from remote HGMO @@ -467,70 +372,6 @@ def load_file(self, path): return content - def has_file(self, path): - """ - Check if the path is in this patch - """ - assert isinstance(path, str) - return path in self.files - - def contains(self, issue): - """ - Check if the issue (path+lines) is in this patch - """ - assert isinstance(issue, Issue) - - # Get modified lines for this issue - modified_lines = self.lines.get(issue.path) - if modified_lines is None: - return False - - # Empty line means full file - if issue.line is None: - return True - - # Detect if this issue is in the patch - lines = set(range(issue.line, issue.line + issue.nb_lines)) - return not lines.isdisjoint(modified_lines) - - @property - def has_clang_files(self): - """ - Check if this revision has any file that might - be a C/C++ file - """ - - def _is_clang(filename): - _, ext = os.path.splitext(filename) - return ext.lower() in settings.cpp_extensions - - return any(_is_clang(f) for f in self.files) - - @property - def has_clang_header_files(self): - """ - Check if this revision has any file that might - be a C/C++ header file - """ - - def _is_clang_header(filename): - _, ext = os.path.splitext(filename) - return ext.lower() in settings.cpp_header_extensions - - return any(_is_clang_header(f) for f in self.files) - - @property - def has_idl_files(self): - """ - Check if this revision has any idl files - """ - - def _is_idl(filename): - _, ext = os.path.splitext(filename) - return ext.lower() in settings.idl_extensions - - return any(_is_idl(f) for f in self.files) - @property def is_blacklisted(self): """Check if the revision author is in the black-list""" @@ -541,22 +382,6 @@ def is_blacklisted(self): logger.info("Revision from a blacklisted user", revision=self, author=author) return True - def add_improvement_patch(self, analyzer, content): - """ - Save an improvement patch, and make it available - as a Taskcluster artifact - """ - assert isinstance(content, str) - assert len(content) > 0 - self.improvement_patches.append(ImprovementPatch(analyzer, repr(self), content)) - - def reset(self): - """ - Reset temporary data in BEFORE mode - * improvement patches - """ - self.improvement_patches = [] - @property def bugzilla_id(self): if self.revision is None: diff --git a/bot/code_review_bot/workflow.py b/bot/code_review_bot/workflow.py index d4ded08e5..227d6c172 100644 --- a/bot/code_review_bot/workflow.py +++ b/bot/code_review_bot/workflow.py @@ -21,6 +21,7 @@ from code_review_bot.mercurial import MercurialWorker, Repository, robust_checkout from code_review_bot.report.debug import DebugReporter from code_review_bot.revisions import Revision +from code_review_bot.revisions.github import GithubRevision from code_review_bot.sources.phabricator import ( PhabricatorActions, PhabricatorBuildState, @@ -665,6 +666,13 @@ def update_status(self, revision, state): Update build status on HarborMaster """ assert isinstance(state, BuildState) + + if isinstance(revision, GithubRevision): + logger.warning( + "No github status update yet", revision=revision, status=state + ) + return + if not revision.build_target_phid: logger.info( "No build target found, skipping HarborMaster update", state=state.value