-
-
Notifications
You must be signed in to change notification settings - Fork 276
Add support for parsing Git commit messages #1992
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
ziadhany
wants to merge
9
commits into
aboutcode-org:main
Choose a base branch
from
ziadhany:parsing-commit
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
a500ae0
Add initial improver for collect repo fix commits
ziadhany 2017bb8
Add CollectRepoFixCommitPipeline
ziadhany e1fb4e5
Remove a test script related to fix commits and issue tracker
ziadhany f31bca1
Resolve requested changes
ziadhany 12dc381
Improve CollectRepoFixCommitPipeline to use input and ensure it colle…
ziadhany 528857e
Update commit parsing pipeline to support collecting fix commits from…
ziadhany 4885821
Update the target repositories list for parsing git commits logs
ziadhany f1d5ae1
Move the CollectVCSFixCommitPipeline base pipelines to pipes
ziadhany 4c5748c
fix a typo in the test
ziadhany File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
186 changes: 186 additions & 0 deletions
186
vulnerabilities/pipelines/v2_importers/collect_fix_commits.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| from vulnerabilities.pipes.vcs_collector_utils import CollectVCSFixCommitPipeline | ||
|
|
||
|
|
||
class CollectLinuxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``torvalds/linux`` repository on GitHub."""

    pipeline_id = "collect_linux_fix_commits"
    repo_url = "https://github.com/torvalds/linux"
|
|
||
|
|
||
class CollectBusyBoxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``mirror/busybox`` repository on GitHub."""

    pipeline_id = "collect_busybox_fix_commits"
    repo_url = "https://github.com/mirror/busybox"
|
|
||
|
|
||
class CollectNginxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``nginx/nginx`` repository on GitHub."""

    pipeline_id = "collect_nginx_fix_commits"
    repo_url = "https://github.com/nginx/nginx"
|
|
||
|
|
||
class CollectApacheTomcatFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``apache/tomcat`` repository on GitHub."""

    pipeline_id = "collect_apache_tomcat_fix_commits"
    repo_url = "https://github.com/apache/tomcat"
|
|
||
|
|
||
class CollectMysqlServerFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``mysql/mysql-server`` repository on GitHub."""

    pipeline_id = "collect_mysql_server_fix_commits"
    repo_url = "https://github.com/mysql/mysql-server"
|
|
||
|
|
||
class CollectPostgresqlFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``postgres/postgres`` repository on GitHub."""

    pipeline_id = "collect_postgresql_fix_commits"
    repo_url = "https://github.com/postgres/postgres"
|
|
||
|
|
||
class CollectMongodbFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``mongodb/mongo`` repository on GitHub."""

    pipeline_id = "collect_mongodb_fix_commits"
    repo_url = "https://github.com/mongodb/mongo"
|
|
||
|
|
||
class CollectRedisFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``redis/redis`` repository on GitHub."""

    pipeline_id = "collect_redis_fix_commits"
    repo_url = "https://github.com/redis/redis"
|
|
||
|
|
||
class CollectSqliteFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``sqlite/sqlite`` repository on GitHub."""

    pipeline_id = "collect_sqlite_fix_commits"
    repo_url = "https://github.com/sqlite/sqlite"
|
|
||
|
|
||
class CollectPhpFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``php/php-src`` repository on GitHub."""

    pipeline_id = "collect_php_fix_commits"
    repo_url = "https://github.com/php/php-src"
|
|
||
|
|
||
class CollectPythonCpythonFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``python/cpython`` repository on GitHub."""

    pipeline_id = "collect_python_cpython_fix_commits"
    repo_url = "https://github.com/python/cpython"
|
|
||
|
|
||
class CollectRubyFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``ruby/ruby`` repository on GitHub."""

    pipeline_id = "collect_ruby_fix_commits"
    repo_url = "https://github.com/ruby/ruby"
|
|
||
|
|
||
class CollectGoFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``golang/go`` repository on GitHub."""

    pipeline_id = "collect_go_fix_commits"
    repo_url = "https://github.com/golang/go"
|
|
||
|
|
||
class CollectNodeJsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``nodejs/node`` repository on GitHub."""

    pipeline_id = "collect_node_js_fix_commits"
    repo_url = "https://github.com/nodejs/node"
|
|
||
|
|
||
class CollectRustFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``rust-lang/rust`` repository on GitHub."""

    pipeline_id = "collect_rust_fix_commits"
    repo_url = "https://github.com/rust-lang/rust"
|
|
||
|
|
||
class CollectOpenjdkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``openjdk/jdk`` repository on GitHub."""

    pipeline_id = "collect_openjdk_fix_commits"
    repo_url = "https://github.com/openjdk/jdk"
|
|
||
|
|
||
class CollectSwiftFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``swiftlang/swift`` repository on GitHub."""

    pipeline_id = "collect_swift_fix_commits"
    repo_url = "https://github.com/swiftlang/swift"
|
|
||
|
|
||
class CollectDjangoFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``django/django`` repository on GitHub."""

    pipeline_id = "collect_django_fix_commits"
    repo_url = "https://github.com/django/django"
|
|
||
|
|
||
class CollectRailsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``rails/rails`` repository on GitHub."""

    pipeline_id = "collect_rails_fix_commits"
    repo_url = "https://github.com/rails/rails"
|
|
||
|
|
||
class CollectLaravelFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``laravel/framework`` repository on GitHub."""

    pipeline_id = "collect_laravel_fix_commits"
    repo_url = "https://github.com/laravel/framework"
|
|
||
|
|
||
class CollectSpringFrameworkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``spring-projects/spring-framework`` repository on GitHub."""

    pipeline_id = "collect_spring_framework_fix_commits"
    repo_url = "https://github.com/spring-projects/spring-framework"
|
|
||
|
|
||
class CollectReactFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``facebook/react`` repository on GitHub."""

    pipeline_id = "collect_react_fix_commits"
    repo_url = "https://github.com/facebook/react"
|
|
||
|
|
||
class CollectAngularFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``angular/angular`` repository on GitHub."""

    pipeline_id = "collect_angular_fix_commits"
    repo_url = "https://github.com/angular/angular"
|
|
||
|
|
||
class CollectWordpressFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``WordPress/WordPress`` repository on GitHub."""

    pipeline_id = "collect_wordpress_fix_commits"
    repo_url = "https://github.com/WordPress/WordPress"
|
|
||
|
|
||
class CollectDockerMobyFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``moby/moby`` repository on GitHub."""

    pipeline_id = "collect_docker_moby_fix_commits"
    repo_url = "https://github.com/moby/moby"
|
|
||
|
|
||
class CollectKubernetesFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``kubernetes/kubernetes`` repository on GitHub."""

    pipeline_id = "collect_kubernetes_fix_commits"
    repo_url = "https://github.com/kubernetes/kubernetes"
|
|
||
|
|
||
class CollectQemuFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``qemu-project/qemu`` repository on GitLab."""

    pipeline_id = "collect_qemu_fix_commits"
    repo_url = "https://gitlab.com/qemu-project/qemu"
|
|
||
|
|
||
class CollectXenProjectFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``xen-project/xen`` repository on GitHub."""

    pipeline_id = "collect_xen_project_fix_commits"
    repo_url = "https://github.com/xen-project/xen"
|
|
||
|
|
||
class CollectVirtualboxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``mirror/vbox`` repository on GitHub."""

    pipeline_id = "collect_virtualbox_fix_commits"
    repo_url = "https://github.com/mirror/vbox"
|
|
||
|
|
||
class CollectContainerdFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``containerd/containerd`` repository on GitHub."""

    pipeline_id = "collect_containerd_fix_commits"
    repo_url = "https://github.com/containerd/containerd"
|
|
||
|
|
||
class CollectAnsibleFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``ansible/ansible`` repository on GitHub."""

    pipeline_id = "collect_ansible_fix_commits"
    repo_url = "https://github.com/ansible/ansible"
|
|
||
|
|
||
class CollectTerraformFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``hashicorp/terraform`` repository on GitHub."""

    pipeline_id = "collect_terraform_fix_commits"
    repo_url = "https://github.com/hashicorp/terraform"
|
|
||
|
|
||
class CollectWiresharkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``wireshark/wireshark`` repository on GitLab."""

    pipeline_id = "collect_wireshark_fix_commits"
    repo_url = "https://gitlab.com/wireshark/wireshark"
|
|
||
|
|
||
class CollectTcpdumpFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``the-tcpdump-group/tcpdump`` repository on GitHub."""

    pipeline_id = "collect_tcpdump_fix_commits"
    repo_url = "https://github.com/the-tcpdump-group/tcpdump"
|
|
||
|
|
||
class CollectGitFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``git/git`` repository on GitHub."""

    pipeline_id = "collect_git_fix_commits"
    repo_url = "https://github.com/git/git"
|
|
||
|
|
||
class CollectJenkinsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``jenkinsci/jenkins`` repository on GitHub."""

    pipeline_id = "collect_jenkins_fix_commits"
    repo_url = "https://github.com/jenkinsci/jenkins"
|
|
||
|
|
||
class CollectGitlabFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Collect fix commits from the ``gitlab-org/gitlab-foss`` repository on GitLab."""

    pipeline_id = "collect_gitlab_fix_commits"
    repo_url = "https://gitlab.com/gitlab-org/gitlab-foss"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # VulnerableCode is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/vulnerablecode for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
|
|
||
| import re | ||
| import shutil | ||
| import tempfile | ||
| from collections import defaultdict | ||
|
|
||
| from git import Repo | ||
| from packageurl.contrib.url2purl import url2purl | ||
|
|
||
| from vulnerabilities.importer import AdvisoryData | ||
| from vulnerabilities.importer import AffectedPackageV2 | ||
| from vulnerabilities.importer import PackageCommitPatchData | ||
| from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 | ||
|
|
||
|
|
||
class CollectVCSFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect fix commits from any git repository.

    The repository at ``repo_url`` is cloned, every commit message is scanned for
    vulnerability identifiers matching ``patterns``, and one ``AdvisoryData`` is
    yielded per identifier, with the matching commits attached as fix patches.
    """

    # URL of the git repository to clone; concrete pipelines are expected to set it.
    repo_url: str
    # Regular expressions, applied case-insensitively, describing the identifiers
    # to look for in commit messages.
    patterns: list[str] = [
        r"\bCVE-\d{4}-\d{4,19}\b",
        r"GHSA-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}",
    ]

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        # NOTE(review): collect_and_store_advisories is not defined in this class;
        # presumably inherited from the base pipeline -- confirm.
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """Clone the repository as a bare, blob-less clone into a temporary directory."""
        # Record the directory before cloning: if clone_from() raises, self.repo is
        # never assigned and the directory could otherwise not be cleaned up.
        self._clone_dir = tempfile.mkdtemp()
        self.repo = Repo.clone_from(
            url=self.repo_url,
            to_path=self._clone_dir,
            bare=True,
            no_checkout=True,
            multi_options=["--filter=blob:none"],
        )

    def advisories_count(self) -> int:
        """Return the advisory count; this pipeline always reports 0."""
        # NOTE(review): presumably 0 because the count is unknown before the
        # repository has been scanned -- confirm against the base pipeline.
        return 0

    @staticmethod
    def _normalize_vulnerability_id(found):
        """
        Return ``found`` in canonical letter case.

        Matching is case-insensitive, so "cve-2021-1234" and "CVE-2021-1234" must
        map to the same identifier. CVE ids are upper-cased; GHSA ids get an
        upper-case prefix and a lower-case body. Anything else (including
        non-string results from patterns with capture groups) is returned as is.
        """
        if not isinstance(found, str):
            return found
        upper = found.upper()
        if upper.startswith("CVE-"):
            return upper
        if upper.startswith("GHSA-"):
            return "GHSA-" + found[len("GHSA-"):].lower()
        return found

    def extract_vulnerability_id(self, commit) -> list[str]:
        """
        Extract vulnerability ids from a commit message.

        Return the matched vulnerability IDs in order of first appearance,
        normalized to canonical case and without duplicates.
        """
        # A dict is used as an insertion-ordered set.
        unique_ids = {}
        for pattern in self.patterns:
            for found in re.findall(pattern, commit.message, flags=re.IGNORECASE):
                unique_ids.setdefault(self._normalize_vulnerability_id(found), None)
        return list(unique_ids)

    def collect_fix_commits(self):
        """
        Iterate through repository commits and group them by vulnerability identifiers.

        Return a mapping of vuln_id -> [(commit_id, commit_message), ...].
        """
        self.log("Processing git repository fix commits (grouped by vulnerability IDs).")

        grouped_commits = defaultdict(list)
        for commit in self.repo.iter_commits("--all"):
            matched_ids = self.extract_vulnerability_id(commit)
            if not matched_ids:
                continue

            commit_id = commit.hexsha
            commit_message = commit.message.strip()

            for vuln_id in matched_ids:
                grouped_commits[vuln_id].append((commit_id, commit_message))

        self.log(f"Found {len(grouped_commits)} vulnerabilities with related commits.")
        self.log("Finished processing all commits.")
        return grouped_commits

    def collect_advisories(self):
        """
        Generate AdvisoryData objects for each vulnerability ID grouped with its related commits.

        The summary lists every related commit as "<hash>:<message>", one per line.
        """
        self.log("Generating AdvisoryData objects from grouped commits.")
        grouped_commits = self.collect_fix_commits()
        purl = url2purl(self.repo_url)
        for vuln_id, commits_data in grouped_commits.items():
            if not commits_data or not vuln_id:
                continue

            summary = "".join(
                f"{commit_hash}:{commit_message}\n" for commit_hash, commit_message in commits_data
            )
            # De-duplicate hashes while keeping first-seen order so the generated
            # patch list is the same on every run (a plain set is unordered).
            commit_hashes = list(dict.fromkeys(commit_hash for commit_hash, _ in commits_data))

            affected_packages = [
                AffectedPackageV2(
                    package=purl,
                    fixed_by_commit_patches=[
                        PackageCommitPatchData(vcs_url=self.repo_url, commit_hash=commit_hash)
                        for commit_hash in commit_hashes
                    ],
                )
            ]

            yield AdvisoryData(
                advisory_id=vuln_id,
                summary=summary,
                affected_packages=affected_packages,
                url=self.repo_url,
            )

    def clean_downloads(self):
        """Cleanup any temporary repository data."""
        self.log("Cleaning up local repository resources.")
        if hasattr(self, "repo") and self.repo.working_dir:
            shutil.rmtree(path=self.repo.working_dir)
        elif getattr(self, "_clone_dir", None):
            # clone() failed before self.repo was assigned; the directory may be
            # partially populated or already gone, so removal is best-effort.
            shutil.rmtree(path=self._clone_dir, ignore_errors=True)

    def on_failure(self):
        """Ensure cleanup is always performed on failure."""
        self.clean_downloads()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ziadhany let's also add the commit URL for each of these commits as a reference, at least for known VCS hosts like GitHub.