From 2cb9726ecf33de0d416f97b4fab0f48ce25b0ffb Mon Sep 17 00:00:00 2001 From: Mika Peltekis Date: Tue, 21 Feb 2023 15:02:29 +0100 Subject: [PATCH 1/3] WIP: Change way we check old Commiters --- igcommit/commit_list_checks.py | 57 ++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/igcommit/commit_list_checks.py b/igcommit/commit_list_checks.py index 5740af1..e55d0fa 100644 --- a/igcommit/commit_list_checks.py +++ b/igcommit/commit_list_checks.py @@ -6,8 +6,11 @@ from time import time + from igcommit.base_check import BaseCheck, Severity -from igcommit.git import Commit, CommitList +from igcommit.git import Commit, CommitList, git_exe_path + +from subprocess import check_output class CommitListCheck(BaseCheck): @@ -138,22 +141,44 @@ def prepare(self, obj): return new def get_problems(self): - old_contributors = self.get_old_contributors() for commit in self.commit_list: - for contributor in commit.get_contributors(): - found = self.index_contributors(old_contributors, contributor) - - for problem in self.check_contributor(contributor, commit): - yield problem - - # If there are any problems, evidently the contributor - # is not consistent with the indexes. We override - # the indexes after reporting problem to avoid the same - # ones to be reported again. - found = False - - if not found: - self.index_contributor(contributor, override=True) + author, committer = commit.get_contributors() + author = check_output([ + git_exe_path, + '--no-pager', + 'log', + '--pretty=format:"%H"', + f'--author "{author.name}"', + '-1', + ]).decode('utf-8') + + committer = check_output([ + git_exe_path, + '--no-pager', + 'log', + '--pretty=format:"%H"', + f'--committer "{committer.name}"', + '-1', + ]).decode('utf-8') + + contributor = author + committer + print(contributor) + # use git log to search for author (important that git log just shows us commits before the pre-receive) + # use git log to search for committer + # if nothing is in there, we know that there is no former commit of those contributors + # if found then we directly can check with the last found specific commit + # in the end we can store the contributors in a list to not check them again + for problem in self.check_contributor(contributor, commit): + yield problem + + # If there are any problems, evidently the contributor + # is not consistent with the indexes. We override + # the indexes after reporting problem to avoid the same + # ones to be reported again. + found = False + + if not found: + self.index_contributor(contributor, override=True) def get_old_commits(self): """Yield old commits in reverse order From 1011ad995b4a7c82d48dfe4763a08de2fafb2cb6 Mon Sep 17 00:00:00 2001 From: Yannik Schwieger Date: Tue, 21 Feb 2023 15:24:10 +0100 Subject: [PATCH 2/3] Fix command error --- igcommit/commit_list_checks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/igcommit/commit_list_checks.py b/igcommit/commit_list_checks.py index e55d0fa..fb182d9 100644 --- a/igcommit/commit_list_checks.py +++ b/igcommit/commit_list_checks.py @@ -148,7 +148,8 @@ def get_problems(self): '--no-pager', 'log', '--pretty=format:"%H"', - f'--author "{author.name}"', + '--author', + author.name, '-1', ]).decode('utf-8') @@ -157,7 +158,8 @@ def get_problems(self): '--no-pager', 'log', '--pretty=format:"%H"', - f'--committer "{committer.name}"', + '--committer', + committer.name, '-1', ]).decode('utf-8') From 8fde178dde95cb120b674afc9e6943b2bc1b3b61 Mon Sep 17 00:00:00 2001 From: Mika Peltekis Date: Tue, 21 Feb 2023 17:52:00 +0100 Subject: [PATCH 3/3] Change way we check old Commits --- igcommit/commit_list_checks.py | 197 +++++++++++++-------------------- 1 file changed, 76 insertions(+), 121 deletions(-) diff --git a/igcommit/commit_list_checks.py b/igcommit/commit_list_checks.py index fb182d9..9ccc66b 100644 --- a/igcommit/commit_list_checks.py +++ b/igcommit/commit_list_checks.py @@ -8,7 +8,7 @@ from igcommit.base_check import BaseCheck, Severity -from igcommit.git import Commit, CommitList, git_exe_path +from igcommit.git import Commit, CommitList, git_exe_path, Contributor from subprocess import check_output @@ -142,141 +142,96 @@ def prepare(self, obj): def get_problems(self): for commit in self.commit_list: - author, committer = commit.get_contributors() - author = check_output([ + for contributor in commit.get_contributors(): + # use git log to search for author (important that git log just shows us commits before the pre-receive) + # use git log to search for committer + # if nothing is in there, we know that there is no former commit of those contributors + # if found then we directly can check with the last found specific commit + # in the end we can store the contributors in a list to not check them again + for problem in self.check_contributor(contributor, commit): + yield problem + + def check_contributor(self, contributor, commit): + """Check one contributor against the indexes""" + + # TODO: Take always two and check for the latest one. + # git log with email -> check if name is the same + # `git log ... --author "^.+ <{email}>$" -E + for log_check in ['author', 'committer']: + commit_hash = check_output([ git_exe_path, '--no-pager', 'log', - '--pretty=format:"%H"', - '--author', - author.name, + '--pretty=format:%H', + f'--{log_check}', + f'^.+ <{contributor.email}>$', + '-E', '-1', ]).decode('utf-8') + if commit_hash: + break + + if commit_hash: + commit = Commit(commit_hash) + cb = getattr(commit, f'get_{log_check}') + other = cb() + if other and contributor.name != other.name: + yield ( + Severity.ERROR, + 'contributor of commit {} has a different name "{}" ' + 'than "{}" the contributor with the same email address' + .format(commit, contributor.name, other.name), + ) - committer = check_output([ + # git log with just domain `git log ... --author "^.+ <.*@{get_email_domain()}>$" -E` + # if no hash could be found, then this is a new domain + for log_check in ['author', 'committer']: + commit_hash = check_output([ git_exe_path, '--no-pager', 'log', - '--pretty=format:"%H"', - '--committer', - committer.name, + '--pretty=format:%H', + f'--{log_check}', + f'^.+ <.*@{contributor.get_email_domain()}>$', + '-E', '-1', ]).decode('utf-8') + if commit_hash: + break - contributor = author + committer - print(contributor) - # use git log to search for author (important that git log just shows us commits before the pre-receive) - # use git log to search for committer - # if nothing is in there, we know that there is no former commit of those contributors - # if found then we directly can check with the last found specific commit - # in the end we can store the contributors in a list to not check them again - for problem in self.check_contributor(contributor, commit): - yield problem - - # If there are any problems, evidently the contributor - # is not consistent with the indexes. We override - # the indexes after reporting problem to avoid the same - # ones to be reported again. - found = False - - if not found: - self.index_contributor(contributor, override=True) - - def get_old_commits(self): - """Yield old commits in reverse order - - We could call "git rev-list" here, but it is easy enough to implement - the same using the content we already need for other reasons. Though - "git rev-list" would order the commits nicer, we are not putting any - effort to the ordering in here as our caller is not sensitive. - """ - unused_commits = self.commit_list[0].get_parents() - commit_ids = {c.commit_id for c in unused_commits} - while unused_commits: - unused_commit = unused_commits.pop(0) - yield unused_commit - for commit in unused_commit.get_parents(): - if commit.commit_id not in commit_ids: - unused_commits.append(commit) - commit_ids.add(commit.commit_id) - - def get_old_contributors(self): - for commit in self.get_old_commits(): - for contributor in commit.get_contributors(): - yield contributor - - def index_contributor(self, contributor, override=False, dry_run=False): - """Index a single contributor - - The function does nothing when the contributor is already indexed. - It returns true when if would add the contributor to any index. - The dry_run argument is used to test only, if the contributor is - indexed. - """ - found = False - - if contributor.email not in self.email_index or override: - if not dry_run: - self.email_index[contributor.email] = contributor - found = True - - domain = contributor.get_email_domain() - if domain not in self.domain_index or override: - if not dry_run: - self.domain_index[domain] = None - found = True - - if (contributor.name, domain) not in self.name_index or override: - if not dry_run: - self.name_index[(contributor.name, domain)] = contributor - found = True - - return found - - def index_contributors(self, contributors, searched): - """Index contributors until the searched one is found - - The function stops when the searched one is found. We assume - the caller to pass the existing generator again to avoid starting over - to search the next one. It returns with true when the searched one - is found; returns with false when all of the items are indexes but - the searched one is not found. - """ - while self.index_contributor(searched, dry_run=True): - for contributor in contributors: - if self.index_contributor(contributor): - break - else: - return False - return True - - def check_contributor(self, contributor, commit): - """Check one contributor against the indexes""" - - other = self.email_index.get(contributor.email) - if other and contributor.name != other.name: - yield ( - Severity.ERROR, - 'contributor of commit {} has a different name "{}" ' - 'than "{}" the contributor with the same email address' - .format(commit, contributor.name, other.name), - ) - - domain = contributor.get_email_domain() - if domain not in self.domain_index: + if not commit_hash: yield ( Severity.NOTICE, 'contributor of commit {} has a email address with a new ' 'domain "{}" ' - .format(commit, domain), + .format(commit, contributor.get_email_domain()), ) - other = self.name_index.get((contributor.name, domain)) - if other and contributor.email != other.email: - yield ( - Severity.ERROR, - 'contributor of commit {} has a different email address "{}" ' - 'with the same domain than "{}" from contributor with ' - 'the same name' - .format(commit, contributor.email, other.email), - ) + # git log with name plus email domain + # git log ... --author "^{name} <.*@{get_email_domain()}>$" + for log_check in ['author', 'committer']: + commit_hash = check_output([ + git_exe_path, + '--no-pager', + 'log', + '--pretty=format:%H', + f'--{log_check}', + f'^{contributor.name} <.*@{contributor.get_email_domain()}>$', + '-E', + '-1', + ]).decode('utf-8') + if commit_hash: + break + + if commit_hash: + commit = Commit(commit_hash) + cb = getattr(commit, f'get_{log_check}') + other = cb() + if other and contributor.email != other.email: + yield ( + Severity.ERROR, + 'contributor of commit {} has a different email address "{}" ' + 'with the same domain than "{}" from contributor with ' + 'the same name' + .format(commit, contributor.email, other.email), + )