Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Commit

Permalink
Split get_message_id_and_emails method
Browse files Browse the repository at this point in the history
This allows us to retrieve any header value, not only Message-ID. Let's
also reduce the number of requests made by allowing to retrieve multiple
headers at once.

Signed-off-by: Veronika Kabatova <[email protected]>
  • Loading branch information
veruu committed May 14, 2018
1 parent 5f8fbf7 commit 4a6eae4
Showing 1 changed file with 114 additions and 54 deletions.
168 changes: 114 additions & 54 deletions sktm/patchwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,44 +208,73 @@ def get_apiurls(self):

return r.json()

def get_message_id_and_emails(self, pid):
def get_header_value(self, patch_id, *keys):
"""
Get Message-ID and all involved e-mail addresses from patch message
headers.
Get the value(s) of requested message headers.
Since Patchwork 2.1, all the headers with same key are returned in the
API as a list (this is relevant for eg. 'Received' header which can be
present multiple times; before, only one of them was present). Work
around this difference by concatenating the values with double newlines
as a divider (we shouldn't need headers which can be present multiple
times anyways).
Args:
pid: ID of the patch to get header values for.
patch_id: ID of the patch to retrieve header value for.
keys: Keys of the headers which values should be retrieved.
Returns:
The Message-Id header value and a set of e-mail addresses
involved with the patch.
A tuple of strings representing the values of requested headers
from patch.
"""
message_id = None
emails = set()
req = requests.get('%s/%s' % (self.apiurls.get('patches'), patch_id))

if req.status_code != 200:
raise Exception('Failed to get data for patch %d (%d)' %
(patch_id, req.status_code))

res = ()
headers = {}
# Make sure we handle case difference until we switch to mbox parsing
for key, value in req.json().get('headers', {}).items():
headers[key.lower()] = value

for key in keys:
value = headers.get(key.lower(), '')
# We need to handle strings and unicode.
# NOTE: basestring doesn't exist in PY3 but since the retrieval
# will be reworked before we get compatible it shouldn't matter
if isinstance(value, basestring):
res += (value,)
else:
res += ('\n\n'.join([val for val in value]),)

r = requests.get("%s/%s" % (self.apiurls.get("patches"), pid))
return res

if r.status_code != 200:
raise Exception("Failed to get data for patch %s (%d)" % (pid,
r.status_code))
def get_emails(self, pid):
"""
Get all involved e-mail addresses from patch message headers.
pdata = r.json()
headers = pdata.get("headers")

for header_name, header_value in headers.iteritems():
if header_name.lower() in ["from", "to", "cc"]:
for faddr in [x.strip() for x in header_value.split(",")]:
logging.debug("patch=%d; header=%s; email=%s",
pid, header_name, faddr)
maddr = re.search(r"\<([^\>]+)\>", faddr)
if maddr:
emails.add(maddr.group(1))
else:
emails.add(faddr)
elif header_name.lower() == "message-id":
message_id = header_value

return message_id, emails
Args:
pid: ID of the patch to get emails for.
Returns:
A set of e-mail addresses involved with the patch.
"""
emails = set()

logging.debug("getting emails for patch %d from 'from', 'to', 'cc'")
header_values = self.get_header_value(pid, "from", "to", "cc")
for header_value in header_values:
for faddr in [x.strip() for x in header_value.split(",") if x]:
logging.debug("patch=%d; email=%s", pid, faddr)
maddr = re.search(r"\<([^\>]+)\>", faddr)
if maddr:
emails.add(maddr.group(1))
else:
emails.add(faddr)

return emails

def get_series_from_url(self, url):
"""
Expand Down Expand Up @@ -297,8 +326,9 @@ def get_series_from_url(self, url):
logging.info("patch [%d] %s", patch.get("id"),
patch.get("name"))
plist.append(self.patchurl(patch))
message_id, emails = self.get_message_id_and_emails(
patch.get("id"))
message_id = self.get_header_value(patch.get("id"),
'Message-ID')
emails = self.get_emails(patch.get("id"))
logging.debug("patch [%d] message_id: %s", patch.get("id"),
message_id)
logging.debug("patch [%d] emails: %s", patch.get("id"),
Expand Down Expand Up @@ -743,35 +773,61 @@ def get_patch_list(self, filt):

return patches

def get_message_id_and_emails(self, pid):
def get_header_value(self, patch_id, *keys):
"""
Get Message-ID and all involved e-mail addresses from patch message
headers.
Get the value(s) of requested message headers.
In case multiple headers with the same key are present (this is
relevant for eg. 'Received' header), concatenating the values with
double newlines as a divider (we shouldn't need headers which can be
present multiple times anyways).
Args:
patch_id: ID of the patch to retrieve header value for.
keys: Keys of the headers which value should be retrieved.
Returns:
A tuple of strings representing the value of requested headers
from patch.
"""
mbox_string = stringify(self.rpc.patch_get_mbox(patch_id))
mbox_email = email.message_from_string(mbox_string)

res = ()

for key in keys:
value = mbox_email.get_all(key, [''])
if len(value) == 1:
res += (header_value[0],)
else:
res += ('\n\n'.join([val for val in value]),)

return res

def get_emails(self, pid):
"""
Get all involved e-mail addresses from patch message headers.
Args:
pid: ID of the patch to get header values for.
Returns:
The Message-Id header value and a set of e-mail addresses
involved with the patch.
A set of e-mail addresses involved with the patch.
"""
emails = set()

mboxdata = stringify(self.rpc.patch_get_mbox(pid))
mbox = email.message_from_string(mboxdata)

for header in ["From", "To", "Cc"]:
if header in mbox:
for faddr in [x.strip() for x in mbox[header].split(",")]:
logging.debug("patch=%d; header=%s; email=%s", pid, header,
faddr)
maddr = re.search(r"\<([^\>]+)\>", faddr)
if maddr:
emails.add(maddr.group(1))
else:
emails.add(faddr)
logging.debug("getting emails for patch %d from 'from', 'to', 'cc'")
header_values = self.get_header_value(pid, "From", "To", "Cc")
for header_value in header_values:
for faddr in [x.strip() for x in mbox[header].split(",") if x]:
logging.debug("patch=%d; email=%s", pid, faddr)
maddr = re.search(r"\<([^\>]+)\>", faddr)
if maddr:
emails.add(maddr.group(1))
else:
emails.add(faddr)

return mbox["Message-ID"], emails
return emails

def set_patch_check(self, pid, jurl, result):
"""
Expand All @@ -796,8 +852,10 @@ def dump_patch(self, pid):
"""
patch = self.get_patch_by_id(pid)
print "pinfo=%s\n" % patch
print "message_id=%s\nemails=%s\n" % \
self.get_message_id_and_emails(pid)
print "message_id=%s\nemails=%s\n" % (
self.get_header_value(pid, 'Message-ID'),
self.get_emails(pid)
)

# TODO Move this to __init__ or make it a class method
def get_projectid(self, projectname):
Expand Down Expand Up @@ -921,7 +979,8 @@ def parse_patch(self, patch):
for cpatch in sorted(self.series[seriesid].keys()):
patch = self.series[seriesid].get(cpatch)
pid = patch.get("id")
message_id, emails = self.get_message_id_and_emails(pid)
message_id = self.get_header_value(pid, 'Message-ID')
emails = self.get_emails(pid)
self.log_patch(pid, patch.get("name"), message_id, emails)
all_emails = all_emails.union(emails)
patchset.append(self.patchurl(patch))
Expand All @@ -932,7 +991,8 @@ def parse_patch(self, patch):
result = PatchsetSummary(message_id, all_emails, patchset)
# Else, it's a single patch
else:
message_id, emails = self.get_message_id_and_emails(pid)
message_id = self.get_header_value(pid, 'Message-ID')
emails = self.get_emails(pid)
self.log_patch(pid, pname, message_id, emails)
result = PatchsetSummary(message_id, emails,
[self.patchurl(patch)])
Expand Down

0 comments on commit 4a6eae4

Please sign in to comment.