diff --git a/AUTHORS.csv b/AUTHORS.csv new file mode 100644 index 00000000000..1c521f7779c --- /dev/null +++ b/AUTHORS.csv @@ -0,0 +1,11 @@ +Full Name, Surname First, Name Reference +Ernest W. Durbin III, "Durbin, Ernest W., III", Durbin +Inada Naoki, "Inada, Naoki", Inada +Guido van Rossum, "van Rossum, Guido (GvR)", GvR +Just van Rossum, "van Rossum, Just (JvR)", JvR +The Python core team and community, The Python core team and community, python-dev +P.J. Eby, "Eby, Phillip J.", Eby +Greg Ewing, "Ewing, Gregory", Ewing +Jim Jewett, "Jewett, Jim J.", Jewett +Nathaniel Smith, "Smith, Nathaniel J.", Smith +Martin v. Löwis, "von Löwis, Martin", von Löwis diff --git a/genpepindex.py b/genpepindex.py index 2ab6698a05a..a2c89b093b6 100755 --- a/genpepindex.py +++ b/genpepindex.py @@ -19,6 +19,7 @@ import sys import os +import csv import codecs from operator import attrgetter @@ -33,6 +34,15 @@ def main(argv): else: path = argv[1] + # AUTHORS.csv is an exception file for PEP0 name parsing + with open("AUTHORS.csv", "r", encoding="UTF8") as f: + read = csv.DictReader(f, quotechar='"', skipinitialspace=True) + author_exception_data = {} + for line in read: + full_name = line.pop("Full Name").strip() + details = {k.strip(): v.strip() for k, v in line.items()} + author_exception_data[full_name] = details + peps = [] if os.path.isdir(path): for file_path in os.listdir(path): @@ -44,7 +54,7 @@ def main(argv): if file_path.startswith("pep-") and file_path.endswith((".txt", "rst")): with codecs.open(abs_file_path, 'r', encoding='UTF-8') as pep_file: try: - pep = PEP(pep_file) + pep = PEP(pep_file, author_exception_data) if pep.number != int(file_path[4:-4]): raise PEPError('PEP number does not match file name', file_path, pep.number) @@ -57,12 +67,13 @@ def main(argv): peps.sort(key=attrgetter('number')) elif os.path.isfile(path): with open(path, 'r') as pep_file: - peps.append(PEP(pep_file)) + peps.append(PEP(pep_file, author_exception_data)) else: raise ValueError("argument 
def sort_authors(authors_dict):
    """Return the authors ordered by ``sort_by`` with duplicates removed.

    ``authors_dict`` is keyed by author objects; only the keys are used.
    Two authors count as duplicates when their ``last_first`` strings are
    equal, in which case the first one in sorted order is kept.
    """
    unique_authors = []
    seen_last_first = set()
    # The list is ordered by ``sort_by`` (derived from the surname), not by
    # ``last_first``, so equal ``last_first`` values are not guaranteed to be
    # adjacent.  ``itertools.groupby`` only merges adjacent items, hence the
    # explicit "seen" set to drop every repeat.
    for author in sorted(authors_dict, key=attrgetter("sort_by")):
        if author.last_first in seen_last_first:
            continue
        seen_last_first.add(author.last_first)
        unique_authors.append(author)
    return unique_authors
def set_name_parts(self):
    """Fill in ``first``, ``last``, ``last_first`` and ``nick`` by parsing.

    The full name in ``self.first_last`` is decomposed with
    ``self._parse_name``.  A mononym is used verbatim for ``last``,
    ``last_first`` and ``nick``; otherwise ``last_first`` becomes
    "surname, forename".  Any suffix reported by the parser is appended to
    ``last_first`` after a comma.
    """
    name_dict = self._parse_name(self.first_last)
    suffix = name_dict.get("suffix")
    if "name" in name_dict:
        mononym = name_dict["name"]
        # ``sort_by`` builds its key from ``self.last``; leaving it empty
        # would sort every mononym under an empty key.
        self.last = mononym
        self.last_first = mononym
        self.nick = mononym
    else:
        self.first = name_dict["forename"].rstrip()
        self.last = name_dict["surname"]
        # Slice instead of indexing so a one-character surname cannot raise
        # IndexError.
        if self.last[1:2] == ".":
            # Add an escape to avoid docutils turning `v.` into `22.`.
            self.last = "\\" + self.last
        self.last_first = ", ".join([self.last, self.first])
        self.nick = self.last

    if suffix:
        self.last_first += f", {suffix}"
If there is a middle initial followed by a full stop, then + combine the following words into a surname (e.g. N. Vander Weele). If + there is a leading, lowercase portion to the last name (e.g. 'van' or + 'von') then include it in the surname. """ - name_partition = full_name.partition(u',') - no_suffix = name_partition[0].strip() - suffix = name_partition[2].strip() - name_parts = no_suffix.split() - part_count = len(name_parts) - if part_count == 1 or part_count == 2: - return name_parts[-1], suffix - else: - assert part_count > 2 + possible_suffixes = ["Jr", "Jr.", "II", "III"] + + suffix_partition = full_name.partition(",") + pre_suffix = suffix_partition[0].strip() + suffix = suffix_partition[2].strip() + + name_parts = pre_suffix.split(" ") + num_parts = len(name_parts) + name = {"suffix": suffix} + + if num_parts == 0: + raise ValueError("Name is empty!") + elif num_parts == 1: + name.update(name=name_parts[0]) + elif num_parts == 2: + name.update(forename=name_parts[0], surname=name_parts[1]) + elif num_parts > 2: + # handles III etc. + if name_parts[-1] in possible_suffixes: + new_suffix = " ".join([*name_parts[-1:], suffix]).strip() + name_parts.pop(-1) + name.update(suffix=new_suffix) + + # handles von, van, v. etc. if name_parts[-2].islower(): - return u' '.join(name_parts[-2:]), suffix + forename = " ".join(name_parts[:-2]) + surname = " ".join(name_parts[-2:]) + name.update(forename=forename, surname=surname) + + # handles double surnames after a middle initial (e.g. N. 
Vander Weele) + elif any(s.endswith(".") for s in name_parts): + split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1 + forename = " ".join(name_parts[:split_position]) + surname = " ".join(name_parts[split_position:]) + name.update(forename=forename, surname=surname) + else: - return name_parts[-1], suffix + forename = " ".join(name_parts[:-1]) + surname = " ".join(name_parts[-1:]) + name.update(forename=forename, surname=surname) + + return name class PEP(object): @@ -176,7 +210,7 @@ class PEP(object): u"Rejected", u"Withdrawn", u"Deferred", u"Final", u"Active", u"Draft", u"Superseded") - def __init__(self, pep_file): + def __init__(self, pep_file, author_exceptions: dict): """Init object from an open PEP file object.""" # Parse the headers. self.filename = pep_file @@ -244,7 +278,7 @@ def __init__(self, pep_file): if len(authors_and_emails) < 1: raise PEPError("no authors found", pep_file.name, self.number) - self.authors = list(map(Author, authors_and_emails)) + self.authors = [Author(author_email, author_exceptions) for author_email in authors_and_emails] def _parse_author(self, data): """Return a list of author names and emails."""