[v2,1/1] scripts: Add add-maintainer.py

Message ID 829b08342568735095bbd3f8c44f435f44688018.1691049436.git.quic_gurus@quicinc.com
State New
Headers
Series Add add-maintainer.py script |

Commit Message

Guru Das Srinagesh Aug. 3, 2023, 8:23 a.m. UTC
  This script runs get_maintainer.py on a given patch file and adds its
output to the patch file in place with the appropriate email headers
"To: " or "Cc: " as the case may be. These new headers are added after
the "From: " line in the patch.

Currently, for a single patch, maintainers are added as "To: ", mailing
lists and all other roles are addded as "Cc: ".

For a series of patches, however, a set-union scheme is employed in
order to solve the all-too-common problem of sending subsets of a patch
series to some lists, which results in important pieces of context such
as the cover letter being dropped. This scheme is as follows:
- Create set-union of all mailing lists corresponding to all patches and
  add this to all patches as "Cc: "
- Create set-union of all other roles corresponding to all patches and
  add this to all patches as "Cc: "
- Create set-union of all maintainers from all patches and use this to
  do the following per patch:
  - add only that specific patch's maintainers as "To: ", and
  - the other maintainers from the other patches as "Cc: "

Please note that patch files that don't have any "Maintainer"s
explicitly listed in their `get_maintainer.pl` output will not have any
"To: " entries added to them; developers are expected to manually make
edits to the added entries in such cases to convert some "Cc: " entries
to "To: " as desired.

The script is quiet by default (only prints errors) and its verbosity
can be adjusted via an optional parameter.

Signed-off-by: Guru Das Srinagesh <quic_gurus@quicinc.com>
---
 scripts/add-maintainer.py | 113 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100755 scripts/add-maintainer.py
  

Comments

Pavan Kondeti Aug. 3, 2023, 9:04 a.m. UTC | #1
On Thu, Aug 03, 2023 at 01:23:16AM -0700, Guru Das Srinagesh wrote:
> This script runs get_maintainer.py on a given patch file and adds its
> output to the patch file in place with the appropriate email headers
> "To: " or "Cc: " as the case may be. These new headers are added after
> the "From: " line in the patch.
> 
> Currently, for a single patch, maintainers are added as "To: ", mailing
> lists and all other roles are addded as "Cc: ".
> 
> For a series of patches, however, a set-union scheme is employed in
> order to solve the all-too-common problem of sending subsets of a patch
> series to some lists, which results in important pieces of context such
> as the cover letter being dropped. This scheme is as follows:
> - Create set-union of all mailing lists corresponding to all patches and
>   add this to all patches as "Cc: "
> - Create set-union of all other roles corresponding to all patches and
>   add this to all patches as "Cc: "
> - Create set-union of all maintainers from all patches and use this to
>   do the following per patch:
>   - add only that specific patch's maintainers as "To: ", and
>   - the other maintainers from the other patches as "Cc: "
> 

Thanks. I have tested this logic by running this script on two patches
from different subsystems. It does what it says.

> Please note that patch files that don't have any "Maintainer"s
> explicitly listed in their `get_maintainer.pl` output will not have any
> "To: " entries added to them; developers are expected to manually make
> edits to the added entries in such cases to convert some "Cc: " entries
> to "To: " as desired.
> 
> The script is quiet by default (only prints errors) and its verbosity
> can be adjusted via an optional parameter.
> 
> Signed-off-by: Guru Das Srinagesh <quic_gurus@quicinc.com>
> ---
>  scripts/add-maintainer.py | 113 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 113 insertions(+)
>  create mode 100755 scripts/add-maintainer.py
> 

Do you need to update MAINTAINERS file?

> diff --git a/scripts/add-maintainer.py b/scripts/add-maintainer.py
> new file mode 100755
> index 000000000000..b1682c2945f9
> --- /dev/null
> +++ b/scripts/add-maintainer.py
> @@ -0,0 +1,113 @@
> +#! /usr/bin/env python3
> +
> +import argparse
> +import logging
> +import os
> +import sys
> +import subprocess
> +import re
> +
> +def gather_maintainers_of_file(patch_file):
> +    all_entities_of_patch = dict()
> +
> +    # Run get_maintainer.pl on patch file
> +    logging.info("GET: Patch: {}".format(os.path.basename(patch_file)))
> +    cmd = ['scripts/get_maintainer.pl']
> +    cmd.extend([patch_file])
> +    p = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
> +    logging.debug("\n{}".format(p.stdout.decode()))
> +
> +    entries = p.stdout.decode().splitlines()
> +
> +    maintainers = []
> +    lists = []
> +    others = []
> +
> +    for entry in entries:
> +        entity = entry.split('(')[0].strip()
> +        if "maintainer" in entry:
> +            maintainers.append(entity)
> +        elif "list" in entry:
> +            lists.append(entity)
> +        else:
> +            others.append(entity)
> +
> +    all_entities_of_patch["maintainers"] = set(maintainers)
> +    all_entities_of_patch["lists"] = set(lists)
> +    all_entities_of_patch["others"] = set(others)
> +
> +    return all_entities_of_patch
> +
> +def add_maintainers_to_file(patch_file, entities_per_file, all_entities_union):
> +    logging.info("ADD: Patch: {}".format(os.path.basename(patch_file)))
> +
> +    # For each patch:
> +    # - Add all lists from all patches in series as Cc:
> +    # - Add all others from all patches in series as Cc:
> +    # - Add only maintainers of that patch as To:
> +    # - Add maintainers of other patches in series as Cc:
> +
> +    lists = list(all_entities_union["all_lists"])
> +    others = list(all_entities_union["all_others"])
> +    file_maintainers = all_entities_union["all_maintainers"].intersection(entities_per_file[os.path.basename(patch_file)].get("maintainers"))
> +    other_maintainers = all_entities_union["all_maintainers"].difference(entities_per_file[os.path.basename(patch_file)].get("maintainers"))
> +
> +    # Specify email headers appropriately
> +    cc_lists        = ["Cc: " + l for l in lists]
> +    cc_others       = ["Cc: " + o for o in others]
> +    to_maintainers  = ["To: " + m for m in file_maintainers]
> +    cc_maintainers  = ["Cc: " + om for om in other_maintainers]
> +    logging.debug("Cc Lists:\n{}".format('\n'.join(cc_lists)))
> +    logging.debug("Cc Others:\n{}".format('\n'.join(cc_others)))
> +    logging.debug("Cc Maintainers:\n{}".format('\n'.join(cc_maintainers) or None))
> +    logging.debug("To Maintainers:\n{}\n".format('\n'.join(to_maintainers) or None))
> +
> +    # Edit patch file in place to add maintainers
> +    with open(patch_file, "r") as pf:
> +        lines = pf.readlines()
> +
> +    from_line = [i for i, line in enumerate(lines) if re.search("From: ", line)]
> +    if len(from_line) > 1:
> +        logging.error("Only one From: line is allowed in a patch file")
> +        sys.exit(1)
> +

Few minor issues from my limited testing:

- It is very unlikely, but for whatever reason if "From:" is present in
the patch (commit description), this script bails out. Pls try running
this script on the current patch. May be you should also look for a
proper email address on this line.

- When this script is run on a file (get_maintainer.pl allows this), it
  throws a runtime warning. may be good to bail out much earlier.

- When this script runs on a non-existent file, it does not bail out
  early.

Thanks,
Pavan
  
Guru Das Srinagesh Aug. 10, 2023, 6:52 p.m. UTC | #2
On Aug 03 2023 14:34, Pavan Kondeti wrote:
> On Thu, Aug 03, 2023 at 01:23:16AM -0700, Guru Das Srinagesh wrote:
> > This script runs get_maintainer.py on a given patch file and adds its
> > output to the patch file in place with the appropriate email headers
> > "To: " or "Cc: " as the case may be. These new headers are added after
> > the "From: " line in the patch.
> > 
> > Currently, for a single patch, maintainers are added as "To: ", mailing
> > lists and all other roles are addded as "Cc: ".
> > 
> > For a series of patches, however, a set-union scheme is employed in
> > order to solve the all-too-common problem of sending subsets of a patch
> > series to some lists, which results in important pieces of context such
> > as the cover letter being dropped. This scheme is as follows:
> > - Create set-union of all mailing lists corresponding to all patches and
> >   add this to all patches as "Cc: "
> > - Create set-union of all other roles corresponding to all patches and
> >   add this to all patches as "Cc: "
> > - Create set-union of all maintainers from all patches and use this to
> >   do the following per patch:
> >   - add only that specific patch's maintainers as "To: ", and
> >   - the other maintainers from the other patches as "Cc: "
> > 
> 
> Thanks. I have tested this logic by running this script on two patches
> from different subsystems. It does what it says.

Thanks for testing this v2!

> 
> > Please note that patch files that don't have any "Maintainer"s
> > explicitly listed in their `get_maintainer.pl` output will not have any
> > "To: " entries added to them; developers are expected to manually make
> > edits to the added entries in such cases to convert some "Cc: " entries
> > to "To: " as desired.
> > 
> > The script is quiet by default (only prints errors) and its verbosity
> > can be adjusted via an optional parameter.
> > 
> > Signed-off-by: Guru Das Srinagesh <quic_gurus@quicinc.com>
> > ---
> >  scripts/add-maintainer.py | 113 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 113 insertions(+)
> >  create mode 100755 scripts/add-maintainer.py
> > 
> 
> Do you need to update MAINTAINERS file?

Noted.

> 

[...]

> 
> Few minor issues from my limited testing:
> 
> - It is very unlikely, but for whatever reason if "From:" is present in
> the patch (commit description), this script bails out. Pls try running
> this script on the current patch. May be you should also look for a
> proper email address on this line.
> 
> - When this script is run on a file (get_maintainer.pl allows this), it
>   throws a runtime warning. may be good to bail out much earlier.
> 
> - When this script runs on a non-existent file, it does not bail out
>   early.

Will fix these.

Thank you.

Guru Das.
  
Guru Das Srinagesh Aug. 25, 2023, 5 p.m. UTC | #3
On Aug 25 2023 13:44, Nicolas Schier wrote:
> On Thu 24 Aug 2023 14:44:36 GMT, Guru Das Srinagesh wrote:
> > > While testing, I thought that adding addresses without filtering-out duplicates
> > > was odd; but as git-send-email does the unique filtering, it doesn't matter.
> > 
> > Since I'm using `set()` in this script, the uniqueness is guaranteed here as
> > well - there won't be any duplicates.
> 
> I thought about patch files that already have 'To/Cc' headers (e.g.  
> 'git format-patch --to=... --cc=...' or by running add-maintainer.py 
> multiple times for updating the lists of recipients.  The result is a 
> patch file with possible duplicated lines; but as written: it does 
> matter, effectively.

Sorry, did you mean "does" or "does *not*"?

I'll make sure to test v3 of this script out on patches that have To/Cc already
included and also run it multiple times on the same patch (effectively the same
thing).

Thank you.

Guru Das.
  

Patch

diff --git a/scripts/add-maintainer.py b/scripts/add-maintainer.py
new file mode 100755
index 000000000000..b1682c2945f9
--- /dev/null
+++ b/scripts/add-maintainer.py
@@ -0,0 +1,113 @@ 
+#! /usr/bin/env python3
+
+import argparse
+import logging
+import os
+import sys
+import subprocess
+import re
+
+def gather_maintainers_of_file(patch_file):
+    all_entities_of_patch = dict()
+
+    # Run get_maintainer.pl on patch file
+    logging.info("GET: Patch: {}".format(os.path.basename(patch_file)))
+    cmd = ['scripts/get_maintainer.pl']
+    cmd.extend([patch_file])
+    p = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
+    logging.debug("\n{}".format(p.stdout.decode()))
+
+    entries = p.stdout.decode().splitlines()
+
+    maintainers = []
+    lists = []
+    others = []
+
+    for entry in entries:
+        entity = entry.split('(')[0].strip()
+        if "maintainer" in entry:
+            maintainers.append(entity)
+        elif "list" in entry:
+            lists.append(entity)
+        else:
+            others.append(entity)
+
+    all_entities_of_patch["maintainers"] = set(maintainers)
+    all_entities_of_patch["lists"] = set(lists)
+    all_entities_of_patch["others"] = set(others)
+
+    return all_entities_of_patch
+
+def add_maintainers_to_file(patch_file, entities_per_file, all_entities_union):
+    logging.info("ADD: Patch: {}".format(os.path.basename(patch_file)))
+
+    # For each patch:
+    # - Add all lists from all patches in series as Cc:
+    # - Add all others from all patches in series as Cc:
+    # - Add only maintainers of that patch as To:
+    # - Add maintainers of other patches in series as Cc:
+
+    lists = list(all_entities_union["all_lists"])
+    others = list(all_entities_union["all_others"])
+    file_maintainers = all_entities_union["all_maintainers"].intersection(entities_per_file[os.path.basename(patch_file)].get("maintainers"))
+    other_maintainers = all_entities_union["all_maintainers"].difference(entities_per_file[os.path.basename(patch_file)].get("maintainers"))
+
+    # Specify email headers appropriately
+    cc_lists        = ["Cc: " + l for l in lists]
+    cc_others       = ["Cc: " + o for o in others]
+    to_maintainers  = ["To: " + m for m in file_maintainers]
+    cc_maintainers  = ["Cc: " + om for om in other_maintainers]
+    logging.debug("Cc Lists:\n{}".format('\n'.join(cc_lists)))
+    logging.debug("Cc Others:\n{}".format('\n'.join(cc_others)))
+    logging.debug("Cc Maintainers:\n{}".format('\n'.join(cc_maintainers) or None))
+    logging.debug("To Maintainers:\n{}\n".format('\n'.join(to_maintainers) or None))
+
+    # Edit patch file in place to add maintainers
+    with open(patch_file, "r") as pf:
+        lines = pf.readlines()
+
+    from_line = [i for i, line in enumerate(lines) if re.search("From: ", line)]
+    if len(from_line) > 1:
+        logging.error("Only one From: line is allowed in a patch file")
+        sys.exit(1)
+
+    next_line_after_from = from_line[0] + 1
+
+    for o in cc_others:
+        lines.insert(next_line_after_from, o + "\n")
+    for l in cc_lists:
+        lines.insert(next_line_after_from, l + "\n")
+    for om in cc_maintainers:
+        lines.insert(next_line_after_from, om + "\n")
+    for m in to_maintainers:
+        lines.insert(next_line_after_from, m + "\n")
+
+    with open(patch_file, "w") as pf:
+        pf.writelines(lines)
+
+def main():
+    parser = argparse.ArgumentParser(description='Add the respective maintainers and mailing lists to patch files')
+    parser.add_argument('patches', nargs='*', help="One or more patch files")
+    parser.add_argument('--verbosity', choices=['debug', 'info', 'error'], default='error', help="Verbosity level of script output")
+    args = parser.parse_args()
+
+    logging.basicConfig(level=args.verbosity.upper(), format='%(levelname)s: %(message)s')
+
+    entities_per_file = dict()
+
+    for patch in args.patches:
+        entities_per_file[os.path.basename(patch)] = gather_maintainers_of_file(patch)
+
+    all_entities_union = {"all_maintainers": set(), "all_lists": set(), "all_others": set()}
+    for patch in args.patches:
+        all_entities_union["all_maintainers"] = all_entities_union["all_maintainers"].union(entities_per_file[os.path.basename(patch)].get("maintainers"))
+        all_entities_union["all_lists"] = all_entities_union["all_lists"].union(entities_per_file[os.path.basename(patch)].get("lists"))
+        all_entities_union["all_others"] = all_entities_union["all_others"].union(entities_per_file[os.path.basename(patch)].get("others"))
+
+    for patch in args.patches:
+        add_maintainers_to_file(patch, entities_per_file, all_entities_union)
+
+    logging.info("Maintainers added to all patch files successfully")
+
+if __name__ == "__main__":
+    main()