From bbc792f530a5b1ece36b5a6f2e3a8953e9231c65 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 3 Jul 2019 13:05:35 +0100 Subject: [PATCH 1/4] add --show-email, fix Zenodo metadata - addresses https://github.com/maintainers/discussions/issues/210#issuecomment-507841626 --- .zenodo.json | 4 ++-- README.rst | 20 ++++++++++++++++++++ git-fame_completion.bash | 2 +- gitfame/_gitfame.py | 16 ++++++++++++---- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 3dda76a..0d0593e 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,8 +1,8 @@ -{ "metadata": { +{ "title": "git-fame: Pretty-print `git` repository collaborators sorted by contributions", "keywords": [ "git", "blame", "git-blame", "git-log", "code-analysis", "cost", "loc", "author", "commit", "shortlog", "ls-files"], "creators": [ {"name": "da Costa-Luis, Casper O.", "orcid": "0000-0002-7211-1557"}] -}} +} diff --git a/README.rst b/README.rst index 0ef5657..7fe762f 100644 --- a/README.rst +++ b/README.rst @@ -180,6 +180,7 @@ Documentation -w, --ignore-whitespace Ignore whitespace when comparing the parent's version and the child's to find where the lines came from [default: False]. + -e, --show-email Show author email instead of name [default: False]. -M Detect intra-file line moves and copies [default: False]. -C Detect inter-file line moves and copies [default: False]. --format= Table format @@ -194,6 +195,25 @@ If multiple user names and/or emails correspond to the same user, aggregate ``git-fame`` statistics and maintain a ``git`` repository properly by adding a `.mailmap file `_. +Examples +-------- + +CODEOWNERS +~~~~~~~~~~ + +Generating +`CODEOWNERS `__: + +.. code:: sh + + for f in $(git ls-files); do + # filename + echo -n "$f " + # author emails if loc distribution >= 30% + git fame -esnwMC --incl "$f" | tail -n+7 | tr '/' '|' \ + | awk -F '|' '$6 >= 30 {print $2}' | xargs echo + done >> .github/CODEOWNERS + Contributions ------------- diff --git a/git-fame_completion.bash b/git-fame_completion.bash index 08d84c5..6ab25d9 100644 --- a/git-fame_completion.bash +++ b/git-fame_completion.bash @@ -23,7 +23,7 @@ _git_fame() ;; *) if [ ${COMP_WORDS[1]} == fame ]; then - COMPREPLY=($(compgen -dW '-h --help -v --version --cost --branch --since --sort --incl --excl -n --no-regex -s --silent-progress --warn-binary -t --bytype -w --ignore-whitespace -M -C --format --manpath --log' -- ${cur})) + COMPREPLY=($(compgen -dW '-h --help -v --version --cost --branch --since --sort --incl --excl -n --no-regex -s --silent-progress --warn-binary -t --bytype -w --ignore-whitespace -e --show-email -M -C --format --manpath --log' -- ${cur})) fi ;; esac diff --git a/gitfame/_gitfame.py b/gitfame/_gitfame.py index eac0f6e..9381d19 100755 --- a/gitfame/_gitfame.py +++ b/gitfame/_gitfame.py @@ -26,6 +26,7 @@ -s, --silent-progress Suppress `tqdm` [default: False]. --warn-binary Don't silently skip files which appear to be binary data [default: False]. + -e, --show-email Show author email instead of name [default: False]. -t, --bytype Show stats per file extension [default: False]. -w, --ignore-whitespace Ignore whitespace when comparing the parent's version and the child's to find where the lines @@ -61,6 +62,9 @@ RE_AUTHS = re.compile( r'^\w+ \d+ \d+ (\d+)\nauthor (.+?)$.*?committer-time (\d+)', flags=re.M | re.DOTALL) +RE_AUTHS_EM = re.compile( + r'^\w+ \d+ \d+ (\d+)\nauthor .*?author-mail <(.*?)>.*?committer-time (\d+)', + flags=re.M | re.DOTALL) # finds all non-escaped commas # NB: does not support escaping of escaped character RE_CSPILT = re.compile(r'(? Date: Wed, 3 Jul 2019 13:28:53 +0100 Subject: [PATCH 2/4] neaten email logic --- gitfame/_gitfame.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/gitfame/_gitfame.py b/gitfame/_gitfame.py index 9381d19..c222afa 100755 --- a/gitfame/_gitfame.py +++ b/gitfame/_gitfame.py @@ -60,10 +60,7 @@ RE_AUTHS = re.compile( - r'^\w+ \d+ \d+ (\d+)\nauthor (.+?)$.*?committer-time (\d+)', - flags=re.M | re.DOTALL) -RE_AUTHS_EM = re.compile( - r'^\w+ \d+ \d+ (\d+)\nauthor .*?author-mail <(.*?)>.*?committer-time (\d+)', + r'^\w+ \d+ \d+ (\d+)\nauthor (.+?)$.*?\ncommitter-time (\d+)', flags=re.M | re.DOTALL) # finds all non-escaped commas # NB: does not support escaping of escaped character @@ -250,11 +247,7 @@ def run(args): getattr(log, "warn" if args.warn_binary else "debug")(fname + ':' + str(e)) continue log.log(logging.NOTSET, blame_out) - if args.show_email: - #git_blame_cmd.append("-e") - loc_auth_times = RE_AUTHS_EM.findall(blame_out) - else: - loc_auth_times = RE_AUTHS.findall(blame_out) + loc_auth_times = RE_AUTHS.findall(blame_out) for loc, auth, tstamp in loc_auth_times: # for each chunk loc = int(loc) @@ -282,14 +275,25 @@ def run(args): for stats in auth_stats.values(): stats.setdefault("commits", 0) log.debug(RE_NCOM_AUTH_EM.findall(auth_commits.strip())) + auth2em = {} for (ncom, auth, em) in RE_NCOM_AUTH_EM.findall(auth_commits.strip()): + auth = _str(auth) + auth2em[auth] = em # TODO: count most used email? try: - auth_stats[_str(em if args.show_email else auth)]["commits"] += int(ncom) + auth_stats[auth]["commits"] += int(ncom) except KeyError: - auth_stats[_str(em if args.show_email else auth)] = {"loc": 0, - "files": set([]), - "commits": int(ncom), - "ctimes": []} + auth_stats[auth] = {"loc": 0, + "files": set([]), + "commits": int(ncom), + "ctimes": []} + if args.show_email: + # replace author name with email + log.debug(auth2em) + old = auth_stats + auth_stats = {} + for auth, stats in getattr(old, 'iteritems', old.items)(): + auth_stats[auth2em[auth]] = stats + del old stats_tot = dict((k, 0) for stats in auth_stats.values() for k in stats) log.debug(stats_tot) From 7dafe73c2162af9bd78da79c12c882c95473ae56 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 13 Aug 2019 22:22:23 +0100 Subject: [PATCH 3/4] fix py26 travis CI tests --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 2d864f6..df66aef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ matrix: include: - python: 2.6 env: TOXENV=py26 + dist: trusty - python: 2.7 env: TOXENV=py27 - python: 3.4 From 72cb28c301524195f1ee778e99b2d5d08573c592 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 16 Aug 2019 23:15:27 +0100 Subject: [PATCH 4/4] update CODEOWNERS docs --- README.rst | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 7fe762f..e8fbdad 100644 --- a/README.rst +++ b/README.rst @@ -206,13 +206,26 @@ Generating .. code:: sh - for f in $(git ls-files); do - # filename - echo -n "$f " - # author emails if loc distribution >= 30% - git fame -esnwMC --incl "$f" | tail -n+7 | tr '/' '|' \ - | awk -F '|' '$6 >= 30 {print $2}' | xargs echo - done >> .github/CODEOWNERS + # bash syntax function for current directory git repository + owners(){ + for f in $(git ls-files); do + # filename + echo -n "$f " + # author emails if loc distribution >= 30% + git fame -esnwMC --incl "$f" | tr '/' '|' \ + | awk -F '|' '(NR>6 && $6>=30) {print $2}' \ + | xargs echo + done + } + + # print to screen and file + owners | tee .github/CODEOWNERS + + # same but with `tqdm` progress for large repos + owners \ + | tqdm --total $(git ls-files | wc -l) \ + --unit file --desc "Generating CODEOWNERS" \ + > .github/CODEOWNERS Contributions -------------