Files
modules/script/gitlog2changelog.py.in
Xavier Delaruelle fbaaaaa858 script: sync gitlog2changelog.py with upstream
Update gitlog2changelog.py script with recent changes made on it in its
upstream repository (https://github.com/networkupstools/nut/, commit
e4739b9).

Keep our local adaptation:
* TextWrapper specific configuration

Signed-off-by: Xavier Delaruelle <xavier.delaruelle@cea.fr>
2025-05-03 14:52:57 +02:00

317 lines
11 KiB
Python

#!@PYTHON@
# Copyright 2008 Marcus D. Hanwell <marcus@cryos.org>
# Minor changes for NUT by Charles Lepple
# Subsequent maintenance for NUT by Jim Klimov (since 2021)
# Distributed under the terms of the GNU General Public License v2 or later
import re
import os
from textwrap import TextWrapper
import sys
import subprocess
# Python 3 compatibility hack: make sure a callable named "unicode" exists.
# Python 2 offers it as a built-in; where it is missing, "str" is used in
# its place.
try:
    try:
        import unicode
    except ImportError:
        # No such module; maybe "unicode" is a built-in?
        pass
    # Probe that the name exists and can actually be called
    unicode('')
except (NameError, TypeError) as ex:
    # NameError: neither a built-in nor a module called "unicode" exists.
    # TypeError: a module called "unicode" got imported above, but a module
    # object can not be called like the built-in type.
    #DEBUG# sys.stderr.write("Using 'str' as 'unicode': %s\n" % str(ex))
    #DEBUG# sys.stderr.flush()
    unicode = str

# unicodedata is only used to reduce author names to ASCII; that use is
# wrapped in its own exception handler further below, so a failed import is
# tolerated here. Only import problems are ignored: Ctrl+C and sys.exit()
# are no longer swallowed.
try:
    import unicodedata
except ImportError:
    pass
# Revision range handed to "git log": everything reachable from HEAD, unless
# the first command-line argument names a base revision, in which case only
# the commits made after that base are listed.
if len(sys.argv) <= 1:
    rev_range = "HEAD"
else:
    base = sys.argv[1]
    rev_range = "%s..HEAD" % base
# Execute git log with the desired command line options.
# Support Python2 and Python3 (esp. 3.6 and earlier) semantics
# with regard to utf-8 content support (avoid ascii decoding in Py3).
#
# fin_mode tells the rest of the script which variant produced "fin":
#   3 - subprocess.Popen() with UTF-8 decoding; "fin" is a list of lines
#   2 - os.popen() fallback; "fin" is a file-like object
#   0 - nothing was read (yet)
fin_mode = 0
# Number of trailing characters to chop off each input line (the end of
# line); splitlines() in the py3 variant already takes care of them.
fin_chop = 0
try:
    p = subprocess.Popen(
        [
            "git",
            "log",
            "--pretty=medium",
            "--summary",
            "--stat",
            "--no-merges",
            "--date=short",
            ("%s" % rev_range),
        ],
        encoding="utf-8",
        close_fds=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    fin, ferr = p.communicate()
    if p.returncode != 0:
        # Complain on stderr: stdout may carry the ChangeLog itself (when
        # CHANGELOG_FILE is "-"). Also relay what git reported, otherwise
        # the reason for the failure is lost.
        sys.stderr.write("ERROR getting git changelog\n")
        if ferr:
            sys.stderr.write(ferr)
        sys.exit(1)
    fin = fin.splitlines()
    fin_mode = 3
except TypeError:
    # This Python does not know the "encoding" argument of Popen():
    # fall back to a plain pipe whose lines keep their end-of-line character.
    fin = os.popen(
        "git log --pretty=medium --summary --stat --no-merges --date=short %s"
        % rev_range,
        "r",
    )
    fin_mode = 2
    fin_chop = 1
except OSError as ex:
    # E.g. the git program could not be started at all
    sys.stderr.write("ERROR getting git changelog: %s\n" % str(ex))
    sys.exit(1)
# Where the generated text goes: a file named "ChangeLog" in the current
# directory by default. The CHANGELOG_FILE environment variable overrides
# that, e.g. to point from a Makefile to a builddir (the caller ensures it
# exists). The special value "-" selects the standard output.
CHANGELOG_FILE = os.environ.get("CHANGELOG_FILE", "ChangeLog")

if CHANGELOG_FILE == "-":
    fout = sys.stdout
elif fin_mode == 3:
    # Input was decoded as UTF-8 by subprocess, write it out the same way
    fout = open(CHANGELOG_FILE, "w", encoding="UTF-8")
else:
    fout = open(CHANGELOG_FILE, "w")
# By default we collect information from a commit and output it as soon as
# we have enough. Part of it is best-effort grouping of a series of commits
# made by the same author on the same day, if they follow each other.
# The alternative is to expend memory to collect all git log entries into a
# dictionary first (key = date+author, value = list of entries) and only
# print the output in the end of processing. This costs more resources, so
# is not default behavior. It is enabled by setting the environment variable
# CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR to "true" (in any letter case).
tmpEnvVar = os.environ.get("CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR", None)
requireGroupByDateAuthor = str(tmpEnvVar).lower() == "true"

# Storage for the grouped entries; stays None when grouping is not requested
cachedContent = None
if requireGroupByDateAuthor:
    try:
        from collections import defaultdict
        cachedContent = defaultdict(list)
    except Exception as x:
        # Report on stderr, since standard output may be the ChangeLog itself
        # (CHANGELOG_FILE set to "-").
        sys.stderr.write("Failed to init requireGroupByDateAuthor processing as defaultdict(list), trying simple dict(): " + str(x) + "\n")
        # Grouping stays enabled: the main loop creates missing list entries
        # on its own, so a plain dict is a fully working replacement.
        cachedContent = dict()
# Parser state describing the commit that is currently being read:
# which of its parts were already seen, and the text gathered so far.
authorFound = dateFound = messageFound = filesFound = False
messageNL = False
message = ""
files = ""
# "date author" heading of the entry written most recently (none yet)
prevAuthorLine = ""

# Author names are kept as they are (legacy default) unless the environment
# variable WITH_PDF_NONASCII_TITLES is set to NO or FALSE (in any letter
# case); YES, TRUE and any other value retain the default.
authorMustBeASCII_inverse_setting = str(os.environ.get("WITH_PDF_NONASCII_TITLES", "")).upper()
authorMustBeASCII = authorMustBeASCII_inverse_setting in ["NO", "FALSE"]

# Formatter for the entry lines.
# See also: https://github.com/python/cpython/blob/main/Lib/textwrap.py
wrapper = TextWrapper(
    width=78,
    initial_indent=" ",
    subsequent_indent=" ",
    break_on_hyphens=False,
)
# The main part of the loop.
# Each line of the git log output is classified by the if/elif chain below
# (order matters: the first matching branch wins). The pieces of a commit
# (author, date, message, changed files) are accumulated in the loop
# variables; once all of them are known the entry is written to fout, or
# cached when requireGroupByDateAuthor is in effect.
# Problems are reported on stderr, because stdout may be the ChangeLog
# itself (CHANGELOG_FILE set to "-").
for line in fin:
    # The commit line marks the start of a new commit object.
    if line.startswith("commit"):
        # Start all over again...
        authorFound = False
        dateFound = False
        messageFound = False
        messageNL = False
        message = ""
        filesFound = False
        files = ""
        continue
    # Match the author line and extract the part we want
    # (Don't use startswith to allow Author override inside commit message.)
    elif "Author:" in line:
        # Plain string split: the separator is a literal, no regex is needed
        authorList = line.split(": ", 1)
        try:
            author = authorList[1]
            # Drop the trailing end of line, if the input mode left one
            author = author[0 : len(author) - fin_chop]
            if authorMustBeASCII:
                try:
                    if isinstance(author, str) and unicode != str:
                        # "unicode" is a type of its own here: decode first
                        author = unicodedata.normalize(u'NFKD', unicode(author, "utf-8")).encode('ascii', 'ignore').decode('utf8')
                    else:
                        author = unicodedata.normalize(u'NFKD', author).encode('ascii', 'ignore').decode('utf8')
                except Exception as e:
                    # Best effort: keep the author name as it was
                    sys.stderr.write("Could not unicodedata.normalize() author '%s': %s\n" % (author, str(e)))
            authorFound = True
        except Exception:
            # Typically an IndexError: no ": " separator on this line
            sys.stderr.write("Could not parse authorList = '%s'\n" % (line))
    # Match the date line
    elif line.startswith("Date:"):
        dateList = line.split(": ", 1)
        try:
            date = dateList[1]
            date = date[0 : len(date) - fin_chop]
            dateFound = True
        except Exception:
            sys.stderr.write("Could not parse dateList = '%s'\n" % (line))
    # The Fossil-IDs are ignored:
    elif line.startswith(" Fossil-ID:") or line.startswith(" [[SVN:"):
        continue
    # The svn-id lines are ignored
    elif " git-svn-id:" in line:
        continue
    # The sign off line is ignored too
    elif "Signed-off-by" in line:
        continue
    # Extract the actual commit message for this commit
    elif authorFound and dateFound and not messageFound:
        # Find the commit message if we can (including the optional
        # details after the title and a blank line)
        # FIXME: Detect end of message by /^#/ to allow for longer essays
        #  in the detailed comments part?
        # FIXME: Some such comments include asciidoc-ish markup, notably
        #  bullet lists - do not concatenate those into one block but do
        #  actually pass them as sub-lists (indented, and perhaps not
        #  starting with an asterisk which we use for this document).
        if len(line) == fin_chop:
            # Empty line: the first one is remembered, the second one
            # ends the message
            if messageNL:
                messageFound = True
            else:
                messageNL = True
        elif len(line) == 3 + fin_chop:
            # NOTE(review): a line of exactly three characters also ends the
            # message; presumably a whitespace-only line - TODO confirm
            messageFound = True
        else:
            # Join the message lines into one space-separated string
            if len(message) == 0:
                message = message + line.strip()
            else:
                message = message + " " + line.strip()
    # If this line is hit all of the files have been stored for this commit
    elif re.search("files? changed", line):
        # Deliberately no "continue" here: the entry is complete now and is
        # written out by the code below right away. Waiting for a further
        # input line would lose the commit whose summary is the very last
        # line of the git log output.
        filesFound = True
    # Collect the files for this commit. FIXME: Still need to add +/- to files
    elif authorFound and dateFound and messageFound:
        fileList = line.split(" | ", 2)
        if len(fileList) > 1:
            if len(files) > 0:
                files = files + ", " + fileList[0].strip()
            else:
                files = fileList[0].strip()

    # All of the parts of the commit have been found - write out the entry
    if authorFound and dateFound and messageFound and filesFound:
        # First the author line, only outputted if it is the first for that
        # author on this day.
        # WARNING: In case of git rebase commit shuffling, merges of dormant
        # branches, etc. we are not guaranteed to have all dates in the list
        # nicely ordered. In fact, the same date+author can be repeated if
        # there were commits with other metadata in git history between those
        # (e.g. many PRs from a few authors merged during one day). While we
        # could cache each section by authorLine and only output in the end,
        # it can require a lot of memory - so by default we do not.
        authorLine = date + " " + author
        if requireGroupByDateAuthor:
            # Needed when cachedContent is a plain dict (not a defaultdict)
            if authorLine not in cachedContent:
                cachedContent[authorLine] = list()
        elif len(prevAuthorLine) == 0:
            fout.write(authorLine + "\n\n")
        elif authorLine != prevAuthorLine:
            fout.write("\n" + authorLine + "\n\n")

        # Assemble the actual commit message line(s); the line length is
        # limited by the TextWrapper when the entry is written.
        # Avoid printing same (or equivalent) filename lists twice, if commit
        # message starts with them.
        if message.startswith(files + ":"):
            commitLine = "* " + message
        else:
            # Compare the names regardless of their order and separators
            namesM = sorted(re.split(r"[ ,]", message.split(":")[0]))
            namesF = sorted(re.split(r"[ ,]", files))
            if namesM == namesF:
                commitLine = "* " + message
            else:
                commitLine = "* " + files + ": " + message

        if requireGroupByDateAuthor:
            cachedContent[authorLine].append(commitLine)
        else:
            # Write out the commit line, wrapped for length
            fout.write(wrapper.fill(commitLine) + "\n")

        # Now reset all the variables ready for a new commit block.
        authorFound = False
        dateFound = False
        messageFound = False
        messageNL = False
        message = ""
        filesFound = False
        files = ""
        prevAuthorLine = authorLine
if requireGroupByDateAuthor:
    # Nothing was printed while parsing, so emit the collected entries now;
    # headings go most recent date first (alphanumerically reverse).
    # Every heading except the first is preceded by an empty line.
    headingPrefix = ""
    for authorLine in sorted(cachedContent, reverse=True):
        fout.write(headingPrefix + authorLine + "\n\n")
        headingPrefix = "\n"
        # Entries keep the order in which they were appended
        for commitLine in cachedContent[authorLine]:
            fout.write(wrapper.fill(commitLine) + "\n")
# Processing is complete: release the input source, then the output stream.
if fin_mode != 3:
    # "fin" is the file-like object returned by os.popen()
    fin.close()
else:
    # "fin" is a plain list of lines; close the pipes of the git process
    p.stdout.close()
    p.stderr.close()
fout.close()