Skip to content

Commit 607a121

Browse files
committed
Port tool parseContributorsFromChanges.py to use logchange
1 parent eb79f72 commit 607a121

File tree

2 files changed

+56
-37
lines changed

2 files changed

+56
-37
lines changed

dev-tools/scripts/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,15 @@ Each YAML file complies with the schema outlined in `dev-docs/changelog.adoc`.
186186

187187
Validates changelog folder structure and feature distribution across development branches (main, stable, release). See dev-docs for more.
188188

189+
### parseContributorsFromChanges.py
190+
191+
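Extracts the unique author names from all YAML changelog files in a version folder (`changelog/v<version>/`), excluding `solrbot`, and prints them sorted alphabetically: first one name per line, then as a single comma-separated list. Used by the Release Manager (RM) to assemble release notes.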
192+
193+
usage: parseContributorsFromChanges.py <version>
194+
195+
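# Example: Extract contributors for version 9.10.0 (run from the directory that contains the changelog/ folder, since the path is resolved relative to the current working directory)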
196+
python3 dev-tools/scripts/parseContributorsFromChanges.py 9.10.0
197+
189198
### gitignore-gen.sh
190199

191200
TBD

dev-tools/scripts/parseContributorsFromChanges.py

100644100755
Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
13
# Licensed to the Apache Software Foundation (ASF) under one or more
24
# contributor license agreements. See the NOTICE file distributed with
35
# this work for additional information regarding copyright ownership.
@@ -14,51 +16,59 @@
1416
# limitations under the License.
1517

1618
import sys
17-
import re
18-
from collections import defaultdict
19+
import yaml
20+
from pathlib import Path
1921

20-
# Read data from standard input
21-
data = sys.stdin.read()
22+
def print_usage():
23+
print("Usage: parseContributorsFromChanges.py <version>")
24+
print(" <version>: Version number (e.g., 9.10.0)")
25+
print("\nThis script parses all YAML files in changelog/v<version>/ and extracts unique authors.")
26+
print("Output is a comma-separated list of authors sorted by name.")
27+
sys.exit(1)
2228

23-
# Replace all carriage return line feed (Windows) with line feed
24-
data = data.replace('\r\n', '\n')
29+
if len(sys.argv) < 2:
30+
print("Error: Missing required argument <version>")
31+
print_usage()
2532

26-
# Replace all carriage return (Mac OS before X) with line feed
27-
data = data.replace('\r', '\n')
33+
version = sys.argv[1]
34+
changelog_dir = Path(f"changelog/v{version}")
2835

29-
# Split data at blank lines
30-
paras = data.split('\n\n')
36+
if not changelog_dir.exists():
37+
print(f"Error: Directory '{changelog_dir}' does not exist")
38+
sys.exit(1)
3139

32-
# Initialize a default dictionary to store contributors and their counts
33-
contributors = defaultdict(int)
40+
# Collect all unique authors
41+
authors = set()
3442

35-
# Regular expression to find the attribution in parentheses at the end of a line
36-
pattern = re.compile(r"\(([^()]*)\)$")
43+
# Process all .yml and .yaml files in the changelog directory
44+
yaml_files = list(changelog_dir.glob("*.yml")) + list(changelog_dir.glob("*.yaml"))
3745

38-
for para in paras:
39-
# Normalize whitespace (replace all whitespace with a single space)
40-
para = re.sub(r"\s+", ' ', para).strip()
41-
#print(f'> {para}')
46+
if not yaml_files:
47+
print(f"Warning: No YAML files found in {changelog_dir}")
48+
sys.exit(0)
4249

43-
# Find all contributors in the line
44-
match = pattern.search(para.strip())
45-
if match:
46-
attribution = match.group(1)
47-
# might have a "via" committer; we only want the author here
48-
attribution = attribution.split(" via ")[0] # keep left side
49-
# Split the contributors by comma and strip whitespace
50-
for contributor in attribution.split(','):
51-
contributor = contributor.strip()
52-
contributors[contributor] += 1
50+
for yaml_file in sorted(yaml_files):
51+
try:
52+
with open(yaml_file, 'r') as f:
53+
data = yaml.safe_load(f)
54+
if data and 'authors' in data:
55+
author_list = data['authors']
56+
if isinstance(author_list, list):
57+
for author_entry in author_list:
58+
if isinstance(author_entry, dict) and 'name' in author_entry:
59+
author_name = author_entry['name'].strip()
60+
# Filter out solrbot
61+
if author_name.lower() != 'solrbot':
62+
authors.add(author_name)
63+
except Exception as e:
64+
print(f"Warning: Error parsing {yaml_file}: {e}", file=sys.stderr)
5365

54-
if 'solrbot' in contributors:
55-
del contributors['solrbot']
66+
# Sort authors by name
67+
sorted_authors = sorted(list(authors))
5668

57-
sorted_contributors = sorted(contributors.items(), key=lambda item: item[1], reverse=True)
69+
# Print contributors
70+
for author in sorted_authors:
71+
print(author)
5872

59-
# Print the contributors and their counts
60-
for contributor, count in sorted_contributors:
61-
print(f'{contributor}: {count}')
62-
63-
print('\n\nThanks to all contributors!: ')
64-
print(', '.join([contributor for contributor, count in sorted_contributors]))
73+
print('\nThanks to all contributors!: ')
74+
print(', '.join(sorted_authors))

0 commit comments

Comments
 (0)