Refactor tag parsing logic in CVS client revision history

This commit is contained in:
Juan José Gutiérrez de Quevedo Pérez 2025-11-21 21:36:30 +01:00
parent 45ad8bb135
commit f0847cd837

View file

@ -228,11 +228,22 @@ class CVSClient:
# Use 'cvs log' to get revision history for the file
output = self._run_cvs_command(['log', file_path])
# Parse log output to extract revision details
# First pass: collect all tags and their revisions
tags_by_revision = {}
for line in output.split('\n'):
# Look for tag lines (format: "\tTAG_NAME: X.X")
tag_match = re.match(r'^\s+(\S+):\s+(\S+)', line)
if tag_match:
tag_name = tag_match.group(1)
tag_revision = tag_match.group(2)
if tag_revision not in tags_by_revision:
tags_by_revision[tag_revision] = []
tags_by_revision[tag_revision].append(tag_name)
# Second pass: parse revisions and attach tags
revisions = []
current_revision = {}
in_log = False
in_tags = False
for line in output.split('\n'):
# Look for revision lines (format: "revision X.X")
@ -241,19 +252,12 @@ class CVSClient:
# Look for date/author/state line (format: "date: YYYY/MM/DD HH:MM:SS; author: NAME; state: STATE;")
date_match = re.match(r'^date:\s+(\d{4}/\d{2}/\d{2}\s+\d{2}:\d{2}:\d{2});\s+author:\s+(\S+);\s+state:\s+(\S+);', line)
# Look for branches line (format: "branches: ...")
branches_match = re.match(r'^branches:', line)
# Look for tag lines (format: "\tTAG_NAME: X.X")
tag_match = re.match(r'^\s+(\S+):\s+(\S+)', line)
if rev_match:
# Start of a new revision
if current_revision and 'revision' in current_revision:
revisions.append(current_revision)
current_revision = {'revision': rev_match.group(1)}
in_log = False
in_tags = False
if date_match:
current_revision.update({
@ -263,29 +267,11 @@ class CVSClient:
'lines_changed': 'N/A' # cvs log doesn't provide line counts
})
in_log = False
in_tags = False
if branches_match:
# Branches section starts, tags follow
in_tags = True
in_log = False
continue
# Capture tags (lines with indentation after branches line)
if in_tags and tag_match and not re.match(r'^---', line):
tag_name = tag_match.group(1)
tag_revision = tag_match.group(2)
# Only add tags that match the current revision
if tag_revision == current_revision.get('revision'):
if 'tags' not in current_revision:
current_revision['tags'] = []
current_revision['tags'].append(tag_name)
# Capture log message (lines after date/author/state until next revision or separator)
if in_log:
if line.strip() == '' or re.match(r'^---', line):
in_log = False
in_tags = False
elif not re.match(r'^(revision|date|branches):', line):
if 'log' not in current_revision:
current_revision['log'] = ''
@ -302,15 +288,14 @@ class CVSClient:
if current_revision and 'revision' in current_revision:
revisions.append(current_revision)
# Clean up logs - strip whitespace and take first line only
# Clean up logs and attach tags
for rev in revisions:
if 'log' in rev:
rev['log'] = rev['log'].strip().split('\n')[0]
else:
rev['log'] = ''
# Ensure tags field exists
if 'tags' not in rev:
rev['tags'] = []
# Attach tags from the first pass
rev['tags'] = tags_by_revision.get(rev['revision'], [])
return revisions
except subprocess.CalledProcessError as e: