Refactor tag parsing logic in CVS client revision history

2025-11-21 21:36:30 +01:00 · 2025-11-21 21:36:30 +01:00 · f0847cd837
commit f0847cd837
parent 45ad8bb135
1 changed file with 16 additions and 31 deletions
--- a/cvs_proxy/cvs_client.py
+++ b/cvs_proxy/cvs_client.py
@@ -228,11 +228,22 @@ class CVSClient:
            # Use 'cvs log' to get revision history for the file
            output = self._run_cvs_command(['log', file_path])
            
-            # Parse log output to extract revision details
+            # First pass: collect all tags and their revisions
+            tags_by_revision = {}
+            for line in output.split('\n'):
+                # Look for tag lines (format: "\tTAG_NAME: X.X")
+                tag_match = re.match(r'^\s+(\S+):\s+(\S+)', line)
+                if tag_match:
+                    tag_name = tag_match.group(1)
+                    tag_revision = tag_match.group(2)
+                    if tag_revision not in tags_by_revision:
+                        tags_by_revision[tag_revision] = []
+                    tags_by_revision[tag_revision].append(tag_name)
+            
+            # Second pass: parse revisions and attach tags
            revisions = []
            current_revision = {}
            in_log = False
-            in_tags = False
            
            for line in output.split('\n'):
                # Look for revision lines (format: "revision X.X")
@@ -241,19 +252,12 @@ class CVSClient:
                # Look for date/author/state line (format: "date: YYYY/MM/DD HH:MM:SS;  author: NAME;  state: STATE;")
                date_match = re.match(r'^date:\s+(\d{4}/\d{2}/\d{2}\s+\d{2}:\d{2}:\d{2});\s+author:\s+(\S+);\s+state:\s+(\S+);', line)
                
-                # Look for branches line (format: "branches: ...")
-                branches_match = re.match(r'^branches:', line)
-                
-                # Look for tag lines (format: "\tTAG_NAME: X.X")
-                tag_match = re.match(r'^\s+(\S+):\s+(\S+)', line)
-                
                if rev_match:
                    # Start of a new revision
                    if current_revision and 'revision' in current_revision:
                        revisions.append(current_revision)
                    current_revision = {'revision': rev_match.group(1)}
                    in_log = False
-                    in_tags = False
                
                if date_match:
                    current_revision.update({
@@ -263,29 +267,11 @@ class CVSClient:
                        'lines_changed': 'N/A'  # cvs log doesn't provide line counts
                    })
                    in_log = False
-                    in_tags = False
-                
-                if branches_match:
-                    # Branches section starts, tags follow
-                    in_tags = True
-                    in_log = False
-                    continue
-                
-                # Capture tags (lines with indentation after branches line)
-                if in_tags and tag_match and not re.match(r'^---', line):
-                    tag_name = tag_match.group(1)
-                    tag_revision = tag_match.group(2)
-                    # Only add tags that match the current revision
-                    if tag_revision == current_revision.get('revision'):
-                        if 'tags' not in current_revision:
-                            current_revision['tags'] = []
-                        current_revision['tags'].append(tag_name)
                
                # Capture log message (lines after date/author/state until next revision or separator)
                if in_log:
                    if line.strip() == '' or re.match(r'^---', line):
                        in_log = False
-                        in_tags = False
                    elif not re.match(r'^(revision|date|branches):', line):
                        if 'log' not in current_revision:
                            current_revision['log'] = ''
@@ -302,15 +288,14 @@ class CVSClient:
            if current_revision and 'revision' in current_revision:
                revisions.append(current_revision)
            
-            # Clean up logs - strip whitespace and take first line only
+            # Clean up logs and attach tags
            for rev in revisions:
                if 'log' in rev:
                    rev['log'] = rev['log'].strip().split('\n')[0]
                else:
                    rev['log'] = ''
-                # Ensure tags field exists
-                if 'tags' not in rev:
-                    rev['tags'] = []
+                # Attach tags from the first pass
+                rev['tags'] = tags_by_revision.get(rev['revision'], [])
            
            return revisions
        except subprocess.CalledProcessError as e: