cloudposse
diff --git a/build.log b/build.log
Lines changed: 476 additions & 0 deletions
diff --git a/conductor.json b/conductor.json
Lines changed: 1 addition & 1 deletion
diff --git a/find-broken-links.py b/find-broken-links.py
Lines changed: 116 additions & 0 deletions
diff --git a/find-broken-links.py.v1 b/find-broken-links.py.v1
Lines changed: 111 additions & 0 deletions
@@ -1,6 +1,6 @@
 {
   "scripts": {
-    "setup": "./conductor-setup.sh",
+    "setup": "./scripts/conductor-setup.sh",
     "run": "npm start -- --port $CONDUCTOR_PORT --host 0.0.0.0"
   },
   "runScriptMode": "nonconcurrent"
 
@@ -0,0 +1,116 @@
+import re
+import os
+from collections import defaultdict
+import pprint
+import csv
+
+# Function to recursively search for filenames in a directory, ignoring extensions
+def find_files(base_dir, filename_without_ext):
+    """Find files under base_dir whose name, minus its extension, matches.
+
+    Args:
+        base_dir: Directory that is walked recursively.
+        filename_without_ext: File name to look for, without extension.
+
+    Returns:
+        A list of POSIX-style paths relative to base_dir with the extension
+        dropped, one per matching file (e.g. "guides/setup"). A match that
+        sits directly in base_dir is returned as the bare name, with no
+        leading "./".
+    """
+    matches = []
+    for root, _, files in os.walk(base_dir):
+        relative_path = os.path.relpath(root, base_dir)
+        # relpath() yields "." for base_dir itself; joining with it would
+        # produce "./name", which turns into "/./name" once a "/" is prepended.
+        prefix = "" if relative_path == os.curdir else relative_path
+        for file in files:
+            # splitext() leaves dotfiles such as ".gitignore" intact, so an
+            # empty search name cannot match every dotfile in the tree.
+            if os.path.splitext(file)[0] == filename_without_ext:
+                suggestion = os.path.join(prefix, filename_without_ext)
+                matches.append(suggestion.replace("\\", "/"))  # Ensure POSIX-style paths
+    return matches
+
+# Map each broken link target (anchor removed) to the list of source files/pages that reference it
+broken_links_map = defaultdict(list)
+
+# Read the log file as a whole (pattern2 below matches across line breaks, so the text is not processed line by line)
+with open('build.log', 'r') as file:
+    log_content = file.read()
+
+# Regular expressions to match the two broken-link report formats searched for in the log:
+#   pattern1 - single "[WARNING] Docs markdown link couldn't be resolved" entries; captures (link, source file)
+#   pattern2 - a "Broken link on source page path = ..." header followed by one or more "-> linking to ..." lines
+pattern1 = re.compile(r"\[WARNING\] Docs markdown link couldn't be resolved: \((.*?)\) in source file \"(.*?)\"")
+pattern2 = re.compile(r"Broken link on source page path = (.*?):\n(?:\s*-> linking to (.*)\n?)+", re.MULTILINE)
+
+# Extract broken links and their source files for the first pattern
+for match in pattern1.finditer(log_content):
+    broken_link, source_file = match.groups()
+    broken_link = re.sub(r'#.*$', '', broken_link)  # Remove the anchor from the broken link
+    broken_links_map[broken_link].append(source_file)
+
+# Extract broken links and their source files for the second pattern
+matches = pattern2.finditer(log_content)
+for match in matches:
+    source_path = match.group(1).strip()
+    # A repeated capture group only keeps its last repetition, so re-scan the whole matched text to get every link
+    links = re.findall(r"-> linking to (.*)", match.group(0))
+    for broken_link in links:
+        broken_link = re.sub(r'#.*$', '', broken_link)  # Remove the anchor from the broken link
+        broken_links_map[broken_link.strip()].append(source_path)
+
+# Debugging: Print the extracted broken links map
+print("Extracted broken links map:")
+pprint.pprint(dict(broken_links_map))
+
+# Base directory to search for files (assuming it's 'docs/')
+base_dir = 'docs/'
+
+# Dictionary to store suggestions for broken links (broken link -> list of candidate paths relative to base_dir)
+suggestions_map = defaultdict(list)
+# List to store links with no suggestions
+no_suggestions = []
+
+# Filter out unwanted broken links and process each broken link to find potential matches in the docs folder
+ignored_prefixes = ['/components/library', '/tags/']
+for broken_link, source_files in broken_links_map.items():
+    # Links under an ignored prefix are dropped entirely: they get neither a suggestion nor a "no suggestions" entry
+    if any(broken_link.startswith(prefix) for prefix in ignored_prefixes):
+        continue
+
+    # If any source path referencing this link ends with "design-decisions" (optional trailing slash),
+    # record the link as having no suggestion and do not search for a replacement
+    if any(re.search(r'design-decisions/?$', source_file) for source_file in source_files):
+        no_suggestions.append(broken_link)
+        continue
+
+    # Extract filename without extension: strip("/") removes leading/trailing slashes so basename()
+    # returns the last path segment, and splitext() drops any extension from it
+    filename_without_ext = os.path.splitext(os.path.basename(broken_link.strip("/")))[0]
+    print(f"Searching for matches for broken link: {broken_link} (filename: {filename_without_ext})")
+    potential_matches = find_files(base_dir, filename_without_ext)
+    if potential_matches:
+        for match in potential_matches:
+            suggestions_map[broken_link].append(match)
+    else:
+        no_suggestions.append(broken_link)
+
+# Print the broken links with their suggestions and flatten them into (old link, new link) pairs;
+# a leading "/" is prepended to each suggestion before it is printed and stored
+suggestions_list = []
+for broken_link, suggestions in suggestions_map.items():
+    print(f"Broken link: {broken_link}")
+    for suggestion in suggestions:
+        print(f"  Suggested replacement: /{suggestion}")
+        suggestions_list.append((broken_link, f"/{suggestion}"))
+
+# Write the suggestions to a TSV file: one (old, new) row per suggestion, followed by a blank row
+# and a single-column section listing the links with no suggestions
+with open('suggested_links.tsv', 'w', newline='') as tsvfile:
+    writer = csv.writer(tsvfile, delimiter='\t')
+    writer.writerow(['Old Link', 'New Suggested Link'])
+    for old_link, new_link in suggestions_list:
+        writer.writerow([old_link, new_link])
+    writer.writerow([])  # Blank line for separation
+    writer.writerow(['Links with No Suggestions'])  # Header for no suggestions section
+    for link in no_suggestions:
+        writer.writerow([link])
+
+# Print the map of broken links and their source files for reference
+print("\nMap of broken links and their source files:")
+pprint.pprint(dict(broken_links_map))
+
+# Print the links with no suggestions
+if no_suggestions:
+    print("\nLinks with no suggestions found:")
+    for link in no_suggestions:
+        print(link)
+else:
+    print("\nAll broken links have suggestions.")
+
+# Count the total unique broken links (every key in the map, including links skipped via ignored_prefixes)
+total_unique_broken_links = len(broken_links_map)
+print(f"\nTotal unique broken links: {total_unique_broken_links}")
+
+# Count the total unique broken links without any suggestions (includes links set aside by the design-decisions check)
+total_no_suggestions = len(no_suggestions)
+print(f"Total unique broken links without any suggestions: {total_no_suggestions}")
@@ -0,0 +1,111 @@
+import re
+import os
+from collections import defaultdict
+import pprint
+import csv
+
+# Function to recursively search for filenames in a directory, ignoring extensions
+def find_files(base_dir, filename_without_ext):
+    """Find files under base_dir whose name, minus its extension, matches.
+
+    Args:
+        base_dir: Directory that is walked recursively.
+        filename_without_ext: File name to look for, without extension.
+
+    Returns:
+        A list of POSIX-style paths relative to base_dir with the extension
+        dropped, one per matching file (e.g. "guides/setup"). A match that
+        sits directly in base_dir is returned as the bare name, with no
+        leading "./".
+    """
+    matches = []
+    for root, _, files in os.walk(base_dir):
+        relative_path = os.path.relpath(root, base_dir)
+        # relpath() yields "." for base_dir itself; joining with it would
+        # produce "./name", which turns into "/./name" once a "/" is prepended.
+        prefix = "" if relative_path == os.curdir else relative_path
+        for file in files:
+            # splitext() leaves dotfiles such as ".gitignore" intact, so an
+            # empty search name cannot match every dotfile in the tree.
+            if os.path.splitext(file)[0] == filename_without_ext:
+                suggestion = os.path.join(prefix, filename_without_ext)
+                matches.append(suggestion.replace("\\", "/"))  # Ensure POSIX-style paths
+    return matches
+
+# Map each broken link target (exactly as captured from the log) to the list of source files/pages that reference it
+broken_links_map = defaultdict(list)
+
+# Read the log file as a whole (pattern2 below matches across line breaks, so the text is not processed line by line)
+with open('build.log', 'r') as file:
+    log_content = file.read()
+
+# Regular expressions to match the two broken-link report formats searched for in the log:
+#   pattern1 - single "[WARNING] Docs markdown link couldn't be resolved" entries; captures (link, source file)
+#   pattern2 - a "Broken link on source page path = ..." header followed by one or more "-> linking to ..." lines
+pattern1 = re.compile(r"\[WARNING\] Docs markdown link couldn't be resolved: \((.*?)\) in source file \"(.*?)\"")
+pattern2 = re.compile(r"Broken link on source page path = (.*?):\n(?:\s*-> linking to (.*)\n?)+", re.MULTILINE)
+
+# Extract broken links and their source files for the first pattern
+for match in pattern1.finditer(log_content):
+    broken_link, source_file = match.groups()
+    broken_links_map[broken_link].append(source_file)
+
+# Extract broken links and their source files for the second pattern
+matches = pattern2.finditer(log_content)
+for match in matches:
+    source_path = match.group(1).strip()
+    # A repeated capture group only keeps its last repetition, so re-scan the whole matched text to get every link
+    links = re.findall(r"-> linking to (.*)", match.group(0))
+    for broken_link in links:
+        broken_links_map[broken_link.strip()].append(source_path)
+
+# Debugging: Print the extracted broken links map
+print("Extracted broken links map:")
+pprint.pprint(dict(broken_links_map))
+
+# Base directory to search for files (assuming it's 'docs/')
+base_dir = 'docs/'
+
+# Dictionary to store suggestions for broken links (broken link -> list of candidate paths relative to base_dir)
+suggestions_map = defaultdict(list)
+# List to store links with no suggestions
+no_suggestions = []
+
+# Filter out unwanted broken links and process each broken link to find potential matches in the docs folder
+ignored_prefixes = ['/components/library', '/tags/']
+for broken_link, source_files in broken_links_map.items():
+    # Links under an ignored prefix are dropped entirely: they get neither a suggestion nor a "no suggestions" entry
+    if any(broken_link.startswith(prefix) for prefix in ignored_prefixes):
+        continue
+    # Links whose own text contains "design-decisions" are recorded as having no suggestion without searching
+    if 'design-decisions' in broken_link:
+        no_suggestions.append(broken_link)
+        continue
+
+    # Extract filename without extension: strip("/") removes leading/trailing slashes so basename()
+    # returns the last path segment, and splitext() drops any extension from it
+    filename_without_ext = os.path.splitext(os.path.basename(broken_link.strip("/")))[0]
+    print(f"Searching for matches for broken link: {broken_link} (filename: {filename_without_ext})")
+    potential_matches = find_files(base_dir, filename_without_ext)
+    if potential_matches:
+        for match in potential_matches:
+            suggestions_map[broken_link].append(match)
+    else:
+        no_suggestions.append(broken_link)
+
+# Print the broken links with their suggestions and flatten them into (old link, new link) pairs;
+# a leading "/" is prepended to each suggestion before it is printed and stored
+suggestions_list = []
+for broken_link, suggestions in suggestions_map.items():
+    print(f"Broken link: {broken_link}")
+    for suggestion in suggestions:
+        print(f"  Suggested replacement: /{suggestion}")
+        suggestions_list.append((broken_link, f"/{suggestion}"))
+
+# Write the suggestions to a TSV file: one (old, new) row per suggestion, followed by a blank row
+# and a single-column section listing the links with no suggestions
+with open('suggested_links.tsv', 'w', newline='') as tsvfile:
+    writer = csv.writer(tsvfile, delimiter='\t')
+    writer.writerow(['Old Link', 'New Suggested Link'])
+    for old_link, new_link in suggestions_list:
+        writer.writerow([old_link, new_link])
+    writer.writerow([])  # Blank line for separation
+    writer.writerow(['Links with No Suggestions'])  # Header for no suggestions section
+    for link in no_suggestions:
+        writer.writerow([link])
+
+# Print the map of broken links and their source files for reference
+print("\nMap of broken links and their source files:")
+pprint.pprint(dict(broken_links_map))
+
+# Print the links with no suggestions
+if no_suggestions:
+    print("\nLinks with no suggestions found:")
+    for link in no_suggestions:
+        print(link)
+else:
+    print("\nAll broken links have suggestions.")
+
+# Count the total unique broken links (every key in the map, including links skipped via ignored_prefixes)
+total_unique_broken_links = len(broken_links_map)
+print(f"\nTotal unique broken links: {total_unique_broken_links}")
+
+# Count the total unique broken links without any suggestions (includes links set aside by the design-decisions check)
+total_no_suggestions = len(no_suggestions)
+print(f"Total unique broken links without any suggestions: {total_no_suggestions}")
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"scripts": {`
`3`		`- "setup": "./conductor-setup.sh",`
	`3`	`+ "setup": "./scripts/conductor-setup.sh",`
`4`	`4`	`"run": "npm start -- --port $CONDUCTOR_PORT --host 0.0.0.0"`
`5`	`5`	`},`
`6`	`6`	`"runScriptMode": "nonconcurrent"`