swirlai · peterswirl · Aug 22, 2024 · Aug 22, 2024 · peterswirl · Aug 22, 2024
diff --git a/.github/scripts/generate_broken_links_summary.py b/.github/scripts/generate_broken_links_summary.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+'''
+@author: Peter Kahn
+@contact: [email protected]
+@description: This script processes a CSV file containing URL check results, generates a fixed CSV file, and creates a summary report. Used in URL check processing.
+'''
+
+import argparse
+import sys
+import os
+import csv
+import logging
+
+# Get the script name without the .py extension
+script_name = os.path.basename(__file__).split('.')[0]
+
+def main(argv):
+    parser = argparse.ArgumentParser(description="Fix CSV file for loading into SQLite3")
+    parser.add_argument('filespec', help="path to a csv file to fix")
+    parser.add_argument('-o', '--output', help="path to a new csv file - otherwise, _fixed is appended to filespec")
+    args = parser.parse_args()
+
+    # Configure logging to write to both a file and the console
+    logging.basicConfig(level=logging.DEBUG,
+                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                        handlers=[
+                            logging.FileHandler('fix_csv.log'),
+                            logging.StreamHandler(sys.stdout)
+                        ])
+    logger = logging.getLogger(script_name)
+
+    if not os.path.exists(args.filespec):
+        logger.error(f"File not found: {args.filespec}")
+        return False
+
+    if not args.filespec.endswith(".csv"):
+        logger.error("File must be .csv")
+        return False
+
+    if args.output:
+        outfile = args.output
+    else:
+        outfile = args.filespec[:-4] + '_fixed.csv'
+
+    summary_file = args.filespec[:-4] + '_summary.md'
+    logger.info(f"Reading {args.filespec}, Output Files: [Fixed Csv: {outfile}, Summary: {summary_file}]")
+
+    passed_count = 0
+    excluded_count = 0
+    failed_count = 0
+    unhandled_count = 0
+    failed_urls = []
+    unhandled_lines = []
+
+    try:
+        with open(args.filespec, 'r', encoding='utf-8') as fi, open(outfile, 'w', encoding='utf-8') as fo:
+            csv_i = csv.reader(fi)
+            csv_o = csv.writer(fo, quoting=csv.QUOTE_NONNUMERIC)
+
+            for i, row in enumerate(csv_i):
+                if i == 0:
+                    csv_o.writerow(row)
+                    continue
+
+                url, result, filename = row
+                if result == "passed":
+                    passed_count += 1
+                elif result == "excluded":
+                    excluded_count += 1
+                elif result == "failed":
+                    failed_count += 1
+                    failed_urls.append(f"{url}, {filename}")
+                else:
+                    unhandled_count += 1
+                    unhandled_lines.append(f"Line {i}: {row}")
+
+                csv_o.writerow([url, result, filename])
+
+        logger.info(f"Finished processing CSV with {i} lines and Passed: {passed_count}, Excluded: {excluded_count}, Failed: {failed_count}, Unhandled: {unhandled_count}")
+
+        # Generate summary
+        with open(summary_file, 'w', encoding='utf-8') as summary:
+            summary.write("## URL Check Summary\n")
+            summary.write(f"*Passed:* {passed_count}\n")
+            summary.write(f"*Excluded:* {excluded_count}\n")
+            summary.write(f"*Failed:* {failed_count}\n")
+            summary.write(f"*Unhandled Lines:* {unhandled_count}\n")
+
+            if failed_count > 0:
+                summary.write("\n### Failed URLs:\n")
+                summary.write("* " + "\n* ".join(failed_urls) + "\n")
+
+            if unhandled_count > 0:
+                summary.write("\n### Unhandled Lines:\n")
+                summary.write("* " + "\n* ".join(unhandled_lines) + "\n")
+
+        logger.info(f"Summary stored {summary_file}")
+
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        return False
+
+if __name__ == "__main__":
+    main(sys.argv)
+
diff --git a/.github/workflows/urls-checker.yml b/.github/workflows/urls-checker.yml
@@ -0,0 +1,74 @@
+name: Check URLs
+
+on:
+  pull_request:
+    branches:
+      - "main"
+      - "develop"
+    paths:
+      - "docs/**"
+      - "README.md"
+
+  workflow_dispatch:
+
+jobs:
+  check-urls:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      pull-requests: write  # Ensure the workflow has permission to write comments on pull requests
+
+    env:
+      URLCHECK_RESULTS: urlcheck_results.csv  # Define global variable
+      URLCHECK_SUMMARY: urlcheck_results_summary.md  # Define global variable
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Print Environment Variables
+        run: env
+
+      - name: URLs Checker
+        id: url-checker
+        uses: urlstechie/[email protected]
+        with:
+          file_types: .md,.html,.yml,.conf,.txt,.py,.json,.sh,.ini,.spg
+          print_all: false
+          verbose: true
+          timeout: 5
+          retry_count: 3
+          exclude_patterns: localhost,api,apis,rss,etc,xx,googleapis,hostname,snowflake,graph.microsoft.com,login.microsoftonline.com,my-host.com,azure.com,github.com
+          exclude_files: Swirl.postman_collection.json,docs/googlec95caf0bd4a8c5df.html,docs/Gemfile,docs/Gemfile.lock,docs/_config.yml,tests/,SearchProviders/,DevUtils/
+          save: ${{ env.URLCHECK_RESULTS }}
+
+      - name: Generate Summary
+        if: always()
+        run: |
+          python .github/scripts/generate_broken_links_summary.py ${{ env.URLCHECK_RESULTS }}
+          echo "$(cat ${{ env.URLCHECK_SUMMARY }})" >> $GITHUB_STEP_SUMMARY
+          echo -e "\n" >> $GITHUB_STEP_SUMMARY
+
+      - name: Upload Results
+        if: failure()
+        uses: actions/upload-artifact@v3
+        with:
+          name: urlcheck-results
+          path:  ${{ env.URLCHECK_RESULTS }}
+
+      - name: Add comment to PR
+        if: failure() && github.event_name == 'pull_request'
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const fs = require('fs');
+            const summary = fs.readFileSync('${{ env.URLCHECK_SUMMARY }}', 'utf8');
+            github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: `### URL Check Summary\n\n${summary}`
+            });
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # Ensure the correct GitHub token is used
+