From e7914f1bba3616eafbc99e46bc0782ac90cb573f Mon Sep 17 00:00:00 2001 From: Peter Kahn Date: Thu, 22 Aug 2024 16:40:19 -0400 Subject: [PATCH 1/2] Improve workflow - auto run for PR to develop and main - fix csv format - it's potentially partially quoted-csv - generate summary in markup - run decorated with summary - on failure: - store csv file - add message to PR (not really necessary but makes the issue very salient --- .../scripts/generate_broken_links_summary.py | 104 ++++++++++++++++++ .github/workflows/urls-checker.yml | 73 ++++++++++++ 2 files changed, 177 insertions(+) create mode 100644 .github/scripts/generate_broken_links_summary.py create mode 100644 .github/workflows/urls-checker.yml diff --git a/.github/scripts/generate_broken_links_summary.py b/.github/scripts/generate_broken_links_summary.py new file mode 100644 index 00000000..d0f5197c --- /dev/null +++ b/.github/scripts/generate_broken_links_summary.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +''' +@author: Peter Kahn +@contact: peter@swirl.today +@description: This script processes a CSV file containing URL check results, generates a fixed CSV file, and creates a summary report. Used in URL check processing. 
import argparse
import csv
import logging
import os
import sys

# Script name without the .py extension; used as the logger name.
script_name = os.path.splitext(os.path.basename(__file__))[0]

# Result values that the URL checker is expected to emit, one counter each.
_KNOWN_RESULTS = ("passed", "excluded", "failed")


def _process_rows(rows, writer):
    """Copy well-formed rows to *writer* and tally them by check result.

    Args:
        rows: Iterable of parsed CSV rows (lists of strings). The first row is
            treated as the header and copied through unchanged.
        writer: ``csv.writer``-like object receiving the header and every
            three-column data row.

    Returns:
        dict with the keys ``rows`` (index of the last row seen, i.e. the
        number of rows after the header), ``passed``, ``excluded``, ``failed``
        (counts), ``failed_urls`` and ``unhandled_lines`` (report strings).
    """
    stats = {
        "rows": 0,
        "passed": 0,
        "excluded": 0,
        "failed": 0,
        "failed_urls": [],
        "unhandled_lines": [],
    }

    for i, row in enumerate(rows):
        if i == 0:
            writer.writerow(row)
            continue

        stats["rows"] = i

        # csv.reader yields [] for blank lines; they carry no data to report.
        if not row:
            continue

        # A row that is not exactly url/result/filename cannot be unpacked.
        # Report it instead of aborting the whole run, and keep it out of the
        # fixed CSV so that file stays strictly three columns wide.
        if len(row) != 3:
            stats["unhandled_lines"].append(f"Line {i}: {row}")
            continue

        url, result, filename = row
        if result in _KNOWN_RESULTS:
            stats[result] += 1
            if result == "failed":
                stats["failed_urls"].append(f"{url}, {filename}")
        else:
            stats["unhandled_lines"].append(f"Line {i}: {row}")

        writer.writerow([url, result, filename])

    return stats


def _write_summary(summary_file, stats):
    """Write the Markdown summary report for *stats* to *summary_file*."""
    failed_urls = stats["failed_urls"]
    unhandled_lines = stats["unhandled_lines"]

    parts = [
        "## URL Check Summary\n",
        f"*Passed:* {stats['passed']}\n",
        f"*Excluded:* {stats['excluded']}\n",
        f"*Failed:* {stats['failed']}\n",
        f"*Unhandled Lines:* {len(unhandled_lines)}\n",
    ]
    if failed_urls:
        parts.append("\n### Failed URLs:\n")
        parts.append("* " + "\n* ".join(failed_urls) + "\n")
    if unhandled_lines:
        parts.append("\n### Unhandled Lines:\n")
        parts.append("* " + "\n* ".join(unhandled_lines) + "\n")

    with open(summary_file, 'w', encoding='utf-8') as summary:
        summary.writelines(parts)


def main(argv):
    """Normalise a URL-check CSV and write a Markdown summary next to it.

    Reads ``filespec`` (header row plus ``url, result, filename`` rows),
    rewrites it with every field quoted, and writes ``<filespec>_summary.md``
    with per-result counts, the failed URLs and any rows that could not be
    classified.

    Args:
        argv: Full argument vector including the program name, normally
            ``sys.argv``. When empty or ``None`` the process arguments are
            used instead.

    Returns:
        ``True`` when both output files were written, ``False`` when the
        input is missing, is not a ``.csv`` file, or processing failed.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(description="Fix CSV file for loading into SQLite3")
    parser.add_argument('filespec', help="path to a csv file to fix")
    parser.add_argument('-o', '--output', help="path to a new csv file - otherwise, _fixed is appended to filespec")
    # Honour the caller-supplied vector; None makes argparse fall back to
    # sys.argv[1:], which is what the parameterless call used to do.
    args = parser.parse_args(argv[1:] if argv else None)

    # Configure logging to write to both a file and the console. basicConfig
    # is a no-op once the root logger has handlers, so only open the log file
    # when it will actually be attached.
    if not logging.getLogger().handlers:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                            handlers=[
                                logging.FileHandler('fix_csv.log'),
                                logging.StreamHandler(sys.stdout)
                            ])
    logger = logging.getLogger(script_name)

    if not os.path.exists(args.filespec):
        logger.error(f"File not found: {args.filespec}")
        return False

    if not args.filespec.endswith(".csv"):
        logger.error("File must be .csv")
        return False

    stem = args.filespec[:-4]
    outfile = args.output if args.output else stem + '_fixed.csv'
    summary_file = stem + '_summary.md'
    logger.info(f"Reading {args.filespec}, Output Files: [Fixed Csv: {outfile}, Summary: {summary_file}]")

    try:
        # newline='' lets the csv module do its own line-ending handling, as
        # its documentation requires for both readers and writers.
        with open(args.filespec, 'r', encoding='utf-8', newline='') as fi, \
                open(outfile, 'w', encoding='utf-8', newline='') as fo:
            stats = _process_rows(csv.reader(fi),
                                  csv.writer(fo, quoting=csv.QUOTE_NONNUMERIC))

        logger.info(f"Finished processing CSV with {stats['rows']} lines and Passed: {stats['passed']}, Excluded: {stats['excluded']}, Failed: {stats['failed']}, Unhandled: {len(stats['unhandled_lines'])}")

        _write_summary(summary_file, stats)
        logger.info(f"Summary stored {summary_file}")
    except Exception as e:
        # Top-level boundary: report with traceback and signal failure.
        logger.exception(f"An error occurred: {e}")
        return False

    return True


if __name__ == "__main__":
    # Propagate failure to the shell so CI steps can react to it.
    sys.exit(0 if main(sys.argv) else 1)
localhost,api,apis,rss,etc,xx,googleapis,hostname,snowflake,graph.microsoft.com,login.microsoftonline.com,my-host.com,azure.com,github.com + exclude_files: Swirl.postman_collection.json,docs/googlec95caf0bd4a8c5df.html,docs/Gemfile,docs/Gemfile.lock,docs/_config.yml,tests/,SearchProviders/,DevUtils/ + save: ${{ env.URLCHECK_RESULTS }} + + - name: Generate Summary + if: always() + run: | + python .github/scripts/generate_broken_links_summary.py ${{ env.URLCHECK_RESULTS }} + echo "$(cat ${{ env.URLCHECK_SUMMARY }})" >> $GITHUB_STEP_SUMMARY + echo -e "\n" >> $GITHUB_STEP_SUMMARY + + - name: Upload Results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: urlcheck-results + path: ${{ env.URLCHECK_RESULTS }} + + - name: Add comment to PR + if: failure() && github.event_name == 'pull_request' + uses: actions/github-script@v6 + with: + script: | + const fs = require('fs'); + const summary = fs.readFileSync('${{ env.URLCHECK_SUMMARY }}', 'utf8'); + github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: `### URL Check Summary\n\n${summary}` + }); + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Ensure the correct GitHub token is used \ No newline at end of file From 61359f941f4d1f46c5b690462a1478f7f120455d Mon Sep 17 00:00:00 2001 From: Peter Kahn Date: Thu, 22 Aug 2024 16:49:14 -0400 Subject: [PATCH 2/2] newlines --- .github/scripts/generate_broken_links_summary.py | 3 ++- .github/workflows/urls-checker.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/scripts/generate_broken_links_summary.py b/.github/scripts/generate_broken_links_summary.py index d0f5197c..2e95e01a 100644 --- a/.github/scripts/generate_broken_links_summary.py +++ b/.github/scripts/generate_broken_links_summary.py @@ -101,4 +101,5 @@ def main(argv): return False if __name__ == "__main__": - main(sys.argv) \ No newline at end of file + main(sys.argv) + diff --git 
a/.github/workflows/urls-checker.yml b/.github/workflows/urls-checker.yml index 8b1c0cf7..eebcf39e 100644 --- a/.github/workflows/urls-checker.yml +++ b/.github/workflows/urls-checker.yml @@ -70,4 +70,5 @@ jobs: body: `### URL Check Summary\n\n${summary}` }); env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Ensure the correct GitHub token is used \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Ensure the correct GitHub token is used +