DS-2758 improve broken links workflow #1417
@@ -0,0 +1,105 @@
#!/usr/bin/env python3
'''
@author: Peter Kahn
@contact: [email protected]
@description: This script processes a CSV file containing URL check results, generates a fixed CSV file, and creates a summary report. Used in URL check processing.
'''

import argparse
import sys
import os
import csv
import logging

# Get the script name without the .py extension
script_name = os.path.basename(__file__).split('.')[0]

def main(argv):
    parser = argparse.ArgumentParser(description="Fix CSV file for loading into SQLite3")
    parser.add_argument('filespec', help="path to a csv file to fix")
    parser.add_argument('-o', '--output', help="path to a new csv file - otherwise, _fixed is appended to filespec")
    args = parser.parse_args()

    # Configure logging to write to both a file and the console
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        handlers=[
                            logging.FileHandler('fix_csv.log'),
                            logging.StreamHandler(sys.stdout)
                        ])
    logger = logging.getLogger(script_name)

    if not os.path.exists(args.filespec):
        logger.error(f"File not found: {args.filespec}")
        return False

    if not args.filespec.endswith(".csv"):
        logger.error("File must be .csv")
        return False

    if args.output:
        outfile = args.output
    else:
        outfile = args.filespec[:-4] + '_fixed.csv'

    summary_file = args.filespec[:-4] + '_summary.md'
    logger.info(f"Reading {args.filespec}, Output Files: [Fixed Csv: {outfile}, Summary: {summary_file}]")

    passed_count = 0
    excluded_count = 0
    failed_count = 0
    unhandled_count = 0
    failed_urls = []
    unhandled_lines = []

    try:
        with open(args.filespec, 'r', encoding='utf-8') as fi, open(outfile, 'w', encoding='utf-8') as fo:
            csv_i = csv.reader(fi)
            csv_o = csv.writer(fo, quoting=csv.QUOTE_NONNUMERIC)

            for i, row in enumerate(csv_i):
                if i == 0:
                    csv_o.writerow(row)
                    continue

                url, result, filename = row
                if result == "passed":
                    passed_count += 1
                elif result == "excluded":
                    excluded_count += 1
                elif result == "failed":
                    failed_count += 1
                    failed_urls.append(f"{url}, {filename}")
                else:
                    unhandled_count += 1
                    unhandled_lines.append(f"Line {i}: {row}")

                csv_o.writerow([url, result, filename])

        logger.info(f"Finished processing CSV with {i} lines and Passed: {passed_count}, Excluded: {excluded_count}, Failed: {failed_count}, Unhandled: {unhandled_count}")

        # Generate summary
        with open(summary_file, 'w', encoding='utf-8') as summary:
            summary.write("## URL Check Summary\n")
            summary.write(f"*Passed:* {passed_count}\n")
            summary.write(f"*Excluded:* {excluded_count}\n")
            summary.write(f"*Failed:* {failed_count}\n")
            summary.write(f"*Unhandled Lines:* {unhandled_count}\n")

            if failed_count > 0:
                summary.write("\n### Failed URLs:\n")
                summary.write("* " + "\n* ".join(failed_urls) + "\n")

            if unhandled_count > 0:
                summary.write("\n### Unhandled Lines:\n")
                summary.write("* " + "\n* ".join(unhandled_lines) + "\n")

        logger.info(f"Summary stored {summary_file}")

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return False

if __name__ == "__main__":
    main(sys.argv)
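For reference, the script assumes each data row unpacks as (url, result, filename); that column order is inferred from the row unpacking above rather than from urlchecker documentation, so treat it as an assumption. A minimal local smoke test might fabricate such a file and run the script against it:

import csv

# Hypothetical sample input in the assumed (url, result, filename) column order.
sample_rows = [
    ["url", "result", "filename"],  # header row is copied through unchanged
    ["https://example.com/ok", "passed", "docs/index.md"],
    ["https://example.com/skip", "excluded", "docs/skip.md"],
    ["https://example.com/404", "failed", "docs/broken.md"],
]

with open("urlcheck_results.csv", "w", newline="", encoding="utf-8") as f:
    csv.writer(f).writerows(sample_rows)

# Running the script against this file, e.g.
#   python .github/scripts/generate_broken_links_summary.py urlcheck_results.csv
# should produce urlcheck_results_fixed.csv and urlcheck_results_summary.md.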
Hey @peterswirl ,

Yeah, that was my question. So, this is process related, but... docs do go straight to main. Let me redo this using main.
@@ -0,0 +1,74 @@
name: Check URLs

on:
  pull_request:
    branches:
      - "main"
      - "develop"
    paths:
      - "docs/**"
      - "README.md"

  workflow_dispatch:

jobs:
  check-urls:
    runs-on: ubuntu-latest

    permissions:
      contents: read
      pull-requests: write # Ensure the workflow has permission to write comments on pull requests

    env:
      URLCHECK_RESULTS: urlcheck_results.csv # Define global variable
      URLCHECK_SUMMARY: urlcheck_results_summary.md # Define global variable

    steps:
      - uses: actions/checkout@v4

      - name: Print Environment Variables
        run: env

      - name: URLs Checker
        id: url-checker
        uses: urlstechie/[email protected]
        with:
          file_types: .md,.html,.yml,.conf,.txt,.py,.json,.sh,.ini,.spg
          print_all: false
          verbose: true
          timeout: 5
          retry_count: 3
          exclude_patterns: localhost,api,apis,rss,etc,xx,googleapis,hostname,snowflake,graph.microsoft.com,login.microsoftonline.com,my-host.com,azure.com,github.com
          exclude_files: Swirl.postman_collection.json,docs/googlec95caf0bd4a8c5df.html,docs/Gemfile,docs/Gemfile.lock,docs/_config.yml,tests/,SearchProviders/,DevUtils/
          save: ${{ env.URLCHECK_RESULTS }}

      - name: Generate Summary
        if: always()
        run: |
          python .github/scripts/generate_broken_links_summary.py ${{ env.URLCHECK_RESULTS }}
          echo "$(cat ${{ env.URLCHECK_SUMMARY }})" >> $GITHUB_STEP_SUMMARY
          echo -e "\n" >> $GITHUB_STEP_SUMMARY

      - name: Upload Results
        if: failure()
        uses: actions/upload-artifact@v3
        with:
          name: urlcheck-results
          path: ${{ env.URLCHECK_RESULTS }}

      - name: Add comment to PR
        if: failure() && github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const summary = fs.readFileSync('${{ env.URLCHECK_SUMMARY }}', 'utf8');
            github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `### URL Check Summary\n\n${summary}`
            });
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Ensure the correct GitHub token is used
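The Generate Summary step can be previewed locally by running the script the same way the workflow does and printing the resulting markdown. A sketch, assuming a results CSV named after URLCHECK_RESULTS already exists in the working directory:

import pathlib
import subprocess

results = "urlcheck_results.csv"          # same name as URLCHECK_RESULTS
summary = "urlcheck_results_summary.md"   # same name as URLCHECK_SUMMARY

# Mirror the workflow's "Generate Summary" step.
subprocess.run(
    ["python", ".github/scripts/generate_broken_links_summary.py", results],
    check=True,
)

# Print what would be appended to $GITHUB_STEP_SUMMARY.
print(pathlib.Path(summary).read_text(encoding="utf-8"))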
FYI, I could include the excluded. Let me know if we want to do that.
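If we do want the excluded links in the report, a minimal sketch (not part of this PR) would mirror the existing failed_urls handling: collect f"{url}, {filename}" for excluded rows as well, then append a section to the summary, for example with a hypothetical helper like:

def append_excluded_section(summary_path, excluded_urls):
    """Append an 'Excluded URLs' section to an existing summary file (hypothetical helper)."""
    if not excluded_urls:
        return
    with open(summary_path, 'a', encoding='utf-8') as summary:
        summary.write("\n### Excluded URLs:\n")
        summary.write("* " + "\n* ".join(excluded_urls) + "\n")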