From dfc4952717f4c47919a2fa91a7e4e81dc424c0b7 Mon Sep 17 00:00:00 2001 From: Timid Robot Zehta Date: Fri, 6 Feb 2026 15:45:53 +0100 Subject: [PATCH 1/2] update actions/* versions and add entries or comments for all scripts --- .github/workflows/1-fetch.yml | 56 ++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/.github/workflows/1-fetch.yml b/.github/workflows/1-fetch.yml index 154d04d9..22d9c420 100644 --- a/.github/workflows/1-fetch.yml +++ b/.github/workflows/1-fetch.yml @@ -4,12 +4,9 @@ on: schedule: # Normal schedule # # at 01:15 on all days in first month of each quarter - # - cron: '15 1 * 1,4,7,10 *' + - cron: '15 1 * 1,4,7,10 *' # # at 01:15 on days 1-14 in second month of each quarter - # - cron: '15 1 1-14 2,5,8,11 *' - # Temp schedule - # at 01:15 on all days in all months - - cron: '15 1 * * *' + - cron: '15 1 1-14 2,5,8,11 *' workflow_dispatch: @@ -29,7 +26,7 @@ jobs: git config --global user.email "${{ secrets.BOT_EMAIL }}" - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: # Default fetch-depth is 1, however that value results in errors # when GitPython attempts to push changes: @@ -38,7 +35,7 @@ jobs: token: ${{ secrets.BOT_TOKEN }} - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' @@ -50,16 +47,26 @@ jobs: run: | pipenv sync --system - # CC Technology team members: - # See cc-quantifying-bot Google Workspace entry in Bitwarden for - # information on GCS_ secrets - - name: Fetch from Google Custom Search (GCS) - run: | - ./scripts/1-fetch/gcs_fetch.py \ - --limit=100 --enable-save --enable-git - env: - GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }} - GCS_CX: ${{ secrets.GCS_CX }} + # Fetch from arXiv disabled due to long run time (~6 hours) + # + # For now, data is fetched manually :/ + + # Fetch from Europeana disabled due to being considered incomplete + # https://github.com/creativecommons/quantifying/issues/224 + + # Fetch from GCS disabled due to Google blocking GitHub Action runners + # # CC Technology team members: + # # See cc-quantifying-bot Google Workspace entry in Bitwarden for + # # information on GCS_ secrets + # - name: Fetch from Google Custom Search (GCS) + # run: | + # ./scripts/1-fetch/gcs_fetch.py \ + # --limit=100 --enable-save --enable-git + # env: + # GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }} + # GCS_CX: ${{ secrets.GCS_CX }} + # + # For now, data is fetched manually :/ - name: Fetch from GitHub run: | @@ -67,3 +74,18 @@ jobs: --enable-save --enable-git env: GH_TOKEN: ${{ secrets.BOT_TOKEN }} + + # Fetch from Openverse disabled due to limitations of anonymous API + # access + + - name: Fetch from Smithsonian + run: | + ./scripts/1-fetch/smithsonian_fetch.py \ + --enable-save --enable-git + env: + DATA_GOV_API_KEY: ${{ secrets.DATA_GOV_API_KEY }} + + - name: Fetch from Wikipedia + run: | + ./scripts/1-fetch/wikipedia_fetch.py \ + --enable-save --enable-git From 40dda667e91919b4b4bf7cec2e3c554f404831c2 Mon Sep 17 00:00:00 2001 From: Timid Robot Zehta Date: Fri, 6 Feb 2026 15:53:55 +0100 Subject: [PATCH 2/2] start after 3am to avoid complications from clock changes related to daylight savings --- .github/workflows/1-fetch.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/1-fetch.yml b/.github/workflows/1-fetch.yml index 22d9c420..86590b0f 100644 --- a/.github/workflows/1-fetch.yml +++ b/.github/workflows/1-fetch.yml @@ -3,10 +3,10 @@ name: Fetch Data on: schedule: # Normal schedule - # # at 01:15 on all days in first month of each quarter - - cron: '15 1 * 1,4,7,10 *' - # # at 01:15 on days 1-14 in second month of each quarter - - cron: '15 1 1-14 2,5,8,11 *' + # # at 03:15 on all days in first month of each quarter + - cron: '15 3 * 1,4,7,10 *' + # # at 03:15 on days 1-14 in second month of each quarter + - cron: '15 3 1-14 2,5,8,11 *' workflow_dispatch: