Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions .github/workflows/check-cache-migration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
---
# One-shot audit workflow: compares the repository's GitHub Actions cache
# entries against the S3 cache bucket and, once every relevant entry has been
# migrated, flips the CACHE_IMPORT_GITHUB repository variable to "false" so
# the composite cache action stops falling back to the GitHub cache.
name: Check cache migration (GitHub → S3)

on:
  workflow_dispatch:
    inputs:
      environment:
        description: Cache environment to check
        required: true
        default: prod
        type: choice
        options:
          - prod
          - dev

jobs:
  check-migration:
    runs-on: ubuntu-latest
    name: Compare GitHub cache vs S3
    permissions:
      id-token: write
      contents: read
      actions: read  # required to list GitHub cache entries
      variables: write  # required to set the opt-out variable when migration is complete

    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Setup S3 cache credentials
        id: aws-auth
        uses: SonarSource/gh-action_cache/credential-setup@v1
        with:
          environment: ${{ inputs.environment }}

      - name: List GitHub cache entries
        id: gh-caches
        shell: bash
        env:
          GITHUB_TOKEN: ${{ github.token }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          # Fetch all GitHub cache entries (paginated, up to 10 000).
          # `curl -f` plus the default `bash -e` aborts the step on API errors.
          PAGE=1
          PER_PAGE=100
          ALL_ENTRIES="[]"
          while true; do
            RESPONSE=$(curl -s -f \
              -H "Authorization: token $GITHUB_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/caches?per_page=${PER_PAGE}&page=${PAGE}")
            ENTRIES=$(echo "$RESPONSE" | jq '.actions_caches')
            COUNT=$(echo "$ENTRIES" | jq 'length')
            # Concatenate the two arrays (slurp both, then add).
            ALL_ENTRIES=$(echo "$ALL_ENTRIES $ENTRIES" | jq -s 'add')
            # A short page means we reached the end of the listing.
            if [[ "$COUNT" -lt "$PER_PAGE" ]]; then
              break
            fi
            PAGE=$((PAGE + 1))
          done

          # Filter: include only target branches, exclude unwanted key patterns
          FILTERED=$(echo "$ALL_ENTRIES" | jq '[
            .[] |
            select(
              (.ref | test("^refs/heads/(main|master|branch-.+|dogfood-on-.+|feature/long/.+)$")) and
              (.key | test("^(build-number-|mise-)") | not)
            ) |
            { ref: .ref, key: .key }
          ]')

          TOTAL=$(echo "$ALL_ENTRIES" | jq 'length')
          INCLUDED=$(echo "$FILTERED" | jq 'length')
          echo "Total GitHub cache entries: $TOTAL"
          echo "Included for migration check: $INCLUDED"

          # Write to file for the compare step
          echo "$FILTERED" > /tmp/gh_caches.json
          echo "included-count=$INCLUDED" >> "$GITHUB_OUTPUT"

      - name: List S3 cache objects
        id: s3-objects
        shell: bash
        env:
          AWS_ACCESS_KEY_ID: ${{ steps.aws-auth.outputs.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ steps.aws-auth.outputs.AWS_SECRET_ACCESS_KEY }}
          AWS_SESSION_TOKEN: ${{ steps.aws-auth.outputs.AWS_SESSION_TOKEN }}
          AWS_DEFAULT_REGION: eu-central-1
          S3_BUCKET: sonarsource-s3-cache-${{ inputs.environment }}-bucket
        run: |
          # List all S3 object keys (the AWS CLI auto-paginates list-objects-v2).
          # Use '.[]?' rather than '.[]': on an empty bucket the JMESPath query
          # 'Contents[].Key' yields JSON null, and '.[]' would make jq fail the
          # step; '.[]?' yields nothing instead, producing an empty key file.
          aws s3api list-objects-v2 \
            --bucket "$S3_BUCKET" \
            --query 'Contents[].Key' \
            --output json | jq -r '.[]?' | sort > /tmp/s3_keys.txt

          S3_COUNT=$(wc -l < /tmp/s3_keys.txt)
          echo "Total S3 cache objects: $S3_COUNT"
          echo "s3-count=$S3_COUNT" >> "$GITHUB_OUTPUT"

      - name: Compare and report
        id: compare
        shell: bash
        run: |
          # For each included GitHub cache entry, check whether the expected
          # object exists in the S3 listing produced by the previous step.
          INCLUDED=$(jq 'length' /tmp/gh_caches.json)
          MIGRATED=0
          MISSING=()

          while IFS= read -r ENTRY; do
            REF=$(echo "$ENTRY" | jq -r '.ref')
            KEY=$(echo "$ENTRY" | jq -r '.key')
            # Expected S3 key: {ref}/{key} (e.g. refs/heads/main/my-gradle-abc123)
            S3_KEY="${REF}/${KEY}"
            # -x: whole-line match, -F: literal string (keys may contain regex chars)
            if grep -qxF "$S3_KEY" /tmp/s3_keys.txt; then
              MIGRATED=$((MIGRATED + 1))
            else
              MISSING+=("$S3_KEY")
            fi
          done < <(jq -c '.[]' /tmp/gh_caches.json)

          echo ""
          echo "========================================="
          echo " Migration status: $MIGRATED / $INCLUDED"
          echo "========================================="

          if [[ "${#MISSING[@]}" -gt 0 ]]; then
            echo ""
            echo "Missing in S3 (${#MISSING[@]} entries):"
            printf ' %s\n' "${MISSING[@]}"
          fi

          # INCLUDED > 0 guards against declaring "complete" on an empty set.
          if [[ "$MIGRATED" -eq "$INCLUDED" && "$INCLUDED" -gt 0 ]]; then
            echo ""
            echo "All included GitHub cache entries are present in S3."
            echo "migration-complete=true" >> "$GITHUB_OUTPUT"
          else
            echo "migration-complete=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Set CACHE_IMPORT_GITHUB=false (migration complete)
        if: steps.compare.outputs.migration-complete == 'true'
        shell: bash
        env:
          GITHUB_TOKEN: ${{ github.token }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          VARIABLE_NAME="CACHE_IMPORT_GITHUB"

          # Check if variable already exists (200 = exists, anything else = create)
          STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
            -H "Authorization: token $GITHUB_TOKEN" \
            -H "Accept: application/vnd.github+json" \
            "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/variables/${VARIABLE_NAME}")

          if [[ "$STATUS" == "200" ]]; then
            # Update existing variable
            curl -s -f -X PATCH \
              -H "Authorization: token $GITHUB_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/variables/${VARIABLE_NAME}" \
              -d '{"name":"'"$VARIABLE_NAME"'","value":"false"}'
            echo "Updated repository variable $VARIABLE_NAME=false"
          else
            # Create new variable
            curl -s -f -X POST \
              -H "Authorization: token $GITHUB_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/variables" \
              -d '{"name":"'"$VARIABLE_NAME"'","value":"false"}'
            echo "Created repository variable $VARIABLE_NAME=false"
          fi

          echo ""
          echo "Migration complete — CACHE_IMPORT_GITHUB set to false"
          echo "Import fallback will be disabled on next workflow runs."
          echo "To re-enable migration mode, delete or set CACHE_IMPORT_GITHUB=true in repository variables."
58 changes: 56 additions & 2 deletions action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@
description: >
Force cache backend ('github' or 's3'). If not set, falls back to the CACHE_BACKEND environment variable if defined,
then automatically determined based on repository visibility.
import-github-cache:
description: >
Import GitHub cache to S3 when no S3 cache exists (migration mode). Enabled by default when using S3 backend.
Set to 'false' to disable, or control via CACHE_IMPORT_GITHUB environment variable
(e.g. from a repository variable: ${{ vars.CACHE_IMPORT_GITHUB }}).
default: ''

outputs:
cache-hit:
description: A boolean value to indicate an exact match was found for the primary key
value: ${{ steps.github-cache.outputs.cache-hit || steps.s3-cache.outputs.cache-hit }}
value: ${{ steps.github-cache.outputs.cache-hit || steps.s3-cache.outputs.cache-hit || steps.github-import.outputs.cache-hit }}

runs:
using: composite
Expand Down Expand Up @@ -113,6 +119,26 @@
GITHUB_REPOSITORY: ${{ github.repository }}
run: $ACTION_PATH_CACHE/scripts/prepare-keys.sh

- name: Determine GitHub cache import mode
  # Only relevant for the S3 backend; the GitHub backend has nothing to import.
  if: steps.cache-backend.outputs.cache-backend == 's3'
  id: import-mode
  shell: bash
  env:
    INPUT_IMPORT_GITHUB_CACHE: ${{ inputs.import-github-cache }}
  run: |
    # Resolution order: input → CACHE_IMPORT_GITHUB env var → default true
    if [[ -n "$INPUT_IMPORT_GITHUB_CACHE" ]]; then
      IMPORT_GITHUB="$INPUT_IMPORT_GITHUB_CACHE"
      echo "Using import mode from input: $IMPORT_GITHUB"
    elif [[ -n "${CACHE_IMPORT_GITHUB:-}" ]]; then
      IMPORT_GITHUB="$CACHE_IMPORT_GITHUB"
      echo "Using import mode from CACHE_IMPORT_GITHUB environment variable: $IMPORT_GITHUB"
    else
      IMPORT_GITHUB="true"
      echo "Using default import mode: $IMPORT_GITHUB"
    fi
    echo "import-github=$IMPORT_GITHUB" >> "$GITHUB_OUTPUT"

- name: Cache on S3
if: steps.cache-backend.outputs.cache-backend == 's3'
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # v4.3.0
Expand All @@ -134,9 +160,37 @@
restore-keys: ${{ steps.prepare-keys.outputs.branch-restore-keys }}
upload-chunk-size: ${{ inputs.upload-chunk-size }}
enableCrossOsArchive: ${{ inputs.enableCrossOsArchive }}
fail-on-cache-miss: ${{ inputs.fail-on-cache-miss }}
# When import mode is active, suppress fail-on-cache-miss here: a subsequent step handles it
# after also attempting the GitHub cache fallback.
fail-on-cache-miss: ${{ steps.import-mode.outputs.import-github == 'true' && 'false' || inputs.fail-on-cache-miss }}
lookup-only: ${{ inputs.lookup-only }}

- name: Import GitHub cache to S3 (migration fallback)
  # Fall back to the legacy GitHub cache only when the S3 lookup missed and
  # import mode is enabled. Uses the raw user-supplied keys (not the prepared
  # branch keys) because that is how entries were stored on the GitHub backend.
  if: >-
    steps.cache-backend.outputs.cache-backend == 's3' &&
    steps.s3-cache.outputs.cache-hit != 'true' &&
    steps.import-mode.outputs.import-github == 'true'
  uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
  id: github-import
  with:
    path: ${{ inputs.path }}
    key: ${{ inputs.key }}
    restore-keys: ${{ inputs.restore-keys }}
    lookup-only: ${{ inputs.lookup-only }}
    # Never fail here: a dedicated follow-up step enforces fail-on-cache-miss
    # after BOTH backends have been tried.
    fail-on-cache-miss: false

- name: Enforce fail-on-cache-miss after GitHub import fallback
  # Runs only when the caller asked for fail-on-cache-miss and neither the S3
  # cache nor the GitHub import fallback produced a hit.
  if: >-
    steps.cache-backend.outputs.cache-backend == 's3' &&
    steps.import-mode.outputs.import-github == 'true' &&
    inputs.fail-on-cache-miss == 'true' &&
    steps.s3-cache.outputs.cache-hit != 'true' &&
    steps.github-import.outputs.cache-hit != 'true'
  shell: bash
  env:
    # Pass the user-controlled cache key through the environment instead of
    # interpolating ${{ inputs.key }} into the script, which would allow
    # shell script injection (flagged by SonarQube code analysis).
    CACHE_KEY: ${{ inputs.key }}
  run: |
    echo "::error::Cache miss: no cache found in S3 or GitHub for key '${CACHE_KEY}'"
    exit 1

- name: Credential guard for S3 cache save
if: steps.cache-backend.outputs.cache-backend == 's3'
uses: SonarSource/gh-action_cache/credential-guard@v1
Expand Down
Loading