Skip to content

Labeler: Predict (Pulls) #4598

Labeler: Predict (Pulls)

Labeler: Predict (Pulls) #4598

# Workflow template imported and updated from:
# https://github.com/dotnet/issue-labeler/wiki/Onboarding
#
# See labeler.md for more information
#
# Predict labels for Pull Requests using a trained model
name: 'Labeler: Predict (Pulls)'

on:
  # Why `pull_request_target` instead of `pull_request` -- per the GitHub docs:
  # https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#pull_request_target
  #
  # `pull_request_target` runs in the context of the base of the pull request,
  # not in the context of the merge commit as `pull_request` does. Unsafe code
  # from the head of the pull request therefore cannot alter the repository or
  # steal secrets used by the workflow, while the workflow can still label or
  # comment on pull requests coming from forks.
  #
  # Area labels are predicted automatically only when a pull request is first
  # opened.
  pull_request_target:
    types:
      - opened
    # Branches whose pull requests should be labeled
    branches:
      - "main"
      - "release/*"

  # Every 5 minutes, poll for open pull requests that still need labels
  schedule:
    - cron: '*/5 * * * *'

  # Manual dispatch from the Actions UI, optionally with ranges of pull request
  # numbers. When no numbers are given, the run behaves like a polling event.
  workflow_dispatch:
    inputs:
      pulls:
        description: "Pull Request Numbers (comma-separated list of ranges). Leave empty to poll."
        required: false
      cache_key:
        description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'."
        required: true
        default: 'ACTIVE'
env:
  # Evaluates to 'true' only for manually dispatched runs. Jobs triggered
  # automatically must not be allowed to fail (this can block PR merge).
  # Environment values are strings, so consumers should compare this against
  # 'true' rather than negate it as a boolean.
  ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }}
  # Labels starting with this prefix mark a pull request as already labeled
  LABEL_PREFIX: "area-"
  # Quoted so the value stays the literal string "0.40" instead of being
  # retyped as a float by the YAML loader. Passed to the predict action as
  # `threshold` (presumably a minimum confidence -- confirm against the action).
  THRESHOLD: "0.40"
  # Passed to the predict action as `default_label`
  DEFAULT_LABEL: "needs-area-label"
jobs:
  # Collects the numbers of open pull requests that carry no label starting
  # with LABEL_PREFIX and publishes them as the comma-separated `pulls` output.
  poll-pull-requests:
    # Run on schedule trigger or workflow_dispatch without PR numbers, within the 'dotnet' org
    if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && inputs.pulls == '')) && github.repository_owner == 'dotnet' }}
    runs-on: ubuntu-latest
    permissions:
      actions: read
      pull-requests: read
    outputs:
      pulls: ${{ steps.get-pulls.outputs.pulls }}
    steps:
      - name: "Get open pull requests needing labels"
        id: get-pulls
        env:
          GITHUB_TOKEN: ${{ github.token }}
          # Context values reach the script as environment variables instead
          # of being expanded into the script text, so their content is never
          # interpreted as shell syntax.
          REPO: ${{ github.repository }}
          WORKFLOW_NAME: ${{ github.workflow }}
        run: |
          # Get the last successful schedule run's timestamp (minus 5 minutes for overlap)
          last_run=$(gh run list --repo "$REPO" --workflow "$WORKFLOW_NAME" --event schedule --status success --limit 1 --json updatedAt --jq '.[0].updatedAt // empty')
          if [ -n "$last_run" ]; then
            # Subtract 5 minutes from the last run timestamp for overlap
            since=$(date -u -d "$last_run - 5 minutes" +"%Y-%m-%dT%H:%M:%SZ")
            echo "Filtering PRs updated since: $since (last run: $last_run)"
            pulls=$(gh pr list --repo "$REPO" --state open --json number,labels,updatedAt --limit 1000 --search "updated:>=$since")
          else
            # No previous run found; get all open pull requests
            echo "No previous schedule run found. Getting all open pull requests."
            pulls=$(gh pr list --repo "$REPO" --state open --json number,labels --limit 1000)
          fi
          # Filter to PRs that don't have a label starting with LABEL_PREFIX
          # (LABEL_PREFIX comes from the workflow-level env)
          needs_label=$(echo "$pulls" | jq -r --arg prefix "$LABEL_PREFIX" '
            [.[] | select(
              (.labels | map(.name) | any(startswith($prefix)) | not)
            ) | .number] | join(",")
          ')
          echo "Pull requests needing labels: $needs_label"
          echo "pulls=$needs_label" >> "$GITHUB_OUTPUT"

  # Restores the trained pulls model and runs label prediction for the pull
  # requests selected by the triggering event.
  predict-pull-label:
    # The 'if' uses always() so this job runs even when poll-pull-requests is skipped
    # Do not automatically run the workflow on forks outside the 'dotnet' org
    if: ${{ always() && (github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet') }}
    needs: [poll-pull-requests]
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
    steps:
      - name: "Determine pull requests to process"
        id: determine-pulls
        env:
          # User-supplied and event-supplied values are passed through the
          # environment so they are treated as data, never as script text.
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_PULLS: ${{ inputs.pulls }}
          POLLED_PULLS: ${{ needs.poll-pull-requests.outputs.pulls }}
          EVENT_PULL_NUMBER: ${{ github.event.number }}
        run: |
          if [ "$EVENT_NAME" == "workflow_dispatch" ] && [ -n "$REQUESTED_PULLS" ]; then
            # Manual run with explicit pull request numbers
            pulls="$REQUESTED_PULLS"
          elif [ "$EVENT_NAME" == "workflow_dispatch" ] || [ "$EVENT_NAME" == "schedule" ]; then
            # Polling run: use the list produced by poll-pull-requests
            pulls="$POLLED_PULLS"
          else
            # Any other event: use the number carried by the event payload
            pulls="$EVENT_PULL_NUMBER"
          fi
          echo "pulls=$pulls" >> "$GITHUB_OUTPUT"
          echo "Processing pull requests: $pulls"

      # Skipped when there is nothing to process.
      # NOTE(review): the workflow_dispatch input `cache_key` is declared but is
      # not passed to this step -- confirm whether the restore action should
      # receive it.
      - name: "Restore pulls model from cache"
        id: restore-model
        if: ${{ steps.determine-pulls.outputs.pulls != '' }}
        uses: dotnet/issue-labeler/restore@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
        with:
          type: pulls
          fail-on-cache-miss: ${{ env.ALLOW_FAILURE }}
          quiet: true

      # Runs only when there are pull requests to process and the model was
      # restored from cache.
      - name: "Predict pull labels"
        id: prediction
        if: ${{ steps.determine-pulls.outputs.pulls != '' && steps.restore-model.outputs.cache-hit == 'true' }}
        uses: dotnet/issue-labeler/predict@46125e85e6a568dc712f358c39f35317366f5eed # v2.0.0
        with:
          pulls: ${{ steps.determine-pulls.outputs.pulls }}
          label_prefix: ${{ env.LABEL_PREFIX }}
          threshold: ${{ env.THRESHOLD }}
          default_label: ${{ env.DEFAULT_LABEL }}
        env:
          GITHUB_TOKEN: ${{ github.token }}
        # env values are strings and the non-empty string 'false' is truthy, so
        # negating env.ALLOW_FAILURE would always yield false. Compare against
        # 'true' so errors are tolerated whenever failure is not allowed.
        continue-on-error: ${{ env.ALLOW_FAILURE != 'true' }}