Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,11 @@ The metrics exposed beyond the default Prometheus metrics are:
outgoing HTTP POST to upload the deployment record.
* `deptracker_post_record_ok`: the number of successful deployment
record uploads.
* `deptracker_post_record_rate_limited`: the number of post attempts
that were rate limited.
* `deptracker_post_record_no_attestation`: the number of attempts
that resulted in no matching attestation for the container digest
(404 "no artifacts found" responses).
* `deptracker_post_record_soft_fail`: the number of recoverable failed
attempts to upload the deployment record.
* `deptracker_post_record_hard_fail`: the number of failures to
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
github.com/google/uuid v1.6.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
Expand Down
6 changes: 6 additions & 0 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,12 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
)

if err := c.apiClient.PostOne(ctx, record); err != nil {
// Return if no artifact is found
var noArtifactErr *deploymentrecord.NoArtifactError
if errors.As(err, &noArtifactErr) {
return nil
}

// Make sure to not retry on client error messages
var clientErr *deploymentrecord.ClientError
if errors.As(err, &clientErr) {
Expand Down
75 changes: 59 additions & 16 deletions pkg/deploymentrecord/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,19 @@ func (c *ClientError) Unwrap() error {
return c.err
}

// NoArtifactError reports that no artifact was found for a deployment
// record. PostOne returns it when the API responds with a 404 status.
// The underlying cause, if any, is reachable through Unwrap.
type NoArtifactError struct {
	err error
}

// Error implements the error interface. The wrapped cause is appended to
// the message when present; a nil receiver or a nil cause yields the bare
// prefix instead of panicking on a nil dereference.
func (n *NoArtifactError) Error() string {
	if n == nil || n.err == nil {
		return "no artifact found"
	}
	return fmt.Sprintf("no artifact found: %s", n.err.Error())
}

// Unwrap returns the wrapped cause so that errors.Is and errors.As can
// inspect it. It returns nil when there is no cause.
func (n *NoArtifactError) Unwrap() error {
	if n == nil {
		return nil
	}
	return n.err
}

// PostOne posts a single deployment record to the GitHub deployment
// records API.
func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
Expand Down Expand Up @@ -249,34 +262,64 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
}

// Read the response body for error logging, then drain and close it so the connection can be reused
body, _ := io.ReadAll(resp.Body)
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
_, _ = io.Copy(io.Discard, resp.Body)
_ = resp.Body.Close()

lastErr = fmt.Errorf("unexpected status code: %d", resp.StatusCode)

// Don't retry on client errors (4xx) except for 429
// (rate limit)
if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 {
switch {
case resp.StatusCode == 404:
// No artifact found
dtmetrics.PostDeploymentRecordNoAttestation.Inc()
slog.Debug("no artifact attestation found, no record created",
"attempt", attempt,
"status_code", resp.StatusCode,
"container_name", record.Name,
"resp_msg", string(respBody),
"digest", record.Digest,
)
return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)}
case resp.StatusCode >= 400 && resp.StatusCode < 500:
if resp.Header.Get("retry-after") != "" || resp.Header.Get("x-ratelimit-remaining") == "0" {
// Rate limited — retry with backoff
// Could be 403 or 429
dtmetrics.PostDeploymentRecordRateLimited.Inc()
slog.Warn("rate limited, retrying",
"attempt", attempt,
"status_code", resp.StatusCode,
"retry_after", resp.Header.Get("Retry-After"),
"container_name", record.Name,
"resp_msg", string(respBody),
)
lastErr = fmt.Errorf("rate limited, attempt %d", attempt)
continue
}
// Don't retry non rate limiting client errors
dtmetrics.PostDeploymentRecordClientError.Inc()
slog.Warn("client error, aborting",
"attempt", attempt,
"error", lastErr,
"status_code", resp.StatusCode,
"msg", string(body),
"container_name", record.Name,
"resp_msg", string(respBody),
)
return &ClientError{err: fmt.Errorf("unexpected client err with status code %d", resp.StatusCode)}
default:
// Retry with backoff
dtmetrics.PostDeploymentRecordSoftFail.Inc()
slog.Debug("retriable error",
"attempt", attempt,
"status_code", resp.StatusCode,
"container_name", record.Name,
"resp_msg", string(respBody),
)
return &ClientError{err: lastErr}
lastErr = fmt.Errorf("server error, attempt %d", attempt)
}
dtmetrics.PostDeploymentRecordSoftFail.Inc()
slog.Debug("retriable server error",
"attempt", attempt,
"status_code", resp.StatusCode,
"msg", string(body),
)
}

dtmetrics.PostDeploymentRecordHardFail.Inc()
slog.Error("all retries exhausted",
"count", c.retries,
"error", lastErr)
"error", lastErr,
"container_name", record.Name,
)
return fmt.Errorf("all retries exhausted: %w", lastErr)
}
Loading
Loading