From 6299c2ee561e26382092fce13956a0adb9b6dc56 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Fri, 13 Mar 2026 17:35:09 -0400 Subject: [PATCH 1/7] wip: improve logging and metrics --- internal/controller/controller.go | 6 +++ pkg/deploymentrecord/client.go | 61 +++++++++++++++++++++++-------- pkg/dtmetrics/prom.go | 8 ++++ 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 3749e56..4c44952 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -471,6 +471,12 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta ) if err := c.apiClient.PostOne(ctx, record); err != nil { + // Return if no artifact is found + var noArtifactErr *deploymentrecord.NoArtifactError + if errors.As(err, &noArtifactErr) { + return nil + } + // Make sure to not retry on client error messages var clientErr *deploymentrecord.ClientError if errors.As(err, &clientErr) { diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index fe50b74..fb673f5 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -160,6 +160,19 @@ func (c *ClientError) Unwrap() error { return c.err } +// NoArtifactError represents a 404 client response where no artifact was found. +type NoArtifactError struct { + err error +} + +func (n *NoArtifactError) Error() string { + return fmt.Sprintf("client_error: %s", n.err.Error()) +} + +func (n *NoArtifactError) Unwrap() error { + return n.err +} + // PostOne posts a single deployment record to the GitHub deployment // records API. func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { @@ -249,29 +262,47 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { } // Drain and close response body to enable connection reuse by reading body for error logging - body, _ := io.ReadAll(resp.Body) + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + _, _ = io.Copy(io.Discard, resp.Body) _ = resp.Body.Close() - lastErr = fmt.Errorf("unexpected status code: %d", resp.StatusCode) - - // Don't retry on client errors (4xx) except for 429 - // (rate limit) - if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 { + switch { + case resp.StatusCode == 429: + // Retry with backoff + dtmetrics.PostDeploymentRecordSoftFail.Inc() + slog.Debug("rate limited, retrying", + "attempt", attempt, + "status_code", resp.StatusCode, + ) + lastErr = fmt.Errorf("rate limited, attempt %d", attempt) + continue + case resp.StatusCode == 404: + // No artifact found + dtmetrics.PostDeploymentRecordNotSaved.Inc() + slog.Debug("no artifact attestation found, no record created", + "attempt", attempt, + "status_code", resp.StatusCode, + ) + return &NoArtifactError{err: fmt.Errorf("no artifact found")} + case resp.StatusCode >= 400 && resp.StatusCode < 500: + // Don't retry non rate limiting client errors dtmetrics.PostDeploymentRecordClientError.Inc() slog.Warn("client error, aborting", "attempt", attempt, - "error", lastErr, "status_code", resp.StatusCode, - "msg", string(body), + "resp_msg", string(body), + ) + return &ClientError{err: fmt.Errorf("unexpected client err with status code %d", resp.StatusCode)} + default: + // Retry with backoff + dtmetrics.PostDeploymentRecordSoftFail.Inc() + slog.Debug("retriable error", + "attempt", attempt, + "status_code", resp.StatusCode, + "resp_msg", string(body), ) - return &ClientError{err: lastErr} + lastErr = fmt.Errorf("server error, attempt %d", attempt) } - dtmetrics.PostDeploymentRecordSoftFail.Inc() - slog.Debug("retriable server error", - "attempt", attempt, - "status_code", resp.StatusCode, - "msg", string(body), - ) } dtmetrics.PostDeploymentRecordHardFail.Inc() diff --git a/pkg/dtmetrics/prom.go b/pkg/dtmetrics/prom.go index 8e76ff8..a2e6071 100644 --- a/pkg/dtmetrics/prom.go +++ b/pkg/dtmetrics/prom.go @@ -49,6 +49,14 @@ var ( }, ) + //nolint: revive + PostDeploymentRecordNotSaved = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "deptracker_post_record_not_saved", + Help: "The total number of successful posts with no attestation that result in no record creation", + }, + ) + //nolint: revive PostDeploymentRecordSoftFail = promauto.NewCounter( prometheus.CounterOpts{ From 966aea82d66588c666f481c7a4254b6eca838836 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Mon, 16 Mar 2026 17:00:09 -0400 Subject: [PATCH 2/7] refactor client error logging, change not_saved metric to no_attestation, add rate_limited metric --- README.md | 4 + go.mod | 1 + pkg/deploymentrecord/client.go | 39 ++-- pkg/deploymentrecord/client_test.go | 337 ++++++++++++++++++++++++++++ pkg/dtmetrics/prom.go | 14 +- 5 files changed, 376 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 9f66b14..cc1b354 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,10 @@ The metrics exposed beyond the default Prometheus metrics are: outgoing HTTP POST to upload the deployment record. * `deptracker_post_record_ok`: the number of successful deployment record uploads. +* `deptracker_post_record_rate_limited`: the number of post attempts + that were rate limited. +* `deptracker_post_record_no_attestation`: the number of successful + posts for container digest with no matching attestation for the org. * `deptracker_post_record_soft_fail`: the number of recoverable failed attempts to upload the deployment record. * `deptracker_post_record_hard_fail`: the number of failures to diff --git a/go.mod b/go.mod index 47393b5..e6bac62 100644 --- a/go.mod +++ b/go.mod @@ -32,6 +32,7 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index fb673f5..231d223 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -166,7 +166,10 @@ type NoArtifactError struct { } func (n *NoArtifactError) Error() string { - return fmt.Sprintf("client_error: %s", n.err.Error()) + if n.err == nil { + return "no artifact found" + } + return fmt.Sprintf("no artifact found: %s", n.err.Error()) } func (n *NoArtifactError) Unwrap() error { @@ -262,35 +265,39 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { } // Drain and close response body to enable connection reuse by reading body for error logging - body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) _, _ = io.Copy(io.Discard, resp.Body) _ = resp.Body.Close() switch { - case resp.StatusCode == 429: - // Retry with backoff - dtmetrics.PostDeploymentRecordSoftFail.Inc() - slog.Debug("rate limited, retrying", - "attempt", attempt, - "status_code", resp.StatusCode, - ) - lastErr = fmt.Errorf("rate limited, attempt %d", attempt) - continue - case resp.StatusCode == 404: + case resp.StatusCode == 404 && bytes.Contains(respBody, []byte("no artifacts found")): // No artifact found - dtmetrics.PostDeploymentRecordNotSaved.Inc() + dtmetrics.PostDeploymentRecordNoAttestation.Inc() slog.Debug("no artifact attestation found, no record created", "attempt", attempt, "status_code", resp.StatusCode, ) - return &NoArtifactError{err: fmt.Errorf("no artifact found")} + return &NoArtifactError{} case resp.StatusCode >= 400 && resp.StatusCode < 500: + if resp.Header.Get("retry-after") != "" || resp.Header.Get("x-ratelimit-remaining") == "0" { + // rate limited — retry with backoff + // Could be 403 or 429 + dtmetrics.PostDeploymentRecordRateLimited.Inc() + slog.Warn("rate limited, retrying", + "attempt", attempt, + "status_code", resp.StatusCode, + "retry_after", resp.Header.Get("Retry-After"), + "resp_msg", string(respBody), + ) + lastErr = fmt.Errorf("rate limited, attempt %d", attempt) + continue + } // Don't retry non rate limiting client errors dtmetrics.PostDeploymentRecordClientError.Inc() slog.Warn("client error, aborting", "attempt", attempt, "status_code", resp.StatusCode, - "resp_msg", string(body), + "resp_msg", string(respBody), ) return &ClientError{err: fmt.Errorf("unexpected client err with status code %d", resp.StatusCode)} default: @@ -299,7 +306,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { slog.Debug("retriable error", "attempt", attempt, "status_code", resp.StatusCode, - "resp_msg", string(body), + "resp_msg", string(respBody), ) lastErr = fmt.Errorf("server error, attempt %d", attempt) } diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index 97e4bba..e3d5519 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -1,9 +1,18 @@ package deploymentrecord import ( + "context" + "errors" + "net/http" + "net/http/httptest" "strings" + "sync/atomic" "testing" "time" + + "github.com/github/deployment-tracker/pkg/dtmetrics" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" ) func TestNewClient(t *testing.T) { @@ -264,3 +273,331 @@ func TestValidOrgPattern(t *testing.T) { } } } + +// testRecord returns a minimal valid DeploymentRecord for testing. +func testRecord() *DeploymentRecord { + return NewDeploymentRecord( + "ghcr.io/my-org/my-image", + "sha256:abc123", + "v1.0.0", + "production", + "us-east-1", + "cluster-1", + StatusDeployed, + "my-deployment", + nil, + nil, + ) +} + +// allCounters returns all PostDeploymentRecord counters for snapshotting. +func allCounters() []prometheus.Counter { + return []prometheus.Counter{ + dtmetrics.PostDeploymentRecordOk, + dtmetrics.PostDeploymentRecordNoAttestation, + dtmetrics.PostDeploymentRecordRateLimited, + dtmetrics.PostDeploymentRecordSoftFail, + dtmetrics.PostDeploymentRecordHardFail, + dtmetrics.PostDeploymentRecordClientError, + } +} + +func TestPostOne(t *testing.T) { + tests := []struct { + name string + record *DeploymentRecord + retries int + handler http.HandlerFunc + wantErr bool + errType any // expected error type for errors.As + errContain string + wantOk float64 + wantNoAttestation float64 + wantRateLimited float64 + wantSoftFail float64 + wantHardFail float64 + wantClientError float64 + }{ + { + name: "success on 200", + record: testRecord(), + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }, + wantOk: 1, + }, + { + name: "success on 201", + record: testRecord(), + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusCreated) + }, + wantOk: 1, + }, + { + name: "nil record returns error", + record: nil, + wantErr: true, + handler: func(w http.ResponseWriter, r *http.Request) { + t.Fatal("server should not be called with nil record") + }, + errContain: "record cannot be nil", + }, + { + name: "404 with no artifacts found returns NoArtifactError", + record: testRecord(), + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"message":"no artifacts found"}`)) + }, + wantErr: true, + errType: &NoArtifactError{}, + wantNoAttestation: 1, + }, + { + name: "404 without no artifacts found returns ClientError", + record: testRecord(), + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"message":"not found"}`)) + }, + wantErr: true, + errType: &ClientError{}, + wantClientError: 1, + }, + { + name: "400 returns ClientError", + record: testRecord(), + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte("bad request")) + }, + wantErr: true, + errType: &ClientError{}, + wantClientError: 1, + }, + { + name: "403 forbidden returns ClientError", + record: testRecord(), + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"message":"forbidden"}`)) + }, + wantErr: true, + errType: &ClientError{}, + wantClientError: 1, + }, + { + name: "429 rate limit retries then fails", + record: testRecord(), + retries: 1, + handler: func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "1") + w.WriteHeader(http.StatusTooManyRequests) + }, + wantErr: true, + errContain: "all retries exhausted", + wantRateLimited: 2, + wantHardFail: 1, + }, + { + name: "403 with Retry-After header retries then fails", + record: testRecord(), + retries: 1, + handler: func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "1") + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"message":"rate limit"}`)) + }, + wantErr: true, + errContain: "all retries exhausted", + wantRateLimited: 2, + wantHardFail: 1, + }, + { + name: "403 with x-ratelimit-remaining 0 retries then fails", + record: testRecord(), + retries: 1, + handler: func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-Ratelimit-Remaining", "0") + w.WriteHeader(http.StatusForbidden) + }, + wantErr: true, + errContain: "all retries exhausted", + wantRateLimited: 2, + wantHardFail: 1, + }, + { + name: "500 server error retries then fails", + record: testRecord(), + retries: 1, + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("internal error")) + }, + wantErr: true, + errContain: "all retries exhausted", + wantSoftFail: 2, + wantHardFail: 1, + }, + { + name: "500 then 200 succeeds on retry", + record: testRecord(), + retries: 2, + handler: func() http.HandlerFunc { + var count atomic.Int32 + return func(w http.ResponseWriter, r *http.Request) { + if count.Add(1) == 1 { + w.WriteHeader(http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusOK) + } + }(), + wantSoftFail: 1, + wantOk: 1, + }, + { + name: "429 then 200 succeeds on retry", + record: testRecord(), + retries: 2, + handler: func() http.HandlerFunc { + var count atomic.Int32 + return func(w http.ResponseWriter, r *http.Request) { + if count.Add(1) == 1 { + w.Header().Set("Retry-After", "0") + w.WriteHeader(http.StatusTooManyRequests) + return + } + w.WriteHeader(http.StatusOK) + } + }(), + wantRateLimited: 1, + wantOk: 1, + }, + { + name: "403 secondary rate limit then 200 succeeds on retry", + record: testRecord(), + retries: 2, + handler: func() http.HandlerFunc { + var count atomic.Int32 + return func(w http.ResponseWriter, r *http.Request) { + if count.Add(1) == 1 { + w.Header().Set("Retry-After", "0") + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"message":"secondary rate limit"}`)) + return + } + w.WriteHeader(http.StatusOK) + } + }(), + wantRateLimited: 1, + wantOk: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + srv := httptest.NewServer(tt.handler) + t.Cleanup(srv.Close) + + client, err := NewClient(srv.URL, "test-org", WithRetries(tt.retries)) + if err != nil { + t.Fatalf("failed to create client: %v", err) + } + + // Snapshot all counters before the call + counters := allCounters() + snapshots := make([]float64, len(counters)) + for i, c := range counters { + snapshots[i] = testutil.ToFloat64(c) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + + err = client.PostOne(ctx, tt.record) + + if tt.wantErr { + if err == nil { + t.Fatal("expected error, got nil") + } + if tt.errType != nil { + target := tt.errType + switch target.(type) { + case *NoArtifactError: + var e *NoArtifactError + if !errors.As(err, &e) { + t.Errorf("expected NoArtifactError, got %T: %v", err, err) + } + case *ClientError: + var e *ClientError + if !errors.As(err, &e) { + t.Errorf("expected ClientError, got %T: %v", err, err) + } + } + } + if tt.errContain != "" && !strings.Contains(err.Error(), tt.errContain) { + t.Errorf("error %q should contain %q", err.Error(), tt.errContain) + } + } else if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Assert all metric deltas + wantDeltas := []float64{ + tt.wantOk, + tt.wantNoAttestation, + tt.wantRateLimited, + tt.wantSoftFail, + tt.wantHardFail, + tt.wantClientError, + } + names := []string{ + "PostDeploymentRecordOk", + "PostDeploymentRecordNoAttestation", + "PostDeploymentRecordRateLimited", + "PostDeploymentRecordSoftFail", + "PostDeploymentRecordHardFail", + "PostDeploymentRecordClientError", + } + for i, c := range counters { + got := testutil.ToFloat64(c) - snapshots[i] + if got != wantDeltas[i] { + t.Errorf("%s delta = %v, want %v", names[i], got, wantDeltas[i]) + } + } + }) + } +} + +func TestPostOneSendsCorrectRequest(t *testing.T) { + record := testRecord() + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + t.Errorf("method = %s, want POST", r.Method) + } + if got := r.URL.Path; got != "/orgs/test-org/artifacts/metadata/deployment-record" { + t.Errorf("path = %s, want /orgs/test-org/artifacts/metadata/deployment-record", got) + } + if got := r.Header.Get("Content-Type"); got != "application/json" { + t.Errorf("Content-Type = %s, want application/json", got) + } + if got := r.Header.Get("Authorization"); got != "Bearer test-token" { + t.Errorf("Authorization = %s, want Bearer test-token", got) + } + w.WriteHeader(http.StatusOK) + })) + t.Cleanup(srv.Close) + + client, err := NewClient(srv.URL, "test-org", + WithAPIToken("test-token")) + if err != nil { + t.Fatalf("failed to create client: %v", err) + } + + if err := client.PostOne(context.Background(), record); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} diff --git a/pkg/dtmetrics/prom.go b/pkg/dtmetrics/prom.go index a2e6071..1585671 100644 --- a/pkg/dtmetrics/prom.go +++ b/pkg/dtmetrics/prom.go @@ -50,10 +50,18 @@ var ( ) //nolint: revive - PostDeploymentRecordNotSaved = promauto.NewCounter( + PostDeploymentRecordNoAttestation = promauto.NewCounter( prometheus.CounterOpts{ - Name: "deptracker_post_record_not_saved", - Help: "The total number of successful posts with no attestation that result in no record creation", + Name: "deptracker_post_record_no_attestation", + Help: "The total number of successful posts for container digest with no matching attestation for the org", + }, + ) + + //nolint: revive + PostDeploymentRecordRateLimited = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "deptracker_post_record_rate_limited", + Help: "The total number of post failures due to rate limits", }, ) From e29627a27944b9dc4b9a4a44f84c7901900c5ee6 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Mon, 16 Mar 2026 17:16:42 -0400 Subject: [PATCH 3/7] fix lint findings --- README.md | 4 ++-- pkg/deploymentrecord/client.go | 5 +---- pkg/deploymentrecord/client_test.go | 31 ++++++++++++++++------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index cc1b354..999350c 100644 --- a/README.md +++ b/README.md @@ -184,9 +184,9 @@ The metrics exposed beyond the default Prometheus metrics are: outgoing HTTP POST to upload the deployment record. * `deptracker_post_record_ok`: the number of successful deployment record uploads. -* `deptracker_post_record_rate_limited`: the number of post attempts +* `deptracker_post_record_rate_limited`: the number of post attempts that were rate limited. -* `deptracker_post_record_no_attestation`: the number of successful +* `deptracker_post_record_no_attestation`: the number of successful posts for container digest with no matching attestation for the org. * `deptracker_post_record_soft_fail`: the number of recoverable failed attempts to upload the deployment record. diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 231d223..ad980fb 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -166,9 +166,6 @@ type NoArtifactError struct { } func (n *NoArtifactError) Error() string { - if n.err == nil { - return "no artifact found" - } return fmt.Sprintf("no artifact found: %s", n.err.Error()) } @@ -277,7 +274,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { "attempt", attempt, "status_code", resp.StatusCode, ) - return &NoArtifactError{} + return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)} case resp.StatusCode >= 400 && resp.StatusCode < 500: if resp.Header.Get("retry-after") != "" || resp.Header.Get("x-ratelimit-remaining") == "0" { // rate limited — retry with backoff diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index e3d5519..3bc49d8 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -321,7 +321,7 @@ func TestPostOne(t *testing.T) { { name: "success on 200", record: testRecord(), - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) }, wantOk: 1, @@ -329,7 +329,7 @@ func TestPostOne(t *testing.T) { { name: "success on 201", record: testRecord(), - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusCreated) }, wantOk: 1, @@ -338,7 +338,7 @@ func TestPostOne(t *testing.T) { name: "nil record returns error", record: nil, wantErr: true, - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(_ http.ResponseWriter, _ *http.Request) { t.Fatal("server should not be called with nil record") }, errContain: "record cannot be nil", @@ -346,18 +346,19 @@ func TestPostOne(t *testing.T) { { name: "404 with no artifacts found returns NoArtifactError", record: testRecord(), - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotFound) _, _ = w.Write([]byte(`{"message":"no artifacts found"}`)) }, wantErr: true, errType: &NoArtifactError{}, + errContain: "sha256:abc123", wantNoAttestation: 1, }, { name: "404 without no artifacts found returns ClientError", record: testRecord(), - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotFound) _, _ = w.Write([]byte(`{"message":"not found"}`)) }, @@ -368,7 +369,7 @@ func TestPostOne(t *testing.T) { { name: "400 returns ClientError", record: testRecord(), - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusBadRequest) _, _ = w.Write([]byte("bad request")) }, @@ -379,7 +380,7 @@ func TestPostOne(t *testing.T) { { name: "403 forbidden returns ClientError", record: testRecord(), - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusForbidden) _, _ = w.Write([]byte(`{"message":"forbidden"}`)) }, @@ -391,7 +392,7 @@ func TestPostOne(t *testing.T) { name: "429 rate limit retries then fails", record: testRecord(), retries: 1, - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("Retry-After", "1") w.WriteHeader(http.StatusTooManyRequests) }, @@ -404,7 +405,7 @@ func TestPostOne(t *testing.T) { name: "403 with Retry-After header retries then fails", record: testRecord(), retries: 1, - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("Retry-After", "1") w.WriteHeader(http.StatusForbidden) _, _ = w.Write([]byte(`{"message":"rate limit"}`)) @@ -418,7 +419,7 @@ func TestPostOne(t *testing.T) { name: "403 with x-ratelimit-remaining 0 retries then fails", record: testRecord(), retries: 1, - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("X-Ratelimit-Remaining", "0") w.WriteHeader(http.StatusForbidden) }, @@ -431,7 +432,7 @@ func TestPostOne(t *testing.T) { name: "500 server error retries then fails", record: testRecord(), retries: 1, - handler: func(w http.ResponseWriter, r *http.Request) { + handler: func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusInternalServerError) _, _ = w.Write([]byte("internal error")) }, @@ -446,7 +447,7 @@ func TestPostOne(t *testing.T) { retries: 2, handler: func() http.HandlerFunc { var count atomic.Int32 - return func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, _ *http.Request) { if count.Add(1) == 1 { w.WriteHeader(http.StatusInternalServerError) return @@ -463,7 +464,7 @@ func TestPostOne(t *testing.T) { retries: 2, handler: func() http.HandlerFunc { var count atomic.Int32 - return func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, _ *http.Request) { if count.Add(1) == 1 { w.Header().Set("Retry-After", "0") w.WriteHeader(http.StatusTooManyRequests) @@ -481,7 +482,7 @@ func TestPostOne(t *testing.T) { retries: 2, handler: func() http.HandlerFunc { var count atomic.Int32 - return func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, _ *http.Request) { if count.Add(1) == 1 { w.Header().Set("Retry-After", "0") w.WriteHeader(http.StatusForbidden) @@ -535,6 +536,8 @@ func TestPostOne(t *testing.T) { if !errors.As(err, &e) { t.Errorf("expected ClientError, got %T: %v", err, err) } + default: + t.Fatalf("unexpected error type in test: %T", target) } } if tt.errContain != "" && !strings.Contains(err.Error(), tt.errContain) { From 6b975cd29b9a64a4c311ed48b9f4c6568373167f Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Mon, 16 Mar 2026 17:23:57 -0400 Subject: [PATCH 4/7] add record digest to no artifact debug message, add container name to log messages --- pkg/deploymentrecord/client.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index ad980fb..028c6ba 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -273,6 +273,8 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { slog.Debug("no artifact attestation found, no record created", "attempt", attempt, "status_code", resp.StatusCode, + "container_name", record.Name, + "digest", record.Digest, ) return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)} case resp.StatusCode >= 400 && resp.StatusCode < 500: @@ -284,6 +286,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { "attempt", attempt, "status_code", resp.StatusCode, "retry_after", resp.Header.Get("Retry-After"), + "container_name", record.Name, "resp_msg", string(respBody), ) lastErr = fmt.Errorf("rate limited, attempt %d", attempt) @@ -294,6 +297,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { slog.Warn("client error, aborting", "attempt", attempt, "status_code", resp.StatusCode, + "container_name", record.Name, "resp_msg", string(respBody), ) return &ClientError{err: fmt.Errorf("unexpected client err with status code %d", resp.StatusCode)} @@ -303,6 +307,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { slog.Debug("retriable error", "attempt", attempt, "status_code", resp.StatusCode, + "container_name", record.Name, "resp_msg", string(respBody), ) lastErr = fmt.Errorf("server error, attempt %d", attempt) @@ -312,6 +317,8 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { dtmetrics.PostDeploymentRecordHardFail.Inc() slog.Error("all retries exhausted", "count", c.retries, - "error", lastErr) + "error", lastErr, + "container_name", record.Name, + ) return fmt.Errorf("all retries exhausted: %w", lastErr) } From bf817f804f6b3d84f56af0be865b3d294232294c Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Mon, 16 Mar 2026 17:41:43 -0400 Subject: [PATCH 5/7] improve documentation wording --- README.md | 5 +++-- pkg/deploymentrecord/client.go | 2 +- pkg/dtmetrics/prom.go | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 999350c..0f86d4e 100644 --- a/README.md +++ b/README.md @@ -186,8 +186,9 @@ The metrics exposed beyond the default Prometheus metrics are: record uploads. * `deptracker_post_record_rate_limited`: the number of post attempts that were rate limited. -* `deptracker_post_record_no_attestation`: the number of successful - posts for container digest with no matching attestation for the org. +* `deptracker_post_record_no_attestation`: the number of attempts + that resulted in no matching attestation for the container digest + (404 "no artifacts found" responses). * `deptracker_post_record_soft_fail`: the number of recoverable failed attempts to upload the deployment record. * `deptracker_post_record_hard_fail`: the number of failures to diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 028c6ba..384c701 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -160,7 +160,7 @@ func (c *ClientError) Unwrap() error { return c.err } -// NoArtifactError represents a 404 client response where no artifact was found. +// NoArtifactError represents a 404 client response whose body indicates "no artifacts found". type NoArtifactError struct { err error } diff --git a/pkg/dtmetrics/prom.go b/pkg/dtmetrics/prom.go index 1585671..79c01eb 100644 --- a/pkg/dtmetrics/prom.go +++ b/pkg/dtmetrics/prom.go @@ -53,7 +53,7 @@ var ( PostDeploymentRecordNoAttestation = promauto.NewCounter( prometheus.CounterOpts{ Name: "deptracker_post_record_no_attestation", - Help: "The total number of successful posts for container digest with no matching attestation for the org", + Help: "The total number of post attempts that resulted in no matching attestation for the container digest (404 'no artifacts found' responses)", }, ) @@ -61,7 +61,7 @@ var ( PostDeploymentRecordRateLimited = promauto.NewCounter( prometheus.CounterOpts{ Name: "deptracker_post_record_rate_limited", - Help: "The total number of post failures due to rate limits", + Help: "The total number of post attempts that were rate limited", }, ) From ca8c6f477376b1ea44666e932b752a931d4929c5 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Tue, 17 Mar 2026 10:49:35 -0400 Subject: [PATCH 6/7] address comments --- pkg/deploymentrecord/client.go | 4 ++-- pkg/deploymentrecord/client_test.go | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 384c701..89158bc 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -267,7 +267,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { _ = resp.Body.Close() switch { - case resp.StatusCode == 404 && bytes.Contains(respBody, []byte("no artifacts found")): + case resp.StatusCode == 404: // No artifact found dtmetrics.PostDeploymentRecordNoAttestation.Inc() slog.Debug("no artifact attestation found, no record created", @@ -279,7 +279,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)} case resp.StatusCode >= 400 && resp.StatusCode < 500: if resp.Header.Get("retry-after") != "" || resp.Header.Get("x-ratelimit-remaining") == "0" { - // rate limited — retry with backoff + // Rate limited — retry with backoff // Could be 403 or 429 dtmetrics.PostDeploymentRecordRateLimited.Inc() slog.Warn("rate limited, retrying", diff --git a/pkg/deploymentrecord/client_test.go b/pkg/deploymentrecord/client_test.go index 3bc49d8..d72e54f 100644 --- a/pkg/deploymentrecord/client_test.go +++ b/pkg/deploymentrecord/client_test.go @@ -356,33 +356,33 @@ func TestPostOne(t *testing.T) { wantNoAttestation: 1, }, { - name: "404 without no artifacts found returns ClientError", + name: "400 returns ClientError", record: testRecord(), handler: func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusNotFound) - _, _ = w.Write([]byte(`{"message":"not found"}`)) + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte("bad request")) }, wantErr: true, errType: &ClientError{}, wantClientError: 1, }, { - name: "400 returns ClientError", + name: "403 forbidden returns ClientError", record: testRecord(), handler: func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusBadRequest) - _, _ = w.Write([]byte("bad request")) + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte(`{"message":"forbidden"}`)) }, wantErr: true, errType: &ClientError{}, wantClientError: 1, }, { - name: "403 forbidden returns ClientError", + name: "422 invalid body returns ClientError", record: testRecord(), handler: func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusForbidden) - _, _ = w.Write([]byte(`{"message":"forbidden"}`)) + w.WriteHeader(http.StatusUnprocessableEntity) + _, _ = w.Write([]byte("invalid body")) }, wantErr: true, errType: &ClientError{}, From 9df0f358c272810c02569f39f06b7aa0a1daf4c3 Mon Sep 17 00:00:00 2001 From: Eric Pickard Date: Tue, 17 Mar 2026 10:58:41 -0400 Subject: [PATCH 7/7] add resp_msg to 404 logs --- pkg/deploymentrecord/client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 89158bc..fa6286a 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -274,6 +274,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { "attempt", attempt, "status_code", resp.StatusCode, "container_name", record.Name, + "resp_msg", string(respBody), "digest", record.Digest, ) return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)}