forkjo/services/packages/container/cleanup_sha256.go
Earl Warren 00749b3a8f fix: referenced sha256:* container images may be deleted
The inventory of the sha256:* images and the manifest index that
reference them is incomplete because it does not take into account any
image older than the expiration limit. As a result some sha256:* will
be considered orphaned although they are referenced from a manifest
index that was created more recently than the expiration limit.

There must not be any filtering based on the creation time when
building the inventory. The expiration limit must only be taken into
account when deleting orphaned images: those that are more recent than
the expiration limit must not be deleted.

This limit is specially important because it protects against a race
between a cleanup task and an ongoing mirroring task. A mirroring
task (such as skopeo sync) will first upload sha256:* images and then
create the corresponding manifest index. If a cleanup races against
it, the sha256:* images that are not yet referenced will be deleted
without skopeo noticing and the published index manifest that happens
at a later time will contain references to non-existent images.

(cherry picked from commit 0a5fd7fdb8)
2024-09-30 16:46:11 +00:00

158 lines
5.7 KiB
Go

// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: GPL-3.0-or-later
package container
import (
"context"
"strings"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
container_module "code.gitea.io/gitea/modules/packages/container"
"code.gitea.io/gitea/modules/timeutil"
)
var (
SHA256BatchSize = 500
SHA256Log = "cleanup dangling images with a sha256:* version"
SHA256LogStart = "Start to " + SHA256Log
SHA256LogFinish = "Finished to " + SHA256Log
)
func CleanupSHA256(ctx context.Context, olderThan time.Duration) error {
log.Info(SHA256LogStart)
err := cleanupSHA256(ctx, olderThan)
log.Info(SHA256LogFinish)
return err
}
func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
ctx, committer, err := db.TxContext(outerCtx)
if err != nil {
return err
}
defer committer.Close()
foundAtLeastOneSHA256 := false
type packageVersion struct {
id int64
created timeutil.TimeStamp
}
shaToPackageVersion := make(map[string]packageVersion, 100)
knownSHA := make(map[string]any, 100)
// compute before making the inventory to not race against ongoing
// image creations
old := timeutil.TimeStamp(time.Now().Add(-olderThan).Unix())
log.Debug("Look for all package_version.version that start with sha256:")
// Iterate over all container versions in ascending order and store
// in shaToPackageVersion all versions with a sha256: prefix. If an index
// manifest is found, the sha256: digest it references are removed
// from shaToPackageVersion. If the sha256: digest found in an index
// manifest is not already in shaToPackageVersion, it is stored in
// knownSHA to be dealt with later.
//
// Although it is theoretically possible that a sha256: is uploaded
// after the index manifest that references it, this is not the
// normal order of operations. First the sha256: version is uploaded
// and then the index manifest. When the iteration completes,
// knownSHA will therefore be empty most of the time and
// shaToPackageVersion will only contain unreferenced sha256: versions.
if err := db.GetEngine(ctx).
Select("`package_version`.`id`, `package_version`.`created_unix`, `package_version`.`lower_version`, `package_version`.`metadata_json`").
Join("INNER", "`package`", "`package`.`id` = `package_version`.`package_id`").
Where("`package`.`type` = ?", packages.TypeContainer).
OrderBy("`package_version`.`id` ASC").
Iterate(new(packages.PackageVersion), func(_ int, bean any) error {
v := bean.(*packages.PackageVersion)
if strings.HasPrefix(v.LowerVersion, "sha256:") {
shaToPackageVersion[v.LowerVersion] = packageVersion{id: v.ID, created: v.CreatedUnix}
foundAtLeastOneSHA256 = true
} else if strings.Contains(v.MetadataJSON, `"manifests":[{`) {
var metadata container_module.Metadata
if err := json.Unmarshal([]byte(v.MetadataJSON), &metadata); err != nil {
log.Error("package_version.id = %d package_version.metadata_json %s is not a JSON string containing valid metadata. It was ignored but it is an inconsistency in the database that should be looked at. %v", v.ID, v.MetadataJSON, err)
return nil
}
for _, manifest := range metadata.Manifests {
if _, ok := shaToPackageVersion[manifest.Digest]; ok {
delete(shaToPackageVersion, manifest.Digest)
} else {
knownSHA[manifest.Digest] = true
}
}
}
return nil
}); err != nil {
return err
}
for sha := range knownSHA {
delete(shaToPackageVersion, sha)
}
if len(shaToPackageVersion) == 0 {
if foundAtLeastOneSHA256 {
log.Debug("All container images with a version matching sha256:* are referenced by an index manifest")
} else {
log.Debug("There are no container images with a version matching sha256:*")
}
log.Info("Nothing to cleanup")
return nil
}
found := len(shaToPackageVersion)
log.Warn("%d container image(s) with a version matching sha256:* are not referenced by an index manifest", found)
log.Debug("Deleting unreferenced image versions from `package_version`, `package_file` and `package_property` (%d at a time)", SHA256BatchSize)
packageVersionIDs := make([]int64, 0, SHA256BatchSize)
tooYoung := 0
for _, p := range shaToPackageVersion {
if p.created < old {
packageVersionIDs = append(packageVersionIDs, p.id)
} else {
tooYoung++
}
}
if tooYoung > 0 {
log.Warn("%d out of %d container image(s) are not deleted because they were created less than %v ago", tooYoung, found, olderThan)
}
for len(packageVersionIDs) > 0 {
upper := min(len(packageVersionIDs), SHA256BatchSize)
versionIDs := packageVersionIDs[0:upper]
var packageFileIDs []int64
if err := db.GetEngine(ctx).Select("id").Table("package_file").In("version_id", versionIDs).Find(&packageFileIDs); err != nil {
return err
}
log.Info("Removing %d entries from `package_file` and `package_property`", len(packageFileIDs))
if _, err := db.GetEngine(ctx).In("id", packageFileIDs).Delete(&packages.PackageFile{}); err != nil {
return err
}
if _, err := db.GetEngine(ctx).In("ref_id", packageFileIDs).And("ref_type = ?", packages.PropertyTypeFile).Delete(&packages.PackageProperty{}); err != nil {
return err
}
log.Info("Removing %d entries from `package_version` and `package_property`", upper)
if _, err := db.GetEngine(ctx).In("id", versionIDs).Delete(&packages.PackageVersion{}); err != nil {
return err
}
if _, err := db.GetEngine(ctx).In("ref_id", versionIDs).And("ref_type = ?", packages.PropertyTypeVersion).Delete(&packages.PackageProperty{}); err != nil {
return err
}
packageVersionIDs = packageVersionIDs[upper:]
}
return committer.Commit()
}