Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions deployments/k8s/dev/docker-desktop-oracle/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ patches:
target:
kind: Deployment
name: tmi-server
- path: ../docker-desktop/patches/redis-pullpolicy.yaml
target:
kind: Deployment
name: redis
- path: ../docker-desktop/patches/extractor-image.yaml
target:
group: tmi.dev
Expand Down
4 changes: 4 additions & 0 deletions deployments/k8s/dev/docker-desktop/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ patches:
target:
kind: Deployment
name: tmi-server
- path: patches/redis-pullpolicy.yaml
target:
kind: Deployment
name: redis
- path: patches/extractor-image.yaml
target:
group: tmi.dev
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# docker-desktop pins redis to IfNotPresent so the base image pre-imported into
# the node's containerd (deploy.py build_and_push, #517) is used and the node
# never re-pulls cgr.dev/chainguard/redis:latest (a :latest tag would otherwise
# default to Always). redis.yml has no imagePullPolicy field, so this is an `add`,
# not a `replace` (contrast server-pullpolicy.yaml, whose manifest sets Always).
# k3s does not use this patch: it remaps redis to redis:7-alpine (Docker Hub).
- op: add
path: /spec/template/spec/containers/0/imagePullPolicy
value: IfNotPresent
4 changes: 4 additions & 0 deletions deployments/k8s/dev/docker-desktop/postgres.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ spec:
containers:
- name: postgres
image: cgr.dev/chainguard/postgres:latest
# The image is pre-imported into the node's containerd by deploy.py
# (build_and_push), so use the imported copy and never re-pull from
# cgr.dev (a :latest tag would otherwise default to Always). See #517.
imagePullPolicy: IfNotPresent
envFrom:
- secretRef: { name: tmi-postgres }
env:
Expand Down
17 changes: 7 additions & 10 deletions scripts/devenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
cluster up|down the cluster target

Global: --db postgres|oracle (default postgres), --cluster docker-desktop|k3s
(default docker-desktop), --no-workers, --yes
(default docker-desktop), --yes
"""
from __future__ import annotations

Expand All @@ -39,7 +39,7 @@
def cmd_up(args) -> None:
cluster.up(cluster=args.cluster)
deploy.start(db=args.db, cluster_target=args.cluster,
no_workers=args.no_workers, skip_context_guard=args.yes)
skip_context_guard=args.yes)


def cmd_down(args) -> None:
Expand All @@ -50,14 +50,14 @@ def cmd_down(args) -> None:

def cmd_restart(args) -> None:
deploy.restart(db=args.db, cluster_target=args.cluster,
no_workers=args.no_workers, skip_context_guard=args.yes)
skip_context_guard=args.yes)


def cmd_reset(args) -> None:
log_info("dev-reset: redeploying the in-cluster stack (keeping cluster + db data)")
deploy.teardown(db=args.db)
deploy.start(db=args.db, cluster_target=args.cluster,
no_workers=args.no_workers, skip_context_guard=args.yes)
skip_context_guard=args.yes)
log_success("dev-reset complete")


Expand All @@ -70,7 +70,7 @@ def cmd_nuke(args) -> None:
deploy.remove_local_images(args.db, cluster_target=args.cluster)
_clean_logs_and_files()
deploy.start(db=args.db, cluster_target=args.cluster,
no_workers=args.no_workers, skip_context_guard=args.yes)
skip_context_guard=args.yes)
log_success(f"dev-nuke complete (fresh {args.cluster} environment up)")


Expand All @@ -86,7 +86,7 @@ def cmd_status(args) -> None:

def cmd_deploy(args) -> None:
deploy.start(db=args.db, cluster_target=args.cluster,
no_workers=args.no_workers, skip_context_guard=args.yes)
skip_context_guard=args.yes)


def cmd_logs(args) -> None:
Expand All @@ -109,7 +109,7 @@ def _add_global_options(
*,
is_subparser: bool = False,
) -> None:
"""Add --db/--no-workers/--yes to a parser (top-level or subparser).
"""Add --db/--cluster/--yes to a parser (top-level or subparser).

Defined as a helper so the exact same options are added to both the
top-level parser and each subparser, enabling both orderings:
Expand All @@ -127,15 +127,12 @@ def _add_global_options(
default=argparse.SUPPRESS)
parser.add_argument("--cluster", choices=["k3s", "docker-desktop"],
default=argparse.SUPPRESS)
parser.add_argument("--no-workers", action="store_true", dest="no_workers",
default=argparse.SUPPRESS)
parser.add_argument("--yes", action="store_true", default=argparse.SUPPRESS,
help="Skip the local-kube-context safety check")
else:
parser.add_argument("--db", choices=["postgres", "oracle"], default="postgres")
parser.add_argument("--cluster", choices=["k3s", "docker-desktop"], default="docker-desktop",
help="Kube cluster target: docker-desktop (default) or k3s (remote)")
parser.add_argument("--no-workers", action="store_true", dest="no_workers")
parser.add_argument("--yes", action="store_true",
help="Skip the local-kube-context safety check")

Expand Down
88 changes: 55 additions & 33 deletions scripts/lib/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@
# low-throughput from the host (test setup only), so a port-forward is fine here.
REDIS_PORT_FORWARD_PID = "/tmp/tmi-dev-redis-portforward.pid"

# Public base images the docker-desktop node would otherwise pull from cgr.dev on
# first bring-up. Docker Desktop Kubernetes' containerd pulls these independently
# of the host Docker daemon, and that first pull occasionally fails with a
# transient EOF from cgr.dev (#517), leaving postgres/redis in ErrImagePull. We
# instead `docker pull` them on the host and import them into the node's
# containerd alongside the tmi-* images, then pin imagePullPolicy: IfNotPresent on
# the postgres/redis manifests so the imported copy is used and no cgr.dev pull is
# attempted. KEEP THESE IN SYNC with the image refs in
# deployments/k8s/dev/docker-desktop/postgres.yml and deployments/k8s/dev/redis.yml.
DD_POSTGRES_IMAGE = "cgr.dev/chainguard/postgres:latest"
DD_REDIS_IMAGE = "cgr.dev/chainguard/redis:latest"
DD_BASE_IMAGES = (DD_POSTGRES_IMAGE, DD_REDIS_IMAGE)


# ---------------------------------------------------------------------------
# Pure helpers
Expand Down Expand Up @@ -95,15 +108,12 @@ def overlay_dir_for(db: str, cluster_target: str = "docker-desktop") -> str:
raise ValueError(f"unknown cluster target: {cluster_target!r}")


def _no_workers_files(db: str) -> tuple[str, ...]:
"""Return the per-file manifest list for --no-workers mode.

controller.yml and redis.yml are shared across DB flavors; only the server
manifest filename differs (server.yml for postgres, server-oracle.yml for oracle).
All files physically live in DEV_DIR.
"""
server_file = "server-oracle.yml" if db == "oracle" else "server.yml"
return ("controller.yml", "redis.yml", server_file)
def dd_base_images_for(db: str) -> tuple[str, ...]:
"""Public base images to pre-import into the docker-desktop node for the given
DB flavor (#517). Redis is always deployed; in-cluster Postgres is deployed
only for the postgres flavor (oracle uses an external ADB and brings up no
Postgres pod), so skip the Postgres base image there."""
return (DD_REDIS_IMAGE,) if db == "oracle" else DD_BASE_IMAGES


def content_hash(text: str) -> str:
Expand All @@ -128,8 +138,20 @@ def import_image_to_node(ref: str, node: str) -> None:
log_info(f"Importing {ref} -> {node} containerd (k8s.io)")
saver = subprocess.Popen(save_cmd, stdout=subprocess.PIPE)
try:
importer = subprocess.Popen(import_cmd, stdin=saver.stdout)
saver.stdout.close() # allow saver to receive SIGPIPE if importer exits
try:
importer = subprocess.Popen(import_cmd, stdin=saver.stdout)
except BaseException:
# The importer never started, so nobody will drain saver's stdout.
# Kill saver before the finally's wait() so it can't block forever
# writing into a pipe with no reader, then re-raise.
saver.kill()
raise
finally:
# Release the parent's copy of the pipe's read end unconditionally,
# whether or not the importer Popen raised. This both lets saver
# receive SIGPIPE if the importer exits and guarantees saver.wait()
# below cannot deadlock on a pipe the parent is still holding open.
saver.stdout.close()
importer.communicate()
if importer.returncode != 0:
log_error(f"ctr import failed for {ref} (exit {importer.returncode})")
Expand Down Expand Up @@ -383,6 +405,14 @@ def build_and_push(db: str, cluster_target: str = "docker-desktop") -> None:
run_cmd(["docker", "push", ref])

if cluster_target == "docker-desktop":
# Import the public postgres/redis base images too, so first bring-up
# doesn't depend on the DD node's containerd reaching cgr.dev (#517).
# Pull on the host first (respects the host Docker's proxy/cache), then
# stream into the node's containerd exactly like the tmi-* images.
for base in dd_base_images_for(db):
log_info(f"Pulling base image {base}")
run_cmd(["docker", "pull", base])
import_image_to_node(base, cluster.DD_NODE)
log_success("All images built and imported into the docker-desktop node")
else:
log_success("All images built and pushed to local registry")
Expand Down Expand Up @@ -503,28 +533,20 @@ def create_oracle_db_secret() -> None:
log_success("oracle DB connection delivered as Secret/tmi-oracle-db")


def apply_overlay(no_workers: bool, db: str, cluster_target: str = "docker-desktop") -> None:
def apply_overlay(db: str, cluster_target: str = "docker-desktop") -> None:
"""Apply the dev overlay.

When --no-workers: apply the three core manifests individually to avoid
kustomize referencing the component CR files (which include TMIComponent
resources from ../platform/components/).

Otherwise: render the full kustomize overlay with --load-restrictor
LoadRestrictionsNone (needed because the overlay references files outside
its own directory tree, e.g. ../../platform/components/).
Render the full kustomize overlay with --load-restrictor LoadRestrictionsNone
(needed because the overlay references files outside its own directory tree,
e.g. ../../platform/components/).
"""
project_root = get_project_root()
if no_workers:
for f in _no_workers_files(db):
kubectl(["apply", "-f", str(project_root / DEV_DIR / f)])
else:
rendered = run_cmd(
["kubectl", "kustomize", "--load-restrictor", "LoadRestrictionsNone",
str(project_root / overlay_dir_for(db, cluster_target))],
capture=True,
).stdout
kubectl(["apply", "-f", "-"], input_text=rendered)
rendered = run_cmd(
["kubectl", "kustomize", "--load-restrictor", "LoadRestrictionsNone",
str(project_root / overlay_dir_for(db, cluster_target))],
capture=True,
).stdout
kubectl(["apply", "-f", "-"], input_text=rendered)


def server_rollout_timeout(db: str) -> str:
Expand Down Expand Up @@ -657,7 +679,7 @@ def server_http_status() -> tuple[bool, str]:
# Orchestration entry points
# ---------------------------------------------------------------------------

def start(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool = False,
def start(*, db: str, cluster_target: str = "docker-desktop",
skip_context_guard: bool = False) -> None:
"""Build images, deploy all components, wait for readiness, and start port-forwards."""
_preflight()
Expand All @@ -678,7 +700,7 @@ def start(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool =
if db == "oracle":
create_oracle_wallet_secret()
create_oracle_db_secret()
apply_overlay(no_workers, db, cluster_target)
apply_overlay(db, cluster_target)
# `kubectl apply` of an unchanged Deployment spec does not roll a new pod, so
# a freshly-built :dev image (same tag) would not be picked up, and a pod
# stuck in CrashLoopBackOff from a prior transient outage (e.g. the host DB
Expand All @@ -691,7 +713,7 @@ def start(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool =
wait_and_forward(db, cluster_target)


def restart(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool = False,
def restart(*, db: str, cluster_target: str = "docker-desktop",
skip_context_guard: bool = False) -> None:
"""Rebuild the server image, re-deliver config, and roll the server deployment."""
_preflight()
Expand All @@ -704,7 +726,7 @@ def restart(*, db: str, cluster_target: str = "docker-desktop", no_workers: bool
if db == "oracle":
create_oracle_wallet_secret()
create_oracle_db_secret()
apply_overlay(no_workers, db, cluster_target)
apply_overlay(db, cluster_target)
kubectl(["-n", NS, "rollout", "restart", "deploy/tmi-server"])
kubectl(["-n", NS, "rollout", "status", "deploy/tmi-server", f"--timeout={server_rollout_timeout(db)}"])
start_redis_port_forward()
Expand Down
66 changes: 56 additions & 10 deletions scripts/lib/tests/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import unittest
from pathlib import Path
from unittest import mock

sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
import deploy # noqa: E402
Expand Down Expand Up @@ -74,18 +75,36 @@ def test_k3s_rewrites_url_host_to_postgres_service(self):
self.assertIn("@postgres:5432/tmi_dev", out)


class TestNoWorkersFiles(unittest.TestCase):
def test_no_workers_files_oracle_uses_oracle_server(self):
self.assertIn("server-oracle.yml", deploy._no_workers_files("oracle"))
class TestDdBaseImages(unittest.TestCase):
"""The docker-desktop base images pre-imported to dodge the cgr.dev first-run
pull flake (#517) must stay in sync with the refs in the manifests that
reference them, or the pre-imported copy won't match what the pods request."""

def test_no_workers_files_postgres_uses_plain_server(self):
self.assertIn("server.yml", deploy._no_workers_files("postgres"))
def test_includes_postgres_and_redis(self):
self.assertIn("cgr.dev/chainguard/postgres:latest", deploy.DD_BASE_IMAGES)
self.assertIn("cgr.dev/chainguard/redis:latest", deploy.DD_BASE_IMAGES)

def test_no_workers_files_includes_controller_and_redis(self):
for db in ("postgres", "oracle"):
files = deploy._no_workers_files(db)
self.assertIn("controller.yml", files)
self.assertIn("redis.yml", files)
def test_postgres_ref_matches_docker_desktop_manifest(self):
text = (_DEV_DIR / "docker-desktop" / "postgres.yml").read_text()
self.assertIn("cgr.dev/chainguard/postgres:latest", deploy.DD_BASE_IMAGES)
self.assertIn("image: cgr.dev/chainguard/postgres:latest", text)

def test_redis_ref_matches_shared_manifest(self):
text = (_DEV_DIR / "redis.yml").read_text()
self.assertIn("cgr.dev/chainguard/redis:latest", deploy.DD_BASE_IMAGES)
self.assertIn("image: cgr.dev/chainguard/redis:latest", text)

def test_postgres_flavor_imports_both_base_images(self):
imgs = deploy.dd_base_images_for("postgres")
self.assertIn(deploy.DD_POSTGRES_IMAGE, imgs)
self.assertIn(deploy.DD_REDIS_IMAGE, imgs)

def test_oracle_flavor_skips_postgres_base_image(self):
# Oracle uses an external ADB — no in-cluster Postgres pod — so importing
# the Postgres base image would be wasted work.
imgs = deploy.dd_base_images_for("oracle")
self.assertIn(deploy.DD_REDIS_IMAGE, imgs)
self.assertNotIn(deploy.DD_POSTGRES_IMAGE, imgs)


class TestRenderConfigmapYaml(unittest.TestCase):
Expand Down Expand Up @@ -276,5 +295,32 @@ def test_builds_docker_save_and_ctr_import_pair(self):
)


class TestImportImageToNode(unittest.TestCase):
"""#519: if the importer Popen raises before we release the saver's stdout,
the saver must be torn down (stdout closed + killed + waited) so it can't
deadlock writing into a pipe with no reader — rather than left to hang."""

def test_importer_popen_raises_tears_down_saver(self):
saver = mock.MagicMock()
saver.returncode = 0

def popen_side_effect(*_args, **_kwargs):
# First call (docker save) succeeds; second call (ctr import) fails to
# spawn, e.g. FileNotFoundError if docker exec were unavailable.
popen_side_effect.calls += 1
if popen_side_effect.calls == 1:
return saver
raise FileNotFoundError("docker exec not found")
popen_side_effect.calls = 0

with mock.patch.object(deploy.subprocess, "Popen", side_effect=popen_side_effect):
with self.assertRaises(FileNotFoundError):
deploy.import_image_to_node("tmi-server:dev", "desktop-control-plane")

saver.stdout.close.assert_called_once() # pipe read end released
saver.kill.assert_called_once() # saver stopped, can't block
saver.wait.assert_called_once() # reaped in the finally


if __name__ == "__main__":
unittest.main()
12 changes: 0 additions & 12 deletions scripts/lib/tests/test_devenv_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ def test_parse_up_defaults_to_docker_desktop(self):
args = devenv_cli.build_parser().parse_args(["up"])
self.assertEqual(args.cluster, "docker-desktop")

def test_parse_up_no_workers_default_false(self):
args = devenv_cli.build_parser().parse_args(["up"])
self.assertFalse(args.no_workers)

def test_parse_up_yes_default_false(self):
args = devenv_cli.build_parser().parse_args(["up"])
self.assertFalse(args.yes)
Expand All @@ -73,14 +69,6 @@ def test_parse_db_oracle_before_verb(self):
args = devenv_cli.build_parser().parse_args(["--db", "oracle", "up"])
self.assertEqual(args.db, "oracle")

def test_parse_no_workers_after_verb(self):
args = devenv_cli.build_parser().parse_args(["up", "--no-workers"])
self.assertTrue(args.no_workers)

def test_no_workers_before_verb(self):
args = devenv_cli.build_parser().parse_args(["--no-workers", "up"])
self.assertTrue(args.no_workers)

def test_parse_yes_after_verb(self):
args = devenv_cli.build_parser().parse_args(["up", "--yes"])
self.assertTrue(args.yes)
Expand Down
Loading