From 4e5e49419ec9406ddaec9c0064894947c2eb0735 Mon Sep 17 00:00:00 2001 From: Chad McElligott Date: Tue, 10 Mar 2026 17:10:17 +0000 Subject: [PATCH] fix: check for existing buildkitd before mounting sticky disk When setup-docker-builder is invoked twice in the same job (e.g. via a composite action called twice), the second invocation was calling setupStickyDisk() before detecting the already-running buildkitd. This caused a new sticky disk to be mounted on top of /var/lib/buildkit while buildkitd was still running with in-memory metadata referencing snapshot directories from the original disk. The subsequent build then failed with: ERROR: failed to solve: failed to read dockerfile: failed to walk: resolve: lstat /var/lib/buildkit/runc-overlayfs/snapshots/snapshots/N: no such file or directory Fix: move the buildkitd process check to the very beginning of startBlacksmithBuilder(), before any sticky disk setup. If buildkitd is already running, log an informational message and return immediately so the fallback path reuses the existing configured builder (from the first invocation) without corrupting its overlayfs snapshot state. --- src/main.ts | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/src/main.ts b/src/main.ts index 8e500e9..67ef2f8 100644 --- a/src/main.ts +++ b/src/main.ts @@ -428,6 +428,27 @@ async function startBlacksmithBuilder( inputs: Inputs, ): Promise<{ addr: string | null; exposeId: string }> { try { + // If buildkitd is already running, skip - the builder is already initialized. + try { + const { stdout } = await execAsync("pgrep buildkitd"); + if (stdout.trim()) { + core.info( + `Detected existing buildkitd process (PID: ${stdout.trim()}). ` + + `Skipping builder setup - builder is already initialized.`, + ); + return { addr: null, exposeId: "" }; + } + } catch (error) { + if ((error as { code?: number }).code !== 1) { + // pgrep returns exit code 1 when no process found, which is what we want + // Any other error code indicates a real problem + throw new Error( + `Failed to check for existing buildkitd process: ${(error as Error).message}`, + ); + } + // Exit code 1 means no buildkitd process found, which is good - we can proceed + } + // Setup sticky disk const stickyDiskStartTime = Date.now(); const stickyDiskSetup = await setupStickyDisk(); @@ -464,25 +485,6 @@ async function startBlacksmithBuilder( parallelism = inputs["max-parallelism"]; } - // Check if buildkitd is already running before starting - try { - const { stdout } = await execAsync("pgrep buildkitd"); - if (stdout.trim()) { - throw new Error( - `Detected existing buildkitd process (PID: ${stdout.trim()}). Refusing to start to avoid conflicts.`, - ); - } - } catch (error) { - if ((error as { code?: number }).code !== 1) { - // pgrep returns exit code 1 when no process found, which is what we want - // Any other error code indicates a real problem - throw new Error( - `Failed to check for existing buildkitd process: ${(error as Error).message}`, - ); - } - // Exit code 1 means no buildkitd process found, which is good - we can proceed - } - // Check for potential boltdb corruption const boltdbIntegrity = await checkBoltDbIntegrity( inputs["skip-integrity-check"], @@ -639,8 +641,12 @@ void actionsToolkit.run( core.info("Blacksmith builder is ready for use by Docker"); }); } else { - // Fallback to local builder - core.warning("Failed to setup Blacksmith builder, using local builder"); + // Fallback: either Blacksmith builder setup failed, or buildkitd was + // already running. In both cases, reuse whatever builder is already + // configured. + core.warning( + "Blacksmith builder setup skipped or failed, checking for existing configured builder", + ); await core.group(`Checking for configured builder`, async () => { try { const builder = await toolkit.builder.inspect();