/**
 * Seed data system — creates example projects with pre-built conversations
 * and artifacts so users see demo content on first launch.
 *
 * Data lives in JSON manifest files (generated by scripts/dump_seed_manifests.py
 * in the original anthropics/operon repo) plus per-example asset tarballs
 * (extracted on first seed to ~/.operon/seed-assets/).
 *
 * Adapters trigger this lazily before the first projects.list call:
 *   - CLI:     GET /api/projects route
 *   - Desktop: OperonProjects.list eIPC handler
 */

import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";

import { x as extractTar } from "tar";

import type { ArtifactServiceImpl } from "../artifacts/artifactService.js";
import type { AnyDb } from "../db/repositories/_base.js";
import { ArtifactRepository } from "../db/repositories/artifacts.js";
import { ProjectRepository } from "../db/repositories/projects.js";
import {
  getOrCreateConversationFolder,
  getOrCreateUserUploadsFolder,
} from "../services/project.js";

export const EXAMPLE_PROJECT_ID = "proj_example";

// ---------------------------------------------------------------------------
// Idempotency helpers
// ---------------------------------------------------------------------------

/** User ids already handled by seedExampleProject during this process. */
const _seededUsers = new Set<string>();

/**
 * Directory that holds the "already seeded" marker files.
 *
 * @returns OPERON_ARTIFACT_STORAGE_PATH when set, otherwise ~/.operon/artifacts.
 */
function getMarkerDir(): string {
  return (
    process.env.OPERON_ARTIFACT_STORAGE_PATH ??
    join(homedir(), ".operon", "artifacts")
  );
}

/**
 * @param name Marker file name, relative to the marker directory.
 * @returns true when the marker file is present on disk.
 */
function markerExists(name: string): boolean {
  return existsSync(join(getMarkerDir(), name));
}

/**
 * Create the marker file, creating the marker directory first if needed.
 * Best-effort: any filesystem error is deliberately ignored.
 *
 * @param name Marker file name, relative to the marker directory.
 */
function writeMarker(name: string): void {
  try {
    const dir = getMarkerDir();
    mkdirSync(dir, { recursive: true });
    writeFileSync(join(dir, name), "seeded");
  } catch {
    /* non-fatal */
  }
}

// ---------------------------------------------------------------------------
// Asset / manifest resolution
// ---------------------------------------------------------------------------

/**
 * Locate the seed directory (contains manifest_*.json and the asset tarballs).
 *
 * Search order:
 *   1. OPERON_SEED_DIR env var (explicit override; ignored if the path is missing)
 *   2. Directory of this module (dev / CLI — manifests are co-located)
 *   3. {process.resourcesPath}/seed (packaged Electron — extraResources)
 *
 * @returns The first candidate that exists; falls back to this module's
 *   directory when none of them do.
 */
function resolveSeedDir(): string {
  const override = process.env.OPERON_SEED_DIR;
  if (override && existsSync(override)) return override;

  const here = dirname(fileURLToPath(import.meta.url));
  // One known manifest is used as the probe for "manifests are co-located".
  if (existsSync(join(here, "manifest_immunotherapy.json"))) return here;

  const resourcesPath = (process as { resourcesPath?: string }).resourcesPath;
  if (resourcesPath) {
    const bundled = join(resourcesPath, "seed");
    if (existsSync(bundled)) return bundled;
  }

  return here;
}

/**
 * Extract a per-example asset tarball to ~/.operon/seed-assets/{subdir}/.
 * Packaged app's resources dir is read-only on macOS (code signing), so we
 * extract to a user-writable location. Idempotent via .extracted marker.
 *
 * Assets ship as 4 small tarballs (each <2MB) rather than one big one to
 * stay under the repo's 5MB file-size limit without needing Git LFS.
 *
 * @param seedDir Directory that contains the `assets_*.tar.gz` files.
 * @param assetSubdir Sub-directory name, e.g. `example_immunotherapy`; the
 *   tarball name is derived from it by dropping a leading `example_`.
 * @returns Absolute path of the extracted asset directory.
 * @throws Error when the tarball is missing, or when extraction did not
 *   produce the expected `{assetSubdir}` directory.
 */
async function ensureAssetsExtracted(
  seedDir: string,
  assetSubdir: string,
): Promise<string> {
  const extractRoot = join(homedir(), ".operon", "seed-assets");
  const extractDir = join(extractRoot, assetSubdir);
  const marker = join(extractDir, ".extracted");
  if (existsSync(marker)) return extractDir;

  const tarballName = `assets_${assetSubdir.replace(/^example_/, "")}.tar.gz`;
  const tarball = join(seedDir, tarballName);
  if (!existsSync(tarball)) {
    throw new Error(`Seed assets tarball not found: ${tarball}`);
  }

  mkdirSync(extractRoot, { recursive: true });
  await extractTar({ file: tarball, cwd: extractRoot });

  // The archive is unpacked into extractRoot, so it has to carry the
  // {assetSubdir}/ directory itself. Fail with a clear message instead of
  // the bare ENOENT that writing the marker would otherwise raise.
  if (!existsSync(extractDir)) {
    throw new Error(
      `Seed assets tarball ${tarball} did not produce ${extractDir}`,
    );
  }

  writeFileSync(marker, "ok");
  return extractDir;
}

// ---------------------------------------------------------------------------
// Manifest types
// ---------------------------------------------------------------------------

/** One frame record as stored in a seed manifest. */
interface ManifestFrame {
  id: string;
  parent_frame_id: string | null;
  root_frame_id: string;
  agent_name: string;
  status: string;
  input_data: Record<string, unknown> | null;
  output_data: Record<string, unknown> | null;
  context_data: Record<string, unknown> | null;
  project_id: string;
  name: string | null;
  conversation_type: string | null;
  task_summary: string | null;
  specialists_used: string[] | null;
  mentioned_artifact_ids: unknown[] | null;
}

/** One artifact record as stored in a seed manifest. */
interface ManifestArtifact {
  artifact_id: string;
  filename: string;
  root_frame_id: string;
  frame_id: string | null;
  is_user_upload: boolean;
  version_id: string;
  content_type: string;
  agent_name: string | null;
  extracted_code: string | null;
  lineage_messages: unknown[] | null;
  environment_snapshot: Record<string, unknown> | null;
  storage_path: string;
}

/** Top-level shape of a manifest_*.json file. */
interface Manifest {
  root_frame: ManifestFrame;
  child_frames: ManifestFrame[];
  artifacts: ManifestArtifact[];
  folders: Array<{
    id: string;
    name: string;
    root_frame_id: string | null;
    is_conversation_folder: boolean;
    is_user_uploads_folder: boolean;
  }>;
}

// ---------------------------------------------------------------------------
// Project seeder
// ---------------------------------------------------------------------------
/**
 * Ensure the example project exists and belongs to `userId`.
 *
 * - Already handled for this user in this process: no-op.
 * - Project row exists: re-assign it to `userId` when owned by someone else.
 * - Project row missing but the `.example_seeded` marker is present: do
 *   nothing (the project is not re-created).
 * - Otherwise: create the project and its user-uploads folder, then write
 *   the marker.
 *
 * @param db Database handle.
 * @param userId User that should own the example project.
 */
export async function seedExampleProject(
  db: AnyDb,
  userId: string,
): Promise<void> {
  if (_seededUsers.has(userId)) return;

  const repo = new ProjectRepository(db);
  const existing = await repo.getProject(EXAMPLE_PROJECT_ID);

  if (existing) {
    if (existing.userId !== userId) {
      // Loaded lazily: only needed on the ownership-transfer path.
      const { eq } = await import("drizzle-orm");
      const { projects } = await import("../db/schema/sqlite.js");
      await db
        .update(projects)
        .set({ userId })
        .where(eq(projects.id, EXAMPLE_PROJECT_ID));
    }
    _seededUsers.add(userId);
    return;
  }

  if (markerExists(".example_seeded")) {
    _seededUsers.add(userId);
    return;
  }

  await repo.createProject({
    projectId: EXAMPLE_PROJECT_ID,
    name: "Example project",
    description:
      "Example project with pre-built analyses: immunotherapy scRNA-seq, " +
      "extremophile phylogenetics, CRISPR screen design, and enzyme engineering.",
    userId,
  });
  await getOrCreateUserUploadsFolder(db, EXAMPLE_PROJECT_ID);

  writeMarker(".example_seeded");
  _seededUsers.add(userId);
}

// ---------------------------------------------------------------------------
// Generic manifest loader
// ---------------------------------------------------------------------------

/**
 * Seed a single example conversation from a JSON manifest + asset directory.
 * Idempotent via filesystem marker.
 *
 * Returns without doing anything when the marker already exists, when the
 * example project is absent, or when the manifest file cannot be found (the
 * last case logs a warning). The marker is only written when every artifact
 * listed in the manifest had its asset file on disk.
 *
 * @param db Database handle.
 * @param artifactService Service used to persist each artifact's content.
 * @param manifestName Manifest file name inside the seed directory.
 * @param assetSubdir Asset sub-directory name (see ensureAssetsExtracted).
 * @param markerName Marker file name recording that this example is seeded.
 * @throws Propagates errors from asset extraction, JSON parsing, the database
 *   and the artifact service.
 */
export async function seedFromManifest(
  db: AnyDb,
  artifactService: ArtifactServiceImpl,
  manifestName: string,
  assetSubdir: string,
  markerName: string,
): Promise<void> {
  if (markerExists(markerName)) return;

  const repo = new ProjectRepository(db);
  if (!(await repo.getProject(EXAMPLE_PROJECT_ID))) return;

  // Load manifest + assets from the resolved seed directory
  const seedDir = resolveSeedDir();
  const manifestPath = join(seedDir, manifestName);
  if (!existsSync(manifestPath)) {
    console.warn(`Seed manifest not found: ${manifestPath}`);
    return;
  }
  // NOTE(review): the parsed JSON is trusted to match Manifest; there is no
  // runtime validation.
  const manifest: Manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
  const assetsDir = await ensureAssetsExtracted(seedDir, assetSubdir);

  const { frames } = await import("../db/schema/sqlite.js");
  const now = new Date();

  // Insert root frame. onConflictDoNothing makes this idempotent at the DB
  // level — handles concurrent seeding (two adapters firing at once) and
  // partial-retry (frames inserted but marker never written).
  const root = manifest.root_frame;
  await db
    .insert(frames)
    .values({
      id: root.id,
      parentFrameId: null,
      rootFrameId: root.id,
      agentName: root.agent_name,
      inputData: root.input_data,
      status: "completed",
      projectId: EXAMPLE_PROJECT_ID,
      conversationType: root.conversation_type ?? "agent",
      name: root.name,
      contextData: root.context_data,
      outputData: root.output_data,
      taskSummary: root.task_summary,
      specialistsUsed: root.specialists_used,
      mentionedArtifactIds: root.mentioned_artifact_ids,
      completedAt: now,
    })
    .onConflictDoNothing();

  // Insert child frames
  for (const child of manifest.child_frames) {
    await db
      .insert(frames)
      .values({
        id: child.id,
        parentFrameId: child.parent_frame_id,
        rootFrameId: root.id,
        agentName: child.agent_name,
        inputData: child.input_data,
        status: "completed",
        projectId: EXAMPLE_PROJECT_ID,
        contextData: child.context_data,
        outputData: child.output_data,
        completedAt: now,
      })
      .onConflictDoNothing();
  }

  // Create conversation folder
  await getOrCreateConversationFolder(db, EXAMPLE_PROJECT_ID, root.id);

  // Save artifacts
  // NOTE(review): unlike the frame inserts above, save_artifact is called
  // unconditionally, and the marker is not written while any asset is
  // missing. A retry therefore calls save_artifact again for every asset that
  // IS present — confirm whether the artifact service de-duplicates these.
  const artRepo = new ArtifactRepository(db);
  let missingAssets = false;
  for (const art of manifest.artifacts) {
    const filePath = join(assetsDir, art.filename);
    if (!existsSync(filePath)) {
      console.warn(`Seed asset not found: ${filePath}`);
      missingAssets = true;
      continue;
    }

    const content = readFileSync(filePath);
    const meta = await artifactService.save_artifact({
      project_id: EXAMPLE_PROJECT_ID,
      root_frame_id: root.id,
      frame_id: art.frame_id,
      filename: art.filename,
      content: Buffer.from(content),
      content_type: art.content_type,
      agent_name: art.agent_name,
      is_user_upload: art.is_user_upload,
    });

    // Update lineage + environment if present
    if (art.extracted_code || art.lineage_messages) {
      await artRepo.updateVersionLineage(
        meta.version_id,
        art.extracted_code,
        art.lineage_messages,
      );
    }
    if (art.environment_snapshot) {
      await artRepo.updateVersionEnvironmentSnapshot(
        meta.version_id,
        art.environment_snapshot,
      );
    }
  }

  // Leave the marker unwritten when something was missing so that a later
  // call tries again.
  if (!missingAssets) writeMarker(markerName);
}

// ---------------------------------------------------------------------------
// Convenience wrappers (one per example)
// ---------------------------------------------------------------------------

/** Seed the extremophile example conversation. */
export const seedExampleExtremophile = (db: AnyDb, svc: ArtifactServiceImpl) =>
  seedFromManifest(
    db,
    svc,
    "manifest_extremophile.json",
    "example_extremophile",
    ".example_extremophile_seeded",
  );

/** Seed the enzyme-engineering example conversation. */
export const seedExampleEnzymeEngineering = (
  db: AnyDb,
  svc: ArtifactServiceImpl,
) =>
  seedFromManifest(
    db,
    svc,
    "manifest_enzyme_engineering.json",
    "example_enzyme_engineering",
    ".example_enzyme_engineering_seeded",
  );

/** Seed the CRISPR-screen example conversation. */
export const seedExampleCrisprScreen = (db: AnyDb, svc: ArtifactServiceImpl) =>
  seedFromManifest(
    db,
    svc,
    "manifest_crispr_screen.json",
    "example_crispr_screen",
    ".example_crispr_screen_seeded",
  );

/** Seed the immunotherapy example conversation. */
export const seedExampleImmunotherapy = (db: AnyDb, svc: ArtifactServiceImpl) =>
  seedFromManifest(
    db,
    svc,
    "manifest_immunotherapy.json",
    "example_immunotherapy",
    ".example_immunotherapy_seeded",
  );

/** Log a seeding failure as a warning; seeding must never surface an error. */
function warnSeedError(e: unknown): void {
  console.warn(
    "Seed data error (non-fatal):",
    e instanceof Error ? e.message : e,
  );
}

/**
 * Seed the example project + all four example conversations.
 * Idempotent — safe to call on every projects.list. Errors are swallowed
 * (seeding failure should never block the real project listing).
 *
 * Failures are isolated per example: one example that cannot be seeded (for
 * instance because its tarball is missing) no longer prevents the remaining
 * examples from being seeded. If the project itself cannot be seeded, the
 * examples are skipped.
 *
 * @param db Database handle.
 * @param artifactService Service used to persist artifact content.
 * @param userId User that should own the example project.
 */
export async function seedAllExamples(
  db: AnyDb,
  artifactService: ArtifactServiceImpl,
  userId: string,
): Promise<void> {
  try {
    await seedExampleProject(db, userId);
  } catch (e) {
    warnSeedError(e);
    return;
  }

  for (const seedFn of [
    seedExampleImmunotherapy,
    seedExampleEnzymeEngineering,
    seedExampleCrisprScreen,
    seedExampleExtremophile,
  ]) {
    try {
      await seedFn(db, artifactService);
    } catch (e) {
      warnSeedError(e);
    }
  }
}