diff --git a/Cargo.lock b/Cargo.lock index 2a565b0..8c86361 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3129,6 +3129,7 @@ dependencies = [ "opentake-core", "opentake-domain", "opentake-gen", + "opentake-media", "opentake-ops", "regex", "reqwest 0.12.28", diff --git a/crates/opentake-agent/Cargo.toml b/crates/opentake-agent/Cargo.toml index f8300e2..9dca585 100644 --- a/crates/opentake-agent/Cargo.toml +++ b/crates/opentake-agent/Cargo.toml @@ -11,6 +11,7 @@ opentake-domain = { workspace = true } opentake-ops = { workspace = true } opentake-core = { workspace = true } opentake-gen = { workspace = true } +opentake-media = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } serde_path_to_error = "0.1" diff --git a/crates/opentake-agent/src/mcp/core_handle.rs b/crates/opentake-agent/src/mcp/core_handle.rs index 455cc96..fc2c344 100644 --- a/crates/opentake-agent/src/mcp/core_handle.rs +++ b/crates/opentake-agent/src/mcp/core_handle.rs @@ -11,7 +11,8 @@ use std::path::PathBuf; use opentake_core::AppCore; -use opentake_domain::{MediaManifest, Timeline}; +use opentake_domain::{MediaManifest, MediaResolver, Timeline}; +use opentake_media::{extract_pcm, PcmBuffer, PcmSpec}; use opentake_ops::command::{EditCommand, EditResult}; /// The narrow document surface the dispatch shell needs. `Send + Sync` so a @@ -32,6 +33,29 @@ pub trait CoreHandle: Send + Sync { /// The open project's bundle directory, or `None` for an unsaved project. fn project_dir(&self) -> Option; + + /// Resolve an asset id to the local file path that media analysis can read. + /// The default implementation mirrors `MediaResolver.expected_path`. + fn media_path(&self, media_ref: &str) -> Option { + let manifest = self.media(); + let project_dir = self.project_dir(); + MediaResolver::new(&manifest, project_dir.as_deref()).expected_path(media_ref) + } + + /// Decode a media asset's first audio track into the PCM format requested by + /// an analysis tool. 
Test handles can override this to inject synthetic PCM + /// without invoking ffmpeg. + fn extract_analysis_pcm( + &self, + media_ref: &str, + spec: PcmSpec, + range: Option<(f64, f64)>, + ) -> anyhow::Result { + let path = self + .media_path(media_ref) + .ok_or_else(|| anyhow::anyhow!("media path not found for mediaRef: {media_ref}"))?; + extract_pcm(&path, &spec, range).map_err(|e| anyhow::anyhow!("{e}")) + } } /// Production [`CoreHandle`] over the authoritative [`AppCore`]. A clone of the diff --git a/crates/opentake-agent/src/mcp/dispatch.rs b/crates/opentake-agent/src/mcp/dispatch.rs index 90001ce..28e854d 100644 --- a/crates/opentake-agent/src/mcp/dispatch.rs +++ b/crates/opentake-agent/src/mcp/dispatch.rs @@ -16,6 +16,7 @@ //! generation / media tools are stubs in this phase and return an honest //! "not yet implemented" so the tool table is complete. +use std::collections::BTreeMap; use std::sync::{Arc, Mutex, RwLock}; use opentake_domain::{AnimPair, Crop, Interpolation, Keyframe, KeyframeTrack}; @@ -23,6 +24,10 @@ use opentake_domain::{ ChromaKey, ColorGrade, Effect, LiftGammaGain, Mask, MaskShape, MediaManifest, Point2, Rgb, Rgba, TextStyle, Timeline, Transform, VideoType, }; +use opentake_media::analysis::{ + detect_beats, detect_silences, BeatDetectionConfig, SilenceDetectionConfig, +}; +use opentake_media::{PcmFormat, PcmSpec}; use opentake_ops::{ ClipEntry, ClipMove, ClipProperties, EditCommand, FrameRange, KeyframePayload, KeyframeProperty, RenameEntry, TextEntry, @@ -147,7 +152,7 @@ impl Dispatcher { ToolName::RemoveTracks => self.remove_tracks(args), ToolName::SplitClip => self.split_clip(args, before, op), ToolName::SetKeyframes => self.set_keyframes(args), - ToolName::RippleDeleteRanges => self.ripple_delete_ranges(args, op), + ToolName::RippleDeleteRanges => self.ripple_delete_ranges(args, before, op), ToolName::AddTexts => self.add_texts(args), ToolName::CreateFolder => self.create_folder(args), ToolName::MoveToFolder => 
self.move_to_folder(args), @@ -168,6 +173,12 @@ impl Dispatcher { ToolName::ActivateWorkflow => self.activate_workflow(args), ToolName::DeactivateWorkflow => self.deactivate_workflow(), + // --- Analysis-driven edit surface --- + ToolName::DetectBeats => self.detect_beats(args, before), + ToolName::AutoCutToBeats => self.auto_cut_to_beats(args, before), + ToolName::SmartReframe => self.smart_reframe(args), + ToolName::TightenSilences => self.tighten_silences(args, before), + // --- Not yet implementable in this phase (honest stubs) --- // Media reads (inspect/transcript/search) + import need the media // backend via a widened CoreHandle; generation/upscale need the async @@ -217,9 +228,16 @@ impl Dispatcher { let a: AddClipsArgs = decode_tool_args(args, "")?; let mut entries = Vec::with_capacity(a.entries.len()); let mut media_refs = Vec::new(); + let mut omitted_count = 0usize; + let mut explicit_count = 0usize; for (i, raw) in a.entries.iter().enumerate() { let e: AddClipEntry = decode_tool_args(raw, &format!("entries[{i}]"))?; let (media_type, has_audio) = resolve_media_kind(manifest, &e.media_ref); + if e.track_index.is_some() { + explicit_count += 1; + } else { + omitted_count += 1; + } media_refs.push(e.media_ref.clone()); entries.push(ClipEntry { media_ref: e.media_ref, @@ -235,9 +253,20 @@ impl Dispatcher { transform: None, }); } + if omitted_count > 0 && explicit_count > 0 { + return Ok(ToolResult::error( + "add_clips: mixing entries with trackIndex and entries without trackIndex is rejected; split into separate calls", + )); + } op.added_media_refs = media_refs; - op.track_index = entries.first().map(|e| e.track_index); - let res = self.apply(EditCommand::AddClips { entries })?; + let command = if omitted_count > 0 { + op.track_index = None; + EditCommand::AddClipsAutoTrack { entries } + } else { + op.track_index = entries.first().map(|e| e.track_index); + EditCommand::AddClips { entries } + }; + let res = self.apply(command)?; 
Ok(ToolResult::ok(res.summary)) } @@ -343,23 +372,265 @@ impl Dispatcher { Ok(ToolResult::ok(res.summary)) } + fn detect_beats(&self, args: &Value, before: &Timeline) -> Result { + let a: DetectBeatsArgs = decode_tool_args(args, "")?; + let beats = self.detect_beat_hints( + before, + BeatAnalysisRequest { + clip_id: a.clip_id.as_deref(), + media_ref: a.media_ref.as_deref(), + start_frame: a.start_frame, + end_frame: a.end_frame, + sensitivity: a.sensitivity, + tool_name: "detect_beats", + }, + )?; + let payload = serde_json::json!({ + "applied": false, + "beats": beats.iter().map(|beat| serde_json::json!({ + "frame": beat.frame, + "strength": beat.strength, + })).collect::>(), + "count": beats.len(), + }); + Ok(ToolResult::ok(round_floats_3dp(payload).to_string())) + } + + fn auto_cut_to_beats(&self, args: &Value, before: &Timeline) -> Result { + let a: AutoCutToBeatsArgs = decode_tool_args(args, "")?; + let beats = self.detect_beat_hints( + before, + BeatAnalysisRequest { + clip_id: a.beat_clip_id.as_deref(), + media_ref: a.beat_media_ref.as_deref(), + start_frame: a.start_frame, + end_frame: a.end_frame, + sensitivity: None, + tool_name: "auto_cut_to_beats", + }, + )?; + let min_gap = a.min_clip_frames.unwrap_or(1).max(1); + let max_gap = a.max_clip_frames.unwrap_or(i32::MAX).max(min_gap); + let mut cut_frames = Vec::new(); + let mut last = None; + for beat in &beats { + if let Some(prev) = last { + let gap = beat.frame - prev; + if gap < min_gap { + continue; + } + if gap > max_gap { + cut_frames.push(prev + max_gap); + } + } + cut_frames.push(beat.frame); + last = Some(beat.frame); + } + cut_frames.sort_unstable(); + cut_frames.dedup(); + + let placements = a + .clip_ids + .unwrap_or_default() + .into_iter() + .zip(cut_frames.iter().copied()) + .map(|(clip_id, to_frame)| { + serde_json::json!({ + "clipId": clip_id, + "toFrame": to_frame, + }) + }) + .collect::>(); + + let payload = serde_json::json!({ + "applied": false, + "alignCuts": 
a.align_cuts.unwrap_or(false), + "beats": beats.iter().map(|beat| serde_json::json!({ + "frame": beat.frame, + "strength": beat.strength, + })).collect::>(), + "cutFrames": cut_frames, + "placements": placements, + "note": "Preview only. Apply returned frames through split_clip/move_clips/ripple_delete_ranges as needed.", + }); + Ok(ToolResult::ok(round_floats_3dp(payload).to_string())) + } + + fn smart_reframe(&self, args: &Value) -> Result { + let _: SmartReframeArgs = decode_tool_args(args, "")?; + Ok(ToolResult::error( + "smart_reframe: needs vision analysis backend; CoreHandle does not expose sampled frames or saliency/subject analysis yet", + )) + } + + fn tighten_silences(&self, args: &Value, before: &Timeline) -> Result { + let a: TightenSilencesArgs = decode_tool_args(args, "")?; + let targets = silence_targets(before, &a)?; + let spec = analysis_pcm_spec(); + let fps = timeline_fps(before); + let mut config = SilenceDetectionConfig::with_window( + spec.sample_rate, + fps, + analysis_window_samples(spec.sample_rate), + ); + config.rms_threshold = threshold_db_to_rms(a.threshold_db.unwrap_or(-40.0)); + config.min_silence_frames = a.min_silence_frames.unwrap_or(12).max(1) as u64; + let padding = a.padding_frames.unwrap_or(3).max(0); + + let mut by_track: BTreeMap> = BTreeMap::new(); + let mut clip_payloads = Vec::new(); + let mut warnings = Vec::new(); + for target in targets { + let source_range = visible_source_range_secs(target.clip, fps); + let pcm = match self.handle.extract_analysis_pcm( + &target.clip.media_ref, + spec, + Some(source_range), + ) { + Ok(pcm) => pcm, + Err(e) => { + warnings.push(format!("{}: {e}", target.clip.id)); + continue; + } + }; + config.sample_rate = pcm.spec.sample_rate; + config.window_size_samples = analysis_window_samples(pcm.spec.sample_rate); + config.hop_size_samples = (config.window_size_samples / 2).max(1); + let ranges = detect_silences(&pcm.samples_f32, config); + let mut clip_ranges = Vec::new(); + for range in 
ranges { + let start_seconds = source_range.0 + range.start_frame as f64 / fps; + let end_seconds = source_range.0 + range.end_frame as f64 / fps; + let start = source_seconds_to_timeline_frame_clamped( + target.clip, + start_seconds, + before.fps, + ) + padding; + let end = + source_seconds_to_timeline_frame_clamped(target.clip, end_seconds, before.fps) + - padding; + if end <= start { + continue; + } + by_track + .entry(target.track_index) + .or_default() + .push((start, end)); + clip_ranges.push(serde_json::json!([start, end])); + } + clip_payloads.push(serde_json::json!({ + "clipId": target.clip.id, + "trackIndex": target.track_index, + "ranges": clip_ranges, + })); + } + + for ranges in by_track.values_mut() { + ranges.sort_unstable(); + ranges.dedup(); + } + let commands = by_track + .iter() + .filter(|(_, ranges)| !ranges.is_empty()) + .map(|(track_index, ranges)| { + serde_json::json!({ + "tool": "ripple_delete_ranges", + "args": { + "trackIndex": track_index, + "units": "frames", + "ranges": ranges.iter().map(|(start, end)| { + serde_json::json!([start, end]) + }).collect::>(), + } + }) + }) + .collect::>(); + + let payload = serde_json::json!({ + "applied": false, + "clips": clip_payloads, + "commands": commands, + "warnings": warnings, + "note": "Preview only. 
Run each returned ripple_delete_ranges command to apply.", + }); + Ok(ToolResult::ok(round_floats_3dp(payload).to_string())) + } + + fn detect_beat_hints( + &self, + timeline: &Timeline, + request: BeatAnalysisRequest<'_>, + ) -> Result, ToolError> { + let target = analysis_target( + timeline, + &self.handle.media(), + request.clip_id, + request.media_ref, + request.start_frame, + request.end_frame, + request.tool_name, + )?; + let spec = analysis_pcm_spec(); + let pcm = self + .handle + .extract_analysis_pcm(&target.media_ref, spec, target.source_range) + .map_err(|e| ToolError::new(format!("{}: {e}", request.tool_name)))?; + let fps = timeline_fps(timeline); + let mut config = BeatDetectionConfig::with_window( + pcm.spec.sample_rate, + fps, + analysis_window_samples(pcm.spec.sample_rate), + ); + config.min_onset_strength = sensitivity_to_onset_threshold(request.sensitivity); + let beats = detect_beats(&pcm.samples_f32, config) + .into_iter() + .map(|beat| BeatHint { + frame: target.map_relative_frame(beat.frame as i32, timeline.fps), + strength: beat.strength, + }) + .collect(); + Ok(beats) + } + fn ripple_delete_ranges( &self, args: &Value, + before: &Timeline, op: &mut OpContext, ) -> Result { let a: RippleDeleteRangesArgs = decode_tool_args(args, "")?; - let track_index = a.track_index.unwrap_or(0); + let units = parse_range_units(a.units.as_deref())?; + let track_index = match (a.track_index, a.clip_id.as_deref()) { + (Some(track_index), None) => { + if units == RangeUnits::Seconds { + return Ok(ToolResult::error( + "ripple_delete_ranges: units='seconds' is only valid with clipId; trackIndex mode requires units='frames'", + )); + } + track_index + } + (None, Some(clip_id)) => { + let (track_index, _) = clip_location(before, clip_id); + track_index.ok_or_else(|| { + ToolError::new(format!("ripple_delete_ranges: clip not found: {clip_id}")) + })? 
+ } + (Some(_), Some(_)) => { + return Ok(ToolResult::error( + "ripple_delete_ranges: pass exactly one of trackIndex or clipId", + )); + } + (None, None) => { + return Ok(ToolResult::error( + "ripple_delete_ranges: missing trackIndex or clipId", + )); + } + }; op.track_index = Some(track_index); - let ranges: Vec = a - .ranges - .iter() - .map(|r| { - let start = r.first().copied().unwrap_or(0.0).round() as i32; - let end = r.get(1).copied().unwrap_or(0.0).round() as i32; - FrameRange::new(start, end) - }) - .collect(); + if let Some(clip_id) = a.clip_id.as_ref() { + op.clip_ids = vec![clip_id.clone()]; + } + let ranges = build_ripple_ranges(before, &a, units)?; let res = self.apply(EditCommand::RippleDeleteRanges { track_index, ranges, @@ -749,6 +1020,292 @@ fn clip_location(timeline: &Timeline, clip_id: &str) -> (Option, Option { + clip_id: Option<&'a str>, + media_ref: Option<&'a str>, + start_frame: Option, + end_frame: Option, + sensitivity: Option, + tool_name: &'a str, +} + +struct AnalysisTarget<'a> { + media_ref: String, + clip: Option<&'a opentake_domain::Clip>, + source_range: Option<(f64, f64)>, + source_start_seconds: f64, + project_start_frame: i32, +} + +impl AnalysisTarget<'_> { + fn map_relative_frame(&self, frame: i32, timeline_fps: i32) -> i32 { + match self.clip { + Some(clip) => { + let fps = timeline_fps.max(1) as f64; + let seconds = self.source_start_seconds + frame as f64 / fps; + source_seconds_to_timeline_frame_clamped(clip, seconds, timeline_fps) + } + None => self.project_start_frame + frame, + } + } +} + +struct SilenceTarget<'a> { + track_index: usize, + clip: &'a opentake_domain::Clip, +} + +fn analysis_pcm_spec() -> PcmSpec { + PcmSpec { + sample_rate: 16_000, + channels: 1, + format: PcmFormat::F32, + } +} + +fn timeline_fps(timeline: &Timeline) -> f64 { + timeline.fps.max(1) as f64 +} + +fn analysis_window_samples(sample_rate: u32) -> usize { + ((sample_rate.max(1) as f64) * 0.05).round().max(1.0) as usize +} + +fn 
sensitivity_to_onset_threshold(sensitivity: Option) -> f32 { + let sensitivity = sensitivity.unwrap_or(0.5).clamp(0.0, 1.0); + (0.16 - sensitivity * 0.12).clamp(0.02, 0.20) as f32 +} + +fn threshold_db_to_rms(db: f64) -> f32 { + let db = db.clamp(-90.0, 0.0); + 10f64.powf(db / 20.0) as f32 +} + +fn analysis_target<'a>( + timeline: &'a Timeline, + manifest: &MediaManifest, + clip_id: Option<&str>, + media_ref: Option<&str>, + start_frame: Option, + end_frame: Option, + tool_name: &str, +) -> Result, ToolError> { + match (clip_id, media_ref) { + (Some(_), Some(_)) => Err(ToolError::new(format!( + "{tool_name}: pass exactly one of clipId or mediaRef" + ))), + (None, None) => Err(ToolError::new(format!( + "{tool_name}: missing clipId or mediaRef" + ))), + (Some(clip_id), None) => { + let clip = find_clip(timeline, clip_id) + .ok_or_else(|| ToolError::new(format!("{tool_name}: clip not found: {clip_id}")))?; + let project_start = start_frame + .unwrap_or(clip.start_frame) + .clamp(clip.start_frame, clip.end_frame()); + let project_end = end_frame + .unwrap_or(clip.end_frame()) + .clamp(clip.start_frame, clip.end_frame()); + if project_end <= project_start { + return Err(ToolError::new(format!( + "{tool_name}: analysis range is empty" + ))); + } + let fps = timeline_fps(timeline); + let speed = normalized_speed(clip); + let source_start_frame = + clip.trim_start_frame as f64 + (project_start - clip.start_frame) as f64 * speed; + let source_end_frame = + clip.trim_start_frame as f64 + (project_end - clip.start_frame) as f64 * speed; + let source_range = (source_start_frame / fps, source_end_frame / fps); + Ok(AnalysisTarget { + media_ref: clip.media_ref.clone(), + clip: Some(clip), + source_range: Some(source_range), + source_start_seconds: source_range.0, + project_start_frame: project_start, + }) + } + (None, Some(media_ref)) => { + let fps = timeline_fps(timeline); + let start = start_frame.unwrap_or(0).max(0); + let entry = manifest.entries.iter().find(|entry| 
entry.id == media_ref); + let default_end = entry + .and_then(|entry| (entry.duration > 0.0).then_some((entry.duration * fps) as i32)); + let source_range = match (start_frame, end_frame.or(default_end)) { + (None, None) => None, + (_, Some(end)) if end > start => Some((start as f64 / fps, end as f64 / fps)), + _ => { + return Err(ToolError::new(format!( + "{tool_name}: mediaRef analysis range is empty or missing endFrame" + ))); + } + }; + Ok(AnalysisTarget { + media_ref: media_ref.to_string(), + clip: None, + source_range, + source_start_seconds: source_range.map(|range| range.0).unwrap_or(0.0), + project_start_frame: start, + }) + } + } +} + +fn silence_targets<'a>( + timeline: &'a Timeline, + args: &TightenSilencesArgs, +) -> Result>, ToolError> { + match (&args.clip_ids, args.track_index) { + (Some(_), Some(_)) => Err(ToolError::new( + "tighten_silences: pass clipIds or trackIndex, not both", + )), + (Some(ids), None) => { + if ids.is_empty() { + return Err(ToolError::new("tighten_silences: clipIds is empty")); + } + let mut out = Vec::new(); + for id in ids { + let (track_index, clip) = find_clip_with_track(timeline, id).ok_or_else(|| { + ToolError::new(format!("tighten_silences: clip not found: {id}")) + })?; + out.push(SilenceTarget { track_index, clip }); + } + Ok(out) + } + (None, Some(track_index)) => { + let track = timeline.tracks.get(track_index).ok_or_else(|| { + ToolError::new(format!("tighten_silences: track not found: {track_index}")) + })?; + Ok(track + .clips + .iter() + .map(|clip| SilenceTarget { track_index, clip }) + .collect()) + } + (None, None) => timeline + .tracks + .iter() + .enumerate() + .find(|(_, track)| track.kind == opentake_domain::ClipType::Audio) + .map(|(track_index, track)| { + track + .clips + .iter() + .map(|clip| SilenceTarget { track_index, clip }) + .collect() + }) + .ok_or_else(|| { + ToolError::new("tighten_silences: missing clipIds/trackIndex and no audio track") + }), + } +} + +fn find_clip_with_track<'a>( + 
timeline: &'a Timeline, + clip_id: &str, +) -> Option<(usize, &'a opentake_domain::Clip)> { + timeline + .tracks + .iter() + .enumerate() + .find_map(|(track_index, track)| { + track + .clips + .iter() + .find(|clip| clip.id == clip_id) + .map(|clip| (track_index, clip)) + }) +} + +fn visible_source_range_secs(clip: &opentake_domain::Clip, fps: f64) -> (f64, f64) { + let speed = normalized_speed(clip); + let start = clip.trim_start_frame as f64 / fps; + let end = (clip.trim_start_frame as f64 + clip.duration_frames as f64 * speed) / fps; + (start.max(0.0), end.max(start)) +} + +fn normalized_speed(clip: &opentake_domain::Clip) -> f64 { + if clip.speed.is_finite() && clip.speed > 0.0 { + clip.speed + } else { + 1.0 + } +} + +fn source_seconds_to_timeline_frame_clamped( + clip: &opentake_domain::Clip, + source_seconds: f64, + timeline_fps: i32, +) -> i32 { + let fps = timeline_fps.max(1) as f64; + let source_frame = source_seconds * fps; + let relative_source = source_frame - clip.trim_start_frame as f64; + let frame = clip.start_frame as f64 + relative_source / normalized_speed(clip); + (frame.round() as i32).clamp(clip.start_frame, clip.end_frame()) +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum RangeUnits { + Frames, + Seconds, +} + +fn parse_range_units(units: Option<&str>) -> Result { + match units.unwrap_or("frames") { + "frames" => Ok(RangeUnits::Frames), + "seconds" => Ok(RangeUnits::Seconds), + other => Err(ToolError::new(format!( + "units: unknown '{other}'. Allowed: frames, seconds." 
+ ))), + } +} + +fn build_ripple_ranges( + timeline: &Timeline, + args: &RippleDeleteRangesArgs, + units: RangeUnits, +) -> Result, ToolError> { + let clip = args + .clip_id + .as_deref() + .and_then(|clip_id| find_clip(timeline, clip_id)); + let mut ranges = Vec::with_capacity(args.ranges.len()); + for (i, row) in args.ranges.iter().enumerate() { + if row.len() < 2 { + return Err(ToolError::new(format!( + "ranges[{i}]: expected [start, end]" + ))); + } + let (mut start, mut end) = match units { + RangeUnits::Frames => (row[0] as i32, row[1] as i32), + RangeUnits::Seconds => { + if let Some(clip) = clip { + ( + source_seconds_to_timeline_frame_clamped(clip, row[0], timeline.fps), + source_seconds_to_timeline_frame_clamped(clip, row[1], timeline.fps), + ) + } else { + let fps = timeline.fps.max(1) as f64; + ((row[0] * fps).round() as i32, (row[1] * fps).round() as i32) + } + } + }; + if let Some(clip) = clip { + start = start.clamp(clip.start_frame, clip.end_frame()); + end = end.clamp(clip.start_frame, clip.end_frame()); + } + ranges.push(FrameRange::new(start, end)); + } + Ok(ranges) +} + /// Build a domain [`Transform`] from the optional partial `TransformArg`, leaving /// unspecified fields at their identity defaults. 
fn build_transform(arg: Option) -> Transform { @@ -1358,6 +1915,54 @@ mod tests { } } + struct AnalysisHandle { + timeline: Timeline, + manifest: MediaManifest, + pcm: opentake_media::PcmBuffer, + } + + impl CoreHandle for AnalysisHandle { + fn timeline(&self) -> Timeline { + self.timeline.clone() + } + fn media(&self) -> MediaManifest { + self.manifest.clone() + } + fn apply(&self, _cmd: EditCommand) -> anyhow::Result { + anyhow::bail!("read-only analysis test handle") + } + fn project_dir(&self) -> Option { + None + } + fn extract_analysis_pcm( + &self, + _media_ref: &str, + _spec: opentake_media::PcmSpec, + _range: Option<(f64, f64)>, + ) -> anyhow::Result { + Ok(self.pcm.clone()) + } + } + + fn pcm(samples: Vec, sample_rate: u32) -> opentake_media::PcmBuffer { + opentake_media::PcmBuffer { + spec: opentake_media::PcmSpec { + sample_rate, + channels: 1, + format: opentake_media::PcmFormat::F32, + }, + samples_f32: samples, + } + } + + fn first_json(result: &ToolResult) -> Value { + let first = match &result.content[0] { + crate::tools::result::Block::Text { text } => text, + _ => panic!("expected text block"), + }; + serde_json::from_str(first).unwrap() + } + impl CoreHandle for StateHandle { fn timeline(&self) -> Timeline { self.state.lock().unwrap().timeline.clone() @@ -1395,6 +2000,16 @@ mod tests { } } + fn audio_entry(id: &str, name: &str) -> MediaManifestEntry { + let mut e = entry(id, name); + e.kind = ClipType::Audio; + e.has_audio = Some(true); + e.source = MediaSource::External { + absolute_path: format!("/{id}.mp3"), + }; + e + } + fn entry_with_size(id: &str, name: &str, width: i32, height: i32) -> MediaManifestEntry { let mut e = entry(id, name); e.source_width = Some(width); @@ -1432,6 +2047,369 @@ mod tests { Arc::new(StateHandle::new(tl, m)) } + fn empty_manifest_handle(entries: Vec) -> Arc { + let mut m = MediaManifest::new(); + m.entries = entries; + Arc::new(StateHandle::new(Timeline::new(), m)) + } + + fn two_track_ripple_handle() -> Arc { + 
let mut tl = Timeline::new(); + tl.fps = 30; + let mut first = Track::new("track-1", ClipType::Video); + first.clips.push(Clip::new("clip-a", "asset-1", 0, 90)); + let mut second = Track::new("track-2", ClipType::Video); + second.clips.push(Clip::new("clip-b", "asset-2", 100, 30)); + tl.tracks.push(first); + tl.tracks.push(second); + + let mut m = MediaManifest::new(); + m.entries.push(entry("asset-1", "A")); + m.entries.push(entry("asset-2", "B")); + Arc::new(StateHandle::new(tl, m)) + } + + #[test] + fn add_clips_omitted_track_index_creates_shared_video_track() { + let h = empty_manifest_handle(vec![entry("asset-1", "A"), entry("asset-2", "B")]); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "add_clips", + serde_json::json!({ + "entries": [ + {"mediaRef": "asset-1", "startFrame": 0, "durationFrames": 30}, + {"mediaRef": "asset-2", "startFrame": 40, "durationFrames": 20} + ] + }), + ); + + assert!(!r.is_error, "{}", r.text_joined()); + let tl = h.timeline(); + assert_eq!(tl.tracks.len(), 1); + assert_eq!(tl.tracks[0].kind, ClipType::Video); + assert_eq!(tl.tracks[0].clips.len(), 2); + assert_eq!(tl.tracks[0].clips[0].media_ref, "asset-1"); + assert_eq!(tl.tracks[0].clips[1].media_ref, "asset-2"); + } + + #[test] + fn add_clips_omitted_track_index_creates_shared_audio_track() { + let h = empty_manifest_handle(vec![ + audio_entry("asset-1", "A"), + audio_entry("asset-2", "B"), + ]); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "add_clips", + serde_json::json!({ + "entries": [ + {"mediaRef": "asset-1", "startFrame": 0, "durationFrames": 30}, + {"mediaRef": "asset-2", "startFrame": 40, "durationFrames": 20} + ] + }), + ); + + assert!(!r.is_error, "{}", r.text_joined()); + let tl = h.timeline(); + assert_eq!(tl.tracks.len(), 1); + assert_eq!(tl.tracks[0].kind, ClipType::Audio); + assert_eq!(tl.tracks[0].clips.len(), 2); + } + + #[test] + fn add_clips_omitted_track_index_is_one_undo_step() { + let h = 
empty_manifest_handle(vec![entry("asset-1", "A"), entry("asset-2", "B")]); + let d = dispatcher_with(h.clone()); + + let add = d.dispatch( + "add_clips", + serde_json::json!({ + "entries": [ + {"mediaRef": "asset-1", "startFrame": 0, "durationFrames": 30}, + {"mediaRef": "asset-2", "startFrame": 40, "durationFrames": 20} + ] + }), + ); + assert!(!add.is_error, "{}", add.text_joined()); + assert_eq!(h.timeline().tracks.len(), 1); + + let undo = d.dispatch("undo", serde_json::json!({})); + assert!(!undo.is_error, "{}", undo.text_joined()); + assert!(h.timeline().tracks.is_empty()); + } + + #[test] + fn add_clips_mixed_track_index_presence_is_rejected() { + let h = empty_manifest_handle(vec![entry("asset-1", "A"), entry("asset-2", "B")]); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "add_clips", + serde_json::json!({ + "entries": [ + {"mediaRef": "asset-1", "trackIndex": 0, "startFrame": 0, "durationFrames": 30}, + {"mediaRef": "asset-2", "startFrame": 40, "durationFrames": 20} + ] + }), + ); + + assert!(r.is_error); + assert!( + r.text_joined().contains("trackIndex"), + "{}", + r.text_joined() + ); + assert!(h.timeline().tracks.is_empty()); + } + + #[test] + fn add_clips_omitted_track_index_invalid_entry_does_not_create_track() { + let h = empty_manifest_handle(vec![entry("asset-1", "A")]); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "add_clips", + serde_json::json!({ + "entries": [ + {"mediaRef": "asset-1", "startFrame": 0, "durationFrames": 0} + ] + }), + ); + + assert!(r.is_error); + assert!( + r.text_joined().contains("durationFrames"), + "{}", + r.text_joined() + ); + assert!(h.timeline().tracks.is_empty()); + } + + #[test] + fn ripple_delete_ranges_clip_id_seconds_uses_clip_track_and_timeline_fps() { + let h = two_track_ripple_handle(); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "ripple_delete_ranges", + serde_json::json!({ + "clipId": "clip-b", + "units": "seconds", + "ranges": [[0.2, 0.5]] + }), + ); 
+ + assert!(!r.is_error, "{}", r.text_joined()); + let tl = h.timeline(); + assert_eq!(tl.tracks[0].clips[0].duration_frames, 90); + let spans: Vec<(i32, i32)> = tl.tracks[1] + .clips + .iter() + .map(|clip| (clip.start_frame, clip.duration_frames)) + .collect(); + assert_eq!(spans, vec![(100, 6), (106, 15)]); + } + + #[test] + fn ripple_delete_ranges_clip_id_seconds_rounds_after_speed_mapping() { + let mut tl = Timeline::new(); + tl.fps = 30; + let mut track = Track::new("track-1", ClipType::Video); + let mut clip = Clip::new("clip-b", "asset-2", 100, 30); + clip.speed = 2.0; + track.clips.push(clip); + tl.tracks.push(track); + let mut manifest = MediaManifest::new(); + manifest.entries.push(entry("asset-2", "B")); + let h = Arc::new(StateHandle::new(tl, manifest)); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "ripple_delete_ranges", + serde_json::json!({ + "clipId": "clip-b", + "units": "seconds", + "ranges": [[0.24, 0.50]] + }), + ); + + assert!(!r.is_error, "{}", r.text_joined()); + let spans: Vec<(i32, i32)> = h.timeline().tracks[0] + .clips + .iter() + .map(|clip| (clip.start_frame, clip.duration_frames)) + .collect(); + assert_eq!(spans, vec![(100, 4), (104, 22)]); + } + + #[test] + fn ripple_delete_ranges_frames_are_used_without_rounding() { + let h = two_track_ripple_handle(); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "ripple_delete_ranges", + serde_json::json!({ + "trackIndex": 1, + "units": "frames", + "ranges": [[105.9, 110.9]] + }), + ); + + assert!(!r.is_error, "{}", r.text_joined()); + let tl = h.timeline(); + let spans: Vec<(i32, i32)> = tl.tracks[1] + .clips + .iter() + .map(|clip| (clip.start_frame, clip.duration_frames)) + .collect(); + assert_eq!(spans, vec![(100, 5), (105, 20)]); + } + + #[test] + fn ripple_delete_ranges_rejects_track_index_with_seconds() { + let h = two_track_ripple_handle(); + let d = dispatcher_with(h.clone()); + + let r = d.dispatch( + "ripple_delete_ranges", + serde_json::json!({ + 
"trackIndex": 1, + "units": "seconds", + "ranges": [[3.5, 3.8]] + }), + ); + + assert!(r.is_error); + assert!(r.text_joined().contains("seconds"), "{}", r.text_joined()); + assert_eq!(h.timeline(), two_track_ripple_handle().timeline()); + } + + #[test] + fn detect_beats_returns_pcm_frame_hints() { + let mut manifest = MediaManifest::new(); + manifest.entries.push(audio_entry("music-1", "Music")); + let mut samples = vec![0.0f32; 1_000]; + for sample in &mut samples[500..530] { + *sample = 1.0; + } + let mut timeline = Timeline::new(); + timeline.fps = 10; + let h = Arc::new(AnalysisHandle { + timeline, + manifest, + pcm: pcm(samples, 1_000), + }); + let d = dispatcher_with(h); + + let beats = d.dispatch( + "detect_beats", + serde_json::json!({"mediaRef": "music-1", "sensitivity": 1.0}), + ); + assert!(!beats.is_error, "{}", beats.text_joined()); + let json = first_json(&beats); + let frames: Vec = json["beats"] + .as_array() + .unwrap() + .iter() + .map(|beat| beat["frame"].as_i64().unwrap()) + .collect(); + assert!( + frames.iter().any(|frame| (4..=5).contains(frame)), + "{frames:?}" + ); + } + + #[test] + fn smart_reframe_reports_needs_vision_backend() { + let d = dispatcher_with(empty_manifest_handle(vec![])); + let reframe = d.dispatch( + "smart_reframe", + serde_json::json!({"clipIds": ["clip-a"], "aspectRatio": "9:16"}), + ); + assert!(reframe.is_error); + assert!( + reframe + .text_joined() + .contains("needs vision analysis backend") + || reframe.text_joined().contains("needs vision backend") + || reframe.text_joined().contains("needs vision"), + "{}", + reframe.text_joined() + ); + } + + #[test] + fn tighten_silences_returns_ripple_delete_preview() { + let mut timeline = Timeline::new(); + timeline.fps = 10; + let mut track = Track::new("audio-track", ClipType::Audio); + track.clips.push(Clip::new("clip-a", "asset-1", 0, 10)); + timeline.tracks.push(track); + let mut manifest = MediaManifest::new(); + manifest.entries.push(audio_entry("asset-1", "Voice")); 
+ let mut samples = vec![0.5f32; 300]; + samples.extend(std::iter::repeat_n(0.0f32, 400)); + samples.extend(std::iter::repeat_n(0.5f32, 300)); + let h = Arc::new(AnalysisHandle { + timeline, + manifest, + pcm: pcm(samples, 1_000), + }); + let d = dispatcher_with(h); + + let result = d.dispatch( + "tighten_silences", + serde_json::json!({ + "clipIds": ["clip-a"], + "thresholdDb": -40.0, + "minSilenceFrames": 2, + "paddingFrames": 0 + }), + ); + + assert!(!result.is_error, "{}", result.text_joined()); + let json = first_json(&result); + let ranges = json["commands"][0]["args"]["ranges"].as_array().unwrap(); + assert!(!ranges.is_empty(), "{json}"); + let first = ranges[0].as_array().unwrap(); + let start = first[0].as_i64().unwrap(); + let end = first[1].as_i64().unwrap(); + assert!(start <= 3, "{json}"); + assert!(end >= 6, "{json}"); + assert_eq!(json["applied"], serde_json::json!(false)); + } + + #[test] + fn analysis_tools_reject_unknown_args_before_unsupported_error() { + let d = dispatcher_with(empty_manifest_handle(vec![])); + let r = d.dispatch( + "tighten_silences", + serde_json::json!({"clipIds": ["clip-a"], "bogus": true}), + ); + assert!(r.is_error); + assert!( + r.text_joined().contains("unknown field"), + "{}", + r.text_joined() + ); + } + + #[test] + fn remove_filler_words_stays_disabled_until_transcript_is_wired() { + let d = dispatcher_with(empty_manifest_handle(vec![])); + let r = d.dispatch("remove_filler_words", serde_json::json!({})); + assert!(r.is_error); + assert!( + r.text_joined() + .contains("Unknown tool: remove_filler_words"), + "{}", + r.text_joined() + ); + } + #[test] fn rename_media_updates_manifest_name() { let h = seeded_handle(); diff --git a/crates/opentake-agent/src/mcp/server.rs b/crates/opentake-agent/src/mcp/server.rs index a606b67..bc1cc1e 100644 --- a/crates/opentake-agent/src/mcp/server.rs +++ b/crates/opentake-agent/src/mcp/server.rs @@ -38,7 +38,7 @@ pub const DEFAULT_ADDR: &str = "127.0.0.1:19789"; /// One MCP session: 
owns a [`Dispatcher`] (its own agent-undo stack) and the /// system-prompt instructions snapshotted at construction. pub struct McpServer { - dispatcher: Dispatcher, + dispatcher: Arc, instructions: String, } @@ -50,7 +50,7 @@ impl McpServer { .map(|r| assemble_system_prompt(&r, "default")) .unwrap_or_default(); McpServer { - dispatcher: Dispatcher::new(handle, registry), + dispatcher: Arc::new(Dispatcher::new(handle, registry)), instructions, } } @@ -113,7 +113,15 @@ impl ServerHandler for McpServer { request: CallToolRequestParam, _context: RequestContext, ) -> Result { - Ok(self.call(&request.name, request.arguments)) + let dispatcher = self.dispatcher.clone(); + let name = request.name.to_string(); + let args = request + .arguments + .map(Value::Object) + .unwrap_or(Value::Object(Map::new())); + tokio::task::spawn_blocking(move || to_call_tool_result(dispatcher.dispatch(&name, args))) + .await + .map_err(|e| McpError::internal_error(format!("tool dispatch task failed: {e}"), None)) } } @@ -257,7 +265,7 @@ mod tests { } #[test] - fn lists_all_40_tools() { + fn lists_all_44_tools() { assert_eq!(McpServer::tools().len(), ToolName::ALL.len()); // Names round-trip to the wire names. 
let names: Vec = McpServer::tools() @@ -265,6 +273,7 @@ mod tests { .map(|t| t.name.to_string()) .collect(); assert!(names.contains(&"add_clips".to_string())); + assert!(names.contains(&"detect_beats".to_string())); assert!(names.contains(&"activate_workflow".to_string())); } diff --git a/crates/opentake-agent/src/tools/args.rs b/crates/opentake-agent/src/tools/args.rs index 663c3b8..92e60fb 100644 --- a/crates/opentake-agent/src/tools/args.rs +++ b/crates/opentake-agent/src/tools/args.rs @@ -580,6 +580,85 @@ impl ToolArgs for AddCaptionsArgs { ]; } +// --- detect_beats --- +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct DetectBeatsArgs { + pub clip_id: Option, + pub media_ref: Option, + pub start_frame: Option, + pub end_frame: Option, + pub sensitivity: Option, +} +impl ToolArgs for DetectBeatsArgs { + const ALLOWED_KEYS: &'static [&'static str] = &[ + "clipId", + "mediaRef", + "startFrame", + "endFrame", + "sensitivity", + ]; +} + +// --- auto_cut_to_beats --- +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct AutoCutToBeatsArgs { + pub clip_ids: Option>, + pub beat_clip_id: Option, + pub beat_media_ref: Option, + pub start_frame: Option, + pub end_frame: Option, + pub min_clip_frames: Option, + pub max_clip_frames: Option, + pub align_cuts: Option, +} +impl ToolArgs for AutoCutToBeatsArgs { + const ALLOWED_KEYS: &'static [&'static str] = &[ + "clipIds", + "beatClipId", + "beatMediaRef", + "startFrame", + "endFrame", + "minClipFrames", + "maxClipFrames", + "alignCuts", + ]; +} + +// --- smart_reframe --- +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct SmartReframeArgs { + pub clip_ids: Vec, + pub aspect_ratio: String, + pub subject: Option, + pub mode: Option, +} +impl ToolArgs for SmartReframeArgs { + const ALLOWED_KEYS: &'static [&'static str] = &["clipIds", "aspectRatio", "subject", "mode"]; 
+} + +// --- tighten_silences --- +#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct TightenSilencesArgs { + pub clip_ids: Option>, + pub track_index: Option, + pub threshold_db: Option, + pub min_silence_frames: Option, + pub padding_frames: Option, +} +impl ToolArgs for TightenSilencesArgs { + const ALLOWED_KEYS: &'static [&'static str] = &[ + "clipIds", + "trackIndex", + "thresholdDb", + "minSilenceFrames", + "paddingFrames", + ]; +} + // --- generate_video --- #[derive(Debug, Clone, Default, Deserialize, PartialEq)] #[serde(rename_all = "camelCase")] diff --git a/crates/opentake-agent/src/tools/descriptions.rs b/crates/opentake-agent/src/tools/descriptions.rs index da09c56..0a94084 100644 --- a/crates/opentake-agent/src/tools/descriptions.rs +++ b/crates/opentake-agent/src/tools/descriptions.rs @@ -53,6 +53,14 @@ pub fn description(tool: ToolName) -> &'static str { ToolName::AddCaptions => "Auto-caption spoken audio: transcribes on-device and places styled caption clips on a new track — the same pipeline as the editor's Captions tab. This is the reliable path for 'caption this'; prefer it over hand-placing add_texts from a transcript. Omit clipIds to auto-pick the track with the most speech; pass clipIds to caption specific clips (e.g. only the interview).", + ToolName::DetectBeats => "Detects musical beat positions for a clip or media asset using lightweight PCM energy/onset analysis. Returns project-frame beat hints and strengths; it does not mutate the timeline.", + + ToolName::AutoCutToBeats => "Plans beat-synced cuts for one or more clips against an audio or music source. Returns beat frames, suggested cut frames, and optional clip placement hints; it does not mutate the timeline. Apply the plan with existing edit tools.", + + ToolName::SmartReframe => "Plans subject-aware reframing for target aspect ratios such as 9:16 or 1:1. 
The typed surface is present, but MCP frame sampling / vision analysis is not wired yet; calls return a deterministic needs-vision-backend error and do not mutate the timeline.", + + ToolName::TightenSilences => "Plans silence tightening by finding low-energy PCM spans and converting them into ripple_delete_ranges candidate commands. Returns a preview only; it does not mutate the timeline.", + ToolName::GenerateVideo => "Starts an async AI video generation. Returns a placeholder asset ID immediately; generation runs in the background and the asset becomes usable in add_clips once ready. Costs real money and is not undoable.", ToolName::GenerateImage => "Starts an async AI image generation. Returns a placeholder asset ID immediately; generation runs in the background. Costs real money and is not undoable.", @@ -342,6 +350,52 @@ pub fn input_schema(tool: ToolName) -> Value { &[], ), + ToolName::DetectBeats => object( + json!({ + "clipId": {"type": "string", "description": "Optional clip id to analyze. 
Mutually exclusive with mediaRef."}, + "mediaRef": {"type": "string", "description": "Optional media asset id to analyze directly."}, + "startFrame": {"type": "integer", "description": "Optional project-frame window start."}, + "endFrame": {"type": "integer", "description": "Optional project-frame window end (exclusive)."}, + "sensitivity": {"type": "number", "description": "Optional beat-picking sensitivity 0.0-1.0."} + }), + &[], + ), + + ToolName::AutoCutToBeats => object( + json!({ + "clipIds": {"type": "array", "items": {"type": "string"}, "description": "Optional visual clips to cut or align."}, + "beatClipId": {"type": "string", "description": "Optional timeline clip whose audio supplies the beat grid."}, + "beatMediaRef": {"type": "string", "description": "Optional media asset whose audio supplies the beat grid."}, + "startFrame": {"type": "integer", "description": "Optional project-frame window start."}, + "endFrame": {"type": "integer", "description": "Optional project-frame window end (exclusive)."}, + "minClipFrames": {"type": "integer", "description": "Optional lower bound for generated cut lengths."}, + "maxClipFrames": {"type": "integer", "description": "Optional upper bound for generated cut lengths."}, + "alignCuts": {"type": "boolean", "description": "Optional. true means move/split cuts to the detected beat grid."} + }), + &[], + ), + + ToolName::SmartReframe => object( + json!({ + "clipIds": {"type": "array", "items": {"type": "string"}, "description": "Clip ids to reframe."}, + "aspectRatio": {"type": "string", "description": "Target aspect ratio, e.g. '9:16', '1:1', or '16:9'."}, + "subject": {"type": "string", "description": "Optional subject hint to keep in frame."}, + "mode": {"type": "string", "enum": ["plan", "apply"], "description": "Optional future mode. 
Current phase always returns needs-vision-backend."} + }), + &["clipIds", "aspectRatio"], + ), + + ToolName::TightenSilences => object( + json!({ + "clipIds": {"type": "array", "items": {"type": "string"}, "description": "Optional clip ids to analyze. Omit to analyze the primary spoken track in the future backend."}, + "trackIndex": {"type": "integer", "description": "Optional track index to analyze."}, + "thresholdDb": {"type": "number", "description": "Optional silence threshold in dB."}, + "minSilenceFrames": {"type": "integer", "description": "Optional minimum silence span to cut."}, + "paddingFrames": {"type": "integer", "description": "Optional context to preserve around each silence."} + }), + &[], + ), + ToolName::GenerateVideo => object( json!({ "prompt": {"type": "string", "description": "Text description of the video to generate"}, diff --git a/crates/opentake-agent/src/tools/names.rs b/crates/opentake-agent/src/tools/names.rs index d012030..1f19d1b 100644 --- a/crates/opentake-agent/src/tools/names.rs +++ b/crates/opentake-agent/src/tools/names.rs @@ -1,11 +1,12 @@ //! Tool-name enum. The 31 upstream tools (`ToolDefinitions.swift:4-36`) plus -//! the OpenTake workflow-plugin tools (`agent-SPEC.md` §7.4). String values are -//! 1:1 with upstream; ordering matches `ToolName`. +//! OpenTake workflow-plugin, analysis, effect, and motion-graphics additions. +//! String values are 1:1 with upstream where applicable; ordering matches +//! `ToolName`. use std::str::FromStr; -/// Every tool the agent layer exposes. The first 31 are the upstream -/// ToolExecutor set; the last three are OpenTake's workflow-plugin additions. +/// Every tool the agent layer exposes. The `UPSTREAM` const pins the 31-tool +/// upstream-compatible set; `ALL` also includes OpenTake additions. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ToolName { // --- Read / introspect (7) --- @@ -29,6 +30,10 @@ pub enum ToolName { Undo, AddTexts, AddCaptions, + DetectBeats, + AutoCutToBeats, + SmartReframe, + TightenSilences, // --- Media generation / import (5) --- GenerateVideo, GenerateImage, @@ -80,6 +85,10 @@ impl ToolName { ToolName::Undo => "undo", ToolName::AddTexts => "add_texts", ToolName::AddCaptions => "add_captions", + ToolName::DetectBeats => "detect_beats", + ToolName::AutoCutToBeats => "auto_cut_to_beats", + ToolName::SmartReframe => "smart_reframe", + ToolName::TightenSilences => "tighten_silences", ToolName::GenerateVideo => "generate_video", ToolName::GenerateImage => "generate_image", ToolName::GenerateAudio => "generate_audio", @@ -105,7 +114,7 @@ impl ToolName { } /// All tools in registration order. - pub const ALL: [ToolName; 40] = [ + pub const ALL: [ToolName; 44] = [ ToolName::GetTimeline, ToolName::GetMedia, ToolName::InspectMedia, @@ -125,6 +134,10 @@ impl ToolName { ToolName::Undo, ToolName::AddTexts, ToolName::AddCaptions, + ToolName::DetectBeats, + ToolName::AutoCutToBeats, + ToolName::SmartReframe, + ToolName::TightenSilences, ToolName::GenerateVideo, ToolName::GenerateImage, ToolName::GenerateAudio, @@ -205,8 +218,25 @@ mod tests { } #[test] - fn all_set_is_40() { - assert_eq!(ToolName::ALL.len(), 40); + fn all_set_is_44() { + assert_eq!(ToolName::ALL.len(), 44); + } + + #[test] + fn analysis_tools_have_expected_wire_names() { + assert_eq!(ToolName::DetectBeats.as_str(), "detect_beats"); + assert_eq!(ToolName::AutoCutToBeats.as_str(), "auto_cut_to_beats"); + assert_eq!(ToolName::SmartReframe.as_str(), "smart_reframe"); + assert_eq!(ToolName::TightenSilences.as_str(), "tighten_silences"); + for t in [ + ToolName::DetectBeats, + ToolName::AutoCutToBeats, + ToolName::SmartReframe, + ToolName::TightenSilences, + ] { + assert_eq!(ToolName::from_str(t.as_str()), Ok(t)); + assert!(!ToolName::UPSTREAM.contains(&t)); + } } 
/// Channel layout of a tightly packed, 8-bit-per-channel frame buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PixelFormat {
    Rgb,
    Rgba,
}

impl PixelFormat {
    /// Number of bytes each pixel occupies in this layout.
    fn channels(self) -> usize {
        match self {
            PixelFormat::Rgb => 3,
            PixelFormat::Rgba => 4,
        }
    }
}

/// Borrowed view of one frame. Pixels are read row by row with no padding
/// between rows: pixel `(x, y)` starts at byte `(y * width + x) * channels`.
#[derive(Clone, Copy, Debug)]
pub struct FrameBuffer<'a> {
    pub width: u32,
    pub height: u32,
    pub data: &'a [u8],
    pub pixel_format: PixelFormat,
}

/// Axis-aligned crop rectangle in frame pixels.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CropRect {
    pub x: u32,
    pub y: u32,
    pub width: u32,
    pub height: u32,
}

/// Scale and translation that map the crop back onto the full frame.
///
/// `scale_*` is `frame / crop` per axis. `translate_*` is the offset from the
/// crop centre to the frame centre, normalised so that a shift of half the
/// frame equals `1.0`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CropTransform {
    pub scale_x: f32,
    pub scale_y: f32,
    pub translate_x: f32,
    pub translate_y: f32,
}

/// Result of [`detect_autocrop`]: the crop plus the matching transform.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AutocropPlan {
    pub crop: CropRect,
    pub transform: CropTransform,
}

/// Tuning knobs for [`detect_autocrop`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AutocropConfig {
    /// A pixel counts as content when its brightest channel is strictly above
    /// this value.
    pub black_threshold: u8,
    /// RGBA pixels with alpha below this value never count as content.
    pub min_alpha: u8,
    /// Sample every `sample_step`-th pixel in both axes (`0` is treated as `1`).
    pub sample_step: u32,
    /// Optional width / height ratio the crop is grown towards. Values that
    /// are not strictly positive (including NaN) are ignored.
    pub target_aspect_ratio: Option<f32>,
}

impl Default for AutocropConfig {
    fn default() -> Self {
        AutocropConfig {
            black_threshold: 16,
            min_alpha: 16,
            sample_step: 1,
            target_aspect_ratio: None,
        }
    }
}

/// Find the bounding box of non-black content and describe how to crop to it.
///
/// Returns `None` when the frame has a zero dimension or `data` is shorter
/// than `width * height * channels`. When no content pixel is found the whole
/// frame is used as the crop.
///
/// With `sample_step > 1` only a sparse grid is inspected, so the sampled
/// bounding box can sit up to `sample_step - 1` pixels inside the true content
/// edge. The box is therefore padded by that amount (clamped to the frame);
/// without the padding a frame with no bars at all would still be cropped.
pub fn detect_autocrop(frame: &FrameBuffer<'_>, config: AutocropConfig) -> Option<AutocropPlan> {
    let channels = frame.pixel_format.channels();
    let width = frame.width as usize;
    let height = frame.height as usize;
    let expected_len = width.checked_mul(height)?.checked_mul(channels)?;
    if width == 0 || height == 0 || frame.data.len() < expected_len {
        return None;
    }

    let step = config.sample_step.max(1);
    let mut bounds = ContentBounds::empty();
    for y in (0..height).step_by(step as usize) {
        for x in (0..width).step_by(step as usize) {
            if is_content(frame, x, y, config) {
                // `x`/`y` are below `frame.width`/`frame.height`, so they fit in u32.
                bounds.include(x as u32, y as u32);
            }
        }
    }

    let mut crop = bounds
        .to_crop_rect()
        .map(|rect| pad_for_sampling(rect, step - 1, frame.width, frame.height))
        .unwrap_or(CropRect {
            x: 0,
            y: 0,
            width: frame.width,
            height: frame.height,
        });

    if let Some(aspect) = config.target_aspect_ratio.filter(|aspect| *aspect > 0.0) {
        crop = expand_to_aspect(crop, frame.width, frame.height, aspect);
    }

    Some(AutocropPlan {
        crop,
        transform: crop_transform(crop, frame.width, frame.height),
    })
}

/// Grow `rect` by `pad` pixels on every side, clamped to the frame.
fn pad_for_sampling(rect: CropRect, pad: u32, frame_width: u32, frame_height: u32) -> CropRect {
    if pad == 0 {
        return rect;
    }
    let left = rect.x.saturating_sub(pad);
    let top = rect.y.saturating_sub(pad);
    // Exclusive right / bottom edges.
    let right = rect
        .x
        .saturating_add(rect.width)
        .saturating_add(pad)
        .min(frame_width);
    let bottom = rect
        .y
        .saturating_add(rect.height)
        .saturating_add(pad)
        .min(frame_height);
    CropRect {
        x: left,
        y: top,
        width: right - left,
        height: bottom - top,
    }
}

/// Whether pixel `(x, y)` is visible content: opaque enough (RGBA only) and
/// with its brightest colour channel above the black threshold.
fn is_content(frame: &FrameBuffer<'_>, x: usize, y: usize, config: AutocropConfig) -> bool {
    let channels = frame.pixel_format.channels();
    let base = (y * frame.width as usize + x) * channels;
    let r = frame.data[base];
    let g = frame.data[base + 1];
    let b = frame.data[base + 2];
    let alpha_ok =
        frame.pixel_format == PixelFormat::Rgb || frame.data[base + 3] >= config.min_alpha;
    alpha_ok && r.max(g).max(b) > config.black_threshold
}

/// Running inclusive bounding box of the content pixels seen so far.
#[derive(Clone, Copy)]
struct ContentBounds {
    min_x: u32,
    min_y: u32,
    max_x: u32,
    max_y: u32,
    found: bool,
}

impl ContentBounds {
    fn empty() -> Self {
        ContentBounds {
            min_x: u32::MAX,
            min_y: u32::MAX,
            max_x: 0,
            max_y: 0,
            found: false,
        }
    }

    fn include(&mut self, x: u32, y: u32) {
        self.min_x = self.min_x.min(x);
        self.min_y = self.min_y.min(y);
        self.max_x = self.max_x.max(x);
        self.max_y = self.max_y.max(y);
        self.found = true;
    }

    /// `None` until at least one pixel has been included.
    fn to_crop_rect(self) -> Option<CropRect> {
        self.found.then_some(CropRect {
            x: self.min_x,
            y: self.min_y,
            width: self.max_x - self.min_x + 1,
            height: self.max_y - self.min_y + 1,
        })
    }
}

/// Grow `rect` along one axis towards the `target` width / height ratio.
/// The rectangle never shrinks and never exceeds the frame, so the target
/// ratio may not be reached when the frame is too small.
fn expand_to_aspect(rect: CropRect, frame_width: u32, frame_height: u32, target: f32) -> CropRect {
    let current = rect.width as f32 / rect.height as f32;
    if (current - target).abs() <= f32::EPSILON {
        return rect;
    }

    if current < target {
        let desired_width = ((rect.height as f32 * target).ceil() as u32).min(frame_width);
        expand_width(rect, desired_width.max(rect.width), frame_width)
    } else {
        let desired_height = ((rect.width as f32 / target).ceil() as u32).min(frame_height);
        expand_height(rect, desired_height.max(rect.height), frame_height)
    }
}

/// Widen `rect` to `width` around its centre, sliding it to stay in the frame.
/// Callers pass `width <= frame_width`.
fn expand_width(rect: CropRect, width: u32, frame_width: u32) -> CropRect {
    let center = rect.x as i64 + rect.width as i64 / 2;
    let mut x = center - width as i64 / 2;
    x = x.clamp(0, (frame_width - width) as i64);
    CropRect {
        x: x as u32,
        width,
        ..rect
    }
}

/// Heighten `rect` to `height` around its centre, sliding it to stay in the
/// frame. Callers pass `height <= frame_height`.
fn expand_height(rect: CropRect, height: u32, frame_height: u32) -> CropRect {
    let center = rect.y as i64 + rect.height as i64 / 2;
    let mut y = center - height as i64 / 2;
    y = y.clamp(0, (frame_height - height) as i64);
    CropRect {
        y: y as u32,
        height,
        ..rect
    }
}

/// Scale / translate pair that makes `crop` fill the frame.
fn crop_transform(crop: CropRect, frame_width: u32, frame_height: u32) -> CropTransform {
    let crop_center_x = crop.x as f32 + crop.width as f32 / 2.0;
    let crop_center_y = crop.y as f32 + crop.height as f32 / 2.0;
    let frame_center_x = frame_width as f32 / 2.0;
    let frame_center_y = frame_height as f32 / 2.0;
    CropTransform {
        scale_x: frame_width as f32 / crop.width as f32,
        scale_y: frame_height as f32 / crop.height as f32,
        translate_x: (frame_center_x - crop_center_x) / frame_width as f32 * 2.0,
        translate_y: (frame_center_y - crop_center_y) / frame_height as f32 * 2.0,
    }
}
/// Parameters for [`detect_beats`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct BeatDetectionConfig {
    /// PCM sample rate in Hz; `0` makes detection return nothing.
    pub sample_rate: u32,
    /// Project frame rate used to convert sample offsets to frames.
    pub fps: f64,
    /// Samples per energy window (`0` is treated as `1`).
    pub window_size_samples: usize,
    /// Samples between window starts (`0` is treated as `1`).
    pub hop_size_samples: usize,
    /// Minimum onset strength, relative to the strongest onset, to report.
    pub min_onset_strength: f32,
    /// Minimum distance, in frames, between two reported onsets.
    pub min_gap_frames: u64,
}

impl BeatDetectionConfig {
    /// Defaults for a given window: half-window hop, strength floor `0.08`,
    /// and a two-frame minimum gap.
    pub fn with_window(sample_rate: u32, fps: f64, window_size_samples: usize) -> Self {
        let window_size_samples = window_size_samples.max(1);
        BeatDetectionConfig {
            sample_rate,
            fps,
            window_size_samples,
            hop_size_samples: (window_size_samples / 2).max(1),
            min_onset_strength: 0.08,
            min_gap_frames: 2,
        }
    }
}

/// One detected onset: the frame it falls on and its strength in `0.0..=1.0`
/// relative to the strongest onset in the analysed buffer.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct BeatOnset {
    pub frame: u64,
    pub strength: f32,
}

/// Detect energy onsets in mono PCM `samples`.
///
/// The signal is cut into windows, each window's mean squared amplitude is
/// computed, and every rise in energy between consecutive windows is scored
/// against the largest rise. Rises at or above `min_onset_strength` are
/// reported at the frame of the later window's first sample, skipping any
/// that land closer than `min_gap_frames` to the previously reported onset.
///
/// Returns an empty vector for empty input, a zero sample rate, a non-finite
/// or non-positive `fps`, fewer than two windows, or no energy rise at all.
pub fn detect_beats(samples: &[f32], config: BeatDetectionConfig) -> Vec<BeatOnset> {
    if samples.is_empty() || config.sample_rate == 0 || !config.fps.is_finite() || config.fps <= 0.0
    {
        return Vec::new();
    }

    let window = config.window_size_samples.max(1);
    let hop = config.hop_size_samples.max(1);
    let energies = window_energies(samples, window, hop);
    if energies.len() < 2 {
        return Vec::new();
    }

    let peak_delta = energies
        .windows(2)
        .map(|pair| (pair[1] - pair[0]).max(0.0))
        .fold(0.0f32, f32::max);
    if peak_delta <= f32::EPSILON {
        return Vec::new();
    }

    let mut beats = Vec::new();
    let mut last_frame: Option<u64> = None;
    for i in 1..energies.len() {
        let delta = (energies[i] - energies[i - 1]).max(0.0);
        let strength = delta / peak_delta;
        if strength < config.min_onset_strength {
            continue;
        }

        // Window `i` starts at sample `i * hop`, which is below `samples.len()`.
        let frame = sample_to_frame(i * hop, config.sample_rate, config.fps);
        if let Some(last) = last_frame {
            // Saturate so a very large gap means "never again" instead of
            // overflowing.
            if frame < last.saturating_add(config.min_gap_frames) {
                continue;
            }
        }

        beats.push(BeatOnset { frame, strength });
        last_frame = Some(frame);
    }
    beats
}

/// Mean squared amplitude of each analysis window. The final windows are
/// truncated at the end of the buffer rather than zero-padded.
fn window_energies(samples: &[f32], window: usize, hop: usize) -> Vec<f32> {
    let mut out = Vec::new();
    let mut start = 0usize;
    while start < samples.len() {
        // Saturating arithmetic keeps huge window / hop values from overflowing.
        let end = start.saturating_add(window).min(samples.len());
        let slice = &samples[start..end];
        let mut sum = 0.0f64;
        for &sample in slice {
            let sample = f64::from(sample);
            sum += sample * sample;
        }
        out.push((sum / slice.len() as f64) as f32);
        start = start.saturating_add(hop);
    }
    out
}

/// Project frame containing the given sample offset (rounded down).
fn sample_to_frame(sample: usize, sample_rate: u32, fps: f64) -> u64 {
    ((sample as f64 / sample_rate as f64) * fps)
        .floor()
        .max(0.0) as u64
}
/// Parameters for [`detect_silences`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct SilenceDetectionConfig {
    /// PCM sample rate in Hz; `0` makes detection return nothing.
    pub sample_rate: u32,
    /// Project frame rate used to convert sample offsets to frames.
    pub fps: f64,
    /// Samples per RMS window (`0` is treated as `1`).
    pub window_size_samples: usize,
    /// Samples between window starts (`0` is treated as `1`).
    pub hop_size_samples: usize,
    /// A window is silent when its RMS is at or below this linear amplitude.
    pub rms_threshold: f32,
    /// Ranges shorter than this many frames are dropped.
    pub min_silence_frames: u64,
}

impl SilenceDetectionConfig {
    /// Defaults for a given window: half-window hop, RMS threshold `0.01`,
    /// and a one-frame minimum length.
    pub fn with_window(sample_rate: u32, fps: f64, window_size_samples: usize) -> Self {
        let window_size_samples = window_size_samples.max(1);
        SilenceDetectionConfig {
            sample_rate,
            fps,
            window_size_samples,
            hop_size_samples: (window_size_samples / 2).max(1),
            rms_threshold: 0.01,
            min_silence_frames: 1,
        }
    }
}

/// Half-open `[start_frame, end_frame)` span of silence in project frames.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SilenceRange {
    pub start_frame: u64,
    pub end_frame: u64,
}

/// Find runs of consecutive low-energy windows in mono PCM `samples`.
///
/// A run starts at the first sample of its first silent window and ends at the
/// last sample of its last silent window; both are converted to frames by
/// rounding down. Runs shorter than `min_silence_frames` are discarded.
///
/// Returns an empty vector for empty input, a zero sample rate, or a
/// non-finite or non-positive `fps`.
pub fn detect_silences(samples: &[f32], config: SilenceDetectionConfig) -> Vec<SilenceRange> {
    if samples.is_empty() || config.sample_rate == 0 || !config.fps.is_finite() || config.fps <= 0.0
    {
        return Vec::new();
    }

    let window = config.window_size_samples.max(1);
    let hop = config.hop_size_samples.max(1);
    let mut ranges = Vec::new();
    let mut active_start: Option<usize> = None;
    let mut active_end = 0usize;
    let mut start = 0usize;

    while start < samples.len() {
        // Saturating arithmetic keeps huge window / hop values from overflowing.
        let end = start.saturating_add(window).min(samples.len());
        let silent = rms(&samples[start..end]) <= config.rms_threshold;
        if silent {
            active_start.get_or_insert(start);
            active_end = end;
        } else if let Some(silence_start) = active_start.take() {
            push_range(&mut ranges, silence_start, active_end, &config);
        }
        start = start.saturating_add(hop);
    }

    // A run still open at the end of the buffer is closed there.
    if let Some(silence_start) = active_start {
        push_range(&mut ranges, silence_start, active_end, &config);
    }

    ranges
}

/// Convert a sample span to frames and record it if it is long enough.
/// A span that collapses to zero frames is widened to one frame before the
/// length check.
fn push_range(
    ranges: &mut Vec<SilenceRange>,
    start_sample: usize,
    end_sample: usize,
    config: &SilenceDetectionConfig,
) {
    let start_frame = sample_to_frame(start_sample, config.sample_rate, config.fps);
    let mut end_frame = sample_to_frame(end_sample, config.sample_rate, config.fps);
    if end_frame <= start_frame {
        end_frame = start_frame + 1;
    }
    if end_frame - start_frame >= config.min_silence_frames {
        ranges.push(SilenceRange {
            start_frame,
            end_frame,
        });
    }
}

/// Root-mean-square amplitude. Callers pass a non-empty slice; an empty one
/// would divide by zero and yield NaN.
fn rms(samples: &[f32]) -> f32 {
    let mut sum = 0.0f64;
    for &sample in samples {
        let sample = f64::from(sample);
        sum += sample * sample;
    }
    (sum / samples.len() as f64).sqrt() as f32
}

/// Project frame containing the given sample offset (rounded down).
fn sample_to_frame(sample: usize, sample_rate: u32, fps: f64) -> u64 {
    ((sample as f64 / sample_rate as f64) * fps)
        .floor()
        .max(0.0) as u64
}
b/crates/opentake-ops/src/command.rs index 35fe6e6..1f15b0e 100644 --- a/crates/opentake-ops/src/command.rs +++ b/crates/opentake-ops/src/command.rs @@ -175,6 +175,10 @@ pub enum KeyframePayload { pub enum EditCommand { /// Overwrite-place clips (clears each destination range first). AddClips { entries: Vec }, + /// Overwrite-place clips on fresh shared tracks chosen by media type. + /// Visual entries share one new visual track; audio entries share one new + /// audio track. Track insertion and placement commit as one transaction. + AddClipsAutoTrack { entries: Vec }, /// Ripple-insert clips at `at_frame`, pushing later clips right. InsertClips { track_index: usize, @@ -370,6 +374,7 @@ pub fn apply( } EditCommand::AddClips { entries } => add_clips(state, entries, ids), + EditCommand::AddClipsAutoTrack { entries } => add_clips_auto_track(state, entries, ids), EditCommand::InsertClips { track_index, at_frame, @@ -542,6 +547,79 @@ fn add_clips( ) } +fn add_clips_auto_track( + state: &mut EditorState, + entries: Vec, + ids: &dyn IdGen, +) -> Result { + if entries.is_empty() { + return Err(EditError::Invalid( + "Missing or empty 'entries' array".into(), + )); + } + for (i, e) in entries.iter().enumerate() { + validate_auto_track_entry(e, i)?; + } + let has_visual = entries + .iter() + .any(|entry| entry.source_clip_type != ClipType::Audio); + let has_audio = entries + .iter() + .any(|entry| entry.source_clip_type == ClipType::Audio); + let action_name = if entries.len() == 1 { + "Add Clip" + } else { + "Add Clips" + }; + transact( + state, + action_name, + |added| format!("Added {} clip(s): {}", added.len(), added.join(", ")), + |st| { + let visual_track_index = has_visual.then(|| { + let at = st.timeline.tracks.len(); + ops::insert_track(&mut st.timeline, at, ClipType::Video, ids) + }); + let audio_track_index = has_audio.then(|| { + let at = st.timeline.tracks.len(); + ops::insert_track(&mut st.timeline, at, ClipType::Audio, ids) + }); + let mut placed = Vec::new(); 
+ for entry in &entries { + let track_index = if entry.source_clip_type == ClipType::Audio { + audio_track_index + } else { + visual_track_index + } + .expect("validated required track kind above"); + let mut entry = entry.clone(); + entry.track_index = track_index; + let track_id = st.timeline.tracks[track_index].id.clone(); + if let Some(ti) = st.track_index(&track_id) { + ops::clear_region( + &mut st.timeline, + ti, + entry.start_frame, + entry.start_frame + entry.duration_frames, + false, + ids, + ); + } + if let Some(ti) = st.track_index(&track_id) { + placed.extend(ops::place_clip( + &mut st.timeline, + &entry.to_spec(), + ti, + None, + ids, + )); + } + } + Ok(placed) + }, + ) +} + fn insert_track_cmd( state: &mut EditorState, kind: ClipType, @@ -2023,6 +2101,46 @@ fn validate_entry(state: &EditorState, e: &ClipEntry, i: usize) -> Result<(), Ed Ok(()) } +fn validate_auto_track_entry(e: &ClipEntry, i: usize) -> Result<(), EditError> { + let target = if e.source_clip_type == ClipType::Audio { + ClipType::Audio + } else { + ClipType::Video + }; + if !e.source_clip_type.is_compatible(target) { + return Err(EditError::Invalid(format!( + "entries[{i}]: asset type is not compatible with an auto-created track" + ))); + } + if e.duration_frames < 1 { + return Err(EditError::Invalid(format!( + "entries[{i}]: durationFrames must be >= 1 (got {})", + e.duration_frames + ))); + } + if e.start_frame < 0 { + return Err(EditError::Invalid(format!( + "entries[{i}]: startFrame must be >= 0 (got {})", + e.start_frame + ))); + } + if let Some(t) = e.trim_start_frame { + if t < 0 { + return Err(EditError::Invalid(format!( + "entries[{i}]: trimStartFrame must be >= 0 (got {t})" + ))); + } + } + if let Some(t) = e.trim_end_frame { + if t < 0 { + return Err(EditError::Invalid(format!( + "entries[{i}]: trimEndFrame must be >= 0 (got {t})" + ))); + } + } + Ok(()) +} + fn empty_to_none( track: opentake_domain::KeyframeTrack, ) -> Option> { diff --git a/crates/opentake-ops/src/intent.rs 
b/crates/opentake-ops/src/intent.rs new file mode 100644 index 0000000..77a1277 --- /dev/null +++ b/crates/opentake-ops/src/intent.rs @@ -0,0 +1,301 @@ +//! High-level editing intents normalized into existing [`EditCommand`] values. +//! +//! This layer is deliberately thin: it performs preflight validation and expands +//! convenience intents (for example "add clips, creating a compatible track if +//! needed") into commands. It never mutates [`EditorState`] and never bypasses +//! [`crate::command::apply`]. + +use opentake_domain::{Clip, ClipType, Crop, Timeline, Transform}; + +use crate::command::{ClipEntry, ClipProperties, EditCommand, EditError}; +use crate::engines::FrameRange; +use crate::ops::{self, TrimEdge}; + +/// Preflight output for a high-level edit intent. +#[derive(Clone, Debug)] +pub struct EditPlan { + pub label: String, + pub commands: Vec, + pub warnings: Vec, +} + +impl EditPlan { + fn new(label: impl Into, commands: Vec) -> Self { + EditPlan { + label: label.into(), + commands, + warnings: Vec::new(), + } + } +} + +/// Clip placement intent. `track_index = None` means "pick or create a shared +/// compatible track" during preflight. 
+#[derive(Clone, Debug)] +pub struct IntentClipEntry { + pub media_ref: String, + pub media_type: ClipType, + pub source_clip_type: ClipType, + pub track_index: Option, + pub start_frame: i32, + pub duration_frames: i32, + pub trim_start_frame: Option, + pub trim_end_frame: Option, + pub has_audio: bool, + pub add_linked_audio: bool, + pub transform: Option, +} + +impl IntentClipEntry { + fn into_entry(self, track_index: usize) -> ClipEntry { + ClipEntry { + media_ref: self.media_ref, + media_type: self.media_type, + source_clip_type: self.source_clip_type, + track_index, + start_frame: self.start_frame, + duration_frames: self.duration_frames, + trim_start_frame: self.trim_start_frame, + trim_end_frame: self.trim_end_frame, + has_audio: self.has_audio, + add_linked_audio: self.add_linked_audio, + transform: self.transform, + } + } +} + +/// Add clips to explicitly provided tracks, or pick/create shared compatible +/// tracks when every entry omits `track_index`. +pub fn plan_auto_track_add( + timeline: &Timeline, + entries: Vec, +) -> Result { + if entries.is_empty() { + return Err(EditError::Invalid( + "Missing or empty intent entries".to_string(), + )); + } + + let provided = entries.iter().filter(|e| e.track_index.is_some()).count(); + if provided != 0 && provided != entries.len() { + return Err(EditError::Invalid( + "Either provide trackIndex for every entry or omit it for every entry".to_string(), + )); + } + + for (i, entry) in entries.iter().enumerate() { + validate_intent_entry(timeline, entry, i)?; + } + + if provided == entries.len() { + let add_entries = entries + .into_iter() + .map(|entry| { + let track_index = entry.track_index.expect("validated above"); + entry.into_entry(track_index) + }) + .collect(); + return Ok(EditPlan::new( + "auto_track_add", + vec![EditCommand::AddClips { + entries: add_entries, + }], + )); + } + + Ok(EditPlan::new( + "auto_track_add", + vec![EditCommand::AddClipsAutoTrack { + entries: entries + .into_iter() + .map(|entry| 
entry.into_entry(0)) + .collect(), + }], + )) +} + +/// Plan a CapCut-style trim to playhead for specific clips. +pub fn plan_trim_to_playhead( + timeline: &Timeline, + clip_ids: &[String], + frame: i32, + edge: TrimEdge, +) -> Result { + if clip_ids.is_empty() { + return Err(EditError::Invalid( + "Missing or empty clipIds array".to_string(), + )); + } + + let mut edits = Vec::new(); + for id in clip_ids { + let clip = find_clip(timeline, id) + .ok_or_else(|| EditError::Invalid(format!("Clip not found: {id}")))?; + if frame <= clip.start_frame || frame >= clip.end_frame() { + continue; + } + let raw_delta = match edge { + TrimEdge::Left => frame - clip.start_frame, + TrimEdge::Right => frame - clip.end_frame(), + }; + let delta = clamp_trim_delta(clip, edge, raw_delta); + if delta == 0 { + continue; + } + let speed = if clip.speed > 0.0 { clip.speed } else { 1.0 }; + let (trim_start, trim_end) = ops::trim_values( + clip.media_type, + speed, + clip.trim_start_frame, + clip.trim_end_frame, + edge, + delta, + ); + edits.push((id.clone(), trim_start, trim_end)); + } + + if edits.is_empty() { + let mut plan = EditPlan::new("trim_to_playhead", Vec::new()); + plan.warnings + .push("No clips intersect the playhead frame".to_string()); + return Ok(plan); + } + + Ok(EditPlan::new( + "trim_to_playhead", + vec![EditCommand::TrimClips { edits }], + )) +} + +/// Plan a single half-open project-frame ripple range delete on one track. +pub fn plan_ripple_delete_range( + track_index: usize, + start_frame: i32, + end_frame: i32, +) -> Result { + if end_frame <= start_frame { + return Err(EditError::Invalid(format!( + "range end must be greater than start ({start_frame}..{end_frame})" + ))); + } + Ok(EditPlan::new( + "ripple_delete_range", + vec![EditCommand::RippleDeleteRanges { + track_index, + ranges: vec![FrameRange::new(start_frame, end_frame)], + }], + )) +} + +/// Place clips at beat frames, then use the same auto-track planning as manual +/// placement. 
+pub fn plan_beat_sync_placement( + timeline: &Timeline, + entries: Vec, + beat_frames: &[i32], +) -> Result { + if beat_frames.len() < entries.len() { + return Err(EditError::Invalid(format!( + "Need at least {} beat frame(s), got {}", + entries.len(), + beat_frames.len() + ))); + } + let beat_entries = entries + .into_iter() + .zip(beat_frames.iter().copied()) + .map(|(mut entry, beat)| { + entry.start_frame = beat; + entry + }) + .collect(); + let mut plan = plan_auto_track_add(timeline, beat_entries)?; + plan.label = "beat_sync_placement".to_string(); + Ok(plan) +} + +/// Apply a smart-reframe crop/transform proposal to clips through +/// `SetClipProperties`. +pub fn plan_smart_reframe( + clip_ids: &[String], + crop: Crop, + transform: Option, +) -> Result { + if clip_ids.is_empty() { + return Err(EditError::Invalid( + "Missing or empty clipIds array".to_string(), + )); + } + Ok(EditPlan::new( + "smart_reframe", + vec![EditCommand::SetClipProperties { + clip_ids: clip_ids.to_vec(), + properties: ClipProperties { + crop: Some(crop), + transform, + ..Default::default() + }, + }], + )) +} + +fn validate_intent_entry( + timeline: &Timeline, + entry: &IntentClipEntry, + index: usize, +) -> Result<(), EditError> { + if entry.duration_frames < 1 { + return Err(EditError::Invalid(format!( + "entries[{index}]: durationFrames must be >= 1 (got {})", + entry.duration_frames + ))); + } + if entry.start_frame < 0 { + return Err(EditError::Invalid(format!( + "entries[{index}]: startFrame must be >= 0 (got {})", + entry.start_frame + ))); + } + if let Some(trim) = entry.trim_start_frame { + if trim < 0 { + return Err(EditError::Invalid(format!( + "entries[{index}]: trimStartFrame must be >= 0 (got {trim})" + ))); + } + } + if let Some(trim) = entry.trim_end_frame { + if trim < 0 { + return Err(EditError::Invalid(format!( + "entries[{index}]: trimEndFrame must be >= 0 (got {trim})" + ))); + } + } + if let Some(track_index) = entry.track_index { + let Some(track) = 
timeline.tracks.get(track_index) else { + return Err(EditError::Invalid(format!( + "entries[{index}]: track index {track_index} out of range" + ))); + }; + if !entry.source_clip_type.is_compatible(track.kind) { + return Err(EditError::Invalid(format!( + "entries[{index}]: asset type is not compatible with the destination track" + ))); + } + } + Ok(()) +} + +fn find_clip<'a>(timeline: &'a Timeline, clip_id: &str) -> Option<&'a Clip> { + timeline + .tracks + .iter() + .flat_map(|track| track.clips.iter()) + .find(|clip| clip.id == clip_id) +} + +fn clamp_trim_delta(clip: &Clip, edge: TrimEdge, raw_delta: i32) -> i32 { + match edge { + TrimEdge::Left => raw_delta.clamp(0, clip.duration_frames - 1), + TrimEdge::Right => raw_delta.clamp(-(clip.duration_frames - 1), 0), + } +} diff --git a/crates/opentake-ops/src/lib.rs b/crates/opentake-ops/src/lib.rs index a49f098..cbb1f21 100644 --- a/crates/opentake-ops/src/lib.rs +++ b/crates/opentake-ops/src/lib.rs @@ -20,6 +20,7 @@ pub mod command; pub mod editor_state; pub mod engines; pub mod id; +pub mod intent; pub mod ops; // --- Pure engines --- diff --git a/crates/opentake-ops/tests/command_apply.rs b/crates/opentake-ops/tests/command_apply.rs index 4eb719a..17191af 100644 --- a/crates/opentake-ops/tests/command_apply.rs +++ b/crates/opentake-ops/tests/command_apply.rs @@ -141,6 +141,34 @@ fn add_clips_rejects_incompatible_type() { assert!(matches!(err, EditError::Invalid(_))); } +#[test] +fn add_clips_auto_track_mixed_audio_video_is_one_undoable_transaction() { + let mut st = state(vec![]); + let g = SeqIdGen::new("n-"); + let res = apply( + &mut st, + EditCommand::AddClipsAutoTrack { + entries: vec![ + entry(0, ClipType::Audio, 0, 30), + entry(0, ClipType::Video, 10, 20), + ], + }, + &g, + ) + .unwrap(); + + assert!(res.changed); + assert_eq!(st.timeline.tracks.len(), 2); + assert_eq!(st.timeline.tracks[0].kind, ClipType::Video); + assert_eq!(st.timeline.tracks[1].kind, ClipType::Audio); + 
assert_eq!(st.timeline.tracks[0].clips[0].media_type, ClipType::Video); + assert_eq!(st.timeline.tracks[1].clips[0].media_type, ClipType::Audio); + assert_eq!(st.undo_depth(), 1); + + apply(&mut st, EditCommand::Undo, &g).unwrap(); + assert!(st.timeline.tracks.is_empty()); +} + // ---- split + keyframes ---------------------------------------------------- #[test] diff --git a/crates/opentake-ops/tests/intent_planner.rs b/crates/opentake-ops/tests/intent_planner.rs new file mode 100644 index 0000000..bd4ac86 --- /dev/null +++ b/crates/opentake-ops/tests/intent_planner.rs @@ -0,0 +1,192 @@ +use opentake_domain::{Clip, ClipType, Crop, Timeline, Track, Transform}; +use opentake_ops::intent::{ + plan_auto_track_add, plan_beat_sync_placement, plan_ripple_delete_range, plan_smart_reframe, + plan_trim_to_playhead, IntentClipEntry, +}; +use opentake_ops::{EditCommand, TrimEdge}; + +fn clip(id: &str, start: i32, dur: i32) -> Clip { + Clip::new(id, "asset", start, dur) +} + +fn track(id: &str, kind: ClipType, clips: Vec) -> Track { + let mut t = Track::new(id, kind); + t.clips = clips; + t +} + +fn intent_entry(track_index: Option, media_type: ClipType, start: i32) -> IntentClipEntry { + IntentClipEntry { + media_ref: "asset".into(), + media_type, + source_clip_type: media_type, + track_index, + start_frame: start, + duration_frames: 30, + trim_start_frame: None, + trim_end_frame: None, + has_audio: false, + add_linked_audio: false, + transform: None, + } +} + +#[test] +fn auto_track_add_on_empty_timeline_plans_insert_then_add_on_new_track() { + let timeline = Timeline::new(); + + let plan = plan_auto_track_add(&timeline, vec![intent_entry(None, ClipType::Video, 12)]) + .expect("auto-track add plan"); + + assert_eq!(plan.label, "auto_track_add"); + assert!(plan.warnings.is_empty()); + assert_eq!(plan.commands.len(), 1); + match plan.commands[0].clone() { + EditCommand::AddClipsAutoTrack { entries } => { + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].track_index, 0); 
+ assert_eq!(entries[0].start_frame, 12); + } + other => panic!("expected AddClipsAutoTrack, got {other:?}"), + } +} + +#[test] +fn auto_track_add_with_explicit_track_index_uses_add_clips() { + let mut timeline = Timeline::new(); + timeline.tracks = vec![ + track("v1", ClipType::Video, vec![]), + track("a1", ClipType::Audio, vec![]), + ]; + + let plan = plan_auto_track_add(&timeline, vec![intent_entry(Some(0), ClipType::Image, 0)]) + .expect("auto-track add plan"); + + assert_eq!(plan.commands.len(), 1); + match plan.commands[0].clone() { + EditCommand::AddClips { entries } => { + assert_eq!(entries[0].track_index, 0); + assert_eq!(entries[0].source_clip_type, ClipType::Image); + } + other => panic!("expected AddClips, got {other:?}"), + } +} + +#[test] +fn auto_track_add_mixed_audio_video_uses_atomic_command_without_precomputed_indexes() { + let timeline = Timeline::new(); + + let plan = plan_auto_track_add( + &timeline, + vec![ + intent_entry(None, ClipType::Audio, 0), + intent_entry(None, ClipType::Video, 10), + ], + ) + .expect("auto-track add plan"); + + assert_eq!(plan.commands.len(), 1); + match plan.commands[0].clone() { + EditCommand::AddClipsAutoTrack { entries } => { + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].source_clip_type, ClipType::Audio); + assert_eq!(entries[1].source_clip_type, ClipType::Video); + } + other => panic!("expected AddClipsAutoTrack, got {other:?}"), + } +} + +#[test] +fn trim_to_playhead_plans_source_frame_trim_for_left_edge() { + let mut timeline = Timeline::new(); + let mut c = clip("c1", 100, 60); + c.trim_start_frame = 5; + timeline.tracks.push(track("v1", ClipType::Video, vec![c])); + + let plan = plan_trim_to_playhead(&timeline, &["c1".to_string()], 130, TrimEdge::Left) + .expect("trim plan"); + + match plan.commands[0].clone() { + EditCommand::TrimClips { edits } => { + assert_eq!(edits, vec![("c1".to_string(), 35, 0)]); + } + other => panic!("expected TrimClips, got {other:?}"), + } +} + +#[test] +fn 
ripple_delete_range_plans_half_open_range_command() { + let plan = plan_ripple_delete_range(2, 10, 25).expect("ripple plan"); + + match plan.commands[0].clone() { + EditCommand::RippleDeleteRanges { + track_index, + ranges, + } => { + assert_eq!(track_index, 2); + assert_eq!(ranges[0].start, 10); + assert_eq!(ranges[0].end, 25); + } + other => panic!("expected RippleDeleteRanges, got {other:?}"), + } +} + +#[test] +fn beat_sync_placement_sets_entry_start_frames_from_beats_then_auto_tracks() { + let timeline = Timeline::new(); + + let plan = plan_beat_sync_placement( + &timeline, + vec![ + intent_entry(None, ClipType::Video, 999), + intent_entry(None, ClipType::Video, 999), + ], + &[12, 42], + ) + .expect("beat plan"); + + assert_eq!(plan.commands.len(), 1); + match plan.commands[0].clone() { + EditCommand::AddClipsAutoTrack { entries } => { + assert_eq!(entries[0].start_frame, 12); + assert_eq!(entries[1].start_frame, 42); + assert_eq!(entries[0].track_index, 0); + assert_eq!(entries[1].track_index, 0); + } + other => panic!("expected AddClipsAutoTrack, got {other:?}"), + } +} + +#[test] +fn smart_reframe_plans_crop_and_transform_properties() { + let crop = Crop { + left: 0.1, + top: 0.0, + right: 0.1, + bottom: 0.0, + }; + let transform = Transform { + center_x: 0.5, + center_y: 0.5, + width: 0.75, + height: 1.0, + rotation: 0.0, + flip_horizontal: false, + flip_vertical: false, + }; + + let plan = + plan_smart_reframe(&["c1".to_string()], crop, Some(transform)).expect("reframe plan"); + + match plan.commands[0].clone() { + EditCommand::SetClipProperties { + clip_ids, + properties, + } => { + assert_eq!(clip_ids, vec!["c1".to_string()]); + assert_eq!(properties.crop, Some(crop)); + assert_eq!(properties.transform, Some(transform)); + } + other => panic!("expected SetClipProperties, got {other:?}"), + } +} diff --git a/docs/DOS/EDITING-AUTOMATION-DOS.md b/docs/DOS/EDITING-AUTOMATION-DOS.md new file mode 100644 index 0000000..fece5a8 --- /dev/null +++ 
b/docs/DOS/EDITING-AUTOMATION-DOS.md @@ -0,0 +1,65 @@ +# Editing Automation DOS + +This document is the shared technical contract for automation features that edit an OpenTake timeline. It is deliberately narrow: reuse the current command path, keep the editing engine authoritative, and put automation-specific analysis outside `opentake-ops`. + +## Current Baseline + +Use these documents as the current baseline: [Editing engine plan](../EDITING-ENGINE-PLAN.md), [CapCut gap report](../CAPCUT-GAP.md), [Agent context signal](../AGENT-CONTEXT-SIGNAL.md), [Workflow plugin system](../WORKFLOW-PLUGIN-SYSTEM.md), [Module port map](../MODULE-PORT-MAP.md), [Known bugs](../BUGS.md), and the specs: [agent](../specs/agent-SPEC.md), [core](../specs/core-SPEC.md), [frontend UI](../specs/frontend-UI-1to1-SPEC.md), [media](../specs/media-SPEC.md), [render](../specs/render-SPEC.md), [gen](../specs/gen-SPEC.md). + +[PORT-1TO1-GAP.md](../PORT-1TO1-GAP.md) is historical reference only, not current fact. + +## Design Rule + +Automation may analyze media, propose edits, and build commands. It must not bypass the edit transaction path or mutate timeline mirrors directly. + +Authoritative UI chain: + +`TimelineContainer/Inspector/Toolbar` -> `web/src/store/editActions.ts` -> `web/src/lib/api.ts editApply()` -> `src-tauri/src/commands.rs edit_apply` -> `AppCore::apply()` -> `opentake-ops::EditCommand` -> `ops/*` -> `timeline_changed` -> `sync.ts`. + +Authoritative MCP/Agent chain: + +`Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. + +Swift alignment chain: + +`EditorViewModel` gesture methods -> `withTimelineSwap` -> `OverwriteEngine/RippleEngine/SnapEngine` -> `Timeline/Clip` pure value model. + +## Core Invariants + +- Intervals are half-open. Clip occupancy is `[startFrame, endFrame)`, where `endFrame = startFrame + durationFrames`. +- Keyframe storage is clip-relative. 
Incoming Agent/UI frames that are timeline absolute must be converted at the command boundary. +- `trimStartFrame` and `trimEndFrame` are source-frame trims. They are not timeline coordinates. +- Speed math consumes source frames as `round(durationFrames * speed)`. Any derived v1 automation must avoid inventing alternate frame math. +- Linked group sync is preserved for trim, move, split, delete, and ripple unless the command is `Unlink`. +- Track partition is structural: visual tracks are `[0, firstAudioIndex)`, audio tracks are `[firstAudioIndex, trackCount)`. +- Every edit is one atomic `EditCommand` transaction. If analysis cannot produce a valid command, return a suggestion or error, not a partial edit. + +## Automation Surfaces + +The v1 editing automation set is: + +- `smart_reframe`: compute crop/transform changes for aspect adaptation, black-bar removal, and stable subject framing. +- `detect_beats`: read audio PCM and return beat/onset candidates without changing the timeline. +- `auto_cut_to_beats`: align selected clips or media ranges to beat candidates through existing edit commands. +- `tighten_silences`: find low-energy gaps and produce ripple delete ranges. +- `remove_filler_words`: disabled until timeline transcript tooling is truly wired; it depends on word-level transcript frames. + +## Scope Boundaries + +Automatic crop v1 covers smart reframe, black-bar removal, and aspect-ratio adaptation. It does not include ML face tracking. + +Automatic music beat sync v1 uses PCM energy/onset detection. It must not add heavy ML or FFT dependencies. If later work needs spectral methods, add them as an explicit v2 design with a dependency and performance budget. + +Agent tools may suggest edits without applying them. A write path must apply via `EditCommand` only. 
+ +Current MCP status: `detect_beats`, `auto_cut_to_beats`, and `tighten_silences` are typed tools backed by `CoreHandle::extract_analysis_pcm`, so they can produce PCM-based frame hints and candidate edit commands without mutating the timeline. `smart_reframe` is still a typed preflight surface that returns a vision-backend diagnostic until sampled-frame/saliency access is exposed. `remove_filler_words` remains disabled until transcript access is truly wired. + +## Failure Semantics + +- No media decode: return a structured diagnostic and no edit. +- Ambiguous short IDs: fail before typed args or command creation. +- Analysis confidence below threshold: return suggestions, not writes. +- Transcript unavailable: `remove_filler_words` remains unavailable; `tighten_silences` can still use PCM energy. +- `ripple_delete_ranges` accepts exactly one of `trackIndex` or `clipId`. `units="frames"` is the default; `units="seconds"` is valid only with `clipId` and is converted through the timeline fps plus the clip's source-frame trim/speed mapping before producing half-open project-frame ranges. +- `add_clips` with omitted `trackIndex` must route through one atomic auto-track `EditCommand`; track creation and clip placement must undo together. +- `swapMedia` consumes only `clipId` + `mediaRef`. Frontend types and wrappers must not expose duration/type/trim options unless the backend starts consuming them. diff --git a/docs/DOS/EDITING-AUTOMATION/acceptance-tests.md b/docs/DOS/EDITING-AUTOMATION/acceptance-tests.md new file mode 100644 index 0000000..2ed8568 --- /dev/null +++ b/docs/DOS/EDITING-AUTOMATION/acceptance-tests.md @@ -0,0 +1,58 @@ +# Editing Automation Acceptance Tests + +## Purpose + +Define acceptance checks for the DOS docs and the future implementation they describe. These tests are contract-level; implementation workers should add concrete unit, integration, and E2E tests in their crates. 
+ +Parent contract: [Editing Automation DOS](../EDITING-AUTOMATION-DOS.md). Source baseline: [Editing engine plan](../../EDITING-ENGINE-PLAN.md), [Known bugs](../../BUGS.md), [agent spec](../../specs/agent-SPEC.md), [core spec](../../specs/core-SPEC.md), [media spec](../../specs/media-SPEC.md), [render spec](../../specs/render-SPEC.md). [PORT-1TO1-GAP.md](../../PORT-1TO1-GAP.md) is historical reference only. + +## Documentation Checks + +- Every DOS Markdown link resolves locally. +- The UI call chain is present exactly: + `TimelineContainer/Inspector/Toolbar` -> `web/src/store/editActions.ts` -> `web/src/lib/api.ts editApply()` -> `src-tauri/src/commands.rs edit_apply` -> `AppCore::apply()` -> `opentake-ops::EditCommand` -> `ops/*` -> `timeline_changed` -> `sync.ts`. +- The MCP/Agent call chain is present exactly: + `Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. +- The Swift alignment chain is present exactly: + `EditorViewModel` gesture methods -> `withTimelineSwap` -> `OverwriteEngine/RippleEngine/SnapEngine` -> `Timeline/Clip` pure value model. +- [PORT-1TO1-GAP.md](../../PORT-1TO1-GAP.md) is marked historical reference only. + +## Shared Implementation Checks + +- `write=false` automation tools return proposals and do not call `CoreHandle::apply()`. +- `write=true` tools call exactly one atomic `EditCommand` transaction per user action. +- Validation failure leaves the document unchanged and emits no `timeline_changed`. +- Successful writes emit `timeline_changed`, then `sync.ts` refreshes the read-only mirror. +- Short IDs are expanded before typed args and shortened after `context_signal`. +- Linked group sync is preserved for every write. +- Visual/audio track partition is preserved. + +## Smart Reframe Checks + +- Landscape source to vertical timeline writes only crop/transform properties. 
+- Stable letterbox bars are removed; unstable dark content is not treated as bars. +- Audio-only clips are rejected without mutation. +- Clip-relative crop keyframes stay within `[0, durationFrames]`. +- Undo restores the exact prior crop/transform state. + +## Beat Sync Checks + +- Synthetic click or pulse audio yields expected beat frames within a defined tolerance. +- Low-energy speech does not generate dense montage beats. +- Beat detection is read-only. +- `auto_cut_to_beats` preserves linked A/V sync. +- V1 implementation uses PCM energy/onset and does not add heavy ML or FFT dependencies. + +## Agent / Workflow Checks + +- `detect_beats`, `auto_cut_to_beats`, `smart_reframe`, and `tighten_silences` are visible in tool metadata when implemented. +- `remove_filler_words` reports unavailable until word-level transcript is wired to timeline frames. +- Active workflow plugin roles affect tool target selection. +- Plugin rules appear in `context_signal` warnings without suppressing built-in warnings. +- Agent `ripple_delete_ranges` rejects calls that pass both `trackIndex` and `clipId`, accepts `clipId + units=seconds`, and emits half-open project-frame ranges after fps/source-trim conversion. +- Agent `add_clips` with omitted `trackIndex` creates shared auto tracks and clips in one undoable transaction; one `undo` removes both clips and auto-created tracks. +- PCM-backed MCP tools return deterministic preview data: `detect_beats` returns beat frame hints, `auto_cut_to_beats` returns beat/cut/placement suggestions, and `tighten_silences` returns `ripple_delete_ranges` candidate commands without mutating the timeline. `smart_reframe` still returns a deterministic vision-backend diagnostic until sampled-frame analysis is wired. + +## Minimum Local Verification + +Run a local Markdown link existence check over `docs/DOS/**/*.md`. This does not prove implementation behavior, but it prevents stale cross-document references in the DOS set. 
diff --git a/docs/DOS/EDITING-AUTOMATION/agent-editing-suggestions.md b/docs/DOS/EDITING-AUTOMATION/agent-editing-suggestions.md new file mode 100644 index 0000000..ea0a5b3 --- /dev/null +++ b/docs/DOS/EDITING-AUTOMATION/agent-editing-suggestions.md @@ -0,0 +1,75 @@ +# Agent Editing Suggestions DOS + +## Purpose + +Define how the Agent proposes or applies editing automation without moving frame math into the LLM. + +Parent contract: [Editing Automation DOS](../EDITING-AUTOMATION-DOS.md). Source baseline: [Agent context signal](../../AGENT-CONTEXT-SIGNAL.md), [Workflow plugin system](../../WORKFLOW-PLUGIN-SYSTEM.md), [agent spec](../../specs/agent-SPEC.md), [core spec](../../specs/core-SPEC.md), [known bugs](../../BUGS.md). [PORT-1TO1-GAP.md](../../PORT-1TO1-GAP.md) is historical reference only. + +## Dispatcher Contract + +All Agent tools follow: + +`Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. + +The Agent sees short IDs. The dispatcher expands them before typed args and shortens newly created IDs after `context_signal` attachment. + +## Tool Set + +V1 automation tools: + +- `detect_beats`: read-only, returns beat/onset candidates. +- `auto_cut_to_beats`: proposal or write mode, applies beat-aligned edit commands. +- `smart_reframe`: proposal or write mode, applies crop/transform commands. +- `tighten_silences`: detects low-energy PCM ranges and maps them to `RippleDeleteRanges`. + +Deferred: + +- `remove_filler_words`: depends on word-level `get_transcript` being truly wired through timeline frames. Until then, it must report unavailable rather than guessing from captions or segments. + +## Suggestion Shape + +Read-only suggestions should be structured: + +```text +{ + tool, + confidence, + proposedCommands, + affectedClipIds, + frameRanges, + warnings, + requiresTranscript?: boolean +} +``` + +`proposedCommands` must be valid `EditCommand` mirrors. 
The LLM can choose among proposals, but it should not hand-calculate clip-relative keyframes, source trims, or ripple shifts. + +## Context Signal + +Attach `context_signal` after every tool run. For automation, it should include: + +- inferred video type and workflow, for example `montage_beat` or `audio_driven`; +- track roles, especially `BGM`, `VoiceOver`, `MainCamera`, and `B_RollOverlay`; +- warnings such as "do not cut within a word" or "BGM beat detection was low confidence"; +- plugin-derived rules when a workflow is active. + +Workflow plugin rules are additive. Built-in signal rules still apply. + +## Ripple Range Contract + +`ripple_delete_ranges` must pass exactly one of `trackIndex` or `clipId`. `trackIndex` mode takes project-frame ranges only. `clipId` mode may use `units="frames"` or `units="seconds"`; seconds are converted to source frames with timeline fps and then mapped through clip trim/speed into project-frame half-open ranges. + +## Current Tool Availability + +The analysis-driven tool names are intentionally visible in MCP. `detect_beats`, `auto_cut_to_beats`, and `tighten_silences` validate args and use PCM analysis through the `CoreHandle` boundary to return preview data or candidate edit commands. `smart_reframe` validates args but still returns a vision-backend diagnostic until sampled-frame/saliency access is available. + +## Acceptance Hooks + +See [acceptance tests](acceptance-tests.md). Required checks: + +- ambiguous short ID fails before command execution; +- `write=false` never calls `CoreHandle::apply()`; +- successful writes return shortened IDs; +- `context_signal` survives both success and no-op proposal paths; +- `remove_filler_words` is unavailable until transcript is wired. 
diff --git a/docs/DOS/EDITING-AUTOMATION/auto-crop-smart-reframe.md b/docs/DOS/EDITING-AUTOMATION/auto-crop-smart-reframe.md new file mode 100644 index 0000000..42b04e1 --- /dev/null +++ b/docs/DOS/EDITING-AUTOMATION/auto-crop-smart-reframe.md @@ -0,0 +1,71 @@ +# Auto Crop / Smart Reframe DOS + +## Purpose + +Define v1 automatic framing for timeline clips. It should produce deterministic crop/transform edits through the shared edit path, not a separate render-only effect. + +Parent contract: [Editing Automation DOS](../EDITING-AUTOMATION-DOS.md). Source baseline: [Editing engine plan](../../EDITING-ENGINE-PLAN.md), [CapCut gap report](../../CAPCUT-GAP.md), [render spec](../../specs/render-SPEC.md), [frontend UI spec](../../specs/frontend-UI-1to1-SPEC.md), [known bugs](../../BUGS.md). [PORT-1TO1-GAP.md](../../PORT-1TO1-GAP.md) is historical reference only. + +## V1 Scope + +Included: + +- Smart reframe for aspect-ratio adaptation, for example 16:9 source to 9:16 or 1:1 timeline. +- Black-bar removal by detecting stable letterbox or pillarbox regions. +- Crop/transform output that remains inspectable and editable in the Inspector. +- Optional keyframe smoothing only when the subject window changes gradually and the result can stay clip-relative. + +Excluded: + +- ML face tracking. +- Multi-person identity tracking. +- Scene understanding that requires a remote model. +- Render-only dynamic crops that are invisible to `Timeline/Clip`. + +## Command Contract + +Recommended tool shape: + +```text +smart_reframe { + clipIds: string[], + targetAspect?: "timeline" | "9:16" | "16:9" | "1:1" | "4:5", + mode?: "fit" | "fill" | "remove_black_bars" | "stable_subject", + write?: boolean +} +``` + +`write=false` returns proposed `SetClipProperties` or `SetKeyframes` payloads. 
`write=true` applies through: + +`Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand::SetClipProperties` or `EditCommand::SetKeyframes` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. + +UI writes use the same chain: + +`TimelineContainer/Inspector/Toolbar` -> `web/src/store/editActions.ts` -> `web/src/lib/api.ts editApply()` -> `src-tauri/src/commands.rs edit_apply` -> `AppCore::apply()` -> `opentake-ops::EditCommand` -> `ops/*` -> `timeline_changed` -> `sync.ts`. + +## Algorithm Sketch + +1. Sample a bounded set of frames from each target clip after trim and speed mapping. +2. Detect black bars with edge luminance/variance tests. Require stability across sampled frames before modifying crop. +3. Estimate a content bounding box from non-bar pixels and motion/contrast energy. This is not face detection. +4. Convert desired visible source rectangle into `Crop` plus `Transform` using existing normalized coordinate semantics. +5. Smooth across samples only if resulting keyframes are sparse and clip-relative. +6. Apply as one atomic edit command per user action. + +## Invariants + +- Output must respect half-open clip intervals. +- Crop keyframes are clip-relative. +- Source trim remains source-frame trim; reframe must not change trim unless explicitly requested by another command. +- Linked audio partners must not receive visual crop/transform edits. +- The visual/audio track partition must not change. +- Undo must restore the exact prior timeline snapshot through the shared `EditCommand` transaction. + +## Acceptance Hooks + +See [acceptance tests](acceptance-tests.md). Minimum cases: + +- 16:9 landscape clip reframed to 9:16 without changing duration or trim. +- Letterboxed clip gets black bars cropped only when bars are stable. +- Audio-only clip is rejected with no edit. +- `write=false` returns a proposal and does not emit `timeline_changed`. 
diff --git a/docs/DOS/EDITING-AUTOMATION/beat-sync-auto-cut.md b/docs/DOS/EDITING-AUTOMATION/beat-sync-auto-cut.md new file mode 100644 index 0000000..1b79071 --- /dev/null +++ b/docs/DOS/EDITING-AUTOMATION/beat-sync-auto-cut.md @@ -0,0 +1,83 @@ +# Beat Sync / Auto Cut DOS + +## Purpose + +Define v1 music beat detection and beat-aligned cutting. The first version must be cheap, local, and deterministic. + +Parent contract: [Editing Automation DOS](../EDITING-AUTOMATION-DOS.md). Source baseline: [Agent context signal](../../AGENT-CONTEXT-SIGNAL.md), [CapCut gap report](../../CAPCUT-GAP.md), [media spec](../../specs/media-SPEC.md), [agent spec](../../specs/agent-SPEC.md), [known bugs](../../BUGS.md). [PORT-1TO1-GAP.md](../../PORT-1TO1-GAP.md) is historical reference only. + +## V1 Scope + +Included: + +- `detect_beats`: PCM energy and onset candidate detection. +- `auto_cut_to_beats`: align a selected set of clips or source ranges to detected beat frames. +- Local media decoding only. +- No timeline mutation unless `auto_cut_to_beats` is called with `write=true`. + +Excluded: + +- Heavy ML beat tracking. +- New FFT dependency or large DSP stack. +- Tempo maps requiring full musical structure analysis. +- Cloud audio analysis. + +## Detection Contract + +`detect_beats` reads a target audio clip, linked audio partner, or selected BGM track and returns: + +```text +{ + fps, + trackIndex, + source: "clip" | "track", + beats: [{ frame, strength, kind: "onset" | "energy_peak" }], + confidence, + warnings +} +``` + +V1 algorithm: + +1. Decode audio to PCM using the existing media layer. +2. Downmix to mono and use fixed windows, for example 20-40 ms. +3. Compute RMS energy per window. +4. Smooth the envelope. +5. Detect positive energy deltas and local peaks with a refractory window. +6. Convert source time to project frames using timeline fps. + +No FFT is required for v1. If a future version adds spectral flux, it must be documented as v2 with dependency review. 
+ +## Auto Cut Contract + +`auto_cut_to_beats` consumes beat frames and selected visual material. It may: + +- split clips on beat frames; +- place selected media ranges at beat-aligned starts; +- trim clip boundaries to the nearest beat when within a small tolerance; +- return a proposal when confidence is low. + +It must apply edits only through the shared path: + +`TimelineContainer/Inspector/Toolbar` -> `web/src/store/editActions.ts` -> `web/src/lib/api.ts editApply()` -> `src-tauri/src/commands.rs edit_apply` -> `AppCore::apply()` -> `opentake-ops::EditCommand` -> `ops/*` -> `timeline_changed` -> `sync.ts`. + +Agent path: + +`Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. + +## Safety Rules + +- Never cut voice audio tracks as if they were BGM unless the workflow plugin marks them as BGM. +- Linked A/V must remain synchronized. +- Beat alignment should prefer moving/placing visual clips; mutating BGM is out of scope for v1. +- When `syncLocked` tracks cannot absorb ripple shifts, the whole edit is refused. +- The Agent should receive `context_signal` warnings when a montage workflow is not active but the user requests aggressive beat cutting. + +## Acceptance Hooks + +See [acceptance tests](acceptance-tests.md). Minimum cases: + +- Synthetic click track produces beat frames within tolerance. +- Low-energy speech track is not over-detected as montage beats. +- `auto_cut_to_beats(write=false)` emits no `timeline_changed`. +- Linked visual/audio pairs keep the same `linkGroupId` and stay frame-aligned after auto cut. 
diff --git a/docs/DOS/EDITING-AUTOMATION/workflow-plugin-recipes.md b/docs/DOS/EDITING-AUTOMATION/workflow-plugin-recipes.md new file mode 100644 index 0000000..6e45bab --- /dev/null +++ b/docs/DOS/EDITING-AUTOMATION/workflow-plugin-recipes.md @@ -0,0 +1,107 @@ +# Workflow Plugin Recipes DOS + +## Purpose + +Define reusable workflow recipes that bind automation tools to editing intent. Recipes are Agent-level orchestration; they do not modify Rust core editing algorithms. + +Parent contract: [Editing Automation DOS](../EDITING-AUTOMATION-DOS.md). Source baseline: [Workflow plugin system](../../WORKFLOW-PLUGIN-SYSTEM.md), [Agent context signal](../../AGENT-CONTEXT-SIGNAL.md), [agent spec](../../specs/agent-SPEC.md), [module port map](../../MODULE-PORT-MAP.md). [PORT-1TO1-GAP.md](../../PORT-1TO1-GAP.md) is historical reference only. + +## Recipe Format + +A workflow recipe lives in `plugin.json` and optional `instructions.md`. + +Required fields for automation: + +- `video_type.primary` +- `workflow.approach` +- `workflow.stages` +- `workflow.rules.do` +- `workflow.rules.dont` +- `track_roles` + +Plugin instructions may guide the Agent, but write operations still use: + +`Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. + +## Built-In Recipes + +### Talking Head Cleanup + +Approach: `audio_driven`. + +Stages: + +1. `get_transcript` when available. +2. `tighten_silences` on the `VoiceOver` track. +3. `remove_filler_words` only after transcript is truly wired. +4. `smart_reframe` for vertical repurposing if target aspect differs. + +Rules: + +- Do not cut inside a word. +- Do not remove all breathing room; preserve configurable padding. +- Keep linked audio/video synchronized. + +### Montage Beat Cut + +Approach: `montage_beat`. + +Stages: + +1. Mark BGM track role. +2. `detect_beats` on BGM. +3. Select visual source ranges. +4. `auto_cut_to_beats(write=false)` for preview. 
+5. Apply `auto_cut_to_beats(write=true)` only after the proposal is coherent. + +Rules: + +- Prefer visual cuts on beats; do not ripple the BGM track in v1. +- Avoid using low-confidence beats for hard cuts. +- Keep shot durations above the configured minimum. + +### Vertical Repurpose + +Approach: `audio_driven` or `montage_beat`, depending on the source. + +Stages: + +1. Set target aspect. +2. `smart_reframe(write=false)` for every selected visual clip. +3. Apply accepted crop/transform edits. +4. Re-run Preview/Inspector checks. + +Rules: + +- No ML face tracking in v1. +- Reject audio-only clips. +- Keep output edits visible as Inspector crop/transform properties. + +### Silence Tighten + +Approach: `audio_driven`. + +Stages: + +1. Identify the `VoiceOver` track or the main linked audio track. +2. `tighten_silences(write=false)` using PCM energy. +3. Apply as `RippleDeleteRanges` project-frame ranges after review. + +Rules: + +- Use `trackIndex` mode for project-frame ranges. Use `clipId + units=seconds` only when a workflow is expressing source-relative clip ranges. +- Preserve linked group synchronization. +- Refuse the whole edit if sync-locked tracks cannot shift safely. + +## Plugin Signal Integration + +When active, plugin declarations override the automatically inferred `video_type` and `track_roles`, then append stage guidance and rules to `context_signal`. Plugin content does not replace built-in safety warnings. + +## Acceptance Hooks + +See [acceptance tests](acceptance-tests.md). Required checks: + +- plugin track roles influence `detect_beats` target selection; +- `workflow.rules.dont` warnings appear in `context_signal`; +- recipes can run in proposal mode without emitting `timeline_changed`; +- all writes still route through `EditCommand`.
diff --git a/docs/DOS/README.md b/docs/DOS/README.md new file mode 100644 index 0000000..0d9a552 --- /dev/null +++ b/docs/DOS/README.md @@ -0,0 +1,52 @@ +# OpenTake Editing DOS + +DOS means Design Operating Spec: a compact contract for workers implementing independent editing automation. This set covers the user plan's Key Changes 1 and 2: + +1. Define the independent editing automation surface without rewriting the existing editing engine. +2. Define Agent/workflow recipes and acceptance gates for automation tools. + +## Documents + +- [Editing Automation DOS](EDITING-AUTOMATION-DOS.md) - shared contracts, call chains, invariants, and scope. +- [Auto Crop / Smart Reframe](EDITING-AUTOMATION/auto-crop-smart-reframe.md) - v1 framing automation. +- [Beat Sync / Auto Cut](EDITING-AUTOMATION/beat-sync-auto-cut.md) - v1 PCM-energy and onset-based cutting. +- [Agent Editing Suggestions](EDITING-AUTOMATION/agent-editing-suggestions.md) - tool contracts and context-signal behavior. +- [Workflow Plugin Recipes](EDITING-AUTOMATION/workflow-plugin-recipes.md) - reusable workflow plugin patterns. +- [Acceptance Tests](EDITING-AUTOMATION/acceptance-tests.md) - verification matrix for this DOS. + +## Source Baseline + +Current facts should be taken from: + +- [Editing engine plan](../EDITING-ENGINE-PLAN.md) +- [CapCut gap report](../CAPCUT-GAP.md) +- [Agent context signal](../AGENT-CONTEXT-SIGNAL.md) +- [Workflow plugin system](../WORKFLOW-PLUGIN-SYSTEM.md) +- [Module port map](../MODULE-PORT-MAP.md) +- [Known bugs](../BUGS.md) +- Specs: [agent](../specs/agent-SPEC.md), [core](../specs/core-SPEC.md), [frontend UI](../specs/frontend-UI-1to1-SPEC.md), [media](../specs/media-SPEC.md), [render](../specs/render-SPEC.md), [gen](../specs/gen-SPEC.md) + +[PORT-1TO1-GAP.md](../PORT-1TO1-GAP.md) is historical reference only. Do not treat it as current implementation truth unless a newer document points back to a specific item.
+ +## Authoritative Call Chains + +UI editing: + +`TimelineContainer/Inspector/Toolbar` -> `web/src/store/editActions.ts` -> `web/src/lib/api.ts editApply()` -> `src-tauri/src/commands.rs edit_apply` -> `AppCore::apply()` -> `opentake-ops::EditCommand` -> `ops/*` -> `timeline_changed` -> `sync.ts`. + +MCP/Agent editing: + +`Dispatcher::dispatch()` -> short-id expansion -> typed args -> `EditCommand` -> `CoreHandle::apply()` -> `context_signal` -> short-id shortening. + +Swift alignment: + +`EditorViewModel` gesture methods -> `withTimelineSwap` -> `OverwriteEngine/RippleEngine/SnapEngine` -> `Timeline/Clip` pure value model. + +## Non-Negotiable Invariants + +- Frame intervals are half-open: `[startFrame, startFrame + durationFrames)`. +- Keyframes are stored clip-relative; public APIs may use absolute timeline frames. +- Trim values are source-frame offsets, not timeline-frame positions. +- Linked audio/video groups must remain synchronized unless a command explicitly unlinks them. +- Visual tracks live above audio tracks; insertion and drop routing preserve the partition. +- `EditCommand` application is atomic: validation failure or ripple refusal leaves the document unchanged.
diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs index e40bfdf..934f64d 100644 --- a/src-tauri/src/commands.rs +++ b/src-tauri/src/commands.rs @@ -19,11 +19,13 @@ use opentake_core::dto::{ use opentake_core::{AppCore, CmdError, EditCommand}; use opentake_ops::{ - ClipEntry, ClipMove, ClipProperties, FrameRange, KeyframePayload, KeyframeProperty, TextEntry, + ClipEntry, ClipMove, ClipProperties, FrameRange, KeyframePayload, KeyframeProperty, + RenameEntry, TextEntry, }; use opentake_domain::{ - AnimPair, ClipType, Crop, Interpolation, Keyframe, KeyframeTrack, TextStyle, Transform, + AnimPair, ChromaKey, ClipType, ColorGrade, Crop, Effect, Interpolation, Keyframe, + KeyframeTrack, Mask, TextStyle, Transform, }; // MARK: - Read / lifecycle commands (direct DTO passthrough) @@ -196,6 +198,26 @@ pub enum EditRequest { interpolation: Interpolation, }, #[serde(rename_all = "camelCase")] + SetColorGrade { + clip_ids: Vec, + grade: Option, + }, + #[serde(rename_all = "camelCase")] + SetChromaKey { + clip_ids: Vec, + chroma_key: Option, + }, + #[serde(rename_all = "camelCase")] + SetMasks { + clip_ids: Vec, + masks: Vec, + }, + #[serde(rename_all = "camelCase")] + SetEffects { + clip_ids: Vec, + effects: Vec, + }, + #[serde(rename_all = "camelCase")] RippleDeleteRanges { track_index: usize, ranges: Vec, @@ -230,6 +252,14 @@ pub enum EditRequest { folder_id: Option, }, #[serde(rename_all = "camelCase")] + RenameMedia { entries: Vec }, + #[serde(rename_all = "camelCase")] + RenameFolder { entries: Vec }, + #[serde(rename_all = "camelCase")] + DeleteMedia { asset_ids: Vec }, + #[serde(rename_all = "camelCase")] + DeleteFolder { folder_ids: Vec }, + #[serde(rename_all = "camelCase")] SwapMedia { clip_id: String, media_ref: String }, } @@ -323,6 +353,20 @@ impl EditRequest { frame, interpolation, }, + EditRequest::SetColorGrade { clip_ids, grade } => { + EditCommand::SetColorGrade { clip_ids, grade } + } + EditRequest::SetChromaKey { + clip_ids, + chroma_key, + 
} => EditCommand::SetChromaKey { + clip_ids, + chroma_key, + }, + EditRequest::SetMasks { clip_ids, masks } => EditCommand::SetMasks { clip_ids, masks }, + EditRequest::SetEffects { clip_ids, effects } => { + EditCommand::SetEffects { clip_ids, effects } + } EditRequest::RippleDeleteRanges { track_index, ranges, @@ -367,6 +411,20 @@ impl EditRequest { asset_ids, folder_id, }, + EditRequest::RenameMedia { entries } => EditCommand::RenameMedia { + entries: entries + .into_iter() + .map(RenameEntryDto::into_entry) + .collect(), + }, + EditRequest::RenameFolder { entries } => EditCommand::RenameFolder { + entries: entries + .into_iter() + .map(RenameEntryDto::into_entry) + .collect(), + }, + EditRequest::DeleteMedia { asset_ids } => EditCommand::DeleteMedia { asset_ids }, + EditRequest::DeleteFolder { folder_ids } => EditCommand::DeleteFolder { folder_ids }, EditRequest::SwapMedia { clip_id, media_ref } => { EditCommand::SwapMedia { clip_id, media_ref } } @@ -540,6 +598,22 @@ impl TextEntryDto { } } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RenameEntryDto { + pub id: String, + pub name: String, +} + +impl RenameEntryDto { + fn into_entry(self) -> RenameEntry { + RenameEntry { + id: self.id, + name: self.name, + } + } +} + #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub enum KeyframePropertyDto { @@ -677,4 +751,87 @@ mod edit_request_serde_tests { other => panic!("expected SwapMedia, got {other:?}"), } } + + #[test] + fn deserializes_effect_commands_and_maps_to_ops_variants() { + let grade = serde_json::from_str::( + r#"{"type":"setColorGrade","clipIds":["clip-1"],"grade":{"exposure":1.0}}"#, + ) + .expect("setColorGrade camelCase"); + match grade.into_command().expect("setColorGrade command") { + EditCommand::SetColorGrade { clip_ids, grade } => { + assert_eq!(clip_ids, vec!["clip-1"]); + assert_eq!(grade.expect("grade").exposure, 1.0); + } + other => panic!("expected SetColorGrade, got {other:?}"), + } + + let 
chroma = serde_json::from_str::( + r#"{"type":"setChromaKey","clipIds":["clip-1"],"chromaKey":{"similarity":0.2}}"#, + ) + .expect("setChromaKey camelCase"); + assert!(matches!( + chroma.into_command().expect("setChromaKey command"), + EditCommand::SetChromaKey { .. } + )); + + let masks = serde_json::from_str::( + r#"{"type":"setMasks","clipIds":["clip-1"],"masks":[]}"#, + ) + .expect("setMasks camelCase"); + assert!(matches!( + masks.into_command().expect("setMasks command"), + EditCommand::SetMasks { .. } + )); + + let effects = serde_json::from_str::( + r#"{"type":"setEffects","clipIds":["clip-1"],"effects":[{"name":"gaussianBlur","params":{"radius":4.0}}]}"#, + ) + .expect("setEffects camelCase"); + match effects.into_command().expect("setEffects command") { + EditCommand::SetEffects { effects, .. } => { + assert_eq!(effects[0].name, "gaussianBlur"); + assert_eq!(effects[0].param("radius", 0.0), 4.0); + } + other => panic!("expected SetEffects, got {other:?}"), + } + } + + #[test] + fn deserializes_media_library_commands_and_maps_to_ops_variants() { + let rename_media = serde_json::from_str::( + r#"{"type":"renameMedia","entries":[{"id":"asset-1","name":"Hero"}]}"#, + ) + .expect("renameMedia camelCase"); + assert!(matches!( + rename_media.into_command().expect("renameMedia command"), + EditCommand::RenameMedia { .. } + )); + + let rename_folder = serde_json::from_str::( + r#"{"type":"renameFolder","entries":[{"id":"folder-1","name":"B-roll"}]}"#, + ) + .expect("renameFolder camelCase"); + assert!(matches!( + rename_folder.into_command().expect("renameFolder command"), + EditCommand::RenameFolder { .. } + )); + + let delete_media = + serde_json::from_str::(r#"{"type":"deleteMedia","assetIds":["asset-1"]}"#) + .expect("deleteMedia camelCase"); + assert!(matches!( + delete_media.into_command().expect("deleteMedia command"), + EditCommand::DeleteMedia { .. 
} + )); + + let delete_folder = serde_json::from_str::( + r#"{"type":"deleteFolder","folderIds":["folder-1"]}"#, + ) + .expect("deleteFolder camelCase"); + assert!(matches!( + delete_folder.into_command().expect("deleteFolder command"), + EditCommand::DeleteFolder { .. } + )); + } } diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 4c9a713..459717e 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -52,6 +52,16 @@ export async function editApply(command: EditRequest): Promise { return fallback.editApply(command); } +/** Sequential automation wrapper: each command still goes through the single + * Rust `EditCommand` authority via `edit_apply`. */ +export async function editApplyMany(commands: EditRequest[]): Promise { + const results: EditResult[] = []; + for (const command of commands) { + results.push(await editApply(command)); + } + return results; +} + export async function undo(): Promise { await ensureTauri(); if (invokeImpl) return invokeImpl("undo"); diff --git a/web/src/lib/fallback.test.ts b/web/src/lib/fallback.test.ts index 5d89844..dd1d53b 100644 --- a/web/src/lib/fallback.test.ts +++ b/web/src/lib/fallback.test.ts @@ -287,4 +287,38 @@ describe("browser fallback edit store", () => { expect(clip?.fadeInInterpolation).toBe("smooth"); expect(clip?.fadeOutInterpolation).toBe("smooth"); }); + + it("keeps effect setters atomic when any clip id is missing", () => { + const fallback = createFallbackStore(); + + const result = fallback.editApply({ + type: "setEffects", + clipIds: ["c1", "missing"], + effects: [{ name: "gaussianBlur", params: { radius: 4 }, enabled: true }], + }); + const clip = fallback + .getTimeline() + .timeline.tracks.flatMap((track) => track.clips) + .find((candidate) => candidate.id === "c1"); + + expect(result.changed).toBe(false); + expect(clip?.effects).toBeUndefined(); + }); + + it("does not emulate swapMedia without the Tauri media manifest", () => { + const fallback = createFallbackStore(); + + const result = 
fallback.editApply({ + type: "swapMedia", + clipId: "c1", + mediaRef: "replacement", + }); + const clip = fallback + .getTimeline() + .timeline.tracks.flatMap((track) => track.clips) + .find((candidate) => candidate.id === "c1"); + + expect(result.changed).toBe(false); + expect(clip?.mediaRef).toBe("demo-video"); + }); }); diff --git a/web/src/lib/fallback.ts b/web/src/lib/fallback.ts index 3061e41..91e63a6 100644 --- a/web/src/lib/fallback.ts +++ b/web/src/lib/fallback.ts @@ -29,6 +29,68 @@ function defaultCrop() { return { left: 0, top: 0, right: 0, bottom: 0 }; } +function defaultRgb() { + return { r: 1, g: 1, b: 1 }; +} + +function normalizeRgb(input: Partial> | undefined, fallback = defaultRgb()) { + return { + r: input?.r ?? fallback.r, + g: input?.g ?? fallback.g, + b: input?.b ?? fallback.b, + }; +} + +function normalizeColorGrade( + grade: Extract["grade"], +): NonNullable | undefined { + if (grade == null) return undefined; + return { + exposure: grade.exposure ?? 0, + temperature: grade.temperature ?? 0, + tint: grade.tint ?? 0, + liftGammaGain: { + lift: normalizeRgb(grade.liftGammaGain?.lift, { r: 0, g: 0, b: 0 }), + gamma: normalizeRgb(grade.liftGammaGain?.gamma), + gain: normalizeRgb(grade.liftGammaGain?.gain), + }, + contrast: grade.contrast ?? 0, + saturation: grade.saturation ?? 1, + }; +} + +function normalizeChromaKey( + chromaKey: Extract["chromaKey"], +): NonNullable | undefined { + if (chromaKey == null) return undefined; + return { + keyColor: normalizeRgb(chromaKey.keyColor, { r: 0, g: 1, b: 0 }), + similarity: chromaKey.similarity ?? 0.15, + smoothness: chromaKey.smoothness ?? 0.35, + spill: chromaKey.spill ?? 0.5, + }; +} + +function normalizeMask(mask: Extract["masks"][number]): NonNullable[number] { + return { + shape: mask.shape ?? { + kind: "circle", + center: { x: 0.5, y: 0.5 }, + radius: { x: 1.5, y: 1.5 }, + }, + feather: mask.feather ?? 0, + invert: mask.invert ?? 
false, + }; +} + +function normalizeEffect(effect: Extract["effects"][number]): NonNullable[number] { + return { + name: effect.name, + params: { ...(effect.params ?? {}) }, + enabled: effect.enabled ?? true, + }; +} + function isVisual(type: Clip["mediaType"]): boolean { return type !== "audio"; } @@ -138,6 +200,16 @@ export function createFallbackStore() { return null; } + function findAllClips(ids: string[]): Array<[number, number]> | null { + const locations: Array<[number, number]> = []; + for (const id of ids) { + const loc = findClip(id); + if (!loc) return null; + locations.push(loc); + } + return locations; + } + function insertionIndex(kind: Clip["mediaType"], requested = timeline.tracks.length): number { const firstAudio = timeline.tracks.findIndex((track) => track.type === "audio"); const firstAudioIndex = firstAudio >= 0 ? firstAudio : timeline.tracks.length; @@ -358,6 +430,8 @@ export function createFallbackStore() { if (p.volume !== undefined) (c.volume = p.volume), (changed = true); if (p.speed !== undefined) (c.speed = p.speed), (changed = true); if (p.transform !== undefined) (c.transform = p.transform), (changed = true); + if (p.crop !== undefined) (c.crop = p.crop), (changed = true); + if (p.textContent !== undefined) (c.textContent = p.textContent), (changed = true); if (p.fadeInFrames !== undefined) (c.fadeInFrames = p.fadeInFrames), (changed = true); if (p.fadeOutFrames !== undefined) (c.fadeOutFrames = p.fadeOutFrames), (changed = true); if (p.fadeInInterpolation !== undefined) @@ -367,6 +441,70 @@ export function createFallbackStore() { } return result(changed, "Set Clip Property", cmd.clipIds); } + case "setColorGrade": { + const locations = findAllClips(cmd.clipIds); + if (!locations) return result(false, "Set Color Grade", []); + const next = normalizeColorGrade(cmd.grade); + let changed = false; + for (const loc of locations) { + const clip = timeline.tracks[loc[0]].clips[loc[1]]; + if (JSON.stringify(clip.colorGrade) !== 
JSON.stringify(next)) { + clip.colorGrade = next; + changed = true; + } + } + return result(changed, "Set Color Grade", cmd.clipIds); + } + case "setChromaKey": { + const locations = findAllClips(cmd.clipIds); + if (!locations) return result(false, "Set Chroma Key", []); + const next = normalizeChromaKey(cmd.chromaKey); + let changed = false; + for (const loc of locations) { + const clip = timeline.tracks[loc[0]].clips[loc[1]]; + if (JSON.stringify(clip.chromaKey) !== JSON.stringify(next)) { + clip.chromaKey = next; + changed = true; + } + } + return result(changed, "Set Chroma Key", cmd.clipIds); + } + case "setMasks": { + const locations = findAllClips(cmd.clipIds); + if (!locations) return result(false, "Set Masks", []); + const next = cmd.masks.map(normalizeMask); + let changed = false; + for (const loc of locations) { + const clip = timeline.tracks[loc[0]].clips[loc[1]]; + if (JSON.stringify(clip.masks ?? []) !== JSON.stringify(next)) { + clip.masks = structuredClone(next); + changed = true; + } + } + return result(changed, "Set Masks", cmd.clipIds); + } + case "setEffects": { + const locations = findAllClips(cmd.clipIds); + if (!locations) return result(false, "Set Effects", []); + const next = cmd.effects.map(normalizeEffect); + let changed = false; + for (const loc of locations) { + const clip = timeline.tracks[loc[0]].clips[loc[1]]; + if (JSON.stringify(clip.effects ?? 
[]) !== JSON.stringify(next)) { + clip.effects = structuredClone(next); + changed = true; + } + } + return result(changed, "Set Effects", cmd.clipIds); + } + case "swapMedia": { + return result(false, "Swap Media", []); + } + case "renameMedia": + case "renameFolder": + case "deleteMedia": + case "deleteFolder": + return result(false, cmd.type, []); default: return result(false, cmd.type, []); } diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index ca4a276..14d722a 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -40,6 +40,39 @@ export interface AnimPair { b: number; } +export interface Rgba { + r: number; + g: number; + b: number; + a: number; +} + +export type TextAlignment = "left" | "center" | "right"; + +export interface Shadow { + enabled: boolean; + color: Rgba; + offsetX: number; + offsetY: number; + blur: number; +} + +export interface Fill { + enabled: boolean; + color: Rgba; +} + +export interface TextStyle { + fontName: string; + fontSize: number; + fontScale: number; + color: Rgba; + alignment: TextAlignment; + shadow: Shadow; + background: Fill; + border: Fill; +} + export interface Transform { centerX: number; // default 0.5 centerY: number; // default 0.5 @@ -57,6 +90,90 @@ export interface Crop { bottom: number; } +export interface Rgb { + r: number; + g: number; + b: number; +} + +export interface LiftGammaGain { + lift: Rgb; + gamma: Rgb; + gain: Rgb; +} + +export interface ColorGrade { + exposure: number; + temperature: number; + tint: number; + liftGammaGain: LiftGammaGain; + contrast: number; + saturation: number; +} + +export interface ChromaKey { + keyColor: Rgb; + similarity: number; + smoothness: number; + spill: number; +} + +export interface Point2 { + x: number; + y: number; +} + +export type MaskShape = + | { kind: "linear"; point: Point2; normal: Point2 } + | { kind: "circle"; center: Point2; radius: Point2 } + | { kind: "poly"; points: Point2[] }; + +export interface Mask { + shape: MaskShape; + feather: 
number; + invert: boolean; +} + +export interface Effect { + name: string; + params: Record; + enabled: boolean; +} + +export interface LiftGammaGainInput { + lift?: Partial; + gamma?: Partial; + gain?: Partial; +} + +export interface ColorGradeInput { + exposure?: number; + temperature?: number; + tint?: number; + liftGammaGain?: LiftGammaGainInput; + contrast?: number; + saturation?: number; +} + +export interface ChromaKeyInput { + keyColor?: Partial; + similarity?: number; + smoothness?: number; + spill?: number; +} + +export interface MaskInput { + shape?: MaskShape; + feather?: number; + invert?: boolean; +} + +export interface EffectInput { + name: string; + params?: Record; + enabled?: boolean; +} + export interface Clip { id: string; mediaRef: string; @@ -78,13 +195,17 @@ export interface Clip { linkGroupId?: string; captionGroupId?: string; textContent?: string; - textStyle?: unknown; + textStyle?: TextStyle; opacityTrack?: KeyframeTrack; positionTrack?: KeyframeTrack; scaleTrack?: KeyframeTrack; rotationTrack?: KeyframeTrack; cropTrack?: KeyframeTrack; volumeTrack?: KeyframeTrack; + colorGrade?: ColorGrade; + chromaKey?: ChromaKey; + masks?: Mask[]; + effects?: Effect[]; } // MARK: - Command DTOs (mirror src-tauri EditRequest) @@ -138,6 +259,11 @@ export interface ClipPropertiesReq { flipVertical?: boolean; } +export interface RenameEntryReq { + id: string; + name: string; +} + /** Which property a keyframe track targets (mirror of `KeyframeProperty`). 
*/ export type KeyframeProperty = | "opacity" @@ -180,6 +306,10 @@ export type EditRequest = | { type: "removeKeyframe"; clipId: string; property: KeyframeProperty; frame: number } | { type: "moveKeyframe"; clipId: string; property: KeyframeProperty; fromFrame: number; toFrame: number } | { type: "setKeyframeInterpolation"; clipId: string; property: KeyframeProperty; frame: number; interpolation: Interpolation } + | { type: "setColorGrade"; clipIds: string[]; grade?: ColorGradeInput | null } + | { type: "setChromaKey"; clipIds: string[]; chromaKey?: ChromaKeyInput | null } + | { type: "setMasks"; clipIds: string[]; masks: MaskInput[] } + | { type: "setEffects"; clipIds: string[]; effects: EffectInput[] } | { type: "rippleDeleteRanges"; trackIndex: number; ranges: FrameRangeReq[] } | { type: "rippleDeleteClips"; clipIds: string[] } | { type: "addTexts"; entries: TextEntryReq[] } @@ -196,22 +326,18 @@ export type EditRequest = } | { type: "createFolder"; name: string; parentFolderId?: string } | { type: "moveToFolder"; assetIds: string[]; folderId?: string } - | { - type: "swapMedia"; - clipId: string; - mediaRef: string; - mediaType?: ClipType; - sourceClipType?: ClipType; - durationFrames?: number; - trimStartFrame?: number; - }; + | { type: "renameMedia"; entries: RenameEntryReq[] } + | { type: "renameFolder"; entries: RenameEntryReq[] } + | { type: "deleteMedia"; assetIds: string[] } + | { type: "deleteFolder"; folderIds: string[] } + | { type: "swapMedia"; clipId: string; mediaRef: string }; export interface TextEntryReq { trackIndex: number; startFrame: number; durationFrames: number; content: string; - textStyle: unknown; + textStyle: TextStyle; transform: Transform; } diff --git a/web/src/store/editActions.ts b/web/src/store/editActions.ts index 93791ba..9b6efcf 100644 --- a/web/src/store/editActions.ts +++ b/web/src/store/editActions.ts @@ -17,12 +17,20 @@ import type { ClipMoveReq, ClipPropertiesReq, ClipType, + ChromaKeyInput, + ColorGradeInput, + Crop, + 
EffectInput, + EditRequest, FrameRangeReq, Interpolation, KeyframePayloadReq, KeyframeProperty, + MaskInput, MediaItem, + RenameEntryReq, TextEntryReq, + TextStyle, Timeline, Transform, TrimEditReq, @@ -85,6 +93,26 @@ export async function setClipProperties(clipIds: string[], properties: ClipPrope await applyAndRefresh({ type: "setClipProperties", clipIds, properties }); } +export async function setColorGrade(clipIds: string[], grade: ColorGradeInput | null) { + if (clipIds.length === 0) return; + await applyAndRefresh({ type: "setColorGrade", clipIds, grade }); +} + +export async function setChromaKey(clipIds: string[], chromaKey: ChromaKeyInput | null) { + if (clipIds.length === 0) return; + await applyAndRefresh({ type: "setChromaKey", clipIds, chromaKey }); +} + +export async function setMasks(clipIds: string[], masks: MaskInput[]) { + if (clipIds.length === 0) return; + await applyAndRefresh({ type: "setMasks", clipIds, masks }); +} + +export async function setEffects(clipIds: string[], effects: EffectInput[]) { + if (clipIds.length === 0) return; + await applyAndRefresh({ type: "setEffects", clipIds, effects }); +} + export async function linkClips(clipIds: string[]) { await applyAndRefresh({ type: "link", clipIds }); } @@ -172,30 +200,58 @@ export async function moveToFolder(assetIds: string[], folderId?: string) { await applyAndRefresh({ type: "moveToFolder", assetIds, folderId }); } -/** Replace a clip's media source in place, preserving all editing attributes - * (transform / crop / keyframe tracks / grade / masks / effects / fade). When - * the new media is shorter than the clip's current duration, the backend - * truncates the duration and clamps `trim_end_frame` to fit. `mediaType`, when - * set, also implies `sourceClipType` unless `sourceClipType` is explicit. 
*/ -export async function swapMedia( - clipId: string, - mediaRef: string, - options?: { - mediaType?: ClipType; - sourceClipType?: ClipType; - durationFrames?: number; - trimStartFrame?: number; - }, -) { - await applyAndRefresh({ - type: "swapMedia", - clipId, - mediaRef, - mediaType: options?.mediaType, - sourceClipType: options?.sourceClipType, - durationFrames: options?.durationFrames, - trimStartFrame: options?.trimStartFrame, - }); +export async function renameMedia(entries: RenameEntryReq[]) { + if (entries.length === 0) return; + await applyAndRefresh({ type: "renameMedia", entries }); +} + +export async function renameFolder(entries: RenameEntryReq[]) { + if (entries.length === 0) return; + await applyAndRefresh({ type: "renameFolder", entries }); +} + +export async function deleteMedia(assetIds: string[]) { + if (assetIds.length === 0) return; + await applyAndRefresh({ type: "deleteMedia", assetIds }); +} + +export async function deleteFolder(folderIds: string[]) { + if (folderIds.length === 0) return; + await applyAndRefresh({ type: "deleteFolder", folderIds }); +} + +/** Replace a clip's media source in place, preserving all editing attributes. + * The backend intentionally consumes only `clipId` + `mediaRef`; it does not + * rewrite trim, duration, or type metadata. 
*/ +export async function swapMedia(clipId: string, mediaRef: string) { + await applyAndRefresh({ type: "swapMedia", clipId, mediaRef }); +} + +export async function applyAutomationCommands(commands: EditRequest[]) { + if (commands.length === 0) return []; + const results = []; + for (const command of commands) { + results.push(await applyAndRefresh(command)); + } + return results; +} + +export async function applySmartReframe(clipIds: string[], crop: Crop, transform?: Transform) { + if (clipIds.length === 0) return; + await setClipProperties(clipIds, { crop, transform }); +} + +export async function addClipsToBeatFrames(entries: ClipEntryReq[], beatFrames: number[]) { + if (entries.length === 0) return; + const placed = entries.map((entry, index) => ({ + ...entry, + startFrame: beatFrames[index] ?? entry.startFrame, + })); + await addClips(placed); +} + +export async function tightenSilenceRanges(trackIndex: number, ranges: FrameRangeReq[]) { + await rippleDeleteRanges(trackIndex, ranges); } export async function undo() { @@ -541,6 +597,29 @@ const DEFAULT_TEXT_TRANSFORM: Transform = { flipVertical: false, }; +const DEFAULT_TEXT_STYLE: TextStyle = { + fontName: "Helvetica-Bold", + fontSize: 96, + fontScale: 1, + color: { r: 1, g: 1, b: 1, a: 1 }, + alignment: "center", + shadow: { + enabled: true, + color: { r: 0, g: 0, b: 0, a: 0.6 }, + offsetX: 0, + offsetY: -2, + blur: 6, + }, + background: { + enabled: false, + color: { r: 0, g: 0, b: 0, a: 0.6 }, + }, + border: { + enabled: false, + color: { r: 0, g: 0, b: 0, a: 1 }, + }, +}; + /** Find the first visual track (video/image/text/lottie) index, or null. */ function firstVisualTrackIndex(timeline: Timeline): number | null { for (let i = 0; i < timeline.tracks.length; i++) { @@ -573,7 +652,7 @@ export async function addTextClip() { startFrame, durationFrames, content: "", - textStyle: {}, + textStyle: DEFAULT_TEXT_STYLE, transform: DEFAULT_TEXT_TRANSFORM, };