Spaces:
Paused
Paused
| import path from "node:path" | |
| import { v4 as uuidv4 } from "uuid" | |
| import tmpDir from "temp-dir" | |
| import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts" | |
| import { generateAudio } from "./generateAudio.mts" | |
| import { generateVideo } from "./generateVideo.mts" | |
| import { upscaleVideo } from "./upscaleVideo.mts" | |
| import { generateVoice } from "./generateVoice.mts" | |
| import { generateSeed } from "../utils/generateSeed.mts" | |
| import { mergeAudio } from "./mergeAudio.mts" | |
| import { addAudioToVideo } from "./addAudioToVideo.mts" | |
| import { interpolateVideo } from "./interpolateVideo.mts" | |
| import { postInterpolation } from "./postInterpolation.mts" | |
/**
 * Generates one video shot and returns where the resulting file lives.
 *
 * Pipeline, in order:
 *  1. generate the base video from `shotPrompt` (one retry on failure)
 *  2. fetch it through `downloadFileToTmp` under the name `<shotId>.mp4`
 *  3. if `upscale`: run `upscaleVideo` on that file (one retry on failure)
 *  4. if `interpolate`: run `interpolateVideo`, then `postInterpolation`
 *  5. if a background and/or foreground audio prompt is given: generate the
 *     audio, mix both tracks when both exist, and attach it to the video
 *
 * @param seed - seed forwarded to the video model; a falsy value (the default 0)
 *   is replaced by `generateSeed()`
 * @param shotId - identifier used for file names; an empty value is replaced by a new UUID
 * @param actorPrompt - currently only triggers a log line (actor generation is not wired in)
 * @param shotPrompt - prompt forwarded to `generateVideo` and `upscaleVideo`
 * @param backgroundAudioPrompt - when non-empty, a background track is generated
 * @param foregroundAudioPrompt - when non-empty, a foreground track is generated
 * @param actorDialoguePrompt - text forwarded to `generateVoice`; only used when
 *   `actorVoicePrompt` is also non-empty
 * @param actorVoicePrompt - acts only as a switch for voice generation for now
 * @param duration - forwarded to `postInterpolation` (interpolation branch only)
 * @param nbFrames - forwarded to `postInterpolation`; the base model is always asked for 24 frames
 * @param resolution - accepted and logged, not otherwise used by this function
 * @param nbSteps - forwarded to `generateVideo`
 * @param upscale - enables step 3
 * @param interpolate - enables step 4
 * @param noise - accepted and logged, not otherwise used by this function
 * @returns `{ shotId, filePath, videoFileName }`, where `filePath` is
 *   `videoFileName` resolved against the temp directory
 * @throws whatever the underlying steps throw; for base generation and
 *   upscaling only the error of the second attempt propagates
 */
export const generateShot = async ({
  seed = 0,
  shotId = "",
  actorPrompt = "",
  shotPrompt = "",
  backgroundAudioPrompt = "",
  foregroundAudioPrompt = "",
  actorDialoguePrompt = "",
  actorVoicePrompt = "",
  duration = 2,
  nbFrames = 24,
  resolution = 576,
  nbSteps = 35,
  upscale = true,
  interpolate = true,
  noise = true,
}: {
  seed?: number;
  shotId?: string;
  actorPrompt?: string;
  shotPrompt?: string;
  backgroundAudioPrompt?: string;
  foregroundAudioPrompt?: string;
  actorDialoguePrompt?: string;
  actorVoicePrompt?: string;
  duration?: number; // 2 seconds
  nbFrames?: number; // 24 FPS
  resolution?: number; // 256, 320, 512, 576, 720, 1080..
  nbSteps?: number;
  upscale?: boolean;
  interpolate?: boolean;
  noise?: boolean;
}) => {
  // `||` is deliberate here: 0 and "" are the "not provided" sentinels
  seed = seed || generateSeed()
  shotId = shotId || uuidv4()

  const shotFileName = `${shotId}.mp4`

  console.log("generating video shot:", {
    seed,
    shotId,
    actorPrompt,
    shotPrompt,
    backgroundAudioPrompt,
    foregroundAudioPrompt,
    actorDialoguePrompt,
    actorVoicePrompt,
    duration,
    nbFrames,
    resolution,
    nbSteps,
    upscale,
    interpolate,
    noise,
  })

  if (actorPrompt) {
    console.log("generating actor..")
    // TODO: actor generation is not wired in yet, e.g.
    // await generateActor(actorPrompt, `actor_${Date.now()}.png`, seed)
  }

  console.log("generating base video ..")

  // currently the base model is incapable of generating more than 24 FPS,
  // because otherwise the upscaler will have trouble,
  // so for now we fix it to 24 frames
  // (previous formula: Math.min(3, Math.max(1, Math.round(duration))) * 8)
  const nbFramesForBaseModel = 24
  const baseVideoParams = {
    seed,
    nbFrames: nbFramesForBaseModel,
    nbSteps
  }

  let generatedVideoUrl = ""
  try {
    generatedVideoUrl = await generateVideo(shotPrompt, baseVideoParams)
  } catch (err) {
    // base generation can be finicky, if it fails we try once more;
    // a second failure propagates to the caller
    console.log('- trying again to generate base shot..', err)
    generatedVideoUrl = await generateVideo(shotPrompt, baseVideoParams)
  }

  console.log("downloading video..")
  const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)

  if (upscale) {
    console.log("upscaling video..")
    // NOTE(review): the return value is ignored, so upscaleVideo presumably
    // rewrites the file in place (confirm against its implementation)
    try {
      await upscaleVideo(videoFileName, shotPrompt)
    } catch (err) {
      // upscaling can be finicky, if it fails we try once more
      console.log('- trying again to upscale shot..', err)
      await upscaleVideo(videoFileName, shotPrompt)
    }
  }

  if (interpolate) {
    console.log("interpolating video..")

    // ATTENTION 1:
    // the interpolation step always creates a SLOW MOTION video,
    // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
    // than the duration generated by the original video model

    // ATTENTION 2:
    // the interpolation step generates videos in 910x512!

    // ATTENTION 3:
    // the interpolation step parameters are currently not passed to the space,
    // so changing those two variables below will have no effect!
    const interpolationSteps = 3
    const interpolatedFramesPerSecond = 24

    await interpolateVideo(
      videoFileName,
      interpolationSteps,
      interpolatedFramesPerSecond
    )
    console.log('creating slow-mo video (910x512 @ 24 FPS)')

    // with our current interpolation settings, the 3 seconds video generated by the model
    // becomes a 7 seconds video, at 24 FPS,
    // so we want to scale it back to the desired duration length;
    // also, as a last trick we want to upscale it (without AI) and add some FXs
    console.log('performing final scaling (1280x720 @ 24 FPS)')
    await postInterpolation(videoFileName, duration, nbFrames)
  }

  let backgroundAudioFileName = ''
  if (backgroundAudioPrompt) {
    console.log("generating background audio..")
    backgroundAudioFileName = await generateAudio(
      backgroundAudioPrompt,
      `shot_${shotId}_audio_${uuidv4()}.m4a`
    )
  }

  let foregroundAudioFileName = ''
  if (foregroundAudioPrompt) {
    console.log("generating foreground audio..")
    foregroundAudioFileName = await generateAudio(
      foregroundAudioPrompt,
      `shot_${shotId}_audio_${uuidv4()}.m4a`
    )
  }

  // TODO: the voice track is generated but not yet mixed into the final video
  let voiceAudioFileName = ''
  if (actorDialoguePrompt) {
    console.log("configuring dialogue..")
    if (actorVoicePrompt) {
      console.log("configuring voice..")
      // well.. that's a TODO!
      // for now let's always use the same voice model
      console.log('TODO this should be done in the sequence, not the prompt!')
      voiceAudioFileName = await generateVoice(
        actorDialoguePrompt,
        `shot_${shotId}_voice_${uuidv4()}.m4a`
      )
    }
  }

  console.log('merging audio with video..')
  if (backgroundAudioFileName || foregroundAudioFileName) {
    let audioFileName = ''

    if (backgroundAudioFileName && foregroundAudioFileName) {
      // we have both background and foreground: mix them into one track
      audioFileName = await mergeAudio({
        input1FileName: backgroundAudioFileName,
        input1Volume: 0.2, // 20% volume
        input2FileName: foregroundAudioFileName,
        input2Volume: 0.7, // 70% volume
      })
    } else {
      // exactly one of the two tracks exists: use it as-is
      audioFileName = backgroundAudioFileName || foregroundAudioFileName
    }

    await addAudioToVideo(videoFileName, audioFileName)
  }

  console.log("returning result to user..")

  const filePath = path.resolve(tmpDir, videoFileName)

  return {
    shotId,
    filePath,
    videoFileName
  }
}