The Video Generation API we are building in this tutorial will allow users to convert audio files (e.g., MP3 songs) and lyric files (e.g., LRC files) into video content with subtitles. This can be particularly useful for creating lyric videos or promotional material for your music.
We will cover the following key components in our API:
File Upload: Users can upload their MP3 and LRC files to the API.
LRC to SRT Conversion: We will convert LRC (Lyric) files into SRT (SubRip) format, which is commonly used for subtitles in videos.
Video Generation: We will use FFmpeg to combine the background image, audio file, and subtitles to create the final video.
API Endpoint: We will expose an API endpoint that users can call to trigger the video generation process.
Prerequisites
Before we dive into the code, make sure you have the following prerequisites installed:
Node.js: You can download it here.
FFmpeg: Install FFmpeg on your system or use the @ffmpeg-installer/ffmpeg package in Node.js.
Basic knowledge of JavaScript and Node.js.
Setting Up the Project
Let's start by setting up our Node.js project and installing the necessary packages. We'll use multer for file uploads and child_process for running FFmpeg commands. The project structure should look something like this:
project-root/
├─ public/
│ ├─ uploads/
│ │ ├─ audio/
│ │ ├─ subtitle/
│ │ ├─ thumbnail/
│ │ └─ video/
│ └─ bg.jpg
├─ api/
│ └─ videoGeneration.js
├─ package.json
└─ ...
Setting Up multer for File Uploads
import multer from "multer";
// Disk storage engine for multer: decides where each upload lands and what it is called
const storage = multer.diskStorage({
  // Every upload goes into the "subtitle" folder under the upload directory
  destination(_req, _uploadedFile, done) {
    done(null, path.join(baseDir, uploadDir, "subtitle"));
  },
  // The stored file keeps the name the client sent
  filename(_req, uploadedFile, done) {
    done(null, uploadedFile.originalname);
  },
});
// multer instance that writes uploads through the storage engine above
const upload = multer({ storage: storage });
// In your route handler, process the incoming files with upload.fields()
LRC to SRT Conversion
/**
 * Convert LRC lyric text into SubRip (SRT) subtitle text.
 *
 * Only lines containing a `[mm:ss.xx]` timestamp become cues; blank lines and
 * lines without such a timestamp (for example `[ar:Artist]` tags) are skipped.
 * A cue ends where the next timestamped line starts. The final cue has no
 * successor, so it is shown for 5 seconds.
 *
 * @param {string} lrcContent - Raw LRC text with lines separated by "\n".
 * @returns {string} SRT text: numbered cues, each terminated by a blank line.
 *   Returns "" when no timestamped line is found.
 */
function lrcToSrt(lrcContent) {
  // How long the last cue stays on screen, since no later line bounds it.
  const LAST_CUE_DURATION_MS = 5000;
  const timedLinePattern = /\[(\d+):(\d+)\.(\d+)\](.*)/;

  // LRC fractions are usually hundredths ("12.34"); normalise to exactly three
  // digits so "5" means 500 ms and "3456" is truncated to 345 ms.
  const toMilliseconds = (minutes, seconds, fraction) =>
    Number.parseInt(minutes, 10) * 60000 +
    Number.parseInt(seconds, 10) * 1000 +
    Number.parseInt(fraction.padEnd(3, "0").slice(0, 3), 10);

  const pad = (value, width) => String(value).padStart(width, "0");

  // Render as HH:MM:SS,mmm. Working in milliseconds lets minutes >= 60 roll
  // over into the hours field instead of producing an invalid time.
  const formatTime = (totalMs) => {
    const hours = Math.floor(totalMs / 3600000);
    const minutes = Math.floor(totalMs / 60000) % 60;
    const seconds = Math.floor(totalMs / 1000) % 60;
    const millis = totalMs % 1000;
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(millis, 3)}`;
  };

  // First pass: collect every timestamped line, so end times can be taken
  // from the next *timestamped* line rather than whatever line follows.
  const cues = [];
  for (const rawLine of lrcContent.split("\n")) {
    const match = rawLine.trim().match(timedLinePattern);
    if (!match) continue;
    const [, minutes, seconds, fraction, text] = match;
    cues.push({
      startMs: toMilliseconds(minutes, seconds, fraction),
      lyrics: text.trim(),
    });
  }

  // Second pass: emit the numbered SRT cues.
  return cues
    .map((cue, index) => {
      const nextCue = cues[index + 1];
      const endMs = nextCue ? nextCue.startMs : cue.startMs + LAST_CUE_DURATION_MS;
      return `${index + 1}\n${formatTime(cue.startMs)} --> ${formatTime(endMs)}\n${cue.lyrics}\n\n`;
    })
    .join("");
}
Video Generation with FFmpeg
import { exec, execFile } from "child_process";
import ffmpegPath from "@ffmpeg-installer/ffmpeg";
/**
 * Build a lyric video for one song: convert the LRC file to SRT, then have
 * FFmpeg loop the background image over the MP3 and burn the subtitles in.
 *
 * FFmpeg is started with execFile (argument array, no shell), so characters in
 * the paths or the title are never interpreted as shell syntax.
 *
 * @param {string} mp3Path - Path of the source MP3 file.
 * @param {string} lrcPath - Path of the uploaded LRC file.
 * @param {string} songTitle - Base name used for the generated .srt and .mp4 files.
 * @returns {Promise<{ videoUrl: string }>} Resolves with `videoUrl` set to the
 *   file-system path of the video (returned without re-encoding when that file
 *   already exists). Rejects with Error("Video generation failed") when FFmpeg
 *   fails, or with the underlying error when the LRC/SRT file I/O fails.
 */
async function generateVideo(mp3Path, lrcPath, songTitle) {
  const srtPath = path.join(publicDir, uploadDir, "subtitle", `${songTitle}.srt`);
  const videoPath = path.join(publicDir, uploadDir, "video", `${songTitle}.mp4`);
  const isVideoFileExist = fs.existsSync(videoPath);

  // The SRT file is (re)written on every call, even when the video is cached.
  const lrcContent = fs.readFileSync(lrcPath, "utf8");
  const cleanedLrcContent = lrcContent.replace(/[\r\n]+/gm, "\n");
  const srtContent = lrcToSrt(cleanedLrcContent);
  console.log("🚀 Generating LRC");
  fs.writeFileSync(srtPath, srtContent, "utf8");
  console.log("🎉 Done making LRC");

  if (isVideoFileExist) {
    return { videoUrl: videoPath };
  }

  // Escape a path for use as a single-quoted filter option value. FFmpeg
  // parses the filter string twice: inside the outer quotes everything is
  // literal, then the option parser treats "\", ":" and "'" as special.
  const escapeForFilterOption = (filePath) =>
    filePath
      .replace(/\\/g, "\\\\")
      .replace(/:/g, "\\:")
      .replace(/'/g, "\\'\\''");

  // Point the filter at the SRT file written above (not a path guessed from
  // the MP3 location). force_style is quoted so its commas are not read as
  // filter separators.
  const subtitlesFilter = `subtitles='${escapeForFilterOption(srtPath)}':force_style='Alignment=10,Fontsize=48,Outline=2'`;

  const ffmpegArgs = [
    "-loop",
    "1",
    "-i",
    backgroundImage,
    "-i",
    mp3Path,
    "-c:v",
    "libx264",
    "-c:a",
    "aac",
    "-strict",
    "experimental",
    "-b:a",
    "192k",
    "-shortest",
    "-y",
    "-vf",
    subtitlesFilter,
    videoPath,
  ];
  // Display form for the log only; the process receives the array as-is.
  const ffmpegCommand = [ffmpegPath.path, ...ffmpegArgs].join(" ");
  console.log({ ffmpegCommand });

  return new Promise((resolve, reject) => {
    execFile(ffmpegPath.path, ffmpegArgs, (ffmpegError, stdout) => {
      if (ffmpegError) {
        console.error("❌", ffmpegError);
        // Remove a partially written output so a later call does not mistake
        // it for a finished video.
        if (fs.existsSync(videoPath)) {
          fs.unlink(videoPath, (unlinkError) => {
            if (unlinkError) {
              console.error("❌ Error deleting video file:", unlinkError);
            }
          });
        }
        console.error("❌ Video generation failed");
        return reject(new Error("Video generation failed"));
      }
      console.log("FFmpeg output:", stdout);
      console.log("🎉 Video generation completed!");
      return resolve({ videoUrl: videoPath });
    });
  });
}
These code snippets cover the file upload setup with multer, LRC to SRT conversion, and video generation using FFmpeg. You can integrate these snippets into your Node.js project as discussed in the blog post to create your Video Generation API.
API Endpoint
Now, let's create the API endpoint that users can call to trigger the video generation process. In your Node.js route handler:
import express from "express";
// Create an Express application
const app = express();
// Define a POST endpoint for video generation.
// Expects a multipart form with an "lrcFile" upload and an "mp3File" text
// field naming an MP3 (without extension) already present in the audio folder.
app.post("/generate-video", async (req, res) => {
  // Everything, including the upload step, runs inside the try block: a
  // rejected promise in an async Express handler is otherwise never answered.
  try {
    const { lrcFile } = await uploadFiles(req, res);
    const mp3FileName = req.body && req.body.mp3File;
    // mp3File ends up in file paths, so accept only a bare file name.
    if (
      typeof mp3FileName !== "string" ||
      mp3FileName === "" ||
      path.basename(mp3FileName) !== mp3FileName
    ) {
      return res.status(400).json({ error: "A valid mp3File name is required" });
    }
    if (!Array.isArray(lrcFile) || lrcFile.length === 0) {
      return res.status(400).json({ error: "An lrcFile upload is required" });
    }
    const lrcFileName = lrcFile[0].originalname;
    const mp3Path = path.join(baseDir, uploadDir, "audio", `${mp3FileName}.mp3`);
    const lrcPath = path.join(baseDir, uploadDir, "subtitle", lrcFileName);
    const response = await generateVideo(mp3Path, lrcPath, mp3FileName);
    const videoUrl = response.videoUrl;
    return res.status(200).json({ videoUrl });
  } catch (error) {
    console.error("Error generating video:", error);
    return res.status(500).json({ error: "Video generation failed" });
  }
});
// Start the Express server
app.listen(3000, () => {
  console.log("Server is running on port 3000");
});
Here is my complete code. P.S. I used a Next.js API route.
import { exec, execFile } from "child_process";
import fs from "fs";
import path from "path";
import ffmpegPath from "@ffmpeg-installer/ffmpeg";
import multer from "multer";
// Route-level configuration exported alongside the handler (the post states this file is a Next.js API route).
export const config = {
api: {
bodyParser: false, // Disables the default body parser as we will handle the form data ourselves
},
};
// Absolute path of the "public" folder under the directory the process was started from
const publicDir = path.join(process.cwd(), "public");
const uploadDir = "/uploads"; // Upload folder, joined onto publicDir/baseDir; "audio", "subtitle" and "video" are appended at each use
const backgroundImage = path.join(publicDir, "/bg.jpg"); // Replace with the correct path to your image file
const baseDir = publicDir; // Alias of publicDir (the "public" folder itself, not public/uploads); uploadDir is appended separately
// multer disk storage: where uploaded files are written and under which name
const storage = multer.diskStorage({
  // All uploads are placed in the "subtitle" folder of the upload directory
  destination(_req, _uploadedFile, done) {
    done(null, path.join(baseDir, uploadDir, "subtitle"));
  },
  // Files are stored under the name supplied by the client
  filename(_req, uploadedFile, done) {
    done(null, uploadedFile.originalname);
  },
});
// multer instance bound to that storage engine
const upload = multer({ storage: storage });
/**
 * Run multer's "lrcFile" field parser over the request and hand back the
 * uploaded file entries.
 *
 * @param {object} req - Incoming request; its `files` property is read after multer has run.
 * @param {object} res - Response object, passed through to the multer middleware.
 * @returns {Promise<{ lrcFile: * }>} Resolves with the `lrcFile` entry of
 *   `req.files`; rejects with Error("Error uploading files") when multer
 *   reports an error.
 */
function uploadFiles(req, res) {
  return new Promise((resolve, reject) => {
    const parseLrcField = upload.fields([{ name: "lrcFile" }]);

    // Completion callback for the middleware: success path first, failure below.
    const onUploadFinished = (uploadError) => {
      if (!uploadError) {
        const { lrcFile } = req.files;
        resolve({ lrcFile });
        return;
      }
      console.error("Error uploading files:", uploadError);
      reject(new Error("Error uploading files"));
    };

    parseLrcField(req, res, onUploadFinished);
  });
}
/**
 * Convert LRC lyric text into SubRip (SRT) subtitle text.
 *
 * Only lines containing a `[mm:ss.xx]` timestamp become cues; blank lines and
 * lines without such a timestamp (for example `[ar:Artist]` tags) are skipped.
 * A cue ends where the next timestamped line starts. The final cue has no
 * successor, so it is shown for 5 seconds.
 *
 * @param {string} lrcContent - Raw LRC text with lines separated by "\n".
 * @returns {string} SRT text: numbered cues, each terminated by a blank line.
 *   Returns "" when no timestamped line is found.
 */
function lrcToSrt(lrcContent) {
  // How long the last cue stays on screen, since no later line bounds it.
  const LAST_CUE_DURATION_MS = 5000;
  const timedLinePattern = /\[(\d+):(\d+)\.(\d+)\](.*)/;

  // LRC fractions are usually hundredths ("12.34"); normalise to exactly three
  // digits so "5" means 500 ms and "3456" is truncated to 345 ms.
  const toMilliseconds = (minutes, seconds, fraction) =>
    Number.parseInt(minutes, 10) * 60000 +
    Number.parseInt(seconds, 10) * 1000 +
    Number.parseInt(fraction.padEnd(3, "0").slice(0, 3), 10);

  const pad = (value, width) => String(value).padStart(width, "0");

  // Render as HH:MM:SS,mmm. Working in milliseconds lets minutes >= 60 roll
  // over into the hours field instead of producing an invalid time.
  const formatTime = (totalMs) => {
    const hours = Math.floor(totalMs / 3600000);
    const minutes = Math.floor(totalMs / 60000) % 60;
    const seconds = Math.floor(totalMs / 1000) % 60;
    const millis = totalMs % 1000;
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(millis, 3)}`;
  };

  // First pass: collect every timestamped line, so end times can be taken
  // from the next *timestamped* line rather than whatever line follows.
  const cues = [];
  for (const rawLine of lrcContent.split("\n")) {
    const match = rawLine.trim().match(timedLinePattern);
    if (!match) continue;
    const [, minutes, seconds, fraction, text] = match;
    cues.push({
      startMs: toMilliseconds(minutes, seconds, fraction),
      lyrics: text.trim(),
    });
  }

  // Second pass: emit the numbered SRT cues.
  return cues
    .map((cue, index) => {
      const nextCue = cues[index + 1];
      const endMs = nextCue ? nextCue.startMs : cue.startMs + LAST_CUE_DURATION_MS;
      return `${index + 1}\n${formatTime(cue.startMs)} --> ${formatTime(endMs)}\n${cue.lyrics}\n\n`;
    })
    .join("");
}
/**
 * Build a lyric video for one song: convert the LRC file to SRT, then have
 * FFmpeg loop the background image over the MP3 and burn the subtitles in.
 *
 * FFmpeg is started with execFile (argument array, no shell), so characters in
 * the paths or the title are never interpreted as shell syntax.
 *
 * @param {string} mp3Path - Path of the source MP3 file.
 * @param {string} lrcPath - Path of the uploaded LRC file.
 * @param {string} songTitle - Base name used for the generated .srt and .mp4 files.
 * @returns {Promise<{ videoUrl: string }>} Resolves with `videoUrl` set to the
 *   file-system path of the video (returned without re-encoding when that file
 *   already exists). Rejects with Error("Video generation failed") when FFmpeg
 *   fails, or with the underlying error when the LRC/SRT file I/O fails.
 */
async function generateVideo(mp3Path, lrcPath, songTitle) {
  const srtPath = path.join(publicDir, uploadDir, "subtitle", `${songTitle}.srt`);
  const videoPath = path.join(publicDir, uploadDir, "video", `${songTitle}.mp4`);
  const isVideoFileExist = fs.existsSync(videoPath);

  // The SRT file is (re)written on every call, even when the video is cached.
  const lrcContent = fs.readFileSync(lrcPath, "utf8");
  const cleanedLrcContent = lrcContent.replace(/[\r\n]+/gm, "\n");
  const srtContent = lrcToSrt(cleanedLrcContent);
  console.log("🚀 Generating LRC");
  fs.writeFileSync(srtPath, srtContent, "utf8");
  console.log("🎉 Done making LRC");

  if (isVideoFileExist) {
    return { videoUrl: videoPath };
  }

  // Escape a path for use as a single-quoted filter option value. FFmpeg
  // parses the filter string twice: inside the outer quotes everything is
  // literal, then the option parser treats "\", ":" and "'" as special.
  const escapeForFilterOption = (filePath) =>
    filePath
      .replace(/\\/g, "\\\\")
      .replace(/:/g, "\\:")
      .replace(/'/g, "\\'\\''");

  // Point the filter at the SRT file written above (not a path guessed from
  // the MP3 location). force_style is quoted so its commas are not read as
  // filter separators.
  const subtitlesFilter = `subtitles='${escapeForFilterOption(srtPath)}':force_style='Alignment=10,Fontsize=48,Outline=2'`;

  const ffmpegArgs = [
    "-loop",
    "1",
    "-i",
    backgroundImage,
    "-i",
    mp3Path,
    "-c:v",
    "libx264",
    "-c:a",
    "aac",
    "-strict",
    "experimental",
    "-b:a",
    "192k",
    "-shortest",
    "-y",
    "-vf",
    subtitlesFilter,
    videoPath,
  ];
  // Display form for the log only; the process receives the array as-is.
  const ffmpegCommand = [ffmpegPath.path, ...ffmpegArgs].join(" ");
  console.log({ ffmpegCommand });

  return new Promise((resolve, reject) => {
    execFile(ffmpegPath.path, ffmpegArgs, (ffmpegError, stdout) => {
      if (ffmpegError) {
        console.error("❌", ffmpegError);
        // Remove a partially written output so a later call does not mistake
        // it for a finished video.
        if (fs.existsSync(videoPath)) {
          fs.unlink(videoPath, (unlinkError) => {
            if (unlinkError) {
              console.error("❌ Error deleting video file:", unlinkError);
            }
          });
        }
        console.error("❌ Video generation failed");
        return reject(new Error("Video generation failed"));
      }
      console.log("FFmpeg output:", stdout);
      console.log("🎉 Video generation completed!");
      return resolve({ videoUrl: videoPath });
    });
  });
}
/**
 * Handle one video-generation request: receive the LRC upload, validate the
 * inputs, generate the video and send the JSON response.
 *
 * Expects a multipart form with an "lrcFile" upload and an "mp3File" text
 * field naming an MP3 (without extension) in the audio upload folder.
 *
 * Responses: 200 `{ videoUrl }` on success, 400 `{ error }` when `mp3File` or
 * the LRC upload is missing or invalid, 500 `{ error }` when generation fails.
 *
 * @param {object} req - Incoming request.
 * @param {object} res - Response object exposing `status()` and `json()`.
 * @returns {Promise<*>} Whatever `res.json()` returns.
 * @throws {Error} When the upload step itself rejects (left to the caller).
 */
async function handleVideoGeneration(req, res) {
  const { lrcFile } = await uploadFiles(req, res);
  const mp3FileName = req.body && req.body.mp3File;
  // mp3File is client-supplied and ends up in file paths, so accept only a
  // bare file name (no directory parts such as "../").
  if (
    typeof mp3FileName !== "string" ||
    mp3FileName === "" ||
    path.basename(mp3FileName) !== mp3FileName
  ) {
    return res.status(400).json({ error: "A valid mp3File name is required" });
  }
  if (!Array.isArray(lrcFile) || lrcFile.length === 0) {
    return res.status(400).json({ error: "An lrcFile upload is required" });
  }
  const lrcFileName = lrcFile[0].originalname;
  const mp3Path = path.join(baseDir, uploadDir, "audio", `${mp3FileName}.mp3`);
  const lrcPath = path.join(baseDir, uploadDir, "subtitle", lrcFileName);
  try {
    const response = await generateVideo(mp3Path, lrcPath, mp3FileName);
    const videoUrl = response.videoUrl;
    return res.status(200).json({ videoUrl });
  } catch (error) {
    console.error("Error generating video:", error);
    return res.status(500).json({ error: "Video generation failed" });
  }
}
/**
 * API route entry point: accepts POST only and delegates to
 * handleVideoGeneration.
 *
 * Responses produced here: 405 `{ error: "Method Not Allowed" }` (with an
 * `Allow: POST` header) for any other method, and 500
 * `{ error: "Video generation failed" }` when handleVideoGeneration throws.
 *
 * @param {object} req - Incoming request; only `method` is read here.
 * @param {object} res - Response object exposing `setHeader()`, `status()` and `json()`.
 * @returns {Promise<*>} The `res.json()` result for 405/500 responses, otherwise undefined.
 */
export default async function handler(req, res) {
  if (req.method !== "POST") {
    // A 405 response must tell the client which methods are supported.
    res.setHeader("Allow", "POST");
    return res.status(405).json({ error: "Method Not Allowed" });
  }
  try {
    await handleVideoGeneration(req, res);
  } catch (error) {
    console.error("Error handling video generation:", error);
    return res.status(500).json({ error: "Video generation failed" });
  }
}
Conclusion
In this blog post, we've explored how to create a Video Generation API using Node.js and FFmpeg. This API can be a valuable addition to your multimedia content creation toolkit, allowing you to automate the process of turning audio and lyric files into engaging videos.
Feel free to customize and extend this API to suit your specific needs. You can enhance it by adding more features, such as custom styling for subtitles or support for different video formats.
Now that you have a basic understanding of how to create a Video Generation API, you can explore more advanced options and integrate it into your projects for creative video content generation.
Happy coding!
Top comments (1)
Hey, I need help with ffmpeg for merging multiple images and an audio file into a single video.
I am using nextjs. And the process of merging will happen locally on the browser.
Can you help me with it?
Here is the link for the Stack Overflow question:
stackoverflow.com/questions/777499...