Why is my .mp4 file created using cv2.VideoWriter not syncing up with the audio when i combine the video and audio using ffmpeg

The aim of the script is to take text from a text file and put it onto a stock video with an ai reading the text. Similar to those reddit stories on social media with parkour minecraft in the background.

import cv2
import time
from ffpyplayer.player import MediaPlayer
from Transcription import newTranscribeAudio
from pydub import AudioSegment

#get a gpt text generation to create a story based on a prompt, for example sci-fi story and spread it over 3-4 parts
#get stock footage, like minecraft parkour etc
#write text of script on the footage
#create video for each part
#have ai voiceover to read the transcript
cap = cv2.VideoCapture("Stock_Videos\Minecraft_Parkour.mp4")
transcription = newTranscribeAudio("final_us.wav")
player = MediaPlayer("final_us.mp3")
audio = AudioSegment.from_file("final_us.mp3")
story = open("Story.txt", "r").read()
story_split = story.split("||")
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
video_duration = frame_count / fps # Duration of one loop of the video
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
audio_duration = len(audio) / 1000 # Duration in seconds
video_writer = cv2.VideoWriter(f"CompletedVideo.mp4", fourcc, fps, (1080, 1920))

choice = 0#part of the story choice
part_split = story_split[choice].split("")
with open("Segment.txt", "w") as file:
 file.write(story_split[choice])
start_time = time.time()
length = len(part_split) - 1
next_text = []
for j in range(0, length):
 temp = part_split[j].replace("\n", "")
 next_text.append([temp])
index = 0
word_index = 0
frame_size_x = 1080
frame_size_y = 1920
audio_duration = len(audio) / 1000 # Duration in seconds
start_time = time.time()
wait_time = 1 / fps
while (time.time() - start_time) < audio_duration:
 cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # Restart video
 elapsed_time = time.time() - start_time
 print(video_writer)
 if index >= len(transcription):
 break
 while cap.isOpened():
 # Capture frames in the video 
 ret, frame = cap.read()
 if not ret:
 break
 audio_frame, val = player.get_frame()
 if val == 'eof': # End of file
 print("Audio playback finished.")
 break
 if index >= len(transcription):
 break
 
 if frame_size_x == -1:
 frame_size_x = frame.shape[1]
 frame_size_y = frame.shape[0]

 elapsed_time = time.time() - start_time

 # describe the type of font 
 # to be used. 
 font = cv2.FONT_HERSHEY_SIMPLEX 
 trans = transcription[index]["words"]
 end_time = trans[word_index]["end"]
 if trans[word_index]["start"] < elapsed_time < trans[word_index]["end"]:
 video_text = trans[word_index]["text"]
 elif elapsed_time >= trans[word_index]["end"]:
 #index += 1
 word_index += 1
 if (word_index >= len(trans)):
 index += 1
 word_index = 0
 # get boundary of this text
 textsize = cv2.getTextSize(video_text, font, 3, 6)[0]
 # get coords based on boundary
 textX = int((frame.shape[1] - textsize[0]) / 2)
 textY = int((frame.shape[0] + textsize[1]) / 2)
 
 cv2.putText(frame, 
 video_text, 
 (textX, textY), 
 font, 3, 
 (0, 255, 255), 
 6, 
 cv2.LINE_4)
 
 # Define the resize scale
 scale_percent = 50 # Resize to 50% of the original size
 # Get new dimensions
 width = 1080
 height = 1920
 new_size = (width, height)

 # Resize the frame
 resized_frame = cv2.resize(frame, new_size)
 video_writer.write(resized_frame)
 cv2.imshow('video', resized_frame)
 cv2.waitKey(wait_time)
 if cv2.waitKey(1) & 0xFF == ord('q'): 
 break
cv2.destroyAllWindows()
video_writer.release()
cap.release()

When I run this script the audio matches the text in the video perfectly and it runs for the correct amount of time to match with the audio (2 min 44 sec). However, the saved video CompletedVideo.mp4 only lasts for 1 min 10 sec. I am unsure why the video has sped up. The fps is 60 fps. If you require any more information please let me know and thanks in advance.

I have tried changing the fps, changing the wait_time after writing each frame. I am expecting the CompletedVideo.mp4 to be 2 min 44 sec long not 1 min 10 sec long.

Latest Images

Trending Articles

Latest Images