Download as odt, pdf, or txt
Download as odt, pdf, or txt
You are on page 1of 6

!pip install TTS


!pip install numpy==1.24.0

from google.colab import files


import os
import subprocess

uploaded = None

def upload_video():
    """Open the Colab file picker and record the uploaded video.

    Side effects: sets the module-level ``uploaded`` dict (as returned by
    ``files.upload()``) and the module-level ``video_path``.

    Returns:
        The name of the first uploaded file.
    """
    global uploaded
    global video_path
    uploaded = files.upload()
    for filename in uploaded.keys():
        # The PDF export garbled this line to "Uploaded (unknown)";
        # presumably it echoed the filename — restored here.
        print(f'Uploaded {filename}')
        video_path = filename
        # NOTE(review): returning inside the loop means only the first
        # uploaded file is used — confirm this matches the original intent.
        return filename

import ipywidgets as widgets


from IPython.display import display

# Upload button plus an output area that captures the upload prompt.
button = widgets.Button(description="Upload Video")
output = widgets.Output()

def on_button_clicked(b):
    """Button callback: run the upload flow inside the output widget."""
    with output:
        global video_path
        video_path = upload_video()

button.on_click(on_button_clicked)
display(button, output)

!pip install googletrans==3.1.0a0

import subprocess
from tqdm.notebook import tqdm
import time

# Ensure the video was uploaded, then extract its audio track with ffmpeg.
if 'video_path' in globals() and video_path is not None:
    # 24-bit PCM at 48 kHz, audio stream only (-map a), overwrite (-y).
    # Passing an argument list (shell=False) avoids the quoting problems
    # the original shell string had with spaces/apostrophes in filenames.
    ffmpeg_command = [
        "ffmpeg", "-i", video_path,
        "-acodec", "pcm_s24le", "-ar", "48000",
        "-q:a", "0", "-map", "a", "-y", "output_audio.wav",
    ]
    start_time = time.time()  # start timing
    subprocess.run(ffmpeg_command)
    end_time = time.time()  # stop timing
    duration = end_time - start_time
    print(f"Conversão de áudio concluída em {duration:.2f} segundos.")
else:
    print("No video uploaded. Please upload a video first.")
import whisper

# Load the base Whisper model and transcribe the extracted audio ONCE.
# (The source notebook transcribed the same file twice with identical
# settings — result_original and result were the same work done twice;
# a single pass halves the runtime.)
model = whisper.load_model("base")

start_time = time.time()  # start timing
result = model.transcribe("output_audio.wav", verbose=True, fp16=False,
                          language='pt')  # force Portuguese detection
end_time = time.time()  # stop timing
duration = end_time - start_time
print(f"Transcrição concluída em {duration:.2f} segundos.")

# Save the original-language subtitles to a text file.
with open("legendas_original.txt", "w") as f:
    f.write(result["text"])
print("Legendas para o áudio original salvas em legendas_original.txt")

whisper_text = result["text"]
whisper_language = result['language']

print("Texto transcrito:", whisper_text)
print("Idioma original (detectado como Português):", whisper_language)

!pip install googletrans==3.1.0a0 # Downgrade para uma versão que funcione

# Colab form field: the PDF export split this #@param annotation across
# several lines, breaking both the form and the file — reassembled here.
target_language = "English" #@param ["English", "Spanish", "French", "German", "Italian", "Portuguese", "Polish", "Turkish", "Russian", "Dutch", "Czech", "Arabic", "Chinese (Simplified)"]

# Mapping between full language names and ISO 639-1 codes.
language_mapping = {
    'English': 'en',
    'Spanish': 'es',
    'French': 'fr',
    'German': 'de',
    'Italian': 'it',
    'Portuguese': 'pt',
    'Polish': 'pl',
    'Turkish': 'tr',
    'Russian': 'ru',
    'Dutch': 'nl',
    'Czech': 'cs',
    'Arabic': 'ar',
    'Chinese (Simplified)': 'zh-cn'
}

# Code handed to googletrans and to the TTS model further down.
target_language_code = language_mapping[target_language]

from googletrans import Translator

translator = Translator()

# Small pause to avoid rate limiting by the translation service.
time.sleep(1)

# Translate FIRST, then persist. The source notebook wrote
# `translated_text` to disk before the variable was ever assigned,
# which raises NameError — the save now happens only on success.
try:
    start_time = time.time()  # start timing
    translated_text = translator.translate(whisper_text,
                                           dest=target_language_code).text
    end_time = time.time()  # stop timing
    duration = end_time - start_time
    print(f"Tradução concluída em {duration:.2f} segundos.")
    print(f"Texto traduzido para {target_language}:", translated_text)
except Exception as e:
    print(f"Translation failed: {e}")
    # Show the raw response object for debugging.
    # NOTE(review): this second call may raise again if the service is down.
    print("Objeto retornado:", translator.translate(whisper_text,
                                                    dest=target_language_code))
else:
    # Save the translated subtitles to a text file.
    with open("legendas_traduzidas.txt", "w") as f:
        f.write(translated_text)
    print("Legendas para o áudio traduzido salvas em legendas_traduzidas.txt")

from TTS.api import TTS


import torch
from IPython.display import Audio, display

# Create the TTS instance on CPU (XTTS v2, multilingual voice cloning).


tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cpu")

# Synthesize the translated text, cloning the speaker's voice from the
# extracted audio track; writes the result to output_synth.wav.
tts.tts_to_file(
    translated_text,
    speaker_wav='output_audio.wav',
    file_path="output_synth.wav",
    language=target_language_code
)

# Inline audio player for the synthesized speech (no autoplay).
audio_widget = Audio(filename="output_synth.wav", autoplay=False)


display(audio_widget)

If you are on the Google Colab free tier (T4), delete the TTS and Whisper models at this step

import torch

# Free memory before the heavy lip-sync stage: drop the TTS and Whisper
# models if they still exist at module scope, then flush the CUDA cache.
for _name, _msg in (("tts", "Voice model already deleted"),
                    ("model", "Whisper model already deleted")):
    if _name in globals():
        del globals()[_name]
    else:
        print(_msg)

torch.cuda.empty_cache()

Lip Sync - High Quality


Takes around 15 minutes for installation on the free Colab version (T4)

# Dependencies
%cd /content/

import locale
locale.getpreferredencoding = lambda: "UTF-8"

!git clone https://github.com/vinthony/video-retalking.git &> /dev/null

!sudo apt-get install -y libblas-dev liblapack-dev libx11-dev libopenblas-dev

!git clone https://github.com/davisking/dlib.git

!pip install basicsr==1.4.2 face-alignment==1.3.4 kornia==0.5.1 ninja==1.10.2.3 einops==0.4.1 facexlib==0.2.5 librosa==0.9.2 build

!cd dlib && python setup.py install

%cd /content/video-retalking

!mkdir ./checkpoints
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/30_net_gen.pth -O ./checkpoints/30_net_gen.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/BFM.zip -O ./checkpoints/BFM.zip
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/DNet.pt -O ./checkpoints/DNet.pt
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/ENet.pth -O ./checkpoints/ENet.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/expression.mat -O ./checkpoints/expression.mat
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/face3d_pretrain_epoch_20.pth -O ./checkpoints/face3d_pretrain_epoch_20.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/GFPGANv1.3.pth -O ./checkpoints/GFPGANv1.3.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/GPEN-BFR-512.pth -O ./checkpoints/GPEN-BFR-512.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/LNet.pth -O ./checkpoints/LNet.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/ParseNet-latest.pth -O ./checkpoints/ParseNet-latest.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/RetinaFace-R50.pth -O ./checkpoints/RetinaFace-R50.pth
!wget https://github.com/vinthony/video-retalking/releases/download/v0.0.1/shape_predictor_68_face_landmarks.dat -O ./checkpoints/shape_predictor_68_face_landmarks.dat
!unzip -d ./checkpoints/BFM ./checkpoints/BFM.zip

# Generate the final lip-synced video with video-retalking.

%cd /content/video-retalking

# Quote the uploaded video's path relative to the repo dir we just cd'd into.
video_path_fix = f"'../{video_path}'"

# inference.py takes the face video and the synthesized audio and writes
# the high-quality lip-synced result.
!python inference.py \
--face $video_path_fix \
--audio "/content/output_synth.wav" \
--outfile '/content/output_high_qual.mp4'

Download videos
from google.colab import files
from IPython.core.display import display, HTML
import ipywidgets as widgets
import base64
import os

# Candidate output videos to preview inline and offer for download.
video_paths = ["/content/output_video.mp4", "/content/output_high_qual.mp4"]

def download_video(b):
    """Button callback: trigger a Colab download of the button's video."""
    files.download(b.video_path)

download_buttons = []

# Shared fixed-width layout for all download buttons.
button_layout = widgets.Layout(width='250px')

# For every video that exists, show an inline player and a download button.
for video_path in video_paths:
    if os.path.exists(video_path):
        # Inline the video as a base64 data URI so it plays in the notebook
        # without needing a served file.
        with open(video_path, "rb") as video_file:
            video_base64 = base64.b64encode(video_file.read()).decode()
        video_html = HTML(data=f"""
<video width=400 controls>
  <source src="data:video/mp4;base64,{video_base64}" type="video/mp4" />
</video>
""")

        # Download button wired to this video's path. (The original f-string
        # was split mid-literal by the PDF export — rejoined here.)
        download_button = widgets.Button(
            description=f"Download {os.path.basename(video_path)}",
            layout=button_layout,
        )
        download_button.video_path = video_path
        download_button.on_click(download_video)
        download_buttons.append(download_button)

        display(video_html)
        display(download_button)

You might also like