问题描述
我打算从 Twitch 抓取剪辑(clips),并把它们合并成一个视频文件。我已经解决了 Twitch 剪辑链接的抓取(不过只能得到 16-20 个视频,因为需要用 selenium 滚动页面才能加载更多;我对此并不太介意,但如果你有可行的解决方案,也欢迎一并回答),以及视频的简单合并。
我用下面的脚本抓取链接:
#!/usr/bin/python3.9
import bs4
import requests
import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Initialize the Firefox driver and run it headless.
# The "-headless" argument is passed explicitly because the Options.headless
# setter is deprecated in Selenium 4.8+ and removed in later releases.
options = Options()
options.add_argument("-headless")
driver = webdriver.Firefox(options=options)
def extract_source(url, timeout=10):
    """Download *url* with a desktop Firefox User-Agent and return the body text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    agent = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0"}
    return requests.get(url, headers=agent, timeout=timeout).text
def extract_data(source):
    """Return every <a> tag whose data-a-target is 'preview-card-image-link'.

    Args:
        source: HTML markup to parse.

    Returns:
        The result of BeautifulSoup's ``find_all`` for the matching anchors
        (presumably the clip preview cards; verify against the live page).
    """
    parsed_page = bs4.BeautifulSoup(source, 'html.parser')
    return parsed_page.find_all(
        'a',
        attrs={'data-a-target': 'preview-card-image-link'},
    )
try:
    driver.get('https://www.twitch.tv/directory/game/League%20of%20Legends/clips?range=24hr')
    # Wait 3 seconds for the clips to get pulled in.
    # Ideally the page would be scrolled here so more clips get loaded, but the
    # scrolling attempts so far (debugged with the Firefox GUI rather than
    # headless) did not scroll the page.
    time.sleep(3)
    extract_links = extract_data(driver.page_source)
    # Print the link target of every clip anchor that was found.
    for a in extract_links:
        print(a.get('href'))
finally:
    # Always shut the browser down, even if loading or parsing raised;
    # otherwise the headless Firefox process is left running.
    driver.quit()
# I tried scrolling using this but didnt work,not sure why
# this script is supposed to scroll until youre at the end of the page
# SCROLL_PAUSE_TIME = 0.5
# # Get scroll height
# last_height = driver.execute_script("return document.body.scrollHeight")
# for i in range(3):
# # Scroll down to bottom
# driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
# # Wait to load page
# time.sleep(SCROLL_PAUSE_TIME)
# # Calculate new scroll height and compare with last scroll height
# new_height = driver.execute_script("return document.body.scrollHeight")
# if new_height == last_height:
# break
# last_height = new_height
下载视频(使用 youtube-dl)之后,我用 ffmpeg 将它们拼接在一起:
ffmpeg -safe 0 -f concat -segment_time_metadata 1 -i videos.txt -vf select=concatdec_select -af aselect=concatdec_select,aresample=async=1 out.mp4
其中videos.txt如下:
file 'video_file1.mp4'
file 'video_file2.mp4'
...
我实在找不到如何添加水印的答案:每个视频的水印都不同,而且我不想把每个视频渲染两次,而是希望一次性完成渲染。(我找到过 this,但它只说明了如何给两个视频添加相同的水印,没有解释如何为每个视频添加各自独有的水印。)
我记得偶然看到过,有人为了给每个视频添加额外的选项,把他们的 videos.txt 写成如下形式:
file 'video_file1.mp4'
option 1(for video_file1.mp4)
option 2(for video_file1.mp4)
file 'video_file2.mp4'
option 1(for video_file2.mp4)
option 2(for video_file2.mp4)
...
这种方式能否用来给每个视频添加各自独有的水印?(假设水印文件名为 video_file1.png、……,即与对应的视频同名;另外,如果需要更多配置信息:水印是透明的。)
解决方法
您可以通过串联(chaining)FFmpeg 过滤器来解决这个问题(使用 filter_complex,如 here 所述)。
语法令人困惑但易于管理...
作为示例,我选择创建合成的输入文件,而不是从网络下载视频(这样示例更容易复现)。
首先构建 3 个合成视频文件和 3 个合成水印图像(后来用作输入):
import subprocess as sp
import shlex
# Input clips and the watermark image that belongs to each of them
# (same position in both lists).
in_videos = ['in1.mp4', 'in2.mp4', 'in3.mp4']
vid1, vid2, vid3 = in_videos
watermarks = ['waterMark1.png', 'waterMark2.png', 'waterMark3.png']
watermark1, watermark2, watermark3 = watermarks
out_video = 'output.mp4'
n = len(in_videos)

# Synthetic 5-second test clips: the same test pattern with a different sine
# tone per clip so the clips can be told apart by ear.
for clip, tone_hz in zip(in_videos, (300, 400, 500)):
    sp.run(shlex.split(f'ffmpeg -y -f lavfi -i testsrc=size=320x240:rate=1 -f lavfi -i sine=frequency={tone_hz} -c:v libx264 -c:a aac -ar 22050 -t 5 {clip}'))

# Synthetic 64x64 watermark images, each from a different lavfi source.
watermark_sources = (
    'mandelbrot=rate=1:size=64x64 -t 1',
    'cellauto=rate=1:size=64x64 -t 1',
    'life=rate=1:size=64x64:mold=10:r=100:ratio=0.1:death_color=blue:life_color=#00ff00 -frames:v 1',
)
for source_args, image in zip(watermark_sources, watermarks):
    sp.run(shlex.split(f'ffmpeg -y -f lavfi -i {source_args} {image}'))
现在我们要构建一个命令如下:
# Filter graph: overlay watermark k on video k (10 px from the left edge and
# 10 px above the bottom edge), then concatenate the three watermarked videos
# together with their audio tracks.
filter_graph = (
    '[0:v:0][3:v]overlay=10:main_h-overlay_h-10[v0];'
    '[1:v:0][4:v]overlay=10:main_h-overlay_h-10[v1];'
    '[2:v:0][5:v]overlay=10:main_h-overlay_h-10[v2];'
    '[v0][0:a:0][v1][1:a:0][v2][2:a:0]concat=n=3:v=1:a=1[outv][outa]'
)
# Inputs 0-2 are the videos, inputs 3-5 are the matching watermark images.
full_command = (
    'ffmpeg -y -i in1.mp4 -i in2.mp4 -i in3.mp4 -i waterMark1.png -i waterMark2.png -i waterMark3.png -filter_complex "'
    + filter_graph
    + '" -map "[outv]" -map "[outa]" '
    '-vcodec libx264 -crf 17 -pix_fmt yuv420p -acodec aac -ar 22050 output.mp4'
)
sp.run(shlex.split(full_command))
该命令覆盖水印并连接带水印的视频。
可以用几个 for 循环以编程方式构建该命令:
# One "-i <file> " per input: all videos first, then all watermarks, so that
# watermark k ends up at input index k + n.
video_inputs = ''.join('-i ' + vid + ' ' for vid in in_videos)
watermark_inputs = ''.join('-i ' + watermark + ' ' for watermark in watermarks)

# e.g. [0:v:0][3:v]overlay=10:main_h-overlay_h-10[v0];
overlay_chains = ''.join(
    f'[{idx}:v:0][{idx+n}:v]overlay=10:main_h-overlay_h-10[v{idx}];'
    for idx in range(n)
)
# e.g. [v0][0:a:0][v1][1:a:0][v2][2:a:0]
concat_pads = ''.join(f'[v{idx}][{idx}:a:0]' for idx in range(n))

# Result has the same shape as the hand-written command:
# ffmpeg -y -i in1.mp4 ... -i waterMark3.png -filter_complex "..." -map "[outv]" -map "[outa]" ... output.mp4
cmd = (
    'ffmpeg -y '
    + video_inputs
    + watermark_inputs
    + '-filter_complex "'
    + overlay_chains
    + concat_pads
    + f'concat=n={n}:v=1:a=1[outv][outa]" -map "[outv]" -map "[outa]" -vcodec libx264 -crf 17 -pix_fmt yuv420p -acodec aac -ar 22050 {out_video}'
)
这是一个完整的(可执行的)代码示例:
import subprocess as sp
import shlex
# Input video files
vid1 = 'in1.mp4'
vid2 = 'in2.mp4'
vid3 = 'in3.mp4'
# Every clip that should appear in the output must be listed here; a clip
# missing from this list is silently left out of the merged video.
in_videos = [vid1, vid2, vid3]

# Input watermark images (watermarks[k] is overlaid on in_videos[k])
watermark1 = 'waterMark1.png'
watermark2 = 'waterMark2.png'
watermark3 = 'waterMark3.png'
watermarks = [watermark1, watermark2, watermark3]

out_video = 'output.mp4'  # Output file name

n = len(in_videos)

# The filter graph pairs input k with input k + n, so both lists must have
# the same length.
if len(watermarks) != n:
    raise ValueError('in_videos and watermarks must have the same length')

# Build synthetic input files for testing (synthetic video with synthetic audio)
sp.run(shlex.split(f'ffmpeg -y -f lavfi -i testsrc=size=320x240:rate=1 -f lavfi -i sine=frequency=300 -c:v libx264 -c:a aac -ar 22050 -t 5 {vid1}'))
sp.run(shlex.split(f'ffmpeg -y -f lavfi -i testsrc=size=320x240:rate=1 -f lavfi -i sine=frequency=400 -c:v libx264 -c:a aac -ar 22050 -t 5 {vid2}'))
sp.run(shlex.split(f'ffmpeg -y -f lavfi -i testsrc=size=320x240:rate=1 -f lavfi -i sine=frequency=500 -c:v libx264 -c:a aac -ar 22050 -t 5 {vid3}'))

# Build synthetic 64x64 watermark images for testing
sp.run(shlex.split(f'ffmpeg -y -f lavfi -i mandelbrot=rate=1:size=64x64 -t 1 {watermark1}'))
sp.run(shlex.split(f'ffmpeg -y -f lavfi -i cellauto=rate=1:size=64x64 -t 1 {watermark2}'))
sp.run(shlex.split(f'ffmpeg -y -f lavfi -i life=rate=1:size=64x64:mold=10:r=100:ratio=0.1:death_color=blue:life_color=#00ff00 -frames:v 1 {watermark3}'))

# We want to get to the following command:
# sp.run(shlex.split('ffmpeg -y -i in1.mp4 -i in2.mp4 -i in3.mp4 -i waterMark1.png -i waterMark2.png -i waterMark3.png -filter_complex "'
#                    '[0:v:0][3:v]overlay=10:main_h-overlay_h-10[v0];'
#                    '[1:v:0][4:v]overlay=10:main_h-overlay_h-10[v1];'
#                    '[2:v:0][5:v]overlay=10:main_h-overlay_h-10[v2];'
#                    '[v0][0:a:0][v1][1:a:0][v2][2:a:0]concat=n=3:v=1:a=1[outv][outa]" -map "[outv]" -map "[outa]" '
#                    '-vcodec libx264 -crf 17 -pix_fmt yuv420p -acodec aac -ar 22050 output.mp4'))

# Build ffmpeg command with arguments as a long string
cmd = 'ffmpeg -y '

# Videos first (input indices 0 .. n-1):
# 'ffmpeg -y -i in1.mp4 -i in2.mp4 -i in3.mp4 '
for vid in in_videos:
    cmd += '-i ' + vid + ' '

# Then the watermarks (input indices n .. 2n-1):
# ffmpeg -y -i in1.mp4 -i in2.mp4 -i in3.mp4 -i waterMark1.png -i waterMark2.png -i waterMark3.png
for watermark in watermarks:
    cmd += '-i ' + watermark + ' '

# ffmpeg -y -i in1.mp4 ... -i waterMark3.png -filter_complex "
cmd += '-filter_complex "'

# Overlay watermark i on video i, 10 px from the left and 10 px above the bottom:
# [0:v:0][3:v]overlay=10:main_h-overlay_h-10[v0];
for i in range(n):
    cmd += f'[{i}:v:0][{i+n}:v]overlay=10:main_h-overlay_h-10[v{i}];'

# Feed each watermarked video together with its own audio into concat:
# [v0][0:a:0][v1][1:a:0][v2][2:a:0]
for i in range(n):
    cmd += f'[v{i}][{i}:a:0]'

# concat=n=3:v=1:a=1[outv][outa]" -map "[outv]" -map "[outa]" ... output.mp4
cmd += f'concat=n={n}:v=1:a=1[outv][outa]" -map "[outv]" -map "[outa]" -vcodec libx264 -crf 17 -pix_fmt yuv420p -acodec aac -ar 22050 {out_video}'

# Execute FFmpeg
sp.run(shlex.split(cmd))
注意:
- 构建一个长字符串,再用 shlex.split 把它拆分成参数列表,并不是最好的做法。
我使用该解决方案是因为我希望命令看起来像一个可执行的命令行(对于不使用 Python 的 FFmpeg 用户来说很熟悉)。
几个示例视频帧:
select 过滤器更适合这个任务。
ffmpeg -i video -vf "select='between(t,4,6.5)+between(t,17,26)+between(t,74,91)',setpts=N/FRAME_RATE/TB" -af "aselect='between(t,4,6.5)+between(t,17,26)+between(t,74,91)',asetpts=N/SR/TB" out.mp4
select 及其音频对应的过滤器 aselect 分别应用于视频和音频。选中的片段为 4 到 6.5 秒、17 到 26 秒以及 74 到 91 秒。随后用 setpts 及其音频对应的过滤器 asetpts 使时间戳变得连续。