pydub及其示例

2021-03-04 本文已影响0人 KyoDante

该库是比较好的音频处理库，适用于音频切分等功能。

Windows下安装：

pip install pydub

如果使用非wav格式的文件，通过ffmpeg.org安装ffmpeg。

或者，也可以不装ffmpeg，改为按以下步骤安装libav（ffmpeg和libav二选一即可）：

Download and extract libav from Windows binaries provided here
Add the libav /bin folder to your PATH envvar

翻译：

下载libav，因为音频处理底层需要，从上面选一个和系统位数一样的压缩包下载(x64后缀的即可)。
把libav下面的/bin路径添加到环境系统变量里面的PATH。

完成后，运行以下导入语句，应当不会出现任何错误或警告；如果没有把ffmpeg或libav加入PATH，pydub会在导入时提示找不到ffmpeg/avconv。

import pydub

注意：切分的时候，单位是毫秒(ms)
具体使用请参考网上的教程或者官方文档。
以下仅提供一些实例，核心类为AudioSegment：

# Import the main pydub class
from pydub import AudioSegment
# Load an audio file, stating its format explicitly
wav_file = AudioSegment.from_file(file="wav_file.wav", format="wav")
# The same load with the format argument omitted (it is only there for readability)
wav_file = AudioSegment.from_file(file="wav_file.wav")
# Inspect the type of the loaded object
type(wav_file)

输出：pydub.audio_segment.AudioSegment

# Install simpleaudio for wav playback. Run this in a shell, not in Python:
#     pip install simpleaudio
# Import play function
from pydub.playback import play
# Import audio file
wav_file = AudioSegment.from_file(file="wav_file.wav")
# Play audio file
play(wav_file)

音频的一些属性：比如采样率，通道数，数据位宽，最大振幅，时长等。

# Import audio files
wav_file = AudioSegment.from_file(file="wav_file.wav")
two_speakers = AudioSegment.from_file(file="two_speakers.wav")
# Check number of channels
wav_file.channels, two_speakers.channels
# Output: 1, 2
# Check the sample rate (frame rate)
wav_file.frame_rate
# Output: 48000
# Find the number of bytes per sample
wav_file.sample_width
# Output: 2
# Find the max amplitude
wav_file.max
# Output: 8488
# Duration of audio file in milliseconds
len(wav_file)
# Output: 3284

改变一些属性：

# General pattern (pseudo-code, ATTRIBUTENAME is a placeholder such as
# channels, frame_rate or sample_width). The setter's result is assigned to
# a new name:
#     changed_audio_segment = audio_segment.set_ATTRIBUTENAME(x)

# Change sample width to 1 byte per sample
wav_file_width_1 = wav_file.set_sample_width(1)
wav_file_width_1.sample_width
# Output: 1

# Change sample rate to 16000 Hz
wav_file_16k = wav_file.set_frame_rate(16000)
wav_file_16k.frame_rate
# Output: 16000

# Change number of channels
wav_file_1_channel = wav_file.set_channels(1)
wav_file_1_channel.channels
# Output: 1

操作音频文件（加减音量、标准化、切片、拼接、声道拆分等）：

# Import audio file
wav_file = AudioSegment.from_file("wav_file.wav")
# Lower the volume by 60 dB
quiet_wav_file = wav_file - 60
# Try to recognize quiet audio
# NOTE(review): `recognizer` is not defined anywhere in this article;
# presumably a speech-recognition object created elsewhere - confirm.
recognizer.recognize_google(quiet_wav_file)
# Output: UnknownValueError:

# Increase the volume by 10 dB
louder_wav_file = wav_file + 10
# Try to recognize
recognizer.recognize_google(louder_wav_file)
# Output: this is a wav file

# Import AudioSegment, the normalize effect and the play helper
from pydub import AudioSegment
from pydub.effects import normalize
from pydub.playback import play
# Import an audio file with uneven sound levels
loud_quiet = AudioSegment.from_file("loud_quiet.wav")
# Normalize the sound levels
normalized_loud_quiet = normalize(loud_quiet)
# Listen to the result to check it
play(normalized_loud_quiet)

# Drop the first 5 seconds of audio (slice indices are in milliseconds).
# Import audio with static at start
static_at_start = AudioSegment.from_file("static_at_start.wav")
# Remove the static via slicing: keep everything from 5000 ms onward
no_static_at_start = static_at_start[5000:]
# Check the new sound
play(no_static_at_start)

# Concatenate two wav files.
# Import two audio files
wav_file_1 = AudioSegment.from_file("wav_file_1.wav")
wav_file_2 = AudioSegment.from_file("wav_file_2.wav")
# Combine the two audio files
wav_file_3 = wav_file_1 + wav_file_2
# Check the sound
play(wav_file_3)
# Combine two wav files and make the combination louder: the expression is
# evaluated left to right, so + 10 applies to the already combined audio
louder_wav_file_3 = wav_file_1 + wav_file_2 + 10

# Split a multi-channel recording into mono channels.
# Import phone call audio
phone_call = AudioSegment.from_file("phone_call.wav")
# Find number of channels
phone_call.channels
# Output: 2
# Split stereo to mono
phone_call_channels = phone_call.split_to_mono()
phone_call_channels
# Output: [<pydub.audio_segment.AudioSegment>, <pydub.audio_segment.AudioSegment>]

# Take the first list item and find its number of channels
phone_call_channel_1 = phone_call_channels[0]
phone_call_channel_1.channels
# Output: 1
# Recognize the first channel
# NOTE(review): `recognizer` is not defined anywhere in this article;
# presumably a speech-recognition object created elsewhere - confirm.
recognizer.recognize_google(phone_call_channel_1)
# Output: the pydub library is really useful

转换并保存（其他格式转wav再保存）

from pydub import AudioSegment
# Import audio file
wav_file = AudioSegment.from_file("wav_file.wav")
# Increase by 10 decibels
louder_wav_file = wav_file + 10
# Export louder audio file in wav format; the sample output below shows
# the value returned by export() is a file object for the written file
louder_wav_file.export(out_f="louder_wav_file.wav", format="wav")
# Output: <_io.BufferedRandom name='louder_wav_file.wav'>


def make_wav(wrong_folder_path, right_folder_path):
    """Convert every .mp3/.flac file in one folder to .wav in another.

    Args:
        wrong_folder_path: Folder scanned (non-recursively) for audio files
            in the wrong format.
        right_folder_path: Folder the converted .wav files are written to.
            It is created if it does not exist yet.
    """
    # Exporting into a missing folder fails, so create it up front. An empty
    # path means "current directory" and needs no creation.
    if right_folder_path:
        os.makedirs(right_folder_path, exist_ok=True)

    with os.scandir(wrong_folder_path) as entries:
        for entry in entries:
            # Only work with regular files whose extension we are fixing;
            # compare case-insensitively so "SONG.MP3" is converted too.
            if not entry.is_file():
                continue
            if not entry.name.lower().endswith((".mp3", ".flac")):
                continue
            # Same base name with a .wav extension. os.path.join also works
            # when right_folder_path has no trailing separator.
            out_file = os.path.join(
                right_folder_path, os.path.splitext(entry.name)[0] + ".wav"
            )
            # export() returns the opened output file; close it so handles
            # are not leaked while looping over many files.
            AudioSegment.from_file(entry.path).export(out_file, format="wav").close()
            print(f"Creating {out_file}")

# Call our new function
make_wav("data/wrong_formats/", "data/right_format/")
# Output (each path starts with the output folder passed in the call above):
# Creating data/right_format/wav_file.wav
# Creating data/right_format/flac_file.wav
# Creating data/right_format/mp3_file.wav

def make_no_static_louder(static_quiet, louder_no_static, *, trim_ms=3100, gain_db=10):
    """Trim the start of each audio file in a folder and raise its volume.

    Args:
        static_quiet: Folder scanned (non-recursively) for the input audio
            files (expected to be in wav format already).
        louder_no_static: Folder the processed .wav files are written to.
            It is created if it does not exist yet.
        trim_ms: Milliseconds removed from the start of every file.
        gain_db: Decibels added to the remaining audio.
    """
    # Exporting into a missing folder fails, so create it up front. An empty
    # path means "current directory" and needs no creation.
    if louder_no_static:
        os.makedirs(louder_no_static, exist_ok=True)

    # Scan the folder passed in as `static_quiet`.
    with os.scandir(static_quiet) as entries:
        for entry in entries:
            # Sub-directories cannot be read as audio; skip them.
            if not entry.is_file():
                continue
            # Same base name with a .wav extension in the output folder.
            out_file = os.path.join(
                louder_no_static, os.path.splitext(entry.name)[0] + ".wav"
            )
            # Read the audio file
            audio = AudioSegment.from_file(entry.path)
            # Drop the first trim_ms milliseconds, add gain_db decibels and
            # export. export() returns the opened output file; close it so
            # handles are not leaked across iterations.
            (audio[trim_ms:] + gain_db).export(out_file, format="wav").close()
            print(f"Creating {out_file}")

# Remove static and make louder
make_no_static_louder("data/static_quiet/", "data/louder_no_static/")
# Output:
# Creating data/louder_no_static/speech-recognition-services.wav
# Creating data/louder_no_static/order-issue.wav
# Creating data/louder_no_static/help-with-acount.wav