""" Various functions for finding/manipulating silence in AudioSegments """ import itertools from .utils import db_to_float def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1): """ Returns a list of all silent sections [start, end] in milliseconds of audio_segment. Inverse of detect_nonsilent() audio_segment - the segment to find silence in min_silence_len - the minimum length for any silent section silence_thresh - the upper bound for how quiet is silent in dFBS seek_step - step size for interating over the segment in ms """ seg_len = len(audio_segment) # you can't have a silent portion of a sound that is longer than the sound if seg_len < min_silence_len: return [] # convert silence threshold to a float value (so we can compare it to rms) silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude # find silence and add start and end indicies to the to_cut list silence_starts = [] # check successive (1 sec by default) chunk of sound for silence # try a chunk at every "seek step" (or every chunk for a seek step == 1) last_slice_start = seg_len - min_silence_len slice_starts = range(0, last_slice_start + 1, seek_step) # guarantee last_slice_start is included in the range # to make sure the last portion of the audio is searched if last_slice_start % seek_step: slice_starts = itertools.chain(slice_starts, [last_slice_start]) for i in slice_starts: audio_slice = audio_segment[i:i + min_silence_len] if audio_slice.rms <= silence_thresh: silence_starts.append(i) # short circuit when there is no silence if not silence_starts: return [] # combine the silence we detected into ranges (start ms - end ms) silent_ranges = [] prev_i = silence_starts.pop(0) current_range_start = prev_i for silence_start_i in silence_starts: continuous = (silence_start_i == prev_i + seek_step) # sometimes two small blips are enough for one particular slice to be # non-silent, despite the silence all running together. Just combine # the two overlapping silent ranges. silence_has_gap = silence_start_i > (prev_i + min_silence_len) if not continuous and silence_has_gap: silent_ranges.append([current_range_start, prev_i + min_silence_len]) current_range_start = silence_start_i prev_i = silence_start_i silent_ranges.append([current_range_start, prev_i + min_silence_len]) return silent_ranges def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1): """ Returns a list of all nonsilent sections [start, end] in milliseconds of audio_segment. Inverse of detect_silent() audio_segment - the segment to find silence in min_silence_len - the minimum length for any silent section silence_thresh - the upper bound for how quiet is silent in dFBS seek_step - step size for interating over the segment in ms """ silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step) len_seg = len(audio_segment) # if there is no silence, the whole thing is nonsilent if not silent_ranges: return [[0, len_seg]] # short circuit when the whole audio segment is silent if silent_ranges[0][0] == 0 and silent_ranges[0][1] == len_seg: return [] prev_end_i = 0 nonsilent_ranges = [] for start_i, end_i in silent_ranges: nonsilent_ranges.append([prev_end_i, start_i]) prev_end_i = end_i if end_i != len_seg: nonsilent_ranges.append([prev_end_i, len_seg]) if nonsilent_ranges[0] == [0, 0]: nonsilent_ranges.pop(0) return nonsilent_ranges def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100, seek_step=1): """ Returns list of audio segments from splitting audio_segment on silent sections audio_segment - original pydub.AudioSegment() object min_silence_len - (in ms) minimum length of a silence to be used for a split. default: 1000ms silence_thresh - (in dBFS) anything quieter than this will be considered silence. default: -16dBFS keep_silence - (in ms or True/False) leave some silence at the beginning and end of the chunks. Keeps the sound from sounding like it is abruptly cut off. When the length of the silence is less than the keep_silence duration it is split evenly between the preceding and following non-silent segments. If True is specified, all the silence is kept, if False none is kept. default: 100ms seek_step - step size for interating over the segment in ms """ # from the itertools documentation def pairwise(iterable): "s -> (s0,s1), (s1,s2), (s2, s3), ..." a, b = itertools.tee(iterable) next(b, None) return zip(a, b) if isinstance(keep_silence, bool): keep_silence = len(audio_segment) if keep_silence else 0 output_ranges = [ [ start - keep_silence, end + keep_silence ] for (start,end) in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step) ] for range_i, range_ii in pairwise(output_ranges): last_end = range_i[1] next_start = range_ii[0] if next_start < last_end: range_i[1] = (last_end+next_start)//2 range_ii[0] = range_i[1] return [ audio_segment[ max(start,0) : min(end,len(audio_segment)) ] for start,end in output_ranges ] def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10): """ Returns the millisecond/index that the leading silence ends. audio_segment - the segment to find silence in silence_threshold - the upper bound for how quiet is silent in dFBS chunk_size - chunk size for interating over the segment in ms """ trim_ms = 0 # ms assert chunk_size > 0 # to avoid infinite loop while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound): trim_ms += chunk_size # if there is no end it should return the length of the segment return min(trim_ms, len(sound))