Module: Muze::Feature
- Defined in:
- lib/muze/feature/mfcc.rb,
lib/muze/feature/chroma.rb,
lib/muze/feature/context.rb,
lib/muze/feature/spectral.rb,
lib/muze/feature/aggregation.rb
Overview
Feature extraction methods.
Defined Under Namespace
Classes: Context
Class Method Summary
-
.beat_sync(data, beats:, aggregate: :mean) ⇒ Object
Beat-synchronous aggregation over the frame axis.
-
.chroma_stft(y: nil, sr: 22_050, s: nil, n_chroma: 12, n_fft: 2048, hop_length: 512, norm: 2, tuning: 0.0, ctroct: nil, octwidth: nil) ⇒ Numo::SFloat
Shape: [n_chroma, frames].
- .context(y:, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect) ⇒ Object
- .delta(data, order: 1, width: 9, mode: :interp) ⇒ Numo::SFloat
- .extract(y:, sr: 22_050, features: Context::DEFAULT_FEATURES, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect) ⇒ Object
- .melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, power: 2.0, center: true, window: :hann, pad_mode: :reflect, norm: nil, s_kind: :power) ⇒ Numo::SFloat
- .mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, dct_type: 2, lifter: 0, norm: :ortho, s_kind: :mel_power) ⇒ Numo::SFloat
-
.poly_features(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, order: 1, frequency: nil, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [order + 1, frames].
-
.rms(y: nil, s: nil, frame_length: 2048, hop_length: 512, center: false) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_contrast(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02, fmin: 200.0, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [n_bands + 1, frames].
- .spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:) ⇒ Array<Integer>
-
.spectral_crest(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_decrease(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_entropy(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_flux(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
-
.spectral_slope(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Shape: [1, frames].
- .tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false) ⇒ Numo::SFloat
-
.tonnetz(y: nil, chroma: nil, sr: 22_050, n_fft: 2048, hop_length: 512) ⇒ Numo::SFloat
Shape: [6, frames].
-
.zero_crossing_rate(y, frame_length: 2048, hop_length: 512, threshold: 0.0, center: false) ⇒ Numo::SFloat
Shape: [1, frames].
Class Method Details
.beat_sync(data, beats:, aggregate: :mean) ⇒ Object
Beat-synchronous aggregation over the frame axis.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/muze/feature/aggregation.rb', line 8
#
# Beat-synchronous aggregation over the frame axis.
#
# Casts +data+ to Numo::SFloat, promotes a 1-D input to a single-row matrix,
# and reduces each span between consecutive beat boundaries to one value per
# row using +aggregate_values+.
#
# @param data [Object] anything Numo::SFloat.cast accepts; frames on the last axis
# @param beats [Object] beat positions, handed to +beat_boundaries+ with the frame count
# @param aggregate [Symbol] one of :mean, :median, :max
# @return [Numo::SFloat] shape: [rows, boundaries.length - 1]
# @raise [Muze::ParameterError] if +aggregate+ is unknown or +data+ is not 1-D/2-D
def beat_sync(data, beats:, aggregate: :mean)
  raise Muze::ParameterError, "aggregate must be :mean, :median, or :max" unless %i[mean median max].include?(aggregate)

  matrix = Numo::SFloat.cast(data)
  # Promote a vector to shape [1, frames] so the [row, frame] indexing below applies.
  matrix = matrix.expand_dims(0) if matrix.ndim == 1
  raise Muze::ParameterError, "data must be one- or two-dimensional" unless [1, 2].include?(matrix.ndim)

  rows, frames = matrix.shape
  boundaries = beat_boundaries(beats, frames)
  output = Numo::SFloat.zeros(rows, boundaries.length - 1)

  boundaries.each_cons(2).with_index do |(left, right), segment_index|
    rows.times do |row|
      values = matrix[row, left...right].to_a
      output[row, segment_index] = aggregate_values(values, aggregate:)
    end
  end

  output
end
.chroma_stft(y: nil, sr: 22_050, s: nil, n_chroma: 12, n_fft: 2048, hop_length: 512, norm: 2, tuning: 0.0, ctroct: nil, octwidth: nil) ⇒ Numo::SFloat
Returns shape: [n_chroma, frames].
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/muze/feature/chroma.rb', line 16
#
# Chroma features computed from a magnitude spectrum.
#
# When +s+ is given it is cast to Numo::SFloat and checked with
# +validate_spectrum!+; otherwise the magnitude of +Muze.stft(y)+ is used.
# The spectrum is multiplied by the +Muze::Filters.chroma+ filter bank and
# the result is passed through +normalize+.
#
# @return [Numo::SFloat] shape: [n_chroma, frames]
def chroma_stft(y: nil, sr: 22_050, s: nil, n_chroma: 12, n_fft: 2048, hop_length: 512, norm: 2, tuning: 0.0, ctroct: nil, octwidth: nil)
  spectrum =
    if s
      provided = Numo::SFloat.cast(s)
      validate_spectrum!(provided)
      provided
    else
      stft_matrix = Muze.stft(y, n_fft:, hop_length:)
      magnitude, = Muze.magphase(stft_matrix)
      magnitude
    end

  # A 1-D spectrum becomes a single column so the matrix product below is well-formed.
  spectrum = spectrum.expand_dims(1) if spectrum.ndim == 1
  filter_bank = Muze::Filters.chroma(sr:, n_fft:, n_chroma:, tuning:, ctroct:, octwidth:)
  chroma = Muze::Core::Matrix.multiply(filter_bank, spectrum)
  normalize(chroma, norm:)
end
.context(y:, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect) ⇒ Object
73 74 75 |
# File 'lib/muze/feature/context.rb', line 73
#
# Builds a Context from the given signal and analysis settings.
# All arguments are forwarded unchanged to +Context.new+.
#
# @return [Context]
def context(y:, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect)
  Context.new(
    y: y,
    sr: sr,
    n_fft: n_fft,
    hop_length: hop_length,
    center: center,
    pad_mode: pad_mode
  )
end
.delta(data, order: 1, width: 9, mode: :interp) ⇒ Numo::SFloat
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/muze/feature/mfcc.rb', line 80
#
# Applies +finite_difference+ to +data+ +order+ times.
#
# A 1-D input that comes back 2-D is reduced to its first column, so the
# result is returned as a vector again.
#
# @param data [Object] anything Numo::SFloat.cast accepts
# @param order [Integer] number of differentiation passes, >= 1
# @param width [Integer] odd window width, >= 3
# @param mode [Symbol] :interp, :nearest, :mirror, or :constant
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] on invalid order, width, or mode
def delta(data, order: 1, width: 9, mode: :interp)
  raise Muze::ParameterError, "order must be >= 1" unless order >= 1
  raise Muze::ParameterError, "width must be odd and >= 3" if !width.odd? || width < 3

  known_modes = %i[interp nearest mirror constant]
  raise Muze::ParameterError, "mode must be :interp, :nearest, :mirror, or :constant" unless known_modes.include?(mode)

  source = Numo::SFloat.cast(data)
  vector_input = source.ndim == 1

  derivative = order.times.reduce(source) do |current, _pass|
    finite_difference(current, width, mode: mode)
  end

  return derivative[true, 0] if vector_input && derivative.ndim == 2

  derivative
end
.extract(y:, sr: 22_050, features: Context::DEFAULT_FEATURES, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect) ⇒ Object
77 78 79 |
# File 'lib/muze/feature/context.rb', line 77
#
# Convenience wrapper: builds a context via +context+ and calls +extract+ on
# it with the requested +features+.
#
# @return [Object] whatever +Context#extract+ returns
def extract(y:, sr: 22_050, features: Context::DEFAULT_FEATURES, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect)
  shared = context(
    y: y,
    sr: sr,
    n_fft: n_fft,
    hop_length: hop_length,
    center: center,
    pad_mode: pad_mode
  )
  shared.extract(features: features)
end
.melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, power: 2.0, center: true, window: :hann, pad_mode: :reflect, norm: nil, s_kind: :power) ⇒ Numo::SFloat
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/muze/feature/mfcc.rb', line 22
#
# Mel-scaled spectrogram: the +Muze::Filters.mel+ filter bank multiplied by a
# spectrum.
#
# When +s+ is given it must be finite and non-negative. With
# +s_kind: :magnitude+ it is raised to +power+; with +s_kind: :power+ it is
# used as-is. Without +s+, the spectrum comes from +spectrogram(y, ...)+.
#
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] on non-positive power, unknown s_kind, or negative values in +s+
def melspectrogram(y: nil, sr: 22_050, s: nil, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, power: 2.0, center: true, window: :hann, pad_mode: :reflect, norm: nil, s_kind: :power)
  raise Muze::ParameterError, "power must be positive" unless power.positive?
  raise Muze::ParameterError, "s_kind must be :power or :magnitude" unless %i[power magnitude].include?(s_kind)

  spectrum =
    if s
      provided = Numo::SFloat.cast(s)
      # Flatten once and reuse for both checks instead of converting the
      # whole matrix to a Ruby array twice.
      flat_values = provided.to_a.flatten
      validate_finite_array!(flat_values, "s")
      raise Muze::ParameterError, "spectrogram input must be non-negative" if flat_values.any?(&:negative?)

      s_kind == :magnitude ? (provided**power).cast_to(Numo::SFloat) : provided
    else
      spectrogram(y, n_fft:, hop_length:, power:, center:, window:, pad_mode:)
    end

  filter_bank = Muze::Filters.mel(sr:, n_fft:, n_mels:, fmin:, fmax:, norm:)
  Muze::Core::Matrix.multiply(filter_bank, spectrum)
end
.mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, dct_type: 2, lifter: 0, norm: :ortho, s_kind: :mel_power) ⇒ Numo::SFloat
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/muze/feature/mfcc.rb', line 52
#
# Mel-frequency cepstral coefficients.
#
# Takes a mel spectrogram (+s+, or computed from +y+ via +melspectrogram+),
# converts it with +Muze.power_to_db+ unless +s_kind+ is :log_mel, applies a
# DCT along axis 0, keeps the first +n_mfcc+ rows, and applies +apply_lifter+.
#
# @return [Numo::SFloat]
# @raise [Muze::ParameterError] on non-positive n_mfcc, negative lifter,
#   unknown s_kind, or negative values in a :mel_power +s+
def mfcc(y: nil, sr: 22_050, s: nil, n_mfcc: 20, n_fft: 2048, hop_length: 512, n_mels: 128, fmin: 0.0, fmax: nil, dct_type: 2, lifter: 0, norm: :ortho, s_kind: :mel_power)
  raise Muze::ParameterError, "n_mfcc must be positive" unless n_mfcc.positive?
  raise Muze::ParameterError, "lifter must be >= 0" if lifter.negative?
  raise Muze::ParameterError, "s_kind must be :mel_power or :log_mel" unless %i[mel_power log_mel].include?(s_kind)

  mel_spec =
    if s
      provided = Numo::SFloat.cast(s)
      # Flatten once; the same array serves the finiteness and sign checks.
      flat_values = provided.to_a.flatten
      validate_finite_array!(flat_values, "s")
      # Log-mel input may legitimately be negative, so only power input is sign-checked.
      if s_kind == :mel_power && flat_values.any?(&:negative?)
        raise Muze::ParameterError, "mel power spectrogram must be non-negative"
      end

      provided
    else
      melspectrogram(y:, sr:, n_fft:, hop_length:, n_mels:, fmin:, fmax:)
    end

  log_mel = s_kind == :log_mel ? mel_spec : Muze.power_to_db(mel_spec)
  dct = Muze::Core::DCT.dct(log_mel, type: dct_type, axis: 0, norm:)
  coeffs = dct[0...n_mfcc, true].cast_to(Numo::SFloat)
  apply_lifter(coeffs, lifter:)
end
.poly_features(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, order: 1, frequency: nil, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [order + 1, frames].
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 |
# File 'lib/muze/feature/spectral.rb', line 255
#
# Per-frame polynomial coefficients of the spectrum over the frequency axis.
#
# The x-axis is +frequency+ when supplied (flattened), otherwise the
# frequencies returned by +prepare_magnitude+; it is passed through
# +normalize_frequency_axis+ before fitting with +polynomial_coefficients+.
#
# @return [Numo::SFloat] shape: [order + 1, frames]
# @raise [Muze::ParameterError] if order is not a non-negative Integer or the
#   x-axis length differs from the number of spectrum bins
def poly_features(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, order: 1, frequency: nil, center: true, pad_mode: :reflect, s_kind: :magnitude)
  valid_order = order.is_a?(Integer) && order >= 0
  raise Muze::ParameterError, "order must be >= 0" unless valid_order

  magnitude, frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  bin_count, frame_count = magnitude.shape

  axis =
    if frequency
      Numo::SFloat.cast(frequency).to_a.flatten
    else
      frequencies
    end
  raise Muze::ParameterError, "frequency length must match spectrum bins" unless axis.length == bin_count

  axis = normalize_frequency_axis(axis)
  result = Numo::SFloat.zeros(order + 1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t].to_a
    fitted = polynomial_coefficients(axis, column, order)
    fitted.each_with_index { |coefficient, row| result[row, t] = coefficient }
  end

  result
end
.rms(y: nil, s: nil, frame_length: 2048, hop_length: 512, center: false) ⇒ Numo::SFloat
Returns shape: [1, frames].
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
# File 'lib/muze/feature/spectral.rb', line 298
#
# Root-mean-square value per frame.
#
# With +s+: each column of the (finite) matrix is reduced to
# sqrt(mean(column ** 2)); a 1-D +s+ is treated as a single column.
# Without +s+: +y+ is converted with +mono_signal_to_a+, optionally
# zero-padded by frame_length / 2 on both sides when +center+ is true, sliced
# into frames, and each frame is reduced the same way.
#
# @return [Numo::SFloat] shape: [1, frames]
def rms(y: nil, s: nil, frame_length: 2048, hop_length: 512, center: false)
  if s
    matrix = Numo::SFloat.cast(s)
    validate_finite_array!(matrix.to_a.flatten, "s")
    # Promote a vector to shape [bins, 1] so column indexing below applies.
    matrix = matrix.expand_dims(1) if matrix.ndim == 1
    _, frames = matrix.shape
    values = Array.new(frames) do |frame_index|
      frame = matrix[true, frame_index]
      Math.sqrt((frame**2).sum / frame.size)
    end
    return Numo::SFloat[values].reshape(1, values.length)
  end

  signal = mono_signal_to_a(y, "y")
  signal = Array.new(frame_length / 2, 0.0) + signal + Array.new(frame_length / 2, 0.0) if center
  frames = Muze::Core::Frames.slice(signal, frame_length:, hop_length:)

  values = frames.map do |frame|
    Math.sqrt(frame.sum { |value| value * value } / frame.length)
  end

  Numo::SFloat[values].reshape(1, values.length)
end
.spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/muze/feature/spectral.rb', line 41
#
# Order-+p+ spectral bandwidth per frame:
# (sum(spectrum * |frequency - centroid| ** p) / sum(spectrum)) ** (1 / p).
# Frames whose spectrum sum is not positive yield 0.0.
#
# @return [Numo::SFloat] shape: [1, frames]
# @raise [Muze::ParameterError] if +p+ is not positive
def spectral_bandwidth(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, p: 2, center: true, pad_mode: :reflect, s_kind: :magnitude)
  raise Muze::ParameterError, "p must be positive" unless p.positive?

  magnitude, frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  # The centroid is computed from the already-prepared magnitude, hence :magnitude.
  centroids = spectral_centroid(
    y: y, s: magnitude, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: :magnitude
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t]
    total = column.sum

    result[0, t] =
      if total <= 0.0
        0.0
      else
        # Plain left-to-right accumulation, matching a running `+=` loop.
        weighted = frequencies.each_with_index.inject(0.0) do |acc, (hz, bin)|
          deviation = (hz - centroids[0, t]).abs
          acc + (column[bin] * (deviation**p))
        end
        (weighted / total)**(1.0 / p)
      end
  end

  result
end
.spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/muze/feature/spectral.rb', line 13
#
# Spectral centroid per frame: the spectrum-weighted mean of the bin
# frequencies. Frames whose spectrum sum is not positive yield 0.0.
#
# @return [Numo::SFloat] shape: [1, frames]
def spectral_centroid(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
  magnitude, frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t]
    total = column.sum

    result[0, t] =
      if total <= 0.0
        0.0
      else
        # Plain left-to-right accumulation, matching a running `+=` loop.
        weighted = frequencies.each_with_index.inject(0.0) do |acc, (hz, bin)|
          acc + (hz * column[bin])
        end
        weighted / total
      end
  end

  result
end
.spectral_contrast(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02, fmin: 200.0, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [n_bands + 1, frames].
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/muze/feature/spectral.rb', line 129
#
# Spectral contrast per band and frame: 10 * log10(peak / valley), where
# valley and peak are the +quantile+ and (1 - +quantile+) order statistics of
# the sorted magnitudes inside the band, each floored at 1.0e-10.
#
# Band edges come from +spectral_contrast_edges+. When several bands saturate
# at Nyquist (large +n_bands+), an edge can land past the last spectrum bin;
# such a band is clamped to the available bins, and a band with no bins left
# keeps its zero-initialized row instead of slicing out of range.
#
# @return [Numo::SFloat] shape: [n_bands + 1, frames]
# @raise [Muze::ParameterError] on non-positive n_bands or fmin, or quantile outside (0, 0.5)
def spectral_contrast(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, n_bands: 6, quantile: 0.02, fmin: 200.0, center: true, pad_mode: :reflect, s_kind: :magnitude)
  raise Muze::ParameterError, "n_bands must be positive" unless n_bands.positive?
  raise Muze::ParameterError, "quantile must satisfy 0 < quantile < 0.5" unless quantile.positive? && quantile < 0.5
  raise Muze::ParameterError, "fmin must be positive" unless fmin.positive?

  magnitude, frequencies = prepare_magnitude(y:, s:, sr:, n_fft:, hop_length:, center:, pad_mode:, s_kind:)
  bins, frames = magnitude.shape
  edges = spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:)
  output = Numo::SFloat.zeros(n_bands + 1, frames)

  (n_bands + 1).times do |band|
    lower = edges[band]
    # Every band spans at least one bin, but never reaches past the spectrum.
    upper = [[edges[band + 1], lower + 1].max, bins].min
    next if lower >= upper

    frames.times do |frame_index|
      segment = magnitude[lower...upper, frame_index].to_a.sort

      low_idx = [(segment.length * quantile).floor, segment.length - 1].min
      high_idx = [(segment.length * (1.0 - quantile)).floor, segment.length - 1].min
      # Floor both statistics so the ratio and its logarithm stay finite.
      valley = [segment[low_idx], 1.0e-10].max
      peak = [segment[high_idx], 1.0e-10].max
      output[band, frame_index] = 10.0 * Math.log10(peak / valley)
    end
  end

  output
end
.spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:) ⇒ Array<Integer>
159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/muze/feature/spectral.rb', line 159
#
# Bin indices delimiting the spectral-contrast bands.
#
# Band edges in Hz are 0, +fmin+, then +fmin+ doubled once per band (capped at
# Nyquist = sr / 2), then Nyquist. Each edge is mapped to the index of the
# nearest entry in +frequencies+ (first match wins on ties). The returned list
# starts with 0 and, for each consecutive pair of mapped indices, appends the
# right index or left + 1, whichever is larger.
#
# @param frequencies [Array<Numeric>] bin centre frequencies; must not be empty
# @param n_bands [Integer] number of octave bands
# @param fmin [Numeric] upper edge of the lowest band, in the units of +frequencies+
# @param sr [Numeric] sample rate
# @return [Array<Integer>] n_bands + 3 edge indices
# @raise [Muze::ParameterError] if +frequencies+ is empty
def spectral_contrast_edges(frequencies, n_bands:, fmin:, sr:)
  # Without bins there is no nearest index; fail with a clear message instead
  # of a nil comparison error further down.
  raise Muze::ParameterError, "frequencies must not be empty" if frequencies.empty?

  nyquist = sr / 2.0
  hz_edges = [0.0, fmin]
  n_bands.times { |band| hz_edges << [fmin * (2.0**(band + 1)), nyquist].min }
  hz_edges << nyquist

  # each_index only yields valid positions, so no extra clamping is required.
  nearest_bins = hz_edges.map do |hz|
    frequencies.each_index.min_by { |idx| (frequencies[idx] - hz).abs }
  end

  nearest_bins.each_cons(2).with_object([0]) do |(left, right), edges|
    edges << [right, left + 1].max
  end
end
.spectral_crest(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/muze/feature/spectral.rb', line 204
#
# Spectral crest per frame: maximum of the spectrum divided by its mean.
# Frames whose mean is not positive yield 0.0.
#
# @return [Numo::SFloat] shape: [1, frames]
def spectral_crest(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
  magnitude, _frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t]
    average = column.mean

    result[0, t] =
      if average <= 0.0
        0.0
      else
        column.max / average
      end
  end

  result
end
.spectral_decrease(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 |
# File 'lib/muze/feature/spectral.rb', line 235
#
# Spectral decrease per frame:
# sum over bins k >= 1 of (value[k] - value[0]) / k, divided by the sum of
# value[k] over the same bins. A non-positive denominator yields 0.0.
#
# @return [Numo::SFloat] shape: [1, frames]
def spectral_decrease(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
  magnitude, _frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  bin_count, frame_count = magnitude.shape
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    reference = magnitude[0, t]
    weighted_drop = 0.0
    energy = 0.0

    1.upto(bin_count - 1) do |k|
      current = magnitude[k, t]
      weighted_drop += (current - reference) / k
      energy += current
    end

    result[0, t] =
      if energy <= 0.0
        0.0
      else
        weighted_drop / energy
      end
  end

  result
end
.spectral_entropy(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/muze/feature/spectral.rb', line 186
#
# Normalized spectral entropy per frame: the base-2 Shannon entropy of the
# sum-normalized spectrum, divided by log2(max(bins, 2)). Frames whose
# spectrum sum is not positive are left at 0.
#
# @return [Numo::SFloat] shape: [1, frames]
def spectral_entropy(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
  magnitude, _frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t]
    mass = column.sum
    next if mass <= 0.0

    distribution = (column / mass).to_a
    bits = distribution.sum do |probability|
      # Zero-probability bins contribute nothing (and log2(0) is undefined).
      if probability.positive?
        -(probability * Math.log2(probability))
      else
        0.0
      end
    end

    scale = Math.log2([column.size, 2].max)
    result[0, t] = bits / scale
  end

  result
end
.spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/muze/feature/spectral.rb', line 107
#
# Spectral flatness per frame: geometric mean divided by arithmetic mean of
# the spectrum, with every value floored at +amin+ first.
#
# The method has no +sr+ parameter; +prepare_magnitude+ is called with a fixed
# sr of 22_050.
#
# @return [Numo::SFloat] shape: [1, frames]
# @raise [Muze::ParameterError] if +amin+ is not positive
def spectral_flatness(y: nil, s: nil, n_fft: 2048, hop_length: 512, amin: 1.0e-10, center: true, pad_mode: :reflect, s_kind: :magnitude)
  raise Muze::ParameterError, "amin must be positive" unless amin.positive?

  magnitude, _frequencies = prepare_magnitude(
    y: y, s: s, sr: 22_050, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    # Floor at amin so the logarithm below is always defined.
    floored = magnitude[true, t].to_a.map { |value| [value, amin].max }
    count = floored.length

    log_total = floored.sum { |value| Math.log(value) }
    geometric_mean = Math.exp(log_total / count)
    arithmetic_mean = floored.sum(0.0) / count

    result[0, t] = geometric_mean / arithmetic_mean
  end

  result
end
.spectral_flux(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/muze/feature/spectral.rb', line 173
#
# Spectral flux per frame: Euclidean norm of the difference between a frame's
# spectrum and the previous frame's. The first frame has no predecessor and
# stays 0.
#
# @return [Numo::SFloat] shape: [1, frames]
def spectral_flux(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
  magnitude, _frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  (0...frame_count).each_cons(2) do |previous, current|
    change = magnitude[true, current] - magnitude[true, previous]
    result[0, current] = Math.sqrt((change * change).sum)
  end

  result
end
.spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/muze/feature/spectral.rb', line 76
#
# Spectral roll-off per frame: the frequency of the first bin at which the
# running sum of the spectrum reaches +roll_percent+ of the frame's total.
# Falls back to the last frequency when no bin reaches the threshold.
#
# @return [Numo::SFloat] shape: [1, frames]
# @raise [Muze::ParameterError] unless 0 < roll_percent < 1
def spectral_rolloff(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, roll_percent: 0.85, center: true, pad_mode: :reflect, s_kind: :magnitude)
  in_range = roll_percent.positive? && roll_percent < 1.0
  raise Muze::ParameterError, "roll_percent must satisfy 0 < roll_percent < 1" unless in_range

  magnitude, frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )
  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t]
    cutoff = column.sum * roll_percent
    running = 0.0

    # find stops at the first bin whose running total reaches the cutoff.
    hit = frequencies.each_with_index.find do |_hz, bin|
      running += column[bin]
      running >= cutoff
    end

    result[0, t] = hit ? hit.first : frequencies.last
  end

  result
end
.spectral_slope(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude) ⇒ Numo::SFloat
Returns shape: [1, frames].
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
# File 'lib/muze/feature/spectral.rb', line 218
#
# Spectral slope per frame: the least-squares slope of the spectrum against
# frequency, i.e. sum((f - mean_f) * (x - mean_x)) / sum((f - mean_f) ** 2).
# Yields 0.0 when the frequency spread is zero.
#
# @return [Numo::SFloat] shape: [1, frames]
def spectral_slope(y: nil, s: nil, sr: 22_050, n_fft: 2048, hop_length: 512, center: true, pad_mode: :reflect, s_kind: :magnitude)
  magnitude, frequencies = prepare_magnitude(
    y: y, s: s, sr: sr, n_fft: n_fft, hop_length: hop_length,
    center: center, pad_mode: pad_mode, s_kind: s_kind
  )

  # Frequency statistics do not depend on the frame, so compute them once.
  freq_mean = frequencies.sum / frequencies.length.to_f
  freq_spread = frequencies.sum { |hz| (hz - freq_mean)**2 }

  frame_count = magnitude.shape[1]
  result = Numo::SFloat.zeros(1, frame_count)

  frame_count.times do |t|
    column = magnitude[true, t].to_a
    level_mean = column.sum / column.length.to_f
    cross = frequencies.each_with_index.sum do |hz, bin|
      (hz - freq_mean) * (column[bin] - level_mean)
    end

    result[0, t] =
      if freq_spread.zero?
        0.0
      else
        cross / freq_spread
      end
  end

  result
end
.tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false) ⇒ Numo::SFloat
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 |
# File 'lib/muze/feature/spectral.rb', line 328
#
# Local autocorrelation of an onset envelope.
#
# For every frame, the trailing window of at most +win_length+ envelope values
# ending at that frame is autocorrelated: entry [lag, frame] is the sum of
# products of window samples +lag+ apart. Lags that exceed the window stay 0.
# With +normalize+ the raw value is passed through +normalized_autocorrelation+.
#
# The envelope is +onset_envelope+ when given, otherwise it is derived from
# +y+ via +onset_env_from_signal+.
#
# @return [Numo::SFloat] shape: [win_length, frames]
# @raise [Muze::ParameterError] on non-positive-integer sr/hop_length/win_length
#   or a non-boolean normalize
def tempogram(y: nil, onset_envelope: nil, sr: 22_050, hop_length: 512, win_length: 384, normalize: false)
  unless sr.is_a?(Integer) && sr.positive?
    raise Muze::ParameterError, "sr must be a positive integer"
  end
  unless hop_length.is_a?(Integer) && hop_length.positive?
    raise Muze::ParameterError, "hop_length must be a positive integer"
  end
  unless win_length.is_a?(Integer) && win_length.positive?
    raise Muze::ParameterError, "win_length must be a positive integer"
  end
  raise Muze::ParameterError, "normalize must be true or false" unless [true, false].include?(normalize)

  envelope =
    if !onset_envelope
      onset_env_from_signal(y, sr: sr, hop_length: hop_length)
    elsif onset_envelope.is_a?(Numo::NArray)
      onset_envelope.to_a
    else
      Array(onset_envelope)
    end
  validate_finite_array!(envelope, "onset_envelope")

  frame_count = envelope.length
  result = Numo::SFloat.zeros(win_length, frame_count)

  frame_count.times do |t|
    first = [0, t - win_length + 1].max
    window = envelope[first..t]
    # Only lags shorter than the window have any overlapping samples.
    usable_lags = [win_length, window.length].min

    usable_lags.times do |lag|
      raw = (lag...window.length).inject(0.0) do |acc, offset|
        acc + (window[offset] * window[offset - lag])
      end

      result[lag, t] =
        if normalize
          normalized_autocorrelation(window, lag, raw)
        else
          raw
        end
    end
  end

  result
end
.tonnetz(y: nil, chroma: nil, sr: 22_050, n_fft: 2048, hop_length: 512) ⇒ Numo::SFloat
Returns shape: [6, frames].
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/muze/feature/chroma.rb', line 34
#
# Six-dimensional tonal centroid features from 12-bin chroma.
#
# Each frame's chroma vector is divided by its sum and projected onto six
# cosine bases built from the interval multipliers 7, 7, 3, 3, 4, 4, with a
# pi/2 phase shift on every second dimension. Frames whose chroma sum is not
# positive are left at 0.
#
# The chroma matrix is +chroma+ when given, otherwise +chroma_stft+ of +y+.
#
# @return [Numo::SFloat] shape: [6, frames]
# @raise [Muze::ParameterError] if the chroma matrix does not have 12 rows
def tonnetz(y: nil, chroma: nil, sr: 22_050, n_fft: 2048, hop_length: 512)
  chroma_matrix = chroma ? Numo::SFloat.cast(chroma) : chroma_stft(y:, sr:, n_fft:, hop_length:)
  # A 1-D chroma vector is treated as a single frame (one column).
  chroma_matrix = chroma_matrix.expand_dims(1) if chroma_matrix.ndim == 1
  validate_spectrum!(chroma_matrix)
  raise Muze::ParameterError, "tonnetz requires 12-bin chroma" unless chroma_matrix.shape[0] == 12

  frames = chroma_matrix.shape[1]
  output = Numo::SFloat.zeros(6, frames)
  intervals = [7, 7, 3, 3, 4, 4]
  phases = [0.0, Math::PI / 2.0, 0.0, Math::PI / 2.0, 0.0, Math::PI / 2.0]

  frames.times do |frame|
    vector = chroma_matrix[true, frame]
    total = vector.sum
    next if total <= 0.0

    normalized = vector / total
    6.times do |dimension|
      sum = 0.0
      12.times do |chroma_index|
        angle = ((Math::PI * intervals[dimension] * chroma_index) / 6.0) + phases[dimension]
        sum += normalized[chroma_index] * Math.cos(angle)
      end
      output[dimension, frame] = sum
    end
  end

  output
end
.zero_crossing_rate(y, frame_length: 2048, hop_length: 512, threshold: 0.0, center: false) ⇒ Numo::SFloat
Returns shape: [1, frames].
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 |
# File 'lib/muze/feature/spectral.rb', line 277
#
# Zero-crossing rate per frame.
#
# Samples whose absolute value is at most +threshold+ are replaced by 0.0;
# a crossing is counted whenever two adjacent samples disagree on being
# >= 0. The count is divided by +frame_length+. When +center+ is true the
# signal is zero-padded by frame_length / 2 on both sides before framing.
#
# @return [Numo::SFloat] shape: [1, frames]
# @raise [Muze::ParameterError] if +threshold+ is negative
def zero_crossing_rate(y, frame_length: 2048, hop_length: 512, threshold: 0.0, center: false)
  raise Muze::ParameterError, "threshold must be >= 0" if threshold.negative?

  samples = mono_signal_to_a(y, "y")
  if center
    padding = Array.new(frame_length / 2, 0.0)
    samples = padding + samples + padding
  end

  windows = Muze::Core::Frames.slice(samples, frame_length: frame_length, hop_length: hop_length)

  rates = windows.map do |window|
    gated = window.map { |sample| sample.abs <= threshold ? 0.0 : sample }
    flips = gated.each_cons(2).count do |earlier, later|
      (earlier >= 0) != (later >= 0)
    end
    flips.to_f / frame_length
  end

  Numo::SFloat[rates].reshape(1, rates.length)
end