WIP : selecting BPM candidates

This commit is contained in:
Stepland 2023-07-03 01:40:51 +02:00
parent bc24b8f3cf
commit f74ad4d701
3 changed files with 164 additions and 52 deletions

57
include/eigen_polyfit.hpp Normal file
View File

@ -0,0 +1,57 @@
// Adapted from https://github.com/patLoeber/Polyfit
//
// PolyfitEigen.hpp
// Polyfit
//
// Created by Patrick Löber on 23.11.18.
// Copyright © 2018 Patrick Loeber. All rights reserved.
//
// Use the Eigen library for fitting: http://eigen.tuxfamily.org
// See https://eigen.tuxfamily.org/dox/group__TutorialLinearAlgebra.html for different methods
#include "Eigen/Dense"
template<typename T>
std::vector<T> polyfit(const std::vector<T> &yValues, const int degree, const std::vector<T>& weights = std::vector<T>(), bool useJacobi = true)
{
using namespace Eigen;
bool useWeights = weights.size() > 0 && weights.size() == yValues.size();
int numCoefficients = degree + 1;
size_t nCount = yValues.size();
MatrixXf X(nCount, numCoefficients);
MatrixXf Y(nCount, 1);
// fill Y matrix
for (size_t i = 0; i < nCount; i++)
{
if (useWeights)
Y(i, 0) = yValues[i] * weights[i];
else
Y(i, 0) = yValues[i];
}
// fill X matrix (Vandermonde matrix)
for (size_t nRow = 0; nRow < nCount; nRow++)
{
T nVal = 1.0f;
for (int nCol = 0; nCol < numCoefficients; nCol++)
{
if (useWeights)
X(nRow, nCol) = nVal * weights[nRow];
else
X(nRow, nCol) = nVal;
nVal *= nRow;
}
}
VectorXf coefficients;
if (useJacobi)
coefficients = X.jacobiSvd(ComputeThinU | ComputeThinV).solve(Y);
else
coefficients = X.colPivHouseholderQr().solve(Y);
return std::vector<T>(coefficients.data(), coefficients.data() + numCoefficients);
}

View File

@ -1,6 +1,8 @@
#include "guess_tempo.hpp"
#include <algorithm>
#include <bits/ranges_algo.h>
#include <cassert>
#include <cstddef>
#include <deque>
#include <functional>
@ -14,9 +16,78 @@
#include <SFML/Config.hpp>
#include <SFML/System/Utf.hpp>
#include <eigen_polyfit.hpp>
#include "Eigen/src/Core/Array.h"
#include "aubio_cpp.hpp"
std::vector<float> guess_tempo(const std::filesystem::path& path) {
feis::InputSoundFile music;
music.open_from_path(path);
const auto onsets = detect_onsets(music);
estimate_bpm(onsets, music.getSampleRate());
// estimate offsets
}
void estimate_bpm(const std::set<std::size_t>& onsets, const std::size_t sample_rate) {
const auto fitness = broad_interval_test(onsets, sample_rate);
const auto corrected_fitness = correct_bias(fitness);
const auto max_fitness = corrected_fitness.maxCoeff();
std::vector<std::future<std::vector<IntervalFitness>>> futures;
for (std::size_t i = 0; i < corrected_fitness.size(); i++) {
if ((corrected_fitness[i] / max_fitness) > 0.4f) {
const auto interval = fitness.at(i).interval;
futures.emplace_back(
std::async(
std::launch::async,
std::bind(test_intervals, std::cref(onsets), interval - 9, 19, 1)
)
);
}
}
std::vector<IntervalFitness> candidates;
for (auto& future : futures) {
const auto results = future.get();
const auto it = std::ranges::max_element(results, {}, [](const IntervalFitness& f){return f.fitness;});
candidates.push_back(*it);
}
std::sort(
candidates.begin(),
candidates.end(),
[](const IntervalFitness& a, const IntervalFitness& b){return a.fitness > b.fitness;}
);
std::vector<IntervalFitness>
}
std::set<std::size_t> detect_onsets(feis::InputSoundFile& music) {
const auto sample_rate = music.getSampleRate();
const auto channel_count = music.getChannelCount();
std::size_t hop_size = 256;
std::size_t window_size = 1024;
std::size_t read = 0;
std::size_t chunk_size = hop_size * music.getChannelCount();
std::vector<sf::Int16> samples(chunk_size);
std::vector<sf::Int16> downmixed_samples(hop_size);
std::set<std::size_t> onsets;
auto detector = aubio::onset_detector("specflux", window_size, hop_size, sample_rate);
do {
read = music.read(samples.data(), chunk_size);
std::ranges::fill(downmixed_samples, 0);
for (std::size_t i = 0; i < read / channel_count; i++) {
std::int64_t downmixed_sample = 0;
for (std::size_t channel = 0; channel < channel_count; channel++) {
downmixed_sample += samples[i*channel_count+channel];
}
downmixed_samples[i] = downmixed_sample / channel_count;
}
auto onset = detector.detect(downmixed_samples);
if (onset) {
onsets.emplace(*onset);
}
} while ( read == chunk_size );
aubio_cleanup();
return onsets;
}
const float min_tested_bpm = 89.f;
const float max_tested_bpm = 205.f;
@ -66,60 +137,23 @@ float evidence(const Eigen::ArrayXf& histogram, const std::size_t sample) {
}
}
std::vector<float> guess_tempo(const std::filesystem::path& path) {
feis::InputSoundFile music;
music.open_from_path(path);
const auto onsets = detect_onsets(music);
return compute_fitness(onsets, music.getSampleRate());
}
std::set<std::size_t> detect_onsets(feis::InputSoundFile& music) {
const auto sample_rate = music.getSampleRate();
const auto channel_count = music.getChannelCount();
std::size_t hop_size = 256;
std::size_t window_size = 1024;
std::size_t read = 0;
std::size_t chunk_size = hop_size * music.getChannelCount();
std::vector<sf::Int16> samples(chunk_size);
std::vector<sf::Int16> downmixed_samples(hop_size);
std::set<std::size_t> onsets;
auto detector = aubio::onset_detector("specflux", window_size, hop_size, sample_rate);
do {
read = music.read(samples.data(), chunk_size);
std::ranges::fill(downmixed_samples, 0);
for (std::size_t i = 0; i < read / channel_count; i++) {
std::int64_t downmixed_sample = 0;
for (std::size_t channel = 0; channel < channel_count; channel++) {
downmixed_sample += samples[i*channel_count+channel];
}
downmixed_samples[i] = downmixed_sample / channel_count;
}
auto onset = detector.detect(downmixed_samples);
if (onset) {
onsets.emplace(*onset);
}
} while ( read == chunk_size );
aubio_cleanup();
return onsets;
}
std::vector<float> compute_fitness(const std::set<std::size_t>& onsets, const std::size_t sample_rate) {
std::vector<IntervalFitness> broad_interval_test(const std::set<std::size_t>& onsets, const std::size_t sample_rate) {
const std::size_t min_tested_interval = sample_rate * 60.f / max_tested_bpm;
const std::size_t max_tested_interval = sample_rate * 60.f / min_tested_bpm;
const std::size_t interval_range_width = max_tested_interval - min_tested_interval + 1;
const std::size_t stride = 10;
const std::size_t number_of_tested_intervals = (interval_range_width - 1) / stride + 1;
std::vector<float> fitness_values;
std::vector<IntervalFitness> fitness_values;
fitness_values.reserve(number_of_tested_intervals);
const auto threads = std::thread::hardware_concurrency();
const auto chunk_size = number_of_tested_intervals / threads;
std::vector<std::future<std::vector<float>>> futures;
std::vector<std::future<std::vector<IntervalFitness>>> futures;
for (std::size_t thread_index = 0; thread_index < threads; thread_index++) {
futures.emplace_back(
std::async(
std::launch::async,
std::bind(
compute_fitness_at_intervals,
test_intervals,
std::cref(onsets),
min_tested_interval + thread_index * chunk_size * stride,
chunk_size,
@ -129,15 +163,14 @@ std::vector<float> compute_fitness(const std::set<std::size_t>& onsets, const st
);
}
for (auto& future : futures) {
future.wait();
const auto new_values = future.get();
fitness_values.insert(fitness_values.end(), new_values.begin(), new_values.end());
}
return fitness_values;
}
std::vector<float> compute_fitness_at_intervals(const std::set<std::size_t>& onsets, const std::size_t start_interval, const std::size_t count, const std::size_t stride) {
std::vector<float> fitness_values;
std::vector<IntervalFitness> test_intervals(const std::set<std::size_t>& onsets, const std::size_t start_interval, const std::size_t count, const std::size_t stride) {
std::vector<IntervalFitness> fitness_values;
fitness_values.reserve(count);
const auto max_tested_interval = start_interval + (count - 1) * stride;
Eigen::ArrayXf evidence_cache(max_tested_interval);
@ -161,7 +194,26 @@ std::vector<float> compute_fitness_at_intervals(const std::set<std::size_t>& ons
const auto second_half_interval = tested_interval - first_half_interval;
confidence.head(first_half_interval) += evidence_cache.tail(first_half_interval) / 2;
confidence.tail(second_half_interval) += evidence_cache.head(second_half_interval) / 2;
fitness_values.push_back(confidence.maxCoeff());
fitness_values.push_back({tested_interval, confidence.maxCoeff()});
}
return fitness_values;
}
Eigen::ArrayXf correct_bias(const std::vector<IntervalFitness>& fitness_results) {
std::vector<float> y_values(fitness_results.size());
std::transform(
fitness_results.cbegin(),
fitness_results.cend(),
y_values.begin(),
[](const IntervalFitness& f){return f.fitness;}
);
const auto coeffs = polyfit(y_values, 3);
Eigen::ArrayXf fitness(y_values.size());
std::copy(y_values.cbegin(), y_values.cend(), fitness.begin());
return fitness - (
coeffs[0]
+ coeffs[1] * fitness
+ coeffs[2] * fitness.pow(2)
+ coeffs[3] * fitness.pow(3)
);
}

View File

@ -7,22 +7,25 @@
#include <Eigen/Dense>
#include <SFML/System/Time.hpp>
#include "Eigen/src/Core/Matrix.h"
#include "utf8_sfml_redefinitions.hpp"
/*
struct TempoEstimate {
Decimal bpm;
sf::Time offset;
struct IntervalFitness {
std::size_t interval;
float fitness;
};
*/
std::vector<float> guess_tempo(const std::filesystem::path& audio);
std::set<std::size_t> detect_onsets(feis::InputSoundFile& music);
std::vector<float> compute_fitness(const std::set<std::size_t>& onsets, const std::size_t sample_rate);
std::vector<float> compute_fitness_at_intervals(
void estimate_bpm(const std::set<std::size_t>& onsets, const std::size_t sample_rate);
std::vector<IntervalFitness> broad_interval_test(const std::set<std::size_t>& onsets, const std::size_t sample_rate);
std::vector<IntervalFitness> test_intervals(
const std::set<std::size_t>& onsets,
const std::size_t start_interval,
const std::size_t count,
const std::size_t stride
);
Eigen::ArrayXf correct_bias(const std::vector<IntervalFitness>& fitness_results);