#include #include "include/cgaborator.h" #include "gaborator.h" #include #include #ifdef __AVX2__ #include #endif class Gaborator { public: Gaborator(int block_size, double sampleRate, int bandsPerOctave, double minimumFrequency, double referenceFrequency, double maximumFrequency, int stepSize) : parameters(bandsPerOctave, minimumFrequency / sampleRate, referenceFrequency / sampleRate, 1.0, 1e-5), analyzer(parameters), coefs(analyzer), frequencyBinTimeStepSize(stepSize), sample_rate((int) sampleRate), blockSize(block_size) { //converts frequency (ff_max) in hertz to the number of bands above the min frequency //the ceil is used to end up at a full band int interesting_bands = ceil(bandsPerOctave * log(maximumFrequency/minimumFrequency)/log(2.0f)); //since bands are ordered from high to low we are only interested in lower bands: //fs/2.0 is the nyquist frequency int total_bands = ceil(bandsPerOctave * log(sampleRate/2.0/minimumFrequency)/log(2.0f)); latency = (int64_t) ceil(analyzer.analysis_support()); min_band = total_bands - interesting_bands; t_in = 0; int max_band = analyzer.bandpass_bands_end(); std::vector bandcenterCache(max_band); for(int i = 0 ; i < max_band ; i++){ if(i < min_band){ bandcenterCache[i] = -1; }else{ bandcenterCache[i] = analyzer.band_ff(i) * sample_rate; } if (bandcenterCache[i] > 0) { ++numberOfBandsCache; if (firstBandCache == -1) { firstBandCache = i; } } } coefficientSize = (latency + 2*blockSize) / frequencyBinTimeStepSize; //Allocate ring buffer and members in a contiguous array coefficients = static_cast(calloc(coefficientSize * numberOfBandsCache, sizeof(float))); assert(t_in == 0); } float* gaborTransform(float* audio_block, int64_t audio_block_length, size_t* return_size, size_t* slice_size) { resultCache.clear(); if (audio_block == nullptr || audio_block_length == 0) { //finish finish(); } else { analyze(audio_block, audio_block_length); } if(!resultCache.empty()){ *return_size = resultCache.size(); *slice_size = numberOfBandsCache; return resultCache.data(); } else{ *return_size = 0; *slice_size = 0; return nullptr; } } int64_t analysisSupport() const { return latency; } int numberOfBands() const { return numberOfBandsCache; } ~Gaborator() { free(coefficients); } private: void analyze(float* audio_block, int64_t audio_block_length){ analyzer.analyze(audio_block, t_in, t_in + audio_block_length, coefs); int64_t st0 = t_in - latency; int64_t st1 = t_in - latency + audio_block_length; gaborApplySlice(st0, st1); t_in += audio_block_length; int64_t t_out = t_in - latency; forget_before(analyzer, coefs, t_out - audio_block_length); } void finish(){ int64_t st0 = t_in - latency; int64_t st1 = t_in; //flush all till latency spot gaborApplySlice(st0, st1); //flush remaining for (int i = 1; i < coefficientSize; ++i) { float* currentCoefficient = &coefficients[((mostRecentCoefficentIndex + i) % coefficientSize) * numberOfBandsCache]; resultCache.insert(resultCache.end(), currentCoefficient, currentCoefficient + numberOfBandsCache); // fill the oldest with zeros, but only the first round if(i <= coefficientSize) { std::fill(currentCoefficient, currentCoefficient + numberOfBandsCache, 0); } } } inline void gaborApplySlice(int64_t st0, int64_t st1) { //Adjust start to match gaborProcessEntry requirements if((st0 / frequencyBinTimeStepSize) <= 0){ st0 = frequencyBinTimeStepSize; } //Skip if nothing to process, the first results have a negative audio sample index if(st0 > st1){ return; } int b0 = min_band; int b1 = numberOfBandsCache + firstBandCache; /* Following code is equivalent, but it has been inlined for performance gaborator::process([&](int band, int64_t audioSampleIndex, std::complex& coef) { gaborProcessEntry(band, audioSampleIndex, coef); }, b0, b1, st0, st1, coefs); */ gaborator::apply_to_slice(false, [&](int band, int64_t sampleIndex, int time_step, unsigned len, const std::complex *p0) { //process magnitudes beforehand for easier auto-vectorization if(magnitudeCache.size() < len){ magnitudeCache.resize(len); } #ifdef __AVX2__ int64_t i; for (i = 0; i < (((int64_t)len) - 7); i += 8) { // load 8 complex values (--> 16 floats overall) into two SIMD registers __m256 inLo = _mm256_loadu_ps(reinterpret_cast (p0 + i )); __m256 inHi = _mm256_loadu_ps(reinterpret_cast (p0 + i + 4)); // separates the real and imaginary part, however values are in a wrong order __m256 re = _mm256_shuffle_ps (inLo, inHi, _MM_SHUFFLE(2, 0, 2, 0)); __m256 im = _mm256_shuffle_ps (inLo, inHi, _MM_SHUFFLE(3, 1, 3, 1)); // do the heavy work on the unordered vectors __m256 abs = _mm256_sqrt_ps(_mm256_add_ps(_mm256_mul_ps(re, re), _mm256_mul_ps(im, im))); // reorder values prior to storing __m256d ordered = _mm256_permute4x64_pd (_mm256_castps_pd(abs), _MM_SHUFFLE(3, 1, 2, 0)); _mm256_storeu_ps(magnitudeCache.data() + i, _mm256_castpd_ps(ordered)); } for (int64_t j = i; j < len; j++) { #else for (int64_t j = 0; j < len; j++) { #endif magnitudeCache[j] = std::abs(p0[j]); } int bandIndex = band - firstBandCache; for (unsigned int j = 0; j < len; j++) { gaborProcessEntry(bandIndex, (sampleIndex + time_step * j) / frequencyBinTimeStepSize, magnitudeCache[j]); } }, b0, b1, st0, st1, coefs); } inline void gaborProcessEntry(int bandIndex, int64_t coefficientIndex, float coefficient) { float* currentCoefficient = &coefficients[(coefficientIndex % coefficientSize) * numberOfBandsCache]; // If a new index is reached, save the old (fixed) coefficients in the history // Fill the array with zeros to get the max if (coefficientIndex > mostRecentCoefficentIndex && coefficientIndex > coefficientSize) { // keep the new maximum mostRecentCoefficentIndex = coefficientIndex; // "copy" the oldest data to the history // the slice can be reused thanks to the oldest being filled with zeros just after resultCache.insert(resultCache.end(), currentCoefficient, currentCoefficient + numberOfBandsCache); // fill the oldest with zeros std::fill(currentCoefficient, currentCoefficient + numberOfBandsCache, 0); } // due to reduction in precision (from audio sample accuracy to steps) multiple // magnitudes could be placed in the same stepIndex, bandIndex pair. // We take the maximum magnitudes value. currentCoefficient[bandIndex] = std::max(currentCoefficient[bandIndex], coefficient); } private: std::vector resultCache; //circular buffer with current coefficients float* coefficients = nullptr; int firstBandCache = -1; int numberOfBandsCache = 0; //The index of the most recent coefficient (in steps) int64_t mostRecentCoefficentIndex = 0; const int blockSize; std::vector magnitudeCache; const int64_t frequencyBinTimeStepSize; int64_t t_in; int min_band; const int sample_rate; int64_t latency; int64_t coefficientSize; private: const gaborator::parameters parameters; gaborator::analyzer analyzer; gaborator::coefs coefs; }; uintptr_t gaborator_initialize(int blockSize, double sampleRate, int bandsPerOctave, double minimumFrequency, double referenceFrequency, double maximumFrequency, int stepSize){ return reinterpret_cast(new Gaborator(blockSize, sampleRate, bandsPerOctave, minimumFrequency, referenceFrequency, maximumFrequency, stepSize)); } int64_t gaborator_analysis_support(uintptr_t ptr) { return reinterpret_cast(ptr)->analysisSupport(); } int gaborator_number_of_bands(uintptr_t ptr) { return reinterpret_cast(ptr)->numberOfBands(); } float* gaborator_transform(uintptr_t ptr, float* audio_block, int64_t audio_block_length, size_t* return_size, size_t* slice_size){ return reinterpret_cast(ptr)->gaborTransform(audio_block, audio_block_length, return_size, slice_size); } void gaborator_release(uintptr_t ptr) { auto g = reinterpret_cast(ptr); delete g; }