Compare commits
3 commits
13f6e179e3
...
b77b6d919c
Author | SHA1 | Date | |
---|---|---|---|
DataHoarder | b77b6d919c | ||
DataHoarder | 57709448de | ||
DataHoarder | c20789e8ce |
|
@@ -41,9 +41,9 @@ else()
|
|||
# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fipa-pta -ftree-loop-ivcanon -floop-nest-optimize -ftree-vectorize -ftree-loop-im -fgraphite-identity -floop-parallelize-all -fdevirtualize-speculatively -fdevirtualize-at-ltrans -ftree-parallelize-loops=4 -fopt-info-all -frecord-gcc-switches")
|
||||
# set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fipa-pta -ftree-loop-ivcanon -floop-nest-optimize -ftree-vectorize -ftree-loop-im -fgraphite-identity -floop-parallelize-all -fdevirtualize-speculatively -fdevirtualize-at-ltrans -ftree-parallelize-loops=4 -fopt-info-all -frecord-gcc-switches")
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fvect-cost-model=unlimited -fopt-info-all -frecord-gcc-switches")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvect-cost-model=unlimited -fopt-info-all -frecord-gcc-switches")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fvect-cost-model=unlimited -fopt-info-all -frecord-gcc-switches")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fvect-cost-model=unlimited -fipa-pta -ftree-loop-ivcanon -ftree-loop-im -fdevirtualize-speculatively -fdevirtualize-at-ltrans -fopt-info-all -frecord-gcc-switches")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvect-cost-model=unlimited -fipa-pta -ftree-loop-ivcanon -ftree-loop-im -fdevirtualize-speculatively -fdevirtualize-at-ltrans -fopt-info-all -frecord-gcc-switches")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fvect-cost-model=unlimited -fipa-pta -ftree-loop-ivcanon -ftree-loop-im -fdevirtualize-speculatively -fdevirtualize-at-ltrans -fopt-info-all -frecord-gcc-switches")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
102
cgaborator.cpp
102
cgaborator.cpp
|
@@ -56,10 +56,8 @@ public:
|
|||
|
||||
coefficientSize = (latency + 2*blockSize) / frequencyBinTimeStepSize;
|
||||
|
||||
coefficients.resize(coefficientSize);
|
||||
for (auto & coefficient : coefficients){
|
||||
coefficient.resize(numberOfBandsCache);
|
||||
}
|
||||
//Allocate ring buffer and members in a contiguous array
|
||||
coefficients = static_cast<float *>(calloc(coefficientSize * numberOfBandsCache, sizeof(float)));
|
||||
|
||||
assert(t_in == 0);
|
||||
|
||||
|
@@ -94,7 +92,9 @@ public:
|
|||
return numberOfBandsCache;
|
||||
}
|
||||
|
||||
~Gaborator()= default;
|
||||
~Gaborator() {
|
||||
free(coefficients);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
|
@@ -122,32 +122,42 @@ private:
|
|||
|
||||
//flush remaining
|
||||
for (int i = 1; i < coefficientSize; ++i) {
|
||||
int64_t circularIndex = (mostRecentCoefficentIndex + i) % coefficientSize;
|
||||
float* currentCoefficient = &coefficients[((mostRecentCoefficentIndex + i) % coefficientSize) * numberOfBandsCache];
|
||||
|
||||
auto& currentCoefficient = coefficients[circularIndex];
|
||||
|
||||
resultCache.insert(resultCache.end(), currentCoefficient.begin(), currentCoefficient.end());
|
||||
resultCache.insert(resultCache.end(), currentCoefficient, currentCoefficient + numberOfBandsCache);
|
||||
// fill the oldest with zeros, but only the first round
|
||||
if(i <= coefficientSize) {
|
||||
std::fill(currentCoefficient.begin(), currentCoefficient.end(), 0);
|
||||
std::fill(currentCoefficient, currentCoefficient + numberOfBandsCache, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void gaborApplySlice(int64_t st0, int64_t st1) {
|
||||
//Adjust start to match gaborProcessEntry requirements
|
||||
if((st0 / frequencyBinTimeStepSize) <= 0){
|
||||
st0 = frequencyBinTimeStepSize;
|
||||
}
|
||||
|
||||
//Skip if nothing to process, the first results have a negative audio sample index
|
||||
if(st0 > st1){
|
||||
return;
|
||||
}
|
||||
|
||||
int b0 = min_band;
|
||||
int b1 = numberOfBandsCache + firstBandCache;
|
||||
|
||||
/*
|
||||
Following code is equivalent, but it has been inlined for performance
|
||||
|
||||
gaborator::process([&](int band, int64_t audioSampleIndex, std::complex<float>& coef) {
|
||||
gaborProcessEntry(band, audioSampleIndex, coef);
|
||||
}, min_band, INT_MAX, st0, st1, coefs);
|
||||
}, b0, b1, st0, st1, coefs);
|
||||
*/
|
||||
|
||||
std::vector<float> magnitudes;
|
||||
gaborator::apply_to_slice(false, [&](int band, int64_t sampleIndex, int time_step, unsigned len, const std::complex<float> *p0) {
|
||||
|
||||
//process magnitudes beforehand for easier auto-vectorization
|
||||
magnitudes.resize(len);
|
||||
magnitudeCache.resize(len);
|
||||
|
||||
#ifdef __AVX2__
|
||||
|
||||
|
@@ -166,52 +176,43 @@ private:
|
|||
|
||||
// reorder values prior to storing
|
||||
__m256d ordered = _mm256_permute4x64_pd (_mm256_castps_pd(abs), _MM_SHUFFLE(3, 1, 2, 0));
|
||||
_mm256_storeu_ps(magnitudes.data() + i, _mm256_castpd_ps(ordered));
|
||||
_mm256_storeu_ps(magnitudeCache.data() + i, _mm256_castpd_ps(ordered));
|
||||
}
|
||||
|
||||
for (int64_t j = i; j < len; j++) {
|
||||
#else
|
||||
for (unsigned int j = 0; j < len; j++) {
|
||||
for (int64_t j = 0; j < len; j++) {
|
||||
#endif
|
||||
magnitudes[j] = std::abs(p0[j]);
|
||||
magnitudeCache[j] = std::abs(p0[j]);
|
||||
}
|
||||
|
||||
for(auto magnitude : magnitudes){
|
||||
gaborProcessEntry(band, sampleIndex, magnitude);
|
||||
sampleIndex += time_step;
|
||||
|
||||
int bandIndex = band - firstBandCache;
|
||||
for (unsigned int j = 0; j < len; j++) {
|
||||
gaborProcessEntry(bandIndex, (sampleIndex + time_step * j) / frequencyBinTimeStepSize, magnitudeCache[j]);
|
||||
}
|
||||
}, min_band, INT_MAX, st0, st1, coefs);
|
||||
}, b0, b1, st0, st1, coefs);
|
||||
}
|
||||
|
||||
inline void gaborProcessEntry(int band, int64_t sampleIndex, float coefficient) {
|
||||
int64_t coefficientIndex = sampleIndex / frequencyBinTimeStepSize;
|
||||
int bandIndex = band - firstBandCache;
|
||||
inline void gaborProcessEntry(int bandIndex, int64_t coefficientIndex, float coefficient) {
|
||||
float* currentCoefficient = &coefficients[(coefficientIndex % coefficientSize) * numberOfBandsCache];
|
||||
|
||||
// The first results have a negative audio sample index
|
||||
// ignore these
|
||||
if (coefficientIndex > 0 && bandIndex < numberOfBandsCache) {
|
||||
|
||||
int64_t circularIndex = coefficientIndex % coefficientSize;
|
||||
|
||||
auto& currentCoefficient = coefficients[circularIndex];
|
||||
|
||||
// If a new index is reached, save the old (fixed) coefficients in the history
|
||||
// Fill the array with zeros to get the max
|
||||
if (coefficientIndex > mostRecentCoefficentIndex && coefficientIndex > coefficientSize) {
|
||||
// keep the new maximum
|
||||
mostRecentCoefficentIndex = coefficientIndex;
|
||||
// "copy" the oldest data to the history
|
||||
// the slice can be reused thanks to the oldest being filled with zeros just after
|
||||
resultCache.insert(resultCache.end(), currentCoefficient.begin(), currentCoefficient.end());
|
||||
// fill the oldest with zeros
|
||||
std::fill(currentCoefficient.begin(), currentCoefficient.end(), 0);
|
||||
}
|
||||
// due to reduction in precision (from audio sample accuracy to steps) multiple
|
||||
// magnitudes could be placed in the same stepIndex, bandIndex pair.
|
||||
// We take the maximum magnitudes value.
|
||||
currentCoefficient[bandIndex] = std::max(currentCoefficient[bandIndex], coefficient);
|
||||
}
|
||||
// If a new index is reached, save the old (fixed) coefficients in the history
|
||||
// Fill the array with zeros to get the max
|
||||
if (coefficientIndex > mostRecentCoefficentIndex && coefficientIndex > coefficientSize) {
|
||||
// keep the new maximum
|
||||
mostRecentCoefficentIndex = coefficientIndex;
|
||||
// "copy" the oldest data to the history
|
||||
// the slice can be reused thanks to the oldest being filled with zeros just after
|
||||
resultCache.insert(resultCache.end(), currentCoefficient, currentCoefficient + numberOfBandsCache);
|
||||
// fill the oldest with zeros
|
||||
std::fill(currentCoefficient, currentCoefficient + numberOfBandsCache, 0);
|
||||
}
|
||||
|
||||
// due to reduction in precision (from audio sample accuracy to steps) multiple
|
||||
// magnitudes could be placed in the same stepIndex, bandIndex pair.
|
||||
// We take the maximum magnitudes value.
|
||||
currentCoefficient[bandIndex] = std::max(currentCoefficient[bandIndex], coefficient);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@@ -219,15 +220,16 @@ private:
|
|||
|
||||
std::vector<float> resultCache;
|
||||
|
||||
//circular buffer with current coefficents
|
||||
std::vector<std::vector<float>> coefficients;
|
||||
//circular buffer with current coefficients
|
||||
float* coefficients = nullptr;
|
||||
int firstBandCache = -1;
|
||||
int numberOfBandsCache = 0;
|
||||
|
||||
//The index of the most recent coefficent (in steps)
|
||||
//The index of the most recent coefficient (in steps)
|
||||
int64_t mostRecentCoefficentIndex = 0;
|
||||
|
||||
const int blockSize;
|
||||
std::vector<float> magnitudeCache;
|
||||
const int64_t frequencyBinTimeStepSize;
|
||||
int64_t t_in;
|
||||
int min_band;
|
||||
|
|
Loading…
Reference in a new issue