diff --git a/.drone.yml b/.drone.yml
index d5e3bfe..3af0bcf 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -1,7 +1,7 @@
 ---
 kind: pipeline
 type: docker
-name: default
+name: default-gcc
 
 steps:
   - name: submodules
@@ -17,5 +17,25 @@ steps:
       - cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-march=native" -DCMAKE_C_FLAGS_RELEASE="-march=native" -DCMAKE_INSTALL_PREFIX="/usr"
       - make -j$(nproc)
       - make install
+---
+kind: pipeline
+type: docker
+name: default-clang
 
-...
+steps:
+  - name: submodules
+    image: alpine/git
+    commands:
+      - git submodule update --init --recursive
+  - name: build  # configure, compile and install the project with clang registered as cc/c++
+    image: debian:bullseye
+    commands:
+      - DEBIAN_FRONTEND=noninteractive apt update
+      - DEBIAN_FRONTEND=noninteractive apt install -y clang cmake make
+      - update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100
+      - update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100  # c++ must be the C++ driver (clang++); plain clang does not link the C++ standard library
+      - mkdir build && cd build
+      - cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS_RELEASE="-march=native" -DCMAKE_C_FLAGS_RELEASE="-march=native" -DCMAKE_INSTALL_PREFIX="/usr"
+      - make -j$(nproc)
+      - make install
+...
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 6c080b1..a734e00 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
 /.idea
 /build
-/cmake-build-debug
\ No newline at end of file
+/cmake-build-*
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c4c311..6d93b61 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,8 @@ set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 cmake_policy(SET CMP0069 NEW)
 set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
+cmake_policy(SET CMP0074 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0074 NEW)
 
 
 if(NOT CMAKE_BUILD_TYPE)
@@ -23,16 +25,25 @@ set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb -O0 -fno-exceptions")
 if(EMSCRIPTEN)
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -O3 -ffast-math -fno-exceptions -fno-rtti")
     set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DNDEBUG -O3 -ffast-math -fno-exceptions")
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DNDEBUG -O3 -ffast-math -fno-exceptions -fno-rtti")
+    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -DNDEBUG -O3 -ffast-math -fno-exceptions")
 else()
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -Ofast -fno-exceptions -fno-rtti")
     set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DNDEBUG -Ofast -fno-exceptions")
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DNDEBUG -Ofast -fno-exceptions -fno-rtti")
+    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -DNDEBUG -Ofast -fno-exceptions")
 
     if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
         set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -frecord-gcc-switches -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize")
         set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -frecord-gcc-switches -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize")
+        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -frecord-gcc-switches -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize")
     elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fopt-info-all -frecord-gcc-switches")
-        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fopt-info-all -frecord-gcc-switches")
+#        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fipa-pta -ftree-loop-ivcanon -floop-nest-optimize -ftree-vectorize -ftree-loop-im -fgraphite-identity -floop-parallelize-all -fdevirtualize-speculatively -fdevirtualize-at-ltrans -ftree-parallelize-loops=4 -fopt-info-all -frecord-gcc-switches")
+#        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fipa-pta -ftree-loop-ivcanon -floop-nest-optimize -ftree-vectorize -ftree-loop-im -fgraphite-identity -floop-parallelize-all -fdevirtualize-speculatively -fdevirtualize-at-ltrans -ftree-parallelize-loops=4 -fopt-info-all -frecord-gcc-switches")
+
+        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fvect-cost-model=unlimited -fopt-info-all -frecord-gcc-switches")
+        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvect-cost-model=unlimited -fopt-info-all -frecord-gcc-switches")
+        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fvect-cost-model=unlimited -fopt-info-all -frecord-gcc-switches")
     endif()
 endif()
 
@@ -51,7 +62,7 @@ include_directories(lib/gaborator)
 include_directories(lib/pffft)
 include_directories(lib/MIPP/src)
 
-set(MIPP_ROOT "lib/MIPP/src")
+set(MIPP_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/lib/MIPP/src") # absolute, anchored at this CMakeLists.txt so it also resolves when this project is built as a subproject
 add_subdirectory(lib/pffft EXCLUDE_FROM_ALL)
 
 add_executable(test test.cpp)
diff --git a/cgaborator.cpp b/cgaborator.cpp
index eb9e62c..8d72a48 100644
--- a/cgaborator.cpp
+++ b/cgaborator.cpp
@@ -5,6 +5,12 @@
 #include <cmath>
 #include <memory>
 
+#ifdef __AVX2__
+
+#include <immintrin.h>
+
+#endif
+
 class Gaborator {
 
 public:
@@ -137,15 +143,47 @@ private:
 		}, min_band, INT_MAX, st0, st1, coefs);
 		*/
 
-		gaborator::apply_to_slice(false, [&](int band, int64_t st, int time_step, unsigned len, const std::complex<float> *p0){
-			for (unsigned int i = 0; i < len; i++) {
-				gaborProcessEntry(band, st, std::abs(*p0++));
-				st += time_step;
+		std::vector<float> magnitudes;
+		gaborator::apply_to_slice(false, [&](int band, int64_t sampleIndex, int time_step, unsigned len, const std::complex<float> *p0) {
+
+			//process magnitudes beforehand for easier auto-vectorization
+			magnitudes.resize(len);
+
+#ifdef __AVX2__
+
+			int64_t i;
+			for (i = 0; i < (((int64_t)len) - 7); i += 8) {
+				// load 8 complex values (--> 16 floats overall) into two SIMD registers
+				__m256 inLo = _mm256_loadu_ps(reinterpret_cast<const float *> (p0 + i    ));
+				__m256 inHi = _mm256_loadu_ps(reinterpret_cast<const float *> (p0 + i + 4));
+
+				// separate the real and imaginary parts; the resulting values are in a permuted order
+				__m256 re = _mm256_shuffle_ps (inLo, inHi, _MM_SHUFFLE(2, 0, 2, 0));
+				__m256 im = _mm256_shuffle_ps (inLo, inHi, _MM_SHUFFLE(3, 1, 3, 1));
+
+				// do the heavy work on the unordered vectors
+				__m256 abs = _mm256_sqrt_ps(_mm256_add_ps(_mm256_mul_ps(re, re), _mm256_mul_ps(im, im)));
+
+				// reorder values prior to storing
+				__m256d ordered = _mm256_permute4x64_pd (_mm256_castps_pd(abs), _MM_SHUFFLE(3, 1, 2, 0));
+				_mm256_storeu_ps(magnitudes.data() + i, _mm256_castpd_ps(ordered));
+			}
+
+			for (int64_t j = i; j < len; j++) {
+#else
+			for (unsigned int j = 0; j < len; j++) {
+#endif
+				magnitudes[j] = std::abs(p0[j]);
+			}
+
+			for(auto magnitude : magnitudes){
+				gaborProcessEntry(band, sampleIndex, magnitude);
+				sampleIndex += time_step;
 			}
 		}, min_band, INT_MAX, st0, st1, coefs);
 	}
 
-    inline void gaborProcessEntry(int band, int64_t sampleIndex, float coef) {
+    inline void gaborProcessEntry(int band, int64_t sampleIndex, float coefficient) {
 		int64_t coefficientIndex = sampleIndex / frequencyBinTimeStepSize;
         int bandIndex = band - firstBandCache;
 
@@ -171,7 +209,7 @@ private:
             // due to reduction in precision (from audio sample accuracy to steps) multiple
             // magnitudes could be placed in the same stepIndex, bandIndex pair.
             // We take the maximum magnitudes value.
-            currentCoefficient[bandIndex] = std::max(currentCoefficient[bandIndex], coef);
+            currentCoefficient[bandIndex] = std::max(currentCoefficient[bandIndex], coefficient);
         }
 
     }
@@ -180,9 +218,13 @@ private:
 
 
     std::vector<float> resultCache;
+
+	//circular buffer with current coefficients
     std::vector<std::vector<float>> coefficients;
     int firstBandCache = -1;
     int numberOfBandsCache = 0;
+
+	//The index of the most recent coefficient (in steps)
     int64_t mostRecentCoefficentIndex = 0;
 
     const int blockSize;
diff --git a/cgaborator.pc.in b/cgaborator.pc.in
index 0d7ab9e..d763bca 100644
--- a/cgaborator.pc.in
+++ b/cgaborator.pc.in
@@ -10,4 +10,4 @@ Requires: @pc_req_public@
 Requires.private: @pc_req_private@
 Cflags: -I"${includedir}"
 Libs: -L"${libdir}" -l@target1@
-Libs.private: -L"${libdir}" -l@target1@ -l@target2@ @pc_libs_private@
\ No newline at end of file
+Libs.private: -L"${libdir}" -l@target1@ @pc_libs_private@
\ No newline at end of file