diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index 61b78f4..393f83b 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h +++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -1042,7 +1042,7 @@ EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_ms #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) -#if EIGEN_GNUC_AT_LEAST(5,0) +#if EIGEN_GNUC_AT_LEAST(5,0) && !(defined(__e2k__) && defined(__LCC__)) #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) #else // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), diff --git a/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index c9fbaf6..708b4be 100644 --- a/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -619,7 +619,7 @@ inline float trig_reduce_huge (float xf, int *quadrant) template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT +#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT && !defined(__e2k__) __attribute__((optimize("-fno-unsafe-math-optimizations"))) #endif Packet psincos_float(const Packet& _x) diff --git a/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h index f35b760..aba984d 100644 --- a/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1201,7 +1201,7 @@ struct lhs_process_one_packet traits.madd(*A0, *rhs_panel, *C1, *T0, fix<1>); traits.madd(*A0, *rhs_panel, *C2, *T0, fix<2>); traits.madd(*A0, *rhs_panel, *C3, *T0, fix<3>); - #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) && !defined(__e2k__) __asm__ ("" : "+x,m" (*A0)); #endif EIGEN_ASM_COMMENT("end step of gebp micro kernel 1X4"); @@ -1727,7 +1727,7 @@ void gebp_kernel=6 without FMA (bug 1637) - #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) + #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) && !defined(__e2k__) #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1)); #else #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND diff --git a/eigen/Eigen/src/Core/util/ConfigureVectorization.h b/eigen/Eigen/src/Core/util/ConfigureVectorization.h index af4e696..ee7566b 100644 --- a/eigen/Eigen/src/Core/util/ConfigureVectorization.h +++ b/eigen/Eigen/src/Core/util/ConfigureVectorization.h @@ -237,6 +237,8 @@ #ifdef __SSE4_1__ #define EIGEN_VECTORIZE_SSE4_1 #endif + // e2k has SSE up to 4.1, effectively (SSE4.2/AVX1 might be faster or not) + #ifndef __e2k__ #ifdef __SSE4_2__ #define EIGEN_VECTORIZE_SSE4_2 #endif @@ -294,6 +296,7 @@ #endif #endif #endif + #endif // __e2k__ // Disable AVX support on broken xcode versions #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 ) diff --git a/eigen/Eigen/src/SparseCore/SparseDenseProduct.h b/eigen/Eigen/src/SparseCore/SparseDenseProduct.h index f005a18..584e795 100644 --- a/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/eigen/Eigen/src/SparseCore/SparseDenseProduct.h @@ -48,7 +48,8 @@ struct sparse_time_dense_product_impl1 && lhsEval.nonZerosEstimate() > 20000) { - #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) + int sched = (n+threads*4-1)/(threads*4); + #pragma omp parallel for schedule(dynamic,sched) num_threads(threads) for(Index i=0; i1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000) { - #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) + int sched = (n+threads*4-1)/(threads*4); + #pragma omp parallel for schedule(dynamic,sched) num_threads(threads) for(Index i=0; i