Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions include/xsimd/config/xsimd_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,25 +357,50 @@
/**
* @ingroup xsimd_config_macro
*
* Set to 1 if NEON64 is available at compile-time, to 0 otherwise.
* Set to 1 if the target is in the ARM architecture family in 64 bits, to 0 otherwise
*/
#if defined(__aarch64__) || defined(_M_ARM64)
#define XSIMD_WITH_NEON64 1
#define XSIMD_TARGET_ARM64 1
#else
#define XSIMD_WITH_NEON64 0
#define XSIMD_TARGET_ARM64 0
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if the target is in the ARM architecture family, to 0 otherwise
*/
#if defined(__arm__) || defined(_M_ARM) || XSIMD_TARGET_ARM64
#define XSIMD_TARGET_ARM 1
#else
#define XSIMD_TARGET_ARM 0
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if NEON is available at compile-time, to 0 otherwise.
*/
#if (defined(__ARM_NEON) && __ARM_ARCH >= 7) || XSIMD_WITH_NEON64
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && defined(__ARM_NEON)) || XSIMD_TARGET_ARM64
#define XSIMD_WITH_NEON 1
#else
#define XSIMD_WITH_NEON 0
#endif

// Neon is always available on Arm64, though it is theoritially possible to compile
// without it, such as -march=armv8-a+nosimd.
// Note that MSVC may never define __ARM_NEON even when available.
/**
* @ingroup xsimd_config_macro
*
* Set to 1 if NEON64 is available at compile-time, to 0 otherwise.
*/
#if XSIMD_TARGET_ARM64
#define XSIMD_WITH_NEON64 1
#else
#define XSIMD_WITH_NEON64 0
#endif

/**
* @ingroup xsimd_config_macro
*
Expand Down Expand Up @@ -497,4 +522,15 @@
#define XSIMD_NO_SUPPORTED_ARCHITECTURE
#endif

/**
* @ingroup xsimd_config_macro
*
* Set to 1 if the target is a linux
*/
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
#define XSIMD_WITH_LINUX_GETAUXVAL 1
#else
#define XSIMD_WITH_LINUX_GETAUXVAL 0
#endif

#endif
121 changes: 121 additions & 0 deletions include/xsimd/config/xsimd_cpu_features_arm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
***************************************************************************/

#ifndef XSIMD_CPU_FEATURES_ARM_HPP
#define XSIMD_CPU_FEATURES_ARM_HPP

#include "./xsimd_config.hpp"

#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL
#include "../utils/bits.hpp"
#include "./xsimd_getauxval.hpp"

// HWCAP_XXX masks to use on getauxval results.
// Header does not exists on all architectures and masks are architecture
// specific.
#include <asm/hwcap.h>

// Port possibly missing mask. Should only be defined on Arm64.
#if XSIMD_TARGET_ARM64 && !defined(HWCAP2_I8MM)
#define HWCAP2_I8MM (1 << 13)
#endif
#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL

namespace xsimd
{
/**
* An opinionated CPU feature detection utility for ARM.
*
* Combines compile-time knowledge with runtime detection when available.
* On Linux, runtime detection uses getauxval to query the auxiliary vector.
* On other platforms, only compile-time information is used.
*
* This is well defined on all architectures.
* It will always return false on non-ARM architectures.
*/
class arm_cpu_features
{
public:
arm_cpu_features() noexcept = default;

inline bool neon() const noexcept
{
#if XSIMD_TARGET_ARM && !XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL
return hwcap().has_feature(HWCAP_NEON);
#else
return static_cast<bool>(XSIMD_WITH_NEON);
#endif
}

constexpr bool neon64() const noexcept
{
return static_cast<bool>(XSIMD_WITH_NEON64);
}

inline bool sve() const noexcept
{
#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL
return hwcap().has_feature(HWCAP_SVE);
#else
return false;
#endif
}

inline bool i8mm() const noexcept
{
#if XSIMD_TARGET_ARM64 && XSIMD_WITH_LINUX_GETAUXVAL
return hwcap2().has_feature(HWCAP2_I8MM);
#else
return false;
#endif
}

private:
#if XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL
enum class status
{
hwcap_valid = 0,
hwcap2_valid = 1,
};

using status_bitset = utils::uint_bitset<status, std::uint32_t>;

mutable status_bitset m_status {};

mutable xsimd::linux_auxval m_hwcap {};

inline xsimd::linux_auxval const& hwcap() const noexcept
{
if (!m_status.bit_is_set<status::hwcap_valid>())
{
m_hwcap = xsimd::linux_auxval::read(AT_HWCAP);
m_status.set_bit<status::hwcap_valid>();
}
return m_hwcap;
}

#if XSIMD_TARGET_ARM64
mutable xsimd::linux_auxval m_hwcap2 {};

inline xsimd::linux_auxval const& hwcap2() const noexcept
{
if (!m_status.bit_is_set<status::hwcap2_valid>())
{
m_hwcap2 = xsimd::linux_auxval::read(AT_HWCAP2);
m_status.set_bit<status::hwcap2_valid>();
}
return m_hwcap2;
}
#endif
#endif // XSIMD_TARGET_ARM && XSIMD_WITH_LINUX_GETAUXVAL
};
}
#endif
37 changes: 14 additions & 23 deletions include/xsimd/config/xsimd_cpuid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,13 @@
#define XSIMD_CPUID_HPP

#include "../types/xsimd_all_registers.hpp"
#include "./xsimd_cpu_features_arm.hpp"
#include "./xsimd_cpu_features_x86.hpp"
#include "xsimd_inline.hpp"
#include "./xsimd_inline.hpp"

#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
#if XSIMD_WITH_LINUX_GETAUXVAL && defined(__riscv_vector)
#include <asm/hwcap.h>
#include <sys/auxv.h>

#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif

#endif

namespace xsimd
Expand Down Expand Up @@ -92,29 +88,24 @@ namespace xsimd
vsx = 1;
#endif

#if defined(__aarch64__) || defined(_M_ARM64)
neon = 1;
neon64 = 1;
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
#endif

#elif defined(__ARM_NEON) || defined(_M_ARM)
#if XSIMD_WITH_LINUX_GETAUXVAL
#if defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0

#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON);
#endif

#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0

#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
#ifndef HWCAP_V
#define HWCAP_V (1 << ('V' - 'A'))
#endif
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
#endif
#endif

// Safe on all platforms, it will be all false if non arm.
const auto arm_cpu = xsimd::arm_cpu_features();

neon = arm_cpu.neon();
neon64 = arm_cpu.neon64();
i8mm_neon64 = arm_cpu.neon64() && arm_cpu.i8mm();
sve = arm_cpu.sve();

// Safe on all platforms, it will be all false if non x86.
const auto x86_cpu = xsimd::x86_cpu_features();

Expand Down
91 changes: 91 additions & 0 deletions include/xsimd/config/xsimd_getauxval.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
***************************************************************************/

#ifndef XSIMD_GETAUXVAL_HPP
#define XSIMD_GETAUXVAL_HPP

#include "./xsimd_config.hpp"

#if XSIMD_WITH_LINUX_GETAUXVAL
#include <sys/auxv.h> // getauxval
#endif

namespace xsimd
{
namespace detail
{
using linux_getauxval_t = unsigned long;

inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept;
}

/*
* Holds the value of a Linux auxiliary vector entry (e.g. AT_HWCAP).
*
* On Linux systems, the kernel exposes some CPU features through the
* auxiliary vector, which can be queried via `getauxval(AT_HWCAP)`.
* Well defined on all platforms, and will return always falsw on
* non-linux platforms.
*
* Usage:
* auto hwcap = linux_auxval::read(AT_HWCAP);
* bool neon = hwcap.has_feature(HWCAP_NEON);
*
* @see https://www.kernel.org/doc/Documentation/arm64/elf_hwcaps.txt
*/
class linux_auxval
{
private:
using getauxval_t = detail::linux_getauxval_t;

public:
constexpr linux_auxval() noexcept = default;

inline static linux_auxval read(getauxval_t type) noexcept
{
return linux_auxval(detail::linux_getauxval(type));
}

constexpr bool has_feature(getauxval_t feat) const noexcept
{
return (m_auxval & feat) == feat;
}

private:
getauxval_t m_auxval = {};

constexpr explicit linux_auxval(getauxval_t v) noexcept
: m_auxval(v)
{
}
};

/********************
* Implementation *
********************/

namespace detail
{
#if XSIMD_WITH_LINUX_GETAUXVAL
inline linux_getauxval_t linux_getauxval(linux_getauxval_t type) noexcept
{
return getauxval(type);
}
#else
inline linux_getauxval_t linux_getauxval(linux_getauxval_t) noexcept
{
return {}; // All bits set to 0
}
#endif
}
}

#endif
Loading