# --- CMakeLists.txt (top level): opt-in benchmark build ----------------------
# Benchmarks are OFF by default so plain library consumers do not pay the
# google/benchmark fetch/build cost.
option(ADA_IDNA_BENCHMARKS "Build benchmarks" OFF)

if(ADA_IDNA_BENCHMARKS)
  message(STATUS "Ada benchmarks enabled.")
  add_subdirectory(benchmarks)
else()
  if(is_top_project)
    message(STATUS "Ada IDNA benchmarks disabled. Set ADA_IDNA_BENCHMARKS=ON to enable them.")
  endif()
endif()

# --- benchmarks/CMakeLists.txt (new file) -------------------------------------
# ToASCII Bench
add_executable(to_ascii to_ascii.cpp)
target_link_libraries(to_ascii PRIVATE ada-idna)
# NOTE(review): the two generator expressions below were reduced to "$" by
# angle-bracket stripping in the patch; reconstructed as BUILD_INTERFACE
# include paths -- confirm the exact directories against repository history.
target_include_directories(to_ascii PUBLIC
  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>")
target_include_directories(to_ascii PUBLIC
  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")

include(${PROJECT_SOURCE_DIR}/cmake/import.cmake)

# Disable google/benchmark's own tests, install rules and -Werror so it builds
# quietly as a vendored dependency.
set_off(BENCHMARK_ENABLE_TESTING)
set_off(BENCHMARK_ENABLE_INSTALL)
set_off(BENCHMARK_ENABLE_WERROR)

# Google Benchmarks
import_dependency(google_benchmarks google/benchmark f91b6b4)
add_dependency(google_benchmarks)
target_link_libraries(to_ascii PRIVATE benchmark::benchmark)

message(STATUS "Compiler is ${CMAKE_CXX_COMPILER_ID}")

if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
  message(STATUS "Compiler version ${CMAKE_CXX_COMPILER_VERSION}")
endif()

# --- benchmarks/performancecounters/apple_arm_events.h (new file) begins -----

// Original design from:
// =============================================================================
// XNU kperf/kpc
// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges
//
// References:
//
// XNU source (since xnu
// 2422.1.72):
// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h
// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c
//
// Lightweight PET (Profile Every Thread, since xnu 3789.1.32):
// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c
// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c
//
// System Private frameworks (since macOS 10.11, iOS 8.0):
// /System/Library/PrivateFrameworks/kperf.framework
// /System/Library/PrivateFrameworks/kperfdata.framework
//
// Xcode framework (since Xcode 7.0):
// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework
//
// CPU database (plist files)
// macOS (since macOS 10.11):
// /usr/share/kpep/<name>.plist
// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0):
// /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
// /DeviceSupport/<version>/DeveloperDiskImage.dmg/usr/share/kpep/<name>.plist
//
//
// Created by YaoYuan <ibireme@gmail.com> on 2021.
// Released into the public domain (unlicense.org).
+// ============================================================================= + +#ifndef M1CYCLES_H +#define M1CYCLES_H + +#include // for dlopen() and dlsym() +#include // for mach_absolute_time() +#include +#include +#include +#include +#include +#include // for kdebug trace decode +#include // for sysctl() +#include // for usleep() + +struct performance_counters { + double cycles; + double branches; + double missed_branches; + double instructions; + performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i) + : cycles(c), branches(b), missed_branches(m), instructions(i) {} + performance_counters(double c, double b, double m, double i) + : cycles(c), branches(b), missed_branches(m), instructions(i) {} + performance_counters(double init) + : cycles(init), + branches(init), + missed_branches(init), + instructions(init) {} + + inline performance_counters &operator-=(const performance_counters &other) { + cycles -= other.cycles; + branches -= other.branches; + missed_branches -= other.missed_branches; + instructions -= other.instructions; + return *this; + } + inline performance_counters &min(const performance_counters &other) { + cycles = other.cycles < cycles ? other.cycles : cycles; + branches = other.branches < branches ? other.branches : branches; + missed_branches = other.missed_branches < missed_branches + ? other.missed_branches + : missed_branches; + instructions = + other.instructions < instructions ? 
other.instructions : instructions; + return *this; + } + inline performance_counters &operator+=(const performance_counters &other) { + cycles += other.cycles; + branches += other.branches; + missed_branches += other.missed_branches; + instructions += other.instructions; + return *this; + } + + inline performance_counters &operator/=(double numerator) { + cycles /= numerator; + branches /= numerator; + missed_branches /= numerator; + instructions /= numerator; + return *this; + } +}; + +inline performance_counters operator-(const performance_counters &a, + const performance_counters &b) { + return performance_counters(a.cycles - b.cycles, a.branches - b.branches, + a.missed_branches - b.missed_branches, + a.instructions - b.instructions); +} + +typedef float f32; +typedef double f64; +typedef int8_t i8; +typedef uint8_t u8; +typedef int16_t i16; +typedef uint16_t u16; +typedef int32_t i32; +typedef uint32_t u32; +typedef int64_t i64; +typedef uint64_t u64; +typedef size_t usize; + +// ----------------------------------------------------------------------------- +// header (reverse engineered) +// This framework wraps some sysctl calls to communicate with the kpc in kernel. +// Most functions requires root privileges, or process is "blessed". +// ----------------------------------------------------------------------------- + +// Cross-platform class constants. +#define KPC_CLASS_FIXED (0) +#define KPC_CLASS_CONFIGURABLE (1) +#define KPC_CLASS_POWER (2) +#define KPC_CLASS_RAWPMU (3) + +// Cross-platform class mask constants. +#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED) // 1 +#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2 +#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER) // 4 +#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU) // 8 + +// PMU version constants. 
+#define KPC_PMU_ERROR (0) // Error +#define KPC_PMU_INTEL_V3 (1) // Intel +#define KPC_PMU_ARM_APPLE (2) // ARM64 +#define KPC_PMU_INTEL_V2 (3) // Old Intel +#define KPC_PMU_ARM_V2 (4) // Old ARM + +// The maximum number of counters we could read from every class in one go. +// ARMV7: FIXED: 1, CONFIGURABLE: 4 +// ARM32: FIXED: 2, CONFIGURABLE: 6 +// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8) +// x86: 32 +#define KPC_MAX_COUNTERS 32 + +// Bits for defining what to do on an action. +// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h +#define KPERF_SAMPLER_TH_INFO (1U << 0) +#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1) +#define KPERF_SAMPLER_KSTACK (1U << 2) +#define KPERF_SAMPLER_USTACK (1U << 3) +#define KPERF_SAMPLER_PMC_THREAD (1U << 4) +#define KPERF_SAMPLER_PMC_CPU (1U << 5) +#define KPERF_SAMPLER_PMC_CONFIG (1U << 6) +#define KPERF_SAMPLER_MEMINFO (1U << 7) +#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8) +#define KPERF_SAMPLER_TH_DISPATCH (1U << 9) +#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10) +#define KPERF_SAMPLER_SYS_MEM (1U << 11) +#define KPERF_SAMPLER_TH_INSCYC (1U << 12) +#define KPERF_SAMPLER_TK_INFO (1U << 13) + +// Maximum number of kperf action ids. +#define KPERF_ACTION_MAX (32) + +// Maximum number of kperf timer ids. +#define KPERF_TIMER_MAX (8) + +// x86/arm config registers are 64-bit +typedef u64 kpc_config_t; + +/// Print current CPU identification string to the buffer (same as snprintf), +/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC +/// database in /usr/share/kpep. +/// @return string's length, or negative value if error occurs. +/// @note This method does not requires root privileges. +/// @details sysctl get(hw.cputype), get(hw.cpusubtype), +/// get(hw.cpufamily), get(machdep.cpu.model) +static int (*kpc_cpu_string)(char *buf, usize buf_size); + +/// Get the version of KPC that's being run. +/// @return See `PMU version constants` above. 
+/// @details sysctl get(kpc.pmu_version) +static u32 (*kpc_pmu_version)(void); + +/// Get running PMC classes. +/// @return See `class mask constants` above, +/// 0 if error occurs or no class is set. +/// @details sysctl get(kpc.counting) +static u32 (*kpc_get_counting)(void); + +/// Set PMC classes to enable counting. +/// @param classes See `class mask constants` above, set 0 to shutdown counting. +/// @return 0 for success. +/// @details sysctl set(kpc.counting) +static int (*kpc_set_counting)(u32 classes); + +/// Get running PMC classes for current thread. +/// @return See `class mask constants` above, +/// 0 if error occurs or no class is set. +/// @details sysctl get(kpc.thread_counting) +static u32 (*kpc_get_thread_counting)(void); + +/// Set PMC classes to enable counting for current thread. +/// @param classes See `class mask constants` above, set 0 to shutdown counting. +/// @return 0 for success. +/// @details sysctl set(kpc.thread_counting) +static int (*kpc_set_thread_counting)(u32 classes); + +/// Get how many config registers there are for a given mask. +/// For example: Intel may returns 1 for `KPC_CLASS_FIXED_MASK`, +/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. +/// @param classes See `class mask constants` above. +/// @return 0 if error occurs or no class is set. +/// @note This method does not requires root privileges. +/// @details sysctl get(kpc.config_count) +static u32 (*kpc_get_config_count)(u32 classes); + +/// Get config registers. +/// @param classes see `class mask constants` above. +/// @param config Config buffer to receive values, should not smaller than +/// kpc_get_config_count(classes) * sizeof(kpc_config_t). +/// @return 0 for success. +/// @details sysctl get(kpc.config_count), get(kpc.config) +static int (*kpc_get_config)(u32 classes, kpc_config_t *config); + +/// Set config registers. +/// @param classes see `class mask constants` above. 
+/// @param config Config buffer, should not smaller than +/// kpc_get_config_count(classes) * sizeof(kpc_config_t). +/// @return 0 for success. +/// @details sysctl get(kpc.config_count), set(kpc.config) +static int (*kpc_set_config)(u32 classes, kpc_config_t *config); + +/// Get how many counters there are for a given mask. +/// For example: Intel may returns 3 for `KPC_CLASS_FIXED_MASK`, +/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. +/// @param classes See `class mask constants` above. +/// @note This method does not requires root privileges. +/// @details sysctl get(kpc.counter_count) +static u32 (*kpc_get_counter_count)(u32 classes); + +/// Get counter accumulations. +/// If `all_cpus` is true, the buffer count should not smaller than +/// (cpu_count * counter_count). Otherwize, the buffer count should not smaller +/// than (counter_count). +/// @see kpc_get_counter_count(), kpc_cpu_count(). +/// @param all_cpus true for all CPUs, false for current cpu. +/// @param classes See `class mask constants` above. +/// @param curcpu A pointer to receive current cpu id, can be NULL. +/// @param buf Buffer to receive counter's value. +/// @return 0 for success. +/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters) +static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu, + u64 *buf); + +/// Get counter accumulations for current thread. +/// @param tid Thread id, should be 0. +/// @param buf_count The number of buf's elements (not bytes), +/// should not smaller than kpc_get_counter_count(). +/// @param buf Buffer to receive counter's value. +/// @return 0 for success. +/// @details sysctl get(kpc.thread_counters) +static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf); + +/// Acquire/release the counters used by the Power Manager. +/// @param val 1:acquire, 0:release +/// @return 0 for success. 
+/// @details sysctl set(kpc.force_all_ctrs) +static int (*kpc_force_all_ctrs_set)(int val); + +/// Get the state of all_ctrs. +/// @return 0 for success. +/// @details sysctl get(kpc.force_all_ctrs) +static int (*kpc_force_all_ctrs_get)(int *val_out); + +/// Set number of actions, should be `KPERF_ACTION_MAX`. +/// @details sysctl set(kperf.action.count) +static int (*kperf_action_count_set)(u32 count); + +/// Get number of actions. +/// @details sysctl get(kperf.action.count) +static int (*kperf_action_count_get)(u32 *count); + +/// Set what to sample when a trigger fires an action, e.g. +/// `KPERF_SAMPLER_PMC_CPU`. +/// @details sysctl set(kperf.action.samplers) +static int (*kperf_action_samplers_set)(u32 actionid, u32 sample); + +/// Get what to sample when a trigger fires an action. +/// @details sysctl get(kperf.action.samplers) +static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample); + +/// Apply a task filter to the action, -1 to disable filter. +/// @details sysctl set(kperf.action.filter_by_task) +static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port); + +/// Apply a pid filter to the action, -1 to disable filter. +/// @details sysctl set(kperf.action.filter_by_pid) +static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid); + +/// Set number of time triggers, should be `KPERF_TIMER_MAX`. +/// @details sysctl set(kperf.timer.count) +static int (*kperf_timer_count_set)(u32 count); + +/// Get number of time triggers. +/// @details sysctl get(kperf.timer.count) +static int (*kperf_timer_count_get)(u32 *count); + +/// Set timer number and period. +/// @details sysctl set(kperf.timer.period) +static int (*kperf_timer_period_set)(u32 actionid, u64 tick); + +/// Get timer number and period. +/// @details sysctl get(kperf.timer.period) +static int (*kperf_timer_period_get)(u32 actionid, u64 *tick); + +/// Set timer number and actionid. 
+/// @details sysctl set(kperf.timer.action) +static int (*kperf_timer_action_set)(u32 actionid, u32 timerid); + +/// Get timer number and actionid. +/// @details sysctl get(kperf.timer.action) +static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid); + +/// Set which timer ID does PET (Profile Every Thread). +/// @details sysctl set(kperf.timer.pet_timer) +static int (*kperf_timer_pet_set)(u32 timerid); + +/// Get which timer ID does PET (Profile Every Thread). +/// @details sysctl get(kperf.timer.pet_timer) +static int (*kperf_timer_pet_get)(u32 *timerid); + +/// Enable or disable sampling. +/// @details sysctl set(kperf.sampling) +static int (*kperf_sample_set)(u32 enabled); + +/// Get is currently sampling. +/// @details sysctl get(kperf.sampling) +static int (*kperf_sample_get)(u32 *enabled); + +/// Reset kperf: stop sampling, kdebug, timers and actions. +/// @return 0 for success. +static int (*kperf_reset)(void); + +/// Nanoseconds to CPU ticks. +static u64 (*kperf_ns_to_ticks)(u64 ns); + +/// CPU ticks to nanoseconds. +static u64 (*kperf_ticks_to_ns)(u64 ticks); + +/// CPU ticks frequency (mach_absolute_time). +static u64 (*kperf_tick_frequency)(void); + +/// Get lightweight PET mode (not in kperf.framework). +static int kperf_lightweight_pet_get(u32 *enabled) { + if (!enabled) return -1; + usize size = 4; + return sysctlbyname("kperf.lightweight_pet", enabled, &size, NULL, 0); +} + +/// Set lightweight PET mode (not in kperf.framework). +static int kperf_lightweight_pet_set(u32 enabled) { + return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled, 4); +} + +// ----------------------------------------------------------------------------- +// header (reverse engineered) +// This framework provides some functions to access the local CPU database. +// These functions do not require root privileges. +// ----------------------------------------------------------------------------- + +// KPEP CPU archtecture constants. 
+#define KPEP_ARCH_I386 0 +#define KPEP_ARCH_X86_64 1 +#define KPEP_ARCH_ARM 2 +#define KPEP_ARCH_ARM64 3 + +/// KPEP event (size: 48/28 bytes on 64/32 bit OS) +typedef struct kpep_event { + const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY". + const char *description; ///< Description for this event. + const char *errata; ///< Errata, currently NULL. + const char *alias; ///< Alias name, such as "Instructions", "Cycles". + const char *fallback; ///< Fallback event name for fixed counter. + u32 mask; + u8 number; + u8 umask; + u8 reserved; + u8 is_fixed; +} kpep_event; + +/// KPEP database (size: 144/80 bytes on 64/32 bit OS) +typedef struct kpep_db { + const char *name; ///< Database name, such as "haswell". + const char *cpu_id; ///< Plist name, such as "cpu_7_8_10b282dc". + const char *marketing_name; ///< Marketing name, such as "Intel Haswell". + void *plist_data; ///< Plist data (CFDataRef), currently NULL. + void *event_map; ///< All events (CFDict). + kpep_event + *event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count). + kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *) + ///< * fixed_counter_count) + void *alias_map; ///< All aliases (CFDict). + usize reserved_1; + usize reserved_2; + usize reserved_3; + usize event_count; ///< All events count. + usize alias_count; + usize fixed_counter_count; + usize config_counter_count; + usize power_counter_count; + u32 archtecture; ///< see `KPEP CPU archtecture constants` above. 
+ u32 fixed_counter_bits; + u32 config_counter_bits; + u32 power_counter_bits; +} kpep_db; + +/// KPEP config (size: 80/44 bytes on 64/32 bit OS) +typedef struct kpep_config { + kpep_db *db; + kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL + usize *ev_map; ///< (sizeof(usize *) * counter_count), init 0 + usize *ev_idx; ///< (sizeof(usize *) * counter_count), init -1 + u32 *flags; ///< (sizeof(u32 *) * counter_count), init 0 + u64 *kpc_periods; ///< (sizeof(u64 *) * counter_count), init 0 + usize event_count; /// kpep_config_events_count() + usize counter_count; + u32 classes; ///< See `class mask constants` above. + u32 config_counter; + u32 power_counter; + u32 reserved; +} kpep_config; + +/// Error code for kpep_config_xxx() and kpep_db_xxx() functions. +typedef enum { + KPEP_CONFIG_ERROR_NONE = 0, + KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1, + KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2, + KPEP_CONFIG_ERROR_IO = 3, + KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4, + KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5, + KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6, + KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7, + KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8, + KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9, + KPEP_CONFIG_ERROR_DB_CORRUPT = 10, + KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11, + KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12, + KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13, + KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14, + KPEP_CONFIG_ERROR_ERRNO = 15, + KPEP_CONFIG_ERROR_MAX +} kpep_config_error_code; + +/// Error description for kpep_config_error_code. 
+static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = { + "none", + "invalid argument", + "out of memory", + "I/O", + "buffer too small", + "current system unknown", + "database path invalid", + "database not found", + "database architecture unsupported", + "database version unsupported", + "database corrupt", + "event not found", + "conflicting events", + "all counters must be forced", + "event unavailable", + "check errno"}; + +/// Error description. +static const char *kpep_config_error_desc(int code) { + if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) { + return kpep_config_error_names[code]; + } + return "unknown error"; +} + +/// Create a config. +/// @param db A kpep db, see kpep_db_create() +/// @param cfg_ptr A pointer to receive the new config. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr); + +/// Free the config. +static void (*kpep_config_free)(kpep_config *cfg); + +/// Add an event to config. +/// @param cfg The config. +/// @param ev_ptr A event pointer. +/// @param flag 0: all, 1: user space only +/// @param err Error bitmap pointer, can be NULL. +/// If return value is `CONFLICTING_EVENTS`, this bitmap contains +/// the conflicted event indices, e.g. "1 << 2" means index 2. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr, + u32 flag, u32 *err); + +/// Remove event at index. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx); + +/// Force all counters. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_force_counters)(kpep_config *cfg); + +/// Get events count. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr); + +/// Get all event pointers. +/// @param buf A buffer to receive event pointers. 
+/// @param buf_size The buffer's size in bytes, should not smaller than +/// kpep_config_events_count() * sizeof(void *). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf, + usize buf_size); + +/// Get kpc register configs. +/// @param buf A buffer to receive kpc register configs. +/// @param buf_size The buffer's size in bytes, should not smaller than +/// kpep_config_kpc_count() * sizeof(kpc_config_t). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf, + usize buf_size); + +/// Get kpc register config count. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr); + +/// Get kpc classes. +/// @param classes See `class mask constants` above. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr); + +/// Get the index mapping from event to counter. +/// @param buf A buffer to receive indexes. +/// @param buf_size The buffer's size in bytes, should not smaller than +/// kpep_config_events_count() * sizeof(kpc_config_t). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size); + +/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/". +/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8". +/// Pass NULL for current CPU. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_create)(const char *name, kpep_db **db_ptr); + +/// Free the kpep database. +static void (*kpep_db_free)(kpep_db *db); + +/// Get the database's name. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_name)(kpep_db *db, const char **name); + +/// Get the event alias count. +/// @return kpep_config_error_code, 0 for success. 
+static int (*kpep_db_aliases_count)(kpep_db *db, usize *count); + +/// Get all alias. +/// @param buf A buffer to receive all alias strings. +/// @param buf_size The buffer's size in bytes, +/// should not smaller than kpep_db_aliases_count() * sizeof(void *). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size); + +/// Get counters count for given classes. +/// @param classes 1: Fixed, 2: Configurable. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count); + +/// Get all event count. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_events_count)(kpep_db *db, usize *count); + +/// Get all events. +/// @param buf A buffer to receive all event pointers. +/// @param buf_size The buffer's size in bytes, +/// should not smaller than kpep_db_events_count() * sizeof(void *). +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size); + +/// Get one event by name. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr); + +/// Get event's name. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr); + +/// Get event's alias. +/// @return kpep_config_error_code, 0 for success. +static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr); + +/// Get event's description. +/// @return kpep_config_error_code, 0 for success. 
+static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr); + +// ----------------------------------------------------------------------------- +// load kperf/kperfdata dynamic library +// ----------------------------------------------------------------------------- + +typedef struct { + const char *name; + void **impl; +} lib_symbol; + +#define lib_nelems(x) (sizeof(x) / sizeof((x)[0])) +#define lib_symbol_def(name) \ + { \ +#name, (void **)&name \ + } + +static const lib_symbol lib_symbols_kperf[] = { + lib_symbol_def(kpc_pmu_version), + lib_symbol_def(kpc_cpu_string), + lib_symbol_def(kpc_set_counting), + lib_symbol_def(kpc_get_counting), + lib_symbol_def(kpc_set_thread_counting), + lib_symbol_def(kpc_get_thread_counting), + lib_symbol_def(kpc_get_config_count), + lib_symbol_def(kpc_get_counter_count), + lib_symbol_def(kpc_set_config), + lib_symbol_def(kpc_get_config), + lib_symbol_def(kpc_get_cpu_counters), + lib_symbol_def(kpc_get_thread_counters), + lib_symbol_def(kpc_force_all_ctrs_set), + lib_symbol_def(kpc_force_all_ctrs_get), + lib_symbol_def(kperf_action_count_set), + lib_symbol_def(kperf_action_count_get), + lib_symbol_def(kperf_action_samplers_set), + lib_symbol_def(kperf_action_samplers_get), + lib_symbol_def(kperf_action_filter_set_by_task), + lib_symbol_def(kperf_action_filter_set_by_pid), + lib_symbol_def(kperf_timer_count_set), + lib_symbol_def(kperf_timer_count_get), + lib_symbol_def(kperf_timer_period_set), + lib_symbol_def(kperf_timer_period_get), + lib_symbol_def(kperf_timer_action_set), + lib_symbol_def(kperf_timer_action_get), + lib_symbol_def(kperf_sample_set), + lib_symbol_def(kperf_sample_get), + lib_symbol_def(kperf_reset), + lib_symbol_def(kperf_timer_pet_set), + lib_symbol_def(kperf_timer_pet_get), + lib_symbol_def(kperf_ns_to_ticks), + lib_symbol_def(kperf_ticks_to_ns), + lib_symbol_def(kperf_tick_frequency), +}; + +static const lib_symbol lib_symbols_kperfdata[] = { + lib_symbol_def(kpep_config_create), + 
lib_symbol_def(kpep_config_free), + lib_symbol_def(kpep_config_add_event), + lib_symbol_def(kpep_config_remove_event), + lib_symbol_def(kpep_config_force_counters), + lib_symbol_def(kpep_config_events_count), + lib_symbol_def(kpep_config_events), + lib_symbol_def(kpep_config_kpc), + lib_symbol_def(kpep_config_kpc_count), + lib_symbol_def(kpep_config_kpc_classes), + lib_symbol_def(kpep_config_kpc_map), + lib_symbol_def(kpep_db_create), + lib_symbol_def(kpep_db_free), + lib_symbol_def(kpep_db_name), + lib_symbol_def(kpep_db_aliases_count), + lib_symbol_def(kpep_db_aliases), + lib_symbol_def(kpep_db_counters_count), + lib_symbol_def(kpep_db_events_count), + lib_symbol_def(kpep_db_events), + lib_symbol_def(kpep_db_event), + lib_symbol_def(kpep_event_name), + lib_symbol_def(kpep_event_alias), + lib_symbol_def(kpep_event_description), +}; + +#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf" +#define lib_path_kperfdata \ + "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata" + +static bool lib_inited = false; +static bool lib_has_err = false; +static char lib_err_msg[256]; + +static void *lib_handle_kperf = NULL; +static void *lib_handle_kperfdata = NULL; + +static void lib_deinit(void) { + lib_inited = false; + lib_has_err = false; + if (lib_handle_kperf) dlclose(lib_handle_kperf); + if (lib_handle_kperfdata) dlclose(lib_handle_kperfdata); + lib_handle_kperf = NULL; + lib_handle_kperfdata = NULL; + for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { + const lib_symbol *symbol = &lib_symbols_kperf[i]; + *symbol->impl = NULL; + } + for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { + const lib_symbol *symbol = &lib_symbols_kperfdata[i]; + *symbol->impl = NULL; + } +} + +static bool lib_init(void) { +#define return_err() \ + do { \ + lib_deinit(); \ + lib_inited = true; \ + lib_has_err = true; \ + return false; \ + } while (false) + + if (lib_inited) return !lib_has_err; + + // load dynamic library + 
lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY); + if (!lib_handle_kperf) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperf.framework, message: %s.", dlerror()); + return_err(); + } + lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY); + if (!lib_handle_kperfdata) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperfdata.framework, message: %s.", dlerror()); + return_err(); + } + + // load symbol address from dynamic library + for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { + const lib_symbol *symbol = &lib_symbols_kperf[i]; + *symbol->impl = dlsym(lib_handle_kperf, symbol->name); + if (!*symbol->impl) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperf function: %s.", symbol->name); + return_err(); + } + } + for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { + const lib_symbol *symbol = &lib_symbols_kperfdata[i]; + *symbol->impl = dlsym(lib_handle_kperfdata, symbol->name); + if (!*symbol->impl) { + snprintf(lib_err_msg, sizeof(lib_err_msg), + "Failed to load kperfdata function: %s.", symbol->name); + return_err(); + } + } + + lib_inited = true; + lib_has_err = false; + return true; + +#undef return_err +} + +// ----------------------------------------------------------------------------- +// kdebug private structs +// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h +// ----------------------------------------------------------------------------- + +/* + * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf + * structure. 
+ */ +#if defined(__arm64__) +typedef uint64_t kd_buf_argtype; +#else +typedef uintptr_t kd_buf_argtype; +#endif + +typedef struct { + uint64_t timestamp; + kd_buf_argtype arg1; + kd_buf_argtype arg2; + kd_buf_argtype arg3; + kd_buf_argtype arg4; + kd_buf_argtype arg5; /* the thread ID */ + uint32_t debugid; /* see */ + +/* + * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf + * structure. + */ +#if defined(__LP64__) || defined(__arm64__) + uint32_t cpuid; /* cpu index, from 0 */ + kd_buf_argtype unused; +#endif +} kd_buf; + +/* bits for the type field of kd_regtype */ +#define KDBG_CLASSTYPE 0x10000 +#define KDBG_SUBCLSTYPE 0x20000 +#define KDBG_RANGETYPE 0x40000 +#define KDBG_TYPENONE 0x80000 +#define KDBG_CKTYPES 0xF0000 + +/* only trace at most 4 types of events, at the code granularity */ +#define KDBG_VALCHECK 0x00200000U + +typedef struct { + unsigned int type; + unsigned int value1; + unsigned int value2; + unsigned int value3; + unsigned int value4; +} kd_regtype; + +typedef struct { + /* number of events that can fit in the buffers */ + int nkdbufs; + /* set if trace is disabled */ + int nolog; + /* kd_ctrl_page.flags */ + unsigned int flags; + /* number of threads in thread map */ + int nkdthreads; + /* the owning pid */ + int bufid; +} kbufinfo_t; + +// ----------------------------------------------------------------------------- +// kdebug utils +// ----------------------------------------------------------------------------- + +/// Clean up trace buffers and reset ktrace/kdebug/kperf. +/// @return 0 on success. +static int kdebug_reset(void) { + int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE}; + return sysctl(mib, 3, NULL, NULL, NULL, 0); +} + +/// Disable and reinitialize the trace buffers. +/// @return 0 on success. +static int kdebug_reinit(void) { + int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP}; + return sysctl(mib, 3, NULL, NULL, NULL, 0); +} + +/// Set debug filter. 
/// @return 0 on success.
static int kdebug_setreg(kd_regtype *kdr) {
  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG};
  usize size = sizeof(kd_regtype);
  return sysctl(mib, 3, kdr, &size, NULL, 0);
}

/// Set maximum number of trace entries (kd_buf).
/// Only allow allocation up to half the available memory (sane_size).
/// @return 0 on success.
static int kdebug_trace_setbuf(int nbufs) {
  int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs};
  return sysctl(mib, 4, NULL, NULL, NULL, 0);
}

/// Enable or disable kdebug trace.
/// Trace buffer must already be initialized.
/// @return 0 on success.
static int kdebug_trace_enable(bool enable) {
  int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, enable};
  return sysctl(mib, 4, NULL, 0, NULL, 0);
}

/// Retrieve trace buffer information from kernel.
/// @return 0 on success.
static int kdebug_get_bufinfo(kbufinfo_t *info) {
  if (!info) return -1;
  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF};
  size_t needed = sizeof(kbufinfo_t);
  return sysctl(mib, 3, info, &needed, NULL, 0);
}

/// Retrieve trace buffers from kernel.
/// @param buf Memory to receive buffer data, array of `kd_buf`.
/// @param len Length of `buf` in bytes.
/// @param count Number of trace entries (kd_buf) obtained.
/// @return 0 on success.
static int kdebug_trace_read(void *buf, usize len, usize *count) {
  if (count) *count = 0;
  if (!buf || !len) return -1;

  // Note: the input and output units are not the same.
  // input: bytes
  // output: number of kd_buf
  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR};
  int ret = sysctl(mib, 3, buf, &len, NULL, 0);
  if (ret != 0) return ret;
  *count = len;
  return 0;
}

/// Block until there are new buffers filled or `timeout_ms` have passed.
/// @param timeout_ms timeout in milliseconds; must be non-zero. (The
///        implementation rejects 0 with -1 — it does NOT wait forever,
///        contrary to what the original comment claimed.)
/// @param suc set true if new buffers filled.
/// @return 0 on success.
+static int kdebug_wait(usize timeout_ms, bool *suc) { + if (timeout_ms == 0) return -1; + int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT}; + usize val = timeout_ms; + int ret = sysctl(mib, 3, NULL, &val, NULL, 0); + if (suc) *suc = !!val; + return ret; +} + +// ----------------------------------------------------------------------------- +// Demo +// ----------------------------------------------------------------------------- + +#define EVENT_NAME_MAX 8 +typedef struct { + const char *alias; /// name for print + const char *names[EVENT_NAME_MAX]; /// name from pmc db +} event_alias; + +/// Event names from /usr/share/kpep/.plist +static const event_alias profile_events[] = { + {"cycles", + { + "CORE_ACTIVE_CYCLE", // was: "FIXED_CYCLES", // Apple + // A7-A15 + "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th + "CPU_CLK_UNHALTED.CORE", // Intel Yonah, Merom + }}, + {"instructions", + { + "INST_ALL", // was:"FIXED_INSTRUCTIONS", // Apple A7-A15 + "INST_RETIRED.ANY" // Intel Yonah, Merom, Core 1th-10th + }}, + {"branches", + { + "INST_BRANCH", // Apple A7-A15 + "BR_INST_RETIRED.ALL_BRANCHES", // Intel Core 1th-10th + "INST_RETIRED.ANY", // Intel Yonah, Merom + }}, + {"branch-misses", + { + "BRANCH_MISPRED_NONSPEC", // Apple A7-A15, since iOS 15, macOS 12 + "BRANCH_MISPREDICT", // Apple A7-A14 + "BR_MISP_RETIRED.ALL_BRANCHES", // Intel Core 2th-10th + "BR_INST_RETIRED.MISPRED", // Intel Yonah, Merom + }}, +}; + +static kpep_event *get_event(kpep_db *db, const event_alias *alias) { + for (usize j = 0; j < EVENT_NAME_MAX; j++) { + const char *name = alias->names[j]; + if (!name) break; + kpep_event *ev = NULL; + if (kpep_db_event(db, name, &ev) == 0) { + return ev; + } + } + return NULL; +} + +kpc_config_t regs[KPC_MAX_COUNTERS] = {0}; +usize counter_map[KPC_MAX_COUNTERS] = {0}; +u64 counters_0[KPC_MAX_COUNTERS] = {0}; +u64 counters_1[KPC_MAX_COUNTERS] = {0}; +const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]); + +bool 
setup_performance_counters() { + static bool init = false; + static bool worked = false; + + if (init) { + return worked; + } + init = true; + + // load dylib + if (!lib_init()) { + printf("Error: %s\n", lib_err_msg); + return (worked = false); + } + + // check permission + int force_ctrs = 0; + if (kpc_force_all_ctrs_get(&force_ctrs)) { + printf("Permission denied, xnu/kpc requires root privileges.\n"); + return (worked = false); + } + int ret; + // load pmc db + kpep_db *db = NULL; + if ((ret = kpep_db_create(NULL, &db))) { + printf("Error: cannot load pmc database: %d.\n", ret); + return (worked = false); + } + printf("loaded db: %s (%s)\n", db->name, db->marketing_name); + // printf("number of fixed counters: %zu\n", db->fixed_counter_count); + // printf("number of configurable counters: %zu\n", db->config_counter_count); + + // create a config + kpep_config *cfg = NULL; + if ((ret = kpep_config_create(db, &cfg))) { + printf("Failed to create kpep config: %d (%s).\n", ret, + kpep_config_error_desc(ret)); + return (worked = false); + } + if ((ret = kpep_config_force_counters(cfg))) { + printf("Failed to force counters: %d (%s).\n", ret, + kpep_config_error_desc(ret)); + return (worked = false); + } + + // get events + kpep_event *ev_arr[ev_count] = {0}; + for (usize i = 0; i < ev_count; i++) { + const event_alias *alias = profile_events + i; + ev_arr[i] = get_event(db, alias); + if (!ev_arr[i]) { + printf("Cannot find event: %s.\n", alias->alias); + return (worked = false); + } + } + + // add event to config + for (usize i = 0; i < ev_count; i++) { + kpep_event *ev = ev_arr[i]; + if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) { + printf("Failed to add event: %d (%s).\n", ret, + kpep_config_error_desc(ret)); + return (worked = false); + } + } + + // prepare buffer and config + u32 classes = 0; + usize reg_count = 0; + if ((ret = kpep_config_kpc_classes(cfg, &classes))) { + printf("Failed get kpc classes: %d (%s).\n", ret, + kpep_config_error_desc(ret)); + 
return (worked = false); + } + if ((ret = kpep_config_kpc_count(cfg, ®_count))) { + printf("Failed get kpc count: %d (%s).\n", ret, + kpep_config_error_desc(ret)); + return (worked = false); + } + if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) { + printf("Failed get kpc map: %d (%s).\n", ret, kpep_config_error_desc(ret)); + return (worked = false); + } + if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) { + printf("Failed get kpc registers: %d (%s).\n", ret, + kpep_config_error_desc(ret)); + return (worked = false); + } + + // set config to kernel + if ((ret = kpc_force_all_ctrs_set(1))) { + printf("Failed force all ctrs: %d.\n", ret); + return (worked = false); + } + if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) { + if ((ret = kpc_set_config(classes, regs))) { + printf("Failed set kpc config: %d.\n", ret); + return (worked = false); + } + } + + // start counting + if ((ret = kpc_set_counting(classes))) { + printf("Failed set counting: %d.\n", ret); + return (worked = false); + } + if ((ret = kpc_set_thread_counting(classes))) { + printf("Failed set thread counting: %d.\n", ret); + return (worked = false); + } + + return (worked = true); +} + +inline performance_counters get_counters() { + static bool warned = false; + int ret; + // get counters before + if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0))) { + if (!warned) { + printf("Failed get thread counters before: %d.\n", ret); + warned = true; + } + return 1; + } + /* +// We could print it out this way if we wanted to: +printf("counters value:\n"); +for (usize i = 0; i < ev_count; i++) { + const event_alias *alias = profile_events + i; + usize idx = counter_map[i]; + u64 val = counters_1[idx] - counters_0[idx]; + printf("%14s: %llu\n", alias->alias, val); +}*/ + return performance_counters{ + counters_0[counter_map[0]], counters_0[counter_map[3]], + counters_0[counter_map[2]], counters_0[counter_map[1]]}; +} + +#endif diff --git 
a/benchmarks/performancecounters/event_counter.h b/benchmarks/performancecounters/event_counter.h new file mode 100644 index 0000000..6e055cf --- /dev/null +++ b/benchmarks/performancecounters/event_counter.h @@ -0,0 +1,147 @@ +#ifndef __EVENT_COUNTER_H +#define __EVENT_COUNTER_H + +#include +#ifndef _MSC_VER +#include +#endif +#include +#include +#include +#include + +#include "linux_perf_events.h" +#ifdef __linux__ +#include +#endif + +#if __APPLE__ && __aarch64__ +#include "apple_arm_events.h" +#endif + +struct event_count { + std::chrono::duration elapsed; + std::vector event_counts; + event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {} + event_count(const std::chrono::duration _elapsed, + const std::vector _event_counts) + : elapsed(_elapsed), event_counts(_event_counts) {} + event_count(const event_count& other) + : elapsed(other.elapsed), event_counts(other.event_counts) {} + + // The types of counters (so we can read the getter more easily) + enum event_counter_types { + CPU_CYCLES, + INSTRUCTIONS, + }; + + double elapsed_sec() const { + return std::chrono::duration(elapsed).count(); + } + double elapsed_ns() const { + return std::chrono::duration(elapsed).count(); + } + double cycles() const { + return static_cast(event_counts[CPU_CYCLES]); + } + double instructions() const { + return static_cast(event_counts[INSTRUCTIONS]); + } + + event_count& operator=(const event_count& other) { + this->elapsed = other.elapsed; + this->event_counts = other.event_counts; + return *this; + } + event_count operator+(const event_count& other) const { + return event_count(elapsed + other.elapsed, + { + event_counts[0] + other.event_counts[0], + event_counts[1] + other.event_counts[1], + event_counts[2] + other.event_counts[2], + event_counts[3] + other.event_counts[3], + event_counts[4] + other.event_counts[4], + }); + } + + void operator+=(const event_count& other) { *this = *this + other; } +}; + +struct event_aggregate { + bool has_events = false; + int iterations 
= 0; + event_count total{}; + event_count best{}; + event_count worst{}; + + event_aggregate() = default; + + void operator<<(const event_count& other) { + if (iterations == 0 || other.elapsed < best.elapsed) { + best = other; + } + if (iterations == 0 || other.elapsed > worst.elapsed) { + worst = other; + } + iterations++; + total += other; + } + + double elapsed_sec() const { return total.elapsed_sec() / iterations; } + double elapsed_ns() const { return total.elapsed_ns() / iterations; } + double cycles() const { return total.cycles() / iterations; } + double instructions() const { return total.instructions() / iterations; } +}; + +struct event_collector { + event_count count{}; + std::chrono::time_point start_clock{}; + +#if defined(__linux__) + LinuxEvents linux_events; + event_collector() + : linux_events(std::vector{ + PERF_COUNT_HW_CPU_CYCLES, + PERF_COUNT_HW_INSTRUCTIONS, + }) {} + bool has_events() { return linux_events.is_working(); } +#elif __APPLE__ && __aarch64__ + performance_counters diff; + event_collector() : diff(0) { setup_performance_counters(); } + bool has_events() { return setup_performance_counters(); } +#else + event_collector() {} + bool has_events() { return false; } +#endif + + inline void start() { +#if defined(__linux) + linux_events.start(); +#elif __APPLE__ && __aarch64__ + if (has_events()) { + diff = get_counters(); + } +#endif + start_clock = std::chrono::steady_clock::now(); + } + inline event_count& end() { + const auto end_clock = std::chrono::steady_clock::now(); +#if defined(__linux) + linux_events.end(count.event_counts); +#elif __APPLE__ && __aarch64__ + if (has_events()) { + performance_counters end = get_counters(); + diff = end - diff; + } + count.event_counts[0] = diff.cycles; + count.event_counts[1] = diff.instructions; + count.event_counts[2] = diff.missed_branches; + count.event_counts[3] = 0; + count.event_counts[4] = diff.branches; +#endif + count.elapsed = end_clock - start_clock; + return count; + } +}; + 
+#endif diff --git a/benchmarks/performancecounters/linux_perf_events.h b/benchmarks/performancecounters/linux_perf_events.h new file mode 100644 index 0000000..e405aec --- /dev/null +++ b/benchmarks/performancecounters/linux_perf_events.h @@ -0,0 +1,100 @@ +// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h +#pragma once +#ifdef __linux__ + +#include // for __NR_perf_event_open +#include // for perf event constants +#include // for ioctl +#include // for syscall + +#include // for errno +#include // for memset +#include +#include +#include + +template +class LinuxEvents { + int fd; + bool working; + perf_event_attr attribs{}; + size_t num_events{}; + std::vector temp_result_vec{}; + std::vector ids{}; + + public: + explicit LinuxEvents(std::vector config_vec) : fd(0), working(true) { + memset(&attribs, 0, sizeof(attribs)); + attribs.type = TYPE; + attribs.size = sizeof(attribs); + attribs.disabled = 1; + attribs.exclude_kernel = 1; + attribs.exclude_hv = 1; + + attribs.sample_period = 0; + attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + const int pid = 0; // the current process + const int cpu = -1; // all CPUs + const unsigned long flags = 0; + + int group = -1; // no group + num_events = config_vec.size(); + ids.resize(config_vec.size()); + uint32_t i = 0; + for (auto config : config_vec) { + attribs.config = config; + fd = static_cast( + syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags)); + if (fd == -1) { + report_error("perf_event_open"); + } + ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]); + if (group == -1) { + group = fd; + } + } + + temp_result_vec.resize(num_events * 2 + 1); + } + + ~LinuxEvents() { + if (fd != -1) { + close(fd); + } + } + + inline void start() { + if (fd != -1) { + if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) { + report_error("ioctl(PERF_EVENT_IOC_RESET)"); + } + + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) { + 
report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); + } + } + } + + inline void end(std::vector &results) { + if (fd != -1) { + if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) { + report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); + } + + if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) { + report_error("read"); + } + } + // our actual results are in slots 1,3,5, ... of this structure + // we really should be checking our ids obtained earlier to be safe + for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) { + results[i / 2] = temp_result_vec[i]; + } + } + + bool is_working() { return working; } + + private: + void report_error(const std::string &) { working = false; } +}; +#endif diff --git a/benchmarks/to_ascii.cpp b/benchmarks/to_ascii.cpp new file mode 100644 index 0000000..da0f090 --- /dev/null +++ b/benchmarks/to_ascii.cpp @@ -0,0 +1,76 @@ +#include "ada/idna/to_ascii.h" + +#include +#include +#include + +#include "idna.h" +#include "performancecounters/event_counter.h" +event_collector collector; +size_t N = 1000; + +#include + +std::string inputs[] = { + "-x.xn--zca", + "xn--zca.xn--zca", + "xn--mgba3gch31f060k", + "xn--1ch", +}; + +double inputs_total_byte = []() -> double { + size_t bytes{0}; + for (std::string& url_string : inputs) { + bytes += url_string.size(); + } + return double(bytes); +}(); + +static void Ada(benchmark::State& state) { + for (auto _ : state) { + for (std::string& url_string : inputs) { + benchmark::DoNotOptimize(ada::idna::to_ascii(url_string)); + } + } + + if (collector.has_events()) { + event_aggregate aggregate{}; + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + for (std::string& url_string : inputs) { + benchmark::DoNotOptimize(ada::idna::to_ascii(url_string)); + } + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + } + 
state.counters["cycles/url"] = aggregate.best.cycles() / std::size(inputs); + state.counters["instructions/url"] = + aggregate.best.instructions() / std::size(inputs); + state.counters["instructions/cycle"] = + aggregate.best.instructions() / aggregate.best.cycles(); + state.counters["instructions/byte"] = + aggregate.best.instructions() / inputs_total_byte; + state.counters["instructions/ns"] = + aggregate.best.instructions() / aggregate.best.elapsed_ns(); + state.counters["GHz"] = + aggregate.best.cycles() / aggregate.best.elapsed_ns(); + state.counters["ns/url"] = aggregate.best.elapsed_ns() / std::size(inputs); + state.counters["cycle/byte"] = aggregate.best.cycles() / inputs_total_byte; + } + state.counters["time/byte"] = benchmark::Counter( + inputs_total_byte, benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["time/url"] = benchmark::Counter( + double(std::size(inputs)), benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["speed"] = benchmark::Counter( + inputs_total_byte, benchmark::Counter::kIsIterationInvariantRate); + state.counters["url/s"] = benchmark::Counter( + double(std::size(inputs)), benchmark::Counter::kIsIterationInvariantRate); +} + +BENCHMARK(Ada); + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/cmake/import.cmake b/cmake/import.cmake index 43c72a0..382f1eb 100644 --- a/cmake/import.cmake +++ b/cmake/import.cmake @@ -1,6 +1,6 @@ -set(dep_root "${ada_SOURCE_DIR}/dependencies/.cache") -if(DEFINED ENV{ada_DEPENDENCY_CACHE_DIR}) - set(dep_root "$ENV{ada_DEPENDENCY_CACHE_DIR}") +set(dep_root "${ada-idna_SOURCE_DIR}/dependencies/.cache") +if(DEFINED ENV{ada-idna_DEPENDENCY_CACHE_DIR}) + set(dep_root "$ENV{ada-idna_DEPENDENCY_CACHE_DIR}") endif() function(import_dependency NAME GITHUB_REPO COMMIT) diff --git a/tests/to_ascii_alternating.txt b/tests/fixtures/to_ascii_alternating.txt similarity index 100% rename from tests/to_ascii_alternating.txt 
rename to tests/fixtures/to_ascii_alternating.txt diff --git a/tests/to_ascii_invalid.txt b/tests/fixtures/to_ascii_invalid.txt similarity index 100% rename from tests/to_ascii_invalid.txt rename to tests/fixtures/to_ascii_invalid.txt diff --git a/tests/utf8_punycode_alternating.txt b/tests/fixtures/utf8_punycode_alternating.txt similarity index 100% rename from tests/utf8_punycode_alternating.txt rename to tests/fixtures/utf8_punycode_alternating.txt diff --git a/tests/punycode_tests.cpp b/tests/punycode_tests.cpp index a18e509..4604941 100644 --- a/tests/punycode_tests.cpp +++ b/tests/punycode_tests.cpp @@ -91,7 +91,7 @@ bool test(std::string ut8_string, std::string puny_string) { } int main(int argc, char** argv) { - std::string filename = "utf8_punycode_alternating.txt"; + std::string filename = "fixtures/utf8_punycode_alternating.txt"; if (argc > 1) { filename = argv[1]; } diff --git a/tests/to_ascii_tests.cpp b/tests/to_ascii_tests.cpp index 9f990b6..60314fe 100644 --- a/tests/to_ascii_tests.cpp +++ b/tests/to_ascii_tests.cpp @@ -55,14 +55,20 @@ bool test(std::string ut8_string, std::string puny_string) { } bool special_cases() { - if(!ada::idna::to_ascii("\u00AD").empty()) { return false; } - if(!ada::idna::to_ascii("\xef\xbf\xbd.com").empty()) { return false; } + if (!ada::idna::to_ascii("\u00AD").empty()) { + return false; + } + if (!ada::idna::to_ascii("\xef\xbf\xbd.com").empty()) { + return false; + } return true; } int main(int argc, char** argv) { - if(!special_cases()) { return EXIT_FAILURE; } - std::string filename = "to_ascii_alternating.txt"; + if (!special_cases()) { + return EXIT_FAILURE; + } + std::string filename = "fixtures/to_ascii_alternating.txt"; if (argc > 1) { filename = argv[1]; } @@ -79,7 +85,7 @@ int main(int argc, char** argv) { return EXIT_FAILURE; } } - filename = "to_ascii_invalid.txt"; + filename = "fixtures/to_ascii_invalid.txt"; if (argc > 2) { filename = argv[2]; }