// Micro-benchmark comparing the cost of incrementing a counter COUNT times
// through several mechanisms: a volatile variable, a plain x86 `incl`, a
// std::atomic (function-local and global), an x86 `lock incl`, and
// libstdc++'s __gnu_cxx::__atomic_add_dispatch.
//
// Requires GCC-style inline asm on x86 and libstdc++ (<ext/atomicity.h>).
// Each benchmark is handed to measure_time_with_dry_run() from
// "measure_time.h".

#include "measure_time.h"

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>

#include <ext/atomicity.h>

// Number of increments performed by every benchmark.
constexpr std::size_t COUNT = 1u << 24;

// use "incl" for 32-bit and "incq" for 64-bit in asm below:
// use 32-bit for now: _Atomic_word should be 32-bit too;
// 64-bit is basically as fast as 32-bit if it fits in your cache
using counting = std::uint32_t;

// Each counter is aligned to 64 bytes so that it starts on its own 64-byte
// boundary.
alignas(64) volatile counting volatile_counter;
alignas(64) counting simple_counter;
alignas(64) std::atomic<counting> atomic_counter;
alignas(64) _Atomic_word gxx_counter;

// Increments a volatile global COUNT times.
void benchmark_volatile() {
    // should be the same as asm below
    volatile_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        ++volatile_counter;
    }
}

// Increments a plain global COUNT times with a non-locked `incl` on its
// memory location.
void benchmark_asm() {
    simple_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        asm volatile(
            "incl %0;"
            : "+m"(simple_counter)
        );
    }
}

// Increments a function-local std::atomic COUNT times.
void benchmark_local_atomic() {
    // compiler doesn't realize no one else can read this counter and
    // uses real atomic (i.e. "lock")
    std::atomic<counting> local_atomic_counter{0};
    for (std::size_t i = 0; i < COUNT; ++i) {
        ++local_atomic_counter;
    }
}

// Increments the global std::atomic COUNT times.
void benchmark_atomic() {
    // should be the same as "lock_asm"
    atomic_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        ++atomic_counter;
    }
}

// Increments a plain global COUNT times with `lock incl` on its memory
// location.
void benchmark_lock_asm() {
    simple_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        asm volatile(
            "lock incl %0;"
            : "+m"(simple_counter)
        );
    }
}

// Adds 1 to an _Atomic_word COUNT times via
// __gnu_cxx::__atomic_add_dispatch.
void benchmark_gxx_atomic() {
    // if linked with pthread this will use "lock" same as atomic, otherwise
    // it uses a simple non-volatile non-atomic add instruction
    // https://gcc.gnu.org/onlinedocs/libstdc++/manual/ext_concurrency.html
    // claims __atomic_add_dispatch uses a volatile counter, but this isn't
    // true in gcc 4.9
    gxx_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        __gnu_cxx::__atomic_add_dispatch(&gxx_counter, 1);
    }
}

// Prints a label for each benchmark and passes the benchmark to
// measure_time_with_dry_run().
int main() {
    // Disabled thread creation, kept from the original (would also need
    // <thread>):
    // std::thread fake([] { return; });
    // fake.join();

    std::cout << "volatile counter : ";
    measure_time_with_dry_run(benchmark_volatile);

    std::cout << "asm counter : ";
    measure_time_with_dry_run(benchmark_asm);

    std::cout << "local atomic counter: ";
    measure_time_with_dry_run(benchmark_local_atomic);

    std::cout << "atomic counter : ";
    measure_time_with_dry_run(benchmark_atomic);

    std::cout << "lock asm counter : ";
    measure_time_with_dry_run(benchmark_lock_asm);

    std::cout << "gxx atomic counter : ";
    measure_time_with_dry_run(benchmark_gxx_atomic);
}