103 lines
2.5 KiB
C++
103 lines
2.5 KiB
C++
|
|
#include "measure_time.h"

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <thread>

#include <ext/concurrence.h>
|
|
|
|
#define COUNT (1u << 24)
|
|
|
|
// use "incl" for 32-bit and "incq" for 64-bit in asm below:
|
|
// use 32-bit for now: _Atomic_word should be 32-bit too;
|
|
// 64-bit is basically as fast as 32-bit if it fits in your cache
|
|
typedef std::uint32_t counting;
|
|
|
|
volatile counting __attribute__ ((aligned (64))) volatile_counter;
|
|
counting __attribute__ ((aligned (64))) simple_counter;
|
|
std::atomic<counting> __attribute__ ((aligned (64))) atomic_counter;
|
|
|
|
_Atomic_word __attribute__ ((aligned (64))) gxx_counter;
|
|
|
|
// Resets the global volatile_counter and increments it COUNT times using
// ordinary (non-atomic) volatile accesses.
//
// Should perform the same as the plain "incl" loop in benchmark_asm.
void benchmark_volatile() {
    volatile_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        // One volatile read followed by one volatile write, which is exactly
        // what ++volatile_counter means. It is written out because applying
        // ++ to a volatile-qualified object is deprecated since C++20.
        volatile_counter = volatile_counter + 1;
    }
}
|
|
|
|
// Resets the global simple_counter and increments it COUNT times with a plain
// (unlocked) x86 "incl" on the memory operand.
void benchmark_asm() {
    // The mnemonic fixes the operand width, so fail the build instead of
    // silently measuring the wrong thing if the counter type is widened.
    static_assert(sizeof(simple_counter) == 4,
                  "\"incl\" increments a 32-bit operand; use \"incq\" for a 64-bit counter");

    simple_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        // "volatile" keeps the compiler from dropping or merging the
        // increments; "+m" marks the counter as a read-write memory operand;
        // "cc" records that the instruction modifies the flags register.
        asm volatile(
            "incl %0;"
            : "+m"(simple_counter)
            :
            : "cc"
        );
    }
}
|
|
|
|
void benchmark_local_atomic() {
|
|
// compiler doesn't realize no one else can read this counter and
|
|
// uses real atomic (i.e. "lock")
|
|
std::atomic<counting> local_atomic_counter;
|
|
local_atomic_counter = 0;
|
|
for (size_t i = 0; i < COUNT; ++i) {
|
|
++local_atomic_counter;
|
|
}
|
|
}
|
|
|
|
// Resets the global atomic_counter and atomically increments it COUNT times.
//
// Should perform the same as benchmark_lock_asm.
void benchmark_atomic() {
    atomic_counter.store(0);

    for (size_t remaining = COUNT; remaining != 0; --remaining) {
        // Same operation as ++atomic_counter; the result is unused.
        atomic_counter.fetch_add(1);
    }
}
|
|
|
|
// Resets the global simple_counter and increments it COUNT times with a
// "lock"-prefixed x86 "incl", i.e. an atomic read-modify-write on the memory
// operand.
void benchmark_lock_asm() {
    // The mnemonic fixes the operand width, so fail the build instead of
    // silently measuring the wrong thing if the counter type is widened.
    static_assert(sizeof(simple_counter) == 4,
                  "\"incl\" increments a 32-bit operand; use \"incq\" for a 64-bit counter");

    simple_counter = 0;
    for (std::size_t i = 0; i < COUNT; ++i) {
        // "volatile" keeps the compiler from dropping or merging the
        // increments; "+m" marks the counter as a read-write memory operand;
        // "cc" records that the instruction modifies the flags register.
        asm volatile(
            "lock incl %0;"
            : "+m"(simple_counter)
            :
            : "cc"
        );
    }
}
|
|
|
|
void benchmark_gxx_atomic() {
|
|
// if linked with pthread this will use "lock" same as atomic, otherwise
|
|
// it uses a simple non-volatile non-atomic add instruction
|
|
// https://gcc.gnu.org/onlinedocs/libstdc++/manual/ext_concurrency.html
|
|
// claims __atomic_add_dispatch uses a volatile counter, but this isn't
|
|
// true in gcc 4.9
|
|
gxx_counter = 0;
|
|
for (size_t i = 0; i < COUNT; ++i) {
|
|
__gnu_cxx::__atomic_add_dispatch(&gxx_counter, 1);
|
|
}
|
|
}
|
|
|
|
|
|
// Runs every counter benchmark in turn. Each label is written to std::cout
// and the benchmark function is then handed to measure_time_with_dry_run,
// which is not defined in this file (presumably it comes from
// "measure_time.h" and reports the timing after the label; confirm there).
int main() {
    // Disabled experiment, kept from the original. Presumably starting a
    // thread makes libstdc++ treat the process as multi-threaded, which would
    // change what benchmark_gxx_atomic measures (TODO confirm).
    // std::thread fake([] { return; });
    // fake.join();

    // Non-atomic increments.
    std::cout << "volatile counter : ";
    measure_time_with_dry_run(benchmark_volatile);
    std::cout << "asm counter : ";
    measure_time_with_dry_run(benchmark_asm);

    // Atomic increments.
    std::cout << "local atomic counter: ";
    measure_time_with_dry_run(benchmark_local_atomic);
    std::cout << "atomic counter : ";
    measure_time_with_dry_run(benchmark_atomic);
    std::cout << "lock asm counter : ";
    measure_time_with_dry_run(benchmark_lock_asm);

    // libstdc++ dispatching increment.
    std::cout << "gxx atomic counter : ";
    measure_time_with_dry_run(benchmark_gxx_atomic);
}
|