mirror of
https://git.proxmox.com/git/qemu
synced 2025-08-14 10:31:18 +00:00
add a header file for atomic operations
We're already using them in several places, but __sync builtins are just too ugly to type, and do not provide seqcst load/store operations. Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
22fc860b0a
commit
5444e768ee
352
docs/atomics.txt
Normal file
352
docs/atomics.txt
Normal file
@ -0,0 +1,352 @@
|
|||||||
|
CPUs perform independent memory operations effectively in random order.
|
||||||
|
but this can be a problem for CPU-CPU interaction (including interactions
|
||||||
|
between QEMU and the guest). Multi-threaded programs use various tools
|
||||||
|
to instruct the compiler and the CPU to restrict the order to something
|
||||||
|
that is consistent with the expectations of the programmer.
|
||||||
|
|
||||||
|
The most basic tool is locking. Mutexes, condition variables and
|
||||||
|
semaphores are used in QEMU, and should be the default approach to
|
||||||
|
synchronization. Anything else is considerably harder, but it's
|
||||||
|
also justified more often than one would like. The two tools that
|
||||||
|
are provided by qemu/atomic.h are memory barriers and atomic operations.
|
||||||
|
|
||||||
|
Macros defined by qemu/atomic.h fall in three camps:
|
||||||
|
|
||||||
|
- compiler barriers: barrier();
|
||||||
|
|
||||||
|
- weak atomic access and manual memory barriers: atomic_read(),
|
||||||
|
atomic_set(), smp_rmb(), smp_wmb(), smp_mb(), smp_read_barrier_depends();
|
||||||
|
|
||||||
|
- sequentially consistent atomic access: everything else.
|
||||||
|
|
||||||
|
|
||||||
|
COMPILER MEMORY BARRIER
|
||||||
|
=======================
|
||||||
|
|
||||||
|
barrier() prevents the compiler from moving the memory accesses either
|
||||||
|
side of it to the other side. The compiler barrier has no direct effect
|
||||||
|
on the CPU, which may then reorder things however it wishes.
|
||||||
|
|
||||||
|
barrier() is mostly used within qemu/atomic.h itself. On some
|
||||||
|
architectures, CPU guarantees are strong enough that blocking compiler
|
||||||
|
optimizations already ensures the correct order of execution. In this
|
||||||
|
case, qemu/atomic.h will reduce stronger memory barriers to simple
|
||||||
|
compiler barriers.
|
||||||
|
|
||||||
|
Still, barrier() can be useful when writing code that can be interrupted
|
||||||
|
by signal handlers.
|
||||||
|
|
||||||
|
|
||||||
|
SEQUENTIALLY CONSISTENT ATOMIC ACCESS
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
Most of the operations in the qemu/atomic.h header ensure *sequential
|
||||||
|
consistency*, where "the result of any execution is the same as if the
|
||||||
|
operations of all the processors were executed in some sequential order,
|
||||||
|
and the operations of each individual processor appear in this sequence
|
||||||
|
in the order specified by its program".
|
||||||
|
|
||||||
|
qemu/atomic.h provides the following set of atomic read-modify-write
|
||||||
|
operations:
|
||||||
|
|
||||||
|
void atomic_inc(ptr)
|
||||||
|
void atomic_dec(ptr)
|
||||||
|
void atomic_add(ptr, val)
|
||||||
|
void atomic_sub(ptr, val)
|
||||||
|
void atomic_and(ptr, val)
|
||||||
|
void atomic_or(ptr, val)
|
||||||
|
|
||||||
|
typeof(*ptr) atomic_fetch_inc(ptr)
|
||||||
|
typeof(*ptr) atomic_fetch_dec(ptr)
|
||||||
|
typeof(*ptr) atomic_fetch_add(ptr, val)
|
||||||
|
typeof(*ptr) atomic_fetch_sub(ptr, val)
|
||||||
|
typeof(*ptr) atomic_fetch_and(ptr, val)
|
||||||
|
typeof(*ptr) atomic_fetch_or(ptr, val)
|
||||||
|
typeof(*ptr) atomic_xchg(ptr, val
|
||||||
|
typeof(*ptr) atomic_cmpxchg(ptr, old, new)
|
||||||
|
|
||||||
|
all of which return the old value of *ptr. These operations are
|
||||||
|
polymorphic; they operate on any type that is as wide as an int.
|
||||||
|
|
||||||
|
Sequentially consistent loads and stores can be done using:
|
||||||
|
|
||||||
|
atomic_fetch_add(ptr, 0) for loads
|
||||||
|
atomic_xchg(ptr, val) for stores
|
||||||
|
|
||||||
|
However, they are quite expensive on some platforms, notably POWER and
|
||||||
|
ARM. Therefore, qemu/atomic.h provides two primitives with slightly
|
||||||
|
weaker constraints:
|
||||||
|
|
||||||
|
typeof(*ptr) atomic_mb_read(ptr)
|
||||||
|
void atomic_mb_set(ptr, val)
|
||||||
|
|
||||||
|
The semantics of these primitives map to Java volatile variables,
|
||||||
|
and are strongly related to memory barriers as used in the Linux
|
||||||
|
kernel (see below).
|
||||||
|
|
||||||
|
As long as you use atomic_mb_read and atomic_mb_set, accesses cannot
|
||||||
|
be reordered with each other, and it is also not possible to reorder
|
||||||
|
"normal" accesses around them.
|
||||||
|
|
||||||
|
However, and this is the important difference between
|
||||||
|
atomic_mb_read/atomic_mb_set and sequential consistency, it is important
|
||||||
|
for both threads to access the same volatile variable. It is not the
|
||||||
|
case that everything visible to thread A when it writes volatile field f
|
||||||
|
becomes visible to thread B after it reads volatile field g. The store
|
||||||
|
and load have to "match" (i.e., be performed on the same volatile
|
||||||
|
field) to achieve the right semantics.
|
||||||
|
|
||||||
|
|
||||||
|
These operations operate on any type that is as wide as an int or smaller.
|
||||||
|
|
||||||
|
|
||||||
|
WEAK ATOMIC ACCESS AND MANUAL MEMORY BARRIERS
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
Compared to sequentially consistent atomic access, programming with
|
||||||
|
weaker consistency models can be considerably more complicated.
|
||||||
|
In general, if the algorithm you are writing includes both writes
|
||||||
|
and reads on the same side, it is generally simpler to use sequentially
|
||||||
|
consistent primitives.
|
||||||
|
|
||||||
|
When using this model, variables are accessed with atomic_read() and
|
||||||
|
atomic_set(), and restrictions to the ordering of accesses is enforced
|
||||||
|
using the smp_rmb(), smp_wmb(), smp_mb() and smp_read_barrier_depends()
|
||||||
|
memory barriers.
|
||||||
|
|
||||||
|
atomic_read() and atomic_set() prevents the compiler from using
|
||||||
|
optimizations that might otherwise optimize accesses out of existence
|
||||||
|
on the one hand, or that might create unsolicited accesses on the other.
|
||||||
|
In general this should not have any effect, because the same compiler
|
||||||
|
barriers are already implied by memory barriers. However, it is useful
|
||||||
|
to do so, because it tells readers which variables are shared with
|
||||||
|
other threads, and which are local to the current thread or protected
|
||||||
|
by other, more mundane means.
|
||||||
|
|
||||||
|
Memory barriers control the order of references to shared memory.
|
||||||
|
They come in four kinds:
|
||||||
|
|
||||||
|
- smp_rmb() guarantees that all the LOAD operations specified before
|
||||||
|
the barrier will appear to happen before all the LOAD operations
|
||||||
|
specified after the barrier with respect to the other components of
|
||||||
|
the system.
|
||||||
|
|
||||||
|
In other words, smp_rmb() puts a partial ordering on loads, but is not
|
||||||
|
required to have any effect on stores.
|
||||||
|
|
||||||
|
- smp_wmb() guarantees that all the STORE operations specified before
|
||||||
|
the barrier will appear to happen before all the STORE operations
|
||||||
|
specified after the barrier with respect to the other components of
|
||||||
|
the system.
|
||||||
|
|
||||||
|
In other words, smp_wmb() puts a partial ordering on stores, but is not
|
||||||
|
required to have any effect on loads.
|
||||||
|
|
||||||
|
- smp_mb() guarantees that all the LOAD and STORE operations specified
|
||||||
|
before the barrier will appear to happen before all the LOAD and
|
||||||
|
STORE operations specified after the barrier with respect to the other
|
||||||
|
components of the system.
|
||||||
|
|
||||||
|
smp_mb() puts a partial ordering on both loads and stores. It is
|
||||||
|
stronger than both a read and a write memory barrier; it implies both
|
||||||
|
smp_rmb() and smp_wmb(), but it also prevents STOREs coming before the
|
||||||
|
barrier from overtaking LOADs coming after the barrier and vice versa.
|
||||||
|
|
||||||
|
- smp_read_barrier_depends() is a weaker kind of read barrier. On
|
||||||
|
most processors, whenever two loads are performed such that the
|
||||||
|
second depends on the result of the first (e.g., the first load
|
||||||
|
retrieves the address to which the second load will be directed),
|
||||||
|
the processor will guarantee that the first LOAD will appear to happen
|
||||||
|
before the second with respect to the other components of the system.
|
||||||
|
However, this is not always true---for example, it was not true on
|
||||||
|
Alpha processors. Whenever this kind of access happens to shared
|
||||||
|
memory (that is not protected by a lock), a read barrier is needed,
|
||||||
|
and smp_read_barrier_depends() can be used instead of smp_rmb().
|
||||||
|
|
||||||
|
Note that the first load really has to have a _data_ dependency and not
|
||||||
|
a control dependency. If the address for the second load is dependent
|
||||||
|
on the first load, but the dependency is through a conditional rather
|
||||||
|
than actually loading the address itself, then it's a _control_
|
||||||
|
dependency and a full read barrier or better is required.
|
||||||
|
|
||||||
|
|
||||||
|
This is the set of barriers that is required *between* two atomic_read()
|
||||||
|
and atomic_set() operations to achieve sequential consistency:
|
||||||
|
|
||||||
|
| 2nd operation |
|
||||||
|
|-----------------------------------------|
|
||||||
|
1st operation | (after last) | atomic_read | atomic_set |
|
||||||
|
---------------+--------------+-------------+------------|
|
||||||
|
(before first) | | none | smp_wmb() |
|
||||||
|
---------------+--------------+-------------+------------|
|
||||||
|
atomic_read | smp_rmb() | smp_rmb()* | ** |
|
||||||
|
---------------+--------------+-------------+------------|
|
||||||
|
atomic_set | none | smp_mb()*** | smp_wmb() |
|
||||||
|
---------------+--------------+-------------+------------|
|
||||||
|
|
||||||
|
* Or smp_read_barrier_depends().
|
||||||
|
|
||||||
|
** This requires a load-store barrier. How to achieve this varies
|
||||||
|
depending on the machine, but in practice smp_rmb()+smp_wmb()
|
||||||
|
should have the desired effect. For example, on PowerPC the
|
||||||
|
lwsync instruction is a combined load-load, load-store and
|
||||||
|
store-store barrier.
|
||||||
|
|
||||||
|
*** This requires a store-load barrier. On most machines, the only
|
||||||
|
way to achieve this is a full barrier.
|
||||||
|
|
||||||
|
|
||||||
|
You can see that the two possible definitions of atomic_mb_read()
|
||||||
|
and atomic_mb_set() are the following:
|
||||||
|
|
||||||
|
1) atomic_mb_read(p) = atomic_read(p); smp_rmb()
|
||||||
|
atomic_mb_set(p, v) = smp_wmb(); atomic_set(p, v); smp_mb()
|
||||||
|
|
||||||
|
2) atomic_mb_read(p) = smp_mb() atomic_read(p); smp_rmb()
|
||||||
|
atomic_mb_set(p, v) = smp_wmb(); atomic_set(p, v);
|
||||||
|
|
||||||
|
Usually the former is used, because smp_mb() is expensive and a program
|
||||||
|
normally has more reads than writes. Therefore it makes more sense to
|
||||||
|
make atomic_mb_set() the more expensive operation.
|
||||||
|
|
||||||
|
There are two common cases in which atomic_mb_read and atomic_mb_set
|
||||||
|
generate too many memory barriers, and thus it can be useful to manually
|
||||||
|
place barriers instead:
|
||||||
|
|
||||||
|
- when a data structure has one thread that is always a writer
|
||||||
|
and one thread that is always a reader, manual placement of
|
||||||
|
memory barriers makes the write side faster. Furthermore,
|
||||||
|
correctness is easy to check for in this case using the "pairing"
|
||||||
|
trick that is explained below:
|
||||||
|
|
||||||
|
thread 1 thread 1
|
||||||
|
------------------------- ------------------------
|
||||||
|
(other writes)
|
||||||
|
smp_wmb()
|
||||||
|
atomic_mb_set(&a, x) atomic_set(&a, x)
|
||||||
|
smp_wmb()
|
||||||
|
atomic_mb_set(&b, y) atomic_set(&b, y)
|
||||||
|
|
||||||
|
=>
|
||||||
|
thread 2 thread 2
|
||||||
|
------------------------- ------------------------
|
||||||
|
y = atomic_mb_read(&b) y = atomic_read(&b)
|
||||||
|
smp_rmb()
|
||||||
|
x = atomic_mb_read(&a) x = atomic_read(&a)
|
||||||
|
smp_rmb()
|
||||||
|
|
||||||
|
- sometimes, a thread is accessing many variables that are otherwise
|
||||||
|
unrelated to each other (for example because, apart from the current
|
||||||
|
thread, exactly one other thread will read or write each of these
|
||||||
|
variables). In this case, it is possible to "hoist" the implicit
|
||||||
|
barriers provided by atomic_mb_read() and atomic_mb_set() outside
|
||||||
|
a loop. For example, the above definition atomic_mb_read() gives
|
||||||
|
the following transformation:
|
||||||
|
|
||||||
|
n = 0; n = 0;
|
||||||
|
for (i = 0; i < 10; i++) => for (i = 0; i < 10; i++)
|
||||||
|
n += atomic_mb_read(&a[i]); n += atomic_read(&a[i]);
|
||||||
|
smp_rmb();
|
||||||
|
|
||||||
|
Similarly, atomic_mb_set() can be transformed as follows:
|
||||||
|
smp_mb():
|
||||||
|
|
||||||
|
smp_wmb();
|
||||||
|
for (i = 0; i < 10; i++) => for (i = 0; i < 10; i++)
|
||||||
|
atomic_mb_set(&a[i], false); atomic_set(&a[i], false);
|
||||||
|
smp_mb();
|
||||||
|
|
||||||
|
|
||||||
|
The two tricks can be combined. In this case, splitting a loop in
|
||||||
|
two lets you hoist the barriers out of the loops _and_ eliminate the
|
||||||
|
expensive smp_mb():
|
||||||
|
|
||||||
|
smp_wmb();
|
||||||
|
for (i = 0; i < 10; i++) { => for (i = 0; i < 10; i++)
|
||||||
|
atomic_mb_set(&a[i], false); atomic_set(&a[i], false);
|
||||||
|
atomic_mb_set(&b[i], false); smb_wmb();
|
||||||
|
} for (i = 0; i < 10; i++)
|
||||||
|
atomic_set(&a[i], false);
|
||||||
|
smp_mb();
|
||||||
|
|
||||||
|
The other thread can still use atomic_mb_read()/atomic_mb_set()
|
||||||
|
|
||||||
|
|
||||||
|
Memory barrier pairing
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
A useful rule of thumb is that memory barriers should always, or almost
|
||||||
|
always, be paired with another barrier. In the case of QEMU, however,
|
||||||
|
note that the other barrier may actually be in a driver that runs in
|
||||||
|
the guest!
|
||||||
|
|
||||||
|
For the purposes of pairing, smp_read_barrier_depends() and smp_rmb()
|
||||||
|
both count as read barriers. A read barriers shall pair with a write
|
||||||
|
barrier or a full barrier; a write barrier shall pair with a read
|
||||||
|
barrier or a full barrier. A full barrier can pair with anything.
|
||||||
|
For example:
|
||||||
|
|
||||||
|
thread 1 thread 2
|
||||||
|
=============== ===============
|
||||||
|
a = 1;
|
||||||
|
smp_wmb();
|
||||||
|
b = 2; x = b;
|
||||||
|
smp_rmb();
|
||||||
|
y = a;
|
||||||
|
|
||||||
|
Note that the "writing" thread are accessing the variables in the
|
||||||
|
opposite order as the "reading" thread. This is expected: stores
|
||||||
|
before the write barrier will normally match the loads after the
|
||||||
|
read barrier, and vice versa. The same is true for more than 2
|
||||||
|
access and for data dependency barriers:
|
||||||
|
|
||||||
|
thread 1 thread 2
|
||||||
|
=============== ===============
|
||||||
|
b[2] = 1;
|
||||||
|
smp_wmb();
|
||||||
|
x->i = 2;
|
||||||
|
smp_wmb();
|
||||||
|
a = x; x = a;
|
||||||
|
smp_read_barrier_depends();
|
||||||
|
y = x->i;
|
||||||
|
smp_read_barrier_depends();
|
||||||
|
z = b[y];
|
||||||
|
|
||||||
|
smp_wmb() also pairs with atomic_mb_read(), and smp_rmb() also pairs
|
||||||
|
with atomic_mb_set().
|
||||||
|
|
||||||
|
|
||||||
|
COMPARISON WITH LINUX KERNEL MEMORY BARRIERS
|
||||||
|
============================================
|
||||||
|
|
||||||
|
Here is a list of differences between Linux kernel atomic operations
|
||||||
|
and memory barriers, and the equivalents in QEMU:
|
||||||
|
|
||||||
|
- atomic operations in Linux are always on a 32-bit int type and
|
||||||
|
use a boxed atomic_t type; atomic operations in QEMU are polymorphic
|
||||||
|
and use normal C types.
|
||||||
|
|
||||||
|
- atomic_read and atomic_set in Linux give no guarantee at all;
|
||||||
|
atomic_read and atomic_set in QEMU include a compiler barrier
|
||||||
|
(similar to the ACCESS_ONCE macro in Linux).
|
||||||
|
|
||||||
|
- most atomic read-modify-write operations in Linux return void;
|
||||||
|
in QEMU, all of them return the old value of the variable.
|
||||||
|
|
||||||
|
- different atomic read-modify-write operations in Linux imply
|
||||||
|
a different set of memory barriers; in QEMU, all of them enforce
|
||||||
|
sequential consistency, which means they imply full memory barriers
|
||||||
|
before and after the operation.
|
||||||
|
|
||||||
|
- Linux does not have an equivalent of atomic_mb_read() and
|
||||||
|
atomic_mb_set(). In particular, note that set_mb() is a little
|
||||||
|
weaker than atomic_mb_set().
|
||||||
|
|
||||||
|
|
||||||
|
SOURCES
|
||||||
|
=======
|
||||||
|
|
||||||
|
* Documentation/memory-barriers.txt from the Linux kernel
|
||||||
|
|
||||||
|
* "The JSR-133 Cookbook for Compiler Writers", available at
|
||||||
|
http://g.oswego.edu/dl/jmm/cookbook.html
|
@ -23,6 +23,7 @@
|
|||||||
#include "qemu-common.h"
|
#include "qemu-common.h"
|
||||||
#include "qemu/timer.h"
|
#include "qemu/timer.h"
|
||||||
#include "qemu/queue.h"
|
#include "qemu/queue.h"
|
||||||
|
#include "qemu/atomic.h"
|
||||||
#include "monitor/monitor.h"
|
#include "monitor/monitor.h"
|
||||||
#include "sysemu/sysemu.h"
|
#include "sysemu/sysemu.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
@ -1726,7 +1727,7 @@ static void qxl_send_events(PCIQXLDevice *d, uint32_t events)
|
|||||||
trace_qxl_send_events_vm_stopped(d->id, events);
|
trace_qxl_send_events_vm_stopped(d->id, events);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
old_pending = __sync_fetch_and_or(&d->ram->int_pending, le_events);
|
old_pending = atomic_fetch_or(&d->ram->int_pending, le_events);
|
||||||
if ((old_pending & le_events) == le_events) {
|
if ((old_pending & le_events) == le_events) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include "hw/virtio/vhost.h"
|
#include "hw/virtio/vhost.h"
|
||||||
#include "hw/hw.h"
|
#include "hw/hw.h"
|
||||||
|
#include "qemu/atomic.h"
|
||||||
#include "qemu/range.h"
|
#include "qemu/range.h"
|
||||||
#include <linux/vhost.h>
|
#include <linux/vhost.h>
|
||||||
#include "exec/address-spaces.h"
|
#include "exec/address-spaces.h"
|
||||||
@ -47,11 +48,9 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
|
|||||||
addr += VHOST_LOG_CHUNK;
|
addr += VHOST_LOG_CHUNK;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* Data must be read atomically. We don't really
|
/* Data must be read atomically. We don't really need barrier semantics
|
||||||
* need the barrier semantics of __sync
|
* but it's easier to use atomic_* than roll our own. */
|
||||||
* builtins, but it's easier to use them than
|
log = atomic_xchg(from, 0);
|
||||||
* roll our own. */
|
|
||||||
log = __sync_fetch_and_and(from, 0);
|
|
||||||
while ((bit = sizeof(log) > sizeof(int) ?
|
while ((bit = sizeof(log) > sizeof(int) ?
|
||||||
ffsll(log) : ffs(log))) {
|
ffsll(log) : ffs(log))) {
|
||||||
hwaddr page_addr;
|
hwaddr page_addr;
|
||||||
|
@ -1,68 +1,202 @@
|
|||||||
#ifndef __QEMU_BARRIER_H
|
/*
|
||||||
#define __QEMU_BARRIER_H 1
|
* Simple interface for atomic operations.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013 Red Hat, Inc.
|
||||||
|
*
|
||||||
|
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
*
|
||||||
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||||
|
* See the COPYING file in the top-level directory.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __QEMU_ATOMIC_H
|
||||||
|
#define __QEMU_ATOMIC_H 1
|
||||||
|
|
||||||
|
#include "qemu/compiler.h"
|
||||||
|
|
||||||
|
/* For C11 atomic ops */
|
||||||
|
|
||||||
/* Compiler barrier */
|
/* Compiler barrier */
|
||||||
#define barrier() asm volatile("" ::: "memory")
|
#define barrier() ({ asm volatile("" ::: "memory"); (void)0; })
|
||||||
|
|
||||||
#if defined(__i386__)
|
#ifndef __ATOMIC_RELAXED
|
||||||
|
|
||||||
#include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Because of the strongly ordered x86 storage model, wmb() and rmb() are nops
|
* We use GCC builtin if it's available, as that can use mfence on
|
||||||
* on x86(well, a compiler barrier only). Well, at least as long as
|
* 32-bit as well, e.g. if built with -march=pentium-m. However, on
|
||||||
* qemu doesn't do accesses to write-combining memory or non-temporal
|
* i386 the spec is buggy, and the implementation followed it until
|
||||||
* load/stores from C code.
|
* 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
|
||||||
|
*/
|
||||||
|
#if defined(__i386__) || defined(__x86_64__)
|
||||||
|
#if !QEMU_GNUC_PREREQ(4, 4)
|
||||||
|
#if defined __x86_64__
|
||||||
|
#define smp_mb() ({ asm volatile("mfence" ::: "memory"); (void)0; })
|
||||||
|
#else
|
||||||
|
#define smp_mb() ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __alpha__
|
||||||
|
#define smp_read_barrier_depends() asm volatile("mb":::"memory")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Because of the strongly ordered storage model, wmb() and rmb() are nops
|
||||||
|
* here (a compiler barrier only). QEMU doesn't do accesses to write-combining
|
||||||
|
* qemu memory or non-temporal load/stores from C code.
|
||||||
*/
|
*/
|
||||||
#define smp_wmb() barrier()
|
#define smp_wmb() barrier()
|
||||||
#define smp_rmb() barrier()
|
#define smp_rmb() barrier()
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use GCC builtin if it's available, as that can use
|
* __sync_lock_test_and_set() is documented to be an acquire barrier only,
|
||||||
* mfence on 32 bit as well, e.g. if built with -march=pentium-m.
|
* but it is a full barrier at the hardware level. Add a compiler barrier
|
||||||
* However, on i386, there seem to be known bugs as recently as 4.3.
|
* to make it a full barrier also at the compiler level.
|
||||||
* */
|
*/
|
||||||
#if QEMU_GNUC_PREREQ(4, 4)
|
#define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i))
|
||||||
#define smp_mb() __sync_synchronize()
|
|
||||||
#else
|
|
||||||
#define smp_mb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif defined(__x86_64__)
|
/*
|
||||||
|
* Load/store with Java volatile semantics.
|
||||||
#define smp_wmb() barrier()
|
*/
|
||||||
#define smp_rmb() barrier()
|
#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i))
|
||||||
#define smp_mb() asm volatile("mfence" ::: "memory")
|
|
||||||
|
|
||||||
#elif defined(_ARCH_PPC)
|
#elif defined(_ARCH_PPC)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use an eieio() for wmb() on powerpc. This assumes we don't
|
* We use an eieio() for wmb() on powerpc. This assumes we don't
|
||||||
* need to order cacheable and non-cacheable stores with respect to
|
* need to order cacheable and non-cacheable stores with respect to
|
||||||
* each other
|
* each other.
|
||||||
|
*
|
||||||
|
* smp_mb has the same problem as on x86 for not-very-new GCC
|
||||||
|
* (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
|
||||||
*/
|
*/
|
||||||
#define smp_wmb() asm volatile("eieio" ::: "memory")
|
#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; })
|
||||||
|
|
||||||
#if defined(__powerpc64__)
|
#if defined(__powerpc64__)
|
||||||
#define smp_rmb() asm volatile("lwsync" ::: "memory")
|
#define smp_rmb() ({ asm volatile("lwsync" ::: "memory"); (void)0; })
|
||||||
#else
|
#else
|
||||||
#define smp_rmb() asm volatile("sync" ::: "memory")
|
#define smp_rmb() ({ asm volatile("sync" ::: "memory"); (void)0; })
|
||||||
#endif
|
#endif
|
||||||
|
#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; })
|
||||||
|
|
||||||
#define smp_mb() asm volatile("sync" ::: "memory")
|
#endif /* _ARCH_PPC */
|
||||||
|
|
||||||
#else
|
#endif /* C11 atomics */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For (host) platforms we don't have explicit barrier definitions
|
* For (host) platforms we don't have explicit barrier definitions
|
||||||
* for, we use the gcc __sync_synchronize() primitive to generate a
|
* for, we use the gcc __sync_synchronize() primitive to generate a
|
||||||
* full barrier. This should be safe on all platforms, though it may
|
* full barrier. This should be safe on all platforms, though it may
|
||||||
* be overkill for wmb() and rmb().
|
* be overkill for smp_wmb() and smp_rmb().
|
||||||
*/
|
*/
|
||||||
|
#ifndef smp_mb
|
||||||
|
#define smp_mb() __sync_synchronize()
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef smp_wmb
|
||||||
|
#ifdef __ATOMIC_RELEASE
|
||||||
|
#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
|
||||||
|
#else
|
||||||
#define smp_wmb() __sync_synchronize()
|
#define smp_wmb() __sync_synchronize()
|
||||||
#define smp_mb() __sync_synchronize()
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef smp_rmb
|
||||||
|
#ifdef __ATOMIC_ACQUIRE
|
||||||
|
#define smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
|
||||||
|
#else
|
||||||
#define smp_rmb() __sync_synchronize()
|
#define smp_rmb() __sync_synchronize()
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef smp_read_barrier_depends
|
||||||
|
#ifdef __ATOMIC_CONSUME
|
||||||
|
#define smp_read_barrier_depends() __atomic_thread_fence(__ATOMIC_CONSUME)
|
||||||
|
#else
|
||||||
|
#define smp_read_barrier_depends() barrier()
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef atomic_read
|
||||||
|
#define atomic_read(ptr) (*(__typeof__(*ptr) *volatile) (ptr))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef atomic_set
|
||||||
|
#define atomic_set(ptr, i) ((*(__typeof__(*ptr) *volatile) (ptr)) = (i))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* These have the same semantics as Java volatile variables.
|
||||||
|
* See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
|
||||||
|
* "1. Issue a StoreStore barrier (wmb) before each volatile store."
|
||||||
|
* 2. Issue a StoreLoad barrier after each volatile store.
|
||||||
|
* Note that you could instead issue one before each volatile load, but
|
||||||
|
* this would be slower for typical programs using volatiles in which
|
||||||
|
* reads greatly outnumber writes. Alternatively, if available, you
|
||||||
|
* can implement volatile store as an atomic instruction (for example
|
||||||
|
* XCHG on x86) and omit the barrier. This may be more efficient if
|
||||||
|
* atomic instructions are cheaper than StoreLoad barriers.
|
||||||
|
* 3. Issue LoadLoad and LoadStore barriers after each volatile load."
|
||||||
|
*
|
||||||
|
* If you prefer to think in terms of "pairing" of memory barriers,
|
||||||
|
* an atomic_mb_read pairs with an atomic_mb_set.
|
||||||
|
*
|
||||||
|
* And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
|
||||||
|
* while an atomic_mb_set is a st.rel followed by a memory barrier.
|
||||||
|
*
|
||||||
|
* These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
|
||||||
|
* (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
|
||||||
|
* Just always use the barriers manually by the rules above.
|
||||||
|
*/
|
||||||
|
#ifndef atomic_mb_read
|
||||||
|
#define atomic_mb_read(ptr) ({ \
|
||||||
|
typeof(*ptr) _val = atomic_read(ptr); \
|
||||||
|
smp_rmb(); \
|
||||||
|
_val; \
|
||||||
|
})
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef atomic_mb_set
|
||||||
|
#define atomic_mb_set(ptr, i) do { \
|
||||||
|
smp_wmb(); \
|
||||||
|
atomic_set(ptr, i); \
|
||||||
|
smp_mb(); \
|
||||||
|
} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef atomic_xchg
|
||||||
|
#ifdef __ATOMIC_SEQ_CST
|
||||||
|
#define atomic_xchg(ptr, i) ({ \
|
||||||
|
typeof(*ptr) _new = (i), _old; \
|
||||||
|
__atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
|
||||||
|
_old; \
|
||||||
|
})
|
||||||
|
#elif defined __clang__
|
||||||
|
#define atomic_xchg(ptr, i) __sync_exchange(ptr, i)
|
||||||
|
#else
|
||||||
|
/* __sync_lock_test_and_set() is documented to be an acquire barrier only. */
|
||||||
|
#define atomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Provide shorter names for GCC atomic builtins. */
|
||||||
|
#define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1)
|
||||||
|
#define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1)
|
||||||
|
#define atomic_fetch_add __sync_fetch_and_add
|
||||||
|
#define atomic_fetch_sub __sync_fetch_and_sub
|
||||||
|
#define atomic_fetch_and __sync_fetch_and_and
|
||||||
|
#define atomic_fetch_or __sync_fetch_and_or
|
||||||
|
#define atomic_cmpxchg __sync_val_compare_and_swap
|
||||||
|
|
||||||
|
/* And even shorter names that return void. */
|
||||||
|
#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1))
|
||||||
|
#define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1))
|
||||||
|
#define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n))
|
||||||
|
#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
|
||||||
|
#define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
|
||||||
|
#define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n))
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -293,8 +293,7 @@ static void migrate_fd_cleanup(void *opaque)
|
|||||||
|
|
||||||
static void migrate_finish_set_state(MigrationState *s, int new_state)
|
static void migrate_finish_set_state(MigrationState *s, int new_state)
|
||||||
{
|
{
|
||||||
if (__sync_val_compare_and_swap(&s->state, MIG_STATE_ACTIVE,
|
if (atomic_cmpxchg(&s->state, MIG_STATE_ACTIVE, new_state) == new_state) {
|
||||||
new_state) == new_state) {
|
|
||||||
trace_migrate_set_state(new_state);
|
trace_migrate_set_state(new_state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,15 +17,15 @@ typedef struct {
|
|||||||
static int worker_cb(void *opaque)
|
static int worker_cb(void *opaque)
|
||||||
{
|
{
|
||||||
WorkerTestData *data = opaque;
|
WorkerTestData *data = opaque;
|
||||||
return __sync_fetch_and_add(&data->n, 1);
|
return atomic_fetch_inc(&data->n);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int long_cb(void *opaque)
|
static int long_cb(void *opaque)
|
||||||
{
|
{
|
||||||
WorkerTestData *data = opaque;
|
WorkerTestData *data = opaque;
|
||||||
__sync_fetch_and_add(&data->n, 1);
|
atomic_inc(&data->n);
|
||||||
g_usleep(2000000);
|
g_usleep(2000000);
|
||||||
__sync_fetch_and_add(&data->n, 1);
|
atomic_inc(&data->n);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,7 +169,7 @@ static void test_cancel(void)
|
|||||||
/* Cancel the jobs that haven't been started yet. */
|
/* Cancel the jobs that haven't been started yet. */
|
||||||
num_canceled = 0;
|
num_canceled = 0;
|
||||||
for (i = 0; i < 100; i++) {
|
for (i = 0; i < 100; i++) {
|
||||||
if (__sync_val_compare_and_swap(&data[i].n, 0, 3) == 0) {
|
if (atomic_cmpxchg(&data[i].n, 0, 3) == 0) {
|
||||||
data[i].ret = -ECANCELED;
|
data[i].ret = -ECANCELED;
|
||||||
bdrv_aio_cancel(data[i].aiocb);
|
bdrv_aio_cancel(data[i].aiocb);
|
||||||
active--;
|
active--;
|
||||||
|
Loading…
Reference in New Issue
Block a user