您的位置:首页 > 编程语言

x86平台读取cpu支持sse2指令集的代码,以及原子操作的代码

2014-01-16 16:54 381 查看
// This module gets enough CPU information to optimize the

// atomicops module on x86.

#include <string.h>

#include "base/atomicops.h"

#include "base/basictypes.h"

// This file only makes sense with atomicops_internals_x86_gcc.h -- it

// depends on structs that are defined in that file.  If atomicops.h

// doesn't sub-include that file, then we aren't needed, and shouldn't

// try to do anything.

#ifdef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

// Inline cpuid instruction.  In PIC compilations, %ebx contains the address
// of the global offset table.  To avoid breaking such executables, this code
// must preserve that register's value across cpuid instructions: ebx/rbx is
// saved into edi/rdi before cpuid and restored afterwards, and the cpuid
// ebx result is returned through the "=D" (edi/rdi) output instead.
//
// NOTE(review): in the pasted source each backslash continuation was followed
// by a blank line, which terminates the macro definition early and leaves the
// asm statement dangling; the continuations are rejoined here.
#if defined(__i386__)
#define cpuid(a, b, c, d, inp) \
  asm("mov %%ebx, %%edi\n"     \
      "cpuid\n"                \
      "xchg %%edi, %%ebx\n"    \
      : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
#elif defined(__x86_64__)
#define cpuid(a, b, c, d, inp) \
  asm("mov %%rbx, %%rdi\n"     \
      "cpuid\n"                \
      "xchg %%rdi, %%rbx\n"    \
      : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
#endif

#if defined(cpuid)        // initialize the struct only on x86

// Set the flags so that code will run correctly and conservatively, so even

// if we haven't been initialized yet, we're probably single threaded, and our

// default values should hopefully be pretty safe.

// The one definition of the feature struct declared in the header.  Both
// flags start false so that code running before static initialization
// behaves conservatively: no mfence is attempted, and while the process is
// still single threaded the AMD lock/mb workaround is not needed anyway.
struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
  false,          // bug can't exist before process spawns multiple threads
  false,          // no SSE2
};

// Initialize the AtomicOps_Internalx86CPUFeatures struct from CPUID.
// Invoked once during static initialization (see AtomicOpsx86Initializer
// below), i.e. before main() and before threads are normally spawned.
static void AtomicOps_Internalx86CPUFeaturesInit()
{
  uint32 eax;
  uint32 ebx;
  uint32 ecx;
  uint32 edx;

  // Get vendor string (issue CPUID with eax = 0).  The 12-byte vendor id
  // comes back packed into ebx, edx, ecx, in that order.
  cpuid(eax, ebx, ecx, edx, 0);
  char vendor[13];
  memcpy(vendor, &ebx, 4);
  memcpy(vendor + 4, &edx, 4);
  memcpy(vendor + 8, &ecx, 4);
  vendor[12] = 0;  // NUL-terminate for strcmp below

  // get feature flags in ecx/edx, and family/model in eax
  cpuid(eax, ebx, ecx, edx, 1);

  int family = (eax >> 8) & 0xf;        // family and model fields
  int model = (eax >> 4) & 0xf;
  if (family == 0xf) {                  // use extended family and model fields
    family += (eax >> 20) & 0xff;
    model += ((eax >> 16) & 0xf) << 4;
  }

  // Opteron Rev E has a bug in which on very rare occasions a locked
  // instruction doesn't act as a read-acquire barrier if followed by a
  // non-locked read-modify-write instruction.  Rev F has this bug in
  // pre-release versions, but not in versions released to customers,
  // so we test only for Rev E, which is family 15, model 32..63 inclusive.
  if (strcmp(vendor, "AuthenticAMD") == 0 &&       // AMD
      family == 15 &&
      32 <= model && model <= 63) {
    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
  } else {
    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
  }

  // edx bit 26 is SSE2, which tells us whether we can use mfence
  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
}

namespace
{

// Helper whose constructor runs the CPUID probe above; instantiating one at
// namespace scope makes the probe happen during static initialization.
class AtomicOpsx86Initializer
{
 public:
  AtomicOpsx86Initializer()
  {
    AtomicOps_Internalx86CPUFeaturesInit();
  }
};

// A global to get us initialized on startup via static initialization :/
AtomicOpsx86Initializer g_initer;

}  // namespace

#endif  // if x86

#endif  // ifdef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

 

 

 

 

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

#include "base/base_export.h"

// This struct is not part of the public API of this module; clients may not

// use it.  (However, it's exported via BASE_EXPORT because clients implicitly

// do use it at link time by inlining these functions.)

// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively (both flags default to false in the .cc file).
struct AtomicOps_x86CPUFeatureStruct
{
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2 (and therefore mfence).
};

// The single instance of the feature struct; defined and filled in by the
// matching .cc file via a static initializer.
BASE_EXPORT extern struct AtomicOps_x86CPUFeatureStruct
    AtomicOps_Internalx86CPUFeatures;

// Compiler-only barrier: forbids the compiler from moving memory accesses
// across it but emits no instruction.  #undef'd at the end of this header.
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base

{

namespace subtle

{

// 32-bit low-level operations on any platform.

// Atomically: if (*ptr == old_value) *ptr = new_value.  Returns the value
// *ptr held before the operation (equal to old_value iff the swap happened).
// "NoBarrier" refers only to the contract; no extra ordering is promised.
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value)
{
  Atomic32 prev;
  // cmpxchg compares %eax (old_value, tied via "0") with *ptr; on match it
  // stores new_value.  Either way %eax ends up with the previous *ptr.
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically stores new_value into *ptr and returns the value *ptr held
// before the store.
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically adds increment to *ptr; returns the new (post-increment) value.
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  // xadd: exchanges temp with *ptr, then stores the sum into *ptr.
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

// Atomically adds increment to *ptr and returns the new value, with full
// barrier semantics.  Identical to NoBarrier_AtomicIncrement except for the
// extra lfence issued on processors with the AMD Opteron Rev E bug (see the
// .cc file), where a locked op may fail to act as a read-acquire barrier.
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Compare-and-swap with acquire semantics.  The locked cmpxchg supplies the
// ordering; the conditional lfence works around the AMD Opteron Rev E bug
// described in the .cc file.
inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug)
  {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics.  Simply delegates to the
// no-barrier variant; no extra fence is issued on the release side.
inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value)
{
  Atomic32 previous = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  return previous;
}

// Plain (unordered) store of value into *ptr.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value)
{
  ptr[0] = value;
}

#if defined(__x86_64__)

// 64-bit implementations of the memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier()
{
  __asm__ __volatile__("mfence" : : : "memory");
}

// Store value into *ptr, then issue a full hardware barrier so the store is
// visible before anything that follows.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value)
{
  *ptr = value;
  MemoryBarrier();
}

#else

// 32-bit full memory barrier: use mfence when the CPU has SSE2, otherwise
// fall back to a locked exchange on a dummy location, which also orders
// memory on pre-SSE2 processors.
inline void MemoryBarrier()
{
  if (AtomicOps_Internalx86CPUFeatures.has_sse2)
  {
    __asm__ __volatile__("mfence" : : : "memory");
  } else { // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

// 32-bit Acquire_Store: store then mfence when SSE2 is available; otherwise
// the locked exchange doubles as both the store and the barrier.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}

#endif

// Release_Store: the compiler barrier keeps earlier accesses from being
// moved past the store; the hardware side needs nothing extra on x86.
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value)
{
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

// Plain (unordered) load of *ptr.
inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  Atomic32 current = *ptr;
  return current;
}

// Acquire_Load: load first, then a compiler barrier so later accesses are
// not hoisted above the load.
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Release_Load: full hardware barrier before the load.
inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

// Atomically: if (*ptr == old_value) *ptr = new_value.  Returns the value
// *ptr held before the operation.  64-bit twin of the Atomic32 version.
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  // cmpxchgq compares %rax (old_value, tied via "0") with *ptr; on match it
  // stores new_value.  Either way %rax ends up with the previous *ptr.
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically stores new_value into *ptr and returns the previous value.
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically adds increment to *ptr; returns the new (post-increment) value.
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  // xaddq: exchanges temp with *ptr, then stores the sum into *ptr.
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

// Atomically adds increment to *ptr and returns the new value, with full
// barrier semantics; issues the extra lfence on processors with the AMD
// Opteron Rev E bug (see the .cc file).
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Plain (unordered) store of value into *ptr.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  ptr[0] = value;
}

// Store value into *ptr, then a full hardware barrier (64-bit variant).
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

// Release_Store: compiler barrier, then a plain store.  The long comment
// below explains why no hardware fence is required on x86.
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier
                // for current AMD/Intel chips as of Jan 2008.
                // See also Acquire_Load(), below.
  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

// Plain (unordered) load of *ptr.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 loaded = *ptr;
  return loaded;
}

// Acquire_Load: load first, then a compiler barrier so later accesses are
// not hoisted above the load.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
                         // for current AMD/Intel chips as of Jan 2008.
                         // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Release_Load: full hardware barrier before the load (64-bit variant).
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

// Compare-and-swap with acquire semantics; the conditional lfence works
// around the AMD Opteron Rev E bug described in the .cc file.
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics.  Delegates to the no-barrier
// variant, matching the 32-bit version above; no extra fence is issued.
inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 previous = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  return previous;
}

#endif  // defined(__x86_64__)

} // namespace base::subtle

} // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

 
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: