您的位置:首页 > 编程语言

x86平台读取cpu支持sse2指令集的代码,以及原子操作的代码

2014-01-16 16:54 381 查看
// This module gets enough CPU information to optimize the

// atomicops module on x86.

#include <string.h>

#include "base/atomicops.h"

#include "base/basictypes.h"

// This file only makes sense with atomicops_internals_x86_gcc.h -- it

// depends on structs that are defined in that file.  If atomicops.h

// doesn't sub-include that file, then we aren't needed, and shouldn't

// try to do anything.

#ifdef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

// Inline cpuid instruction.  In PIC compilations, %ebx contains the address
// of the global offset table.  To avoid breaking such executables, this code
// must preserve that register's value across cpuid instructions: ebx/rbx is
// saved into edi/rdi before cpuid and restored afterwards, and the cpuid
// ebx result is returned through the "=D" (edi/rdi) output instead.
//
// NOTE(review): in the pasted source each backslash continuation was followed
// by a blank line, which terminates the macro definition early and leaves the
// asm statement dangling; the continuations are rejoined here.
#if defined(__i386__)
#define cpuid(a, b, c, d, inp) \
  asm("mov %%ebx, %%edi\n"     \
      "cpuid\n"                \
      "xchg %%edi, %%ebx\n"    \
      : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
#elif defined(__x86_64__)
#define cpuid(a, b, c, d, inp) \
  asm("mov %%rbx, %%rdi\n"     \
      "cpuid\n"                \
      "xchg %%rdi, %%rbx\n"    \
      : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp))
#endif

#if defined(cpuid)        // initialize the struct only on x86

// Set the flags so that code will run correctly and conservatively, so even

// if we haven't been initialized yet, we're probably single threaded, and our

// default values should hopefully be pretty safe.

// The one definition of the feature struct declared in the header.  Both
// flags start false so that code running before static initialization
// behaves conservatively: no mfence is attempted, and while the process is
// still single threaded the AMD lock/mb workaround is not needed anyway.
struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = {
  false,          // bug can't exist before process spawns multiple threads
  false,          // no SSE2
};

// Initialize the AtomicOps_Internalx86CPUFeatures struct from CPUID.
// Invoked once during static initialization (see AtomicOpsx86Initializer
// below), i.e. before main() and before threads are normally spawned.
static void AtomicOps_Internalx86CPUFeaturesInit()
{
  uint32 eax;
  uint32 ebx;
  uint32 ecx;
  uint32 edx;

  // Get vendor string (issue CPUID with eax = 0).  The 12-byte vendor id
  // comes back packed into ebx, edx, ecx, in that order.
  cpuid(eax, ebx, ecx, edx, 0);
  char vendor[13];
  memcpy(vendor, &ebx, 4);
  memcpy(vendor + 4, &edx, 4);
  memcpy(vendor + 8, &ecx, 4);
  vendor[12] = 0;  // NUL-terminate for strcmp below

  // get feature flags in ecx/edx, and family/model in eax
  cpuid(eax, ebx, ecx, edx, 1);

  int family = (eax >> 8) & 0xf;        // family and model fields
  int model = (eax >> 4) & 0xf;
  if (family == 0xf) {                  // use extended family and model fields
    family += (eax >> 20) & 0xff;
    model += ((eax >> 16) & 0xf) << 4;
  }

  // Opteron Rev E has a bug in which on very rare occasions a locked
  // instruction doesn't act as a read-acquire barrier if followed by a
  // non-locked read-modify-write instruction.  Rev F has this bug in
  // pre-release versions, but not in versions released to customers,
  // so we test only for Rev E, which is family 15, model 32..63 inclusive.
  if (strcmp(vendor, "AuthenticAMD") == 0 &&       // AMD
      family == 15 &&
      32 <= model && model <= 63) {
    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
  } else {
    AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false;
  }

  // edx bit 26 is SSE2, which tells us whether we can use mfence
  AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1);
}

namespace
{

// Helper whose constructor runs the CPUID probe above; instantiating one at
// namespace scope makes the probe happen during static initialization.
class AtomicOpsx86Initializer
{
 public:
  AtomicOpsx86Initializer()
  {
    AtomicOps_Internalx86CPUFeaturesInit();
  }
};

// A global to get us initialized on startup via static initialization :/
AtomicOpsx86Initializer g_initer;

}  // namespace

#endif  // if x86

#endif  // ifdef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

 

 

 

 

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

#include "base/base_export.h"

// This struct is not part of the public API of this module; clients may not

// use it.  (However, it's exported via BASE_EXPORT because clients implicitly

// do use it at link time by inlining these functions.)

// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively (both flags default to false in the .cc file).
struct AtomicOps_x86CPUFeatureStruct
{
  bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
                            // after acquire compare-and-swap.
  bool has_sse2;            // Processor has SSE2 (and therefore mfence).
};

// The single instance of the feature struct; defined and filled in by the
// matching .cc file via a static initializer.
BASE_EXPORT extern struct AtomicOps_x86CPUFeatureStruct
    AtomicOps_Internalx86CPUFeatures;

// Compiler-only barrier: forbids the compiler from moving memory accesses
// across it but emits no instruction.  #undef'd at the end of this header.
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base

{

namespace subtle

{

// 32-bit low-level operations on any platform.

// Atomically: if (*ptr == old_value) *ptr = new_value.  Returns the value
// *ptr held before the operation (equal to old_value iff the swap happened).
// "NoBarrier" refers only to the contract; no extra ordering is promised.
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value)
{
  Atomic32 prev;
  // cmpxchg compares %eax (old_value, tied via "0") with *ptr; on match it
  // stores new_value.  Either way %eax ends up with the previous *ptr.
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically stores new_value into *ptr and returns the value *ptr held
// before the store.
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically adds increment to *ptr; returns the new (post-increment) value.
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  // xadd: exchanges temp with *ptr, then stores the sum into *ptr.
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

// Atomically adds increment to *ptr and returns the new value, with full
// barrier semantics.  Identical to NoBarrier_AtomicIncrement except for the
// extra lfence issued on processors with the AMD Opteron Rev E bug (see the
// .cc file), where a locked op may fail to act as a read-acquire barrier.
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Compare-and-swap with acquire semantics.  The locked cmpxchg supplies the
// ordering; the conditional lfence works around the AMD Opteron Rev E bug
// described in the .cc file.
inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug)
  {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics.  Simply delegates to the
// no-barrier variant; no extra fence is issued on the release side.
inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value)
{
  Atomic32 previous = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  return previous;
}

// Plain (unordered) store of value into *ptr.
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value)
{
  ptr[0] = value;
}

#if defined(__x86_64__)

// 64-bit implementations of the memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier()
{
  __asm__ __volatile__("mfence" : : : "memory");
}

// Store value into *ptr, then issue a full hardware barrier so the store is
// visible before anything that follows.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value)
{
  *ptr = value;
  MemoryBarrier();
}

#else

// 32-bit full memory barrier: use mfence when the CPU has SSE2, otherwise
// fall back to a locked exchange on a dummy location, which also orders
// memory on pre-SSE2 processors.
inline void MemoryBarrier()
{
  if (AtomicOps_Internalx86CPUFeatures.has_sse2)
  {
    __asm__ __volatile__("mfence" : : : "memory");
  } else { // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

// 32-bit Acquire_Store: store then mfence when SSE2 is available; otherwise
// the locked exchange doubles as both the store and the barrier.
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
                          // acts as a barrier on PIII
  }
}

#endif

// Release_Store: the compiler barrier keeps earlier accesses from being
// moved past the store; the hardware side needs nothing extra on x86.
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value)
{
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

// Plain (unordered) load of *ptr.
inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  Atomic32 current = *ptr;
  return current;
}

// Acquire_Load: load first, then a compiler barrier so later accesses are
// not hoisted above the load.
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Release_Load: full hardware barrier before the load.
inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

// Atomically: if (*ptr == old_value) *ptr = new_value.  Returns the value
// *ptr held before the operation.  64-bit twin of the Atomic32 version.
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  // cmpxchgq compares %rax (old_value, tied via "0") with *ptr; on match it
  // stores new_value.  Either way %rax ends up with the previous *ptr.
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

// Atomically stores new_value into *ptr and returns the previous value.
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

// Atomically adds increment to *ptr; returns the new (post-increment) value.
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  // xaddq: exchanges temp with *ptr, then stores the sum into *ptr.
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

// Atomically adds increment to *ptr and returns the new value, with full
// barrier semantics; issues the extra lfence on processors with the AMD
// Opteron Rev E bug (see the .cc file).
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

// Plain (unordered) store of value into *ptr.
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  ptr[0] = value;
}

// Store value into *ptr, then a full hardware barrier (64-bit variant).
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

// Release_Store: compiler barrier, then a plain store.  The long comment
// below explains why no hardware fence is required on x86.
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value; // An x86 store acts as a release barrier
                // for current AMD/Intel chips as of Jan 2008.
                // See also Acquire_Load(), below.
  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

// Plain (unordered) load of *ptr.
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  Atomic64 loaded = *ptr;
  return loaded;
}

// Acquire_Load: load first, then a compiler barrier so later accesses are
// not hoisted above the load.
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
                         // for current AMD/Intel chips as of Jan 2008.
                         // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

// Release_Load: full hardware barrier before the load (64-bit variant).
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

// Compare-and-swap with acquire semantics; the conditional lfence works
// around the AMD Opteron Rev E bug described in the .cc file.
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

// Compare-and-swap with release semantics.  Delegates to the no-barrier
// variant, matching the 32-bit version above; no extra fence is issued.
inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 previous = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  return previous;
}

#endif  // defined(__x86_64__)

} // namespace base::subtle

} // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

 
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: