Actual source code: sseenabled.c

petsc-3.9.4 2018-09-11
Report Typos and Errors

  2:  #include <petscsys.h>

  4: #if defined(PETSC_HAVE_SSE)

  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: PetscErrorCode  PetscSSEHardwareTest(PetscBool  *flag)
 10: {
 12:   char           *vendor;
 13:   char           Intel[13]="GenuineIntel";
 14:   char           AMD[13]  ="AuthenticAMD";

 17:   PetscMalloc1(13,&vendor);
 18:   strcpy(vendor,"************");
 19:   CPUID_GET_VENDOR(vendor);
 20:   if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
 21:     /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
 22:     /* to denote availability of SSE Support */
 23:     unsigned long myeax,myebx,myecx,myedx;
 24:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 25:     if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 26:     else *flag = PETSC_FALSE;
 27:   }
 28:   PetscFree(vendor);
 29:   return(0);
 30: }

 32: #if defined(PETSC_HAVE_FORK)
 #include <errno.h>
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 /*
    Early versions of the Linux kernel disable SSE hardware because
    they do not know how to preserve the SSE state at a context switch.
    To detect this, try an SSE instruction in another process.
    If it works, great!  If not, an illegal instruction signal will be raised,
    so catch it and end that process with a nonzero exit status.
 */
 #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 /*
    PetscSSEDisabledHandler - SIGILL handler used by the probe process: ignores any further
    SIGILL and terminates the process with exit status -1.

    _exit() is used instead of exit() because exit() is not async-signal-safe and would
    also run atexit handlers and flush stdio buffers from inside the handler.
 */
 static void PetscSSEDisabledHandler(int sig)
 {
   (void)sig; /* only ever installed for SIGILL; the value is not needed */
   signal(SIGILL,SIG_IGN);
   _exit(-1);
 }

 49: PetscErrorCode  PetscSSEOSEnabledTest_Linux(PetscBool  *flag)
 50: {
 51:   int status, pid = 0;

 54:   signal(SIGILL,PetscSSEDisabledHandler);
 55:   pid = fork();
 56:   if (pid==0) {
 57:     SSE_SCOPE_BEGIN;
 58:     XOR_PS(XMM0,XMM0);
 59:     SSE_SCOPE_END;
 60:     exit(0);
 61:   } else wait(&status);
 62:   if (!status) *flag = PETSC_TRUE;
 63:   else *flag = PETSC_FALSE;
 64:   return(0);
 65: }

 67: #else
 68: /*
 69:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 70:    Windows ME/2000 doesn't disable SSE Hardware
 71: */
 72: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 73: #endif

 75: PetscErrorCode  PetscSSEOSEnabledTest_TRUE(PetscBool  *flag)
 76: {
 78:   if (flag) *flag = PETSC_TRUE;
 79:   return(0);
 80: }

 82: #else  /* Not defined PETSC_HAVE_SSE */

 84: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
 85: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 87: PetscErrorCode  PetscSSEEnabledTest_FALSE(PetscBool  *flag)
 88: {
 90:   if (flag) *flag = PETSC_FALSE;
 91:   return(0);
 92: }

 94: #endif /* defined PETSC_HAVE_SSE */

 96: /*@C
 97:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
 98:      set can be used.  Some operating systems do not allow the use of these instructions despite
 99:      hardware availability.

101:      Collective on MPI_Comm

103:      Input Parameter:
104: .    comm - the MPI Communicator

106:      Output Parameters:
107: .    lflag - Local Flag:  PETSC_TRUE if enabled in this process
108: .    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

110:      Notes:
111:      NULL can be specified for lflag or gflag if either of these values is not desired.

113:      Options Database Keys:
114: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

116:      Level: developer
117: @*/
118: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
119: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
120: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
121: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;
122: PetscErrorCode  PetscSSEIsEnabled(MPI_Comm comm,PetscBool  *lflag,PetscBool  *gflag)
123: {
125:   PetscBool      disabled_option;

128:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
129:     disabled_option = PETSC_FALSE;

131:     PetscOptionsGetBool(NULL,NULL,"-disable_sse",&disabled_option,NULL);
132:     if (disabled_option) {
133:       petsc_sse_local_is_untested  = PETSC_FALSE;
134:       petsc_sse_enabled_local      = PETSC_FALSE;
135:       petsc_sse_global_is_untested = PETSC_FALSE;
136:       petsc_sse_enabled_global     = PETSC_FALSE;
137:     }

139:     if (petsc_sse_local_is_untested) {
140:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
141:       if (petsc_sse_enabled_local) {
142:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
143:       }
144:       petsc_sse_local_is_untested = PETSC_FALSE;
145:     }

147:     if (gflag && petsc_sse_global_is_untested) {
148:       MPIU_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);

150:       petsc_sse_global_is_untested = PETSC_FALSE;
151:     }
152:   }

154:   if (lflag) *lflag = petsc_sse_enabled_local;
155:   if (gflag) *gflag = petsc_sse_enabled_global;
156:   return(0);
157: }