Actual source code: sseenabled.c

petsc-3.13.6 2020-09-29
Report Typos and Errors

  2:  #include <petscsys.h>

  4: #if defined(PETSC_HAVE_SSE)

  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: PetscErrorCode  PetscSSEHardwareTest(PetscBool  *flag)
 10: {
 12:   char           vendor[13];
 13:   char           Intel[13]="GenuineIntel";
 14:   char           AMD[13]  ="AuthenticAMD";
 15:   char           Hygon[13]="HygonGenuine";
 16:   PetscBool      flg;

 19:   PetscStrncpy(vendor,"************",sizeof(vendor));
 20:   CPUID_GET_VENDOR(vendor);
 21:   PetscStrcmp(vendor,Intel,&flg);
 22:   if (!flg) {PetscStrcmp(vendor,AMD,&flg);}
 23:   if (!flg) {PetscStrcmp(vendor,Hygon,&flg);
 24:     if (flg) {
 25:     /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
 26:     /* to denote availability of SSE Support */
 27:     unsigned long myeax,myebx,myecx,myedx;
 28:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 29:     if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 30:     else *flag = PETSC_FALSE;
 31:   }
 32:   return(0);
 33: }

 35: #if defined(PETSC_HAVE_FORK)
#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
 37: /*
 38:    Early versions of the Linux kernel disabled SSE hardware because
 39:    they did not know how to preserve the SSE state at a context switch.
 40:    To detect this feature, try an sse instruction in another process.
 41:    If it works, great!  If not, an illegal instruction signal will be thrown,
 42:    so catch it and return an error code.
 43: */
 44: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 46: static void PetscSSEDisabledHandler(int sig)
 47: {
 48:   signal(SIGILL,SIG_IGN);
 49:   exit(-1);
 50: }

 52: PetscErrorCode  PetscSSEOSEnabledTest_Linux(PetscBool  *flag)
 53: {
 54:   int status, pid = 0;

 57:   signal(SIGILL,PetscSSEDisabledHandler);
 58:   pid = fork();
 59:   if (pid==0) {
 60:     SSE_SCOPE_BEGIN;
 61:     XOR_PS(XMM0,XMM0);
 62:     SSE_SCOPE_END;
 63:     exit(0);
 64:   } else wait(&status);
 65:   if (!status) *flag = PETSC_TRUE;
 66:   else *flag = PETSC_FALSE;
 67:   return(0);
 68: }

 70: #else
 71: /*
 72:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 73:    Windows ME/2000 doesn't disable SSE Hardware
 74: */
 75: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 76: #endif

 78: PetscErrorCode  PetscSSEOSEnabledTest_TRUE(PetscBool  *flag)
 79: {
 81:   if (flag) *flag = PETSC_TRUE;
 82:   return(0);
 83: }

 85: #else  /* Not defined PETSC_HAVE_SSE */

 87: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
 88: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 90: PetscErrorCode  PetscSSEEnabledTest_FALSE(PetscBool  *flag)
 91: {
 93:   if (flag) *flag = PETSC_FALSE;
 94:   return(0);
 95: }

 97: #endif /* defined PETSC_HAVE_SSE */

 99: /*@C
100:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
101:      set can be used.  Some operating systems do not allow the use of these instructions despite
102:      hardware availability.

104:      Collective

106:      Input Parameter:
107: .    comm - the MPI Communicator

109:      Output Parameters:
110: +    lflag - Local Flag:  PETSC_TRUE if enabled in this process
111: -    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

113:      Notes:
114:      NULL can be specified for lflag or gflag if either of these values are not desired.

116:      Options Database Keys:
117: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

119:      Level: developer
120: @*/
121: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
122: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
123: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
124: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;
125: PetscErrorCode  PetscSSEIsEnabled(MPI_Comm comm,PetscBool  *lflag,PetscBool  *gflag)
126: {
128:   PetscBool      disabled_option;

131:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
132:     disabled_option = PETSC_FALSE;

134:     PetscOptionsGetBool(NULL,NULL,"-disable_sse",&disabled_option,NULL);
135:     if (disabled_option) {
136:       petsc_sse_local_is_untested  = PETSC_FALSE;
137:       petsc_sse_enabled_local      = PETSC_FALSE;
138:       petsc_sse_global_is_untested = PETSC_FALSE;
139:       petsc_sse_enabled_global     = PETSC_FALSE;
140:     }

142:     if (petsc_sse_local_is_untested) {
143:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
144:       if (petsc_sse_enabled_local) {
145:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
146:       }
147:       petsc_sse_local_is_untested = PETSC_FALSE;
148:     }

150:     if (gflag && petsc_sse_global_is_untested) {
151:       MPIU_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);

153:       petsc_sse_global_is_untested = PETSC_FALSE;
154:     }
155:   }

157:   if (lflag) *lflag = petsc_sse_enabled_local;
158:   if (gflag) *gflag = petsc_sse_enabled_global;
159:   return(0);
160: }