Actual source code: sseenabled.c
petsc-3.5.4 2015-05-23
2: #include <petscsys.h> /*I "petscsys.h" I*/
4: #if defined(PETSC_HAVE_SSE)
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
11: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
12: {
14: char *vendor;
15: char Intel[13]="GenuineIntel";
16: char AMD[13] ="AuthenticAMD";
19: PetscMalloc1(13,&vendor);
20: strcpy(vendor,"************");
21: CPUID_GET_VENDOR(vendor);
22: if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
23: /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
24: /* to denote availability of SSE Support */
25: unsigned long myeax,myebx,myecx,myedx;
26: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
27: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
28: else *flag = PETSC_FALSE;
29: }
30: PetscFree(vendor);
31: return(0);
32: }
34: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great! If not, an illegal instruction signal will be raised,
   so catch it and return an error code.
*/
43: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
   PetscSSEDisabledHandler - SIGILL handler for the SSE probe process.

   Terminates the process with a nonzero exit status so the waiting parent
   can tell that the SSE instruction trapped.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig; /* only ever installed for SIGILL */
  signal(SIGILL,SIG_IGN);
  /* _exit() rather than exit(): exit() is not async-signal-safe and would run
     atexit handlers and flush stdio buffers inherited from the parent */
  _exit(1);
}
53: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
54: {
55: int status, pid = 0;
58: signal(SIGILL,PetscSSEDisabledHandler);
59: pid = fork();
60: if (pid==0) {
61: SSE_SCOPE_BEGIN;
62: XOR_PS(XMM0,XMM0);
63: SSE_SCOPE_END;
64: exit(0);
65: } else wait(&status);
66: if (!status) *flag = PETSC_TRUE;
67: else *flag = PETSC_FALSE;
68: return(0);
69: }
71: #else
72: /*
73: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
74: Windows ME/2000 doesn't disable SSE Hardware
75: */
76: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
77: #endif
81: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
82: {
84: if (flag) *flag = PETSC_TRUE;
85: return(0);
86: }
88: #else /* Not defined PETSC_HAVE_SSE */
90: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
91: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
95: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
96: {
98: if (flag) *flag = PETSC_FALSE;
99: return(0);
100: }
102: #endif /* defined PETSC_HAVE_SSE */
106: /*@C
107: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
108: set can be used. Some operating systems do not allow the use of these instructions despite
109: hardware availability.
111: Collective on MPI_Comm
113: Input Parameter:
114: . comm - the MPI Communicator
116: Output Parameters:
117: . lflag - Local Flag: PETSC_TRUE if enabled in this process
118: . gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
120: Notes:
121: NULL can be specified for lflag or gflag if either of these values are not desired.
123: Options Database Keys:
124: . -disable_sse - Disable use of hand tuned Intel SSE implementations
126: Level: developer
127: @*/
128: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
129: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
130: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
131: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
132: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscBool *lflag,PetscBool *gflag)
133: {
135: PetscBool disabled_option;
138: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
139: disabled_option = PETSC_FALSE;
141: PetscOptionsBool("-disable_sse",
142: "Disable use of hand tuned Intel SSE implementations <true,false>.",
143: "PetscSSEIsEnabled",disabled_option,&disabled_option,NULL);
144: if (disabled_option) {
145: petsc_sse_local_is_untested = PETSC_FALSE;
146: petsc_sse_enabled_local = PETSC_FALSE;
147: petsc_sse_global_is_untested = PETSC_FALSE;
148: petsc_sse_enabled_global = PETSC_FALSE;
149: }
151: if (petsc_sse_local_is_untested) {
152: PetscSSEHardwareTest(&petsc_sse_enabled_local);
153: if (petsc_sse_enabled_local) {
154: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
155: }
156: petsc_sse_local_is_untested = PETSC_FALSE;
157: }
159: if (gflag && petsc_sse_global_is_untested) {
160: MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);
162: petsc_sse_global_is_untested = PETSC_FALSE;
163: }
164: }
166: if (lflag) *lflag = petsc_sse_enabled_local;
167: if (gflag) *gflag = petsc_sse_enabled_global;
168: return(0);
169: }