Actual source code: sseenabled.c
1: #include "petscsys.h" /*I "petscsys.h" I*/
3: #ifdef PETSC_HAVE_SSE
5: #include PETSC_HAVE_SSE
6: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
8: #include <string.h>
12: PetscErrorCode PetscSSEHardwareTest(PetscTruth *flag)
13: {
15: char *vendor;
16: char Intel[13]="GenuineIntel";
17: char AMD[13] ="AuthenticAMD";
20: PetscMalloc(13*sizeof(char),&vendor);
21: strcpy(vendor,"************");
22: CPUID_GET_VENDOR(vendor);
23: if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
24: /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
25: /* to denote availability of SSE Support */
26: unsigned long myeax,myebx,myecx,myedx;
27: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
28: if (myedx & SSE_FEATURE_FLAG) {
29: *flag = PETSC_TRUE;
30: } else {
31: *flag = PETSC_FALSE;
32: }
33: }
34: PetscFree(vendor);
35: return(0);
36: }
38: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great! If not, an illegal instruction signal will be raised,
   so catch it and return an error code.
*/
47: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
   PetscSSEDisabledHandler - SIGILL handler for the SSE probe.

   Ignores any further SIGILL and terminates the process immediately with a
   nonzero exit status (-1, seen by the parent as 255).

   _exit() is used instead of exit(): exit() is not async-signal-safe, and in
   a forked process it would also run atexit handlers and flush stdio buffers
   inherited from the parent.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig; /* the handler is only ever registered for SIGILL */
  signal(SIGILL,SIG_IGN);
  _exit(-1);
}
56: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscTruth *flag)
57: {
58: int status, pid = 0;
60: signal(SIGILL,PetscSSEDisabledHandler);
61: pid = fork();
62: if (pid==0) {
63: SSE_SCOPE_BEGIN;
64: XOR_PS(XMM0,XMM0);
65: SSE_SCOPE_END;
66: exit(0);
67: } else {
68: wait(&status);
69: }
70: if (!status) {
71: *flag = PETSC_TRUE;
72: } else {
73: *flag = PETSC_FALSE;
74: }
75: return(0);
76: }
78: #else
79: /*
80: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
81: Windows ME/2000 doesn't disable SSE Hardware
82: */
83: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
84: #endif
88: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscTruth *flag)
89: {
91: if (flag) {
92: *flag = PETSC_TRUE;
93: }
94: return(0);
95: }
97: #else /* Not defined PETSC_HAVE_SSE */
99: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
100: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
104: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscTruth *flag)
105: {
107: if (flag) {
108: *flag = PETSC_FALSE;
109: }
110: return(0);
111: }
113: #endif /* defined PETSC_HAVE_SSE */
117: /*@C
118: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
119: set can be used. Some operating systems do not allow the use of these instructions despite
120: hardware availability.
122: Collective on MPI_Comm
124: Input Parameter:
125: . comm - the MPI Communicator
127: Output Parameters:
128: . lflag - Local Flag: PETSC_TRUE if enabled in this process
129: . gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
131: Notes:
132: PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.
134: Options Database Keys:
135: . -disable_sse - Disable use of hand tuned Intel SSE implementations
137: Level: developer
138: @*/
139: static PetscTruth petsc_sse_local_is_untested = PETSC_TRUE;
140: static PetscTruth petsc_sse_enabled_local = PETSC_FALSE;
141: static PetscTruth petsc_sse_global_is_untested = PETSC_TRUE;
142: static PetscTruth petsc_sse_enabled_global = PETSC_FALSE;
143: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscTruth *lflag,PetscTruth *gflag) {
145: PetscTruth disabled_option;
149: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
150: disabled_option = PETSC_FALSE;
152: PetscOptionsName("-disable_sse",
153: "Disable use of hand tuned Intel SSE implementations <true,false>.",
154: "PetscSSEIsEnabled",&disabled_option);
155: if (disabled_option) {
156: petsc_sse_local_is_untested = PETSC_FALSE;
157: petsc_sse_enabled_local = PETSC_FALSE;
158: petsc_sse_global_is_untested = PETSC_FALSE;
159: petsc_sse_enabled_global = PETSC_FALSE;
160: }
162: if (petsc_sse_local_is_untested) {
163: PetscSSEHardwareTest(&petsc_sse_enabled_local);
164: if (petsc_sse_enabled_local) {
165: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
166: }
167: petsc_sse_local_is_untested = PETSC_FALSE;
168: }
170: if (gflag && petsc_sse_global_is_untested) {
171: MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
172: petsc_sse_global_is_untested = PETSC_FALSE;
173: }
174: }
176: if (lflag) {
177: *lflag = petsc_sse_enabled_local;
178: }
179: if (gflag) {
180: *gflag = petsc_sse_enabled_global;
181: }
182: return(0);
183: }