Actual source code: sseenabled.c
petsc-3.3-p7 2013-05-11
2: #include <petscsys.h> /*I "petscsys.h" I*/
4: #ifdef PETSC_HAVE_SSE
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: #include <string.h>
13: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
14: {
16: char *vendor;
17: char Intel[13]="GenuineIntel";
18: char AMD[13] ="AuthenticAMD";
21: PetscMalloc(13*sizeof(char),&vendor);
22: strcpy(vendor,"************");
23: CPUID_GET_VENDOR(vendor);
24: if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
25: /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
26: /* to denote availability of SSE Support */
27: unsigned long myeax,myebx,myecx,myedx;
28: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
29: if (myedx & SSE_FEATURE_FLAG) {
30: *flag = PETSC_TRUE;
31: } else {
32: *flag = PETSC_FALSE;
33: }
34: }
35: PetscFree(vendor);
36: return(0);
37: }
39: #if defined(PETSC_HAVE_FORK)
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
41: /*
42: Early versions of the Linux kernel disable SSE hardware because
43: they do not know how to preserve the SSE state at a context switch.
44: To detect this feature, try an sse instruction in another process.
45: If it works, great! If not, an illegal instruction signal will be thrown,
46: so catch it and return an error code.
47: */
48: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
   SIGILL handler used by the forked SSE probe: terminates the process with a
   non-zero status so the parent can tell that the SSE instruction trapped.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig;
  signal(SIGILL,SIG_IGN);
  /* _exit() rather than exit(): it is async-signal-safe and does not run
     atexit handlers or flush stdio buffers copied from the parent by fork(). */
  _exit(-1);
}
57: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
58: {
59: int status, pid = 0;
61: signal(SIGILL,PetscSSEDisabledHandler);
62: pid = fork();
63: if (pid==0) {
64: SSE_SCOPE_BEGIN;
65: XOR_PS(XMM0,XMM0);
66: SSE_SCOPE_END;
67: exit(0);
68: } else {
69: wait(&status);
70: }
71: if (!status) {
72: *flag = PETSC_TRUE;
73: } else {
74: *flag = PETSC_FALSE;
75: }
76: return(0);
77: }
79: #else
80: /*
81: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
82: Windows ME/2000 doesn't disable SSE Hardware
83: */
84: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
85: #endif
89: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
90: {
92: if (flag) {
93: *flag = PETSC_TRUE;
94: }
95: return(0);
96: }
98: #else /* Not defined PETSC_HAVE_SSE */
100: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
101: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
105: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
106: {
108: if (flag) {
109: *flag = PETSC_FALSE;
110: }
111: return(0);
112: }
114: #endif /* defined PETSC_HAVE_SSE */
118: /*@C
119: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
120: set can be used. Some operating systems do not allow the use of these instructions despite
121: hardware availability.
123: Collective on MPI_Comm
125: Input Parameter:
126: . comm - the MPI Communicator
128: Output Parameters:
129: . lflag - Local Flag: PETSC_TRUE if enabled in this process
130: . gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
132: Notes:
133: PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.
135: Options Database Keys:
136: . -disable_sse - Disable use of hand tuned Intel SSE implementations
138: Level: developer
139: @*/
140: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
141: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
142: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
143: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
144: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscBool *lflag,PetscBool *gflag) {
146: PetscBool disabled_option;
150: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
151: disabled_option = PETSC_FALSE;
153: PetscOptionsBool("-disable_sse",
154: "Disable use of hand tuned Intel SSE implementations <true,false>.",
155: "PetscSSEIsEnabled",disabled_option,&disabled_option,PETSC_NULL);
156: if (disabled_option) {
157: petsc_sse_local_is_untested = PETSC_FALSE;
158: petsc_sse_enabled_local = PETSC_FALSE;
159: petsc_sse_global_is_untested = PETSC_FALSE;
160: petsc_sse_enabled_global = PETSC_FALSE;
161: }
163: if (petsc_sse_local_is_untested) {
164: PetscSSEHardwareTest(&petsc_sse_enabled_local);
165: if (petsc_sse_enabled_local) {
166: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
167: }
168: petsc_sse_local_is_untested = PETSC_FALSE;
169: }
171: if (gflag && petsc_sse_global_is_untested) {
172: MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
173: petsc_sse_global_is_untested = PETSC_FALSE;
174: }
175: }
177: if (lflag) {
178: *lflag = petsc_sse_enabled_local;
179: }
180: if (gflag) {
181: *gflag = petsc_sse_enabled_global;
182: }
183: return(0);
184: }