Actual source code: sseenabled.c
petsc-3.9.4 2018-09-11
2: #include <petscsys.h>
4: #if defined(PETSC_HAVE_SSE)
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
10: {
12: char *vendor;
13: char Intel[13]="GenuineIntel";
14: char AMD[13] ="AuthenticAMD";
17: PetscMalloc1(13,&vendor);
18: strcpy(vendor,"************");
19: CPUID_GET_VENDOR(vendor);
20: if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
21: /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
22: /* to denote availability of SSE Support */
23: unsigned long myeax,myebx,myecx,myedx;
24: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
25: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
26: else *flag = PETSC_FALSE;
27: }
28: PetscFree(vendor);
29: return(0);
30: }
32: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
34: /*
35: Early versions of the Linux kernel disabled SSE hardware because
36: it does not know how to preserve the SSE state at a context switch.
37: To detect this feature, try an sse instruction in another process.
38: If it works, great! If not, an illegal instruction signal will be thrown,
39: so catch it and return an error code.
40: */
41: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
  PetscSSEDisabledHandler - SIGILL handler used by the forked SSE probe.

  Terminates the process with a nonzero exit status so the parent can tell
  that the SSE instruction trapped.  Only async-signal-safe calls are made:
  _exit() is used instead of exit() so that no atexit handlers run and no
  stdio buffers inherited from the parent are flushed a second time.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig;
  /* Ignore any further SIGILL while we are shutting down */
  signal(SIGILL,SIG_IGN);
  _exit(1);
}
49: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
50: {
51: int status, pid = 0;
54: signal(SIGILL,PetscSSEDisabledHandler);
55: pid = fork();
56: if (pid==0) {
57: SSE_SCOPE_BEGIN;
58: XOR_PS(XMM0,XMM0);
59: SSE_SCOPE_END;
60: exit(0);
61: } else wait(&status);
62: if (!status) *flag = PETSC_TRUE;
63: else *flag = PETSC_FALSE;
64: return(0);
65: }
67: #else
68: /*
69: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
70: Windows ME/2000 doesn't disable SSE Hardware
71: */
72: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
73: #endif
75: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
76: {
78: if (flag) *flag = PETSC_TRUE;
79: return(0);
80: }
82: #else /* Not defined PETSC_HAVE_SSE */
84: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
85: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
87: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
88: {
90: if (flag) *flag = PETSC_FALSE;
91: return(0);
92: }
94: #endif /* defined PETSC_HAVE_SSE */
96: /*@C
97: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
98: set can be used. Some operating systems do not allow the use of these instructions despite
99: hardware availability.
101: Collective on MPI_Comm
103: Input Parameter:
104: . comm - the MPI Communicator
106: Output Parameters:
107: . lflag - Local Flag: PETSC_TRUE if enabled in this process
108: . gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
110: Notes:
111: NULL can be specified for lflag or gflag if either of these values is not desired.
113: Options Database Keys:
114: . -disable_sse - Disable use of hand tuned Intel SSE implementations
116: Level: developer
117: @*/
118: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
119: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
120: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
121: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
122: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscBool *lflag,PetscBool *gflag)
123: {
125: PetscBool disabled_option;
128: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
129: disabled_option = PETSC_FALSE;
131: PetscOptionsGetBool(NULL,NULL,"-disable_sse",&disabled_option,NULL);
132: if (disabled_option) {
133: petsc_sse_local_is_untested = PETSC_FALSE;
134: petsc_sse_enabled_local = PETSC_FALSE;
135: petsc_sse_global_is_untested = PETSC_FALSE;
136: petsc_sse_enabled_global = PETSC_FALSE;
137: }
139: if (petsc_sse_local_is_untested) {
140: PetscSSEHardwareTest(&petsc_sse_enabled_local);
141: if (petsc_sse_enabled_local) {
142: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
143: }
144: petsc_sse_local_is_untested = PETSC_FALSE;
145: }
147: if (gflag && petsc_sse_global_is_untested) {
148: MPIU_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);
150: petsc_sse_global_is_untested = PETSC_FALSE;
151: }
152: }
154: if (lflag) *lflag = petsc_sse_enabled_local;
155: if (gflag) *gflag = petsc_sse_enabled_global;
156: return(0);
157: }