Actual source code: sseenabled.c
petsc-3.14.6 2021-03-30
2: #include <petscsys.h>
4: #if defined(PETSC_HAVE_SSE)
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
10: {
12: char vendor[13];
13: char Intel[13]="GenuineIntel";
14: char AMD[13] ="AuthenticAMD";
15: char Hygon[13]="HygonGenuine";
16: PetscBool flg;
19: PetscStrncpy(vendor,"************",sizeof(vendor));
20: CPUID_GET_VENDOR(vendor);
21: PetscStrcmp(vendor,Intel,&flg);
22: if (!flg) {PetscStrcmp(vendor,AMD,&flg);}
23: if (!flg) {PetscStrcmp(vendor,Hygon,&flg);
24: if (flg) {
25: /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
26: /* to denote availability of SSE Support */
27: unsigned long myeax,myebx,myecx,myedx;
28: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
29: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
30: else *flag = PETSC_FALSE;
31: }
32: return(0);
33: }
35: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great! If not, an illegal instruction signal will be raised,
   so catch it and exit that process with a nonzero status.
*/
44: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
   PetscSSEDisabledHandler - SIGILL handler used while probing for SSE support.

   Ignores any further SIGILL and terminates the process immediately with exit code -1
   (seen as 255 by the parent).  _Exit() is used rather than exit() because exit() runs
   atexit handlers and flushes stdio buffers, neither of which is safe inside a signal
   handler.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig; /* the handler is only ever installed for SIGILL */
  signal(SIGILL,SIG_IGN);
  _Exit(-1);
}
52: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
53: {
54: int status, pid = 0;
57: signal(SIGILL,PetscSSEDisabledHandler);
58: pid = fork();
59: if (pid==0) {
60: SSE_SCOPE_BEGIN;
61: XOR_PS(XMM0,XMM0);
62: SSE_SCOPE_END;
63: exit(0);
64: } else wait(&status);
65: if (!status) *flag = PETSC_TRUE;
66: else *flag = PETSC_FALSE;
67: return(0);
68: }
70: #else
71: /*
72: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
73: Windows ME/2000 doesn't disable SSE Hardware
74: */
75: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
76: #endif
78: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
79: {
81: if (flag) *flag = PETSC_TRUE;
82: return(0);
83: }
85: #else /* Not defined PETSC_HAVE_SSE */
87: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
88: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
90: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
91: {
93: if (flag) *flag = PETSC_FALSE;
94: return(0);
95: }
97: #endif /* defined PETSC_HAVE_SSE */
99: /*@C
100: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
101: set can be used. Some operating systems do not allow the use of these instructions despite
102: hardware availability.
104: Collective
106: Input Parameter:
107: . comm - the MPI Communicator
109: Output Parameters:
110: + lflag - Local Flag: PETSC_TRUE if enabled in this process
111: - gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
113: Notes:
114: NULL can be specified for lflag or gflag if either of these values are not desired.
116: Options Database Keys:
117: . -disable_sse - Disable use of hand tuned Intel SSE implementations
119: Level: developer
120: @*/
121: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
122: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
123: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
124: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
125: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscBool *lflag,PetscBool *gflag)
126: {
128: PetscBool disabled_option;
131: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
132: disabled_option = PETSC_FALSE;
134: PetscOptionsGetBool(NULL,NULL,"-disable_sse",&disabled_option,NULL);
135: if (disabled_option) {
136: petsc_sse_local_is_untested = PETSC_FALSE;
137: petsc_sse_enabled_local = PETSC_FALSE;
138: petsc_sse_global_is_untested = PETSC_FALSE;
139: petsc_sse_enabled_global = PETSC_FALSE;
140: }
142: if (petsc_sse_local_is_untested) {
143: PetscSSEHardwareTest(&petsc_sse_enabled_local);
144: if (petsc_sse_enabled_local) {
145: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
146: }
147: petsc_sse_local_is_untested = PETSC_FALSE;
148: }
150: if (gflag && petsc_sse_global_is_untested) {
151: MPIU_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);
153: petsc_sse_global_is_untested = PETSC_FALSE;
154: }
155: }
157: if (lflag) *lflag = petsc_sse_enabled_local;
158: if (gflag) *gflag = petsc_sse_enabled_global;
159: return(0);
160: }