Actual source code: sseenabled.c
1: #include <petscsys.h>
3: #if defined(PETSC_HAVE_SSE)
5: #include PETSC_HAVE_SSE
6: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
8: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
9: {
10: char vendor[13];
11: char Intel[13] = "GenuineIntel";
12: char AMD[13] = "AuthenticAMD";
13: char Hygon[13] = "HygonGenuine";
14: PetscBool flg;
16: PetscFunctionBegin;
17: PetscCall(PetscStrncpy(vendor, "************", sizeof(vendor)));
18: CPUID_GET_VENDOR(vendor);
19: PetscCall(PetscStrcmp(vendor, Intel, &flg));
20: if (!flg) PetscCall(PetscStrcmp(vendor, AMD, &flg));
21: if (!flg) {
22: PetscCall(PetscStrcmp(vendor, Hygon, &flg));
23: if (flg) {
24: /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
25: /* to denote availability of SSE Support */
26: unsigned long myeax, myebx, myecx, myedx;
27: CPUID(CPUID_FEATURES, &myeax, &myebx, &myecx, &myedx);
28: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
29: else *flag = PETSC_FALSE;
30: }
31: PetscFunctionReturn(PETSC_SUCCESS);
32: }
33: }
35: #if defined(PETSC_HAVE_FORK)
#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and exit that process with an error code.
*/
44: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
  PetscSSEDisabledHandler - SIGILL handler: ignores further SIGILLs and terminates the
  process with exit status -1.

  Termination goes through _Exit() rather than exit(): exit() is not async-signal-safe, and
  it would run atexit() handlers and flush stdio buffers, which _Exit() skips.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig; /* the handler does not need the signal number */
  signal(SIGILL, SIG_IGN);
  _Exit(-1);
}
52: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
53: {
54: int status, pid = 0;
56: PetscFunctionBegin;
57: signal(SIGILL, PetscSSEDisabledHandler);
58: pid = fork();
59: if (pid == 0) {
60: SSE_SCOPE_BEGIN;
61: XOR_PS(XMM0, XMM0);
62: SSE_SCOPE_END;
63: exit(0);
64: } else wait(&status);
65: if (!status) *flag = PETSC_TRUE;
66: else *flag = PETSC_FALSE;
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: #else
71: /*
72: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
73: Windows ME/2000 doesn't disable SSE Hardware
74: */
75: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
76: #endif
78: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
79: {
80: PetscFunctionBegin;
81: if (flag) *flag = PETSC_TRUE;
82: PetscFunctionReturn(PETSC_SUCCESS);
83: }
85: #else /* Not defined PETSC_HAVE_SSE */
87: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
88: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
90: static PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
91: {
92: PetscFunctionBegin;
93: if (flag) *flag = PETSC_FALSE;
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
97: #endif /* defined PETSC_HAVE_SSE */
99: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
100: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
101: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
102: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
103: /*@C
104: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
105: set can be used. Some operating systems do not allow the use of these instructions despite
106: hardware availability.
108: Collective
110: Input Parameter:
111: . comm - the MPI Communicator
113: Output Parameters:
114: + lflag - Local Flag `PETSC_TRUE` if enabled in this process
115: - gflag - Global Flag `PETSC_TRUE` if enabled for all processes in comm
117: Options Database Key:
118: . -disable_sse - Disable use of hand tuned Intel SSE implementations
120: Level: developer
122: Note:
123: `NULL` can be specified for `lflag` or `gflag` if either of these values are not desired.
125: .seealso: [](ch_profiling)
126: @*/
127: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm, PetscBool *lflag, PetscBool *gflag)
128: {
129: PetscBool disabled_option;
131: PetscFunctionBegin;
132: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
133: disabled_option = PETSC_FALSE;
135: PetscCall(PetscOptionsGetBool(NULL, NULL, "-disable_sse", &disabled_option, NULL));
136: if (disabled_option) {
137: petsc_sse_local_is_untested = PETSC_FALSE;
138: petsc_sse_enabled_local = PETSC_FALSE;
139: petsc_sse_global_is_untested = PETSC_FALSE;
140: petsc_sse_enabled_global = PETSC_FALSE;
141: }
143: if (petsc_sse_local_is_untested) {
144: PetscCall(PetscSSEHardwareTest(&petsc_sse_enabled_local));
145: if (petsc_sse_enabled_local) { PetscCall(PetscSSEOSEnabledTest(&petsc_sse_enabled_local)); }
146: petsc_sse_local_is_untested = PETSC_FALSE;
147: }
149: if (gflag && petsc_sse_global_is_untested) {
150: PetscCall(MPIU_Allreduce(&petsc_sse_enabled_local, &petsc_sse_enabled_global, 1, MPIU_BOOL, MPI_LAND, comm));
152: petsc_sse_global_is_untested = PETSC_FALSE;
153: }
154: }
156: if (lflag) *lflag = petsc_sse_enabled_local;
157: if (gflag) *gflag = petsc_sse_enabled_global;
158: PetscFunctionReturn(PETSC_SUCCESS);
159: }