Actual source code: ad_grad_daxpy.c
petsc-3.3-p7 2013-05-11
1: /*
2: THIS PROGRAM DISCLOSES MATERIAL PROTECTABLE UNDER COPYRIGHT
3: LAWS OF THE UNITED STATES. FOR LICENSING INFORMATION CONTACT:
5: Paul Hovland and Boyana Norris, Mathematics and Computer Science Division,
6: Argonne National Laboratory, 9700 S. Cass Avenue, Argonne IL 60439,
7: {hovland,norris}@mcs.anl.gov.
8: */
10: #include <string.h>
11: #include <stdarg.h>
12: #include <ad_grad.h>
13: #include <ad_grad_daxpy.h>
14: void ad_grad_daxpy_init(void) {
15: ad_adic_deriv_init( ad_grad_size*sizeof(double), 0 );
16: }
17: void ad_grad_daxpy_final(void) {
18: ad_adic_deriv_final();
19: }
22: void ad_grad_daxpy_0(double** ppz)
23: {
24: INVALIDATE(ppz);
25: }
28: void ad_grad_daxpy_copy(double** ppz, double* pa)
29: {
30: if (IS_ZERO(pa)) {
31: INVALIDATE(ppz);
32: }
33: else {
34: VALIDATE(ppz);
35: memcpy(*ppz, pa, sizeof(double)*ad_grad_size);
36: }
37: }
40: void ad_grad_daxpy_1(double** ppz, double a, double* pa)
41: {
42: if (IS_ZERO(pa)) {
43: INVALIDATE(ppz);
44: }
45: else {
46: DAXPY1(ppz,a,pa);
47: }
48: }
51: void ad_grad_daxpy_2(double** ppz, double a, double* pa,
52: double b, double* pb)
53: {
54: if (IS_ZERO(pa)) {
55: if (IS_ZERO(pb)) {
56: INVALIDATE(ppz);
57: }
58: else {
59: DAXPY1(ppz,b,pb);
60: }
61: }
62: else if (IS_ZERO(pb)) {
63: DAXPY1(ppz,a,pa);
64: }
65: else {
66: DAXPY2(ppz,a,pa,b,pb);
67: }
68: }
70: void ad_grad_daxpy_3(double** ppz, double a, double* pa,
71: double b, double* pb, double c, double* pc)
72: {
73: if (IS_ZERO(pa)) {
74: if (IS_ZERO(pb)) {
75: if (IS_ZERO(pc)) {
76: INVALIDATE(ppz);
77: }
78: else {
79: DAXPY1(ppz,c,pc);
80: }
81: }
82: else if (IS_ZERO(pc)) {
83: DAXPY1(ppz,b,pb);
84: }
85: else {
86: DAXPY2(ppz,b,pb,c,pc);
87: }
88: }
89: else if (IS_ZERO(pb)) {
90: if (IS_ZERO(pc)) {
91: DAXPY1(ppz,a,pa);
92: }
93: else {
94: DAXPY2(ppz,a,pa,c,pc);
95: }
96: }
97: else if (IS_ZERO(pc)) {
98: DAXPY2(ppz,a,pa,b,pb);
99: }
100: else {
101: DAXPY3(ppz,a,pa,b,pb,c,pc);
102: }
103: }
104: void ad_grad_daxpy_4(double** ppz, double ca, double* pa, double cb, double* pb, double cc, double* pc, double cd, double* pd){ double *pz; int i, flag = 0;
105: SET_ZERO_FLAG(flag, pa, 0);
106: SET_ZERO_FLAG(flag, pb, 1);
107: SET_ZERO_FLAG(flag, pc, 2);
108: SET_ZERO_FLAG(flag, pd, 3);
109: switch (flag) {
110: case 0:
111: VALIDATE(ppz); pz = *ppz;
112: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
113: break;
114: case 1:
115: VALIDATE(ppz); pz = *ppz;
116: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
117: break;
118: case 2:
119: VALIDATE(ppz); pz = *ppz;
120: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cc)*pc[i] +(cd)*pd[i];}
121: break;
122: case 3:
123: VALIDATE(ppz); pz = *ppz;
124: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cc)*pc[i] +(cd)*pd[i];}
125: break;
126: case 4:
127: VALIDATE(ppz); pz = *ppz;
128: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cd)*pd[i];}
129: break;
130: case 5:
131: VALIDATE(ppz); pz = *ppz;
132: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cd)*pd[i];}
133: break;
134: case 6:
135: VALIDATE(ppz); pz = *ppz;
136: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cd)*pd[i];}
137: break;
138: case 7:
139: VALIDATE(ppz); pz = *ppz;
140: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cd)*pd[i];}
141: break;
142: case 8:
143: VALIDATE(ppz); pz = *ppz;
144: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i];}
145: break;
146: case 9:
147: VALIDATE(ppz); pz = *ppz;
148: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cc)*pc[i];}
149: break;
150: case 10:
151: VALIDATE(ppz); pz = *ppz;
152: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cc)*pc[i];}
153: break;
154: case 11:
155: VALIDATE(ppz); pz = *ppz;
156: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cc)*pc[i];}
157: break;
158: case 12:
159: VALIDATE(ppz); pz = *ppz;
160: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i];}
161: break;
162: case 13:
163: VALIDATE(ppz); pz = *ppz;
164: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i];}
165: break;
166: case 14:
167: VALIDATE(ppz); pz = *ppz;
168: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i];}
169: break;
170: case 15:
171: INVALIDATE(ppz);
172: }}
173: void ad_grad_daxpy_5(double** ppz, double ca, double* pa, double cb, double* pb, double cc, double* pc, double cd, double* pd, double ce, double* pe){ double *pz; int i, flag = 0;
174: SET_ZERO_FLAG(flag, pa, 0);
175: SET_ZERO_FLAG(flag, pb, 1);
176: SET_ZERO_FLAG(flag, pc, 2);
177: SET_ZERO_FLAG(flag, pd, 3);
178: SET_ZERO_FLAG(flag, pe, 4);
179: switch (flag) {
180: case 0:
181: VALIDATE(ppz); pz = *ppz;
182: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
183: break;
184: case 1:
185: VALIDATE(ppz); pz = *ppz;
186: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
187: break;
188: case 2:
189: VALIDATE(ppz); pz = *ppz;
190: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
191: break;
192: case 3:
193: VALIDATE(ppz); pz = *ppz;
194: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
195: break;
196: case 4:
197: VALIDATE(ppz); pz = *ppz;
198: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cd)*pd[i] +(ce)*pe[i];}
199: break;
200: case 5:
201: VALIDATE(ppz); pz = *ppz;
202: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cd)*pd[i] +(ce)*pe[i];}
203: break;
204: case 6:
205: VALIDATE(ppz); pz = *ppz;
206: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cd)*pd[i] +(ce)*pe[i];}
207: break;
208: case 7:
209: VALIDATE(ppz); pz = *ppz;
210: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cd)*pd[i] +(ce)*pe[i];}
211: break;
212: case 8:
213: VALIDATE(ppz); pz = *ppz;
214: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(ce)*pe[i];}
215: break;
216: case 9:
217: VALIDATE(ppz); pz = *ppz;
218: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cc)*pc[i] +(ce)*pe[i];}
219: break;
220: case 10:
221: VALIDATE(ppz); pz = *ppz;
222: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cc)*pc[i] +(ce)*pe[i];}
223: break;
224: case 11:
225: VALIDATE(ppz); pz = *ppz;
226: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cc)*pc[i] +(ce)*pe[i];}
227: break;
228: case 12:
229: VALIDATE(ppz); pz = *ppz;
230: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(ce)*pe[i];}
231: break;
232: case 13:
233: VALIDATE(ppz); pz = *ppz;
234: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(ce)*pe[i];}
235: break;
236: case 14:
237: VALIDATE(ppz); pz = *ppz;
238: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(ce)*pe[i];}
239: break;
240: case 15:
241: VALIDATE(ppz); pz = *ppz;
242: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ce)*pe[i];}
243: break;
244: case 16:
245: VALIDATE(ppz); pz = *ppz;
246: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
247: break;
248: case 17:
249: VALIDATE(ppz); pz = *ppz;
250: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
251: break;
252: case 18:
253: VALIDATE(ppz); pz = *ppz;
254: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cc)*pc[i] +(cd)*pd[i];}
255: break;
256: case 19:
257: VALIDATE(ppz); pz = *ppz;
258: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cc)*pc[i] +(cd)*pd[i];}
259: break;
260: case 20:
261: VALIDATE(ppz); pz = *ppz;
262: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cd)*pd[i];}
263: break;
264: case 21:
265: VALIDATE(ppz); pz = *ppz;
266: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cd)*pd[i];}
267: break;
268: case 22:
269: VALIDATE(ppz); pz = *ppz;
270: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cd)*pd[i];}
271: break;
272: case 23:
273: VALIDATE(ppz); pz = *ppz;
274: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cd)*pd[i];}
275: break;
276: case 24:
277: VALIDATE(ppz); pz = *ppz;
278: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i];}
279: break;
280: case 25:
281: VALIDATE(ppz); pz = *ppz;
282: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i] +(cc)*pc[i];}
283: break;
284: case 26:
285: VALIDATE(ppz); pz = *ppz;
286: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cc)*pc[i];}
287: break;
288: case 27:
289: VALIDATE(ppz); pz = *ppz;
290: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cc)*pc[i];}
291: break;
292: case 28:
293: VALIDATE(ppz); pz = *ppz;
294: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i] +(cb)*pb[i];}
295: break;
296: case 29:
297: VALIDATE(ppz); pz = *ppz;
298: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(cb)*pb[i];}
299: break;
300: case 30:
301: VALIDATE(ppz); pz = *ppz;
302: for (i = 0; i < ad_grad_size; i++) { pz[i] = +(ca)*pa[i];}
303: break;
304: case 31:
305: INVALIDATE(ppz);
306: }}
308: void ad_grad_daxpy_n(int n, double** ppz, ...)
309: {
310: static double alphas[100];
311: static double* grads[100];
312: int i, j, count = 0;
313: double* z;
314: va_list parg;
316: va_start(parg, ppz);
317: for (i = 0; i < n; i++) {
318: alphas[count] = va_arg(parg, double);
319: grads[count] = va_arg(parg, double*);
320: if (!IS_ZERO(grads[count])) {
321: count++;
322: }
323: }
324: va_end(parg);
326: switch (count) {
327: case 0:
328: INVALIDATE(ppz);
329: break;
331: case 1:
332: DAXPY1(ppz,alphas[0],grads[0]);
333: break;
334:
335: case 2:
336: DAXPY2(ppz,alphas[0],grads[0],alphas[1],grads[1]);
337: break;
338:
339: case 3:
340: VALIDATE(ppz);
341: DAXPY3(ppz,alphas[0],grads[0],alphas[1],grads[1],alphas[2],grads[2]);
342: break;
343:
344: case 4:
345: VALIDATE(ppz);
346: z = *ppz;
347: for (i = 0; i < ad_grad_size; i++) {
348: z[i] = alphas[0]*grads[0][i] + alphas[1]*grads[1][i] +
349: alphas[2]*grads[2][i] + alphas[3]*grads[3][i];
350: }
351: break;
352:
353: case 5:
354: VALIDATE(ppz);
355: z = *ppz;
356: for (i = 0; i < ad_grad_size; i++) {
357: z[i] = alphas[0]*grads[0][i] + alphas[1]*grads[1][i] +
358: alphas[2]*grads[2][i] + alphas[3]*grads[3][i] +
359: alphas[4]*grads[4][i];
360: }
361: break;
362:
363: default:
364: z = *ppz;
365: for (i = 0; i < ad_grad_size; i++) {
366: z[i] = alphas[0]*grads[0][i] + alphas[1]*grads[1][i] +
367: alphas[2]*grads[2][i] + alphas[3]*grads[3][i] +
368: alphas[4]*grads[4][i];
369: }
370: for (j = 5; j < count; j++) {
371: double a = alphas[j], *grad = grads[j];
372: for (i = 0; i < ad_grad_size; i++) {
373: z[i] += a*grad[i];
374: }
375: }
376: break;
377: }
378:
379: }