Actual source code: ad_grad_daxpy.c

petsc-3.3-p7 2013-05-11
  1: /*
  2:   THIS PROGRAM DISCLOSES MATERIAL PROTECTABLE UNDER COPYRIGHT
  3:   LAWS OF THE UNITED STATES.  FOR LICENSING INFORMATION CONTACT:

  5:   Paul Hovland and Boyana Norris, Mathematics and Computer Science Division,
  6:   Argonne National Laboratory, 9700 S. Cass Avenue, Argonne IL 60439,
  7:   {hovland,norris}@mcs.anl.gov.
  8: */

 10: #include <string.h>
 11: #include <stdarg.h>
 12: #include <ad_grad.h>
 13: #include <ad_grad_daxpy.h>
 14: void ad_grad_daxpy_init(void) {
 15:     ad_adic_deriv_init( ad_grad_size*sizeof(double), 0 );
 16: }
 17: void ad_grad_daxpy_final(void) {
 18:     ad_adic_deriv_final();
 19: }


 22: void ad_grad_daxpy_0(double** ppz)
 23: {
 24:     INVALIDATE(ppz);
 25: }


 28: void ad_grad_daxpy_copy(double** ppz, double* pa)
 29: {
 30:     if (IS_ZERO(pa)) {
 31:         INVALIDATE(ppz);
 32:     }
 33:     else {
 34:         VALIDATE(ppz);
 35:         memcpy(*ppz, pa, sizeof(double)*ad_grad_size);
 36:     }
 37: }


 40: void ad_grad_daxpy_1(double** ppz, double a, double* pa)
 41: {
 42:     if (IS_ZERO(pa)) {
 43:         INVALIDATE(ppz);
 44:     }
 45:     else {
 46:         DAXPY1(ppz,a,pa);
 47:     }
 48: }


 51: void ad_grad_daxpy_2(double** ppz, double a, double* pa, 
 52:                      double b, double* pb)
 53: {
 54:     if (IS_ZERO(pa)) {
 55:         if (IS_ZERO(pb)) {
 56:             INVALIDATE(ppz);
 57:         }
 58:         else {
 59:             DAXPY1(ppz,b,pb);
 60:         }
 61:     }
 62:     else if (IS_ZERO(pb)) {
 63:         DAXPY1(ppz,a,pa);
 64:     }
 65:     else {
 66:         DAXPY2(ppz,a,pa,b,pb);
 67:     }
 68: }

 70: void ad_grad_daxpy_3(double** ppz, double a, double* pa, 
 71:                      double b, double* pb, double c, double* pc)
 72: {
 73:     if (IS_ZERO(pa)) {
 74:         if (IS_ZERO(pb)) {
 75:             if (IS_ZERO(pc)) {
 76:                 INVALIDATE(ppz);
 77:             }
 78:             else {
 79:                 DAXPY1(ppz,c,pc);
 80:             }
 81:         }
 82:         else if (IS_ZERO(pc)) {
 83:             DAXPY1(ppz,b,pb);
 84:         }
 85:         else {
 86:             DAXPY2(ppz,b,pb,c,pc);
 87:         }
 88:     }
 89:     else if (IS_ZERO(pb)) {
 90:         if (IS_ZERO(pc)) {
 91:             DAXPY1(ppz,a,pa);
 92:         }
 93:         else {
 94:             DAXPY2(ppz,a,pa,c,pc);
 95:         }
 96:     }
 97:     else if (IS_ZERO(pc)) {
 98:         DAXPY2(ppz,a,pa,b,pb);
 99:     }
100:     else {
101:         DAXPY3(ppz,a,pa,b,pb,c,pc);
102:     }
103: }
104: void ad_grad_daxpy_4(double** ppz, double ca, double* pa, double cb, double* pb, double cc, double* pc, double cd, double* pd){ double *pz; int i, flag = 0;
105: SET_ZERO_FLAG(flag, pa, 0);
106: SET_ZERO_FLAG(flag, pb, 1);
107: SET_ZERO_FLAG(flag, pc, 2);
108: SET_ZERO_FLAG(flag, pd, 3);
109: switch (flag) {
110: case 0:
111: VALIDATE(ppz); pz = *ppz;
112:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
113: break;
114: case 1:
115: VALIDATE(ppz); pz = *ppz;
116:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
117: break;
118: case 2:
119: VALIDATE(ppz); pz = *ppz;
120:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cc)*pc[i] +(cd)*pd[i];}
121: break;
122: case 3:
123: VALIDATE(ppz); pz = *ppz;
124:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cc)*pc[i] +(cd)*pd[i];}
125: break;
126: case 4:
127: VALIDATE(ppz); pz = *ppz;
128:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cd)*pd[i];}
129: break;
130: case 5:
131: VALIDATE(ppz); pz = *ppz;
132:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cd)*pd[i];}
133: break;
134: case 6:
135: VALIDATE(ppz); pz = *ppz;
136:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cd)*pd[i];}
137: break;
138: case 7:
139: VALIDATE(ppz); pz = *ppz;
140:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cd)*pd[i];}
141: break;
142: case 8:
143: VALIDATE(ppz); pz = *ppz;
144:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i];}
145: break;
146: case 9:
147: VALIDATE(ppz); pz = *ppz;
148:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cc)*pc[i];}
149: break;
150: case 10:
151: VALIDATE(ppz); pz = *ppz;
152:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cc)*pc[i];}
153: break;
154: case 11:
155: VALIDATE(ppz); pz = *ppz;
156:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cc)*pc[i];}
157: break;
158: case 12:
159: VALIDATE(ppz); pz = *ppz;
160:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i];}
161: break;
162: case 13:
163: VALIDATE(ppz); pz = *ppz;
164:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i];}
165: break;
166: case 14:
167: VALIDATE(ppz); pz = *ppz;
168:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i];}
169: break;
170: case 15:
171: INVALIDATE(ppz);
172: }}
173: void ad_grad_daxpy_5(double** ppz, double ca, double* pa, double cb, double* pb, double cc, double* pc, double cd, double* pd, double ce, double* pe){ double *pz; int i, flag = 0;
174: SET_ZERO_FLAG(flag, pa, 0);
175: SET_ZERO_FLAG(flag, pb, 1);
176: SET_ZERO_FLAG(flag, pc, 2);
177: SET_ZERO_FLAG(flag, pd, 3);
178: SET_ZERO_FLAG(flag, pe, 4);
179: switch (flag) {
180: case 0:
181: VALIDATE(ppz); pz = *ppz;
182:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
183: break;
184: case 1:
185: VALIDATE(ppz); pz = *ppz;
186:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
187: break;
188: case 2:
189: VALIDATE(ppz); pz = *ppz;
190:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
191: break;
192: case 3:
193: VALIDATE(ppz); pz = *ppz;
194:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cc)*pc[i] +(cd)*pd[i] +(ce)*pe[i];}
195: break;
196: case 4:
197: VALIDATE(ppz); pz = *ppz;
198:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cd)*pd[i] +(ce)*pe[i];}
199: break;
200: case 5:
201: VALIDATE(ppz); pz = *ppz;
202:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cd)*pd[i] +(ce)*pe[i];}
203: break;
204: case 6:
205: VALIDATE(ppz); pz = *ppz;
206:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cd)*pd[i] +(ce)*pe[i];}
207: break;
208: case 7:
209: VALIDATE(ppz); pz = *ppz;
210:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cd)*pd[i] +(ce)*pe[i];}
211: break;
212: case 8:
213: VALIDATE(ppz); pz = *ppz;
214:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(ce)*pe[i];}
215: break;
216: case 9:
217: VALIDATE(ppz); pz = *ppz;
218:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cc)*pc[i] +(ce)*pe[i];}
219: break;
220: case 10:
221: VALIDATE(ppz); pz = *ppz;
222:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cc)*pc[i] +(ce)*pe[i];}
223: break;
224: case 11:
225: VALIDATE(ppz); pz = *ppz;
226:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cc)*pc[i] +(ce)*pe[i];}
227: break;
228: case 12:
229: VALIDATE(ppz); pz = *ppz;
230:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(ce)*pe[i];}
231: break;
232: case 13:
233: VALIDATE(ppz); pz = *ppz;
234:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(ce)*pe[i];}
235: break;
236: case 14:
237: VALIDATE(ppz); pz = *ppz;
238:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(ce)*pe[i];}
239: break;
240: case 15:
241: VALIDATE(ppz); pz = *ppz;
242:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ce)*pe[i];}
243: break;
244: case 16:
245: VALIDATE(ppz); pz = *ppz;
246:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
247: break;
248: case 17:
249: VALIDATE(ppz); pz = *ppz;
250:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cc)*pc[i] +(cd)*pd[i];}
251: break;
252: case 18:
253: VALIDATE(ppz); pz = *ppz;
254:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cc)*pc[i] +(cd)*pd[i];}
255: break;
256: case 19:
257: VALIDATE(ppz); pz = *ppz;
258:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cc)*pc[i] +(cd)*pd[i];}
259: break;
260: case 20:
261: VALIDATE(ppz); pz = *ppz;
262:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cd)*pd[i];}
263: break;
264: case 21:
265: VALIDATE(ppz); pz = *ppz;
266:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cd)*pd[i];}
267: break;
268: case 22:
269: VALIDATE(ppz); pz = *ppz;
270:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cd)*pd[i];}
271: break;
272: case 23:
273: VALIDATE(ppz); pz = *ppz;
274:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cd)*pd[i];}
275: break;
276: case 24:
277: VALIDATE(ppz); pz = *ppz;
278:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i] +(cc)*pc[i];}
279: break;
280: case 25:
281: VALIDATE(ppz); pz = *ppz;
282:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i] +(cc)*pc[i];}
283: break;
284: case 26:
285: VALIDATE(ppz); pz = *ppz;
286:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cc)*pc[i];}
287: break;
288: case 27:
289: VALIDATE(ppz); pz = *ppz;
290:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cc)*pc[i];}
291: break;
292: case 28:
293: VALIDATE(ppz); pz = *ppz;
294:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i] +(cb)*pb[i];}
295: break;
296: case 29:
297: VALIDATE(ppz); pz = *ppz;
298:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(cb)*pb[i];}
299: break;
300: case 30:
301: VALIDATE(ppz); pz = *ppz;
302:                         for (i = 0; i < ad_grad_size; i++) { pz[i] =  +(ca)*pa[i];}
303: break;
304: case 31:
305: INVALIDATE(ppz);
306: }}

308: void ad_grad_daxpy_n(int n, double** ppz, ...)
309: {
310:     static double   alphas[100];
311:     static double*  grads[100];
312:     int             i, j, count = 0;
313:     double*         z;
314:     va_list         parg;

316:     va_start(parg, ppz);
317:     for (i = 0; i < n; i++) {
318:          alphas[count] = va_arg(parg, double);
319:          grads[count] = va_arg(parg, double*);
320:          if (!IS_ZERO(grads[count])) {
321:              count++;
322:          }
323:     }
324:     va_end(parg);

326:     switch (count) {
327:       case 0:
328:           INVALIDATE(ppz);
329:           break;

331:       case 1:
332:           DAXPY1(ppz,alphas[0],grads[0]);
333:           break;
334: 
335:       case 2:
336:           DAXPY2(ppz,alphas[0],grads[0],alphas[1],grads[1]);
337:           break;
338: 
339:       case 3:
340:           VALIDATE(ppz);
341:           DAXPY3(ppz,alphas[0],grads[0],alphas[1],grads[1],alphas[2],grads[2]);
342:           break;
343: 
344:       case 4:
345:           VALIDATE(ppz);
346:           z = *ppz;
347:           for (i = 0; i < ad_grad_size; i++) {
348:               z[i] = alphas[0]*grads[0][i] + alphas[1]*grads[1][i] +
349:                      alphas[2]*grads[2][i] + alphas[3]*grads[3][i];
350:           }
351:           break;
352: 
353:       case 5:
354:           VALIDATE(ppz);
355:           z = *ppz;
356:           for (i = 0; i < ad_grad_size; i++) {
357:               z[i] = alphas[0]*grads[0][i] + alphas[1]*grads[1][i] +
358:                      alphas[2]*grads[2][i] + alphas[3]*grads[3][i] +
359:                      alphas[4]*grads[4][i];
360:           }
361:           break;
362: 
363:       default:
364:           z = *ppz;
365:           for (i = 0; i < ad_grad_size; i++) {
366:               z[i] = alphas[0]*grads[0][i] + alphas[1]*grads[1][i] +
367:                      alphas[2]*grads[2][i] + alphas[3]*grads[3][i] +
368:                      alphas[4]*grads[4][i];
369:           }
370:           for (j = 5; j < count; j++) {
371:               double    a = alphas[j], *grad = grads[j];
372:               for (i = 0; i < ad_grad_size; i++) {
373:                   z[i] += a*grad[i];
374:               }
375:           }
376:           break;
377:     }
378: 
379: }