Actual source code: spbas_cholesky.h

petsc-3.6.1 2015-08-06
Report Typos and Errors
  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
 10: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
 11: {
 13:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 14:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 15:   *n_alloc_used   += r_nnz;
 16:   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
 17:   return(0);
 18: }


 21: /*
 22:   spbas_cholesky_garbage_collect:
 23:      move the rows which have been calculated so far, as well as
 24:      those currently under construction, to the front of the
 25:      array, while putting them in the proper order.
 26:      When it seems necessary, enlarge the current arrays.

 28:      NB: re-allocation is being simulated using
 29:          PetscMalloc, memcpy, PetscFree, because
 30:          PetscRealloc does not seem to exist.

 32: */
 35: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 36:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 37:                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
 38:                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 39:                                                                                     places in the arrays: they need not be moved any more */
 40:                                               PetscInt     *n_alloc_used,   /* I/O:  */
 41:                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
 42: {
 43: /* PSEUDO-CODE:
 44:   1. Choose the appropriate size for the arrays
 45:   2. Rescue the arrays which would be lost during garbage collection
 46:   3. Reallocate and correct administration
 47:   4. Move all arrays so that they are in proper order */

 49:   PetscInt        i,j;
 50:   PetscInt        nrows         = result->nrows;
 51:   PetscInt        n_alloc_ok    =0;
 52:   PetscInt        n_alloc_ok_max=0;
 53:   PetscErrorCode  ierr;
 54:   PetscInt        need_already  = 0;
 55:   PetscInt        n_rows_ahead  =0;
 56:   PetscInt        max_need_extra= 0;
 57:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 58:   PetscInt        n_alloc_now     = result->n_alloc_icol;
 59:   PetscInt        *alloc_icol_old = result->alloc_icol;
 60:   PetscScalar     *alloc_val_old  = result->alloc_val;
 61:   PetscInt        *icol_rescue;
 62:   PetscScalar     *val_rescue;
 63:   PetscInt        n_rescue;
 64:   PetscInt        n_row_rescue;
 65:   PetscInt        i_here, i_last, n_copy;
 66:   const PetscReal xtra_perc = 20;

 69:   /*********************************************************
 70:   1. Choose appropriate array size
 71:   Count number of rows and memory usage which is already final */
 72:   for (i=0; i<i_row; i++)  {
 73:     n_alloc_ok     += result->row_nnz[i];
 74:     n_alloc_ok_max += max_row_nnz[i];
 75:   }

 77:   /* Count currently needed memory usage and future memory requirements
 78:     (max, predicted)*/
 79:   for (i=i_row; i<nrows; i++) {
 80:     if (!result->row_nnz[i]) {
 81:       max_need_extra += max_row_nnz[i];
 82:     } else {
 83:       need_already += max_row_nnz[i];
 84:       n_rows_ahead++;
 85:     }
 86:   }

 88:   /* Make maximal and realistic memory requirement estimates */
 89:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 90:   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));

 92:   /* Choose array sizes */
 93:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 94:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 95:   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
 96:   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));

 98:   /* If new estimate is less than what we already have,
 99:     don't reallocate, just garbage-collect */
100:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
101:     n_alloc = result->n_alloc_icol;
102:   }

104:   /* Motivate dimension choice */
105:   PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);
106:   if (n_alloc_max == n_alloc_est) {
107:     PetscInfo(NULL,"this is the correct size\n");
108:   } else if (n_alloc_now >= n_alloc_est) {
109:     PetscInfo(NULL,"the current size, which seems enough\n");
110:   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
111:     PetscInfo(NULL,"the maximum estimate\n");
112:   } else {
113:     PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
114:   }


117:   /**********************************************************
118:   2. Rescue arrays which would be lost
119:   Count how many rows and nonzeros will have to be rescued
120:   when moving all arrays in place */
121:   n_row_rescue = 0; n_rescue = 0;
122:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
123:   else {
124:     i = *n_row_alloc_ok - 1;

126:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
127:   }

129:   for (i=*n_row_alloc_ok; i<nrows; i++) {
130:     i_here = result->icols[i]-result->alloc_icol;
131:     i_last = i_here + result->row_nnz[i];
132:     if (result->row_nnz[i]>0) {
133:       if (*n_alloc_used > i_here || i_last > n_alloc) {
134:         n_rescue += result->row_nnz[i];
135:         n_row_rescue++;
136:       }

138:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
139:       else         *n_alloc_used += max_row_nnz[i];
140:     }
141:   }

143:   /* Allocate rescue arrays */
144:   PetscMalloc1(n_rescue, &icol_rescue);
145:   PetscMalloc1(n_rescue, &val_rescue);

147:   /* Rescue the arrays which need rescuing */
148:   n_row_rescue = 0; n_rescue = 0;
149:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
150:   else {
151:     i             = *n_row_alloc_ok - 1;
152:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
153:   }

155:   for (i=*n_row_alloc_ok; i<nrows; i++) {
156:     i_here = result->icols[i]-result->alloc_icol;
157:     i_last = i_here + result->row_nnz[i];
158:     if (result->row_nnz[i]>0) {
159:       if (*n_alloc_used > i_here || i_last > n_alloc) {
160:         PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));
161:         PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));

163:         n_rescue += result->row_nnz[i];
164:         n_row_rescue++;
165:       }

167:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
168:       else         *n_alloc_used += max_row_nnz[i];
169:     }
170:   }

172:   /**********************************************************
173:   3. Reallocate and correct administration */

175:   if (n_alloc != result->n_alloc_icol) {
176:     n_copy = PetscMin(n_alloc,result->n_alloc_icol);

178:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

180:         Allocate new icol-data, copy old contents */
181:     PetscMalloc1(n_alloc, &result->alloc_icol);
182:     PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));

184:     /* Update administration, Reset pointers to new arrays  */
185:     result->n_alloc_icol = n_alloc;
186:     for (i=0; i<nrows; i++) {
187:       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
188:       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
189:     }

191:     /* Delete old array */
192:     PetscFree(alloc_icol_old);

194:     /* Allocate new value-data, copy old contents */
195:     PetscMalloc1(n_alloc, &result->alloc_val);
196:     PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));

198:     /* Update administration, Reset pointers to new arrays  */
199:     result->n_alloc_val = n_alloc;
200:     for (i=0; i<nrows; i++) {
201:       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
202:     }

204:     /* Delete old array */
205:     PetscFree(alloc_val_old);
206:   }


209:   /*********************************************************
210:   4. Copy all the arrays to their proper places */
211:   n_row_rescue = 0; n_rescue = 0;
212:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
213:   else {
214:     i = *n_row_alloc_ok - 1;

216:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
217:   }

219:   for (i=*n_row_alloc_ok; i<nrows; i++) {
220:     i_here = result->icols[i]-result->alloc_icol;
221:     i_last = i_here + result->row_nnz[i];

223:     result->icols[i] = result->alloc_icol + *n_alloc_used;
224:     result->values[i]= result->alloc_val  + *n_alloc_used;

226:     if (result->row_nnz[i]>0) {
227:       if (*n_alloc_used > i_here || i_last > n_alloc) {
228:         PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));
229:         PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));

231:         n_rescue += result->row_nnz[i];
232:         n_row_rescue++;
233:       } else {
234:         for (j=0; j<result->row_nnz[i]; j++) {
235:           result->icols[i][j]  = result->alloc_icol[i_here+j];
236:           result->values[i][j] = result->alloc_val[i_here+j];
237:         }
238:       }
239:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
240:       else         *n_alloc_used += max_row_nnz[i];
241:     }
242:   }

244:   /* Delete the rescue arrays */
245:   PetscFree(icol_rescue);
246:   PetscFree(val_rescue);

248:   *n_row_alloc_ok = i_row;
249:   return(0);
250: }

252: /*
253:   spbas_incomplete_cholesky:
254:      incomplete Cholesky decomposition of a square, symmetric,
255:      positive definite matrix.

257:      In case negative diagonals are encountered, function returns
258:      NEGATIVE_DIAGONAL. When this happens, call this function again
259:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
260:      droptol
261: */
264: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
265: {
266:   PetscInt        jL;
267:   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
268:   PetscInt        *ai=a->i,*aj=a->j;
269:   MatScalar       *aa=a->a;
270:   PetscInt        nrows, ncols;
271:   PetscInt        *max_row_nnz;
272:   PetscErrorCode  ierr;
273:   spbas_matrix    retval;
274:   PetscScalar     *diag;
275:   PetscScalar     *val;
276:   PetscScalar     *lvec;
277:   PetscScalar     epsdiag;
278:   PetscInt        i,j,k;
279:   const PetscBool do_values = PETSC_TRUE;
280:   PetscInt        *r1_icol;
281:   PetscScalar     *r1_val;
282:   PetscInt        *r_icol;
283:   PetscInt        r_nnz;
284:   PetscScalar     *r_val;
285:   PetscInt        *A_icol;
286:   PetscInt        A_nnz;
287:   PetscScalar     *A_val;
288:   PetscInt        *p_icol;
289:   PetscInt        p_nnz;
290:   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
291:   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */

294:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
295:   MatGetSize(A, &nrows, &ncols);
296:   MatGetTrace(A, &epsdiag);

298:   epsdiag *= epsdiag_in / nrows;

300:   PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);

302:   if (droptol<1e-10) droptol=1e-10;

304:   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");

306:   retval.nrows        = nrows;
307:   retval.ncols        = nrows;
308:   retval.nnz          = pattern.nnz/10;
309:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
310:   retval.block_data   = PETSC_TRUE;

312:   spbas_allocate_pattern(&retval, do_values);
313:   PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));
314:   spbas_allocate_data(&retval);
315:   retval.nnz = 0;

317:   PetscMalloc1(nrows, &diag);
318:   PetscMalloc1(nrows, &val);
319:   PetscMalloc1(nrows, &lvec);
320:   PetscMalloc1(nrows, &max_row_nnz);
321:   if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");

323:   PetscMemzero((void*) val, nrows*sizeof(PetscScalar));
324:   PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));
325:   PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));

327:   /* Check correct format of sparseness pattern */
328:   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");

330:   /* Count the nonzeros on transpose of pattern */
331:   for (i = 0; i<nrows; i++)  {
332:     p_nnz  = pattern.row_nnz[i];
333:     p_icol = pattern.icols[i];
334:     for (j=0; j<p_nnz; j++)  {
335:       max_row_nnz[i+p_icol[j]]++;
336:     }
337:   }

339:   /* Calculate rows of L */
340:   for (i = 0; i<nrows; i++)  {
341:     p_nnz  = pattern.row_nnz[i];
342:     p_icol = pattern.icols[i];

344:     r_nnz  = retval.row_nnz[i];
345:     r_icol = retval.icols[i];
346:     r_val  = retval.values[i];
347:     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
348:     A_icol = &aj[ai[rip[i]]];
349:     A_val  = &aa[ai[rip[i]]];

351:     /* Calculate  val += A(i,i:n)'; */
352:     for (j=0; j<A_nnz; j++) {
353:       k = riip[A_icol[j]];
354:       if (k>=i) val[k] = A_val[j];
355:     }

357:     /*  Add regularization */
358:     val[i] += epsdiag;

360:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
361:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
362:     for (j=0; j<r_nnz; j++)  {
363:       k       = r_icol[j];
364:       lvec[k] = diag[k] * r_val[j];
365:       val[i] -= r_val[j] * lvec[k];
366:     }

368:     /* Calculate the new diagonal */
369:     diag[i] = val[i];
370:     if (PetscRealPart(diag[i])<droptol) {
371:       PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
372:       PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);

374:       /* Delete the whole matrix at once. */
375:       spbas_delete(retval);
376:       return NEGATIVE_DIAGONAL;
377:     }

379:     /* If necessary, allocate arrays */
380:     if (r_nnz==0) {
381:       spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
382:       if (ierr == PETSC_ERR_MEM) {
383:         spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
384:         spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
385:       } else
386:       r_icol = retval.icols[i];
387:       r_val  = retval.values[i];
388:     }

390:     /* Now, fill in */
391:     r_icol[r_nnz] = i;
392:     r_val [r_nnz] = 1.0;
393:     r_nnz++;
394:     retval.row_nnz[i]++;

396:     retval.nnz += r_nnz;

398:     /* Calculate
399:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
400:     for (j=1; j<p_nnz; j++)  {
401:       k       = i+p_icol[j];
402:       r1_icol = retval.icols[k];
403:       r1_val  = retval.values[k];
404:       for (jL=0; jL<retval.row_nnz[k]; jL++) {
405:         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
406:       }
407:       val[k] /= diag[i];

409:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
410:         /* If necessary, allocate arrays */
411:         if (retval.row_nnz[k]==0) {
412:           spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
413:           if (ierr == PETSC_ERR_MEM) {
414:             spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
415:             spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
416:             r_icol = retval.icols[i];
417:             r_val  = retval.values[i];
418:           } else
419:         }

421:         retval.icols[k][retval.row_nnz[k]]  = i;
422:         retval.values[k][retval.row_nnz[k]] = val[k];
423:         retval.row_nnz[k]++;
424:       }
425:       val[k] = 0;
426:     }

428:     /* Erase the values used in the work arrays */
429:     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
430:   }

432:   ierr=PetscFree(lvec);
433:   ierr=PetscFree(val);

435:   spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
436:   PetscFree(max_row_nnz);

438:   /* Place the inverse of the diagonals in the matrix */
439:   for (i=0; i<nrows; i++) {
440:     r_nnz = retval.row_nnz[i];

442:     retval.values[i][r_nnz-1] = 1.0 / diag[i];
443:     for (j=0; j<r_nnz-1; j++) {
444:       retval.values[i][j] *= -1;
445:     }
446:   }
447:   PetscFree(diag);
448:   *matrix_L = retval;
449:   return(0);
450: }