Actual source code: spbas_cholesky.h


  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
  8: PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
  9: {
 10:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 11:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 12:   *n_alloc_used   += r_nnz;
 13:   if (*n_alloc_used > retval.n_alloc_icol) return PETSC_FALSE;
 14:   return PETSC_TRUE;
 15: }

 17: /*
 18:   spbas_cholesky_garbage_collect:
 19:      move the rows which have been calculated so far, as well as
 20:      those currently under construction, to the front of the
 21:      array, while putting them in the proper order.
 22:      When it seems necessary, enlarge the current arrays.

 24:      NB: re-allocation is being simulated using
 25:          PetscMalloc, memcpy, PetscFree, because
 26:          PetscRealloc does not seem to exist.

 28: */
 29: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 30:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 31:                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
 32:                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 33:                                                                                     places in the arrays: they need not be moved any more */
 34:                                               PetscInt     *n_alloc_used,   /* I/O:  */
 35:                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
 36: {
 37: /* PSEUDO-CODE:
 38:   1. Choose the appropriate size for the arrays
 39:   2. Rescue the arrays which would be lost during garbage collection
 40:   3. Reallocate and correct administration
 41:   4. Move all arrays so that they are in proper order */

 43:   PetscInt        i,j;
 44:   PetscInt        nrows         = result->nrows;
 45:   PetscInt        n_alloc_ok    =0;
 46:   PetscInt        n_alloc_ok_max=0;
 47:   PetscInt        need_already  = 0;
 48:   PetscInt        n_rows_ahead  =0;
 49:   PetscInt        max_need_extra= 0;
 50:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 51:   PetscInt        n_alloc_now     = result->n_alloc_icol;
 52:   PetscInt        *alloc_icol_old = result->alloc_icol;
 53:   PetscScalar     *alloc_val_old  = result->alloc_val;
 54:   PetscInt        *icol_rescue;
 55:   PetscScalar     *val_rescue;
 56:   PetscInt        n_rescue;
 57:   PetscInt        n_row_rescue;
 58:   PetscInt        i_here, i_last, n_copy;
 59:   const PetscReal xtra_perc = 20;

 61:   /*********************************************************
 62:   1. Choose appropriate array size
 63:   Count number of rows and memory usage which is already final */
 64:   for (i=0; i<i_row; i++)  {
 65:     n_alloc_ok     += result->row_nnz[i];
 66:     n_alloc_ok_max += max_row_nnz[i];
 67:   }

 69:   /* Count currently needed memory usage and future memory requirements
 70:     (max, predicted)*/
 71:   for (i=i_row; i<nrows; i++) {
 72:     if (!result->row_nnz[i]) {
 73:       max_need_extra += max_row_nnz[i];
 74:     } else {
 75:       need_already += max_row_nnz[i];
 76:       n_rows_ahead++;
 77:     }
 78:   }

 80:   /* Make maximal and realistic memory requirement estimates */
 81:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 82:   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));

 84:   /* Choose array sizes */
 85:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 86:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 87:   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
 88:   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));

 90:   /* If new estimate is less than what we already have,
 91:     don't reallocate, just garbage-collect */
 92:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
 93:     n_alloc = result->n_alloc_icol;
 94:   }

 96:   /* Motivate dimension choice */
 97:   PetscInfo(NULL,"   Allocating %" PetscInt_FMT " nonzeros: ",n_alloc);
 98:   if (n_alloc_max == n_alloc_est) {
 99:     PetscInfo(NULL,"this is the correct size\n");
100:   } else if (n_alloc_now >= n_alloc_est) {
101:     PetscInfo(NULL,"the current size, which seems enough\n");
102:   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
103:     PetscInfo(NULL,"the maximum estimate\n");
104:   } else {
105:     PetscInfo(NULL,"%6.2f %% more than the estimate\n",(double)xtra_perc);
106:   }

108:   /**********************************************************
109:   2. Rescue arrays which would be lost
110:   Count how many rows and nonzeros will have to be rescued
111:   when moving all arrays in place */
112:   n_row_rescue = 0; n_rescue = 0;
113:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
114:   else {
115:     i = *n_row_alloc_ok - 1;

117:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
118:   }

120:   for (i=*n_row_alloc_ok; i<nrows; i++) {
121:     i_here = result->icols[i]-result->alloc_icol;
122:     i_last = i_here + result->row_nnz[i];
123:     if (result->row_nnz[i]>0) {
124:       if (*n_alloc_used > i_here || i_last > n_alloc) {
125:         n_rescue += result->row_nnz[i];
126:         n_row_rescue++;
127:       }

129:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
130:       else         *n_alloc_used += max_row_nnz[i];
131:     }
132:   }

134:   /* Allocate rescue arrays */
135:   PetscMalloc1(n_rescue, &icol_rescue);
136:   PetscMalloc1(n_rescue, &val_rescue);

138:   /* Rescue the arrays which need rescuing */
139:   n_row_rescue = 0; n_rescue = 0;
140:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
141:   else {
142:     i             = *n_row_alloc_ok - 1;
143:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
144:   }

146:   for (i=*n_row_alloc_ok; i<nrows; i++) {
147:     i_here = result->icols[i]-result->alloc_icol;
148:     i_last = i_here + result->row_nnz[i];
149:     if (result->row_nnz[i]>0) {
150:       if (*n_alloc_used > i_here || i_last > n_alloc) {
151:         PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
152:         PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
153:         n_rescue += result->row_nnz[i];
154:         n_row_rescue++;
155:       }

157:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
158:       else         *n_alloc_used += max_row_nnz[i];
159:     }
160:   }

162:   /**********************************************************
163:   3. Reallocate and correct administration */

165:   if (n_alloc != result->n_alloc_icol) {
166:     n_copy = PetscMin(n_alloc,result->n_alloc_icol);

168:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

170:         Allocate new icol-data, copy old contents */
171:     PetscMalloc1(n_alloc, &result->alloc_icol);
172:     PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);

174:     /* Update administration, Reset pointers to new arrays  */
175:     result->n_alloc_icol = n_alloc;
176:     for (i=0; i<nrows; i++) {
177:       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
178:       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
179:     }

181:     /* Delete old array */
182:     PetscFree(alloc_icol_old);

184:     /* Allocate new value-data, copy old contents */
185:     PetscMalloc1(n_alloc, &result->alloc_val);
186:     PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);

188:     /* Update administration, Reset pointers to new arrays  */
189:     result->n_alloc_val = n_alloc;
190:     for (i=0; i<nrows; i++) {
191:       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
192:     }

194:     /* Delete old array */
195:     PetscFree(alloc_val_old);
196:   }

198:   /*********************************************************
199:   4. Copy all the arrays to their proper places */
200:   n_row_rescue = 0; n_rescue = 0;
201:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
202:   else {
203:     i = *n_row_alloc_ok - 1;

205:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
206:   }

208:   for (i=*n_row_alloc_ok; i<nrows; i++) {
209:     i_here = result->icols[i]-result->alloc_icol;
210:     i_last = i_here + result->row_nnz[i];

212:     result->icols[i] = result->alloc_icol + *n_alloc_used;
213:     result->values[i]= result->alloc_val  + *n_alloc_used;

215:     if (result->row_nnz[i]>0) {
216:       if (*n_alloc_used > i_here || i_last > n_alloc) {
217:         PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
218:         PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);

220:         n_rescue += result->row_nnz[i];
221:         n_row_rescue++;
222:       } else {
223:         for (j=0; j<result->row_nnz[i]; j++) {
224:           result->icols[i][j]  = result->alloc_icol[i_here+j];
225:           result->values[i][j] = result->alloc_val[i_here+j];
226:         }
227:       }
228:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
229:       else         *n_alloc_used += max_row_nnz[i];
230:     }
231:   }

233:   /* Delete the rescue arrays */
234:   PetscFree(icol_rescue);
235:   PetscFree(val_rescue);

237:   *n_row_alloc_ok = i_row;
238:   return 0;
239: }

241: /*
242:   spbas_incomplete_cholesky:
243:      incomplete Cholesky decomposition of a square, symmetric,
244:      positive definite matrix.

246:      In case negative diagonals are encountered, function returns
247:      NEGATIVE_DIAGONAL. When this happens, call this function again
248:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
249:      droptol
250: */
251: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success)
252: {
253:   PetscInt        jL;
254:   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
255:   PetscInt        *ai=a->i,*aj=a->j;
256:   MatScalar       *aa=a->a;
257:   PetscInt        nrows, ncols;
258:   PetscInt        *max_row_nnz;
259:   spbas_matrix    retval;
260:   PetscScalar     *diag;
261:   PetscScalar     *val;
262:   PetscScalar     *lvec;
263:   PetscScalar     epsdiag;
264:   PetscInt        i,j,k;
265:   const PetscBool do_values = PETSC_TRUE;
266:   PetscInt        *r1_icol;
267:   PetscScalar     *r1_val;
268:   PetscInt        *r_icol;
269:   PetscInt        r_nnz;
270:   PetscScalar     *r_val;
271:   PetscInt        *A_icol;
272:   PetscInt        A_nnz;
273:   PetscScalar     *A_val;
274:   PetscInt        *p_icol;
275:   PetscInt        p_nnz;
276:   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
277:   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */

279:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
280:   MatGetSize(A, &nrows, &ncols);
281:   MatGetTrace(A, &epsdiag);

283:   epsdiag *= epsdiag_in / nrows;

285:   PetscInfo(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);

287:   if (droptol<1e-10) droptol=1e-10;

289:   retval.nrows        = nrows;
290:   retval.ncols        = nrows;
291:   retval.nnz          = pattern.nnz/10;
292:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
293:   retval.block_data   = PETSC_TRUE;

295:   spbas_allocate_pattern(&retval, do_values);
296:   PetscArrayzero(retval.row_nnz, nrows);
297:   spbas_allocate_data(&retval);
298:   retval.nnz = 0;

300:   PetscMalloc1(nrows, &diag);
301:   PetscCalloc1(nrows, &val);
302:   PetscCalloc1(nrows, &lvec);
303:   PetscCalloc1(nrows, &max_row_nnz);

305:   /* Count the nonzeros on transpose of pattern */
306:   for (i = 0; i<nrows; i++)  {
307:     p_nnz  = pattern.row_nnz[i];
308:     p_icol = pattern.icols[i];
309:     for (j=0; j<p_nnz; j++)  {
310:       max_row_nnz[i+p_icol[j]]++;
311:     }
312:   }

314:   /* Calculate rows of L */
315:   for (i = 0; i<nrows; i++)  {
316:     p_nnz  = pattern.row_nnz[i];
317:     p_icol = pattern.icols[i];

319:     r_nnz  = retval.row_nnz[i];
320:     r_icol = retval.icols[i];
321:     r_val  = retval.values[i];
322:     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
323:     A_icol = &aj[ai[rip[i]]];
324:     A_val  = &aa[ai[rip[i]]];

326:     /* Calculate  val += A(i,i:n)'; */
327:     for (j=0; j<A_nnz; j++) {
328:       k = riip[A_icol[j]];
329:       if (k>=i) val[k] = A_val[j];
330:     }

332:     /*  Add regularization */
333:     val[i] += epsdiag;

335:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
336:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
337:     for (j=0; j<r_nnz; j++)  {
338:       k       = r_icol[j];
339:       lvec[k] = diag[k] * r_val[j];
340:       val[i] -= r_val[j] * lvec[k];
341:     }

343:     /* Calculate the new diagonal */
344:     diag[i] = val[i];
345:     if (PetscRealPart(diag[i])<droptol) {
346:       PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
347:       PetscInfo(NULL,"Negative diagonal in row %" PetscInt_FMT "\n",i+1);

349:       /* Delete the whole matrix at once. */
350:       spbas_delete(retval);
351:       *success = PETSC_FALSE;
352:       return 0;
353:     }

355:     /* If necessary, allocate arrays */
356:     if (r_nnz==0) {
357:       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
359:       r_icol = retval.icols[i];
360:       r_val  = retval.values[i];
361:     }

363:     /* Now, fill in */
364:     r_icol[r_nnz] = i;
365:     r_val [r_nnz] = 1.0;
366:     r_nnz++;
367:     retval.row_nnz[i]++;

369:     retval.nnz += r_nnz;

371:     /* Calculate
372:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
373:     for (j=1; j<p_nnz; j++)  {
374:       k       = i+p_icol[j];
375:       r1_icol = retval.icols[k];
376:       r1_val  = retval.values[k];
377:       for (jL=0; jL<retval.row_nnz[k]; jL++) {
378:         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
379:       }
380:       val[k] /= diag[i];

382:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
383:         /* If necessary, allocate arrays */
384:         if (!retval.row_nnz[k]) {
385:           PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
386:           if (!success) {
387:             spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
388:             flag   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
390:             r_icol = retval.icols[i];
391:           }
392:         }

394:         retval.icols[k][retval.row_nnz[k]]  = i;
395:         retval.values[k][retval.row_nnz[k]] = val[k];
396:         retval.row_nnz[k]++;
397:       }
398:       val[k] = 0;
399:     }

401:     /* Erase the values used in the work arrays */
402:     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
403:   }

405:   PetscFree(lvec);
406:   PetscFree(val);

408:   spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
409:   PetscFree(max_row_nnz);

411:   /* Place the inverse of the diagonals in the matrix */
412:   for (i=0; i<nrows; i++) {
413:     r_nnz = retval.row_nnz[i];

415:     retval.values[i][r_nnz-1] = 1.0 / diag[i];
416:     for (j=0; j<r_nnz-1; j++) {
417:       retval.values[i][j] *= -1;
418:     }
419:   }
420:   PetscFree(diag);
421:   *matrix_L = retval;
422:   *success  = PETSC_TRUE;
423:   return 0;
424: }