Actual source code: spbas_cholesky.h

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
  8: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
  9: {
 11:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 12:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 13:   *n_alloc_used   += r_nnz;
 14:   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
 15:   return(0);
 16: }


 19: /*
 20:   spbas_cholesky_garbage_collect:
 21:      move the rows which have been calculated so far, as well as
 22:      those currently under construction, to the front of the
 23:      array, while putting them in the proper order.
 24:      When it seems necessary, enlarge the current arrays.

 26:      NB: re-allocation is being simulated using
 27:          PetscMalloc, memcpy, PetscFree, because
 28:          PetscRealloc does not seem to exist.

 30: */
 31: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 32:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 33:                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
 34:                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 35:                                                                                     places in the arrays: they need not be moved any more */
 36:                                               PetscInt     *n_alloc_used,   /* I/O:  */
 37:                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
 38: {
 39: /* PSEUDO-CODE:
 40:   1. Choose the appropriate size for the arrays
 41:   2. Rescue the arrays which would be lost during garbage collection
 42:   3. Reallocate and correct administration
 43:   4. Move all arrays so that they are in proper order */

 45:   PetscInt        i,j;
 46:   PetscInt        nrows         = result->nrows;
 47:   PetscInt        n_alloc_ok    =0;
 48:   PetscInt        n_alloc_ok_max=0;
 49:   PetscErrorCode  ierr;
 50:   PetscInt        need_already  = 0;
 51:   PetscInt        n_rows_ahead  =0;
 52:   PetscInt        max_need_extra= 0;
 53:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 54:   PetscInt        n_alloc_now     = result->n_alloc_icol;
 55:   PetscInt        *alloc_icol_old = result->alloc_icol;
 56:   PetscScalar     *alloc_val_old  = result->alloc_val;
 57:   PetscInt        *icol_rescue;
 58:   PetscScalar     *val_rescue;
 59:   PetscInt        n_rescue;
 60:   PetscInt        n_row_rescue;
 61:   PetscInt        i_here, i_last, n_copy;
 62:   const PetscReal xtra_perc = 20;

 65:   /*********************************************************
 66:   1. Choose appropriate array size
 67:   Count number of rows and memory usage which is already final */
 68:   for (i=0; i<i_row; i++)  {
 69:     n_alloc_ok     += result->row_nnz[i];
 70:     n_alloc_ok_max += max_row_nnz[i];
 71:   }

 73:   /* Count currently needed memory usage and future memory requirements
 74:     (max, predicted)*/
 75:   for (i=i_row; i<nrows; i++) {
 76:     if (!result->row_nnz[i]) {
 77:       max_need_extra += max_row_nnz[i];
 78:     } else {
 79:       need_already += max_row_nnz[i];
 80:       n_rows_ahead++;
 81:     }
 82:   }

 84:   /* Make maximal and realistic memory requirement estimates */
 85:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 86:   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));

 88:   /* Choose array sizes */
 89:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 90:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 91:   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
 92:   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));

 94:   /* If new estimate is less than what we already have,
 95:     don't reallocate, just garbage-collect */
 96:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
 97:     n_alloc = result->n_alloc_icol;
 98:   }

100:   /* Motivate dimension choice */
101:   PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);
102:   if (n_alloc_max == n_alloc_est) {
103:     PetscInfo(NULL,"this is the correct size\n");
104:   } else if (n_alloc_now >= n_alloc_est) {
105:     PetscInfo(NULL,"the current size, which seems enough\n");
106:   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
107:     PetscInfo(NULL,"the maximum estimate\n");
108:   } else {
109:     PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
110:   }


113:   /**********************************************************
114:   2. Rescue arrays which would be lost
115:   Count how many rows and nonzeros will have to be rescued
116:   when moving all arrays in place */
117:   n_row_rescue = 0; n_rescue = 0;
118:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
119:   else {
120:     i = *n_row_alloc_ok - 1;

122:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
123:   }

125:   for (i=*n_row_alloc_ok; i<nrows; i++) {
126:     i_here = result->icols[i]-result->alloc_icol;
127:     i_last = i_here + result->row_nnz[i];
128:     if (result->row_nnz[i]>0) {
129:       if (*n_alloc_used > i_here || i_last > n_alloc) {
130:         n_rescue += result->row_nnz[i];
131:         n_row_rescue++;
132:       }

134:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
135:       else         *n_alloc_used += max_row_nnz[i];
136:     }
137:   }

139:   /* Allocate rescue arrays */
140:   PetscMalloc1(n_rescue, &icol_rescue);
141:   PetscMalloc1(n_rescue, &val_rescue);

143:   /* Rescue the arrays which need rescuing */
144:   n_row_rescue = 0; n_rescue = 0;
145:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
146:   else {
147:     i             = *n_row_alloc_ok - 1;
148:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
149:   }

151:   for (i=*n_row_alloc_ok; i<nrows; i++) {
152:     i_here = result->icols[i]-result->alloc_icol;
153:     i_last = i_here + result->row_nnz[i];
154:     if (result->row_nnz[i]>0) {
155:       if (*n_alloc_used > i_here || i_last > n_alloc) {
156:         PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(PetscInt));
157:         PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));

159:         n_rescue += result->row_nnz[i];
160:         n_row_rescue++;
161:       }

163:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
164:       else         *n_alloc_used += max_row_nnz[i];
165:     }
166:   }

168:   /**********************************************************
169:   3. Reallocate and correct administration */

171:   if (n_alloc != result->n_alloc_icol) {
172:     n_copy = PetscMin(n_alloc,result->n_alloc_icol);

174:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

176:         Allocate new icol-data, copy old contents */
177:     PetscMalloc1(n_alloc, &result->alloc_icol);
178:     PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(PetscInt));

180:     /* Update administration, Reset pointers to new arrays  */
181:     result->n_alloc_icol = n_alloc;
182:     for (i=0; i<nrows; i++) {
183:       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
184:       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
185:     }

187:     /* Delete old array */
188:     PetscFree(alloc_icol_old);

190:     /* Allocate new value-data, copy old contents */
191:     PetscMalloc1(n_alloc, &result->alloc_val);
192:     PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));

194:     /* Update administration, Reset pointers to new arrays  */
195:     result->n_alloc_val = n_alloc;
196:     for (i=0; i<nrows; i++) {
197:       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
198:     }

200:     /* Delete old array */
201:     PetscFree(alloc_val_old);
202:   }


205:   /*********************************************************
206:   4. Copy all the arrays to their proper places */
207:   n_row_rescue = 0; n_rescue = 0;
208:   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
209:   else {
210:     i = *n_row_alloc_ok - 1;

212:     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
213:   }

215:   for (i=*n_row_alloc_ok; i<nrows; i++) {
216:     i_here = result->icols[i]-result->alloc_icol;
217:     i_last = i_here + result->row_nnz[i];

219:     result->icols[i] = result->alloc_icol + *n_alloc_used;
220:     result->values[i]= result->alloc_val  + *n_alloc_used;

222:     if (result->row_nnz[i]>0) {
223:       if (*n_alloc_used > i_here || i_last > n_alloc) {
224:         PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(PetscInt));
225:         PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));

227:         n_rescue += result->row_nnz[i];
228:         n_row_rescue++;
229:       } else {
230:         for (j=0; j<result->row_nnz[i]; j++) {
231:           result->icols[i][j]  = result->alloc_icol[i_here+j];
232:           result->values[i][j] = result->alloc_val[i_here+j];
233:         }
234:       }
235:       if (i<i_row) *n_alloc_used += result->row_nnz[i];
236:       else         *n_alloc_used += max_row_nnz[i];
237:     }
238:   }

240:   /* Delete the rescue arrays */
241:   PetscFree(icol_rescue);
242:   PetscFree(val_rescue);

244:   *n_row_alloc_ok = i_row;
245:   return(0);
246: }

248: /*
249:   spbas_incomplete_cholesky:
250:      incomplete Cholesky decomposition of a square, symmetric,
251:      positive definite matrix.

253:      In case negative diagonals are encountered, function returns
254:      NEGATIVE_DIAGONAL. When this happens, call this function again
255:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
256:      droptol
257: */
258: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
259: {
260:   PetscInt        jL;
261:   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
262:   PetscInt        *ai=a->i,*aj=a->j;
263:   MatScalar       *aa=a->a;
264:   PetscInt        nrows, ncols;
265:   PetscInt        *max_row_nnz;
266:   PetscErrorCode  ierr;
267:   spbas_matrix    retval;
268:   PetscScalar     *diag;
269:   PetscScalar     *val;
270:   PetscScalar     *lvec;
271:   PetscScalar     epsdiag;
272:   PetscInt        i,j,k;
273:   const PetscBool do_values = PETSC_TRUE;
274:   PetscInt        *r1_icol;
275:   PetscScalar     *r1_val;
276:   PetscInt        *r_icol;
277:   PetscInt        r_nnz;
278:   PetscScalar     *r_val;
279:   PetscInt        *A_icol;
280:   PetscInt        A_nnz;
281:   PetscScalar     *A_val;
282:   PetscInt        *p_icol;
283:   PetscInt        p_nnz;
284:   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
285:   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */

288:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
289:   MatGetSize(A, &nrows, &ncols);
290:   MatGetTrace(A, &epsdiag);

292:   epsdiag *= epsdiag_in / nrows;

294:   PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);

296:   if (droptol<1e-10) droptol=1e-10;

298:   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");

300:   retval.nrows        = nrows;
301:   retval.ncols        = nrows;
302:   retval.nnz          = pattern.nnz/10;
303:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
304:   retval.block_data   = PETSC_TRUE;

306:   spbas_allocate_pattern(&retval, do_values);
307:   PetscMemzero((void*) retval.row_nnz, nrows*sizeof(PetscInt));
308:   spbas_allocate_data(&retval);
309:   retval.nnz = 0;

311:   PetscMalloc1(nrows, &diag);
312:   PetscMalloc1(nrows, &val);
313:   PetscMalloc1(nrows, &lvec);
314:   PetscMalloc1(nrows, &max_row_nnz);

316:   PetscMemzero((void*) val, nrows*sizeof(PetscScalar));
317:   PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));
318:   PetscMemzero((void*) max_row_nnz, nrows*sizeof(PetscInt));

320:   /* Check correct format of sparseness pattern */
321:   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");

323:   /* Count the nonzeros on transpose of pattern */
324:   for (i = 0; i<nrows; i++)  {
325:     p_nnz  = pattern.row_nnz[i];
326:     p_icol = pattern.icols[i];
327:     for (j=0; j<p_nnz; j++)  {
328:       max_row_nnz[i+p_icol[j]]++;
329:     }
330:   }

332:   /* Calculate rows of L */
333:   for (i = 0; i<nrows; i++)  {
334:     p_nnz  = pattern.row_nnz[i];
335:     p_icol = pattern.icols[i];

337:     r_nnz  = retval.row_nnz[i];
338:     r_icol = retval.icols[i];
339:     r_val  = retval.values[i];
340:     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
341:     A_icol = &aj[ai[rip[i]]];
342:     A_val  = &aa[ai[rip[i]]];

344:     /* Calculate  val += A(i,i:n)'; */
345:     for (j=0; j<A_nnz; j++) {
346:       k = riip[A_icol[j]];
347:       if (k>=i) val[k] = A_val[j];
348:     }

350:     /*  Add regularization */
351:     val[i] += epsdiag;

353:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
354:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
355:     for (j=0; j<r_nnz; j++)  {
356:       k       = r_icol[j];
357:       lvec[k] = diag[k] * r_val[j];
358:       val[i] -= r_val[j] * lvec[k];
359:     }

361:     /* Calculate the new diagonal */
362:     diag[i] = val[i];
363:     if (PetscRealPart(diag[i])<droptol) {
364:       PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
365:       PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);

367:       /* Delete the whole matrix at once. */
368:       spbas_delete(retval);
369:       return NEGATIVE_DIAGONAL;
370:     }

372:     /* If necessary, allocate arrays */
373:     if (r_nnz==0) {
374:       spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
375:       if (ierr == PETSC_ERR_MEM) {
376:         spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
377:         spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
378:       } else
379:       r_icol = retval.icols[i];
380:       r_val  = retval.values[i];
381:     }

383:     /* Now, fill in */
384:     r_icol[r_nnz] = i;
385:     r_val [r_nnz] = 1.0;
386:     r_nnz++;
387:     retval.row_nnz[i]++;

389:     retval.nnz += r_nnz;

391:     /* Calculate
392:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
393:     for (j=1; j<p_nnz; j++)  {
394:       k       = i+p_icol[j];
395:       r1_icol = retval.icols[k];
396:       r1_val  = retval.values[k];
397:       for (jL=0; jL<retval.row_nnz[k]; jL++) {
398:         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
399:       }
400:       val[k] /= diag[i];

402:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
403:         /* If necessary, allocate arrays */
404:         if (retval.row_nnz[k]==0) {
405:           spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
406:           if (ierr == PETSC_ERR_MEM) {
407:             spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
408:             spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
409:             r_icol = retval.icols[i];
410:           } else
411:         }

413:         retval.icols[k][retval.row_nnz[k]]  = i;
414:         retval.values[k][retval.row_nnz[k]] = val[k];
415:         retval.row_nnz[k]++;
416:       }
417:       val[k] = 0;
418:     }

420:     /* Erase the values used in the work arrays */
421:     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
422:   }

424:   ierr=PetscFree(lvec);
425:   ierr=PetscFree(val);

427:   spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
428:   PetscFree(max_row_nnz);

430:   /* Place the inverse of the diagonals in the matrix */
431:   for (i=0; i<nrows; i++) {
432:     r_nnz = retval.row_nnz[i];

434:     retval.values[i][r_nnz-1] = 1.0 / diag[i];
435:     for (j=0; j<r_nnz-1; j++) {
436:       retval.values[i][j] *= -1;
437:     }
438:   }
439:   PetscFree(diag);
440:   *matrix_L = retval;
441:   return(0);
442: }