Actual source code: spbas_cholesky.h
petsc-3.8.4 2018-03-24
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
8: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9: {
11: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
12: retval.values[k] = &retval.alloc_val[*n_alloc_used];
13: *n_alloc_used += r_nnz;
14: if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
15: return(0);
16: }
19: /*
20: spbas_cholesky_garbage_collect:
21: move the rows which have been calculated so far, as well as
22: those currently under construction, to the front of the
23: array, while putting them in the proper order.
24: When it seems necessary, enlarge the current arrays.
26: NB: re-allocation is being simulated using
27: PetscMalloc, memcpy, PetscFree, because
28: PetscRealloc does not seem to exist.
30: */
31: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
32: Only the storage, not the contents of this matrix is changed in this function */
33: PetscInt i_row, /* I : Number of rows for which the final contents are known */
34: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
35: places in the arrays: they need not be moved any more */
36: PetscInt *n_alloc_used, /* I/O: */
37: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
38: {
39: /* PSEUDO-CODE:
40: 1. Choose the appropriate size for the arrays
41: 2. Rescue the arrays which would be lost during garbage collection
42: 3. Reallocate and correct administration
43: 4. Move all arrays so that they are in proper order */
45: PetscInt i,j;
46: PetscInt nrows = result->nrows;
47: PetscInt n_alloc_ok =0;
48: PetscInt n_alloc_ok_max=0;
49: PetscErrorCode ierr;
50: PetscInt need_already = 0;
51: PetscInt n_rows_ahead =0;
52: PetscInt max_need_extra= 0;
53: PetscInt n_alloc_max, n_alloc_est, n_alloc;
54: PetscInt n_alloc_now = result->n_alloc_icol;
55: PetscInt *alloc_icol_old = result->alloc_icol;
56: PetscScalar *alloc_val_old = result->alloc_val;
57: PetscInt *icol_rescue;
58: PetscScalar *val_rescue;
59: PetscInt n_rescue;
60: PetscInt n_row_rescue;
61: PetscInt i_here, i_last, n_copy;
62: const PetscReal xtra_perc = 20;
65: /*********************************************************
66: 1. Choose appropriate array size
67: Count number of rows and memory usage which is already final */
68: for (i=0; i<i_row; i++) {
69: n_alloc_ok += result->row_nnz[i];
70: n_alloc_ok_max += max_row_nnz[i];
71: }
73: /* Count currently needed memory usage and future memory requirements
74: (max, predicted)*/
75: for (i=i_row; i<nrows; i++) {
76: if (!result->row_nnz[i]) {
77: max_need_extra += max_row_nnz[i];
78: } else {
79: need_already += max_row_nnz[i];
80: n_rows_ahead++;
81: }
82: }
84: /* Make maximal and realistic memory requirement estimates */
85: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
86: n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
88: /* Choose array sizes */
89: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
90: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
91: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
92: else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
94: /* If new estimate is less than what we already have,
95: don't reallocate, just garbage-collect */
96: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
97: n_alloc = result->n_alloc_icol;
98: }
100: /* Motivate dimension choice */
101: PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);
102: if (n_alloc_max == n_alloc_est) {
103: PetscInfo(NULL,"this is the correct size\n");
104: } else if (n_alloc_now >= n_alloc_est) {
105: PetscInfo(NULL,"the current size, which seems enough\n");
106: } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
107: PetscInfo(NULL,"the maximum estimate\n");
108: } else {
109: PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
110: }
113: /**********************************************************
114: 2. Rescue arrays which would be lost
115: Count how many rows and nonzeros will have to be rescued
116: when moving all arrays in place */
117: n_row_rescue = 0; n_rescue = 0;
118: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
119: else {
120: i = *n_row_alloc_ok - 1;
122: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
123: }
125: for (i=*n_row_alloc_ok; i<nrows; i++) {
126: i_here = result->icols[i]-result->alloc_icol;
127: i_last = i_here + result->row_nnz[i];
128: if (result->row_nnz[i]>0) {
129: if (*n_alloc_used > i_here || i_last > n_alloc) {
130: n_rescue += result->row_nnz[i];
131: n_row_rescue++;
132: }
134: if (i<i_row) *n_alloc_used += result->row_nnz[i];
135: else *n_alloc_used += max_row_nnz[i];
136: }
137: }
139: /* Allocate rescue arrays */
140: PetscMalloc1(n_rescue, &icol_rescue);
141: PetscMalloc1(n_rescue, &val_rescue);
143: /* Rescue the arrays which need rescuing */
144: n_row_rescue = 0; n_rescue = 0;
145: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
146: else {
147: i = *n_row_alloc_ok - 1;
148: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
149: }
151: for (i=*n_row_alloc_ok; i<nrows; i++) {
152: i_here = result->icols[i]-result->alloc_icol;
153: i_last = i_here + result->row_nnz[i];
154: if (result->row_nnz[i]>0) {
155: if (*n_alloc_used > i_here || i_last > n_alloc) {
156: PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(PetscInt));
157: PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));
159: n_rescue += result->row_nnz[i];
160: n_row_rescue++;
161: }
163: if (i<i_row) *n_alloc_used += result->row_nnz[i];
164: else *n_alloc_used += max_row_nnz[i];
165: }
166: }
168: /**********************************************************
169: 3. Reallocate and correct administration */
171: if (n_alloc != result->n_alloc_icol) {
172: n_copy = PetscMin(n_alloc,result->n_alloc_icol);
174: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
176: Allocate new icol-data, copy old contents */
177: PetscMalloc1(n_alloc, &result->alloc_icol);
178: PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(PetscInt));
180: /* Update administration, Reset pointers to new arrays */
181: result->n_alloc_icol = n_alloc;
182: for (i=0; i<nrows; i++) {
183: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
184: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
185: }
187: /* Delete old array */
188: PetscFree(alloc_icol_old);
190: /* Allocate new value-data, copy old contents */
191: PetscMalloc1(n_alloc, &result->alloc_val);
192: PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));
194: /* Update administration, Reset pointers to new arrays */
195: result->n_alloc_val = n_alloc;
196: for (i=0; i<nrows; i++) {
197: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
198: }
200: /* Delete old array */
201: PetscFree(alloc_val_old);
202: }
205: /*********************************************************
206: 4. Copy all the arrays to their proper places */
207: n_row_rescue = 0; n_rescue = 0;
208: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
209: else {
210: i = *n_row_alloc_ok - 1;
212: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
213: }
215: for (i=*n_row_alloc_ok; i<nrows; i++) {
216: i_here = result->icols[i]-result->alloc_icol;
217: i_last = i_here + result->row_nnz[i];
219: result->icols[i] = result->alloc_icol + *n_alloc_used;
220: result->values[i]= result->alloc_val + *n_alloc_used;
222: if (result->row_nnz[i]>0) {
223: if (*n_alloc_used > i_here || i_last > n_alloc) {
224: PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(PetscInt));
225: PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));
227: n_rescue += result->row_nnz[i];
228: n_row_rescue++;
229: } else {
230: for (j=0; j<result->row_nnz[i]; j++) {
231: result->icols[i][j] = result->alloc_icol[i_here+j];
232: result->values[i][j] = result->alloc_val[i_here+j];
233: }
234: }
235: if (i<i_row) *n_alloc_used += result->row_nnz[i];
236: else *n_alloc_used += max_row_nnz[i];
237: }
238: }
240: /* Delete the rescue arrays */
241: PetscFree(icol_rescue);
242: PetscFree(val_rescue);
244: *n_row_alloc_ok = i_row;
245: return(0);
246: }
248: /*
249: spbas_incomplete_cholesky:
250: incomplete Cholesky decomposition of a square, symmetric,
251: positive definite matrix.
253: In case negative diagonals are encountered, function returns
254: NEGATIVE_DIAGONAL. When this happens, call this function again
255: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
256: droptol
257: */
258: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
259: {
260: PetscInt jL;
261: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
262: PetscInt *ai=a->i,*aj=a->j;
263: MatScalar *aa=a->a;
264: PetscInt nrows, ncols;
265: PetscInt *max_row_nnz;
266: PetscErrorCode ierr;
267: spbas_matrix retval;
268: PetscScalar *diag;
269: PetscScalar *val;
270: PetscScalar *lvec;
271: PetscScalar epsdiag;
272: PetscInt i,j,k;
273: const PetscBool do_values = PETSC_TRUE;
274: PetscInt *r1_icol;
275: PetscScalar *r1_val;
276: PetscInt *r_icol;
277: PetscInt r_nnz;
278: PetscScalar *r_val;
279: PetscInt *A_icol;
280: PetscInt A_nnz;
281: PetscScalar *A_val;
282: PetscInt *p_icol;
283: PetscInt p_nnz;
284: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
285: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
288: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
289: MatGetSize(A, &nrows, &ncols);
290: MatGetTrace(A, &epsdiag);
292: epsdiag *= epsdiag_in / nrows;
294: PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);
296: if (droptol<1e-10) droptol=1e-10;
298: if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
300: retval.nrows = nrows;
301: retval.ncols = nrows;
302: retval.nnz = pattern.nnz/10;
303: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
304: retval.block_data = PETSC_TRUE;
306: spbas_allocate_pattern(&retval, do_values);
307: PetscMemzero((void*) retval.row_nnz, nrows*sizeof(PetscInt));
308: spbas_allocate_data(&retval);
309: retval.nnz = 0;
311: PetscMalloc1(nrows, &diag);
312: PetscMalloc1(nrows, &val);
313: PetscMalloc1(nrows, &lvec);
314: PetscMalloc1(nrows, &max_row_nnz);
316: PetscMemzero((void*) val, nrows*sizeof(PetscScalar));
317: PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));
318: PetscMemzero((void*) max_row_nnz, nrows*sizeof(PetscInt));
320: /* Check correct format of sparseness pattern */
321: if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
323: /* Count the nonzeros on transpose of pattern */
324: for (i = 0; i<nrows; i++) {
325: p_nnz = pattern.row_nnz[i];
326: p_icol = pattern.icols[i];
327: for (j=0; j<p_nnz; j++) {
328: max_row_nnz[i+p_icol[j]]++;
329: }
330: }
332: /* Calculate rows of L */
333: for (i = 0; i<nrows; i++) {
334: p_nnz = pattern.row_nnz[i];
335: p_icol = pattern.icols[i];
337: r_nnz = retval.row_nnz[i];
338: r_icol = retval.icols[i];
339: r_val = retval.values[i];
340: A_nnz = ai[rip[i]+1] - ai[rip[i]];
341: A_icol = &aj[ai[rip[i]]];
342: A_val = &aa[ai[rip[i]]];
344: /* Calculate val += A(i,i:n)'; */
345: for (j=0; j<A_nnz; j++) {
346: k = riip[A_icol[j]];
347: if (k>=i) val[k] = A_val[j];
348: }
350: /* Add regularization */
351: val[i] += epsdiag;
353: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
354: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
355: for (j=0; j<r_nnz; j++) {
356: k = r_icol[j];
357: lvec[k] = diag[k] * r_val[j];
358: val[i] -= r_val[j] * lvec[k];
359: }
361: /* Calculate the new diagonal */
362: diag[i] = val[i];
363: if (PetscRealPart(diag[i])<droptol) {
364: PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
365: PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);
367: /* Delete the whole matrix at once. */
368: spbas_delete(retval);
369: return NEGATIVE_DIAGONAL;
370: }
372: /* If necessary, allocate arrays */
373: if (r_nnz==0) {
374: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
375: if (ierr == PETSC_ERR_MEM) {
376: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
377: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
378: } else
379: r_icol = retval.icols[i];
380: r_val = retval.values[i];
381: }
383: /* Now, fill in */
384: r_icol[r_nnz] = i;
385: r_val [r_nnz] = 1.0;
386: r_nnz++;
387: retval.row_nnz[i]++;
389: retval.nnz += r_nnz;
391: /* Calculate
392: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
393: for (j=1; j<p_nnz; j++) {
394: k = i+p_icol[j];
395: r1_icol = retval.icols[k];
396: r1_val = retval.values[k];
397: for (jL=0; jL<retval.row_nnz[k]; jL++) {
398: val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
399: }
400: val[k] /= diag[i];
402: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
403: /* If necessary, allocate arrays */
404: if (retval.row_nnz[k]==0) {
405: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
406: if (ierr == PETSC_ERR_MEM) {
407: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
408: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
409: r_icol = retval.icols[i];
410: } else
411: }
413: retval.icols[k][retval.row_nnz[k]] = i;
414: retval.values[k][retval.row_nnz[k]] = val[k];
415: retval.row_nnz[k]++;
416: }
417: val[k] = 0;
418: }
420: /* Erase the values used in the work arrays */
421: for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
422: }
424: ierr=PetscFree(lvec);
425: ierr=PetscFree(val);
427: spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
428: PetscFree(max_row_nnz);
430: /* Place the inverse of the diagonals in the matrix */
431: for (i=0; i<nrows; i++) {
432: r_nnz = retval.row_nnz[i];
434: retval.values[i][r_nnz-1] = 1.0 / diag[i];
435: for (j=0; j<r_nnz-1; j++) {
436: retval.values[i][j] *= -1;
437: }
438: }
439: PetscFree(diag);
440: *matrix_L = retval;
441: return(0);
442: }