Actual source code: spbas_cholesky.h
petsc-3.14.6 2021-03-30
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
8: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9: {
11: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
12: retval.values[k] = &retval.alloc_val[*n_alloc_used];
13: *n_alloc_used += r_nnz;
14: if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
15: return(0);
16: }
19: /*
20: spbas_cholesky_garbage_collect:
21: move the rows which have been calculated so far, as well as
22: those currently under construction, to the front of the
23: array, while putting them in the proper order.
24: When it seems necessary, enlarge the current arrays.
26: NB: re-allocation is being simulated using
27: PetscMalloc, memcpy, PetscFree, because
28: PetscRealloc does not seem to exist.
30: */
31: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
32: Only the storage, not the contents of this matrix is changed in this function */
33: PetscInt i_row, /* I : Number of rows for which the final contents are known */
34: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
35: places in the arrays: they need not be moved any more */
36: PetscInt *n_alloc_used, /* I/O: */
37: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
38: {
39: /* PSEUDO-CODE:
40: 1. Choose the appropriate size for the arrays
41: 2. Rescue the arrays which would be lost during garbage collection
42: 3. Reallocate and correct administration
43: 4. Move all arrays so that they are in proper order */
45: PetscInt i,j;
46: PetscInt nrows = result->nrows;
47: PetscInt n_alloc_ok =0;
48: PetscInt n_alloc_ok_max=0;
49: PetscErrorCode ierr;
50: PetscInt need_already = 0;
51: PetscInt n_rows_ahead =0;
52: PetscInt max_need_extra= 0;
53: PetscInt n_alloc_max, n_alloc_est, n_alloc;
54: PetscInt n_alloc_now = result->n_alloc_icol;
55: PetscInt *alloc_icol_old = result->alloc_icol;
56: PetscScalar *alloc_val_old = result->alloc_val;
57: PetscInt *icol_rescue;
58: PetscScalar *val_rescue;
59: PetscInt n_rescue;
60: PetscInt n_row_rescue;
61: PetscInt i_here, i_last, n_copy;
62: const PetscReal xtra_perc = 20;
65: /*********************************************************
66: 1. Choose appropriate array size
67: Count number of rows and memory usage which is already final */
68: for (i=0; i<i_row; i++) {
69: n_alloc_ok += result->row_nnz[i];
70: n_alloc_ok_max += max_row_nnz[i];
71: }
73: /* Count currently needed memory usage and future memory requirements
74: (max, predicted)*/
75: for (i=i_row; i<nrows; i++) {
76: if (!result->row_nnz[i]) {
77: max_need_extra += max_row_nnz[i];
78: } else {
79: need_already += max_row_nnz[i];
80: n_rows_ahead++;
81: }
82: }
84: /* Make maximal and realistic memory requirement estimates */
85: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
86: n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
88: /* Choose array sizes */
89: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
90: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
91: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
92: else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
94: /* If new estimate is less than what we already have,
95: don't reallocate, just garbage-collect */
96: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
97: n_alloc = result->n_alloc_icol;
98: }
100: /* Motivate dimension choice */
101: PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);
102: if (n_alloc_max == n_alloc_est) {
103: PetscInfo(NULL,"this is the correct size\n");
104: } else if (n_alloc_now >= n_alloc_est) {
105: PetscInfo(NULL,"the current size, which seems enough\n");
106: } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
107: PetscInfo(NULL,"the maximum estimate\n");
108: } else {
109: PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
110: }
113: /**********************************************************
114: 2. Rescue arrays which would be lost
115: Count how many rows and nonzeros will have to be rescued
116: when moving all arrays in place */
117: n_row_rescue = 0; n_rescue = 0;
118: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
119: else {
120: i = *n_row_alloc_ok - 1;
122: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
123: }
125: for (i=*n_row_alloc_ok; i<nrows; i++) {
126: i_here = result->icols[i]-result->alloc_icol;
127: i_last = i_here + result->row_nnz[i];
128: if (result->row_nnz[i]>0) {
129: if (*n_alloc_used > i_here || i_last > n_alloc) {
130: n_rescue += result->row_nnz[i];
131: n_row_rescue++;
132: }
134: if (i<i_row) *n_alloc_used += result->row_nnz[i];
135: else *n_alloc_used += max_row_nnz[i];
136: }
137: }
139: /* Allocate rescue arrays */
140: PetscMalloc1(n_rescue, &icol_rescue);
141: PetscMalloc1(n_rescue, &val_rescue);
143: /* Rescue the arrays which need rescuing */
144: n_row_rescue = 0; n_rescue = 0;
145: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
146: else {
147: i = *n_row_alloc_ok - 1;
148: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
149: }
151: for (i=*n_row_alloc_ok; i<nrows; i++) {
152: i_here = result->icols[i]-result->alloc_icol;
153: i_last = i_here + result->row_nnz[i];
154: if (result->row_nnz[i]>0) {
155: if (*n_alloc_used > i_here || i_last > n_alloc) {
156: PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
157: PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
158: n_rescue += result->row_nnz[i];
159: n_row_rescue++;
160: }
162: if (i<i_row) *n_alloc_used += result->row_nnz[i];
163: else *n_alloc_used += max_row_nnz[i];
164: }
165: }
167: /**********************************************************
168: 3. Reallocate and correct administration */
170: if (n_alloc != result->n_alloc_icol) {
171: n_copy = PetscMin(n_alloc,result->n_alloc_icol);
173: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
175: Allocate new icol-data, copy old contents */
176: PetscMalloc1(n_alloc, &result->alloc_icol);
177: PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);
179: /* Update administration, Reset pointers to new arrays */
180: result->n_alloc_icol = n_alloc;
181: for (i=0; i<nrows; i++) {
182: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
183: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
184: }
186: /* Delete old array */
187: PetscFree(alloc_icol_old);
189: /* Allocate new value-data, copy old contents */
190: PetscMalloc1(n_alloc, &result->alloc_val);
191: PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);
193: /* Update administration, Reset pointers to new arrays */
194: result->n_alloc_val = n_alloc;
195: for (i=0; i<nrows; i++) {
196: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
197: }
199: /* Delete old array */
200: PetscFree(alloc_val_old);
201: }
204: /*********************************************************
205: 4. Copy all the arrays to their proper places */
206: n_row_rescue = 0; n_rescue = 0;
207: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
208: else {
209: i = *n_row_alloc_ok - 1;
211: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
212: }
214: for (i=*n_row_alloc_ok; i<nrows; i++) {
215: i_here = result->icols[i]-result->alloc_icol;
216: i_last = i_here + result->row_nnz[i];
218: result->icols[i] = result->alloc_icol + *n_alloc_used;
219: result->values[i]= result->alloc_val + *n_alloc_used;
221: if (result->row_nnz[i]>0) {
222: if (*n_alloc_used > i_here || i_last > n_alloc) {
223: PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
224: PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);
226: n_rescue += result->row_nnz[i];
227: n_row_rescue++;
228: } else {
229: for (j=0; j<result->row_nnz[i]; j++) {
230: result->icols[i][j] = result->alloc_icol[i_here+j];
231: result->values[i][j] = result->alloc_val[i_here+j];
232: }
233: }
234: if (i<i_row) *n_alloc_used += result->row_nnz[i];
235: else *n_alloc_used += max_row_nnz[i];
236: }
237: }
239: /* Delete the rescue arrays */
240: PetscFree(icol_rescue);
241: PetscFree(val_rescue);
243: *n_row_alloc_ok = i_row;
244: return(0);
245: }
247: /*
248: spbas_incomplete_cholesky:
249: incomplete Cholesky decomposition of a square, symmetric,
250: positive definite matrix.
252: In case negative diagonals are encountered, function returns
253: NEGATIVE_DIAGONAL. When this happens, call this function again
254: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
255: droptol
256: */
257: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
258: {
259: PetscInt jL;
260: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
261: PetscInt *ai=a->i,*aj=a->j;
262: MatScalar *aa=a->a;
263: PetscInt nrows, ncols;
264: PetscInt *max_row_nnz;
265: PetscErrorCode ierr;
266: spbas_matrix retval;
267: PetscScalar *diag;
268: PetscScalar *val;
269: PetscScalar *lvec;
270: PetscScalar epsdiag;
271: PetscInt i,j,k;
272: const PetscBool do_values = PETSC_TRUE;
273: PetscInt *r1_icol;
274: PetscScalar *r1_val;
275: PetscInt *r_icol;
276: PetscInt r_nnz;
277: PetscScalar *r_val;
278: PetscInt *A_icol;
279: PetscInt A_nnz;
280: PetscScalar *A_val;
281: PetscInt *p_icol;
282: PetscInt p_nnz;
283: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
284: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
287: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
288: MatGetSize(A, &nrows, &ncols);
289: MatGetTrace(A, &epsdiag);
291: epsdiag *= epsdiag_in / nrows;
293: PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);
295: if (droptol<1e-10) droptol=1e-10;
297: if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
299: retval.nrows = nrows;
300: retval.ncols = nrows;
301: retval.nnz = pattern.nnz/10;
302: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
303: retval.block_data = PETSC_TRUE;
305: spbas_allocate_pattern(&retval, do_values);
306: PetscArrayzero(retval.row_nnz, nrows);
307: spbas_allocate_data(&retval);
308: retval.nnz = 0;
310: PetscMalloc1(nrows, &diag);
311: PetscCalloc1(nrows, &val);
312: PetscCalloc1(nrows, &lvec);
313: PetscCalloc1(nrows, &max_row_nnz);
315: /* Check correct format of sparseness pattern */
316: if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
318: /* Count the nonzeros on transpose of pattern */
319: for (i = 0; i<nrows; i++) {
320: p_nnz = pattern.row_nnz[i];
321: p_icol = pattern.icols[i];
322: for (j=0; j<p_nnz; j++) {
323: max_row_nnz[i+p_icol[j]]++;
324: }
325: }
327: /* Calculate rows of L */
328: for (i = 0; i<nrows; i++) {
329: p_nnz = pattern.row_nnz[i];
330: p_icol = pattern.icols[i];
332: r_nnz = retval.row_nnz[i];
333: r_icol = retval.icols[i];
334: r_val = retval.values[i];
335: A_nnz = ai[rip[i]+1] - ai[rip[i]];
336: A_icol = &aj[ai[rip[i]]];
337: A_val = &aa[ai[rip[i]]];
339: /* Calculate val += A(i,i:n)'; */
340: for (j=0; j<A_nnz; j++) {
341: k = riip[A_icol[j]];
342: if (k>=i) val[k] = A_val[j];
343: }
345: /* Add regularization */
346: val[i] += epsdiag;
348: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
349: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
350: for (j=0; j<r_nnz; j++) {
351: k = r_icol[j];
352: lvec[k] = diag[k] * r_val[j];
353: val[i] -= r_val[j] * lvec[k];
354: }
356: /* Calculate the new diagonal */
357: diag[i] = val[i];
358: if (PetscRealPart(diag[i])<droptol) {
359: PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
360: PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);
362: /* Delete the whole matrix at once. */
363: spbas_delete(retval);
364: return NEGATIVE_DIAGONAL;
365: }
367: /* If necessary, allocate arrays */
368: if (r_nnz==0) {
369: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
370: if (ierr == PETSC_ERR_MEM) {
371: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
372: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
373: } else
374: r_icol = retval.icols[i];
375: r_val = retval.values[i];
376: }
378: /* Now, fill in */
379: r_icol[r_nnz] = i;
380: r_val [r_nnz] = 1.0;
381: r_nnz++;
382: retval.row_nnz[i]++;
384: retval.nnz += r_nnz;
386: /* Calculate
387: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
388: for (j=1; j<p_nnz; j++) {
389: k = i+p_icol[j];
390: r1_icol = retval.icols[k];
391: r1_val = retval.values[k];
392: for (jL=0; jL<retval.row_nnz[k]; jL++) {
393: val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
394: }
395: val[k] /= diag[i];
397: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
398: /* If necessary, allocate arrays */
399: if (retval.row_nnz[k]==0) {
400: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
401: if (ierr == PETSC_ERR_MEM) {
402: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
403: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
404: r_icol = retval.icols[i];
405: } else
406: }
408: retval.icols[k][retval.row_nnz[k]] = i;
409: retval.values[k][retval.row_nnz[k]] = val[k];
410: retval.row_nnz[k]++;
411: }
412: val[k] = 0;
413: }
415: /* Erase the values used in the work arrays */
416: for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
417: }
419: ierr=PetscFree(lvec);
420: ierr=PetscFree(val);
422: spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
423: PetscFree(max_row_nnz);
425: /* Place the inverse of the diagonals in the matrix */
426: for (i=0; i<nrows; i++) {
427: r_nnz = retval.row_nnz[i];
429: retval.values[i][r_nnz-1] = 1.0 / diag[i];
430: for (j=0; j<r_nnz-1; j++) {
431: retval.values[i][j] *= -1;
432: }
433: }
434: PetscFree(diag);
435: *matrix_L = retval;
436: return(0);
437: }