Actual source code: spbas_cholesky.h
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
8: PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9: {
10: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
11: retval.values[k] = &retval.alloc_val[*n_alloc_used];
12: *n_alloc_used += r_nnz;
13: if (*n_alloc_used > retval.n_alloc_icol) return PETSC_FALSE;
14: return PETSC_TRUE;
15: }
17: /*
18: spbas_cholesky_garbage_collect:
19: move the rows which have been calculated so far, as well as
20: those currently under construction, to the front of the
21: array, while putting them in the proper order.
22: When it seems necessary, enlarge the current arrays.
24: NB: re-allocation is being simulated using
25: PetscMalloc, memcpy, PetscFree, because
26: PetscRealloc does not seem to exist.
28: */
29: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
30: Only the storage, not the contents of this matrix is changed in this function */
31: PetscInt i_row, /* I : Number of rows for which the final contents are known */
32: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
33: places in the arrays: they need not be moved any more */
34: PetscInt *n_alloc_used, /* I/O: */
35: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
36: {
37: /* PSEUDO-CODE:
38: 1. Choose the appropriate size for the arrays
39: 2. Rescue the arrays which would be lost during garbage collection
40: 3. Reallocate and correct administration
41: 4. Move all arrays so that they are in proper order */
43: PetscInt i,j;
44: PetscInt nrows = result->nrows;
45: PetscInt n_alloc_ok =0;
46: PetscInt n_alloc_ok_max=0;
47: PetscInt need_already = 0;
48: PetscInt n_rows_ahead =0;
49: PetscInt max_need_extra= 0;
50: PetscInt n_alloc_max, n_alloc_est, n_alloc;
51: PetscInt n_alloc_now = result->n_alloc_icol;
52: PetscInt *alloc_icol_old = result->alloc_icol;
53: PetscScalar *alloc_val_old = result->alloc_val;
54: PetscInt *icol_rescue;
55: PetscScalar *val_rescue;
56: PetscInt n_rescue;
57: PetscInt n_row_rescue;
58: PetscInt i_here, i_last, n_copy;
59: const PetscReal xtra_perc = 20;
61: /*********************************************************
62: 1. Choose appropriate array size
63: Count number of rows and memory usage which is already final */
64: for (i=0; i<i_row; i++) {
65: n_alloc_ok += result->row_nnz[i];
66: n_alloc_ok_max += max_row_nnz[i];
67: }
69: /* Count currently needed memory usage and future memory requirements
70: (max, predicted)*/
71: for (i=i_row; i<nrows; i++) {
72: if (!result->row_nnz[i]) {
73: max_need_extra += max_row_nnz[i];
74: } else {
75: need_already += max_row_nnz[i];
76: n_rows_ahead++;
77: }
78: }
80: /* Make maximal and realistic memory requirement estimates */
81: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
82: n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
84: /* Choose array sizes */
85: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
86: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
87: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
88: else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
90: /* If new estimate is less than what we already have,
91: don't reallocate, just garbage-collect */
92: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
93: n_alloc = result->n_alloc_icol;
94: }
96: /* Motivate dimension choice */
97: PetscInfo(NULL," Allocating %" PetscInt_FMT " nonzeros: ",n_alloc);
98: if (n_alloc_max == n_alloc_est) {
99: PetscInfo(NULL,"this is the correct size\n");
100: } else if (n_alloc_now >= n_alloc_est) {
101: PetscInfo(NULL,"the current size, which seems enough\n");
102: } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
103: PetscInfo(NULL,"the maximum estimate\n");
104: } else {
105: PetscInfo(NULL,"%6.2f %% more than the estimate\n",(double)xtra_perc);
106: }
108: /**********************************************************
109: 2. Rescue arrays which would be lost
110: Count how many rows and nonzeros will have to be rescued
111: when moving all arrays in place */
112: n_row_rescue = 0; n_rescue = 0;
113: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
114: else {
115: i = *n_row_alloc_ok - 1;
117: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
118: }
120: for (i=*n_row_alloc_ok; i<nrows; i++) {
121: i_here = result->icols[i]-result->alloc_icol;
122: i_last = i_here + result->row_nnz[i];
123: if (result->row_nnz[i]>0) {
124: if (*n_alloc_used > i_here || i_last > n_alloc) {
125: n_rescue += result->row_nnz[i];
126: n_row_rescue++;
127: }
129: if (i<i_row) *n_alloc_used += result->row_nnz[i];
130: else *n_alloc_used += max_row_nnz[i];
131: }
132: }
134: /* Allocate rescue arrays */
135: PetscMalloc1(n_rescue, &icol_rescue);
136: PetscMalloc1(n_rescue, &val_rescue);
138: /* Rescue the arrays which need rescuing */
139: n_row_rescue = 0; n_rescue = 0;
140: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
141: else {
142: i = *n_row_alloc_ok - 1;
143: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
144: }
146: for (i=*n_row_alloc_ok; i<nrows; i++) {
147: i_here = result->icols[i]-result->alloc_icol;
148: i_last = i_here + result->row_nnz[i];
149: if (result->row_nnz[i]>0) {
150: if (*n_alloc_used > i_here || i_last > n_alloc) {
151: PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
152: PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
153: n_rescue += result->row_nnz[i];
154: n_row_rescue++;
155: }
157: if (i<i_row) *n_alloc_used += result->row_nnz[i];
158: else *n_alloc_used += max_row_nnz[i];
159: }
160: }
162: /**********************************************************
163: 3. Reallocate and correct administration */
165: if (n_alloc != result->n_alloc_icol) {
166: n_copy = PetscMin(n_alloc,result->n_alloc_icol);
168: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
170: Allocate new icol-data, copy old contents */
171: PetscMalloc1(n_alloc, &result->alloc_icol);
172: PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);
174: /* Update administration, Reset pointers to new arrays */
175: result->n_alloc_icol = n_alloc;
176: for (i=0; i<nrows; i++) {
177: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
178: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
179: }
181: /* Delete old array */
182: PetscFree(alloc_icol_old);
184: /* Allocate new value-data, copy old contents */
185: PetscMalloc1(n_alloc, &result->alloc_val);
186: PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);
188: /* Update administration, Reset pointers to new arrays */
189: result->n_alloc_val = n_alloc;
190: for (i=0; i<nrows; i++) {
191: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
192: }
194: /* Delete old array */
195: PetscFree(alloc_val_old);
196: }
198: /*********************************************************
199: 4. Copy all the arrays to their proper places */
200: n_row_rescue = 0; n_rescue = 0;
201: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
202: else {
203: i = *n_row_alloc_ok - 1;
205: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
206: }
208: for (i=*n_row_alloc_ok; i<nrows; i++) {
209: i_here = result->icols[i]-result->alloc_icol;
210: i_last = i_here + result->row_nnz[i];
212: result->icols[i] = result->alloc_icol + *n_alloc_used;
213: result->values[i]= result->alloc_val + *n_alloc_used;
215: if (result->row_nnz[i]>0) {
216: if (*n_alloc_used > i_here || i_last > n_alloc) {
217: PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
218: PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);
220: n_rescue += result->row_nnz[i];
221: n_row_rescue++;
222: } else {
223: for (j=0; j<result->row_nnz[i]; j++) {
224: result->icols[i][j] = result->alloc_icol[i_here+j];
225: result->values[i][j] = result->alloc_val[i_here+j];
226: }
227: }
228: if (i<i_row) *n_alloc_used += result->row_nnz[i];
229: else *n_alloc_used += max_row_nnz[i];
230: }
231: }
233: /* Delete the rescue arrays */
234: PetscFree(icol_rescue);
235: PetscFree(val_rescue);
237: *n_row_alloc_ok = i_row;
238: return 0;
239: }
241: /*
242: spbas_incomplete_cholesky:
243: incomplete Cholesky decomposition of a square, symmetric,
244: positive definite matrix.
246: In case negative diagonals are encountered, function returns
247: NEGATIVE_DIAGONAL. When this happens, call this function again
248: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
249: droptol
250: */
251: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success)
252: {
253: PetscInt jL;
254: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
255: PetscInt *ai=a->i,*aj=a->j;
256: MatScalar *aa=a->a;
257: PetscInt nrows, ncols;
258: PetscInt *max_row_nnz;
259: spbas_matrix retval;
260: PetscScalar *diag;
261: PetscScalar *val;
262: PetscScalar *lvec;
263: PetscScalar epsdiag;
264: PetscInt i,j,k;
265: const PetscBool do_values = PETSC_TRUE;
266: PetscInt *r1_icol;
267: PetscScalar *r1_val;
268: PetscInt *r_icol;
269: PetscInt r_nnz;
270: PetscScalar *r_val;
271: PetscInt *A_icol;
272: PetscInt A_nnz;
273: PetscScalar *A_val;
274: PetscInt *p_icol;
275: PetscInt p_nnz;
276: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
277: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
279: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
280: MatGetSize(A, &nrows, &ncols);
281: MatGetTrace(A, &epsdiag);
283: epsdiag *= epsdiag_in / nrows;
285: PetscInfo(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);
287: if (droptol<1e-10) droptol=1e-10;
289: retval.nrows = nrows;
290: retval.ncols = nrows;
291: retval.nnz = pattern.nnz/10;
292: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
293: retval.block_data = PETSC_TRUE;
295: spbas_allocate_pattern(&retval, do_values);
296: PetscArrayzero(retval.row_nnz, nrows);
297: spbas_allocate_data(&retval);
298: retval.nnz = 0;
300: PetscMalloc1(nrows, &diag);
301: PetscCalloc1(nrows, &val);
302: PetscCalloc1(nrows, &lvec);
303: PetscCalloc1(nrows, &max_row_nnz);
305: /* Count the nonzeros on transpose of pattern */
306: for (i = 0; i<nrows; i++) {
307: p_nnz = pattern.row_nnz[i];
308: p_icol = pattern.icols[i];
309: for (j=0; j<p_nnz; j++) {
310: max_row_nnz[i+p_icol[j]]++;
311: }
312: }
314: /* Calculate rows of L */
315: for (i = 0; i<nrows; i++) {
316: p_nnz = pattern.row_nnz[i];
317: p_icol = pattern.icols[i];
319: r_nnz = retval.row_nnz[i];
320: r_icol = retval.icols[i];
321: r_val = retval.values[i];
322: A_nnz = ai[rip[i]+1] - ai[rip[i]];
323: A_icol = &aj[ai[rip[i]]];
324: A_val = &aa[ai[rip[i]]];
326: /* Calculate val += A(i,i:n)'; */
327: for (j=0; j<A_nnz; j++) {
328: k = riip[A_icol[j]];
329: if (k>=i) val[k] = A_val[j];
330: }
332: /* Add regularization */
333: val[i] += epsdiag;
335: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
336: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
337: for (j=0; j<r_nnz; j++) {
338: k = r_icol[j];
339: lvec[k] = diag[k] * r_val[j];
340: val[i] -= r_val[j] * lvec[k];
341: }
343: /* Calculate the new diagonal */
344: diag[i] = val[i];
345: if (PetscRealPart(diag[i])<droptol) {
346: PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
347: PetscInfo(NULL,"Negative diagonal in row %" PetscInt_FMT "\n",i+1);
349: /* Delete the whole matrix at once. */
350: spbas_delete(retval);
351: *success = PETSC_FALSE;
352: return 0;
353: }
355: /* If necessary, allocate arrays */
356: if (r_nnz==0) {
357: PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
359: r_icol = retval.icols[i];
360: r_val = retval.values[i];
361: }
363: /* Now, fill in */
364: r_icol[r_nnz] = i;
365: r_val [r_nnz] = 1.0;
366: r_nnz++;
367: retval.row_nnz[i]++;
369: retval.nnz += r_nnz;
371: /* Calculate
372: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
373: for (j=1; j<p_nnz; j++) {
374: k = i+p_icol[j];
375: r1_icol = retval.icols[k];
376: r1_val = retval.values[k];
377: for (jL=0; jL<retval.row_nnz[k]; jL++) {
378: val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
379: }
380: val[k] /= diag[i];
382: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
383: /* If necessary, allocate arrays */
384: if (!retval.row_nnz[k]) {
385: PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
386: if (!success) {
387: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
388: flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
390: r_icol = retval.icols[i];
391: }
392: }
394: retval.icols[k][retval.row_nnz[k]] = i;
395: retval.values[k][retval.row_nnz[k]] = val[k];
396: retval.row_nnz[k]++;
397: }
398: val[k] = 0;
399: }
401: /* Erase the values used in the work arrays */
402: for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
403: }
405: PetscFree(lvec);
406: PetscFree(val);
408: spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
409: PetscFree(max_row_nnz);
411: /* Place the inverse of the diagonals in the matrix */
412: for (i=0; i<nrows; i++) {
413: r_nnz = retval.row_nnz[i];
415: retval.values[i][r_nnz-1] = 1.0 / diag[i];
416: for (j=0; j<r_nnz-1; j++) {
417: retval.values[i][j] *= -1;
418: }
419: }
420: PetscFree(diag);
421: *matrix_L = retval;
422: *success = PETSC_TRUE;
423: return 0;
424: }