Actual source code: spbas_cholesky.h
petsc-3.6.1 2015-08-06
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
10: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11: {
13: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
14: retval.values[k] = &retval.alloc_val[*n_alloc_used];
15: *n_alloc_used += r_nnz;
16: if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17: return(0);
18: }
21: /*
22: spbas_cholesky_garbage_collect:
23: move the rows which have been calculated so far, as well as
24: those currently under construction, to the front of the
25: array, while putting them in the proper order.
26: When it seems necessary, enlarge the current arrays.
28: NB: re-allocation is being simulated using
29: PetscMalloc, memcpy, PetscFree, because
30: PetscRealloc does not seem to exist.
32: */
35: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
36: Only the storage, not the contents of this matrix is changed in this function */
37: PetscInt i_row, /* I : Number of rows for which the final contents are known */
38: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
39: places in the arrays: they need not be moved any more */
40: PetscInt *n_alloc_used, /* I/O: */
41: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
42: {
43: /* PSEUDO-CODE:
44: 1. Choose the appropriate size for the arrays
45: 2. Rescue the arrays which would be lost during garbage collection
46: 3. Reallocate and correct administration
47: 4. Move all arrays so that they are in proper order */
49: PetscInt i,j;
50: PetscInt nrows = result->nrows;
51: PetscInt n_alloc_ok =0;
52: PetscInt n_alloc_ok_max=0;
53: PetscErrorCode ierr;
54: PetscInt need_already = 0;
55: PetscInt n_rows_ahead =0;
56: PetscInt max_need_extra= 0;
57: PetscInt n_alloc_max, n_alloc_est, n_alloc;
58: PetscInt n_alloc_now = result->n_alloc_icol;
59: PetscInt *alloc_icol_old = result->alloc_icol;
60: PetscScalar *alloc_val_old = result->alloc_val;
61: PetscInt *icol_rescue;
62: PetscScalar *val_rescue;
63: PetscInt n_rescue;
64: PetscInt n_row_rescue;
65: PetscInt i_here, i_last, n_copy;
66: const PetscReal xtra_perc = 20;
69: /*********************************************************
70: 1. Choose appropriate array size
71: Count number of rows and memory usage which is already final */
72: for (i=0; i<i_row; i++) {
73: n_alloc_ok += result->row_nnz[i];
74: n_alloc_ok_max += max_row_nnz[i];
75: }
77: /* Count currently needed memory usage and future memory requirements
78: (max, predicted)*/
79: for (i=i_row; i<nrows; i++) {
80: if (!result->row_nnz[i]) {
81: max_need_extra += max_row_nnz[i];
82: } else {
83: need_already += max_row_nnz[i];
84: n_rows_ahead++;
85: }
86: }
88: /* Make maximal and realistic memory requirement estimates */
89: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
90: n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
92: /* Choose array sizes */
93: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
94: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
95: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
96: else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
98: /* If new estimate is less than what we already have,
99: don't reallocate, just garbage-collect */
100: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
101: n_alloc = result->n_alloc_icol;
102: }
104: /* Motivate dimension choice */
105: PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);
106: if (n_alloc_max == n_alloc_est) {
107: PetscInfo(NULL,"this is the correct size\n");
108: } else if (n_alloc_now >= n_alloc_est) {
109: PetscInfo(NULL,"the current size, which seems enough\n");
110: } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
111: PetscInfo(NULL,"the maximum estimate\n");
112: } else {
113: PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
114: }
117: /**********************************************************
118: 2. Rescue arrays which would be lost
119: Count how many rows and nonzeros will have to be rescued
120: when moving all arrays in place */
121: n_row_rescue = 0; n_rescue = 0;
122: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
123: else {
124: i = *n_row_alloc_ok - 1;
126: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
127: }
129: for (i=*n_row_alloc_ok; i<nrows; i++) {
130: i_here = result->icols[i]-result->alloc_icol;
131: i_last = i_here + result->row_nnz[i];
132: if (result->row_nnz[i]>0) {
133: if (*n_alloc_used > i_here || i_last > n_alloc) {
134: n_rescue += result->row_nnz[i];
135: n_row_rescue++;
136: }
138: if (i<i_row) *n_alloc_used += result->row_nnz[i];
139: else *n_alloc_used += max_row_nnz[i];
140: }
141: }
143: /* Allocate rescue arrays */
144: PetscMalloc1(n_rescue, &icol_rescue);
145: PetscMalloc1(n_rescue, &val_rescue);
147: /* Rescue the arrays which need rescuing */
148: n_row_rescue = 0; n_rescue = 0;
149: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
150: else {
151: i = *n_row_alloc_ok - 1;
152: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
153: }
155: for (i=*n_row_alloc_ok; i<nrows; i++) {
156: i_here = result->icols[i]-result->alloc_icol;
157: i_last = i_here + result->row_nnz[i];
158: if (result->row_nnz[i]>0) {
159: if (*n_alloc_used > i_here || i_last > n_alloc) {
160: PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));
161: PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));
163: n_rescue += result->row_nnz[i];
164: n_row_rescue++;
165: }
167: if (i<i_row) *n_alloc_used += result->row_nnz[i];
168: else *n_alloc_used += max_row_nnz[i];
169: }
170: }
172: /**********************************************************
173: 3. Reallocate and correct administration */
175: if (n_alloc != result->n_alloc_icol) {
176: n_copy = PetscMin(n_alloc,result->n_alloc_icol);
178: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
180: Allocate new icol-data, copy old contents */
181: PetscMalloc1(n_alloc, &result->alloc_icol);
182: PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));
184: /* Update administration, Reset pointers to new arrays */
185: result->n_alloc_icol = n_alloc;
186: for (i=0; i<nrows; i++) {
187: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
188: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
189: }
191: /* Delete old array */
192: PetscFree(alloc_icol_old);
194: /* Allocate new value-data, copy old contents */
195: PetscMalloc1(n_alloc, &result->alloc_val);
196: PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));
198: /* Update administration, Reset pointers to new arrays */
199: result->n_alloc_val = n_alloc;
200: for (i=0; i<nrows; i++) {
201: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
202: }
204: /* Delete old array */
205: PetscFree(alloc_val_old);
206: }
209: /*********************************************************
210: 4. Copy all the arrays to their proper places */
211: n_row_rescue = 0; n_rescue = 0;
212: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
213: else {
214: i = *n_row_alloc_ok - 1;
216: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
217: }
219: for (i=*n_row_alloc_ok; i<nrows; i++) {
220: i_here = result->icols[i]-result->alloc_icol;
221: i_last = i_here + result->row_nnz[i];
223: result->icols[i] = result->alloc_icol + *n_alloc_used;
224: result->values[i]= result->alloc_val + *n_alloc_used;
226: if (result->row_nnz[i]>0) {
227: if (*n_alloc_used > i_here || i_last > n_alloc) {
228: PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));
229: PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));
231: n_rescue += result->row_nnz[i];
232: n_row_rescue++;
233: } else {
234: for (j=0; j<result->row_nnz[i]; j++) {
235: result->icols[i][j] = result->alloc_icol[i_here+j];
236: result->values[i][j] = result->alloc_val[i_here+j];
237: }
238: }
239: if (i<i_row) *n_alloc_used += result->row_nnz[i];
240: else *n_alloc_used += max_row_nnz[i];
241: }
242: }
244: /* Delete the rescue arrays */
245: PetscFree(icol_rescue);
246: PetscFree(val_rescue);
248: *n_row_alloc_ok = i_row;
249: return(0);
250: }
252: /*
253: spbas_incomplete_cholesky:
254: incomplete Cholesky decomposition of a square, symmetric,
255: positive definite matrix.
257: In case negative diagonals are encountered, function returns
258: NEGATIVE_DIAGONAL. When this happens, call this function again
259: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
260: droptol
261: */
264: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
265: {
266: PetscInt jL;
267: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
268: PetscInt *ai=a->i,*aj=a->j;
269: MatScalar *aa=a->a;
270: PetscInt nrows, ncols;
271: PetscInt *max_row_nnz;
272: PetscErrorCode ierr;
273: spbas_matrix retval;
274: PetscScalar *diag;
275: PetscScalar *val;
276: PetscScalar *lvec;
277: PetscScalar epsdiag;
278: PetscInt i,j,k;
279: const PetscBool do_values = PETSC_TRUE;
280: PetscInt *r1_icol;
281: PetscScalar *r1_val;
282: PetscInt *r_icol;
283: PetscInt r_nnz;
284: PetscScalar *r_val;
285: PetscInt *A_icol;
286: PetscInt A_nnz;
287: PetscScalar *A_val;
288: PetscInt *p_icol;
289: PetscInt p_nnz;
290: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
291: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
294: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
295: MatGetSize(A, &nrows, &ncols);
296: MatGetTrace(A, &epsdiag);
298: epsdiag *= epsdiag_in / nrows;
300: PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);
302: if (droptol<1e-10) droptol=1e-10;
304: if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
306: retval.nrows = nrows;
307: retval.ncols = nrows;
308: retval.nnz = pattern.nnz/10;
309: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
310: retval.block_data = PETSC_TRUE;
312: spbas_allocate_pattern(&retval, do_values);
313: PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));
314: spbas_allocate_data(&retval);
315: retval.nnz = 0;
317: PetscMalloc1(nrows, &diag);
318: PetscMalloc1(nrows, &val);
319: PetscMalloc1(nrows, &lvec);
320: PetscMalloc1(nrows, &max_row_nnz);
321: if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
323: PetscMemzero((void*) val, nrows*sizeof(PetscScalar));
324: PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));
325: PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));
327: /* Check correct format of sparseness pattern */
328: if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
330: /* Count the nonzeros on transpose of pattern */
331: for (i = 0; i<nrows; i++) {
332: p_nnz = pattern.row_nnz[i];
333: p_icol = pattern.icols[i];
334: for (j=0; j<p_nnz; j++) {
335: max_row_nnz[i+p_icol[j]]++;
336: }
337: }
339: /* Calculate rows of L */
340: for (i = 0; i<nrows; i++) {
341: p_nnz = pattern.row_nnz[i];
342: p_icol = pattern.icols[i];
344: r_nnz = retval.row_nnz[i];
345: r_icol = retval.icols[i];
346: r_val = retval.values[i];
347: A_nnz = ai[rip[i]+1] - ai[rip[i]];
348: A_icol = &aj[ai[rip[i]]];
349: A_val = &aa[ai[rip[i]]];
351: /* Calculate val += A(i,i:n)'; */
352: for (j=0; j<A_nnz; j++) {
353: k = riip[A_icol[j]];
354: if (k>=i) val[k] = A_val[j];
355: }
357: /* Add regularization */
358: val[i] += epsdiag;
360: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
361: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
362: for (j=0; j<r_nnz; j++) {
363: k = r_icol[j];
364: lvec[k] = diag[k] * r_val[j];
365: val[i] -= r_val[j] * lvec[k];
366: }
368: /* Calculate the new diagonal */
369: diag[i] = val[i];
370: if (PetscRealPart(diag[i])<droptol) {
371: PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
372: PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);
374: /* Delete the whole matrix at once. */
375: spbas_delete(retval);
376: return NEGATIVE_DIAGONAL;
377: }
379: /* If necessary, allocate arrays */
380: if (r_nnz==0) {
381: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
382: if (ierr == PETSC_ERR_MEM) {
383: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
384: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
385: } else
386: r_icol = retval.icols[i];
387: r_val = retval.values[i];
388: }
390: /* Now, fill in */
391: r_icol[r_nnz] = i;
392: r_val [r_nnz] = 1.0;
393: r_nnz++;
394: retval.row_nnz[i]++;
396: retval.nnz += r_nnz;
398: /* Calculate
399: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
400: for (j=1; j<p_nnz; j++) {
401: k = i+p_icol[j];
402: r1_icol = retval.icols[k];
403: r1_val = retval.values[k];
404: for (jL=0; jL<retval.row_nnz[k]; jL++) {
405: val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
406: }
407: val[k] /= diag[i];
409: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
410: /* If necessary, allocate arrays */
411: if (retval.row_nnz[k]==0) {
412: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
413: if (ierr == PETSC_ERR_MEM) {
414: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
415: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
416: r_icol = retval.icols[i];
417: r_val = retval.values[i];
418: } else
419: }
421: retval.icols[k][retval.row_nnz[k]] = i;
422: retval.values[k][retval.row_nnz[k]] = val[k];
423: retval.row_nnz[k]++;
424: }
425: val[k] = 0;
426: }
428: /* Erase the values used in the work arrays */
429: for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
430: }
432: ierr=PetscFree(lvec);
433: ierr=PetscFree(val);
435: spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
436: PetscFree(max_row_nnz);
438: /* Place the inverse of the diagonals in the matrix */
439: for (i=0; i<nrows; i++) {
440: r_nnz = retval.row_nnz[i];
442: retval.values[i][r_nnz-1] = 1.0 / diag[i];
443: for (j=0; j<r_nnz-1; j++) {
444: retval.values[i][j] *= -1;
445: }
446: }
447: PetscFree(diag);
448: *matrix_L = retval;
449: return(0);
450: }