Actual source code: ex61f.F90

!
!        Demonstrates having each OpenMP thread manage its own PETSc objects and linear solves
!           - each thread is ONLY allowed to access objects that IT created;
!             that is, threads cannot share objects
!
!        Run with "export OMP_NUM_THREADS=16; ./ex61f"
!           to use 16 independent threads
!
!        Requires PETSc configured with ./configure --with-threadsafety --with-openmp
!
module ex61fmodule
  implicit none
contains
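  ! Split the range 1..total into num_pieces contiguous chunks:
  ! the first mod(total, num_pieces) chunks receive one extra element,
  ! and any empty chunk is returned with iend < ibeg.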
  subroutine split_indices(total, num_pieces, ibeg, iend)
    implicit none

    integer, intent(in) :: total
    integer, intent(in) :: num_pieces
    integer, intent(out) :: ibeg(num_pieces), iend(num_pieces)
    integer :: itmp1, itmp2, ioffset, i

    itmp1 = total/num_pieces
    itmp2 = mod(total, num_pieces)
    ioffset = 0
    do i = 1, itmp2
      ibeg(i) = ioffset + 1
      iend(i) = ioffset + (itmp1 + 1)
      ioffset = iend(i)
    end do
    do i = itmp2 + 1, num_pieces
      ibeg(i) = ioffset + 1
      if (ibeg(i) > total) then
        iend(i) = ibeg(i) - 1
      else
        iend(i) = ioffset + itmp1
        ioffset = iend(i)
      end if
    end do

  end subroutine split_indices
end module ex61fmodule

program tpetsc

#include <petsc/finclude/petsc.h>
  use ex61fmodule
  use petsc
  implicit none
!     ----------------------------
!     test concurrent PETSc solver
!     ----------------------------

  integer, parameter :: NCASES = 100
  integer, parameter :: MAXTHREADS = 64
  real(8), parameter :: tol = 1.0d-6

  integer, dimension(MAXTHREADS) :: ibeg, iend

! lines with the !$ sentinel are compiled only when OpenMP is enabled
!$ integer, external :: omp_get_num_threads

  Mat, target :: Mcol_f_mat(MAXTHREADS)
  Vec, target :: Mcol_f_vecb(MAXTHREADS)
  Vec, target :: Mcol_f_vecx(MAXTHREADS)
  KSP, target :: Mcol_f_ksp(MAXTHREADS)
  PC, target :: MPC(MAXTHREADS)

  Mat, pointer :: col_f_mat
  Vec, pointer :: col_f_vecb
  Vec, pointer :: col_f_vecx
  KSP, pointer :: col_f_ksp
  PC, pointer :: pc

  PetscInt :: ith, nthreads
  PetscErrorCode ierr

  integer, parameter :: nz_per_row = 9
  integer, parameter :: n = 100
  integer :: i, j, ij, ij2, ii, jj, nz, ip, dx, dy, icase
  integer, allocatable :: ilist(:), jlist(:)
  PetscScalar :: aij
  PetscScalar, allocatable :: alist(:)
  logical :: isvalid_ii, isvalid_jj, is_diag

  PetscInt nrow
  PetscInt ncol
  PetscScalar, allocatable :: x(:), b(:)
  real(8) :: err(NCASES)

  integer :: t1, t2, count_rate
  real :: ttime

  PetscCallA(PetscInitialize(ierr))

  allocate (ilist(n*n*nz_per_row), jlist(n*n*nz_per_row), alist(n*n*nz_per_row))
  allocate (x(0:(n*n - 1)), b(0:(n*n - 1)))
  nrow = n*n
  ncol = nrow

  nthreads = 1
!$omp parallel
!$omp master
!$ nthreads = omp_get_num_threads()
!$omp end master
!$omp end parallel
  print *, 'nthreads = ', nthreads, ' NCASES = ', NCASES

  call split_indices(NCASES, nthreads, ibeg, iend)
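
!      --------------------------------------------------------------
!      each thread creates its own matrix, vectors, and solver context
!      --------------------------------------------------------------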

!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    ! preallocate nz_per_row nonzeros per row; the vectors are created with
    ! no backing array (PETSC_NULL_SCALAR_ARRAY), which is supplied later
    ! via VecPlaceArray
    PetscCallA(MatCreateSeqAIJ(PETSC_COMM_SELF, nrow, ncol, nz_per_row, PETSC_NULL_INTEGER_ARRAY, col_f_mat, ierr))
    PetscCallA(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nrow, PETSC_NULL_SCALAR_ARRAY, col_f_vecb, ierr))
    PetscCallA(VecDuplicate(col_f_vecb, col_f_vecx, ierr))
    PetscCallA(KSPCreate(PETSC_COMM_SELF, col_f_ksp, ierr))
  end do

!      -----------------------
!      setup sparsity pattern
!      -----------------------
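!      9-point stencil on an n x n grid: each node couples to itself
!      (coefficient 4) and to its up-to-8 valid neighbors (coefficient -1)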
  nz = 0
  do j = 1, n
  do i = 1, n
    ij = i + (j - 1)*n
    do dx = -1, 1
    do dy = -1, 1
      ii = i + dx
      jj = j + dy

      ij2 = ii + (jj - 1)*n
      isvalid_ii = (1 <= ii) .and. (ii <= n)
      isvalid_jj = (1 <= jj) .and. (jj <= n)
      if (isvalid_ii .and. isvalid_jj) then
        is_diag = (ij == ij2)
        nz = nz + 1
        ilist(nz) = ij
        jlist(nz) = ij2
        if (is_diag) then
          aij = 4.0
        else
          aij = -1.0
        end if
        alist(nz) = aij
      end if
    end do
    end do
  end do
  end do

  print *, 'nz = ', nz

!      ----------------------------------
!      convert from Fortran to C indexing
!      ----------------------------------
  ilist(1:nz) = ilist(1:nz) - 1
  jlist(1:nz) = jlist(1:nz) - 1

!      --------------
!      setup matrices
!      --------------
  call system_clock(t1, count_rate)

!$omp  parallel do                                                      &
!$omp& private(ith,icase,ip,i,j,ii,jj,aij,ierr,x,b)                     &
!$omp& private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp,pc)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)
    pc => MPC(ith)

    do icase = ibeg(ith), iend(ith)

      do ip = 1, nz
        ii = ilist(ip)
        jj = jlist(ip)
        aij = alist(ip)
        PetscCallA(MatSetValue(col_f_mat, ii, jj, aij, INSERT_VALUES, ierr))
      end do
      PetscCallA(MatAssemblyBegin(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(MatAssemblyEnd(col_f_mat, MAT_FINAL_ASSEMBLY, ierr))
      PetscCallA(KSPSetOperators(col_f_ksp, col_f_mat, col_f_mat, ierr))

      ! set linear solver to a direct LU factorization
      PetscCallA(KSPGetPC(col_f_ksp, pc, ierr))
      PetscCallA(PCSetType(pc, PCLU, ierr))

      ! associate PETSc vector with allocated array
      x(:) = 1
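      ! note: the VecPlaceArray/VecResetArray calls below are wrapped in
      ! critical sections to serialize them across threads, presumably to
      ! protect shared PETSc state not covered by --with-threadsafety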
!$omp    critical
      PetscCallA(VecPlaceArray(col_f_vecx, x, ierr))
!$omp    end critical

      b(:) = 0
      do ip = 1, nz
        i = ilist(ip)
        j = jlist(ip)
        aij = alist(ip)
        b(i) = b(i) + aij*x(j)
      end do
      x = 0
!$omp    critical
      PetscCallA(VecPlaceArray(col_f_vecb, b, ierr))
!$omp    end critical

!  -----------------------------------------------------------
!  key test, need to solve multiple linear systems in parallel
!  -----------------------------------------------------------
      PetscCallA(KSPSetFromOptions(col_f_ksp, ierr))

      PetscCallA(KSPSetUp(col_f_ksp, ierr))
      PetscCallA(KSPSolve(col_f_ksp, col_f_vecb, col_f_vecx, ierr))

!        ------------
!        check answer
!        ------------
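!        b was assembled as b = A*x with x = 1, so the solve should recover x = 1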
      err(icase) = maxval(abs(x(:) - 1))

!$omp    critical
      PetscCallA(VecResetArray(col_f_vecx, ierr))
!$omp    end critical

!$omp    critical
      PetscCallA(VecResetArray(col_f_vecb, ierr))
!$omp    end critical

    end do
  end do
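
!      ------------------------------------------------
!      each thread destroys only the objects it created
!      ------------------------------------------------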

!$omp parallel do                                                        &
!$omp private(ith,ierr)                                                  &
!$omp private(col_f_mat,col_f_vecb,col_f_vecx,col_f_ksp)
  do ith = 1, nthreads
    col_f_mat => Mcol_f_mat(ith)
    col_f_vecb => Mcol_f_vecb(ith)
    col_f_vecx => Mcol_f_vecx(ith)
    col_f_ksp => Mcol_f_ksp(ith)

    PetscCallA(MatDestroy(col_f_mat, ierr))
    PetscCallA(VecDestroy(col_f_vecb, ierr))
    PetscCallA(VecDestroy(col_f_vecx, ierr))

    PetscCallA(KSPDestroy(col_f_ksp, ierr))

  end do

  call system_clock(t2, count_rate)
  ttime = real(t2 - t1)/real(count_rate)
  write (*, *) 'total time is ', ttime

  write (*, *) 'maxval(err) ', maxval(err)
  do icase = 1, NCASES
    if (err(icase) > tol) then
      write (*, *) 'icase,err(icase) ', icase, err(icase)
    end if
  end do

  deallocate (ilist, jlist, alist)
  deallocate (x, b)
  PetscCallA(PetscFinalize(ierr))
end program tpetsc

!/*TEST
!
!   build:
!      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
!
!   test:
!      output_file: output/ex61f_1.out
!      TODO: Need to determine how to test OpenMP code
!
!TEST*/