Actual source code: aijAssemble.cu
petsc-3.7.3 2016-08-01
1: #define PETSC_SKIP_COMPLEX
2: #define PETSC_SKIP_SPINLOCK
4: #include <petscconf.h>
5: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
6: #include <petscbt.h>
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petsc/private/vecimpl.h>
9: #undef VecType
10: #include <../src/mat/impls/aij/seq/seqcusp/cuspmatimpl.h>
12: #include <thrust/reduce.h>
13: #include <thrust/inner_product.h>
15: #include <cusp/array1d.h>
16: #include <cusp/print.h>
17: #include <cusp/coo_matrix.h>
19: #include <cusp/io/matrix_market.h>
21: #include <thrust/iterator/counting_iterator.h>
22: #include <thrust/iterator/transform_iterator.h>
23: #include <thrust/iterator/permutation_iterator.h>
24: #include <thrust/functional.h>
26: // this example illustrates how to make repeated access to a range of values
27: // examples:
28: // repeated_range([0, 1, 2, 3], 1) -> [0, 1, 2, 3]
29: // repeated_range([0, 1, 2, 3], 2) -> [0, 0, 1, 1, 2, 2, 3, 3]
30: // repeated_range([0, 1, 2, 3], 3) -> [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
31: // ...
33: template <typename Iterator>
34: class repeated_range
35: {
36: public:
38: typedef typename thrust::iterator_difference<Iterator>::type difference_type;
40: struct repeat_functor : public thrust::unary_function<difference_type,difference_type>
41: {
42: difference_type repeats;
44: repeat_functor(difference_type repeats) : repeats(repeats) {}
46: __host__ __device__
47: difference_type operator()(const difference_type &i) const {
48: return i / repeats;
49: }
50: };
52: typedef typename thrust::counting_iterator<difference_type> CountingIterator;
53: typedef typename thrust::transform_iterator<repeat_functor, CountingIterator> TransformIterator;
54: typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;
56: // type of the repeated_range iterator
57: typedef PermutationIterator iterator;
59: // construct repeated_range for the range [first,last)
60: repeated_range(Iterator first, Iterator last, difference_type repeats) : first(first), last(last), repeats(repeats) {}
62: iterator begin(void) const
63: {
64: return PermutationIterator(first, TransformIterator(CountingIterator(0), repeat_functor(repeats)));
65: }
67: iterator end(void) const
68: {
69: return begin() + repeats * (last - first);
70: }
72: protected:
73: difference_type repeats;
74: Iterator first;
75: Iterator last;
77: };
79: // this example illustrates how to repeat blocks in a range multiple times
80: // examples:
81: // tiled_range([0, 1, 2, 3], 2) -> [0, 1, 2, 3, 0, 1, 2, 3]
82: // tiled_range([0, 1, 2, 3], 4, 2) -> [0, 1, 2, 3, 0, 1, 2, 3]
83: // tiled_range([0, 1, 2, 3], 2, 2) -> [0, 1, 0, 1, 2, 3, 2, 3]
84: // tiled_range([0, 1, 2, 3], 2, 3) -> [0, 1, 0, 1 0, 1, 2, 3, 2, 3, 2, 3]
85: // ...
87: template <typename Iterator>
88: class tiled_range
89: {
90: public:
92: typedef typename thrust::iterator_difference<Iterator>::type difference_type;
94: struct tile_functor : public thrust::unary_function<difference_type,difference_type>
95: {
96: difference_type repeats;
97: difference_type tile_size;
99: tile_functor(difference_type repeats, difference_type tile_size) : tile_size(tile_size), repeats(repeats) {}
101: __host__ __device__
102: difference_type operator()(const difference_type &i) const {
103: return tile_size * (i / (tile_size * repeats)) + i % tile_size;
104: }
105: };
107: typedef typename thrust::counting_iterator<difference_type> CountingIterator;
108: typedef typename thrust::transform_iterator<tile_functor, CountingIterator> TransformIterator;
109: typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;
111: // type of the tiled_range iterator
112: typedef PermutationIterator iterator;
114: // construct repeated_range for the range [first,last)
115: tiled_range(Iterator first, Iterator last, difference_type repeats)
116: : first(first), last(last), repeats(repeats), tile_size(last - first) {}
118: tiled_range(Iterator first, Iterator last, difference_type repeats, difference_type tile_size)
119: : first(first), last(last), repeats(repeats), tile_size(tile_size)
120: {
121: // ASSERT((last - first) % tile_size == 0)
122: }
124: iterator begin(void) const
125: {
126: return PermutationIterator(first, TransformIterator(CountingIterator(0), tile_functor(repeats, tile_size)));
127: }
129: iterator end(void) const
130: {
131: return begin() + repeats * (last - first);
132: }
134: protected:
135: difference_type repeats;
136: difference_type tile_size;
137: Iterator first;
138: Iterator last;
139: };
141: typedef cusp::device_memory memSpace;
142: typedef int IndexType;
143: typedef PetscScalar ValueType;
144: typedef cusp::array1d<IndexType, memSpace> IndexArray;
145: typedef cusp::array1d<ValueType, memSpace> ValueArray;
146: typedef IndexArray::iterator IndexArrayIterator;
147: typedef ValueArray::iterator ValueArrayIterator;
149: // Ne: Number of elements
150: // Nl: Number of dof per element
153: PetscErrorCode MatSetValuesBatch_SeqAIJCUSP(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats)
154: {
155: size_t N = Ne * Nl;
156: size_t No = Ne * Nl*Nl;
157: PetscInt Nr; // Number of rows
160: // copy elemRows and elemMat to device
161: IndexArray d_elemRows(elemRows, elemRows + N);
162: ValueArray d_elemMats(elemMats, elemMats + No);
165: MatGetSize(J, &Nr, NULL);
166: // allocate storage for "fat" COO representation of matrix
167: PetscInfo1(J, "Making COO matrix of size %d\n", Nr);
168: cusp::coo_matrix<IndexType,ValueType, memSpace> COO(Nr, Nr, No);
170: // repeat elemRows entries Nl times
171: PetscInfo(J, "Making row indices\n");
172: repeated_range<IndexArrayIterator> rowInd(d_elemRows.begin(), d_elemRows.end(), Nl);
173: thrust::copy(rowInd.begin(), rowInd.end(), COO.row_indices.begin());
175: // tile rows of elemRows Nl times
176: PetscInfo(J, "Making column indices\n");
177: tiled_range<IndexArrayIterator> colInd(d_elemRows.begin(), d_elemRows.end(), Nl, Nl);
178: thrust::copy(colInd.begin(), colInd.end(), COO.column_indices.begin());
180: // copy values from elemMats into COO structure (could be avoided)
181: thrust::copy(d_elemMats.begin(), d_elemMats.end(), COO.values.begin());
183: // For MPIAIJ, split this into two COO matrices, and return both
184: // Need the column map
186: // print the "fat" COO representation
187: #if !defined(PETSC_USE_COMPLEX)
188: if (PetscLogPrintInfo) cusp::print(COO);
189: #endif
190: // sort COO format by (i,j), this is the most costly step
191: PetscInfo(J, "Sorting rows and columns\n");
192: #if 1
193: COO.sort_by_row_and_column();
194: #else
195: {
196: PetscInfo(J, " Making permutation\n");
197: IndexArray permutation(No);
198: thrust::sequence(permutation.begin(), permutation.end());
200: // compute permutation and sort by (I,J)
201: {
202: PetscInfo(J, " Sorting columns\n");
203: IndexArray temp(No);
204: thrust::copy(COO.column_indices.begin(), COO.column_indices.end(), temp.begin());
205: thrust::stable_sort_by_key(temp.begin(), temp.end(), permutation.begin());
206: PetscInfo(J, " Sorted columns\n");
207: if (PetscLogPrintInfo) {
208: for (IndexArrayIterator t_iter = temp.begin(), p_iter = permutation.begin(); t_iter != temp.end(); ++t_iter, ++p_iter) {
209: PetscInfo2(J, "%d(%d)\n", *t_iter, *p_iter);
210: }
211: }
213: PetscInfo(J, " Copying rows\n");
214: //cusp::copy(COO.row_indices, temp);
215: thrust::copy(COO.row_indices.begin(), COO.row_indices.end(), temp.begin());
216: PetscInfo(J, " Gathering rows\n");
217: thrust::gather(permutation.begin(), permutation.end(), temp.begin(), COO.row_indices.begin());
218: PetscInfo(J, " Sorting rows\n");
219: thrust::stable_sort_by_key(COO.row_indices.begin(), COO.row_indices.end(), permutation.begin());
221: PetscInfo(J, " Gathering columns\n");
222: cusp::copy(COO.column_indices, temp);
223: thrust::gather(permutation.begin(), permutation.end(), temp.begin(), COO.column_indices.begin());
224: }
226: // use permutation to reorder the values
227: {
228: PetscInfo(J, " Sorting values\n");
229: ValueArray temp(COO.values);
230: cusp::copy(COO.values, temp);
231: thrust::gather(permutation.begin(), permutation.end(), temp.begin(), COO.values.begin());
232: }
233: }
234: #endif
236: // print the "fat" COO representation
237: #if !defined(PETSC_USE_COMPLEX)
238: if (PetscLogPrintInfo) cusp::print(COO);
239: #endif
240: // compute number of unique (i,j) entries
241: // this counts the number of changes as we move along the (i,j) list
242: PetscInfo(J, "Computing number of unique entries\n");
243: size_t num_entries = thrust::inner_product
244: (thrust::make_zip_iterator(thrust::make_tuple(COO.row_indices.begin(), COO.column_indices.begin())),
245: thrust::make_zip_iterator(thrust::make_tuple(COO.row_indices.end (), COO.column_indices.end())) - 1,
246: thrust::make_zip_iterator(thrust::make_tuple(COO.row_indices.begin(), COO.column_indices.begin())) + 1,
247: size_t(1),
248: thrust::plus<size_t>(),
249: thrust::not_equal_to< thrust::tuple<IndexType,IndexType> >());
251: // allocate COO storage for final matrix
252: PetscInfo(J, "Allocating compressed matrix\n");
253: cusp::coo_matrix<IndexType, ValueType, memSpace> A(Nr, Nr, num_entries);
255: // sum values with the same (i,j) index
256: // XXX thrust::reduce_by_key is unoptimized right now, so we provide a SpMV-based one in cusp::detail
257: // the Cusp one is 2x faster, but still not optimal
258: // This could possibly be done in-place
259: PetscInfo(J, "Compressing matrix\n");
260: thrust::reduce_by_key(thrust::make_zip_iterator(thrust::make_tuple(COO.row_indices.begin(), COO.column_indices.begin())),
261: thrust::make_zip_iterator(thrust::make_tuple(COO.row_indices.end(), COO.column_indices.end())),
262: COO.values.begin(),
263: thrust::make_zip_iterator(thrust::make_tuple(A.row_indices.begin(), A.column_indices.begin())),
264: A.values.begin(),
265: thrust::equal_to< thrust::tuple<IndexType,IndexType> >(),
266: thrust::plus<ValueType>());
268: // print the final matrix
269: #if !defined(PETSC_USE_COMPLEX)
270: if (PetscLogPrintInfo) cusp::print(A);
271: #endif
272: //std::cout << "Writing matrix" << std::endl;
273: //cusp::io::write_matrix_market_file(A, "A.mtx");
275: PetscInfo(J, "Converting to PETSc matrix\n");
276: MatSetType(J, MATSEQAIJCUSP);
277: //cusp::csr_matrix<PetscInt,PetscScalar,cusp::device_memory> Jgpu;
278: CUSPMATRIX *Jgpu = new CUSPMATRIX;
279: cusp::convert(A, *Jgpu);
280: #if !defined(PETSC_USE_COMPLEX)
281: if (PetscLogPrintInfo) cusp::print(*Jgpu);
282: #endif
283: PetscInfo(J, "Copying to CPU matrix\n");
284: MatCUSPCopyFromGPU(J, Jgpu);
285: return(0);
286: }