Mesh Oriented datABase  (version 5.5.0)
An array-based unstructured mesh library
ReadHDF5VarLen.cpp
Go to the documentation of this file.
1 /** \file ReadHDF5VarLen.cpp
2  * \author Jason Kraftcheck
3  * \date 2010-09-04
4  */
5 
6 #include "ReadHDF5VarLen.hpp"
7 #include "ReadHDF5Dataset.hpp"
8 #include "H5Tpublic.h"
9 #include <cassert>
10 
11 namespace moab
12 {
13 
15  Range::const_iterator& ranged_iter,
16  Range::const_iterator range_end )
17 {
18  if( ranged_iter == range_end ) return false;
19 
20  assert( file_id <= *ranged_iter );
21  if( *ranged_iter != file_id ) return false;
22 
23  ++ranged_iter;
24  return true;
25 }
26 
28  const Range& offsets,
29  EntityHandle start_offset,
30  hid_t data_type,
31  const Range& file_ids,
32  const std::vector< unsigned >& vals_per_ent,
33  const Range& ranged_file_ids )
34 {
35  ErrorCode rval;
36  const size_t value_size = H5Tget_size( data_type );
37  const size_t buffer_size = bufferSize / value_size;
38  unsigned char* const data_buffer = reinterpret_cast< unsigned char* >( dataBuffer );
39  std::vector< unsigned char > partial; // for when we read only part of the contents of a set/entity
40  Range::const_iterator fileid_iter = file_ids.begin();
41  Range::const_iterator ranged_iter = ranged_file_ids.begin();
42  std::vector< unsigned >::const_iterator count_iter = vals_per_ent.begin();
43  size_t count, offset;
44  bool ranged;
45  int nn = 0;
46 
47  assert( file_ids.size() == vals_per_ent.size() );
48 
49  try
50  {
51  data_set.set_file_ids( offsets, start_offset, buffer_size, data_type );
52  }
54  {
55  return MB_FAILURE;
56  }
57 
58  dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );
59 
60  while( !data_set.done() )
61  {
62  dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
63  try
64  {
65  data_set.read( data_buffer, count );
66  }
68  {
69  return MB_FAILURE;
70  }
71 
72  assert( 0 == count || fileid_iter != file_ids.end() );
73 
74  // Handle 'special' case where we read some, but not all
75  // of the data for an entity during the last iteration.
76  offset = 0;
77  if( !partial.empty() )
78  { // didn't read all of previous entity
79  assert( fileid_iter != file_ids.end() );
80  assert( 0 == ( partial.size() % value_size ) );
81  size_t num_prev = partial.size() / value_size;
82  offset = *count_iter - num_prev;
83  if( offset > count )
84  { // still don't have all
85  partial.insert( partial.end(), data_buffer, data_buffer + count * value_size );
86  continue;
87  }
88 
89  partial.insert( partial.end(), data_buffer, data_buffer + offset * value_size );
90 
91  ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
92  assert( partial.size() == *count_iter * value_size );
93  rval = store_data( *fileid_iter, &partial[0], *count_iter, ranged );
94  if( MB_SUCCESS != rval ) return rval;
95 
96  ++count_iter;
97  ++fileid_iter;
98  partial.clear();
99  }
100 
101  // Process contents for all entities for which we
102  // have read the complete list
103  while( count_iter != vals_per_ent.end() && offset + *count_iter <= count )
104  {
105  assert( fileid_iter != file_ids.end() );
106  ranged = is_ranged( *fileid_iter, ranged_iter, ranged_file_ids.end() );
107  rval = store_data( *fileid_iter, data_buffer + offset * value_size, *count_iter, ranged );
108  if( MB_SUCCESS != rval ) return rval;
109 
110  offset += *count_iter;
111  ++count_iter;
112  ++fileid_iter;
113  }
114 
115  // If we did not read all of the final entity,
116  // store what we did read to be processed in the
117  // next iteration
118  if( offset < count )
119  {
120  assert( partial.empty() );
121  partial.insert( partial.end(), data_buffer + offset * value_size, data_buffer + count * value_size );
122  }
123  }
124  // NOTE: If the last set is empty, we will not process it here
125  // assert(fileid_iter == file_ids.end());
126 #ifndef NDEBUG
127  for( ; fileid_iter != file_ids.end(); ++fileid_iter )
128  {
129  assert( 0 == *count_iter );
130  ++count_iter;
131  }
132 #endif
133  return MB_SUCCESS;
134 }
135 /*
136 ErrorCode ReadHDF5VarLen::read_offsets( ReadHDF5Dataset& data_set,
137  const Range& file_ids,
138  EntityHandle start_file_id,
139  unsigned num_columns,
140  const unsigned indices[],
141  EntityHandle nudge,
142  Range offsets_out[],
143  std::vector<unsigned> counts_out[],
144  Range* ranged_file_ids = 0 )
145 {
146  const int local_index = 1;
147 
148  // sanity check
 149  const unsigned max_cols = ranged_file_ids ? data_set.columns() - 1 : data_set.columns();
150  for (unsigned i = 0; i < num_columns; ++i) {
 151  assert(indices[i] < max_cols);
152  if (indices[i] >= max_cols)
153  return MB_FAILURE;
154  }
155 
156  // Use hints to make sure insertion into ranges is O(1)
157  std::vector<Range::iterator> hints;
158  if (ranged_file_ids) {
 159  hints.resize( num_columns + 1 );
160  hints.back() = ranged_file_ids->begin();
161  }
162  else {
163  hints.resize( num_columns );
164  }
 165  for (unsigned i = 0; i < num_columns; ++i) {
166  offsets_out[i].clear();
167  counts_out[i].clear();
168  counts_out[i].reserve( file_ids.size() );
169  hints[i] = offsets_out[i].begin();
170  }
171 
172  // If we only need one column from a multi-column data set,
173  // then read only that column.
174  if (num_columns == 1 && data_set.columns() > 1 && !ranged_file_ids) {
175  data_set.set_column( indices[0] );
176  indices = &local_index;
177  }
178  else if (ranged_file_ids && data_set.columns() > 1 && 0 == num_columns) {
179  data_set.set_column( data_set.columns() - 1 );
180  }
181  // NOTE: do not move this above the previous block.
182  // The previous block changes the results of data_set.columns()!
183  const size_t table_columns = data_set.columns();
184 
185  // Calculate which rows we need to read from the offsets table
186  Range rows;
187  Range::iterator hint = rows.begin();
188  Range::const_pair_iterator pair = file_ids.const_pair_begin();
189  // special case if reading first entity in dataset, because
190  // there is no previous end value.
191  if (pair != file_ids.const_pair_end() && pair->first == start_file_id)
192  hint = rows.insert( nudge, pair->second - start_file_id + nudge );
193  while (pair != file_ids.const_pair_end()) {
194  hint = rows.insert( hint,
195  pair->first + nudge - 1 - start_file_id,
196  pair->second + nudge - start_file_id );
197  ++pair;
198  }
199 
200  // set up read of offsets dataset
201  hsize_t buffer_size = bufferSize / (sizeof(hssize_t) * data_set.columns());
202  hssize_t* buffer = reinterpret_cast<hssize_t*>(dataBuffer);
203  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
204  std::vector<hssize_t> prev_end;
205  // If we're reading the first row of the table, then the
206  // previous end is implicitly -1.
207  if (!file_ids.empty() && file_ids.front() == start_file_id)
208  prev_end.resize(num_columns,-1);
209 
210  // read offset table
211  size_t count, offset;
212  Range::const_iterator fiter = file_ids.begin();
213  while (!data_set.done()) {
214  try {
215  data_set.read( buffer, count );
216  }
217  catch (ReadHDF5Dataset::Exception e) {
218  return MB_FAILURE;
219  }
220  if (!count) // might have been NULL read for collective IO
221  continue;
222 
223  // If the previous end values were read in the previous iteration,
224  // then they're stored in prev_end.
225  size_t offset = 0;
226  if (!prev_end.empty()) {
227  for (unsigned i = 0; i < num_columns; ++i) {
228  counts_out[i].push_back( buffer[indices[i]] - prev_end[i] );
229  hints[i] = offsets_out[i].insert( hints[i],
230  prev_end[i] + 1 + nudge,
231  buffer[indices[i]] + nudge );
232  }
233  if (ranged_file_ids && (buffer[table_columns-1] & mhdf_SET_RANGE_BIT))
234  hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
235  ++fiter;
236  offset = 1;
237  prev_end.clear();
238  }
239 
240  while (offset < count) {
241  assert(fiter != file_ids.end());
242  // whenever we get to a gap between blocks we need to
243  // advance one step because we read an extra end id
 244  // preceding each block
245  if (fiter == fiter.start_of_block()) {
246  if (offset == count-1)
247  break;
248  ++offset;
249  }
250 
251  for (unsigned i = 0; i < num_columns; ++i) {
252  size_t s = buffer[(offset-1)*table_columns+indices[i]] + 1;
253  size_t e = buffer[ offset *table_columns+indices[i]];
 254  counts_out[i].push_back( e - s + 1 );
 255  hints[i] = offsets_out[i].insert( hints[i], s, e );
256  }
257  if (ranged_file_ids && (buffer[offset*table_columns+table_columns-1] & mhdf_SET_RANGE_BIT))
258  hints.back() = ranged_file_ids->insert( hints.back(), *fiter );
259 
260  ++fiter;
261  ++offset;
262  }
263 
264  // If we did not end on the boundary between two blocks,
265  // then we need to save the end indices for the final entry
266  // for use in the next iteration. Similarly, if we ended
267  // with extra values that were read with the express intention
268  // of getting the previous end values for a block, we need to
269  // save them. This case only arises if we hit the break in
270  // the above loop.
271  if (fiter != fiter.start_of_block() || offset < count) {
272  assert(prev_end.empty());
273  if (offset == count) {
274  --offset;
275  assert(fiter != fiter.start_of_block());
276  }
277  else {
278  assert(offset+1 == count);
279  assert(fiter == fiter.start_of_block());
280  }
281  for (unsigned i = 0; i < num_columns; ++i)
282  prev_end.push_back(buffer[offset*table_columns+indices[i]]);
283  }
284  }
285  assert(prev_end.empty());
286  assert(fiter == file_ids.end());
287 
288  return MB_SUCCESS;
289 }
290 */
292  const Range& file_ids,
293  EntityHandle start_file_id,
294  EntityHandle nudge,
295  Range& offsets_out,
296  std::vector< unsigned >& counts_out )
297 {
298 
299  // Use hints to make sure insertion into ranges is O(1)
300  offsets_out.clear();
301  counts_out.clear();
302  counts_out.reserve( file_ids.size() );
303  Range::iterator hint;
304 
305  // Calculate which rows we need to read from the offsets table
306  Range rows;
307  hint = rows.begin();
309  // special case if reading first entity in dataset, because
310  // there is no previous end value.
311  if( pair != file_ids.const_pair_end() && pair->first == start_file_id )
312  {
313  hint = rows.insert( nudge, pair->second - start_file_id + nudge );
314  ++pair;
315  }
316  while( pair != file_ids.const_pair_end() )
317  {
318  hint = rows.insert( hint, pair->first - start_file_id + nudge - 1, pair->second - start_file_id + nudge );
319  ++pair;
320  }
321 
322  // set up read of offsets dataset
323  hsize_t buffer_size = bufferSize / sizeof( hssize_t );
324  hssize_t* buffer = reinterpret_cast< hssize_t* >( dataBuffer );
325  data_set.set_file_ids( rows, nudge, buffer_size, H5T_NATIVE_HSSIZE );
326  hssize_t prev_end;
327  bool have_prev_end = false;
328  // If we're reading the first row of the table, then the
329  // previous end is implicitly -1.
330  if( !file_ids.empty() && file_ids.front() == start_file_id )
331  {
332  prev_end = -1;
333  have_prev_end = true;
334  }
335 
336  dbgOut.printf( 3, "Reading %s in %lu chunks\n", data_set.get_debug_desc(), data_set.get_read_count() );
337 
338  // read offset table
339  size_t count, offset;
340  Range::const_iterator fiter = file_ids.begin();
341  hint = offsets_out.begin();
342  int nn = 0;
343  while( !data_set.done() )
344  {
345  dbgOut.printf( 3, "Reading chunk %d of %s\n", ++nn, data_set.get_debug_desc() );
346  try
347  {
348  data_set.read( buffer, count );
349  }
351  {
352  return MB_FAILURE;
353  }
354  if( !count ) // might have been NULL read for collective IO
355  continue;
356 
357  // If the previous end values were read in the previous iteration,
358  // then they're stored in prev_end.
359  offset = 0;
360  if( have_prev_end )
361  {
362  counts_out.push_back( buffer[0] - prev_end );
363  hint = offsets_out.insert( hint, prev_end + 1 + nudge, buffer[0] + nudge );
364  ++fiter;
365  offset = 1;
366  have_prev_end = false;
367  }
368 
369  while( offset < count )
370  {
371  assert( fiter != file_ids.end() );
372  // whenever we get to a gap between blocks we need to
373  // advance one step because we read an extra end id
374  // preceding teah block
375  if( fiter == fiter.start_of_block() )
376  {
377  if( offset == count - 1 ) break;
378  ++offset;
379  }
380 
381  size_t s = buffer[offset - 1] + 1;
382  size_t e = buffer[offset];
383  counts_out.push_back( e - s + 1 );
384  hint = offsets_out.insert( hint, s + nudge, e + nudge );
385 
386  ++fiter;
387  ++offset;
388  }
389 
390  // If we did not end on the boundary between two blocks,
391  // then we need to save the end indices for the final entry
392  // for use in the next iteration. Similarly, if we ended
393  // with extra values that were read with the express intention
394  // of getting the previous end values for a block, we need to
395  // save them. This case only arises if we hit the break in
396  // the above loop.
397  if( fiter != fiter.start_of_block() || offset < count )
398  {
399  assert( !have_prev_end );
400  if( offset == count )
401  {
402  --offset;
403  assert( fiter != fiter.start_of_block() );
404  }
405  else
406  {
407  assert( offset + 1 == count );
408  assert( fiter == fiter.start_of_block() );
409  }
410  have_prev_end = true;
411  prev_end = buffer[offset];
412  }
413  }
414  assert( !have_prev_end );
415  assert( fiter == file_ids.end() );
416 
417  return MB_SUCCESS;
418 }
419 
420 } // namespace moab