Parse a file as space-separated tokens. More...
#include <FileTokenizer.hpp>
Public Member Functions | |
FileTokenizer (std::FILE *file_ptr, ReadUtilIface *read_util_ptr) | |
constructor More... | |
~FileTokenizer () | |
destructor : closes file. More... | |
const char * | get_string () |
get next token More... | |
bool | get_newline (bool report_error=true) |
check for newline More... | |
bool | get_doubles (size_t count, double *array) |
Parse a sequence of double values. More... | |
bool | get_floats (size_t count, float *array) |
Parse a sequence of float values. More... | |
bool | get_integers (size_t count, int *array) |
Parse a sequence of integer values. More... | |
bool | get_long_ints (size_t count, long *array) |
Parse a sequence of long integer values. More... | |
bool | get_short_ints (size_t count, short *array) |
Parse a sequence of short integer values. More... | |
bool | get_bytes (size_t count, unsigned char *array) |
Parse a sequence of unsigned byte values. More... | |
bool | get_binary (size_t bytes, void *mem) |
Read binary data (interleaved with ASCII) More... | |
bool | get_booleans (size_t count, bool *array) |
Parse a sequence of bit or boolean values. More... | |
bool | eof () const |
int | line_number () const |
void | unget_token () |
bool | match_token (const char *string, bool print_error=true) |
int | match_token (const char *const *string_list, bool print_error=true) |
Private Member Functions | |
bool | get_double_internal (double &result) |
bool | get_long_int_internal (long &result) |
bool | get_boolean_internal (bool &result) |
bool | get_float_internal (float &result) |
bool | get_integer_internal (int &result) |
bool | get_short_int_internal (short &result) |
bool | get_byte_internal (unsigned char &result) |
Private Attributes | |
std::FILE * | filePtr |
char | buffer [512] |
char * | nextToken |
char * | bufferEnd |
int | lineNumber |
char | lastChar |
Parse a file as space-separated tokens.
Read a file, separating it into space-separated tokens. This is provided in place of the standard C or C++ file parsing routines because it counts lines, which is useful for error reporting. It also provides some utility methods for parsing VTK files (the intended use of this implementation).
Uses raw reads/writes, implementing internal buffering. Token size may not exceed buffer size.
Definition at line 44 of file FileTokenizer.hpp.
moab::FileTokenizer::FileTokenizer | ( | std::FILE * | file_ptr, |
ReadUtilIface * | read_util_ptr | ||
) |
constructor
file_ptr | The file to read from. |
read_util_ptr | Pointer to ReadUtilIface to use for reporting errors. |
Definition at line 30 of file FileTokenizer.cpp.
31 : filePtr( file_ptr ), nextToken( buffer ), bufferEnd( buffer ), lineNumber( 1 ), lastChar( '\0' )
32 {
33 }
moab::FileTokenizer::~FileTokenizer | ( | ) |
destructor : closes file.
The destructor closes the passed file handle. This is done as a convenience feature. If the caller creates an instance of this object on the stack, the file will automatically be closed when the caller returns.
Definition at line 35 of file FileTokenizer.cpp.
36 { 37 fclose( filePtr ); 38 }
References filePtr.
bool moab::FileTokenizer::eof | ( | ) | const |
Check for end-of-file condition.
Definition at line 40 of file FileTokenizer.cpp.
41 {
42 return nextToken == bufferEnd && feof( filePtr );
43 }
References bufferEnd, filePtr, and nextToken.
Referenced by get_newline(), and moab::ReadVtk::load_file().
bool moab::FileTokenizer::get_binary | ( | size_t | bytes, |
void * | mem | ||
) |
Read binary data (interleaved with ASCII)
Read a block of binary data.
bytes | Number of bytes to read |
mem | Memory address at which to store data. |
Definition at line 420 of file FileTokenizer.cpp.
421 {
422 // If data in buffer
423 if( nextToken != bufferEnd )
424 {
425 // If requested size is less than buffer contents,
426 // just pass back part of the buffer
427 if( bufferEnd - nextToken <= (int)size )
428 {
429 memcpy( mem, nextToken, size );
430 nextToken += size;
431 return true;
432 }
433
434 // Copy buffer contents into memory and clear buffer
435 memcpy( mem, nextToken, bufferEnd - nextToken );
436 size -= bufferEnd - nextToken;
437 mem = reinterpret_cast< char* >( mem ) + ( bufferEnd - nextToken );
438 nextToken = bufferEnd;
439 }
440
441 // Read any additional data from file
442 return size == fread( mem, 1, size, filePtr );
443 }
|
private |
Internal implementation of get_booleans
Definition at line 214 of file FileTokenizer.cpp.
215 {
216 // Get a token
217 const char* token = get_string();
218 if( !token ) return false;
219
220 if( token[1] || ( token[0] != '0' && token[0] != '1' ) )
221 MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected 0 or 1, got \"" << token << "\"",
222 false );
223
224 result = token[0] == '1';
225
226 return true;
227 }
References get_string(), line_number(), and MB_SET_ERR_RET_VAL.
Referenced by get_booleans().
bool moab::FileTokenizer::get_booleans | ( | size_t | count, |
bool * | array | ||
) |
Parse a sequence of bit or boolean values.
Read the specified number of space-delimited values.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 295 of file FileTokenizer.cpp.
296 {
297 for( size_t i = 0; i < count; ++i )
298 {
299 if( !get_boolean_internal( *array ) ) return false;
300 ++array;
301 }
302
303 return true;
304 }
References get_boolean_internal().
Referenced by moab::ReadVtk::vtk_read_tag_data().
|
private |
Internal implementation of get_bytes
Definition at line 181 of file FileTokenizer.cpp.
182 {
183 long i;
184 if( !get_long_int_internal( i ) ) return false;
185
186 result = (unsigned char)i;
187 if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
188
189 return true;
190 }
References get_long_int_internal(), line_number(), and MB_SET_ERR_RET_VAL.
Referenced by get_bytes().
bool moab::FileTokenizer::get_bytes | ( | size_t | count, |
unsigned char * | array | ||
) |
Parse a sequence of integer values.
Read the specified number of space-delimited unsigned byte values.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 251 of file FileTokenizer.cpp.
252 {
253 for( size_t i = 0; i < count; ++i )
254 {
255 if( !get_byte_internal( *array ) ) return false;
256 ++array;
257 }
258
259 return true;
260 }
References get_byte_internal().
|
private |
Internal implementation of get_doubles
Definition at line 126 of file FileTokenizer.cpp.
127 {
128 // Get a token
129 const char *token_end, *token = get_string();
130 if( !token ) return false;
131
132 // Check for hex value -- on some platforms (e.g. Linux), strtod
133 // will accept hex values, on others (e.g. Sun) it will not. Force
134 // failure on hex numbers for consistency.
135 if( token[0] && token[1] && token[0] == '0' && toupper( token[1] ) == 'X' )
136 MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
137 false );
138
139 // Parse token as double
140 result = strtod( token, (char**)&token_end );
141
142 // If the one past the last char read by strtod is
143 // not the NULL character terminating the string,
144 // then parse failed.
145 if( *token_end )
146 MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
147 false );
148
149 return true;
150 }
References get_string(), line_number(), and MB_SET_ERR_RET_VAL.
Referenced by get_doubles(), and get_float_internal().
bool moab::FileTokenizer::get_doubles | ( | size_t | count, |
double * | array | ||
) |
Parse a sequence of double values.
Read the specified number of space-delimited doubles.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 240 of file FileTokenizer.cpp.
241 {
242 for( size_t i = 0; i < count; ++i )
243 {
244 if( !get_double_internal( *array ) ) return false;
245 ++array;
246 }
247
248 return true;
249 }
References get_double_internal().
Referenced by moab::ReadGmsh::load_file(), moab::ReadVtk::read_vertices(), moab::ReadVtk::vtk_read_field(), moab::ReadVtk::vtk_read_rectilinear_grid(), moab::ReadVtk::vtk_read_structured_points(), and moab::ReadVtk::vtk_read_tag_data().
|
private |
Internal implementation of get_floats
Definition at line 152 of file FileTokenizer.cpp.
153 {
154 double d;
155 if( !get_double_internal( d ) ) return false;
156
157 result = (float)d;
158
159 return true;
160 }
References get_double_internal().
Referenced by get_floats().
bool moab::FileTokenizer::get_floats | ( | size_t | count, |
float * | array | ||
) |
Parse a sequence of float values.
Read the specified number of space-delimited floats.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 229 of file FileTokenizer.cpp.
230 {
231 for( size_t i = 0; i < count; ++i )
232 {
233 if( !get_float_internal( *array ) ) return false;
234 ++array;
235 }
236
237 return true;
238 }
References get_float_internal().
Referenced by moab::ReadSTL::ascii_read_triangles().
|
private |
Internal implementation of get_integers
Definition at line 203 of file FileTokenizer.cpp.
204 {
205 long i;
206 if( !get_long_int_internal( i ) ) return false;
207
208 result = (int)i;
209 if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
210
211 return true;
212 }
References get_long_int_internal(), line_number(), and MB_SET_ERR_RET_VAL.
Referenced by get_integers().
bool moab::FileTokenizer::get_integers | ( | size_t | count, |
int * | array | ||
) |
Parse a sequence of integer values.
Read the specified number of space-delimited ints.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 273 of file FileTokenizer.cpp.
274 {
275 for( size_t i = 0; i < count; ++i )
276 {
277 if( !get_integer_internal( *array ) ) return false;
278 ++array;
279 }
280
281 return true;
282 }
References get_integer_internal().
Referenced by moab::ReadGmsh::load_file(), moab::ReadVtk::vtk_read_tag_data(), and moab::ReadVtk::vtk_read_texture_attrib().
|
private |
Internal implementation of get_long_ints
Definition at line 162 of file FileTokenizer.cpp.
163 {
164 // Get a token
165 const char *token_end, *token = get_string();
166 if( !token ) return false;
167
168 // Parse token as long
169 result = strtol( token, (char**)&token_end, 0 );
170
171 // If the one past the last char read by strtol is
172 // not the NULL character terminating the string,
173 // then parse failed.
174 if( *token_end )
175 MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
176 false );
177
178 return true;
179 }
References get_string(), line_number(), and MB_SET_ERR_RET_VAL.
Referenced by get_byte_internal(), get_integer_internal(), get_long_ints(), and get_short_int_internal().
bool moab::FileTokenizer::get_long_ints | ( | size_t | count, |
long * | array | ||
) |
Parse a sequence of integer values.
Read the specified number of space-delimited long ints.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 284 of file FileTokenizer.cpp.
285 {
286 for( size_t i = 0; i < count; ++i )
287 {
288 if( !get_long_int_internal( *array ) ) return false;
289 ++array;
290 }
291
292 return true;
293 }
References get_long_int_internal().
Referenced by moab::ReadGmsh::load_file(), moab::ReadVtk::load_file(), moab::ReadVtk::vtk_read_color_attrib(), moab::ReadVtk::vtk_read_field(), moab::ReadVtk::vtk_read_field_attrib(), moab::ReadVtk::vtk_read_polydata(), moab::ReadVtk::vtk_read_polygons(), moab::ReadVtk::vtk_read_rectilinear_grid(), moab::ReadVtk::vtk_read_structured_grid(), moab::ReadVtk::vtk_read_structured_points(), and moab::ReadVtk::vtk_read_unstructured_grid().
bool moab::FileTokenizer::get_newline | ( | bool | report_error = true | ) |
check for newline
Consume whitespace up to and including the next newline. If a non-space character is found before a newline, the function will stop, set the error message, and return false.
Definition at line 372 of file FileTokenizer.cpp.
373 {
374 if( lastChar == '\n' )
375 {
376 lastChar = ' ';
377 ++lineNumber;
378 return true;
379 }
380
381 // Loop until either we a) find a newline, b) find a non-whitespace
382 // character or c) reach the end of the file.
383 for( ;; )
384 {
385 // If the buffer is empty, read more.
386 if( nextToken == bufferEnd )
387 {
388 size_t count = fread( buffer, 1, sizeof( buffer ), filePtr );
389 if( 0 == count )
390 {
391 if( eof() )
392 MB_SET_ERR_RET_VAL( "File truncated at line " << line_number(), false );
393 else
394 MB_SET_ERR_RET_VAL( "I/O Error", false );
395 }
396
397 nextToken = buffer;
398 bufferEnd = buffer + count;
399 }
400
401 // If the current character is not a space, the we've failed.
402 if( !isspace( *nextToken ) )
403 if( report_error ) MB_SET_ERR_RET_VAL( "Expected newline at line " << line_number(), false );
404
405 // If the current space character is a newline,
406 // increment the line number count.
407 if( *nextToken == '\n' )
408 {
409 ++lineNumber;
410 ++nextToken;
411 lastChar = ' ';
412 return true;
413 }
414 ++nextToken;
415 }
416
417 return false;
418 }
References buffer, bufferEnd, eof(), filePtr, lastChar, line_number(), lineNumber, MB_SET_ERR_RET_VAL, and nextToken.
Referenced by moab::ReadGmsh::load_file(), moab::ReadVtk::vtk_read_polydata(), moab::ReadVtk::vtk_read_polygons(), moab::ReadVtk::vtk_read_rectilinear_grid(), moab::ReadVtk::vtk_read_structured_grid(), moab::ReadVtk::vtk_read_structured_points(), and moab::ReadVtk::vtk_read_unstructured_grid().
|
private |
Internal implementation of get_short_ints
Definition at line 192 of file FileTokenizer.cpp.
193 {
194 long i;
195 if( !get_long_int_internal( i ) ) return false;
196
197 result = (short)i;
198 if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
199
200 return true;
201 }
References get_long_int_internal(), line_number(), and MB_SET_ERR_RET_VAL.
Referenced by get_short_ints().
bool moab::FileTokenizer::get_short_ints | ( | size_t | count, |
short * | array | ||
) |
Parse a sequence of integer values.
Read the specified number of space-delimited short ints.
count | The number of values to read. |
array | The memory at which to store the values. |
Definition at line 262 of file FileTokenizer.cpp.
263 {
264 for( size_t i = 0; i < count; ++i )
265 {
266 if( !get_short_int_internal( *array ) ) return false;
267 ++array;
268 }
269
270 return true;
271 }
References get_short_int_internal().
const char * moab::FileTokenizer::get_string | ( | ) |
get next token
Get the next whitespace-delimited token from the file. NOTE: The returned string is only valid until the next call to any of the functions in this class that read from the file.
Definition at line 45 of file FileTokenizer.cpp.
46 {
47 // If the whitespace character marking the end of the
48 // last token was a newline, increment the line count.
49 if( lastChar == '\n' ) ++lineNumber;
50
51 // Loop until either found the start of a token to return or have
52 // reached the end of the file.
53 for( ;; )
54 {
55 // If the buffer is empty, read more.
56 if( nextToken == bufferEnd )
57 {
58 size_t count = fread( buffer, 1, sizeof( buffer ) - 1, filePtr );
59 if( 0 == count )
60 {
61 if( feof( filePtr ) )
62 return NULL;
63 else
64 MB_SET_ERR_RET_VAL( "I/O Error", NULL );
65 }
66
67 nextToken = buffer;
68 bufferEnd = buffer + count;
69 }
70
71 // If the current character is not a space, we've found a token.
72 if( !isspace( *nextToken ) ) break;
73
74 // If the current space character is a newline,
75 // increment the line number count.
76 if( *nextToken == '\n' ) ++lineNumber;
77 ++nextToken;
78 }
79
80 // Store the start of the token in "result" and
81 // advance "nextToken" to one past the end of the
82 // token.
83 char* result = nextToken;
84 while( nextToken != bufferEnd && !isspace( static_cast< unsigned char >( *nextToken ) ) )
85 ++nextToken;
86
87 // If we have reached the end of the buffer without finding
88 // a whitespace character terminating the token, we need to
89 // read more from the file. Only try once. If the token is
90 // too large to fit in the buffer, give up.
91 if( nextToken == bufferEnd )
92 {
93 // Shift the (possibly) partial token to the start of the buffer.
94 size_t remaining = bufferEnd - result;
95 memmove( buffer, result, remaining );
96 result = buffer;
97 nextToken = result + remaining;
98
99 // Fill the remainder of the buffer after the token.
100 size_t count = fread( nextToken, 1, sizeof( buffer ) - remaining - 1, filePtr );
101 if( 0 == count && !feof( filePtr ) ) MB_SET_ERR_RET_VAL( "I/O Error", NULL );
102 bufferEnd = nextToken + count;
103
104 // Continue to advance nextToken until we find the space
105 // terminating the token.
106 while( nextToken != bufferEnd && !isspace( *nextToken ) )
107 ++nextToken;
108
109 if( nextToken == bufferEnd )
110 { // EOF
111 *bufferEnd = '\0';
112 ++bufferEnd;
113 }
114 }
115
116 // Save terminating whitespace character (or NULL char if EOF).
117 lastChar = *nextToken;
118 // Put null in buffer to mark end of current token.
119 *nextToken = '\0';
120 // Advance nextToken to the next character to search next time.
121 ++nextToken;
122
123 return result;
124 }
References buffer, bufferEnd, filePtr, lastChar, lineNumber, MB_SET_ERR_RET_VAL, and nextToken.
Referenced by get_boolean_internal(), get_double_internal(), get_long_int_internal(), moab::ReadGmsh::load_file(), match_token(), moab::ReadVtk::vtk_read_attrib_data(), moab::ReadVtk::vtk_read_field(), moab::ReadVtk::vtk_read_field_attrib(), and moab::ReadVtk::vtk_read_scalar_attrib().
|
inline |
Get the line number the last token was read from.
Definition at line 175 of file FileTokenizer.hpp.
176 {
177 return lineNumber;
178 }
References lineNumber.
Referenced by get_boolean_internal(), get_byte_internal(), get_double_internal(), get_integer_internal(), get_long_int_internal(), get_newline(), get_short_int_internal(), moab::ReadGmsh::load_file(), moab::ReadVtk::load_file(), match_token(), moab::ReadVtk::vtk_read_field_attrib(), moab::ReadVtk::vtk_read_polydata(), moab::ReadVtk::vtk_read_rectilinear_grid(), moab::ReadVtk::vtk_read_scalar_attrib(), moab::ReadVtk::vtk_read_structured_grid(), moab::ReadVtk::vtk_read_structured_points(), moab::ReadVtk::vtk_read_tag_data(), moab::ReadVtk::vtk_read_texture_attrib(), and moab::ReadVtk::vtk_read_unstructured_grid().
int moab::FileTokenizer::match_token | ( | const char *const * | string_list, |
bool | print_error = true |
||
) |
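Read the next token and match it against an array of strings. Returns the one-based index of the matching string in string_list, or zero if no string matched or no token could be read. Sets the error message if the token doesn't match any of the input strings and print_error is true.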
string_list | A NULL-terminated array of strings. |
Definition at line 338 of file FileTokenizer.cpp.
339 {
340 // Get a token
341 const char* token = get_string();
342 if( !token ) return 0;
343
344 // Check if it matches any input string
345 const char* const* ptr;
346 for( ptr = list; *ptr; ++ptr )
347 {
348 if( 0 == strcmp( token, *ptr ) ) return ptr - list + 1;
349 }
350
351 if( !print_error ) return 0;
352
353 // No match, constuct error message
354 std::string message( "Parsing error at line " );
355 char lineno[16];
356 snprintf( lineno, 16, "%d", line_number() );
357 message += lineno;
358 message += ": expected one of {";
359 for( ptr = list; *ptr; ++ptr )
360 {
361 message += " ";
362 message += *ptr;
363 }
364 message += " } got \"";
365 message += token;
366 message += "\"";
367 MB_SET_ERR_CONT( message.c_str() );
368
369 return 0;
370 }
References get_string(), line_number(), MB_SET_ERR_CONT, and print_error().
bool moab::FileTokenizer::match_token | ( | const char * | string, |
bool | print_error = true |
||
) |
Read the next token and match it against the passed string. Returns true if the token matches. Otherwise returns false and, if print_error is true, sets the error message.
Definition at line 321 of file FileTokenizer.cpp.
322 {
323 // Get a token
324 const char* token = get_string();
325 if( !token ) return false;
326
327 // Check if it matches
328 if( 0 == strcmp( token, str ) ) return true;
329
330 // Construct error message
331 if( print_error )
332 MB_SET_ERR_CONT( "Syntax error at line " << line_number() << ": expected \"" << str << "\", got \"" << token
333 << "\"" );
334
335 return false;
336 }
References get_string(), line_number(), MB_SET_ERR_CONT, and print_error().
Referenced by moab::ReadSTL::ascii_read_triangles(), moab::ReadGmsh::load_file(), moab::ReadVtk::load_file(), moab::ReadVtk::vtk_read_attrib_data(), moab::ReadVtk::vtk_read_dataset(), moab::ReadVtk::vtk_read_field(), moab::ReadVtk::vtk_read_field_attrib(), moab::ReadVtk::vtk_read_polydata(), moab::ReadVtk::vtk_read_rectilinear_grid(), moab::ReadVtk::vtk_read_scalar_attrib(), moab::ReadVtk::vtk_read_structured_grid(), moab::ReadVtk::vtk_read_structured_points(), moab::ReadVtk::vtk_read_tensor_attrib(), moab::ReadVtk::vtk_read_texture_attrib(), moab::ReadVtk::vtk_read_unstructured_grid(), and moab::ReadVtk::vtk_read_vector_attrib().
void moab::FileTokenizer::unget_token | ( | ) |
Put current token back in buffer. Can only unget one token.
Definition at line 306 of file FileTokenizer.cpp.
307 {
308 if( nextToken - buffer < 2 ) return;
309
310 --nextToken;
311 *nextToken = lastChar;
312 --nextToken;
313 while( nextToken > buffer && *nextToken )
314 --nextToken;
315
316 if( !*nextToken ) ++nextToken;
317
318 lastChar = '\0';
319 }
References buffer, lastChar, and nextToken.
Referenced by moab::ReadVtk::load_file(), and moab::ReadVtk::vtk_read_scalar_attrib().
|
private |
Input buffer
Definition at line 223 of file FileTokenizer.hpp.
Referenced by get_newline(), get_string(), and unget_token().
|
private |
One past the last used byte of the buffer
Definition at line 228 of file FileTokenizer.hpp.
Referenced by eof(), get_binary(), get_newline(), and get_string().
|
private |
Pointer to standard C FILE struct
Definition at line 220 of file FileTokenizer.hpp.
Referenced by eof(), get_binary(), get_newline(), get_string(), and ~FileTokenizer().
|
private |
The whitespace character marking the end of the last returned token. Saved here because if it is a newline, the line count will need to be incremented when the next token is returned.
Definition at line 238 of file FileTokenizer.hpp.
Referenced by get_newline(), get_string(), and unget_token().
|
private |
Line number of last returned token
Definition at line 231 of file FileTokenizer.hpp.
Referenced by get_newline(), get_string(), and line_number().
|
private |
One past the end of the last token returned
Definition at line 226 of file FileTokenizer.hpp.
Referenced by eof(), get_binary(), get_newline(), get_string(), and unget_token().