Mesh Oriented datABase  (version 5.5.1)
An array-based unstructured mesh library
FileTokenizer.cpp
Go to the documentation of this file.
1 /**
2  * MOAB, a Mesh-Oriented datABase, is a software component for creating,
3  * storing and accessing finite element mesh data.
4  *
5  * Copyright 2004 Sandia Corporation. Under the terms of Contract
6  * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
7  * retains certain rights in this software.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  */
15 
#include "FileTokenizer.hpp"
#include "moab/ReadUtilIface.hpp"
#include "moab/ErrorHandler.hpp"

#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <string>
24 
25 namespace moab
26 {
27 
28 using namespace std;
29 
31  : filePtr( file_ptr ), nextToken( buffer ), bufferEnd( buffer ), lineNumber( 1 ), lastChar( '\0' )
32 {
33 }
34 
36 {
37  fclose( filePtr );
38 }
39 
40 bool FileTokenizer::eof() const
41 {
42  return nextToken == bufferEnd && feof( filePtr );
43 }
44 
46 {
47  // If the whitespace character marking the end of the
48  // last token was a newline, increment the line count.
49  if( lastChar == '\n' ) ++lineNumber;
50 
51  // Loop until either found the start of a token to return or have
52  // reached the end of the file.
53  for( ;; )
54  {
55  // If the buffer is empty, read more.
56  if( nextToken == bufferEnd )
57  {
58  size_t count = fread( buffer, 1, sizeof( buffer ) - 1, filePtr );
59  if( 0 == count )
60  {
61  if( feof( filePtr ) )
62  return NULL;
63  else
64  MB_SET_ERR_RET_VAL( "I/O Error", NULL );
65  }
66 
67  nextToken = buffer;
68  bufferEnd = buffer + count;
69  }
70 
71  // If the current character is not a space, we've found a token.
72  if( !isspace( *nextToken ) ) break;
73 
74  // If the current space character is a newline,
75  // increment the line number count.
76  if( *nextToken == '\n' ) ++lineNumber;
77  ++nextToken;
78  }
79 
80  // Store the start of the token in "result" and
81  // advance "nextToken" to one past the end of the
82  // token.
83  char* result = nextToken;
84  while( nextToken != bufferEnd && !isspace( static_cast< unsigned char >( *nextToken ) ) )
85  ++nextToken;
86 
87  // If we have reached the end of the buffer without finding
88  // a whitespace character terminating the token, we need to
89  // read more from the file. Only try once. If the token is
90  // too large to fit in the buffer, give up.
91  if( nextToken == bufferEnd )
92  {
93  // Shift the (possibly) partial token to the start of the buffer.
94  size_t remaining = bufferEnd - result;
95  memmove( buffer, result, remaining );
96  result = buffer;
97  nextToken = result + remaining;
98 
99  // Fill the remainder of the buffer after the token.
100  size_t count = fread( nextToken, 1, sizeof( buffer ) - remaining - 1, filePtr );
101  if( 0 == count && !feof( filePtr ) ) MB_SET_ERR_RET_VAL( "I/O Error", NULL );
102  bufferEnd = nextToken + count;
103 
104  // Continue to advance nextToken until we find the space
105  // terminating the token.
106  while( nextToken != bufferEnd && !isspace( *nextToken ) )
107  ++nextToken;
108 
109  if( nextToken == bufferEnd )
110  { // EOF
111  *bufferEnd = '\0';
112  ++bufferEnd;
113  }
114  }
115 
116  // Save terminating whitespace character (or NULL char if EOF).
117  lastChar = *nextToken;
118  // Put null in buffer to mark end of current token.
119  *nextToken = '\0';
120  // Advance nextToken to the next character to search next time.
121  ++nextToken;
122 
123  return result;
124 }
125 
127 {
128  // Get a token
129  const char *token_end, *token = get_string();
130  if( !token ) return false;
131 
132  // Check for hex value -- on some platforms (e.g. Linux), strtod
133  // will accept hex values, on others (e.g. Sun) it will not. Force
134  // failure on hex numbers for consistency.
135  if( token[0] && token[1] && token[0] == '0' && toupper( token[1] ) == 'X' )
136  MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
137  false );
138 
139  // Parse token as double
140  result = strtod( token, (char**)&token_end );
141 
142  // If the one past the last char read by strtod is
143  // not the NULL character terminating the string,
144  // then parse failed.
145  if( *token_end )
146  MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
147  false );
148 
149  return true;
150 }
151 
153 {
154  double d;
155  if( !get_double_internal( d ) ) return false;
156 
157  result = (float)d;
158 
159  return true;
160 }
161 
163 {
164  // Get a token
165  const char *token_end, *token = get_string();
166  if( !token ) return false;
167 
168  // Parse token as long
169  result = strtol( token, (char**)&token_end, 0 );
170 
171  // If the one past the last char read by strtol is
172  // not the NULL character terminating the string,
173  // then parse failed.
174  if( *token_end )
175  MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
176  false );
177 
178  return true;
179 }
180 
181 bool FileTokenizer::get_byte_internal( unsigned char& result )
182 {
183  long i;
184  if( !get_long_int_internal( i ) ) return false;
185 
186  result = (unsigned char)i;
187  if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
188 
189  return true;
190 }
191 
193 {
194  long i;
195  if( !get_long_int_internal( i ) ) return false;
196 
197  result = (short)i;
198  if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
199 
200  return true;
201 }
202 
204 {
205  long i;
206  if( !get_long_int_internal( i ) ) return false;
207 
208  result = (int)i;
209  if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
210 
211  return true;
212 }
213 
215 {
216  // Get a token
217  const char* token = get_string();
218  if( !token ) return false;
219 
220  if( token[1] || ( token[0] != '0' && token[0] != '1' ) )
221  MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected 0 or 1, got \"" << token << "\"",
222  false );
223 
224  result = token[0] == '1';
225 
226  return true;
227 }
228 
229 bool FileTokenizer::get_floats( size_t count, float* array )
230 {
231  for( size_t i = 0; i < count; ++i )
232  {
233  if( !get_float_internal( *array ) ) return false;
234  ++array;
235  }
236 
237  return true;
238 }
239 
240 bool FileTokenizer::get_doubles( size_t count, double* array )
241 {
242  for( size_t i = 0; i < count; ++i )
243  {
244  if( !get_double_internal( *array ) ) return false;
245  ++array;
246  }
247 
248  return true;
249 }
250 
251 bool FileTokenizer::get_bytes( size_t count, unsigned char* array )
252 {
253  for( size_t i = 0; i < count; ++i )
254  {
255  if( !get_byte_internal( *array ) ) return false;
256  ++array;
257  }
258 
259  return true;
260 }
261 
262 bool FileTokenizer::get_short_ints( size_t count, short* array )
263 {
264  for( size_t i = 0; i < count; ++i )
265  {
266  if( !get_short_int_internal( *array ) ) return false;
267  ++array;
268  }
269 
270  return true;
271 }
272 
273 bool FileTokenizer::get_integers( size_t count, int* array )
274 {
275  for( size_t i = 0; i < count; ++i )
276  {
277  if( !get_integer_internal( *array ) ) return false;
278  ++array;
279  }
280 
281  return true;
282 }
283 
284 bool FileTokenizer::get_long_ints( size_t count, long* array )
285 {
286  for( size_t i = 0; i < count; ++i )
287  {
288  if( !get_long_int_internal( *array ) ) return false;
289  ++array;
290  }
291 
292  return true;
293 }
294 
295 bool FileTokenizer::get_booleans( size_t count, bool* array )
296 {
297  for( size_t i = 0; i < count; ++i )
298  {
299  if( !get_boolean_internal( *array ) ) return false;
300  ++array;
301  }
302 
303  return true;
304 }
305 
307 {
308  if( nextToken - buffer < 2 ) return;
309 
310  --nextToken;
311  *nextToken = lastChar;
312  --nextToken;
313  while( nextToken > buffer && *nextToken )
314  --nextToken;
315 
316  if( !*nextToken ) ++nextToken;
317 
318  lastChar = '\0';
319 }
320 
321 bool FileTokenizer::match_token( const char* str, bool print_error )
322 {
323  // Get a token
324  const char* token = get_string();
325  if( !token ) return false;
326 
327  // Check if it matches
328  if( 0 == strcmp( token, str ) ) return true;
329 
330  // Construct error message
331  if( print_error )
332  MB_SET_ERR_CONT( "Syntax error at line " << line_number() << ": expected \"" << str << "\", got \"" << token
333  << "\"" );
334 
335  return false;
336 }
337 
338 int FileTokenizer::match_token( const char* const* list, bool print_error )
339 {
340  // Get a token
341  const char* token = get_string();
342  if( !token ) return 0;
343 
344  // Check if it matches any input string
345  const char* const* ptr;
346  for( ptr = list; *ptr; ++ptr )
347  {
348  if( 0 == strcmp( token, *ptr ) ) return ptr - list + 1;
349  }
350 
351  if( !print_error ) return 0;
352 
353  // No match, constuct error message
354  std::string message( "Parsing error at line " );
355  char lineno[16];
356  snprintf( lineno, 16, "%d", line_number() );
357  message += lineno;
358  message += ": expected one of {";
359  for( ptr = list; *ptr; ++ptr )
360  {
361  message += " ";
362  message += *ptr;
363  }
364  message += " } got \"";
365  message += token;
366  message += "\"";
367  MB_SET_ERR_CONT( message.c_str() );
368 
369  return 0;
370 }
371 
372 bool FileTokenizer::get_newline( bool report_error )
373 {
374  if( lastChar == '\n' )
375  {
376  lastChar = ' ';
377  ++lineNumber;
378  return true;
379  }
380 
381  // Loop until either we a) find a newline, b) find a non-whitespace
382  // character or c) reach the end of the file.
383  for( ;; )
384  {
385  // If the buffer is empty, read more.
386  if( nextToken == bufferEnd )
387  {
388  size_t count = fread( buffer, 1, sizeof( buffer ), filePtr );
389  if( 0 == count )
390  {
391  if( eof() )
392  MB_SET_ERR_RET_VAL( "File truncated at line " << line_number(), false );
393  else
394  MB_SET_ERR_RET_VAL( "I/O Error", false );
395  }
396 
397  nextToken = buffer;
398  bufferEnd = buffer + count;
399  }
400 
401  // If the current character is not a space, the we've failed.
402  if( !isspace( *nextToken ) )
403  if( report_error ) MB_SET_ERR_RET_VAL( "Expected newline at line " << line_number(), false );
404 
405  // If the current space character is a newline,
406  // increment the line number count.
407  if( *nextToken == '\n' )
408  {
409  ++lineNumber;
410  ++nextToken;
411  lastChar = ' ';
412  return true;
413  }
414  ++nextToken;
415  }
416 
417  return false;
418 }
419 
420 bool FileTokenizer::get_binary( size_t size, void* mem )
421 {
422  // If data in buffer
423  if( nextToken != bufferEnd )
424  {
425  // If requested size is less than buffer contents,
426  // just pass back part of the buffer
427  if( bufferEnd - nextToken <= (int)size )
428  {
429  memcpy( mem, nextToken, size );
430  nextToken += size;
431  return true;
432  }
433 
434  // Copy buffer contents into memory and clear buffer
435  memcpy( mem, nextToken, bufferEnd - nextToken );
436  size -= bufferEnd - nextToken;
437  mem = reinterpret_cast< char* >( mem ) + ( bufferEnd - nextToken );
439  }
440 
441  // Read any additional data from file
442  return size == fread( mem, 1, size, filePtr );
443 }
444 
445 } // namespace moab