Actual source code: scanner.c


  2: /*
  3:  * Introduction
  4:  * ************
  5:  *
  6:  * The following notes assume that you are familiar with the YAML specification
  7:  * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
  8:  * some cases we are less restrictive than it requires.
  9:  *
 10:  * The process of transforming a YAML stream into a sequence of events is
 11:  * divided into two steps: Scanning and Parsing.
 12:  *
 13:  * The Scanner transforms the input stream into a sequence of tokens, while the
 14:  * Parser transforms the sequence of tokens produced by the Scanner into a
 15:  * sequence of parsing events.
 16:  *
 17:  * The Scanner is rather clever and complicated. The Parser, on the contrary,
 18:  * is a straightforward implementation of a recursive-descent parser (or,
 19:  * LL(1) parser, as it is usually called).
 20:  *
 21:  * Actually there are two issues of Scanning that might be called "clever", the
 22:  * rest is quite straightforward.  The issues are "block collection start" and
 23:  * "simple keys".  Both issues are explained below in detail.
 24:  *
 25:  * Here the Scanning step is explained and implemented.  We start with the list
 26:  * of all the tokens produced by the Scanner together with short descriptions.
 27:  *
 28:  * Now, tokens:
 29:  *
 30:  *      STREAM-START(encoding)          # The stream start.
 31:  *      STREAM-END                      # The stream end.
 32:  *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
 33:  *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
 34:  *      DOCUMENT-START                  # '---'
 35:  *      DOCUMENT-END                    # '...'
 36:  *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
 37:  *      BLOCK-MAPPING-START             # sequence or a block mapping.
 38:  *      BLOCK-END                       # Indentation decrease.
 39:  *      FLOW-SEQUENCE-START             # '['
 40:  *      FLOW-SEQUENCE-END               # ']'
 41:  *      FLOW-MAPPING-START              # '{'
 42:  *      FLOW-MAPPING-END                # '}'
 43:  *      BLOCK-ENTRY                     # '-'
 44:  *      FLOW-ENTRY                      # ','
 45:  *      KEY                             # '?' or nothing (simple keys).
 46:  *      VALUE                           # ':'
 47:  *      ALIAS(anchor)                   # '*anchor'
 48:  *      ANCHOR(anchor)                  # '&anchor'
 49:  *      TAG(handle,suffix)              # '!handle!suffix'
 50:  *      SCALAR(value,style)             # A scalar.
 51:  *
 52:  * The following two tokens are "virtual" tokens denoting the beginning and the
 53:  * end of the stream:
 54:  *
 55:  *      STREAM-START(encoding)
 56:  *      STREAM-END
 57:  *
 58:  * We pass the information about the input stream encoding with the
 59:  * STREAM-START token.
 60:  *
 61:  * The next two tokens represent the '%YAML' and '%TAG' directives:
 62:  *
 63:  *      VERSION-DIRECTIVE(major,minor)
 64:  *      TAG-DIRECTIVE(handle,prefix)
 65:  *
 66:  * Example:
 67:  *
 68:  *      %YAML   1.1
 69:  *      %TAG    !   !foo
 70:  *      %TAG    !yaml!  tag:yaml.org,2002:
 71:  *      ---
 72:  *
 73:  * The corresponding sequence of tokens:
 74:  *
 75:  *      STREAM-START(utf-8)
 76:  *      VERSION-DIRECTIVE(1,1)
 77:  *      TAG-DIRECTIVE("!","!foo")
 78:  *      TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
 79:  *      DOCUMENT-START
 80:  *      STREAM-END
 81:  *
 82:  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
 83:  * line.
 84:  *
 85:  * The document start and end indicators are represented by:
 86:  *
 87:  *      DOCUMENT-START
 88:  *      DOCUMENT-END
 89:  *
 90:  * Note that if a YAML stream contains an implicit document (without '---'
 91:  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
 92:  * produced.
 93:  *
 94:  * In the following examples, we present whole documents together with the
 95:  * produced tokens.
 96:  *
 97:  *      1. An implicit document:
 98:  *
 99:  *          'a scalar'
100:  *
101:  *      Tokens:
102:  *
103:  *          STREAM-START(utf-8)
104:  *          SCALAR("a scalar",single-quoted)
105:  *          STREAM-END
106:  *
107:  *      2. An explicit document:
108:  *
109:  *          ---
110:  *          'a scalar'
111:  *          ...
112:  *
113:  *      Tokens:
114:  *
115:  *          STREAM-START(utf-8)
116:  *          DOCUMENT-START
117:  *          SCALAR("a scalar",single-quoted)
118:  *          DOCUMENT-END
119:  *          STREAM-END
120:  *
121:  *      3. Several documents in a stream:
122:  *
123:  *          'a scalar'
124:  *          ---
125:  *          'another scalar'
126:  *          ---
127:  *          'yet another scalar'
128:  *
129:  *      Tokens:
130:  *
131:  *          STREAM-START(utf-8)
132:  *          SCALAR("a scalar",single-quoted)
133:  *          DOCUMENT-START
134:  *          SCALAR("another scalar",single-quoted)
135:  *          DOCUMENT-START
136:  *          SCALAR("yet another scalar",single-quoted)
137:  *          STREAM-END
138:  *
139:  * We have already introduced the SCALAR token above.  The following tokens are
140:  * used to describe aliases, anchors, tags, and scalars:
141:  *
142:  *      ALIAS(anchor)
143:  *      ANCHOR(anchor)
144:  *      TAG(handle,suffix)
145:  *      SCALAR(value,style)
146:  *
147:  * The following series of examples illustrate the usage of these tokens:
148:  *
149:  *      1. A recursive sequence:
150:  *
151:  *          &A [ *A ]
152:  *
153:  *      Tokens:
154:  *
155:  *          STREAM-START(utf-8)
156:  *          ANCHOR("A")
157:  *          FLOW-SEQUENCE-START
158:  *          ALIAS("A")
159:  *          FLOW-SEQUENCE-END
160:  *          STREAM-END
161:  *
162:  *      2. A tagged scalar:
163:  *
164:  *          !!float "3.14"  # A good approximation.
165:  *
166:  *      Tokens:
167:  *
168:  *          STREAM-START(utf-8)
169:  *          TAG("!!","float")
170:  *          SCALAR("3.14",double-quoted)
171:  *          STREAM-END
172:  *
173:  *      3. Various scalar styles:
174:  *
175:  *          --- # Implicit empty plain scalars do not produce tokens.
176:  *          --- a plain scalar
177:  *          --- 'a single-quoted scalar'
178:  *          --- "a double-quoted scalar"
179:  *          --- |-
180:  *            a literal scalar
181:  *          --- >-
182:  *            a folded
183:  *            scalar
184:  *
185:  *      Tokens:
186:  *
187:  *          STREAM-START(utf-8)
188:  *          DOCUMENT-START
189:  *          DOCUMENT-START
190:  *          SCALAR("a plain scalar",plain)
191:  *          DOCUMENT-START
192:  *          SCALAR("a single-quoted scalar",single-quoted)
193:  *          DOCUMENT-START
194:  *          SCALAR("a double-quoted scalar",double-quoted)
195:  *          DOCUMENT-START
196:  *          SCALAR("a literal scalar",literal)
197:  *          DOCUMENT-START
198:  *          SCALAR("a folded scalar",folded)
199:  *          STREAM-END
200:  *
201:  * Now it's time to review collection-related tokens. We will start with
202:  * flow collections:
203:  *
204:  *      FLOW-SEQUENCE-START
205:  *      FLOW-SEQUENCE-END
206:  *      FLOW-MAPPING-START
207:  *      FLOW-MAPPING-END
208:  *      FLOW-ENTRY
209:  *      KEY
210:  *      VALUE
211:  *
212:  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213:  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214:  * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
215:  * indicators '?' and ':', which are used for denoting mapping keys and values,
216:  * are represented by the KEY and VALUE tokens.
217:  *
218:  * The following examples show flow collections:
219:  *
220:  *      1. A flow sequence:
221:  *
222:  *          [item 1, item 2, item 3]
223:  *
224:  *      Tokens:
225:  *
226:  *          STREAM-START(utf-8)
227:  *          FLOW-SEQUENCE-START
228:  *          SCALAR("item 1",plain)
229:  *          FLOW-ENTRY
230:  *          SCALAR("item 2",plain)
231:  *          FLOW-ENTRY
232:  *          SCALAR("item 3",plain)
233:  *          FLOW-SEQUENCE-END
234:  *          STREAM-END
235:  *
236:  *      2. A flow mapping:
237:  *
238:  *          {
239:  *              a simple key: a value,  # Note that the KEY token is produced.
240:  *              ? a complex key: another value,
241:  *          }
242:  *
243:  *      Tokens:
244:  *
245:  *          STREAM-START(utf-8)
246:  *          FLOW-MAPPING-START
247:  *          KEY
248:  *          SCALAR("a simple key",plain)
249:  *          VALUE
250:  *          SCALAR("a value",plain)
251:  *          FLOW-ENTRY
252:  *          KEY
253:  *          SCALAR("a complex key",plain)
254:  *          VALUE
255:  *          SCALAR("another value",plain)
256:  *          FLOW-ENTRY
257:  *          FLOW-MAPPING-END
258:  *          STREAM-END
259:  *
260:  * A simple key is a key which is not denoted by the '?' indicator.  Note that
261:  * the Scanner still produces the KEY token whenever it encounters a simple key.
262:  *
263:  * For scanning block collections, the following tokens are used (note that we
264:  * repeat KEY and VALUE here):
265:  *
266:  *      BLOCK-SEQUENCE-START
267:  *      BLOCK-MAPPING-START
268:  *      BLOCK-END
269:  *      BLOCK-ENTRY
270:  *      KEY
271:  *      VALUE
272:  *
273:  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274:  * increase that precedes a block collection (cf. the INDENT token in Python).
275:  * The token BLOCK-END denotes indentation decrease that ends a block collection
276:  * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
277:  * that make detection of these tokens more complex.
278:  *
279:  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280:  * '-', '?', and ':' correspondingly.
281:  *
282:  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283:  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284:  *
285:  *      1. Block sequences:
286:  *
287:  *          - item 1
288:  *          - item 2
289:  *          -
290:  *            - item 3.1
291:  *            - item 3.2
292:  *          -
293:  *            key 1: value 1
294:  *            key 2: value 2
295:  *
296:  *      Tokens:
297:  *
298:  *          STREAM-START(utf-8)
299:  *          BLOCK-SEQUENCE-START
300:  *          BLOCK-ENTRY
301:  *          SCALAR("item 1",plain)
302:  *          BLOCK-ENTRY
303:  *          SCALAR("item 2",plain)
304:  *          BLOCK-ENTRY
305:  *          BLOCK-SEQUENCE-START
306:  *          BLOCK-ENTRY
307:  *          SCALAR("item 3.1",plain)
308:  *          BLOCK-ENTRY
309:  *          SCALAR("item 3.2",plain)
310:  *          BLOCK-END
311:  *          BLOCK-ENTRY
312:  *          BLOCK-MAPPING-START
313:  *          KEY
314:  *          SCALAR("key 1",plain)
315:  *          VALUE
316:  *          SCALAR("value 1",plain)
317:  *          KEY
318:  *          SCALAR("key 2",plain)
319:  *          VALUE
320:  *          SCALAR("value 2",plain)
321:  *          BLOCK-END
322:  *          BLOCK-END
323:  *          STREAM-END
324:  *
325:  *      2. Block mappings:
326:  *
327:  *          a simple key: a value   # The KEY token is produced here.
328:  *          ? a complex key
329:  *          : another value
330:  *          a mapping:
331:  *            key 1: value 1
332:  *            key 2: value 2
333:  *          a sequence:
334:  *            - item 1
335:  *            - item 2
336:  *
337:  *      Tokens:
338:  *
339:  *          STREAM-START(utf-8)
340:  *          BLOCK-MAPPING-START
341:  *          KEY
342:  *          SCALAR("a simple key",plain)
343:  *          VALUE
344:  *          SCALAR("a value",plain)
345:  *          KEY
346:  *          SCALAR("a complex key",plain)
347:  *          VALUE
348:  *          SCALAR("another value",plain)
349:  *          KEY
350:  *          SCALAR("a mapping",plain)
351:  *          VALUE
352:  *          BLOCK-MAPPING-START
353:  *          KEY
354:  *          SCALAR("key 1",plain)
355:  *          VALUE
356:  *          SCALAR("value 1",plain)
357:  *          KEY
358:  *          SCALAR("key 2",plain)
359:  *          VALUE
360:  *          SCALAR("value 2",plain)
361:  *          BLOCK-END
362:  *          KEY
363:  *          SCALAR("a sequence",plain)
364:  *          VALUE
365:  *          BLOCK-SEQUENCE-START
366:  *          BLOCK-ENTRY
367:  *          SCALAR("item 1",plain)
368:  *          BLOCK-ENTRY
369:  *          SCALAR("item 2",plain)
370:  *          BLOCK-END
371:  *          BLOCK-END
372:  *          STREAM-END
373:  *
374:  * YAML does not always require a new block collection to start on a new
375:  * line.  If the current line contains only '-', '?', and ':' indicators, a new
376:  * block collection may start at the current line.  The following examples
377:  * illustrate this case:
378:  *
379:  *      1. Collections in a sequence:
380:  *
381:  *          - - item 1
382:  *            - item 2
383:  *          - key 1: value 1
384:  *            key 2: value 2
385:  *          - ? complex key
386:  *            : complex value
387:  *
388:  *      Tokens:
389:  *
390:  *          STREAM-START(utf-8)
391:  *          BLOCK-SEQUENCE-START
392:  *          BLOCK-ENTRY
393:  *          BLOCK-SEQUENCE-START
394:  *          BLOCK-ENTRY
395:  *          SCALAR("item 1",plain)
396:  *          BLOCK-ENTRY
397:  *          SCALAR("item 2",plain)
398:  *          BLOCK-END
399:  *          BLOCK-ENTRY
400:  *          BLOCK-MAPPING-START
401:  *          KEY
402:  *          SCALAR("key 1",plain)
403:  *          VALUE
404:  *          SCALAR("value 1",plain)
405:  *          KEY
406:  *          SCALAR("key 2",plain)
407:  *          VALUE
408:  *          SCALAR("value 2",plain)
409:  *          BLOCK-END
410:  *          BLOCK-ENTRY
411:  *          BLOCK-MAPPING-START
412:  *          KEY
413:  *          SCALAR("complex key",plain)
414:  *          VALUE
415:  *          SCALAR("complex value",plain)
416:  *          BLOCK-END
417:  *          BLOCK-END
418:  *          STREAM-END
419:  *
420:  *      2. Collections in a mapping:
421:  *
422:  *          ? a sequence
423:  *          : - item 1
424:  *            - item 2
425:  *          ? a mapping
426:  *          : key 1: value 1
427:  *            key 2: value 2
428:  *
429:  *      Tokens:
430:  *
431:  *          STREAM-START(utf-8)
432:  *          BLOCK-MAPPING-START
433:  *          KEY
434:  *          SCALAR("a sequence",plain)
435:  *          VALUE
436:  *          BLOCK-SEQUENCE-START
437:  *          BLOCK-ENTRY
438:  *          SCALAR("item 1",plain)
439:  *          BLOCK-ENTRY
440:  *          SCALAR("item 2",plain)
441:  *          BLOCK-END
442:  *          KEY
443:  *          SCALAR("a mapping",plain)
444:  *          VALUE
445:  *          BLOCK-MAPPING-START
446:  *          KEY
447:  *          SCALAR("key 1",plain)
448:  *          VALUE
449:  *          SCALAR("value 1",plain)
450:  *          KEY
451:  *          SCALAR("key 2",plain)
452:  *          VALUE
453:  *          SCALAR("value 2",plain)
454:  *          BLOCK-END
455:  *          BLOCK-END
456:  *          STREAM-END
457:  *
458:  * YAML also permits non-indented sequences if they are included in a block
459:  * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
460:  *
461:  *      key:
462:  *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
463:  *      - item 2
464:  *
465:  * Tokens:
466:  *
467:  *      STREAM-START(utf-8)
468:  *      BLOCK-MAPPING-START
469:  *      KEY
470:  *      SCALAR("key",plain)
471:  *      VALUE
472:  *      BLOCK-ENTRY
473:  *      SCALAR("item 1",plain)
474:  *      BLOCK-ENTRY
475:  *      SCALAR("item 2",plain)
476:  *      BLOCK-END
477:  */

479: #include "yaml_private.h"

/*
 * Ensure that the buffer contains the required number of characters.
 * Return 1 on success, 0 on failure (reader error or memory error).
 *
 * When parser->unread already covers `length`, the macro evaluates to 1
 * without calling anything; otherwise it evaluates to the result of
 * yaml_parser_update_buffer(parser, length).
 *
 * Both arguments are parenthesized in the expansion so that any pointer
 * expression (for example `&object`) may be passed.  Note that each argument
 * may be evaluated more than once, so it must be free of side effects.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))

/*
 * Advance the buffer pointer past one character.
 *
 * The character index and the column are each incremented by one, the count
 * of unread characters is decremented by one, and the byte pointer moves
 * forward by WIDTH(parser->buffer) bytes.  The line number is not touched;
 * use SKIP_LINE for line breaks.
 *
 * The `parser` argument is parenthesized in the expansion so any pointer
 * expression may be passed; it is evaluated several times and therefore must
 * not have side effects.
 */

#define SKIP(parser)                                                            \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer))

/*
 * Advance the buffer pointer past one line break.
 *
 * - CR LF (IS_CRLF): index and unread change by 2, the pointer moves 2 bytes.
 * - Any other break (IS_BREAK): index and unread change by 1, the pointer
 *   moves WIDTH(parser->buffer) bytes.
 * In both cases the column is reset to 0 and the line number is incremented.
 * If the buffer is not positioned at a line break, nothing is modified and
 * the expression evaluates to NULL.
 *
 * The `parser` argument is parenthesized in the expansion so any pointer
 * expression may be passed; it is evaluated several times and therefore must
 * not have side effects.
 */

#define SKIP_LINE(parser)                                                       \
     (IS_CRLF((parser)->buffer) ?                                               \
      ((parser)->mark.index += 2,                                               \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread -= 2,                                                   \
       (parser)->buffer.pointer += 2) :                                         \
      IS_BREAK((parser)->buffer) ?                                              \
      ((parser)->mark.index ++,                                                 \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread --,                                                     \
       (parser)->buffer.pointer += WIDTH((parser)->buffer)) : NULL)

/*
 * Copy a character to a string buffer and advance pointers.
 *
 * STRING_EXTEND is evaluated first; if it yields 0 the macro evaluates to 0
 * and neither the string nor the parser position is modified by this macro.
 * Otherwise COPY transfers one character from parser->buffer to `string`,
 * the character index and column are incremented, the unread count is
 * decremented, and the macro evaluates to 1.
 *
 * NOTE(review): the parser byte pointer is not advanced here directly; COPY
 * (defined outside this file section) is presumably responsible for moving
 * both the source and destination pointers -- confirm against its definition.
 *
 * The `parser` argument is parenthesized in the expansion so any pointer
 * expression may be passed; it is evaluated several times and therefore must
 * not have side effects.
 */

#define READ(parser,string)                                                     \
     (STRING_EXTEND(parser,string) ?                                            \
         (COPY(string,(parser)->buffer),                                        \
          (parser)->mark.index ++,                                              \
          (parser)->mark.column ++,                                             \
          (parser)->unread --,                                                  \
          1) : 0)

/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * STRING_EXTEND is evaluated first; if it yields 0 the macro evaluates to 0.
 * Otherwise the break at the current buffer position is handled as follows
 * and the macro evaluates to 1:
 *
 *   CR LF            -> a single '\n' is stored; 2 bytes and 2 characters
 *                       are consumed (index += 2, unread -= 2).
 *   CR or LF         -> a single '\n' is stored; 1 byte, 1 character.
 *   NEL (C2 85)      -> a single '\n' is stored; 2 bytes, 1 character.
 *   LS/PS (E2 80 A8 / E2 80 A9)
 *                    -> the three bytes are copied verbatim; 1 character.
 *
 * In every handled case the column is reset to 0 and the line number is
 * incremented.  If the buffer is not positioned at any of these sequences,
 * nothing is copied or advanced, yet the macro still evaluates to 1.
 *
 * The `parser` argument is parenthesized in the expansion so any pointer
 * expression may be passed; it is evaluated several times and therefore must
 * not have side effects.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND(parser,string) ?                                             \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
    1) : 0)

570: /*
571:  * Public API declarations.
572:  */

574: YAML_DECLARE(int)
575: yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);

577: /*
578:  * Error handling.
579:  */

581: static int
582: yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
583:         yaml_mark_t context_mark, const char *problem);

585: /*
586:  * High-level token API.
587:  */

589: YAML_DECLARE(int)
590: yaml_parser_fetch_more_tokens(yaml_parser_t *parser);

592: static int
593: yaml_parser_fetch_next_token(yaml_parser_t *parser);

595: /*
596:  * Potential simple keys.
597:  */

599: static int
600: yaml_parser_stale_simple_keys(yaml_parser_t *parser);

602: static int
603: yaml_parser_save_simple_key(yaml_parser_t *parser);

605: static int
606: yaml_parser_remove_simple_key(yaml_parser_t *parser);

608: static int
609: yaml_parser_increase_flow_level(yaml_parser_t *parser);

611: static int
612: yaml_parser_decrease_flow_level(yaml_parser_t *parser);

614: /*
615:  * Indentation treatment.
616:  */

618: static int
619: yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
620:         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);

622: static int
623: yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);

625: /*
626:  * Token fetchers.
627:  */

629: static int
630: yaml_parser_fetch_stream_start(yaml_parser_t *parser);

632: static int
633: yaml_parser_fetch_stream_end(yaml_parser_t *parser);

635: static int
636: yaml_parser_fetch_directive(yaml_parser_t *parser);

638: static int
639: yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
640:         yaml_token_type_t type);

642: static int
643: yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
644:         yaml_token_type_t type);

646: static int
647: yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
648:         yaml_token_type_t type);

650: static int
651: yaml_parser_fetch_flow_entry(yaml_parser_t *parser);

653: static int
654: yaml_parser_fetch_block_entry(yaml_parser_t *parser);

656: static int
657: yaml_parser_fetch_key(yaml_parser_t *parser);

659: static int
660: yaml_parser_fetch_value(yaml_parser_t *parser);

662: static int
663: yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);

665: static int
666: yaml_parser_fetch_tag(yaml_parser_t *parser);

668: static int
669: yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);

671: static int
672: yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);

674: static int
675: yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);

677: /*
678:  * Token scanners.
679:  */

681: static int
682: yaml_parser_scan_to_next_token(yaml_parser_t *parser);

684: static int
685: yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);

687: static int
688: yaml_parser_scan_directive_name(yaml_parser_t *parser,
689:         yaml_mark_t start_mark, yaml_char_t **name);

691: static int
692: yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
693:         yaml_mark_t start_mark, int *major, int *minor);

695: static int
696: yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
697:         yaml_mark_t start_mark, int *number);

699: static int
700: yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
701:         yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);

703: static int
704: yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
705:         yaml_token_type_t type);

707: static int
708: yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);

710: static int
711: yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
712:         yaml_mark_t start_mark, yaml_char_t **handle);

714: static int
715: yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
716:         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);

718: static int
719: yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
720:         yaml_mark_t start_mark, yaml_string_t *string);

722: static int
723: yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
724:         int literal);

726: static int
727: yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
728:         int *indent, yaml_string_t *breaks,
729:         yaml_mark_t start_mark, yaml_mark_t *end_mark);

731: static int
732: yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
733:         int single);

735: static int
736: yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);

738: /*
739:  * Get the next token.
740:  */

742: YAML_DECLARE(int)
743: yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
744: {
745:     assert(parser); /* Non-NULL parser object is expected. */
746:     assert(token);  /* Non-NULL token object is expected. */

748:     /* Erase the token object. */

750:     memset(token, 0, sizeof(yaml_token_t));

752:     /* No tokens after STREAM-END or error. */

754:     if (parser->stream_end_produced || parser->error) return 1;

756:     /* Ensure that the tokens queue contains enough tokens. */

758:     if (!parser->token_available) {
759:         if (!yaml_parser_fetch_more_tokens(parser))
760:             return 0;
761:     }

763:     /* Fetch the next token from the queue. */

765:     *token = DEQUEUE(parser, parser->tokens);
766:     parser->token_available = 0;
767:     parser->tokens_parsed ++;

769:     if (token->type == YAML_STREAM_END_TOKEN) parser->stream_end_produced = 1;

771:     return 1;
772: }

774: /*
775:  * Set the scanner error and return 0.
776:  */

778: static int
779: yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
780:         yaml_mark_t context_mark, const char *problem)
781: {
782:     parser->error = YAML_SCANNER_ERROR;
783:     parser->context = context;
784:     parser->context_mark = context_mark;
785:     parser->problem = problem;
786:     parser->problem_mark = parser->mark;

788:     return 0;
789: }

791: /*
792:  * Ensure that the tokens queue contains at least one token which can be
793:  * returned to the Parser.
794:  */

796: YAML_DECLARE(int)
797: yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
798: {
799:     int need_more_tokens;

801:     /* While we need more tokens to fetch, do it. */

803:     while (1)
804:     {
805:         /*
806:          * Check if we really need to fetch more tokens.
807:          */

809:         need_more_tokens = 0;

811:         if (parser->tokens.head == parser->tokens.tail)
812:         {
813:             /* Queue is empty. */

815:             need_more_tokens = 1;
816:         }
817:         else
818:         {
819:             yaml_simple_key_t *simple_key;

821:             /* Check if any potential simple key may occupy the head position. */

823:             if (!yaml_parser_stale_simple_keys(parser))
824:                 return 0;

826:             for (simple_key = parser->simple_keys.start;
827:                     simple_key != parser->simple_keys.top; simple_key++) {
828:                 if (simple_key->possible
829:                         && simple_key->token_number == parser->tokens_parsed) {
830:                     need_more_tokens = 1;
831:                     break;
832:                 }
833:             }
834:         }

836:         /* We are finished. */

838:         if (!need_more_tokens)
839:             break;

841:         /* Fetch the next token. */

843:         if (!yaml_parser_fetch_next_token(parser))
844:             return 0;
845:     }

847:     parser->token_available = 1;

849:     return 1;
850: }

852: /*
853:  * The dispatcher for token fetchers.
854:  */

856: static int
857: yaml_parser_fetch_next_token(yaml_parser_t *parser)
858: {
859:     /* Ensure that the buffer is initialized. */

861:     if (!CACHE(parser, 1))
862:         return 0;

864:     /* Check if we just started scanning.  Fetch STREAM-START then. */

866:     if (!parser->stream_start_produced)
867:         return yaml_parser_fetch_stream_start(parser);

869:     /* Eat whitespaces and comments until we reach the next token. */

871:     if (!yaml_parser_scan_to_next_token(parser))
872:         return 0;

874:     /* Remove obsolete potential simple keys. */

876:     if (!yaml_parser_stale_simple_keys(parser))
877:         return 0;

879:     /* Check the indentation level against the current column. */

881:     if (!yaml_parser_unroll_indent(parser, parser->mark.column))
882:         return 0;

884:     /*
885:      * Ensure that the buffer contains at least 4 characters.  4 is the length
886:      * of the longest indicators ('--- ' and '... ').
887:      */

889:     if (!CACHE(parser, 4))
890:         return 0;

892:     /* Is it the end of the stream? */

894:     if (IS_Z(parser->buffer))
895:         return yaml_parser_fetch_stream_end(parser);

897:     /* Is it a directive? */

899:     if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
900:         return yaml_parser_fetch_directive(parser);

902:     /* Is it the document start indicator? */

904:     if (parser->mark.column == 0
905:             && CHECK_AT(parser->buffer, '-', 0)
906:             && CHECK_AT(parser->buffer, '-', 1)
907:             && CHECK_AT(parser->buffer, '-', 2)
908:             && IS_BLANKZ_AT(parser->buffer, 3))
909:         return yaml_parser_fetch_document_indicator(parser,
910:                 YAML_DOCUMENT_START_TOKEN);

912:     /* Is it the document end indicator? */

914:     if (parser->mark.column == 0
915:             && CHECK_AT(parser->buffer, '.', 0)
916:             && CHECK_AT(parser->buffer, '.', 1)
917:             && CHECK_AT(parser->buffer, '.', 2)
918:             && IS_BLANKZ_AT(parser->buffer, 3))
919:         return yaml_parser_fetch_document_indicator(parser,
920:                 YAML_DOCUMENT_END_TOKEN);

922:     /* Is it the flow sequence start indicator? */

924:     if (CHECK(parser->buffer, '['))
925:         return yaml_parser_fetch_flow_collection_start(parser,
926:                 YAML_FLOW_SEQUENCE_START_TOKEN);

928:     /* Is it the flow mapping start indicator? */

930:     if (CHECK(parser->buffer, '{'))
931:         return yaml_parser_fetch_flow_collection_start(parser,
932:                 YAML_FLOW_MAPPING_START_TOKEN);

934:     /* Is it the flow sequence end indicator? */

936:     if (CHECK(parser->buffer, ']'))
937:         return yaml_parser_fetch_flow_collection_end(parser,
938:                 YAML_FLOW_SEQUENCE_END_TOKEN);

940:     /* Is it the flow mapping end indicator? */

942:     if (CHECK(parser->buffer, '}'))
943:         return yaml_parser_fetch_flow_collection_end(parser,
944:                 YAML_FLOW_MAPPING_END_TOKEN);

946:     /* Is it the flow entry indicator? */

948:     if (CHECK(parser->buffer, ','))
949:         return yaml_parser_fetch_flow_entry(parser);

951:     /* Is it the block entry indicator? */

953:     if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
954:         return yaml_parser_fetch_block_entry(parser);

956:     /* Is it the key indicator? */

958:     if (CHECK(parser->buffer, '?')
959:             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
960:         return yaml_parser_fetch_key(parser);

962:     /* Is it the value indicator? */

964:     if (CHECK(parser->buffer, ':')
965:             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
966:         return yaml_parser_fetch_value(parser);

968:     /* Is it an alias? */

970:     if (CHECK(parser->buffer, '*'))
971:         return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);

973:     /* Is it an anchor? */

975:     if (CHECK(parser->buffer, '&'))
976:         return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);

978:     /* Is it a tag? */

980:     if (CHECK(parser->buffer, '!'))
981:         return yaml_parser_fetch_tag(parser);

983:     /* Is it a literal scalar? */

985:     if (CHECK(parser->buffer, '|') && !parser->flow_level)
986:         return yaml_parser_fetch_block_scalar(parser, 1);

988:     /* Is it a folded scalar? */

990:     if (CHECK(parser->buffer, '>') && !parser->flow_level)
991:         return yaml_parser_fetch_block_scalar(parser, 0);

993:     /* Is it a single-quoted scalar? */

995:     if (CHECK(parser->buffer, '\''))
996:         return yaml_parser_fetch_flow_scalar(parser, 1);

998:     /* Is it a double-quoted scalar? */

1000:     if (CHECK(parser->buffer, '"'))
1001:         return yaml_parser_fetch_flow_scalar(parser, 0);

1003:     /*
1004:      * Is it a plain scalar?
1005:      *
1006:      * A plain scalar may start with any non-blank characters except
1007:      *
1008:      *      '-', '?', ':', ',', '[', ']', '{', '}',
1009:      *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1010:      *      '%', '@', '`'.
1011:      *
1012:      * In the block context (and, for the '-' indicator, in the flow context
1013:      * too), it may also start with the characters
1014:      *
1015:      *      '-', '?', ':'
1016:      *
1017:      * if it is followed by a non-space character.
1018:      *
1019:      * The last rule is more restrictive than the specification requires.
1020:      */

1022:     if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1023:                 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1024:                 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1025:                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1026:                 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1027:                 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1028:                 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1029:                 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1030:                 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1031:                 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1032:             (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1033:             (!parser->flow_level &&
1034:              (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1035:              && !IS_BLANKZ_AT(parser->buffer, 1)))
1036:         return yaml_parser_fetch_plain_scalar(parser);

1038:     /*
1039:      * If we don't determine the token type so far, it is an error.
1040:      */

1042:     return yaml_parser_set_scanner_error(parser,
1043:             "while scanning for the next token", parser->mark,
1044:             "found character that cannot start any token");
1045: }

1047: /*
1048:  * Check the list of potential simple keys and remove the positions that
1049:  * cannot contain simple keys anymore.
1050:  */

1052: static int
1053: yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1054: {
1055:     yaml_simple_key_t *simple_key;

1057:     /* Check for a potential simple key for each flow level. */

1059:     for (simple_key = parser->simple_keys.start;
1060:             simple_key != parser->simple_keys.top; simple_key ++)
1061:     {
1062:         /*
1063:          * The specification requires that a simple key
1064:          *
1065:          *  - is limited to a single line,
1066:          *  - is shorter than 1024 characters.
1067:          */

1069:         if (simple_key->possible
1070:                 && (simple_key->mark.line < parser->mark.line
1071:                     || simple_key->mark.index+1024 < parser->mark.index)) {

1073:             /* Check if the potential simple key to be removed is required. */

1075:             if (simple_key->required) {
1076:                 return yaml_parser_set_scanner_error(parser,
1077:                         "while scanning a simple key", simple_key->mark,
1078:                         "could not find expected ':'");
1079:             }

1081:             simple_key->possible = 0;
1082:         }
1083:     }

1085:     return 1;
1086: }

1088: /*
1089:  * Check if a simple key may start at the current position and add it if
1090:  * needed.
1091:  */

1093: static int
1094: yaml_parser_save_simple_key(yaml_parser_t *parser)
1095: {
1096:     /*
1097:      * A simple key is required at the current position if the scanner is in
1098:      * the block context and the current column coincides with the indentation
1099:      * level.
1100:      */

1102:     int required = (!parser->flow_level
1103:             && parser->indent == (ptrdiff_t)parser->mark.column);

1105:     /*
1106:      * If the current position may start a simple key, save it.
1107:      */

1109:     if (parser->simple_key_allowed)
1110:     {
1111:         yaml_simple_key_t simple_key;
1112:         simple_key.possible = 1;
1113:         simple_key.required = required;
1114:         simple_key.token_number =
1115:             parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1116:         simple_key.mark = parser->mark;

1118:         if (!yaml_parser_remove_simple_key(parser)) return 0;

1120:         *(parser->simple_keys.top-1) = simple_key;
1121:     }

1123:     return 1;
1124: }

1126: /*
1127:  * Remove a potential simple key at the current flow level.
1128:  */

1130: static int
1131: yaml_parser_remove_simple_key(yaml_parser_t *parser)
1132: {
1133:     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;

1135:     if (simple_key->possible)
1136:     {
1137:         /* If the key is required, it is an error. */

1139:         if (simple_key->required) {
1140:             return yaml_parser_set_scanner_error(parser,
1141:                     "while scanning a simple key", simple_key->mark,
1142:                     "could not find expected ':'");
1143:         }
1144:     }

1146:     /* Remove the key from the stack. */

1148:     simple_key->possible = 0;

1150:     return 1;
1151: }

1153: /*
1154:  * Increase the flow level and resize the simple key list if needed.
1155:  */

1157: static int
1158: yaml_parser_increase_flow_level(yaml_parser_t *parser)
1159: {
1160:     yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };

1162:     /* Reset the simple key on the next level. */

1164:     if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1165:         return 0;

1167:     /* Increase the flow level. */

1169:     if (parser->flow_level == INT_MAX) {
1170:         parser->error = YAML_MEMORY_ERROR;
1171:         return 0;
1172:     }

1174:     parser->flow_level++;

1176:     return 1;
1177: }

1179: /*
1180:  * Decrease the flow level.
1181:  */

1183: static int
1184: yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1185: {
1186:     if (parser->flow_level) {
1187:         parser->flow_level --;
1188:         (void)POP(parser, parser->simple_keys);
1189:     }

1191:     return 1;
1192: }

1194: /*
1195:  * Push the current indentation level to the stack and set the new level
1196:  * the current column is greater than the indentation level.  In this case,
1197:  * append or insert the specified token into the token queue.
1198:  *
1199:  */

1201: static int
1202: yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1203:         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1204: {
1205:     yaml_token_t token;

1207:     /* In the flow context, do nothing. */

1209:     if (parser->flow_level)
1210:         return 1;

1212:     if (parser->indent < column)
1213:     {
1214:         /*
1215:          * Push the current indentation level to the stack and set the new
1216:          * indentation level.
1217:          */

1219:         if (!PUSH(parser, parser->indents, parser->indent))
1220:             return 0;

1222:         if (column > INT_MAX) {
1223:             parser->error = YAML_MEMORY_ERROR;
1224:             return 0;
1225:         }

1227:         parser->indent = column;

1229:         /* Create a token and insert it into the queue. */

1231:         TOKEN_INIT(token, type, mark, mark);

1233:         if (number == -1) {
1234:             if (!ENQUEUE(parser, parser->tokens, token))
1235:                 return 0;
1236:         }
1237:         else {
1238:             if (!QUEUE_INSERT(parser,
1239:                         parser->tokens, number - parser->tokens_parsed, token))
1240:                 return 0;
1241:         }
1242:     }

1244:     return 1;
1245: }

1247: /*
1248:  * Pop indentation levels from the indents stack until the current level
1249:  * becomes less or equal to the column.  For each indentation level, append
1250:  * the BLOCK-END token.
1251:  */


1254: static int
1255: yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1256: {
1257:     yaml_token_t token;

1259:     /* In the flow context, do nothing. */

1261:     if (parser->flow_level)
1262:         return 1;

1264:     /* Loop through the indentation levels in the stack. */

1266:     while (parser->indent > column)
1267:     {
1268:         /* Create a token and append it to the queue. */

1270:         TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);

1272:         if (!ENQUEUE(parser, parser->tokens, token))
1273:             return 0;

1275:         /* Pop the indentation level. */

1277:         parser->indent = POP(parser, parser->indents);
1278:     }

1280:     return 1;
1281: }

1283: /*
1284:  * Initialize the scanner and produce the STREAM-START token.
1285:  */

1287: static int
1288: yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1289: {
1290:     yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1291:     yaml_token_t token;

1293:     /* Set the initial indentation. */

1295:     parser->indent = -1;

1297:     /* Initialize the simple key stack. */

1299:     if (!PUSH(parser, parser->simple_keys, simple_key))
1300:         return 0;

1302:     /* A simple key is allowed at the beginning of the stream. */

1304:     parser->simple_key_allowed = 1;

1306:     /* We have started. */

1308:     parser->stream_start_produced = 1;

1310:     /* Create the STREAM-START token and append it to the queue. */

1312:     STREAM_START_TOKEN_INIT(token, parser->encoding,
1313:             parser->mark, parser->mark);

1315:     if (!ENQUEUE(parser, parser->tokens, token))
1316:         return 0;

1318:     return 1;
1319: }

1321: /*
1322:  * Produce the STREAM-END token and shut down the scanner.
1323:  */

1325: static int
1326: yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1327: {
1328:     yaml_token_t token;

1330:     /* Force new line. */

1332:     if (parser->mark.column != 0) {
1333:         parser->mark.column = 0;
1334:         parser->mark.line ++;
1335:     }

1337:     /* Reset the indentation level. */

1339:     if (!yaml_parser_unroll_indent(parser, -1))
1340:         return 0;

1342:     /* Reset simple keys. */

1344:     if (!yaml_parser_remove_simple_key(parser))
1345:         return 0;

1347:     parser->simple_key_allowed = 0;

1349:     /* Create the STREAM-END token and append it to the queue. */

1351:     STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);

1353:     if (!ENQUEUE(parser, parser->tokens, token))
1354:         return 0;

1356:     return 1;
1357: }

1359: /*
1360:  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1361:  */

1363: static int
1364: yaml_parser_fetch_directive(yaml_parser_t *parser)
1365: {
1366:     yaml_token_t token;

1368:     /* Reset the indentation level. */

1370:     if (!yaml_parser_unroll_indent(parser, -1))
1371:         return 0;

1373:     /* Reset simple keys. */

1375:     if (!yaml_parser_remove_simple_key(parser))
1376:         return 0;

1378:     parser->simple_key_allowed = 0;

1380:     /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */

1382:     if (!yaml_parser_scan_directive(parser, &token))
1383:         return 0;

1385:     /* Append the token to the queue. */

1387:     if (!ENQUEUE(parser, parser->tokens, token)) {
1388:         yaml_token_delete(&token);
1389:         return 0;
1390:     }

1392:     return 1;
1393: }

1395: /*
1396:  * Produce the DOCUMENT-START or DOCUMENT-END token.
1397:  */

1399: static int
1400: yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1401:         yaml_token_type_t type)
1402: {
1403:     yaml_mark_t start_mark, end_mark;
1404:     yaml_token_t token;

1406:     /* Reset the indentation level. */

1408:     if (!yaml_parser_unroll_indent(parser, -1))
1409:         return 0;

1411:     /* Reset simple keys. */

1413:     if (!yaml_parser_remove_simple_key(parser))
1414:         return 0;

1416:     parser->simple_key_allowed = 0;

1418:     /* Consume the token. */

1420:     start_mark = parser->mark;

1422:     SKIP(parser);
1423:     SKIP(parser);
1424:     SKIP(parser);

1426:     end_mark = parser->mark;

1428:     /* Create the DOCUMENT-START or DOCUMENT-END token. */

1430:     TOKEN_INIT(token, type, start_mark, end_mark);

1432:     /* Append the token to the queue. */

1434:     if (!ENQUEUE(parser, parser->tokens, token))
1435:         return 0;

1437:     return 1;
1438: }

1440: /*
1441:  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1442:  */

1444: static int
1445: yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1446:         yaml_token_type_t type)
1447: {
1448:     yaml_mark_t start_mark, end_mark;
1449:     yaml_token_t token;

1451:     /* The indicators '[' and '{' may start a simple key. */

1453:     if (!yaml_parser_save_simple_key(parser))
1454:         return 0;

1456:     /* Increase the flow level. */

1458:     if (!yaml_parser_increase_flow_level(parser))
1459:         return 0;

1461:     /* A simple key may follow the indicators '[' and '{'. */

1463:     parser->simple_key_allowed = 1;

1465:     /* Consume the token. */

1467:     start_mark = parser->mark;
1468:     SKIP(parser);
1469:     end_mark = parser->mark;

1471:     /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */

1473:     TOKEN_INIT(token, type, start_mark, end_mark);

1475:     /* Append the token to the queue. */

1477:     if (!ENQUEUE(parser, parser->tokens, token))
1478:         return 0;

1480:     return 1;
1481: }

1483: /*
1484:  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1485:  */

1487: static int
1488: yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1489:         yaml_token_type_t type)
1490: {
1491:     yaml_mark_t start_mark, end_mark;
1492:     yaml_token_t token;

1494:     /* Reset any potential simple key on the current flow level. */

1496:     if (!yaml_parser_remove_simple_key(parser))
1497:         return 0;

1499:     /* Decrease the flow level. */

1501:     if (!yaml_parser_decrease_flow_level(parser))
1502:         return 0;

1504:     /* No simple keys after the indicators ']' and '}'. */

1506:     parser->simple_key_allowed = 0;

1508:     /* Consume the token. */

1510:     start_mark = parser->mark;
1511:     SKIP(parser);
1512:     end_mark = parser->mark;

1514:     /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */

1516:     TOKEN_INIT(token, type, start_mark, end_mark);

1518:     /* Append the token to the queue. */

1520:     if (!ENQUEUE(parser, parser->tokens, token))
1521:         return 0;

1523:     return 1;
1524: }

1526: /*
1527:  * Produce the FLOW-ENTRY token.
1528:  */

1530: static int
1531: yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1532: {
1533:     yaml_mark_t start_mark, end_mark;
1534:     yaml_token_t token;

1536:     /* Reset any potential simple keys on the current flow level. */

1538:     if (!yaml_parser_remove_simple_key(parser))
1539:         return 0;

1541:     /* Simple keys are allowed after ','. */

1543:     parser->simple_key_allowed = 1;

1545:     /* Consume the token. */

1547:     start_mark = parser->mark;
1548:     SKIP(parser);
1549:     end_mark = parser->mark;

1551:     /* Create the FLOW-ENTRY token and append it to the queue. */

1553:     TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);

1555:     if (!ENQUEUE(parser, parser->tokens, token))
1556:         return 0;

1558:     return 1;
1559: }

1561: /*
1562:  * Produce the BLOCK-ENTRY token.
1563:  */

1565: static int
1566: yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1567: {
1568:     yaml_mark_t start_mark, end_mark;
1569:     yaml_token_t token;

1571:     /* Check if the scanner is in the block context. */

1573:     if (!parser->flow_level)
1574:     {
1575:         /* Check if we are allowed to start a new entry. */

1577:         if (!parser->simple_key_allowed) {
1578:             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1579:                     "block sequence entries are not allowed in this context");
1580:         }

1582:         /* Add the BLOCK-SEQUENCE-START token if needed. */

1584:         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1585:                     YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1586:             return 0;
1587:     }
1588:     else
1589:     {
1590:         /*
1591:          * It is an error for the '-' indicator to occur in the flow context,
1592:          * but we let the Parser detect and report about it because the Parser
1593:          * is able to point to the context.
1594:          */
1595:     }

1597:     /* Reset any potential simple keys on the current flow level. */

1599:     if (!yaml_parser_remove_simple_key(parser))
1600:         return 0;

1602:     /* Simple keys are allowed after '-'. */

1604:     parser->simple_key_allowed = 1;

1606:     /* Consume the token. */

1608:     start_mark = parser->mark;
1609:     SKIP(parser);
1610:     end_mark = parser->mark;

1612:     /* Create the BLOCK-ENTRY token and append it to the queue. */

1614:     TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);

1616:     if (!ENQUEUE(parser, parser->tokens, token))
1617:         return 0;

1619:     return 1;
1620: }

1622: /*
1623:  * Produce the KEY token.
1624:  */

1626: static int
1627: yaml_parser_fetch_key(yaml_parser_t *parser)
1628: {
1629:     yaml_mark_t start_mark, end_mark;
1630:     yaml_token_t token;

1632:     /* In the block context, additional checks are required. */

1634:     if (!parser->flow_level)
1635:     {
1636:         /* Check if we are allowed to start a new key (not necessary simple). */

1638:         if (!parser->simple_key_allowed) {
1639:             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1640:                     "mapping keys are not allowed in this context");
1641:         }

1643:         /* Add the BLOCK-MAPPING-START token if needed. */

1645:         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1646:                     YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1647:             return 0;
1648:     }

1650:     /* Reset any potential simple keys on the current flow level. */

1652:     if (!yaml_parser_remove_simple_key(parser))
1653:         return 0;

1655:     /* Simple keys are allowed after '?' in the block context. */

1657:     parser->simple_key_allowed = (!parser->flow_level);

1659:     /* Consume the token. */

1661:     start_mark = parser->mark;
1662:     SKIP(parser);
1663:     end_mark = parser->mark;

1665:     /* Create the KEY token and append it to the queue. */

1667:     TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);

1669:     if (!ENQUEUE(parser, parser->tokens, token))
1670:         return 0;

1672:     return 1;
1673: }

1675: /*
1676:  * Produce the VALUE token.
1677:  */

1679: static int
1680: yaml_parser_fetch_value(yaml_parser_t *parser)
1681: {
1682:     yaml_mark_t start_mark, end_mark;
1683:     yaml_token_t token;
1684:     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;

1686:     /* Have we found a simple key? */

1688:     if (simple_key->possible)
1689:     {

1691:         /* Create the KEY token and insert it into the queue. */

1693:         TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);

1695:         if (!QUEUE_INSERT(parser, parser->tokens,
1696:                     simple_key->token_number - parser->tokens_parsed, token))
1697:             return 0;

1699:         /* In the block context, we may need to add the BLOCK-MAPPING-START token. */

1701:         if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1702:                     simple_key->token_number,
1703:                     YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1704:             return 0;

1706:         /* Remove the simple key. */

1708:         simple_key->possible = 0;

1710:         /* A simple key cannot follow another simple key. */

1712:         parser->simple_key_allowed = 0;
1713:     }
1714:     else
1715:     {
1716:         /* The ':' indicator follows a complex key. */

1718:         /* In the block context, extra checks are required. */

1720:         if (!parser->flow_level)
1721:         {
1722:             /* Check if we are allowed to start a complex value. */

1724:             if (!parser->simple_key_allowed) {
1725:                 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1726:                         "mapping values are not allowed in this context");
1727:             }

1729:             /* Add the BLOCK-MAPPING-START token if needed. */

1731:             if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1732:                         YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1733:                 return 0;
1734:         }

1736:         /* Simple keys after ':' are allowed in the block context. */

1738:         parser->simple_key_allowed = (!parser->flow_level);
1739:     }

1741:     /* Consume the token. */

1743:     start_mark = parser->mark;
1744:     SKIP(parser);
1745:     end_mark = parser->mark;

1747:     /* Create the VALUE token and append it to the queue. */

1749:     TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);

1751:     if (!ENQUEUE(parser, parser->tokens, token))
1752:         return 0;

1754:     return 1;
1755: }

1757: /*
1758:  * Produce the ALIAS or ANCHOR token.
1759:  */

1761: static int
1762: yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1763: {
1764:     yaml_token_t token;

1766:     /* An anchor or an alias could be a simple key. */

1768:     if (!yaml_parser_save_simple_key(parser))
1769:         return 0;

1771:     /* A simple key cannot follow an anchor or an alias. */

1773:     parser->simple_key_allowed = 0;

1775:     /* Create the ALIAS or ANCHOR token and append it to the queue. */

1777:     if (!yaml_parser_scan_anchor(parser, &token, type))
1778:         return 0;

1780:     if (!ENQUEUE(parser, parser->tokens, token)) {
1781:         yaml_token_delete(&token);
1782:         return 0;
1783:     }
1784:     return 1;
1785: }

1787: /*
1788:  * Produce the TAG token.
1789:  */

1791: static int
1792: yaml_parser_fetch_tag(yaml_parser_t *parser)
1793: {
1794:     yaml_token_t token;

1796:     /* A tag could be a simple key. */

1798:     if (!yaml_parser_save_simple_key(parser))
1799:         return 0;

1801:     /* A simple key cannot follow a tag. */

1803:     parser->simple_key_allowed = 0;

1805:     /* Create the TAG token and append it to the queue. */

1807:     if (!yaml_parser_scan_tag(parser, &token))
1808:         return 0;

1810:     if (!ENQUEUE(parser, parser->tokens, token)) {
1811:         yaml_token_delete(&token);
1812:         return 0;
1813:     }

1815:     return 1;
1816: }

1818: /*
1819:  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1820:  */

1822: static int
1823: yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1824: {
1825:     yaml_token_t token;

1827:     /* Remove any potential simple keys. */

1829:     if (!yaml_parser_remove_simple_key(parser))
1830:         return 0;

1832:     /* A simple key may follow a block scalar. */

1834:     parser->simple_key_allowed = 1;

1836:     /* Create the SCALAR token and append it to the queue. */

1838:     if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1839:         return 0;

1841:     if (!ENQUEUE(parser, parser->tokens, token)) {
1842:         yaml_token_delete(&token);
1843:         return 0;
1844:     }

1846:     return 1;
1847: }

1849: /*
1850:  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1851:  */

1853: static int
1854: yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1855: {
1856:     yaml_token_t token;

1858:     /* A plain scalar could be a simple key. */

1860:     if (!yaml_parser_save_simple_key(parser))
1861:         return 0;

1863:     /* A simple key cannot follow a flow scalar. */

1865:     parser->simple_key_allowed = 0;

1867:     /* Create the SCALAR token and append it to the queue. */

1869:     if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1870:         return 0;

1872:     if (!ENQUEUE(parser, parser->tokens, token)) {
1873:         yaml_token_delete(&token);
1874:         return 0;
1875:     }

1877:     return 1;
1878: }

1880: /*
1881:  * Produce the SCALAR(...,plain) token.
1882:  */

1884: static int
1885: yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1886: {
1887:     yaml_token_t token;

1889:     /* A plain scalar could be a simple key. */

1891:     if (!yaml_parser_save_simple_key(parser))
1892:         return 0;

1894:     /* A simple key cannot follow a flow scalar. */

1896:     parser->simple_key_allowed = 0;

1898:     /* Create the SCALAR token and append it to the queue. */

1900:     if (!yaml_parser_scan_plain_scalar(parser, &token))
1901:         return 0;

1903:     if (!ENQUEUE(parser, parser->tokens, token)) {
1904:         yaml_token_delete(&token);
1905:         return 0;
1906:     }

1908:     return 1;
1909: }

1911: /*
1912:  * Eat whitespaces and comments until the next token is found.
1913:  */

1915: static int
1916: yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1917: {
1918:     /* Until the next token is not found. */

1920:     while (1)
1921:     {
1922:         /* Allow the BOM mark to start a line. */

1924:         if (!CACHE(parser, 1)) return 0;

1926:         if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1927:             SKIP(parser);

1929:         /*
1930:          * Eat whitespaces.
1931:          *
1932:          * Tabs are allowed:
1933:          *
1934:          *  - in the flow context;
1935:          *  - in the block context, but not at the beginning of the line or
1936:          *  after '-', '?', or ':' (complex value).
1937:          */

1939:         if (!CACHE(parser, 1)) return 0;

1941:         while (CHECK(parser->buffer,' ') ||
1942:                 ((parser->flow_level || !parser->simple_key_allowed) &&
1943:                  CHECK(parser->buffer, '\t'))) {
1944:             SKIP(parser);
1945:             if (!CACHE(parser, 1)) return 0;
1946:         }

1948:         /* Eat a comment until a line break. */

1950:         if (CHECK(parser->buffer, '#')) {
1951:             while (!IS_BREAKZ(parser->buffer)) {
1952:                 SKIP(parser);
1953:                 if (!CACHE(parser, 1)) return 0;
1954:             }
1955:         }

1957:         /* If it is a line break, eat it. */

1959:         if (IS_BREAK(parser->buffer))
1960:         {
1961:             if (!CACHE(parser, 2)) return 0;
1962:             SKIP_LINE(parser);

1964:             /* In the block context, a new line may start a simple key. */

1966:             if (!parser->flow_level) parser->simple_key_allowed = 1;
1967:         }
1968:         else
1969:         {
1970:             /* We have found a token. */

1972:             break;
1973:         }
1974:     }

1976:     return 1;
1977: }

1979: /*
1980:  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1981:  *
1982:  * Scope:
1983:  *      %YAML    1.1    # a comment \n
1984:  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1985:  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
1986:  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1987:  */

1989: int
1990: yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
1991: {
1992:     yaml_mark_t start_mark, end_mark;
1993:     yaml_char_t *name = NULL;
1994:     int major, minor;
1995:     yaml_char_t *handle = NULL, *prefix = NULL;

1997:     /* Eat '%'. */

1999:     start_mark = parser->mark;

2001:     SKIP(parser);

2003:     /* Scan the directive name. */

2005:     if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2006:         goto error;

2008:     /* Is it a YAML directive? */

2010:     if (strcmp((char *)name, "YAML") == 0)
2011:     {
2012:         /* Scan the VERSION directive value. */

2014:         if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2015:                     &major, &minor))
2016:             goto error;

2018:         end_mark = parser->mark;

2020:         /* Create a VERSION-DIRECTIVE token. */

2022:         VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2023:                 start_mark, end_mark);
2024:     }

2026:     /* Is it a TAG directive? */

2028:     else if (strcmp((char *)name, "TAG") == 0)
2029:     {
2030:         /* Scan the TAG directive value. */

2032:         if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2033:                     &handle, &prefix))
2034:             goto error;

2036:         end_mark = parser->mark;

2038:         /* Create a TAG-DIRECTIVE token. */

2040:         TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2041:                 start_mark, end_mark);
2042:     }

2044:     /* Unknown directive. */

2046:     else
2047:     {
2048:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2049:                 start_mark, "found unknown directive name");
2050:         goto error;
2051:     }

2053:     /* Eat the rest of the line including any comments. */

2055:     if (!CACHE(parser, 1)) goto error;

2057:     while (IS_BLANK(parser->buffer)) {
2058:         SKIP(parser);
2059:         if (!CACHE(parser, 1)) goto error;
2060:     }

2062:     if (CHECK(parser->buffer, '#')) {
2063:         while (!IS_BREAKZ(parser->buffer)) {
2064:             SKIP(parser);
2065:             if (!CACHE(parser, 1)) goto error;
2066:         }
2067:     }

2069:     /* Check if we are at the end of the line. */

2071:     if (!IS_BREAKZ(parser->buffer)) {
2072:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2073:                 start_mark, "did not find expected comment or line break");
2074:         goto error;
2075:     }

2077:     /* Eat a line break. */

2079:     if (IS_BREAK(parser->buffer)) {
2080:         if (!CACHE(parser, 2)) goto error;
2081:         SKIP_LINE(parser);
2082:     }

2084:     yaml_free(name);

2086:     return 1;

2088: error:
2089:     yaml_free(prefix);
2090:     yaml_free(handle);
2091:     yaml_free(name);
2092:     return 0;
2093: }

2095: /*
2096:  * Scan the directive name.
2097:  *
2098:  * Scope:
2099:  *      %YAML   1.1     # a comment \n
2100:  *       ^^^^
2101:  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2102:  *       ^^^
2103:  */

2105: static int
2106: yaml_parser_scan_directive_name(yaml_parser_t *parser,
2107:         yaml_mark_t start_mark, yaml_char_t **name)
2108: {
2109:     yaml_string_t string = NULL_STRING;

2111:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2113:     /* Consume the directive name. */

2115:     if (!CACHE(parser, 1)) goto error;

2117:     while (IS_ALPHA(parser->buffer))
2118:     {
2119:         if (!READ(parser, string)) goto error;
2120:         if (!CACHE(parser, 1)) goto error;
2121:     }

2123:     /* Check if the name is empty. */

2125:     if (string.start == string.pointer) {
2126:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2127:                 start_mark, "could not find expected directive name");
2128:         goto error;
2129:     }

2131:     /* Check for an blank character after the name. */

2133:     if (!IS_BLANKZ(parser->buffer)) {
2134:         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2135:                 start_mark, "found unexpected non-alphabetical character");
2136:         goto error;
2137:     }

2139:     *name = string.start;

2141:     return 1;

2143: error:
2144:     STRING_DEL(parser, string);
2145:     return 0;
2146: }

2148: /*
2149:  * Scan the value of VERSION-DIRECTIVE.
2150:  *
2151:  * Scope:
2152:  *      %YAML   1.1     # a comment \n
2153:  *           ^^^^^^
2154:  */

2156: static int
2157: yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2158:         yaml_mark_t start_mark, int *major, int *minor)
2159: {
2160:     /* Eat whitespaces. */

2162:     if (!CACHE(parser, 1)) return 0;

2164:     while (IS_BLANK(parser->buffer)) {
2165:         SKIP(parser);
2166:         if (!CACHE(parser, 1)) return 0;
2167:     }

2169:     /* Consume the major version number. */

2171:     if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2172:         return 0;

2174:     /* Eat '.'. */

2176:     if (!CHECK(parser->buffer, '.')) {
2177:         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2178:                 start_mark, "did not find expected digit or '.' character");
2179:     }

2181:     SKIP(parser);

2183:     /* Consume the minor version number. */

2185:     if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2186:         return 0;

2188:     return 1;
2189: }

2191: #define MAX_NUMBER_LENGTH   9

2193: /*
2194:  * Scan the version number of VERSION-DIRECTIVE.
2195:  *
2196:  * Scope:
2197:  *      %YAML   1.1     # a comment \n
2198:  *              ^
2199:  *      %YAML   1.1     # a comment \n
2200:  *                ^
2201:  */

2203: static int
2204: yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2205:         yaml_mark_t start_mark, int *number)
2206: {
2207:     int value = 0;
2208:     size_t length = 0;

2210:     /* Repeat while the next character is digit. */

2212:     if (!CACHE(parser, 1)) return 0;

2214:     while (IS_DIGIT(parser->buffer))
2215:     {
2216:         /* Check if the number is too long. */

2218:         if (++length > MAX_NUMBER_LENGTH) {
2219:             return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2220:                     start_mark, "found extremely long version number");
2221:         }

2223:         value = value*10 + AS_DIGIT(parser->buffer);

2225:         SKIP(parser);

2227:         if (!CACHE(parser, 1)) return 0;
2228:     }

2230:     /* Check if the number was present. */

2232:     if (!length) {
2233:         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2234:                 start_mark, "did not find expected version number");
2235:     }

2237:     *number = value;

2239:     return 1;
2240: }

2242: /*
2243:  * Scan the value of a TAG-DIRECTIVE token.
2244:  *
2245:  * Scope:
2246:  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2247:  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2248:  */

2250: static int
2251: yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2252:         yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2253: {
2254:     yaml_char_t *handle_value = NULL;
2255:     yaml_char_t *prefix_value = NULL;

2257:     /* Eat whitespaces. */

2259:     if (!CACHE(parser, 1)) goto error;

2261:     while (IS_BLANK(parser->buffer)) {
2262:         SKIP(parser);
2263:         if (!CACHE(parser, 1)) goto error;
2264:     }

2266:     /* Scan a handle. */

2268:     if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2269:         goto error;

2271:     /* Expect a whitespace. */

2273:     if (!CACHE(parser, 1)) goto error;

2275:     if (!IS_BLANK(parser->buffer)) {
2276:         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2277:                 start_mark, "did not find expected whitespace");
2278:         goto error;
2279:     }

2281:     /* Eat whitespaces. */

2283:     while (IS_BLANK(parser->buffer)) {
2284:         SKIP(parser);
2285:         if (!CACHE(parser, 1)) goto error;
2286:     }

2288:     /* Scan a prefix. */

2290:     if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2291:         goto error;

2293:     /* Expect a whitespace or line break. */

2295:     if (!CACHE(parser, 1)) goto error;

2297:     if (!IS_BLANKZ(parser->buffer)) {
2298:         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2299:                 start_mark, "did not find expected whitespace or line break");
2300:         goto error;
2301:     }

2303:     *handle = handle_value;
2304:     *prefix = prefix_value;

2306:     return 1;

2308: error:
2309:     yaml_free(handle_value);
2310:     yaml_free(prefix_value);
2311:     return 0;
2312: }

2314: static int
2315: yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2316:         yaml_token_type_t type)
2317: {
2318:     int length = 0;
2319:     yaml_mark_t start_mark, end_mark;
2320:     yaml_string_t string = NULL_STRING;

2322:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2324:     /* Eat the indicator character. */

2326:     start_mark = parser->mark;

2328:     SKIP(parser);

2330:     /* Consume the value. */

2332:     if (!CACHE(parser, 1)) goto error;

2334:     while (IS_ALPHA(parser->buffer)) {
2335:         if (!READ(parser, string)) goto error;
2336:         if (!CACHE(parser, 1)) goto error;
2337:         length ++;
2338:     }

2340:     end_mark = parser->mark;

2342:     /*
2343:      * Check if length of the anchor is greater than 0 and it is followed by
2344:      * a whitespace character or one of the indicators:
2345:      *
2346:      *      '?', ':', ',', ']', '}', '%', '@', '`'.
2347:      */

2349:     if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2350:                 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2351:                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2352:                 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2353:                 || CHECK(parser->buffer, '`'))) {
2354:         yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2355:                 "while scanning an anchor" : "while scanning an alias", start_mark,
2356:                 "did not find expected alphabetic or numeric character");
2357:         goto error;
2358:     }

2360:     /* Create a token. */

2362:     if (type == YAML_ANCHOR_TOKEN) ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2363:     else {
2364:         ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2365:     }

2367:     return 1;

2369: error:
2370:     STRING_DEL(parser, string);
2371:     return 0;
2372: }

2374: /*
2375:  * Scan a TAG token.
2376:  */

2378: static int
2379: yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2380: {
2381:     yaml_char_t *handle = NULL;
2382:     yaml_char_t *suffix = NULL;
2383:     yaml_mark_t start_mark, end_mark;

2385:     start_mark = parser->mark;

2387:     /* Check if the tag is in the canonical form. */

2389:     if (!CACHE(parser, 2)) goto error;

2391:     if (CHECK_AT(parser->buffer, '<', 1))
2392:     {
2393:         /* Set the handle to '' */

2395:         handle = YAML_MALLOC(1);
2396:         if (!handle) goto error;
2397:         handle[0] = '\0';

2399:         /* Eat '!<' */

2401:         SKIP(parser);
2402:         SKIP(parser);

2404:         /* Consume the tag value. */

2406:         if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2407:             goto error;

2409:         /* Check for '>' and eat it. */

2411:         if (!CHECK(parser->buffer, '>')) {
2412:             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2413:                     start_mark, "did not find the expected '>'");
2414:             goto error;
2415:         }

2417:         SKIP(parser);
2418:     }
2419:     else
2420:     {
2421:         /* The tag has either the '!suffix' or the '!handle!suffix' form. */

2423:         /* First, try to scan a handle. */

2425:         if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2426:             goto error;

2428:         /* Check if it is, indeed, handle. */

2430:         if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2431:         {
2432:             /* Scan the suffix now. */

2434:             if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2435:                 goto error;
2436:         }
2437:         else
2438:         {
2439:             /* It wasn't a handle after all.  Scan the rest of the tag. */

2441:             if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2442:                 goto error;

2444:             /* Set the handle to '!'. */

2446:             yaml_free(handle);
2447:             handle = YAML_MALLOC(2);
2448:             if (!handle) goto error;
2449:             handle[0] = '!';
2450:             handle[1] = '\0';

2452:             /*
2453:              * A special case: the '!' tag.  Set the handle to '' and the
2454:              * suffix to '!'.
2455:              */

2457:             if (suffix[0] == '\0') {
2458:                 yaml_char_t *tmp = handle;
2459:                 handle = suffix;
2460:                 suffix = tmp;
2461:             }
2462:         }
2463:     }

2465:     /* Check the character which ends the tag. */

2467:     if (!CACHE(parser, 1)) goto error;

2469:     if (!IS_BLANKZ(parser->buffer)) {
2470:         if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2471:             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2472:                     start_mark, "did not find expected whitespace or line break");
2473:             goto error;
2474:         }
2475:     }

2477:     end_mark = parser->mark;

2479:     /* Create a token. */

2481:     TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);

2483:     return 1;

2485: error:
2486:     yaml_free(handle);
2487:     yaml_free(suffix);
2488:     return 0;
2489: }

2491: /*
2492:  * Scan a tag handle.
2493:  */

2495: static int
2496: yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2497:         yaml_mark_t start_mark, yaml_char_t **handle)
2498: {
2499:     yaml_string_t string = NULL_STRING;

2501:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2503:     /* Check the initial '!' character. */

2505:     if (!CACHE(parser, 1)) goto error;

2507:     if (!CHECK(parser->buffer, '!')) {
2508:         yaml_parser_set_scanner_error(parser, directive ?
2509:                 "while scanning a tag directive" : "while scanning a tag",
2510:                 start_mark, "did not find expected '!'");
2511:         goto error;
2512:     }

2514:     /* Copy the '!' character. */

2516:     if (!READ(parser, string)) goto error;

2518:     /* Copy all subsequent alphabetical and numerical characters. */

2520:     if (!CACHE(parser, 1)) goto error;

2522:     while (IS_ALPHA(parser->buffer))
2523:     {
2524:         if (!READ(parser, string)) goto error;
2525:         if (!CACHE(parser, 1)) goto error;
2526:     }

2528:     /* Check if the trailing character is '!' and copy it. */

2530:     if (CHECK(parser->buffer, '!'))
2531:     {
2532:         if (!READ(parser, string)) goto error;
2533:     }
2534:     else
2535:     {
2536:         /*
2537:          * It's either the '!' tag or not really a tag handle.  If it's a %TAG
2538:          * directive, it's an error.  If it's a tag token, it must be a part of
2539:          * URI.
2540:          */

2542:         if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2543:             yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2544:                     start_mark, "did not find expected '!'");
2545:             goto error;
2546:         }
2547:     }

2549:     *handle = string.start;

2551:     return 1;

2553: error:
2554:     STRING_DEL(parser, string);
2555:     return 0;
2556: }

2558: /*
2559:  * Scan a tag.
2560:  */

2562: static int
2563: yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2564:         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2565: {
2566:     size_t length = head ? strlen((char *)head) : 0;
2567:     yaml_string_t string = NULL_STRING;

2569:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;

2571:     /* Resize the string to include the head. */

2573:     while ((size_t)(string.end - string.start) <= length) {
2574:         if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2575:             parser->error = YAML_MEMORY_ERROR;
2576:             goto error;
2577:         }
2578:     }

2580:     /*
2581:      * Copy the head if needed.
2582:      *
2583:      * Note that we don't copy the leading '!' character.
2584:      */

2586:     if (length > 1) {
2587:         memcpy(string.start, head+1, length-1);
2588:         string.pointer += length-1;
2589:     }

2591:     /* Scan the tag. */

2593:     if (!CACHE(parser, 1)) goto error;

2595:     /*
2596:      * The set of characters that may appear in URI is as follows:
2597:      *
2598:      *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2599:      *      '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2600:      *
2601:      * If we are inside a verbatim tag <...> (parameter uri_char is true)
2602:      * then also the following flow indicators are allowed:
2603:      *      ',', '[', ']'
2604:      */

2606:     while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2607:             || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2608:             || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2609:             || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2610:             || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2611:             || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2612:             || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2613:             || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2614:             || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2615:             || (uri_char && (
2616:                 CHECK(parser->buffer, ',')
2617:                 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2618:                 )
2619:             ))
2620:     {
2621:         /* Check if it is a URI-escape sequence. */

2623:         if (CHECK(parser->buffer, '%')) {
2624:             if (!STRING_EXTEND(parser, string))
2625:                 goto error;

2627:             if (!yaml_parser_scan_uri_escapes(parser,
2628:                         directive, start_mark, &string)) goto error;
2629:         }
2630:         else {
2631:             if (!READ(parser, string)) goto error;
2632:         }

2634:         length ++;
2635:         if (!CACHE(parser, 1)) goto error;
2636:     }

2638:     /* Check if the tag is non-empty. */

2640:     if (!length) {
2641:         if (!STRING_EXTEND(parser, string))
2642:             goto error;

2644:         yaml_parser_set_scanner_error(parser, directive ?
2645:                 "while parsing a %TAG directive" : "while parsing a tag",
2646:                 start_mark, "did not find expected tag URI");
2647:         goto error;
2648:     }

2650:     *uri = string.start;

2652:     return 1;

2654: error:
2655:     STRING_DEL(parser, string);
2656:     return 0;
2657: }

2659: /*
2660:  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2661:  */

2663: static int
2664: yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2665:         yaml_mark_t start_mark, yaml_string_t *string)
2666: {
2667:     int width = 0;

2669:     /* Decode the required number of characters. */

2671:     do {

2673:         unsigned char octet = 0;

2675:         /* Check for a URI-escaped octet. */

2677:         if (!CACHE(parser, 3)) return 0;

2679:         if (!(CHECK(parser->buffer, '%')
2680:                     && IS_HEX_AT(parser->buffer, 1)
2681:                     && IS_HEX_AT(parser->buffer, 2))) {
2682:             return yaml_parser_set_scanner_error(parser, directive ?
2683:                     "while parsing a %TAG directive" : "while parsing a tag",
2684:                     start_mark, "did not find URI escaped octet");
2685:         }

2687:         /* Get the octet. */

2689:         octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);

2691:         /* If it is the leading octet, determine the length of the UTF-8 sequence. */

2693:         if (!width)
2694:         {
2695:             width = (octet & 0x80) == 0x00 ? 1 :
2696:                     (octet & 0xE0) == 0xC0 ? 2 :
2697:                     (octet & 0xF0) == 0xE0 ? 3 :
2698:                     (octet & 0xF8) == 0xF0 ? 4 : 0;
2699:             if (!width) {
2700:                 return yaml_parser_set_scanner_error(parser, directive ?
2701:                         "while parsing a %TAG directive" : "while parsing a tag",
2702:                         start_mark, "found an incorrect leading UTF-8 octet");
2703:             }
2704:         }
2705:         else
2706:         {
2707:             /* Check if the trailing octet is correct. */

2709:             if ((octet & 0xC0) != 0x80) {
2710:                 return yaml_parser_set_scanner_error(parser, directive ?
2711:                         "while parsing a %TAG directive" : "while parsing a tag",
2712:                         start_mark, "found an incorrect trailing UTF-8 octet");
2713:             }
2714:         }

2716:         /* Copy the octet and move the pointers. */

2718:         *(string->pointer++) = octet;
2719:         SKIP(parser);
2720:         SKIP(parser);
2721:         SKIP(parser);

2723:     } while (--width);

2725:     return 1;
2726: }

/*
 * Scan a block scalar.
 *
 * The parser is positioned on the block scalar indicator.  The header line
 * may carry a chomping indicator ('+' or '-') and an indentation indicator
 * (a digit other than '0') in either order, followed by optional blanks and
 * a comment.  The content lines are then collected into a SCALAR token.
 *
 * A non-zero literal argument selects YAML_LITERAL_SCALAR_STYLE; otherwise
 * the token has YAML_FOLDED_SCALAR_STYLE and line folding is applied while
 * the content is collected.
 *
 * Returns 1 on success and 0 on failure; on failure all three work strings
 * are released.
 */

static int
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
        int literal)
{
    yaml_mark_t start_mark;
    yaml_mark_t end_mark;
    yaml_string_t string = NULL_STRING;             /* The scalar content. */
    yaml_string_t leading_break = NULL_STRING;      /* The break ending the last content line. */
    yaml_string_t trailing_breaks = NULL_STRING;    /* The breaks of the empty lines after it. */
    int chomping = 0;           /* +1 for '+', -1 for '-', 0 if not specified. */
    int increment = 0;          /* The indentation indicator, 0 if not specified. */
    int indent = 0;             /* The content indentation, 0 until it is known. */
    int leading_blank = 0;      /* Did the previous content line start with a blank? */
    int trailing_blank = 0;     /* Does the current content line start with a blank? */

    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;

    /* Eat the indicator '|' or '>'. */

    start_mark = parser->mark;

    SKIP(parser);

    /* Scan the additional block scalar indicators. */

    if (!CACHE(parser, 1)) goto error;

    /* Check for a chomping indicator. */

    if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
    {
        /* Set the chomping method and eat the indicator. */

        chomping = CHECK(parser->buffer, '+') ? +1 : -1;

        SKIP(parser);

        /* Check for an indentation indicator. */

        if (!CACHE(parser, 1)) goto error;

        if (IS_DIGIT(parser->buffer))
        {
            /* Check that the indentation is greater than 0. */

            if (CHECK(parser->buffer, '0')) {
                yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                        start_mark, "found an indentation indicator equal to 0");
                goto error;
            }

            /* Get the indentation level and eat the indicator. */

            increment = AS_DIGIT(parser->buffer);

            SKIP(parser);
        }
    }

    /* Do the same as above, but in the opposite order. */

    else if (IS_DIGIT(parser->buffer))
    {
        /* Check that the indentation is greater than 0. */

        if (CHECK(parser->buffer, '0')) {
            yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                    start_mark, "found an indentation indicator equal to 0");
            goto error;
        }

        /* Get the indentation level and eat the indicator. */

        increment = AS_DIGIT(parser->buffer);

        SKIP(parser);

        /* Check for a chomping indicator after the digit. */

        if (!CACHE(parser, 1)) goto error;

        if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
            chomping = CHECK(parser->buffer, '+') ? +1 : -1;

            SKIP(parser);
        }
    }

    /* Eat whitespaces and comments to the end of the line. */

    if (!CACHE(parser, 1)) goto error;

    while (IS_BLANK(parser->buffer)) {
        SKIP(parser);
        if (!CACHE(parser, 1)) goto error;
    }

    if (CHECK(parser->buffer, '#')) {
        while (!IS_BREAKZ(parser->buffer)) {
            SKIP(parser);
            if (!CACHE(parser, 1)) goto error;
        }
    }

    /* Check if we are at the end of the line. */

    if (!IS_BREAKZ(parser->buffer)) {
        yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                start_mark, "did not find expected comment or line break");
        goto error;
    }

    /* Eat a line break. */

    if (IS_BREAK(parser->buffer)) {
        if (!CACHE(parser, 2)) goto error;
        SKIP_LINE(parser);
    }

    end_mark = parser->mark;

    /*
     * Set the indentation level if it was specified.  The indicator is
     * relative to the current indentation level of the parser, unless that
     * level is negative, in which case the indicator is used as is.
     */

    if (increment) indent = parser->indent >= 0 ? parser->indent+increment : increment;

    /*
     * Scan the leading line breaks and determine the indentation level if
     * needed (that is, if indent is still 0 at this point).
     */

    if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
                start_mark, &end_mark)) goto error;

    /* Scan the block scalar content. */

    if (!CACHE(parser, 1)) goto error;

    /* A line belongs to the scalar only if it starts exactly at the indentation column. */

    while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
    {
        /*
         * We are at the beginning of a non-empty line.
         */

        /* Does the current line start with a blank? */

        trailing_blank = IS_BLANK(parser->buffer);

        /*
         * Check if we need to fold the leading line break.  Folding is done
         * only for folded scalars, only if the break is a plain '\n', and
         * only if neither the previous nor the current line starts with a
         * blank.
         */

        if (!literal && (*leading_break.start == '\n')
                && !leading_blank && !trailing_blank)
        {
            /*
             * Do we need to join the lines by space?  Only if there are no
             * empty lines in between; otherwise the break is just dropped
             * and the empty lines are appended below.
             */

            if (*trailing_breaks.start == '\0') {
                if (!STRING_EXTEND(parser, string)) goto error;
                *(string.pointer ++) = ' ';
            }

            CLEAR(parser, leading_break);
        }
        else {
            /* No folding: keep the line break as it is. */

            if (!JOIN(parser, string, leading_break)) goto error;
            CLEAR(parser, leading_break);
        }

        /* Append the remaining line breaks. */

        if (!JOIN(parser, string, trailing_breaks)) goto error;
        CLEAR(parser, trailing_breaks);

        /*
         * Remember whether this line starts with a blank; the next iteration
         * needs it for the folding decision.
         */

        leading_blank = IS_BLANK(parser->buffer);

        /* Consume the current line. */

        while (!IS_BREAKZ(parser->buffer)) {
            if (!READ(parser, string)) goto error;
            if (!CACHE(parser, 1)) goto error;
        }

        /*
         * Consume the line break.  It is not appended to the content yet;
         * what happens to it depends on the next line or on the chomping.
         */

        if (!CACHE(parser, 2)) goto error;

        if (!READ_LINE(parser, leading_break)) goto error;

        /* Eat the following indentation spaces and line breaks. */

        if (!yaml_parser_scan_block_scalar_breaks(parser,
                    &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
    }

    /*
     * Chomp the tail.  With '-' nothing is kept, by default the final line
     * break is kept, and with '+' the trailing empty lines are kept as well.
     */

    if (chomping != -1) {
        if (!JOIN(parser, string, leading_break)) goto error;
    }
    if (chomping == 1) {
        if (!JOIN(parser, string, trailing_breaks)) goto error;
    }

    /* Create a token. */

    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
            literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
            start_mark, end_mark);

    /* The content buffer has been handed to the token; only the work strings are released. */

    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);

    return 1;

error:
    STRING_DEL(parser, string);
    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);

    return 0;
}

2947: /*
2948:  * Scan indentation spaces and line breaks for a block scalar.  Determine the
2949:  * indentation level if needed.
2950:  */

2952: static int
2953: yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2954:         int *indent, yaml_string_t *breaks,
2955:         yaml_mark_t start_mark, yaml_mark_t *end_mark)
2956: {
2957:     int max_indent = 0;

2959:     *end_mark = parser->mark;

2961:     /* Eat the indentation spaces and line breaks. */

2963:     while (1)
2964:     {
2965:         /* Eat the indentation spaces. */

2967:         if (!CACHE(parser, 1)) return 0;

2969:         while ((!*indent || (int)parser->mark.column < *indent)
2970:                 && IS_SPACE(parser->buffer)) {
2971:             SKIP(parser);
2972:             if (!CACHE(parser, 1)) return 0;
2973:         }

2975:         if ((int)parser->mark.column > max_indent)
2976:             max_indent = (int)parser->mark.column;

2978:         /* Check for a tab character messing the indentation. */

2980:         if ((!*indent || (int)parser->mark.column < *indent)
2981:                 && IS_TAB(parser->buffer)) {
2982:             return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2983:                     start_mark, "found a tab character where an indentation space is expected");
2984:         }

2986:         /* Have we found a non-empty line? */

2988:         if (!IS_BREAK(parser->buffer)) break;

2990:         /* Consume the line break. */

2992:         if (!CACHE(parser, 2)) return 0;
2993:         if (!READ_LINE(parser, *breaks)) return 0;
2994:         *end_mark = parser->mark;
2995:     }

2997:     /* Determine the indentation level if needed. */

2999:     if (!*indent) {
3000:         *indent = max_indent;
3001:         if (*indent < parser->indent + 1)
3002:             *indent = parser->indent + 1;
3003:         if (*indent < 1)
3004:             *indent = 1;
3005:     }

3007:    return 1;
3008: }

3010: /*
3011:  * Scan a quoted scalar.
3012:  */

3014: static int
3015: yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3016:         int single)
3017: {
3018:     yaml_mark_t start_mark;
3019:     yaml_mark_t end_mark;
3020:     yaml_string_t string = NULL_STRING;
3021:     yaml_string_t leading_break = NULL_STRING;
3022:     yaml_string_t trailing_breaks = NULL_STRING;
3023:     yaml_string_t whitespaces = NULL_STRING;
3024:     int leading_blanks;

3026:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3027:     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3028:     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3029:     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;

3031:     /* Eat the left quote. */

3033:     start_mark = parser->mark;

3035:     SKIP(parser);

3037:     /* Consume the content of the quoted scalar. */

3039:     while (1)
3040:     {
3041:         /* Check that there are no document indicators at the beginning of the line. */

3043:         if (!CACHE(parser, 4)) goto error;

3045:         if (parser->mark.column == 0 &&
3046:             ((CHECK_AT(parser->buffer, '-', 0) &&
3047:               CHECK_AT(parser->buffer, '-', 1) &&
3048:               CHECK_AT(parser->buffer, '-', 2)) ||
3049:              (CHECK_AT(parser->buffer, '.', 0) &&
3050:               CHECK_AT(parser->buffer, '.', 1) &&
3051:               CHECK_AT(parser->buffer, '.', 2))) &&
3052:             IS_BLANKZ_AT(parser->buffer, 3))
3053:         {
3054:             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3055:                     start_mark, "found unexpected document indicator");
3056:             goto error;
3057:         }

3059:         /* Check for EOF. */

3061:         if (IS_Z(parser->buffer)) {
3062:             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3063:                     start_mark, "found unexpected end of stream");
3064:             goto error;
3065:         }

3067:         /* Consume non-blank characters. */

3069:         if (!CACHE(parser, 2)) goto error;

3071:         leading_blanks = 0;

3073:         while (!IS_BLANKZ(parser->buffer))
3074:         {
3075:             /* Check for an escaped single quote. */

3077:             if (single && CHECK_AT(parser->buffer, '\'', 0)
3078:                     && CHECK_AT(parser->buffer, '\'', 1))
3079:             {
3080:                 if (!STRING_EXTEND(parser, string)) goto error;
3081:                 *(string.pointer++) = '\'';
3082:                 SKIP(parser);
3083:                 SKIP(parser);
3084:             }

3086:             /* Check for the right quote. */

3088:             else if (CHECK(parser->buffer, single ? '\'' : '"'))
3089:             {
3090:                 break;
3091:             }

3093:             /* Check for an escaped line break. */

3095:             else if (!single && CHECK(parser->buffer, '\\')
3096:                     && IS_BREAK_AT(parser->buffer, 1))
3097:             {
3098:                 if (!CACHE(parser, 3)) goto error;
3099:                 SKIP(parser);
3100:                 SKIP_LINE(parser);
3101:                 leading_blanks = 1;
3102:                 break;
3103:             }

3105:             /* Check for an escape sequence. */

3107:             else if (!single && CHECK(parser->buffer, '\\'))
3108:             {
3109:                 size_t code_length = 0;

3111:                 if (!STRING_EXTEND(parser, string)) goto error;

3113:                 /* Check the escape character. */

3115:                 switch (parser->buffer.pointer[1])
3116:                 {
3117:                     case '0':
3118:                         *(string.pointer++) = '\0';
3119:                         break;

3121:                     case 'a':
3122:                         *(string.pointer++) = '\x07';
3123:                         break;

3125:                     case 'b':
3126:                         *(string.pointer++) = '\x08';
3127:                         break;

3129:                     case 't':
3130:                     case '\t':
3131:                         *(string.pointer++) = '\x09';
3132:                         break;

3134:                     case 'n':
3135:                         *(string.pointer++) = '\x0A';
3136:                         break;

3138:                     case 'v':
3139:                         *(string.pointer++) = '\x0B';
3140:                         break;

3142:                     case 'f':
3143:                         *(string.pointer++) = '\x0C';
3144:                         break;

3146:                     case 'r':
3147:                         *(string.pointer++) = '\x0D';
3148:                         break;

3150:                     case 'e':
3151:                         *(string.pointer++) = '\x1B';
3152:                         break;

3154:                     case ' ':
3155:                         *(string.pointer++) = '\x20';
3156:                         break;

3158:                     case '"':
3159:                         *(string.pointer++) = '"';
3160:                         break;

3162:                     case '/':
3163:                         *(string.pointer++) = '/';
3164:                         break;

3166:                     case '\\':
3167:                         *(string.pointer++) = '\\';
3168:                         break;

3170:                     case 'N':   /* NEL (#x85) */
3171:                         *(string.pointer++) = (unsigned char)'\xC2';
3172:                         *(string.pointer++) = (unsigned char)'\x85';
3173:                         break;

3175:                     case '_':   /* #xA0 */
3176:                         *(string.pointer++) = (unsigned char)'\xC2';
3177:                         *(string.pointer++) = (unsigned char) '\xA0';
3178:                         break;

3180:                     case 'L':   /* LS (#x2028) */
3181:                         *(string.pointer++) = (unsigned char)'\xE2';
3182:                         *(string.pointer++) = (unsigned char)'\x80';
3183:                         *(string.pointer++) = (unsigned char)'\xA8';
3184:                         break;

3186:                     case 'P':   /* PS (#x2029) */
3187:                         *(string.pointer++) = (unsigned char)'\xE2';
3188:                         *(string.pointer++) = (unsigned char)'\x80';
3189:                         *(string.pointer++) = (unsigned char)'\xA9';
3190:                         break;

3192:                     case 'x':
3193:                         code_length = 2;
3194:                         break;

3196:                     case 'u':
3197:                         code_length = 4;
3198:                         break;

3200:                     case 'U':
3201:                         code_length = 8;
3202:                         break;

3204:                     default:
3205:                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3206:                                 start_mark, "found unknown escape character");
3207:                         goto error;
3208:                 }

3210:                 SKIP(parser);
3211:                 SKIP(parser);

3213:                 /* Consume an arbitrary escape code. */

3215:                 if (code_length)
3216:                 {
3217:                     unsigned int value = 0;
3218:                     size_t k;

3220:                     /* Scan the character value. */

3222:                     if (!CACHE(parser, code_length)) goto error;

3224:                     for (k = 0; k < code_length; k ++) {
3225:                         if (!IS_HEX_AT(parser->buffer, k)) {
3226:                             yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3227:                                     start_mark, "did not find expected hexadecimal number");
3228:                             goto error;
3229:                         }
3230:                         value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3231:                     }

3233:                     /* Check the value and write the character. */

3235:                     if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3236:                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237:                                 start_mark, "found invalid Unicode character escape code");
3238:                         goto error;
3239:                     }

3241:                     if (value <= 0x7F) *(string.pointer++) = value;
3242:                     else if (value <= 0x7FF) {
3243:                         *(string.pointer++) = 0xC0 + (value >> 6);
3244:                         *(string.pointer++) = 0x80 + (value & 0x3F);
3245:                     }
3246:                     else if (value <= 0xFFFF) {
3247:                         *(string.pointer++) = 0xE0 + (value >> 12);
3248:                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3249:                         *(string.pointer++) = 0x80 + (value & 0x3F);
3250:                     }
3251:                     else {
3252:                         *(string.pointer++) = 0xF0 + (value >> 18);
3253:                         *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3254:                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3255:                         *(string.pointer++) = 0x80 + (value & 0x3F);
3256:                     }

3258:                     /* Advance the pointer. */

3260:                     for (k = 0; k < code_length; k ++) SKIP(parser);
3261:                 }
3262:             }

3264:             else
3265:             {
3266:                 /* It is a non-escaped non-blank character. */

3268:                 if (!READ(parser, string)) goto error;
3269:             }

3271:             if (!CACHE(parser, 2)) goto error;
3272:         }

3274:         /* Check if we are at the end of the scalar. */

3276:         /* Fix for crash uninitialized value crash
3277:          * Credit for the bug and input is to OSS Fuzz
3278:          * Credit for the fix to Alex Gaynor
3279:          */
3280:         if (!CACHE(parser, 1)) goto error;
3281:         if (CHECK(parser->buffer, single ? '\'' : '"'))
3282:             break;

3284:         /* Consume blank characters. */

3286:         if (!CACHE(parser, 1)) goto error;

3288:         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3289:         {
3290:             if (IS_BLANK(parser->buffer))
3291:             {
3292:                 /* Consume a space or a tab character. */

3294:                 if (!leading_blanks) {
3295:                     if (!READ(parser, whitespaces)) goto error;
3296:                 }
3297:                 else {
3298:                     SKIP(parser);
3299:                 }
3300:             }
3301:             else
3302:             {
3303:                 if (!CACHE(parser, 2)) goto error;

3305:                 /* Check if it is a first line break. */

3307:                 if (!leading_blanks)
3308:                 {
3309:                     CLEAR(parser, whitespaces);
3310:                     if (!READ_LINE(parser, leading_break)) goto error;
3311:                     leading_blanks = 1;
3312:                 }
3313:                 else
3314:                 {
3315:                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3316:                 }
3317:             }
3318:             if (!CACHE(parser, 1)) goto error;
3319:         }

3321:         /* Join the whitespaces or fold line breaks. */

3323:         if (leading_blanks)
3324:         {
3325:             /* Do we need to fold line breaks? */

3327:             if (leading_break.start[0] == '\n') {
3328:                 if (trailing_breaks.start[0] == '\0') {
3329:                     if (!STRING_EXTEND(parser, string)) goto error;
3330:                     *(string.pointer++) = ' ';
3331:                 }
3332:                 else {
3333:                     if (!JOIN(parser, string, trailing_breaks)) goto error;
3334:                     CLEAR(parser, trailing_breaks);
3335:                 }
3336:                 CLEAR(parser, leading_break);
3337:             }
3338:             else {
3339:                 if (!JOIN(parser, string, leading_break)) goto error;
3340:                 if (!JOIN(parser, string, trailing_breaks)) goto error;
3341:                 CLEAR(parser, leading_break);
3342:                 CLEAR(parser, trailing_breaks);
3343:             }
3344:         }
3345:         else
3346:         {
3347:             if (!JOIN(parser, string, whitespaces)) goto error;
3348:             CLEAR(parser, whitespaces);
3349:         }
3350:     }

3352:     /* Eat the right quote. */

3354:     SKIP(parser);

3356:     end_mark = parser->mark;

3358:     /* Create a token. */

3360:     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3361:             single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3362:             start_mark, end_mark);

3364:     STRING_DEL(parser, leading_break);
3365:     STRING_DEL(parser, trailing_breaks);
3366:     STRING_DEL(parser, whitespaces);

3368:     return 1;

3370: error:
3371:     STRING_DEL(parser, string);
3372:     STRING_DEL(parser, leading_break);
3373:     STRING_DEL(parser, trailing_breaks);
3374:     STRING_DEL(parser, whitespaces);

3376:     return 0;
3377: }

3379: /*
3380:  * Scan a plain scalar.
       *
       * Collects the characters of an unquoted scalar, starting at the current
       * parser position, and initializes *token as a scalar token with
       * YAML_PLAIN_SCALAR_STYLE covering start_mark..end_mark.
       *
       * The scan ends successfully when one of the following is seen:
       *   - '---' or '...' at column 0 followed by a character matching
       *     IS_BLANKZ (a document indicator);
       *   - '#' at the very start of the scalar or right after a run of blanks
       *     and/or line breaks;
       *   - ':' followed by a character matching IS_BLANKZ;
       *   - ',', '[', ']', '{' or '}' while parser->flow_level is non-zero;
       *   - a character that stops the non-blank run without being a blank or
       *     a line break (presumably the end of the stream; IS_BLANKZ is
       *     defined outside this section);
       *   - in the block context (flow_level == 0), a position whose column is
       *     less than parser->indent + 1 once blanks and breaks were consumed.
       *
       * The scan fails when, in the flow context, a ':' is directly followed by
       * one of ',', '?', '[', ']', '{', '}'; when a tab appears after a line
       * break at a column below the required indentation; or when any of the
       * CACHE / READ / READ_LINE / JOIN / STRING_* helpers reports failure.
       *
       * Blanks between words of one line are kept as they are; line breaks
       * between content are folded (see the comments in the body).  Blanks and
       * breaks that follow the last copied character are consumed from the
       * input but are never appended to the value.
       *
       * Side effect: parser->simple_key_allowed is set to 1 if the scan ended
       * with an unflushed line break pending (leading_blanks still set).
       *
       * Returns 1 on success and 0 on failure.  On success the buffer of
       * `string` is passed to the token and is not released here (presumably
       * the token owns it from then on); on failure every working buffer,
       * including `string`, is released.
3381:  */

3383: static int
3384: yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3385: {
3386:     yaml_mark_t start_mark;                       /* Where the scalar begins. */
3387:     yaml_mark_t end_mark;                         /* Just past the last copied character. */
3388:     yaml_string_t string = NULL_STRING;           /* The scalar value being built. */
3389:     yaml_string_t leading_break = NULL_STRING;    /* First line break of a pending run. */
3390:     yaml_string_t trailing_breaks = NULL_STRING;  /* Further line breaks of that run. */
3391:     yaml_string_t whitespaces = NULL_STRING;      /* Pending blanks inside a line. */
3392:     int leading_blanks = 0;                       /* Non-zero while a break run is pending. */
3393:     int indent = parser->indent+1;                /* Lowest column allowed for continuation content. */

      /* Set up the four working strings; any failure goes straight to cleanup. */

3395:     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3396:     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3397:     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3398:     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;

      /* Both marks start at the current position; end_mark moves on as
       * characters are copied. */

3400:     start_mark = end_mark = parser->mark;

3402:     /* Consume the content of the plain scalar.  Each iteration handles one
       * run of non-blank characters followed by one run of blanks/breaks. */

3404:     while (1)
3405:     {
3406:         /* Check for a document indicator: '---' or '...' at column 0 followed
       *    by a character matching IS_BLANKZ.  The test looks at four
       *    characters, hence CACHE(parser, 4) (CACHE is defined outside this
       *    section; here a zero result aborts the scan). */

3408:         if (!CACHE(parser, 4)) goto error;

3410:         if (parser->mark.column == 0 &&
3411:             ((CHECK_AT(parser->buffer, '-', 0) &&
3412:               CHECK_AT(parser->buffer, '-', 1) &&
3413:               CHECK_AT(parser->buffer, '-', 2)) ||
3414:              (CHECK_AT(parser->buffer, '.', 0) &&
3415:               CHECK_AT(parser->buffer, '.', 1) &&
3416:               CHECK_AT(parser->buffer, '.', 2))) &&
3417:             IS_BLANKZ_AT(parser->buffer, 3)) break;

3419:         /* Check for a comment.  '#' is tested only here, i.e. at the start of
       *    the scalar or after blanks/breaks; the inner loop below does not
       *    treat '#' specially, so a '#' inside a word is copied as content. */

3421:         if (CHECK(parser->buffer, '#'))
3422:             break;

3424:         /* Consume non-blank characters. */

3426:         while (!IS_BLANKZ(parser->buffer))
3427:         {
3428:             /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
3429:              * This is not completely according to the spec
3430:              * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
3431:              */

3433:             if (parser->flow_level
3434:                     && CHECK(parser->buffer, ':')
3435:                     && (
3436:                         CHECK_AT(parser->buffer, ',', 1)
3437:                         || CHECK_AT(parser->buffer, '?', 1)
3438:                         || CHECK_AT(parser->buffer, '[', 1)
3439:                         || CHECK_AT(parser->buffer, ']', 1)
3440:                         || CHECK_AT(parser->buffer, '{', 1)
3441:                         || CHECK_AT(parser->buffer, '}', 1)
3442:                     )
3443:                     ) {
3444:                 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3445:                         start_mark, "found unexpected ':'");
3446:                 goto error;
3447:             }

3449:             /* Check for indicators that may end a plain scalar: ':' followed by
       *        a character matching IS_BLANKZ in any context, or one of
       *        ',', '[', ']', '{', '}' in the flow context.  The indicator
       *        itself is left unread. */

3451:             if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3452:                     || (parser->flow_level &&
3453:                         (CHECK(parser->buffer, ',')
3454:                          || CHECK(parser->buffer, '[')
3455:                          || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3456:                          || CHECK(parser->buffer, '}'))))
3457:                 break;

3459:             /* Check if we need to join whitespaces and breaks.  Pending blanks
       *        and breaks are flushed into the value only at this point, when
       *        another content character is about to follow them. */

3461:             if (leading_blanks || whitespaces.start != whitespaces.pointer)
3462:             {
3463:                 if (leading_blanks)
3464:                 {
3465:                     /* Do we need to fold line breaks?
       *
       *                If the first break begins with '\n':
       *                  - with no further breaks, the break becomes one space;
       *                  - otherwise only the further breaks are appended and
       *                    the first one is dropped.
       *                If the first break begins with anything else, the
       *                first break and the further breaks are all appended. */

3467:                     if (leading_break.start[0] == '\n') {
3468:                         if (trailing_breaks.start[0] == '\0') {
3469:                             if (!STRING_EXTEND(parser, string)) goto error;
3470:                             *(string.pointer++) = ' ';
3471:                         }
3472:                         else {
3473:                             if (!JOIN(parser, string, trailing_breaks)) goto error;
3474:                             CLEAR(parser, trailing_breaks);
3475:                         }
3476:                         CLEAR(parser, leading_break);
3477:                     }
3478:                     else {
3479:                         if (!JOIN(parser, string, leading_break)) goto error;
3480:                         if (!JOIN(parser, string, trailing_breaks)) goto error;
3481:                         CLEAR(parser, leading_break);
3482:                         CLEAR(parser, trailing_breaks);
3483:                     }

3485:                     leading_blanks = 0;    /* The pending break run has been flushed. */
3486:                 }
3487:                 else
3488:                 {
      /* Same-line blanks are appended unchanged. */
3489:                     if (!JOIN(parser, string, whitespaces)) goto error;
3490:                     CLEAR(parser, whitespaces);
3491:                 }
3492:             }

3494:             /* Copy the character. */

3496:             if (!READ(parser, string)) goto error;

3498:             end_mark = parser->mark;    /* The token ends after the last copied character. */

      /* The checks at the top of this loop look at the current and the
       * next character. */

3500:             if (!CACHE(parser, 2)) goto error;
3501:         }

3503:         /* Is it the end?  The run above stopped on something that is neither a
       *    blank nor a line break: an indicator or, presumably, the end of
       *    the stream. */

3505:         if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3506:             break;

3508:         /* Consume blank characters. */

3510:         if (!CACHE(parser, 1)) goto error;

3512:         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3513:         {
3514:             if (IS_BLANK(parser->buffer))
3515:             {
3516:                 /* Check for tab characters that abuse indentation: after a line
       *            break, a tab at a column below `indent` is an error. */

3518:                 if (leading_blanks && (int)parser->mark.column < indent
3519:                         && IS_TAB(parser->buffer)) {
3520:                     yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3521:                             start_mark, "found a tab character that violates indentation");
3522:                     goto error;
3523:                 }

3525:                 /* Consume a space or a tab character.  Blanks seen before any
       *            line break are remembered in `whitespaces`; blanks after
       *            a line break are skipped without being stored. */

3527:                 if (!leading_blanks) {
3528:                     if (!READ(parser, whitespaces)) goto error;
3529:                 }
3530:                 else {
3531:                     SKIP(parser);
3532:                 }
3533:             }
3534:             else
3535:             {
3536:                 if (!CACHE(parser, 2)) goto error;

3538:                 /* Check if it is a first line break.  The first break is stored
       *            in leading_break and discards the blanks collected before
       *            it; every later break is added to trailing_breaks. */

3540:                 if (!leading_blanks)
3541:                 {
3542:                     CLEAR(parser, whitespaces);
3543:                     if (!READ_LINE(parser, leading_break)) goto error;
3544:                     leading_blanks = 1;
3545:                 }
3546:                 else
3547:                 {
3548:                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3549:                 }
3550:             }
3551:             if (!CACHE(parser, 1)) goto error;
3552:         }

3554:         /* Check indentation level.  In the block context the scalar ends when
       *    the current column is below `indent` (parser->indent + 1). */

3556:         if (!parser->flow_level && (int)parser->mark.column < indent)
3557:             break;
3558:     }

3560:     /* Create a token.  The value is string.start with length
       * string.pointer - string.start. */

3562:     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3563:             YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);

3565:     /* Note that we change the 'simple_key_allowed' flag.  It is set when a
       * line break was consumed after the last copied character. */

3567:     if (leading_blanks) parser->simple_key_allowed = 1;

      /* Release the helper strings only; the buffer of `string` was passed to
       * the token above. */

3569:     STRING_DEL(parser, leading_break);
3570:     STRING_DEL(parser, trailing_breaks);
3571:     STRING_DEL(parser, whitespaces);

3573:     return 1;

      /* Failure: release every working string, including the partial value. */

3575: error:
3576:     STRING_DEL(parser, string);
3577:     STRING_DEL(parser, leading_break);
3578:     STRING_DEL(parser, trailing_breaks);
3579:     STRING_DEL(parser, whitespaces);

3581:     return 0;
3582: }