CARLA
pugixml.cpp
Go to the documentation of this file.
1 /**
2  * pugixml parser - version 1.9
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16 
17 #include "pugixml.hpp"
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24 
25 #ifdef PUGIXML_WCHAR_MODE
26 # include <wchar.h>
27 #endif
28 
29 #ifndef PUGIXML_NO_XPATH
30 # include <math.h>
31 # include <float.h>
32 #endif
33 
34 #ifndef PUGIXML_NO_STL
35 # include <istream>
36 # include <ostream>
37 # include <string>
38 #endif
39 
40 // For placement new
41 #include <new>
42 
43 #if defined(__clang__)
44 # pragma clang diagnostic push
45 # pragma clang diagnostic ignored "-Wconversion"
46 # pragma clang diagnostic ignored "-Wdouble-promotion"
47 #endif
48 
49 #ifdef _MSC_VER
50 # pragma warning(push)
51 # pragma warning(disable: 4127) // conditional expression is constant
52 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
53 # pragma warning(disable: 4702) // unreachable code
54 # pragma warning(disable: 4996) // this function or variable may be unsafe
55 #endif
56 
57 #if defined(_MSC_VER) && defined(__c2__)
58 # pragma clang diagnostic push
59 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
60 #endif
61 
62 #ifdef __INTEL_COMPILER
63 # pragma warning(disable: 177) // function was declared but never referenced
64 # pragma warning(disable: 279) // controlling expression is constant
65 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
66 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
67 #endif
68 
69 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
70 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
71 #endif
72 
73 #ifdef __BORLANDC__
74 # pragma option push
75 # pragma warn -8008 // condition is always false
76 # pragma warn -8066 // unreachable code
77 #endif
78 
79 #ifdef __SNC__
80 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
81 # pragma diag_suppress=178 // function was declared but never referenced
82 # pragma diag_suppress=237 // controlling expression is constant
83 #endif
84 
85 #ifdef __TI_COMPILER_VERSION__
86 # pragma diag_suppress 179 // function was declared but never referenced
87 #endif
88 
89 // Inlining controls
90 #if defined(_MSC_VER) && _MSC_VER >= 1300
91 # define PUGI__NO_INLINE __declspec(noinline)
92 #elif defined(__GNUC__)
93 # define PUGI__NO_INLINE __attribute__((noinline))
94 #else
95 # define PUGI__NO_INLINE
96 #endif
97 
98 // Branch weight controls
99 #if defined(__GNUC__) && !defined(__c2__)
100 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
101 #else
102 # define PUGI__UNLIKELY(cond) (cond)
103 #endif
104 
105 // Simple static assertion
106 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
107 
108 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
109 #ifdef __DMC__
110 # define PUGI__DMC_VOLATILE volatile
111 #else
112 # define PUGI__DMC_VOLATILE
113 #endif
114 
115 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
116 #if defined(__clang__) && defined(__has_attribute)
117 # if __has_attribute(no_sanitize)
118 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
119 # else
120 # define PUGI__UNSIGNED_OVERFLOW
121 # endif
122 #else
123 # define PUGI__UNSIGNED_OVERFLOW
124 #endif
125 
126 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
127 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
128 using std::memcpy;
129 using std::memmove;
130 using std::memset;
131 #endif
132 
133 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
134 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
135 # define LLONG_MIN (-LLONG_MAX - 1LL)
136 # define LLONG_MAX __LONG_LONG_MAX__
137 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
138 #endif
139 
140 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
141 #if defined(_MSC_VER) && !defined(__S3E__)
142 # define PUGI__MSVC_CRT_VERSION _MSC_VER
143 #endif
144 
145 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
146 #if __cplusplus >= 201103
147 # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
148 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
149 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
150 #else
151 # define PUGI__SNPRINTF sprintf
152 #endif
153 
154 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
155 #ifdef PUGIXML_HEADER_ONLY
156 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
157 # define PUGI__NS_END } }
158 # define PUGI__FN inline
159 # define PUGI__FN_NO_INLINE inline
160 #else
161 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
162 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
163 # define PUGI__NS_END } }
164 # else
165 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
166 # define PUGI__NS_END } } }
167 # endif
168 # define PUGI__FN
169 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
170 #endif
171 
172 // uintptr_t
173 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
174 namespace pugi
175 {
176 # ifndef _UINTPTR_T_DEFINED
177  typedef size_t uintptr_t;
178 # endif
179 
180  typedef unsigned __int8 uint8_t;
181  typedef unsigned __int16 uint16_t;
182  typedef unsigned __int32 uint32_t;
183 }
184 #else
185 # include <stdint.h>
186 #endif
187 
188 // Memory allocation
190  PUGI__FN void* default_allocate(size_t size)
191  {
192  return malloc(size);
193  }
194 
195  PUGI__FN void default_deallocate(void* ptr)
196  {
197  free(ptr);
198  }
199 
200  template <typename T>
202  {
205  };
206 
207  // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
208  // Without a template<> we'll get multiple definitions of the same static
211 
214 
215 // String utilities
217  // Get string length
218  PUGI__FN size_t strlength(const char_t* s)
219  {
220  assert(s);
221 
222  #ifdef PUGIXML_WCHAR_MODE
223  return wcslen(s);
224  #else
225  return strlen(s);
226  #endif
227  }
228 
229  // Compare two strings
230  PUGI__FN bool strequal(const char_t* src, const char_t* dst)
231  {
232  assert(src && dst);
233 
234  #ifdef PUGIXML_WCHAR_MODE
235  return wcscmp(src, dst) == 0;
236  #else
237  return strcmp(src, dst) == 0;
238  #endif
239  }
240 
241  // Compare lhs with [rhs_begin, rhs_end)
242  PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
243  {
244  for (size_t i = 0; i < count; ++i)
245  if (lhs[i] != rhs[i])
246  return false;
247 
248  return lhs[count] == 0;
249  }
250 
251  // Get length of wide string, even if CRT lacks wide character support
252  PUGI__FN size_t strlength_wide(const wchar_t* s)
253  {
254  assert(s);
255 
256  #ifdef PUGIXML_WCHAR_MODE
257  return wcslen(s);
258  #else
259  const wchar_t* end = s;
260  while (*end) end++;
261  return static_cast<size_t>(end - s);
262  #endif
263  }
265 
266 // auto_ptr-like object for exception recovery
268  template <typename T> struct auto_deleter
269  {
270  typedef void (*D)(T*);
271 
272  T* data;
274 
275  auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
276  {
277  }
278 
280  {
281  if (data) deleter(data);
282  }
283 
284  T* release()
285  {
286  T* result = data;
287  data = 0;
288  return result;
289  }
290  };
292 
293 #ifdef PUGIXML_COMPACT
295  class compact_hash_table
296  {
297  public:
298  compact_hash_table(): _items(0), _capacity(0), _count(0)
299  {
300  }
301 
302  void clear()
303  {
304  if (_items)
305  {
306  xml_memory::deallocate(_items);
307  _items = 0;
308  _capacity = 0;
309  _count = 0;
310  }
311  }
312 
313  void* find(const void* key)
314  {
315  if (_capacity == 0) return 0;
316 
317  item_t* item = get_item(key);
318  assert(item);
319  assert(item->key == key || (item->key == 0 && item->value == 0));
320 
321  return item->value;
322  }
323 
324  void insert(const void* key, void* value)
325  {
326  assert(_capacity != 0 && _count < _capacity - _capacity / 4);
327 
328  item_t* item = get_item(key);
329  assert(item);
330 
331  if (item->key == 0)
332  {
333  _count++;
334  item->key = key;
335  }
336 
337  item->value = value;
338  }
339 
340  bool reserve(size_t extra = 16)
341  {
342  if (_count + extra >= _capacity - _capacity / 4)
343  return rehash(_count + extra);
344 
345  return true;
346  }
347 
348  private:
349  struct item_t
350  {
351  const void* key;
352  void* value;
353  };
354 
355  item_t* _items;
356  size_t _capacity;
357 
358  size_t _count;
359 
360  bool rehash(size_t count);
361 
362  item_t* get_item(const void* key)
363  {
364  assert(key);
365  assert(_capacity > 0);
366 
367  size_t hashmod = _capacity - 1;
368  size_t bucket = hash(key) & hashmod;
369 
370  for (size_t probe = 0; probe <= hashmod; ++probe)
371  {
372  item_t& probe_item = _items[bucket];
373 
374  if (probe_item.key == key || probe_item.key == 0)
375  return &probe_item;
376 
377  // hash collision, quadratic probing
378  bucket = (bucket + probe + 1) & hashmod;
379  }
380 
381  assert(false && "Hash table is full"); // unreachable
382  return 0;
383  }
384 
385  static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
386  {
387  unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
388 
389  // MurmurHash3 32-bit finalizer
390  h ^= h >> 16;
391  h *= 0x85ebca6bu;
392  h ^= h >> 13;
393  h *= 0xc2b2ae35u;
394  h ^= h >> 16;
395 
396  return h;
397  }
398  };
399 
400  PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
401  {
402  size_t capacity = 32;
403  while (count >= capacity - capacity / 4)
404  capacity *= 2;
405 
406  compact_hash_table rt;
407  rt._capacity = capacity;
408  rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
409 
410  if (!rt._items)
411  return false;
412 
413  memset(rt._items, 0, sizeof(item_t) * capacity);
414 
415  for (size_t i = 0; i < _capacity; ++i)
416  if (_items[i].key)
417  rt.insert(_items[i].key, _items[i].value);
418 
419  if (_items)
420  xml_memory::deallocate(_items);
421 
422  _capacity = capacity;
423  _items = rt._items;
424 
425  assert(_count == rt._count);
426 
427  return true;
428  }
429 
431 #endif
432 
434 #ifdef PUGIXML_COMPACT
435  static const uintptr_t xml_memory_block_alignment = 4;
436 #else
437  static const uintptr_t xml_memory_block_alignment = sizeof(void*);
438 #endif
439 
440  // extra metadata bits
441  static const uintptr_t xml_memory_page_contents_shared_mask = 64;
442  static const uintptr_t xml_memory_page_name_allocated_mask = 32;
443  static const uintptr_t xml_memory_page_value_allocated_mask = 16;
444  static const uintptr_t xml_memory_page_type_mask = 15;
445 
446  // combined masks for string uniqueness
449 
450 #ifdef PUGIXML_COMPACT
451  #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
452  #define PUGI__GETPAGE_IMPL(header) (header).get_page()
453 #else
454  #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
455  // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
456  #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
457 #endif
458 
459  #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
460  #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
461 
462  struct xml_allocator;
463 
465  {
466  static xml_memory_page* construct(void* memory)
467  {
468  xml_memory_page* result = static_cast<xml_memory_page*>(memory);
469 
470  result->allocator = 0;
471  result->prev = 0;
472  result->next = 0;
473  result->busy_size = 0;
474  result->freed_size = 0;
475 
476  #ifdef PUGIXML_COMPACT
477  result->compact_string_base = 0;
478  result->compact_shared_parent = 0;
479  result->compact_page_marker = 0;
480  #endif
481 
482  return result;
483  }
484 
486 
489 
490  size_t busy_size;
491  size_t freed_size;
492 
493  #ifdef PUGIXML_COMPACT
494  char_t* compact_string_base;
495  void* compact_shared_parent;
496  uint32_t* compact_page_marker;
497  #endif
498  };
499 
500  static const size_t xml_memory_page_size =
501  #ifdef PUGIXML_MEMORY_PAGE_SIZE
502  (PUGIXML_MEMORY_PAGE_SIZE)
503  #else
504  32768
505  #endif
506  - sizeof(xml_memory_page);
507 
509  {
510  uint16_t page_offset; // offset from page->data
511  uint16_t full_size; // 0 if string occupies whole page
512  };
513 
515  {
516  xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
517  {
518  #ifdef PUGIXML_COMPACT
519  _hash = 0;
520  #endif
521  }
522 
523  xml_memory_page* allocate_page(size_t data_size)
524  {
525  size_t size = sizeof(xml_memory_page) + data_size;
526 
527  // allocate block with some alignment, leaving memory for worst-case padding
528  void* memory = xml_memory::allocate(size);
529  if (!memory) return 0;
530 
531  // prepare page structure
533  assert(page);
534 
535  page->allocator = _root->allocator;
536 
537  return page;
538  }
539 
540  static void deallocate_page(xml_memory_page* page)
541  {
543  }
544 
545  void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
546 
547  void* allocate_memory(size_t size, xml_memory_page*& out_page)
548  {
549  if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
550  return allocate_memory_oob(size, out_page);
551 
552  void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
553 
554  _busy_size += size;
555 
556  out_page = _root;
557 
558  return buf;
559  }
560 
561  #ifdef PUGIXML_COMPACT
562  void* allocate_object(size_t size, xml_memory_page*& out_page)
563  {
564  void* result = allocate_memory(size + sizeof(uint32_t), out_page);
565  if (!result) return 0;
566 
567  // adjust for marker
568  ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
569 
570  if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
571  {
572  // insert new marker
573  uint32_t* marker = static_cast<uint32_t*>(result);
574 
575  *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
576  out_page->compact_page_marker = marker;
577 
578  // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
579  // this will make sure deallocate_memory correctly tracks the size
580  out_page->freed_size += sizeof(uint32_t);
581 
582  return marker + 1;
583  }
584  else
585  {
586  // roll back uint32_t part
587  _busy_size -= sizeof(uint32_t);
588 
589  return result;
590  }
591  }
592  #else
593  void* allocate_object(size_t size, xml_memory_page*& out_page)
594  {
595  return allocate_memory(size, out_page);
596  }
597  #endif
598 
599  void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
600  {
601  if (page == _root) page->busy_size = _busy_size;
602 
603  assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
604  (void)!ptr;
605 
606  page->freed_size += size;
607  assert(page->freed_size <= page->busy_size);
608 
609  if (page->freed_size == page->busy_size)
610  {
611  if (page->next == 0)
612  {
613  assert(_root == page);
614 
615  // top page freed, just reset sizes
616  page->busy_size = 0;
617  page->freed_size = 0;
618 
619  #ifdef PUGIXML_COMPACT
620  // reset compact state to maximize efficiency
621  page->compact_string_base = 0;
622  page->compact_shared_parent = 0;
623  page->compact_page_marker = 0;
624  #endif
625 
626  _busy_size = 0;
627  }
628  else
629  {
630  assert(_root != page);
631  assert(page->prev);
632 
633  // remove from the list
634  page->prev->next = page->next;
635  page->next->prev = page->prev;
636 
637  // deallocate
638  deallocate_page(page);
639  }
640  }
641  }
642 
643  char_t* allocate_string(size_t length)
644  {
645  static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
646 
647  PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
648 
649  // allocate memory for string and header block
650  size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
651 
652  // round size up to block alignment boundary
653  size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
654 
655  xml_memory_page* page;
656  xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
657 
658  if (!header) return 0;
659 
660  // setup header
661  ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
662 
663  assert(page_offset % xml_memory_block_alignment == 0);
664  assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
665  header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
666 
667  // full_size == 0 for large strings that occupy the whole page
668  assert(full_size % xml_memory_block_alignment == 0);
669  assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
670  header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
671 
672  // round-trip through void* to avoid 'cast increases required alignment of target type' warning
673  // header is guaranteed a pointer-sized alignment, which should be enough for char_t
674  return static_cast<char_t*>(static_cast<void*>(header + 1));
675  }
676 
677  void deallocate_string(char_t* string)
678  {
679  // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
680  // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
681 
682  // get header
683  xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
684  assert(header);
685 
686  // deallocate
687  size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
688  xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
689 
690  // if full_size == 0 then this string occupies the whole page
691  size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
692 
693  deallocate_memory(header, full_size, page);
694  }
695 
// In compact mode, asks the hash table to reserve room and forwards its result;
// otherwise there is nothing to reserve and the call always succeeds.
bool reserve()
{
    bool ok = true;

    #ifdef PUGIXML_COMPACT
    ok = _hash->reserve();
    #endif

    return ok;
}
704 
706  size_t _busy_size;
707 
708  #ifdef PUGIXML_COMPACT
709  compact_hash_table* _hash;
710  #endif
711  };
712 
714  {
715  const size_t large_allocation_threshold = xml_memory_page_size / 4;
716 
717  xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
718  out_page = page;
719 
720  if (!page) return 0;
721 
722  if (size <= large_allocation_threshold)
723  {
724  _root->busy_size = _busy_size;
725 
726  // insert page at the end of linked list
727  page->prev = _root;
728  _root->next = page;
729  _root = page;
730 
731  _busy_size = size;
732  }
733  else
734  {
735  // insert page before the end of linked list, so that it is deleted as soon as possible
736  // the last page is not deleted even if it's empty (see deallocate_memory)
737  assert(_root->prev);
738 
739  page->prev = _root->prev;
740  page->next = _root;
741 
742  _root->prev->next = page;
743  _root->prev = page;
744 
745  page->busy_size = size;
746  }
747 
748  return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
749  }
751 
752 #ifdef PUGIXML_COMPACT
754  static const uintptr_t compact_alignment_log2 = 2;
755  static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
756 
757  class compact_header
758  {
759  public:
760  compact_header(xml_memory_page* page, unsigned int flags)
761  {
762  PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
763 
764  ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
765  assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
766 
767  _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
768  _flags = static_cast<unsigned char>(flags);
769  }
770 
771  void operator&=(uintptr_t mod)
772  {
773  _flags &= static_cast<unsigned char>(mod);
774  }
775 
776  void operator|=(uintptr_t mod)
777  {
778  _flags |= static_cast<unsigned char>(mod);
779  }
780 
781  uintptr_t operator&(uintptr_t mod) const
782  {
783  return _flags & mod;
784  }
785 
786  xml_memory_page* get_page() const
787  {
788  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
789  const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
790  const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
791 
792  return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
793  }
794 
795  private:
796  unsigned char _page;
797  unsigned char _flags;
798  };
799 
800  PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
801  {
802  const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
803 
804  return header->get_page();
805  }
806 
807  template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
808  {
809  return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
810  }
811 
812  template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
813  {
814  compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
815  }
816 
817  template <typename T, int header_offset, int start = -126> class compact_pointer
818  {
819  public:
820  compact_pointer(): _data(0)
821  {
822  }
823 
824  void operator=(const compact_pointer& rhs)
825  {
826  *this = rhs + 0;
827  }
828 
829  void operator=(T* value)
830  {
831  if (value)
832  {
833  // value is guaranteed to be compact-aligned; 'this' is not
834  // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
835  // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
836  // compensate for arithmetic shift rounding for negative values
837  ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
838  ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
839 
840  if (static_cast<uintptr_t>(offset) <= 253)
841  _data = static_cast<unsigned char>(offset + 1);
842  else
843  {
844  compact_set_value<header_offset>(this, value);
845 
846  _data = 255;
847  }
848  }
849  else
850  _data = 0;
851  }
852 
853  operator T*() const
854  {
855  if (_data)
856  {
857  if (_data < 255)
858  {
859  uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
860 
861  return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
862  }
863  else
864  return compact_get_value<header_offset, T>(this);
865  }
866  else
867  return 0;
868  }
869 
870  T* operator->() const
871  {
872  return *this;
873  }
874 
875  private:
876  unsigned char _data;
877  };
878 
879  template <typename T, int header_offset> class compact_pointer_parent
880  {
881  public:
882  compact_pointer_parent(): _data(0)
883  {
884  }
885 
886  void operator=(const compact_pointer_parent& rhs)
887  {
888  *this = rhs + 0;
889  }
890 
891  void operator=(T* value)
892  {
893  if (value)
894  {
895  // value is guaranteed to be compact-aligned; 'this' is not
896  // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
897  // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
898  // compensate for arithmetic shift behavior for negative values
899  ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
900  ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
901 
902  if (static_cast<uintptr_t>(offset) <= 65533)
903  {
904  _data = static_cast<unsigned short>(offset + 1);
905  }
906  else
907  {
908  xml_memory_page* page = compact_get_page(this, header_offset);
909 
910  if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
911  page->compact_shared_parent = value;
912 
913  if (page->compact_shared_parent == value)
914  {
915  _data = 65534;
916  }
917  else
918  {
919  compact_set_value<header_offset>(this, value);
920 
921  _data = 65535;
922  }
923  }
924  }
925  else
926  {
927  _data = 0;
928  }
929  }
930 
931  operator T*() const
932  {
933  if (_data)
934  {
935  if (_data < 65534)
936  {
937  uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
938 
939  return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
940  }
941  else if (_data == 65534)
942  return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
943  else
944  return compact_get_value<header_offset, T>(this);
945  }
946  else
947  return 0;
948  }
949 
950  T* operator->() const
951  {
952  return *this;
953  }
954 
955  private:
956  uint16_t _data;
957  };
958 
959  template <int header_offset, int base_offset> class compact_string
960  {
961  public:
962  compact_string(): _data(0)
963  {
964  }
965 
966  void operator=(const compact_string& rhs)
967  {
968  *this = rhs + 0;
969  }
970 
971  void operator=(char_t* value)
972  {
973  if (value)
974  {
975  xml_memory_page* page = compact_get_page(this, header_offset);
976 
977  if (PUGI__UNLIKELY(page->compact_string_base == 0))
978  page->compact_string_base = value;
979 
980  ptrdiff_t offset = value - page->compact_string_base;
981 
982  if (static_cast<uintptr_t>(offset) < (65535 << 7))
983  {
984  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
985  uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
986 
987  if (*base == 0)
988  {
989  *base = static_cast<uint16_t>((offset >> 7) + 1);
990  _data = static_cast<unsigned char>((offset & 127) + 1);
991  }
992  else
993  {
994  ptrdiff_t remainder = offset - ((*base - 1) << 7);
995 
996  if (static_cast<uintptr_t>(remainder) <= 253)
997  {
998  _data = static_cast<unsigned char>(remainder + 1);
999  }
1000  else
1001  {
1002  compact_set_value<header_offset>(this, value);
1003 
1004  _data = 255;
1005  }
1006  }
1007  }
1008  else
1009  {
1010  compact_set_value<header_offset>(this, value);
1011 
1012  _data = 255;
1013  }
1014  }
1015  else
1016  {
1017  _data = 0;
1018  }
1019  }
1020 
1021  operator char_t*() const
1022  {
1023  if (_data)
1024  {
1025  if (_data < 255)
1026  {
1027  xml_memory_page* page = compact_get_page(this, header_offset);
1028 
1029  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1030  const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1031  assert(*base);
1032 
1033  ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1034 
1035  return page->compact_string_base + offset;
1036  }
1037  else
1038  {
1039  return compact_get_value<header_offset, char_t>(this);
1040  }
1041  }
1042  else
1043  return 0;
1044  }
1045 
1046  private:
1047  unsigned char _data;
1048  };
1050 #endif
1051 
1052 #ifdef PUGIXML_COMPACT
1053 namespace pugi
1054 {
1055  struct xml_attribute_struct
1056  {
1057  xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1058  {
1059  PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1060  }
1061 
1062  impl::compact_header header;
1063 
1064  uint16_t namevalue_base;
1065 
1066  impl::compact_string<4, 2> name;
1067  impl::compact_string<5, 3> value;
1068 
1069  impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1070  impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1071  };
1072 
1073  struct xml_node_struct
1074  {
1075  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1076  {
1077  PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1078  }
1079 
1080  impl::compact_header header;
1081 
1082  uint16_t namevalue_base;
1083 
1084  impl::compact_string<4, 2> name;
1085  impl::compact_string<5, 3> value;
1086 
1087  impl::compact_pointer_parent<xml_node_struct, 6> parent;
1088 
1089  impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1090 
1091  impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1092  impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1093 
1094  impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1095  };
1096 }
1097 #else
1098 namespace pugi
1099 {
1101  {
1102  xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1103  {
1104  header = PUGI__GETHEADER_IMPL(this, page, 0);
1105  }
1106 
1107  uintptr_t header;
1108 
1111 
1114  };
1115 
1117  {
1118  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1119  {
1120  header = PUGI__GETHEADER_IMPL(this, page, type);
1121  }
1122 
1123  uintptr_t header;
1124 
1127 
1129 
1131 
1134 
1136  };
1137 }
1138 #endif
1139 
1142  {
1145  };
1146 
1148  {
1149  xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1150  {
1151  }
1152 
1153  const char_t* buffer;
1154 
1156 
1157  #ifdef PUGIXML_COMPACT
1158  compact_hash_table hash;
1159  #endif
1160  };
1161 
1162  template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1163  {
1164  assert(object);
1165 
1166  return *PUGI__GETPAGE(object)->allocator;
1167  }
1168 
1169  template <typename Object> inline xml_document_struct& get_document(const Object* object)
1170  {
1171  assert(object);
1172 
1173  return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1174  }
1176 
1177 // Low-level DOM operations
1179  inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1180  {
1181  xml_memory_page* page;
1182  void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1183  if (!memory) return 0;
1184 
1185  return new (memory) xml_attribute_struct(page);
1186  }
1187 
// Obtains storage for one xml_node_struct from `alloc` and constructs the node in place
// with the page reported by the allocator and the requested type; yields 0 on failure.
// NOTE(review): the signature line is missing from this listing; other code in this file
// calls it as allocate_node(alloc, type).
1189  {
1190  xml_memory_page* page;
1191  void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1192  if (!memory) return 0;
1193 
1194  return new (memory) xml_node_struct(page, type);
1195  }
1196 
// Releases the name and value strings of attribute `a`, then returns the attribute's
// own storage to `alloc` using the page the attribute lives on.
// NOTE(review): the lines directly preceding each deallocate_string call are absent from
// this listing; they presumably guard the call (e.g. "only if the string was heap
// allocated") - confirm against the full source before treating the calls as unconditional.
1197  inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1198  {
1200  alloc.deallocate_string(a->name);
1201 
1203  alloc.deallocate_string(a->value);
1204 
1205  alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1206  }
1207 
// Destroys node `n` and everything it owns: its strings, every attribute in its
// attribute list, every child subtree (recursively), and finally the node's own storage.
// NOTE(review): the signature line and the lines preceding each deallocate_string call
// are missing from this listing; the recursive call below shows the name and argument
// order destroy_node(node, alloc). The missing lines presumably guard the string frees.
1209  {
1211  alloc.deallocate_string(n->name);
1212 
1214  alloc.deallocate_string(n->value);
1215 
// The successor is read before the element is destroyed, since destruction frees it.
1216  for (xml_attribute_struct* attr = n->first_attribute; attr; )
1217  {
1218  xml_attribute_struct* next = attr->next_attribute;
1219 
1220  destroy_attribute(attr, alloc);
1221 
1222  attr = next;
1223  }
1224 
// Same pattern for children; recursion depth follows the depth of the subtree.
1225  for (xml_node_struct* child = n->first_child; child; )
1226  {
1227  xml_node_struct* next = child->next_sibling;
1228 
1229  destroy_node(child, alloc);
1230 
1231  child = next;
1232  }
1233 
1234  alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1235  }
1236 
1237  inline void append_node(xml_node_struct* child, xml_node_struct* node)
1238  {
1239  child->parent = node;
1240 
1241  xml_node_struct* head = node->first_child;
1242 
1243  if (head)
1244  {
1245  xml_node_struct* tail = head->prev_sibling_c;
1246 
1247  tail->next_sibling = child;
1248  child->prev_sibling_c = tail;
1249  head->prev_sibling_c = child;
1250  }
1251  else
1252  {
1253  node->first_child = child;
1254  child->prev_sibling_c = child;
1255  }
1256  }
1257 
1258  inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1259  {
1260  child->parent = node;
1261 
1262  xml_node_struct* head = node->first_child;
1263 
1264  if (head)
1265  {
1266  child->prev_sibling_c = head->prev_sibling_c;
1267  head->prev_sibling_c = child;
1268  }
1269  else
1270  child->prev_sibling_c = child;
1271 
1272  child->next_sibling = head;
1273  node->first_child = child;
1274  }
1275 
// Links `child` into the sibling list immediately after `node`, under node's parent.
// NOTE(review): the signature line is missing from this listing; the body uses the
// names `child` and `node`.
1277  {
1278  xml_node_struct* parent = node->parent;
1279 
1280  child->parent = parent;
1281 
// Whoever used to point back at `node` as its predecessor must now point at `child`:
// either node's successor, or (when node was last) the first child's cyclic back link.
1282  if (node->next_sibling)
1283  node->next_sibling->prev_sibling_c = child;
1284  else
1285  parent->first_child->prev_sibling_c = child;
1286 
1287  child->next_sibling = node->next_sibling;
1288  child->prev_sibling_c = node;
1289 
1290  node->next_sibling = child;
1291  }
1292 
// Links `child` into the sibling list immediately before `node`, under node's parent.
// NOTE(review): the signature line is missing from this listing; the body uses the
// names `child` and `node`.
1294  {
1295  xml_node_struct* parent = node->parent;
1296 
1297  child->parent = parent;
1298 
// If node's back link leads to an element with a successor, node has a real predecessor
// whose forward link is redirected; otherwise node is taken to be the first child and
// the parent's first_child is replaced.
1299  if (node->prev_sibling_c->next_sibling)
1300  node->prev_sibling_c->next_sibling = child;
1301  else
1302  parent->first_child = child;
1303 
1304  child->prev_sibling_c = node->prev_sibling_c;
1305  child->next_sibling = node;
1306 
1307  node->prev_sibling_c = child;
1308  }
1309 
1310  inline void remove_node(xml_node_struct* node)
1311  {
1312  xml_node_struct* parent = node->parent;
1313 
1314  if (node->next_sibling)
1315  node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1316  else
1317  parent->first_child->prev_sibling_c = node->prev_sibling_c;
1318 
1319  if (node->prev_sibling_c->next_sibling)
1320  node->prev_sibling_c->next_sibling = node->next_sibling;
1321  else
1322  parent->first_child = node->next_sibling;
1323 
1324  node->parent = 0;
1325  node->prev_sibling_c = 0;
1326  node->next_sibling = 0;
1327  }
1328 
1329  inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1330  {
1331  xml_attribute_struct* head = node->first_attribute;
1332 
1333  if (head)
1334  {
1335  xml_attribute_struct* tail = head->prev_attribute_c;
1336 
1337  tail->next_attribute = attr;
1338  attr->prev_attribute_c = tail;
1339  head->prev_attribute_c = attr;
1340  }
1341  else
1342  {
1343  node->first_attribute = attr;
1344  attr->prev_attribute_c = attr;
1345  }
1346  }
1347 
1348  inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1349  {
1350  xml_attribute_struct* head = node->first_attribute;
1351 
1352  if (head)
1353  {
1354  attr->prev_attribute_c = head->prev_attribute_c;
1355  head->prev_attribute_c = attr;
1356  }
1357  else
1358  attr->prev_attribute_c = attr;
1359 
1360  attr->next_attribute = head;
1361  node->first_attribute = attr;
1362  }
1363 
1364  inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1365  {
1366  if (place->next_attribute)
1367  place->next_attribute->prev_attribute_c = attr;
1368  else
1369  node->first_attribute->prev_attribute_c = attr;
1370 
1371  attr->next_attribute = place->next_attribute;
1372  attr->prev_attribute_c = place;
1373  place->next_attribute = attr;
1374  }
1375 
1376  inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1377  {
1378  if (place->prev_attribute_c->next_attribute)
1379  place->prev_attribute_c->next_attribute = attr;
1380  else
1381  node->first_attribute = attr;
1382 
1383  attr->prev_attribute_c = place->prev_attribute_c;
1384  attr->next_attribute = place;
1385  place->prev_attribute_c = attr;
1386  }
1387 
1388  inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1389  {
1390  if (attr->next_attribute)
1391  attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1392  else
1393  node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1394 
1395  if (attr->prev_attribute_c->next_attribute)
1396  attr->prev_attribute_c->next_attribute = attr->next_attribute;
1397  else
1398  node->first_attribute = attr->next_attribute;
1399 
1400  attr->prev_attribute_c = 0;
1401  attr->next_attribute = 0;
1402  }
1403 
// Allocates a node of the requested type and appends it as the last child of `node`.
// Yields 0 when the allocator cannot reserve or when the node allocation fails.
// NOTE(review): the signature line is missing from this listing; the body uses the
// names `node`, `alloc` and `type`.
1405  {
1406  if (!alloc.reserve()) return 0;
1407 
1408  xml_node_struct* child = allocate_node(alloc, type);
1409  if (!child) return 0;
1410 
1411  append_node(child, node);
1412 
1413  return child;
1414  }
1415 
// Allocates an attribute and appends it as the last attribute of `node`.
// Yields 0 when the allocator cannot reserve or when the attribute allocation fails.
// NOTE(review): the signature line is missing from this listing; the body uses the
// names `node` and `alloc`.
1417  {
1418  if (!alloc.reserve()) return 0;
1419 
1420  xml_attribute_struct* attr = allocate_attribute(alloc);
1421  if (!attr) return 0;
1422 
1423  append_attribute(attr, node);
1424 
1425  return attr;
1426  }
1428 
1429 // Helper classes for code generation
// Compile-time "false" tag: exposes value == 0 so templates can branch on it.
struct opt_false
{
    enum
    {
        value = 0
    };
};
1435 
// Compile-time "true" tag: exposes value == 1 so templates can branch on it.
struct opt_true
{
    enum
    {
        value = 1
    };
};
1441 
1442 // Unicode utilities
// Reverses the byte order of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
    const unsigned int low_byte = value & 0xffu;
    const unsigned int high_byte = static_cast<unsigned int>(value) >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1448 
// Reverses the byte order of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0 = value & 0xff;
    const uint32_t byte1 = value & 0xff00;
    const uint32_t byte2 = value & 0xff0000;
    const uint32_t byte3 = value >> 24;

    return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
1453 
// Output traits that only count: the running result is the number of UTF-8 bytes that
// encoding the code points seen so far would take.
// NOTE(review): the declaration line is missing from this listing; other code in this
// file instantiates a type named utf8_counter in the counting pass, presumably this one.
1455  {
1456  typedef size_t value_type;
1457 
// Code points below U+10000 take 1, 2 or 3 bytes depending on magnitude.
1458  static value_type low(value_type result, uint32_t ch)
1459  {
1460  // U+0000..U+007F
1461  if (ch < 0x80) return result + 1;
1462  // U+0080..U+07FF
1463  else if (ch < 0x800) return result + 2;
1464  // U+0800..U+FFFF
1465  else return result + 3;
1466  }
1467 
// Code points routed to high() always take 4 bytes; the value itself is not inspected.
1468  static value_type high(value_type result, uint32_t)
1469  {
1470  // U+10000..U+10FFFF
1471  return result + 4;
1472  }
1473  };
1474 
// Output traits that write UTF-8: each call stores the encoded bytes at `result` and
// returns the pointer just past them. No bounds checking is done here; the caller must
// have sized the destination beforehand.
// NOTE(review): the declaration line is missing from this listing; other code in this
// file instantiates a type named utf8_writer in the writing pass, presumably this one.
1476  {
1477  typedef uint8_t* value_type;
1478 
// Encodes a code point below U+10000 as 1, 2 or 3 bytes.
1479  static value_type low(value_type result, uint32_t ch)
1480  {
1481  // U+0000..U+007F
1482  if (ch < 0x80)
1483  {
1484  *result = static_cast<uint8_t>(ch);
1485  return result + 1;
1486  }
1487  // U+0080..U+07FF
1488  else if (ch < 0x800)
1489  {
1490  result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1491  result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1492  return result + 2;
1493  }
1494  // U+0800..U+FFFF
1495  else
1496  {
1497  result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1498  result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1499  result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1500  return result + 3;
1501  }
1502  }
1503 
// Encodes a code point as 4 bytes; the value is not range-checked here.
1504  static value_type high(value_type result, uint32_t ch)
1505  {
1506  // U+10000..U+10FFFF
1507  result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1508  result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1509  result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1510  result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1511  return result + 4;
1512  }
1513 
// Dispatches to low() or high() based on the code point's magnitude.
1514  static value_type any(value_type result, uint32_t ch)
1515  {
1516  return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1517  }
1518  };
1519 
// Output traits that only count 16-bit units: one unit per low() code point, two
// (a surrogate pair) per high() code point.
// NOTE(review): the declaration line is missing from this listing; presumably utf16_counter.
1521  {
1522  typedef size_t value_type;
1523 
1524  static value_type low(value_type result, uint32_t)
1525  {
1526  return result + 1;
1527  }
1528 
1529  static value_type high(value_type result, uint32_t)
1530  {
1531  return result + 2;
1532  }
1533  };
1534 
// Output traits that write UTF-16 units at `result` and return the pointer just past
// what was written. No bounds checking is done here.
// NOTE(review): the declaration line is missing from this listing; presumably utf16_writer.
1536  {
1537  typedef uint16_t* value_type;
1538 
// Stores the code point as a single unit (truncating to 16 bits).
1539  static value_type low(value_type result, uint32_t ch)
1540  {
1541  *result = static_cast<uint16_t>(ch);
1542 
1543  return result + 1;
1544  }
1545 
// Splits the code point into a surrogate pair: the offset from 0x10000 is divided into
// its upper bits (added to 0xD800) and its lower 10 bits (added to 0xDC00).
1546  static value_type high(value_type result, uint32_t ch)
1547  {
1548  uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1549  uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1550 
1551  result[0] = static_cast<uint16_t>(0xD800 + msh);
1552  result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1553 
1554  return result + 2;
1555  }
1556 
// Dispatches to low() or high() based on the code point's magnitude.
1557  static value_type any(value_type result, uint32_t ch)
1558  {
1559  return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1560  }
1561  };
1562 
// Output traits that only count 32-bit units: every code point takes exactly one unit,
// so low() and high() both add 1.
// NOTE(review): the declaration line is missing from this listing; presumably utf32_counter.
1564  {
1565  typedef size_t value_type;
1566 
1567  static value_type low(value_type result, uint32_t)
1568  {
1569  return result + 1;
1570  }
1571 
1572  static value_type high(value_type result, uint32_t)
1573  {
1574  return result + 1;
1575  }
1576  };
1577 
// Output traits that write UTF-32: each entry point stores the code point unchanged at
// `result` and returns the pointer just past it. No bounds checking is done here.
// NOTE(review): the declaration line is missing from this listing; presumably utf32_writer.
1579  {
1580  typedef uint32_t* value_type;
1581 
1582  static value_type low(value_type result, uint32_t ch)
1583  {
1584  *result = ch;
1585 
1586  return result + 1;
1587  }
1588 
1589  static value_type high(value_type result, uint32_t ch)
1590  {
1591  *result = ch;
1592 
1593  return result + 1;
1594  }
1595 
1596  static value_type any(value_type result, uint32_t ch)
1597  {
1598  *result = ch;
1599 
1600  return result + 1;
1601  }
1602  };
1603 
// Output traits that write single bytes: code points up to 255 are stored as-is and
// anything larger is replaced with '?'. No bounds checking is done here.
// NOTE(review): the declaration line is missing from this listing; presumably latin1_writer.
1605  {
1606  typedef uint8_t* value_type;
1607 
1608  static value_type low(value_type result, uint32_t ch)
1609  {
1610  *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1611 
1612  return result + 1;
1613  }
1614 
// Code points routed to high() never fit in one byte, so the value is ignored.
1615  static value_type high(value_type result, uint32_t ch)
1616  {
1617  (void)ch;
1618 
1619  *result = '?';
1620 
1621  return result + 1;
1622  }
1623  };
1624 
// Decoder for UTF-8 input: walks `size` bytes starting at `data`, feeds each decoded
// code point to Traits::low (below U+10000) or Traits::high (4-byte sequences), and
// returns the accumulated result. A byte that does not start a well-formed sequence
// (bad lead byte, missing/invalid continuation byte, or truncated input) is skipped
// on its own without emitting anything.
// NOTE(review): the declaration line is missing from this listing; other code in this
// file instantiates a type named utf8_decoder, presumably this one. Decoded values are
// not checked for overlong forms or surrogate code points here.
1626  {
1627  typedef uint8_t type;
1628 
1629  template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1630  {
1631  const uint8_t utf8_byte_mask = 0x3f;
1632 
1633  while (size)
1634  {
1635  uint8_t lead = *data;
1636 
1637  // 0xxxxxxx -> U+0000..U+007F
1638  if (lead < 0x80)
1639  {
1640  result = Traits::low(result, lead);
1641  data += 1;
1642  size -= 1;
1643 
// Fast path: once the pointer is 4-byte aligned, whole words whose bytes all have the
// top bit clear are consumed four ASCII characters at a time.
1644  // process aligned single-byte (ascii) blocks
1645  if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1646  {
1647  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1648  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1649  {
1650  result = Traits::low(result, data[0]);
1651  result = Traits::low(result, data[1]);
1652  result = Traits::low(result, data[2]);
1653  result = Traits::low(result, data[3]);
1654  data += 4;
1655  size -= 4;
1656  }
1657  }
1658  }
// For the multi-byte cases the unsigned subtraction acts as a range check on the lead
// byte, and every continuation byte must match 10xxxxxx.
1659  // 110xxxxx -> U+0080..U+07FF
1660  else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1661  {
1662  result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1663  data += 2;
1664  size -= 2;
1665  }
1666  // 1110xxxx -> U+0800-U+FFFF
1667  else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1668  {
1669  result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1670  data += 3;
1671  size -= 3;
1672  }
1673  // 11110xxx -> U+10000..U+10FFFF
1674  else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1675  {
1676  result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1677  data += 4;
1678  size -= 4;
1679  }
1680  // 10xxxxxx or 11111xxx -> invalid
1681  else
1682  {
1683  data += 1;
1684  size -= 1;
1685  }
1686  }
1687 
1688  return result;
1689  }
1690  };
1691 
1692  template <typename opt_swap> struct utf16_decoder
1693  {
1694  typedef uint16_t type;
1695 
1696  template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1697  {
1698  while (size)
1699  {
1700  uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1701 
1702  // U+0000..U+D7FF
1703  if (lead < 0xD800)
1704  {
1705  result = Traits::low(result, lead);
1706  data += 1;
1707  size -= 1;
1708  }
1709  // U+E000..U+FFFF
1710  else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1711  {
1712  result = Traits::low(result, lead);
1713  data += 1;
1714  size -= 1;
1715  }
1716  // surrogate pair lead
1717  else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1718  {
1719  uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1720 
1721  if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1722  {
1723  result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1724  data += 2;
1725  size -= 2;
1726  }
1727  else
1728  {
1729  data += 1;
1730  size -= 1;
1731  }
1732  }
1733  else
1734  {
1735  data += 1;
1736  size -= 1;
1737  }
1738  }
1739 
1740  return result;
1741  }
1742  };
1743 
1744  template <typename opt_swap> struct utf32_decoder
1745  {
1746  typedef uint32_t type;
1747 
1748  template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1749  {
1750  while (size)
1751  {
1752  uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1753 
1754  // U+0000..U+FFFF
1755  if (lead < 0x10000)
1756  {
1757  result = Traits::low(result, lead);
1758  data += 1;
1759  size -= 1;
1760  }
1761  // U+10000..U+10FFFF
1762  else
1763  {
1764  result = Traits::high(result, lead);
1765  data += 1;
1766  size -= 1;
1767  }
1768  }
1769 
1770  return result;
1771  }
1772  };
1773 
// Decoder for single-byte input: every byte is passed to Traits::low unchanged, i.e. the
// byte value is used directly as the code point.
// NOTE(review): the declaration line is missing from this listing; other code in this
// file calls latin1_decoder::process, presumably this type.
1775  {
1776  typedef uint8_t type;
1777 
1778  template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1779  {
1780  while (size)
1781  {
1782  result = Traits::low(result, *data);
1783  data += 1;
1784  size -= 1;
1785  }
1786 
1787  return result;
1788  }
1789  };
1790 
// Maps a wchar_t size in bytes to the fixed-width unit type of matching size.
// Only sizes 2 and 4 are specialized; any other size leaves the template undefined.
// NOTE(review): several member lines of both specializations are missing from this
// listing (presumably the matching counter/writer/decoder typedefs - confirm against
// the full source).
1791  template <size_t size> struct wchar_selector;
1792 
1793  template <> struct wchar_selector<2>
1794  {
1795  typedef uint16_t type;
1799  };
1800 
1801  template <> struct wchar_selector<4>
1802  {
1803  typedef uint32_t type;
1807  };
1808 
1811 
// Decoder for wchar_t input: reinterprets the wide buffer as the unit type of a
// `decoder` type and forwards the whole call to decoder::process.
// NOTE(review): the declaration line and the line that defines `decoder` are missing
// from this listing; `decoder` is presumably chosen from sizeof(wchar_t) - confirm.
1813  {
1814  typedef wchar_t type;
1815 
1816  template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1817  {
1819 
1820  return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1821  }
1822  };
1823 
1824 #ifdef PUGIXML_WCHAR_MODE
1825  PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1826  {
1827  for (size_t i = 0; i < length; ++i)
1828  result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1829  }
1830 #endif
1832 
// Bit flags used to classify characters for the parser. Each value is a distinct power
// of two so that several classes can be combined in one table byte and tested with a
// bitwise AND.
// NOTE(review): the declaration line of this enumeration is missing from this listing.
1835  {
1836  ct_parse_pcdata = 1, // \0, &, \r, <
1837  ct_parse_attr = 2, // \0, &, \r, ', "
1838  ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1839  ct_space = 8, // \r, \n, space, tab
1840  ct_parse_cdata = 16, // \0, ], >, \r
1841  ct_parse_comment = 32, // \0, -, >, \r
1842  ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1843  ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
1844  };
1845 
// Per-byte classification table indexed by character code. Each entry is the bitwise OR
// of the ct_* flags that apply to that character; for example entry 0 is
// 55 = 1 + 2 + 4 + 16 + 32 (every "parse" class stops at \0), tab is 12 = 4 + 8, and
// letters are 192 = 64 + 128 (symbol and start symbol). Every code from 128 up is 192.
// Do not edit individual entries without keeping them consistent with the flag comments.
1846  static const unsigned char chartype_table[256] =
1847  {
1848  55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1849  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1850  8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1851  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1852  0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1853  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1854  0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1855  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1856 
1857  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1858  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1859  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1860  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1861  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1862  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1863  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1864  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
1865  };
1866 
// Second set of character class bit flags, stored in chartypex_table. As with the first
// set, each value is a distinct power of two so classes can be OR-ed into one table byte.
// NOTE(review): the declaration line of this enumeration is missing from this listing.
1868  {
1869  ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1870  ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1871  ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1872  ctx_digit = 8, // 0-9
1873  ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1874  };
1875 
// Per-byte classification table for the ctx_* flags, indexed by character code. Each
// entry is the bitwise OR of the flags that apply; for example most control characters
// are 3 = 1 + 2, digits are 24 = 8 + 16, and letters are 20 = 4 + 16. Every code from
// 128 up is 20.
1876  static const unsigned char chartypex_table[256] =
1877  {
1878  3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1879  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1880  0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1881  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1882 
1883  0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1884  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1885  0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1886  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1887 
1888  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1889  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1890  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1891  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1892  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1893  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1894  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1895  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
1896  };
1897 
// Character class test: yields a non-zero value when character `c` has any of the
// flags in `ct` according to `table`.
// In wide-character builds only codes below 128 index the table directly; every other
// code is classified with the entry at index 128. Note that this variant evaluates `c`
// twice, so the argument must not have side effects.
// In narrow builds the character is converted to unsigned char and used as the index.
1898 #ifdef PUGIXML_WCHAR_MODE
1899  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1900 #else
1901  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1902 #endif
1903 
// Convenience wrappers bound to the two classification tables.
1904  #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1905  #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1906 
// Runtime byte-order probe: stores 1 in an unsigned int and reports whether the byte at
// the lowest address holds that 1, which is the case on a little-endian machine.
// NOTE(review): the signature line is missing from this listing (presumably
// is_little_endian, returning bool - confirm against the full source).
1908  {
1909  unsigned int ui = 1;
1910 
1911  return *reinterpret_cast<unsigned char*>(&ui) == 1;
1912  }
1913 
// Selects an encoding for wchar_t based on its size, which is statically required to be
// either 2 or 4 bytes.
// NOTE(review): the signature line and both return statements are missing from this
// listing, so the if/else below has no visible bodies; other code in this file calls
// get_wchar_encoding() and uses the result as an xml_encoding.
1915  {
1916  PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1917 
1918  if (sizeof(wchar_t) == 2)
1920  else
1922  }
1923 
// Scans a byte buffer that starts with an XML declaration for its encoding value.
// On success returns true and sets out_encoding/out_length to the bytes between the
// quotes (pointing into `data`, not copied); otherwise returns false.
// NOTE(review): several interior lines are missing from this listing (the scan of the
// "encoding" keyword, the whitespace skips around '=', and the scan of the value
// itself), so the body below is not complete as shown.
1924  PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1925  {
// Local scanning helpers, both bounds-checked against `size`:
// SCANCHAR requires one exact byte (and makes the function return false otherwise);
// SCANCHARTYPE skips a run of bytes that belong to character class `ct`.
1926  #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1927  #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1928 
// The prefix comparison uses non-short-circuit '&' on purpose-looking grounds: all five
// bytes are in range once size >= 6 has been checked, so every operand is safe to evaluate.
1929  // check if we have a non-empty XML declaration
1930  if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1931  return false;
1932 
1933  // scan XML declaration until the encoding field
1934  for (size_t i = 6; i + 1 < size; ++i)
1935  {
1936  // declaration can not contain ? in quoted values
1937  if (data[i] == '?')
1938  return false;
1939 
1940  if (data[i] == 'e' && data[i + 1] == 'n')
1941  {
1942  size_t offset = i;
1943 
1944  // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1947 
1948  // S? = S?
1950  PUGI__SCANCHAR('=');
1952 
// Anything other than a double quote is treated as "expect a single quote"; the
// SCANCHAR that follows then rejects input that has neither.
1953  // the only two valid delimiters are ' and "
1954  uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1955 
1956  PUGI__SCANCHAR(delimiter);
1957 
1958  size_t start = offset;
1959 
1960  out_encoding = data + offset;
1961 
1963 
1964  out_length = offset - start;
1965 
// The closing delimiter must match the opening one. Note the out parameters have
// already been written at this point even if this check fails.
1966  PUGI__SCANCHAR(delimiter);
1967 
1968  return true;
1969  }
1970  }
1971 
1972  return false;
1973 
1974  #undef PUGI__SCANCHAR
1975  #undef PUGI__SCANCHARTYPE
1976  }
1977 
1978  PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1979  {
1980  // skip encoding autodetection if input buffer is too small
1981  if (size < 4) return encoding_utf8;
1982 
1983  uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1984 
1985  // look for BOM in first few bytes
1986  if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1987  if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1988  if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1989  if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1990  if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1991 
1992  // look for <, <? or <?xm in various encodings
1993  if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1994  if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1995  if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1996  if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1997 
1998  // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1999  if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
2000  if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
2001 
2002  // no known BOM detected; parse declaration
2003  const uint8_t* enc = 0;
2004  size_t enc_length = 0;
2005 
2006  if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2007  {
2008  // iso-8859-1 (case-insensitive)
2009  if (enc_length == 10
2010  && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2011  && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2012  && enc[8] == '-' && enc[9] == '1')
2013  return encoding_latin1;
2014 
2015  // latin1 (case-insensitive)
2016  if (enc_length == 6
2017  && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2018  && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2019  && enc[5] == '1')
2020  return encoding_latin1;
2021  }
2022 
2023  return encoding_utf8;
2024  }
2025 
// Resolves a requested encoding to a concrete one for the given buffer: generic values
// are replaced with specific ones, explicit values are returned unchanged, and only
// encoding_auto triggers content-based detection.
// NOTE(review): the statements that follow the two "replace utf16/utf32" comments are
// missing from this listing; only their comments survive.
2026  PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2027  {
2028  // replace wchar encoding with utf implementation
2029  if (encoding == encoding_wchar) return get_wchar_encoding();
2030 
2031  // replace utf16 encoding with utf16 with specific endianness
2033 
2034  // replace utf32 encoding with utf32 with specific endianness
2036 
2037  // only do autodetection if no explicit encoding is requested
2038  if (encoding != encoding_auto) return encoding;
2039 
2040  // try to guess encoding (based on XML specification, Appendix F.1)
2041  const uint8_t* data = static_cast<const uint8_t*>(contents);
2042 
2043  return guess_buffer_encoding(data, size);
2044  }
2045 
2046  PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2047  {
2048  size_t length = size / sizeof(char_t);
2049 
2050  if (is_mutable)
2051  {
2052  out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2053  out_length = length;
2054  }
2055  else
2056  {
2057  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2058  if (!buffer) return false;
2059 
2060  if (contents)
2061  memcpy(buffer, contents, length * sizeof(char_t));
2062  else
2063  assert(length == 0);
2064 
2065  buffer[length] = 0;
2066 
2067  out_buffer = buffer;
2068  out_length = length + 1;
2069  }
2070 
2071  return true;
2072  }
2073 
2074 #ifdef PUGIXML_WCHAR_MODE
2075  PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2076  {
2077  return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2078  (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2079  }
2080 
2081  PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2082  {
2083  const char_t* data = static_cast<const char_t*>(contents);
2084  size_t length = size / sizeof(char_t);
2085 
2086  if (is_mutable)
2087  {
2088  char_t* buffer = const_cast<char_t*>(data);
2089 
2090  convert_wchar_endian_swap(buffer, data, length);
2091 
2092  out_buffer = buffer;
2093  out_length = length;
2094  }
2095  else
2096  {
2097  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2098  if (!buffer) return false;
2099 
2100  convert_wchar_endian_swap(buffer, data, length);
2101  buffer[length] = 0;
2102 
2103  out_buffer = buffer;
2104  out_length = length + 1;
2105  }
2106 
2107  return true;
2108  }
2109 
2110  template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2111  {
2112  const typename D::type* data = static_cast<const typename D::type*>(contents);
2113  size_t data_length = size / sizeof(typename D::type);
2114 
2115  // first pass: get length in wchar_t units
2116  size_t length = D::process(data, data_length, 0, wchar_counter());
2117 
2118  // allocate buffer of suitable length
2119  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2120  if (!buffer) return false;
2121 
2122  // second pass: convert utf16 input to wchar_t
2123  wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2124  wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2125 
2126  assert(oend == obegin + length);
2127  *oend = 0;
2128 
2129  out_buffer = buffer;
2130  out_length = length + 1;
2131 
2132  return true;
2133  }
2134 
// Wide-character build: converts `contents` from `encoding` into a wchar_t buffer.
// Picks the cheapest route first (no conversion, then byte swap only) and otherwise
// dispatches to the generic two-pass converter with the decoder for the source encoding.
// Reaching the end means `encoding` was not one of the handled values; that asserts in
// debug builds and returns false.
// NOTE(review): the declarations of `native_encoding` inside the utf16 and utf32
// branches are missing from this listing; from its use it is the encoding that needs no
// byte swap on this machine - confirm against the full source.
2135  PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2136  {
2137  // get native encoding
2138  xml_encoding wchar_encoding = get_wchar_encoding();
2139 
2140  // fast path: no conversion required
2141  if (encoding == wchar_encoding)
2142  return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2143 
2144  // only endian-swapping is required
2145  if (need_endian_swap_utf(encoding, wchar_encoding))
2146  return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2147 
2148  // source encoding is utf8
2149  if (encoding == encoding_utf8)
2150  return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2151 
2152  // source encoding is utf16
2153  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2154  {
2156 
// opt_false decodes units as stored; opt_true byte-swaps each unit first.
2157  return (native_encoding == encoding) ?
2158  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2159  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2160  }
2161 
2162  // source encoding is utf32
2163  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2164  {
2166 
2167  return (native_encoding == encoding) ?
2168  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2169  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2170  }
2171 
2172  // source encoding is latin1
2173  if (encoding == encoding_latin1)
2174  return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2175 
2176  assert(false && "Invalid encoding"); // unreachable
2177  return false;
2178  }
2179 #else
2180  template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2181  {
2182  const typename D::type* data = static_cast<const typename D::type*>(contents);
2183  size_t data_length = size / sizeof(typename D::type);
2184 
2185  // first pass: get length in utf8 units
2186  size_t length = D::process(data, data_length, 0, utf8_counter());
2187 
2188  // allocate buffer of suitable length
2189  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2190  if (!buffer) return false;
2191 
2192  // second pass: convert utf16 input to utf8
2193  uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2194  uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2195 
2196  assert(oend == obegin + length);
2197  *oend = 0;
2198 
2199  out_buffer = buffer;
2200  out_length = length + 1;
2201 
2202  return true;
2203  }
2204 
2205  PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2206  {
2207  for (size_t i = 0; i < size; ++i)
2208  if (data[i] > 127)
2209  return i;
2210 
2211  return size;
2212  }
2213 
2214  PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2215  {
2216  const uint8_t* data = static_cast<const uint8_t*>(contents);
2217  size_t data_length = size;
2218 
2219  // get size of prefix that does not need utf8 conversion
2220  size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2221  assert(prefix_length <= data_length);
2222 
2223  const uint8_t* postfix = data + prefix_length;
2224  size_t postfix_length = data_length - prefix_length;
2225 
2226  // if no conversion is needed, just return the original buffer
2227  if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2228 
2229  // first pass: get length in utf8 units
2230  size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2231 
2232  // allocate buffer of suitable length
2233  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2234  if (!buffer) return false;
2235 
2236  // second pass: convert latin1 input to utf8
2237  memcpy(buffer, data, prefix_length);
2238 
2239  uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2240  uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2241 
2242  assert(oend == obegin + length);
2243  *oend = 0;
2244 
2245  out_buffer = buffer;
2246  out_length = length + 1;
2247 
2248  return true;
2249  }
2250 
      // Produces a char_t buffer for `contents` (`size` bytes in `encoding`), dispatching on the encoding:
      //   utf8   -> get_mutable_buffer(), no conversion
      //   utf16  -> convert_buffer_generic() with a utf16_decoder
      //   utf32  -> convert_buffer_generic() with a utf32_decoder
      //   latin1 -> convert_buffer_latin1()
      // Any other encoding value trips the assert and yields false.
      // NOTE(review): `native_encoding` is read in the utf16 and utf32 branches, but its declaration is
      // not visible in this listing (numbering skips 2260 and 2270) - confirm against the upstream source.
2251  PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2252  {
2253  // fast path: no conversion required
2254  if (encoding == encoding_utf8)
2255  return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2256 
2257  // source encoding is utf16
2258  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2259  {
2261 
      // opt_false is selected when the input already matches native_encoding, opt_true otherwise
      // (presumably the opt_true decoder byte-swaps each unit - verify against utf16_decoder)
2262  return (native_encoding == encoding) ?
2263  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2264  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2265  }
2266 
2267  // source encoding is utf32
2268  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2269  {
2271 
      // same selection rule as the utf16 branch above
2272  return (native_encoding == encoding) ?
2273  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2274  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2275  }
2276 
2277  // source encoding is latin1
2278  if (encoding == encoding_latin1)
2279  return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2280 
2281  assert(false && "Invalid encoding"); // unreachable
2282  return false;
2283  }
2284 #endif
2285 
2286  PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2287  {
2288  // get length in utf8 characters
2289  return wchar_decoder::process(str, length, 0, utf8_counter());
2290  }
2291 
2292  PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2293  {
2294  // convert to utf8
2295  uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2296  uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2297 
2298  assert(begin + size == end);
2299  (void)!end;
2300  (void)!size;
2301  }
2302 
2303 #ifndef PUGIXML_NO_STL
2304  PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2305  {
2306  // first pass: get length in utf8 characters
2307  size_t size = as_utf8_begin(str, length);
2308 
2309  // allocate resulting string
2310  std::string result;
2311  result.resize(size);
2312 
2313  // second pass: convert to utf8
2314  if (size > 0) as_utf8_end(&result[0], size, str, length);
2315 
2316  return result;
2317  }
2318 
2319  PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2320  {
2321  const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2322 
2323  // first pass: get length in wchar_t units
2324  size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2325 
2326  // allocate resulting string
2327  std::basic_string<wchar_t> result;
2328  result.resize(length);
2329 
2330  // second pass: convert to wchar_t
2331  if (length > 0)
2332  {
2333  wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2334  wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2335 
2336  assert(begin + length == end);
2337  (void)!end;
2338  }
2339 
2340  return result;
2341  }
2342 #endif
2343 
2344  template <typename Header>
2345  inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2346  {
2347  // never reuse shared memory
2348  if (header & xml_memory_page_contents_shared_mask) return false;
2349 
2350  size_t target_length = strlength(target);
2351 
2352  // always reuse document buffer memory if possible
2353  if ((header & header_mask) == 0) return target_length >= length;
2354 
2355  // reuse heap memory if waste is not too great
2356  const size_t reuse_threshold = 32;
2357 
2358  return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2359  }
2360 
2361  template <typename String, typename Header>
2362  PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2363  {
2364  if (source_length == 0)
2365  {
2366  // empty string and null pointer are equivalent, so just deallocate old memory
2367  xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2368 
2369  if (header & header_mask) alloc->deallocate_string(dest);
2370 
2371  // mark the string as not allocated
2372  dest = 0;
2373  header &= ~header_mask;
2374 
2375  return true;
2376  }
2377  else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2378  {
2379  // we can reuse old buffer, so just copy the new data (including zero terminator)
2380  memcpy(dest, source, source_length * sizeof(char_t));
2381  dest[source_length] = 0;
2382 
2383  return true;
2384  }
2385  else
2386  {
2387  xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2388 
2389  if (!alloc->reserve()) return false;
2390 
2391  // allocate new buffer
2392  char_t* buf = alloc->allocate_string(source_length + 1);
2393  if (!buf) return false;
2394 
2395  // copy the string (including zero terminator)
2396  memcpy(buf, source, source_length * sizeof(char_t));
2397  buf[source_length] = 0;
2398 
2399  // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2400  if (header & header_mask) alloc->deallocate_string(dest);
2401 
2402  // the string is now allocated, so set the flag
2403  dest = buf;
2404  header |= header_mask;
2405 
2406  return true;
2407  }
2408  }
2409 
      // Bookkeeping for characters removed from a buffer that is being rewritten in place.
      // `end` marks where the most recent gap stops and `size` is the accumulated width of all gaps;
      // text following a gap is shifted back over it with memmove when the next gap is pushed or
      // when the gaps are finally collapsed.
      // NOTE(review): the declaration of the `end` member and the signature line of the last member
      // function (invoked as g.flush(s) elsewhere in this file) are not visible in this listing
      // (numbering skips 2412 and 2438) - confirm against the upstream source.
2410  struct gap
2411  {
2413  size_t size;
2414 
      // starts with no gap recorded
2415  gap(): end(0), size(0)
2416  {
2417  }
2418 
2419  // Push new gap, move s count bytes further (skipping the gap).
2420  // Collapse previous gap.
2421  void push(char_t*& s, size_t count)
2422  {
2423  if (end) // there was a gap already; collapse it
2424  {
2425  // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2426  assert(s >= end);
2427  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2428  }
2429 
2430  s += count; // end of current gap
2431 
2432  // "merge" two gaps
2433  end = s;
2434  size += count;
2435  }
2436 
2437  // Collapse all gaps, return past-the-end pointer
      // (s is the current scan position; the result is s moved back by the total gap width,
      // or s itself when no gap was ever pushed)
2439  {
2440  if (end)
2441  {
2442  // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2443  assert(s >= end);
2444  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2445 
2446  return s - size;
2447  }
2448  else return s;
2449  }
2450  };
2451 
      // Body of the entity decoder that the strconv_* routines in this file invoke as
      // strconv_escape(s, g); its signature line is not visible in this listing.
      // On entry s points at '&'. The recognised references - &#NNN; &#xHHH; &amp; &apos; &gt; &lt; &quot; -
      // are replaced in place by the decoded character(s), and the characters left over from the
      // reference are registered with g.push() as a gap.
      // Returns the position at which the caller should continue scanning. For an unrecognised or
      // malformed reference nothing is rewritten and the returned pointer is where matching stopped.
2453  {
      // stre scans ahead through the reference while s stays on the '&'
2454  char_t* stre = s + 1;
2455 
2456  switch (*stre)
2457  {
2458  case '#': // &#...
2459  {
      // code point accumulated from the digits (no overflow check is performed here)
2460  unsigned int ucsc = 0;
2461 
2462  if (stre[1] == 'x') // &#x... (hex code)
2463  {
2464  stre += 2;
2465 
2466  char_t ch = *stre;
2467 
      // "&#x;" carries no digits: leave it untouched
2468  if (ch == ';') return stre;
2469 
2470  for (;;)
2471  {
2472  if (static_cast<unsigned int>(ch - '0') <= 9)
2473  ucsc = 16 * ucsc + (ch - '0');
      // (ch | ' ') folds 'A'-'F' onto 'a'-'f'
2474  else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2475  ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2476  else if (ch == ';')
2477  break;
2478  else // cancel
2479  return stre;
2480 
2481  ch = *++stre;
2482  }
2483 
      // step over the ';'
2484  ++stre;
2485  }
2486  else // &#... (dec code)
2487  {
2488  char_t ch = *++stre;
2489 
      // "&#;" carries no digits: leave it untouched
2490  if (ch == ';') return stre;
2491 
2492  for (;;)
2493  {
2494  if (static_cast<unsigned int>(ch - '0') <= 9)
2495  ucsc = 10 * ucsc + (ch - '0');
2496  else if (ch == ';')
2497  break;
2498  else // cancel
2499  return stre;
2500 
2501  ch = *++stre;
2502  }
2503 
      // step over the ';'
2504  ++stre;
2505  }
2506 
      // write the code point at s in the native encoding; s ends up just past the written units
2507  #ifdef PUGIXML_WCHAR_MODE
2508  s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2509  #else
2510  s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2511  #endif
2512 
      // whatever remains of the reference text between s and stre becomes a gap
2513  g.push(s, stre - s);
2514  return stre;
2515  }
2516 
2517  case 'a': // &a
2518  {
2519  ++stre;
2520 
2521  if (*stre == 'm') // &am
2522  {
2523  if (*++stre == 'p' && *++stre == ';') // &amp;
2524  {
2525  *s++ = '&';
2526  ++stre;
2527 
2528  g.push(s, stre - s);
2529  return stre;
2530  }
2531  }
2532  else if (*stre == 'p') // &ap
2533  {
2534  if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
2535  {
2536  *s++ = '\'';
2537  ++stre;
2538 
2539  g.push(s, stre - s);
2540  return stre;
2541  }
2542  }
2543  break;
2544  }
2545 
2546  case 'g': // &g
2547  {
2548  if (*++stre == 't' && *++stre == ';') // &gt;
2549  {
2550  *s++ = '>';
2551  ++stre;
2552 
2553  g.push(s, stre - s);
2554  return stre;
2555  }
2556  break;
2557  }
2558 
2559  case 'l': // &l
2560  {
2561  if (*++stre == 't' && *++stre == ';') // &lt;
2562  {
2563  *s++ = '<';
2564  ++stre;
2565 
2566  g.push(s, stre - s);
2567  return stre;
2568  }
2569  break;
2570  }
2571 
2572  case 'q': // &q
2573  {
2574  if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
2575  {
2576  *s++ = '"';
2577  ++stre;
2578 
2579  g.push(s, stre - s);
2580  return stre;
2581  }
2582  break;
2583  }
2584 
2585  default:
2586  break;
2587  }
2588 
      // not a recognised reference: the text is left as it was
2589  return stre;
2590  }
2591 
2592  // Parser utilities
      // These macros expect the names `s`, `endch`, `optmsk`, `cursor`, `alloc`, `ch`, `error_offset`
      // and `error_status` to be in scope at the point of use.
      // true when c equals e, or when c is the zero terminator and the saved character `endch` equals e
2593  #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
      // advances s past characters classified as ct_space
2594  #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
      // non-zero when any bit of OPT is set in optmsk
2595  #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
      // appends a node of TYPE under cursor and makes it the cursor; reports status_out_of_memory on failure
2596  #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
      // moves the cursor back to its parent
2597  #define PUGI__POPNODE() { cursor = cursor->parent; }
      // advances s until X holds or the zero terminator is reached
2598  #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
      // advances s for as long as X holds
2599  #define PUGI__SCANWHILE(X) { while (X) ++s; }
      // like PUGI__SCANWHILE, but X is written in terms of `ss` (the character under test) and four
      // characters are examined per loop iteration
2600  #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
      // saves the current character in ch, overwrites it with a zero terminator and steps past it
2601  #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
      // records the error position and status, then returns a null pointer from the enclosing function
2602  #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
      // raises err if s has reached the zero terminator
2603  #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2604 
      // Body of the comment normaliser that parse_exclamation invokes as strconv_comment(s, endch);
      // the signature line is not visible in this listing.
      // Rewrites CR and CR LF to a single LF in place, zero-terminates the text where the closing
      // "--" begins and returns the position just past the comment terminator.
      // Returns 0 when the buffer ends before a terminator is found.
      // NOTE(review): numbering skips 2611 at the top of the loop - one statement is not visible here.
2606  {
2607  gap g;
2608 
2609  while (true)
2610  {
2612 
2613  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2614  {
2615  *s++ = '\n'; // replace first one with 0x0a
2616 
      // the LF of a CR LF pair is dropped by turning it into a gap
2617  if (*s == '\n') g.push(s, 1);
2618  }
2619  else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2620  {
2621  *g.flush(s) = 0;
2622 
      // only two characters are skipped when the '>' was the buffer's saved last character (endch)
2623  return s + (s[2] == '>' ? 3 : 2);
2624  }
2625  else if (*s == 0)
2626  {
2627  return 0;
2628  }
2629  else ++s;
2630  }
2631  }
2632 
      // Body of the CDATA normaliser that parse_exclamation invokes as strconv_cdata(s, endch);
      // the signature line is not visible in this listing.
      // Rewrites CR and CR LF to a single LF in place and zero-terminates the text where the closing
      // "]]" begins. Returns the position one past that first ']' (the caller steps over the rest),
      // or 0 when the buffer ends before a terminator is found.
      // NOTE(review): numbering skips 2639 at the top of the loop - one statement is not visible here.
2634  {
2635  gap g;
2636 
2637  while (true)
2638  {
2640 
2641  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2642  {
2643  *s++ = '\n'; // replace first one with 0x0a
2644 
      // the LF of a CR LF pair is dropped by turning it into a gap
2645  if (*s == '\n') g.push(s, 1);
2646  }
2647  else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2648  {
2649  *g.flush(s) = 0;
2650 
2651  return s + 1;
2652  }
2653  else if (*s == 0)
2654  {
2655  return 0;
2656  }
2657  else ++s;
2658  }
2659  }
2660 
      // Signature shared by all PCDATA converters: takes the start of the text, returns where parsing resumes.
2661  typedef char_t* (*strconv_pcdata_t)(char_t*);
2662 
      // PCDATA converter parameterised at compile time:
      //   opt_trim   - strip trailing ct_space characters from the result
      //   opt_eol    - rewrite CR and CR LF to a single LF
      //   opt_escape - decode '&' references through strconv_escape
      // parse() converts the text in place and zero-terminates it. It returns the position after the
      // '<' that ended the text, or the position of the zero terminator when the buffer ended first.
      // NOTE(review): numbering skips 2673 at the top of the loop - one statement is not visible here.
2663  template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2664  {
2665  static char_t* parse(char_t* s)
2666  {
2667  gap g;
2668 
      // remembered so that trimming never walks back past the start of the text
2669  char_t* begin = s;
2670 
2671  while (true)
2672  {
2674 
2675  if (*s == '<') // PCDATA ends here
2676  {
2677  char_t* end = g.flush(s);
2678 
2679  if (opt_trim::value)
2680  while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2681  --end;
2682 
2683  *end = 0;
2684 
2685  return s + 1;
2686  }
2687  else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2688  {
2689  *s++ = '\n'; // replace first one with 0x0a
2690 
2691  if (*s == '\n') g.push(s, 1);
2692  }
2693  else if (opt_escape::value && *s == '&')
2694  {
2695  s = strconv_escape(s, g);
2696  }
2697  else if (*s == 0)
2698  {
      // end of buffer: finish the text exactly like the '<' case, but stay on the terminator
2699  char_t* end = g.flush(s);
2700 
2701  if (opt_trim::value)
2702  while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2703  --end;
2704 
2705  *end = 0;
2706 
2707  return s;
2708  }
2709  else ++s;
2710  }
2711  }
2712  };
2713 
      // Body of the selector that parse_tree invokes as get_strconv_pcdata(optmsk); the signature line
      // and the individual case labels (numbering 2719-2727) are not visible in this listing.
      // Presumably each case returns the matching strconv_pcdata_impl<...>::parse - TODO confirm.
2715  {
      // the shifts below rely on these exact flag values
2716  PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2717 
      // (optmask >> 4) & 3 moves 0x10 / 0x20 to bits 0 / 1; (optmask >> 9) & 4 moves 0x0800 to bit 2
2718  switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2719  {
2728  default: assert(false); return 0; // unreachable
2729  }
2730  }
2731 
      // Signature shared by all attribute value converters: (start of value, closing quote character).
2732  typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2733 
      // Attribute value converters. Each one rewrites the value in place, zero-terminates it at the
      // closing quote and returns the position after that quote, or 0 when the buffer ends first.
      // opt_escape enables decoding of '&' references through strconv_escape.
      // NOTE(review): numbering skips one line at the top of each loop (2753, 2794, 2830, 2862) -
      // a statement per loop is not visible in this listing.
2734  template <typename opt_escape> struct strconv_attribute_impl
2735  {
      // Whitespace normalisation: leading and trailing whitespace is removed and every
      // inner run of whitespace collapses to a single space.
2736  static char_t* parse_wnorm(char_t* s, char_t end_quote)
2737  {
2738  gap g;
2739 
2740  // trim leading whitespaces
2741  if (PUGI__IS_CHARTYPE(*s, ct_space))
2742  {
2743  char_t* str = s;
2744 
2745  do ++str;
2746  while (PUGI__IS_CHARTYPE(*str, ct_space));
2747 
2748  g.push(s, str - s);
2749  }
2750 
2751  while (true)
2752  {
2754 
2755  if (*s == end_quote)
2756  {
2757  char_t* str = g.flush(s);
2758 
      // write the terminator, then keep zeroing backwards over trailing whitespace
2759  do *str-- = 0;
2760  while (PUGI__IS_CHARTYPE(*str, ct_space));
2761 
2762  return s + 1;
2763  }
2764  else if (PUGI__IS_CHARTYPE(*s, ct_space))
2765  {
2766  *s++ = ' ';
2767 
      // any further whitespace directly behind the first one is dropped as a gap
2768  if (PUGI__IS_CHARTYPE(*s, ct_space))
2769  {
2770  char_t* str = s + 1;
2771  while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2772 
2773  g.push(s, str - s);
2774  }
2775  }
2776  else if (opt_escape::value && *s == '&')
2777  {
2778  s = strconv_escape(s, g);
2779  }
2780  else if (!*s)
2781  {
2782  return 0;
2783  }
2784  else ++s;
2785  }
2786  }
2787 
      // Whitespace conversion: every whitespace character becomes a space; a CR LF pair becomes one space.
2788  static char_t* parse_wconv(char_t* s, char_t end_quote)
2789  {
2790  gap g;
2791 
2792  while (true)
2793  {
2795 
2796  if (*s == end_quote)
2797  {
2798  *g.flush(s) = 0;
2799 
2800  return s + 1;
2801  }
2802  else if (PUGI__IS_CHARTYPE(*s, ct_space))
2803  {
2804  if (*s == '\r')
2805  {
2806  *s++ = ' ';
2807 
2808  if (*s == '\n') g.push(s, 1);
2809  }
2810  else *s++ = ' ';
2811  }
2812  else if (opt_escape::value && *s == '&')
2813  {
2814  s = strconv_escape(s, g);
2815  }
2816  else if (!*s)
2817  {
2818  return 0;
2819  }
2820  else ++s;
2821  }
2822  }
2823 
      // End-of-line normalisation only: CR and CR LF become a single LF.
2824  static char_t* parse_eol(char_t* s, char_t end_quote)
2825  {
2826  gap g;
2827 
2828  while (true)
2829  {
2831 
2832  if (*s == end_quote)
2833  {
2834  *g.flush(s) = 0;
2835 
2836  return s + 1;
2837  }
2838  else if (*s == '\r')
2839  {
2840  *s++ = '\n';
2841 
2842  if (*s == '\n') g.push(s, 1);
2843  }
2844  else if (opt_escape::value && *s == '&')
2845  {
2846  s = strconv_escape(s, g);
2847  }
2848  else if (!*s)
2849  {
2850  return 0;
2851  }
2852  else ++s;
2853  }
2854  }
2855 
      // No whitespace handling at all; only '&' references are decoded (when opt_escape is set).
2856  static char_t* parse_simple(char_t* s, char_t end_quote)
2857  {
2858  gap g;
2859 
2860  while (true)
2861  {
2863 
2864  if (*s == end_quote)
2865  {
2866  *g.flush(s) = 0;
2867 
2868  return s + 1;
2869  }
2870  else if (opt_escape::value && *s == '&')
2871  {
2872  s = strconv_escape(s, g);
2873  }
2874  else if (!*s)
2875  {
2876  return 0;
2877  }
2878  else ++s;
2879  }
2880  }
2881  };
2882 
      // Body of the selector that parse_tree invokes as get_strconv_attribute(optmsk); the signature
      // line, the statement at numbering 2885 and the case labels (2889-2904) are not visible in this listing.
      // Presumably each case returns one of the strconv_attribute_impl<...>::parse_* functions - TODO confirm.
2884  {
2886 
      // the four option bits starting at 0x10 form the switch index
2887  switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2888  {
2905  default: assert(false); return 0; // unreachable
2906  }
2907  }
2908 
2909  inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2910  {
2911  xml_parse_result result;
2912  result.status = status;
2913  result.offset = offset;
2914 
2915  return result;
2916  }
2917 
2918  struct xml_parser
2919  {
2921  char_t* error_offset;
2923 
      // Creates a parser that allocates through alloc_ and starts with no error recorded
      // (error_offset null, error_status status_ok).
2924  xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2925  {
2926  }
2927 
2928  // DOCTYPE consists of nested sections of the following possible types:
2929  // <!-- ... -->, <? ... ?>, "...", '...'
2930  // <![...]]>
2931  // <!...>
2932  // First group can not contain nested groups
2933  // Second group can contain nested groups of the same type
2934  // Third group can contain all other groups
      // Skips one primitive DOCTYPE section starting at s - a quoted string, <? ... ?> or <!-- ... --> -
      // and returns the position just past it.
      // NOTE(review): numbering skips a line after each PUGI__SCANFOR (2942, 2951, 2959) and one before
      // the final return (2963); presumably these are the end-of-buffer checks and the branch for
      // input that matches none of the three forms - confirm against the upstream source.
2935  char_t* parse_doctype_primitive(char_t* s)
2936  {
2937  if (*s == '"' || *s == '\'')
2938  {
2939  // quoted string
      // ch remembers which quote opened the string so that only the same quote closes it
2940  char_t ch = *s++;
2941  PUGI__SCANFOR(*s == ch);
2943 
2944  s++;
2945  }
2946  else if (s[0] == '<' && s[1] == '?')
2947  {
2948  // <? ... ?>
2949  s += 2;
2950  PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2952 
2953  s += 2;
2954  }
2955  else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2956  {
      // <!-- ... -->
2957  s += 4;
2958  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2960 
2961  s += 3;
2962  }
2964 
2965  return s;
2966  }
2967 
      // Skips a <![ ... ]]> section starting at s, honouring nested <![ ... ]]> sections, and returns
      // the position just past the "]]>" that closes the outermost one.
      // NOTE(review): the statement executed when the buffer ends before the section is closed
      // (numbering 2996) is not visible in this listing - confirm against the upstream source.
2968  char_t* parse_doctype_ignore(char_t* s)
2969  {
      // number of nested sections currently open inside the outermost one
2970  size_t depth = 0;
2971 
2972  assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2973  s += 3;
2974 
2975  while (*s)
2976  {
2977  if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2978  {
2979  // nested ignore section
2980  s += 3;
2981  depth++;
2982  }
2983  else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2984  {
2985  // ignore section end
2986  s += 3;
2987 
2988  if (depth == 0)
2989  return s;
2990 
2991  depth--;
2992  }
2993  else s++;
2994  }
2995 
2997  }
2998 
2999  char_t* parse_doctype_group(char_t* s, char_t endch)
3000  {
3001  size_t depth = 0;
3002 
3003  assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
3004  s += 2;
3005 
3006  while (*s)
3007  {
3008  if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3009  {
3010  if (s[2] == '[')
3011  {
3012  // ignore
3013  s = parse_doctype_ignore(s);
3014  if (!s) return s;
3015  }
3016  else
3017  {
3018  // some control group
3019  s += 2;
3020  depth++;
3021  }
3022  }
3023  else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3024  {
3025  // unknown tag (forbidden), or some primitive group
3026  s = parse_doctype_primitive(s);
3027  if (!s) return s;
3028  }
3029  else if (*s == '>')
3030  {
3031  if (depth == 0)
3032  return s;
3033 
3034  depth--;
3035  s++;
3036  }
3037  else s++;
3038  }
3039 
3040  if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3041 
3042  return s;
3043  }
3044 
      // Parses the construct that follows "<!" - a comment, a CDATA section or a DOCTYPE.
      // s points at the '!' on entry; nodes are appended under `cursor`; `optmsk` holds the parse options
      // and `endch` the buffer's saved last character.
      // Returns the position after the construct, or null after an error has been recorded.
      // NOTE(review): several lines are not visible in this listing (numbering gaps at 3058, 3064, 3074,
      // 3076, 3082, 3091, 3106, 3115, 3122, 3138, 3142, 3149) - presumably the option checks guarding the
      // braces below and the error branches; confirm against the upstream source.
3045  char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3046  {
3047  // parse node contents, starting with exclamation mark
3048  ++s;
3049 
3050  if (*s == '-') // '<!-...'
3051  {
3052  ++s;
3053 
3054  if (*s == '-') // '<!--...'
3055  {
3056  ++s;
3057 
3059  {
3060  PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3061  cursor->value = s; // Save the offset.
3062  }
3063 
3065  {
3066  s = strconv_comment(s, endch);
3067 
3068  if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3069  }
3070  else
3071  {
3072  // Scan for terminating '-->'.
3073  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3075 
3077  *s = 0; // Zero-terminate this segment at the first terminating '-'.
3078 
3079  s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3080  }
3081  }
3083  }
3084  else if (*s == '[')
3085  {
3086  // '<![CDATA[...'
3087  if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3088  {
3089  ++s;
3090 
3092  {
3093  PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3094  cursor->value = s; // Save the offset.
3095 
      // with parse_eol set the text goes through strconv_cdata; otherwise it is only scanned and terminated
3096  if (PUGI__OPTSET(parse_eol))
3097  {
3098  s = strconv_cdata(s, endch);
3099 
3100  if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3101  }
3102  else
3103  {
3104  // Scan for terminating ']]>'.
3105  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3107 
3108  *s++ = 0; // Zero-terminate this segment.
3109  }
3110  }
3111  else // Flagged for discard, but we still have to scan for the terminator.
3112  {
3113  // Scan for terminating ']]>'.
3114  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3116 
3117  ++s;
3118  }
3119 
3120  s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3121  }
3123  }
3124  else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3125  {
      // back up to the '<' so that parse_doctype_group sees the whole "<!"
3126  s -= 2;
3127 
      // a DOCTYPE is only accepted directly under the root
3128  if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
3129 
      // 9 characters past the '<': first character after "<!DOCTYPE"
3130  char_t* mark = s + 9;
3131 
3132  s = parse_doctype_group(s, endch);
3133  if (!s) return s;
3134 
3135  assert((*s == 0 && endch == '>') || *s == '>');
      // replace the closing '>' (when present) by a terminator and step past it
3136  if (*s) *s++ = 0;
3137 
3139  {
3140  while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3141 
3143 
3144  cursor->value = mark;
3145  }
3146  }
3147  else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3148  else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3150 
3151  return s;
3152  }
3153 
      // Parses the construct that follows "<?" - the XML declaration or a processing instruction.
      // s points at the '?' on entry. ref_cursor is read on entry and written back before the normal
      // return, so the caller sees any node pushed here.
      // A target spelled "xml" in any letter case is treated as the declaration; whether a node is
      // created depends on parse_declaration / parse_pi in optmsk.
      // Returns the position to continue from, or null after an error has been recorded.
      // NOTE(review): several lines are not visible in this listing (numbering gaps at 3166, 3168-3169,
      // 3181, 3185, 3209, 3231, 3237) - presumably the target scan, the PUGI__PUSHNODE calls and
      // end-of-buffer checks; confirm against the upstream source.
3154  char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3155  {
3156  // load into registers
3157  xml_node_struct* cursor = ref_cursor;
3158  char_t ch = 0;
3159 
3160  // parse node contents, starting with question mark
3161  ++s;
3162 
3163  // read PI target
3164  char_t* target = s;
3165 
3167 
3170 
3171  // determine node type; stricmp / strcasecmp is not portable
      // (c | ' ') folds an upper-case ASCII letter onto its lower-case form
3172  bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3173 
3174  if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3175  {
3176  if (declaration)
3177  {
3178  // disallow non top-level declarations
3179  if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3180 
3182  }
3183  else
3184  {
3186  }
3187 
3188  cursor->name = target;
3189 
      // terminate the target name; ch receives the character that followed it
3190  PUGI__ENDSEG();
3191 
3192  // parse value/attributes
3193  if (ch == '?')
3194  {
3195  // empty node
3196  if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
      // the '>' is only stepped over when it is actually present in the buffer
3197  s += (*s == '>');
3198 
3199  PUGI__POPNODE();
3200  }
3201  else if (PUGI__IS_CHARTYPE(ch, ct_space))
3202  {
3203  PUGI__SKIPWS();
3204 
3205  // scan for tag end
3206  char_t* value = s;
3207 
3208  PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3210 
3211  if (declaration)
3212  {
3213  // replace ending ? with / so that 'element' terminates properly
3214  *s = '/';
3215 
3216  // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3217  s = value;
3218  }
3219  else
3220  {
3221  // store value and step over >
3222  cursor->value = value;
3223 
3224  PUGI__POPNODE();
3225 
3226  PUGI__ENDSEG();
3227 
3228  s += (*s == '>');
3229  }
3230  }
3232  }
3233  else
3234  {
      // the node is not wanted: skip to the end of the construct without creating anything
3235  // scan for tag end
3236  PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3238 
3239  s += (s[1] == '>' ? 2 : 1);
3240  }
3241 
3242  // store from registers
3243  ref_cursor = cursor;
3244 
3245  return s;
3246  }
3247 
      // Main parsing loop: walks the zero-terminated buffer at s and builds nodes and attributes under `root`.
      // Names and values are stored as pointers into the buffer, which is modified in place
      // (PUGI__ENDSEG and the strconv_* converters write terminators into it).
      // `optmsk` holds the parse options; `endch` is the buffer's saved last character, consulted
      // whenever the terminator is reached where a specific character was expected.
      // Returns the final scan position, or null after an error has been recorded through PUGI__THROW_ERROR.
      // NOTE(review): a number of lines are not visible in this listing (numbering gaps, e.g. 3287, 3319,
      // 3321, 3323, 3340, 3352, 3359, 3372, 3381, 3391, 3422, 3435, 3439, 3445) - presumably error
      // branches and option checks; confirm against the upstream source.
3248  char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3249  {
      // the converters are chosen once from the option mask and reused for every attribute / text run
3250  strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3251  strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3252 
3253  char_t ch = 0;
3254  xml_node_struct* cursor = root;
3255  char_t* mark = s;
3256 
3257  while (*s != 0)
3258  {
3259  if (*s == '<')
3260  {
3261  ++s;
3262 
      // entered with s just past a '<'; also reached by goto after a text run (end of the loop body)
3263  LOC_TAG:
3264  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3265  {
3266  PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3267 
3268  cursor->name = s;
3269 
3270  PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3271  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3272 
3273  if (ch == '>')
3274  {
3275  // end of tag
3276  }
3277  else if (PUGI__IS_CHARTYPE(ch, ct_space))
3278  {
      // also the jump target used after parse_question leaves the cursor on a declaration node
3279  LOC_ATTRIBUTES:
3280  while (true)
3281  {
3282  PUGI__SKIPWS(); // Eat any whitespace.
3283 
3284  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3285  {
3286  xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3288 
3289  a->name = s; // Save the offset.
3290 
3291  PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3292  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3293 
3294  if (PUGI__IS_CHARTYPE(ch, ct_space))
3295  {
3296  PUGI__SKIPWS(); // Eat any whitespace.
3297 
3298  ch = *s;
3299  ++s;
3300  }
3301 
3302  if (ch == '=') // '<... #=...'
3303  {
3304  PUGI__SKIPWS(); // Eat any whitespace.
3305 
3306  if (*s == '"' || *s == '\'') // '<... #="...'
3307  {
3308  ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3309  ++s; // Step over the quote.
3310  a->value = s; // Save the offset.
3311 
3312  s = strconv_attribute(s, ch);
3313 
3314  if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3315 
3316  // After this line the loop continues from the start;
3317  // Whitespaces, / and > are ok, symbols and EOF are wrong,
3318  // everything else will be detected
3320  }
3322  }
3324  }
3325  else if (*s == '/')
3326  {
3327  ++s;
3328 
3329  if (*s == '>')
3330  {
3331  PUGI__POPNODE();
3332  s++;
3333  break;
3334  }
3335  else if (*s == 0 && endch == '>')
3336  {
      // "/>" whose '>' was the buffer's saved last character
3337  PUGI__POPNODE();
3338  break;
3339  }
3341  }
3342  else if (*s == '>')
3343  {
3344  ++s;
3345 
3346  break;
3347  }
3348  else if (*s == 0 && endch == '>')
3349  {
3350  break;
3351  }
3353  }
3354 
3355  // !!!
3356  }
3357  else if (ch == '/') // '<#.../'
3358  {
3360 
3361  PUGI__POPNODE(); // Pop.
3362 
3363  s += (*s == '>');
3364  }
3365  else if (ch == 0)
3366  {
3367  // we stepped over null terminator, backtrack & handle closing tag
3368  --s;
3369 
3370  if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3371  }
3373  }
3374  else if (*s == '/')
3375  {
      // closing tag: its name must match the name of the element the cursor is on
3376  ++s;
3377 
3378  mark = s;
3379 
3380  char_t* name = cursor->name;
3382 
3383  while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3384  {
3385  if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3386  }
3387 
      // the closing tag ended before the whole element name was matched
3388  if (*name)
3389  {
3390  if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3392  }
3393 
3394  PUGI__POPNODE(); // Pop.
3395 
3396  PUGI__SKIPWS();
3397 
3398  if (*s == 0)
3399  {
3400  if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3401  }
3402  else
3403  {
3404  if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3405  ++s;
3406  }
3407  }
3408  else if (*s == '?') // '<?...'
3409  {
3410  s = parse_question(s, cursor, optmsk, endch);
3411  if (!s) return s;
3412 
3413  assert(cursor);
      // a declaration's pseudo-attributes are parsed by the regular attribute loop
3414  if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3415  }
3416  else if (*s == '!') // '<!...'
3417  {
3418  s = parse_exclamation(s, cursor, optmsk, endch);
3419  if (!s) return s;
3420  }
3421  else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3423  }
3424  else
3425  {
3426  mark = s; // Save this offset while searching for a terminator.
3427 
3428  PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3429 
3430  if (*s == '<' || !*s)
3431  {
3432  // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3433  assert(mark != s);
3434 
3436  {
3437  continue;
3438  }
3440  {
3441  if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3442  }
3443  }
3444 
      // rewind to the saved start of the run
3446  s = mark;
3447 
      // text directly under the root is only kept when parse_fragment is set
3448  if (cursor->parent || PUGI__OPTSET(parse_fragment))
3449  {
      // with parse_embed_pcdata the first text of an otherwise empty element is stored in the
      // element's own value instead of a separate node_pcdata child
3450  if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3451  {
3452  cursor->value = s; // Save the offset.
3453  }
3454  else
3455  {
3456  PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3457 
3458  cursor->value = s; // Save the offset.
3459 
3460  PUGI__POPNODE(); // Pop since this is a standalone.
3461  }
3462 
3463  s = strconv_pcdata(s);
3464 
3465  if (!*s) break;
3466  }
3467  else
3468  {
3469  PUGI__SCANFOR(*s == '<'); // '...<'
3470  if (!*s) break;
3471 
3472  ++s;
3473  }
3474 
3475  // We're after '<'
3476  goto LOC_TAG;
3477  }
3478  }
3479 
3480  // check that last tag is closed
3481  if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3482 
3483  return s;
3484  }
3485 
3486  #ifdef PUGIXML_WCHAR_MODE
3487  static char_t* parse_skip_bom(char_t* s)
3488  {
3489  unsigned int bom = 0xfeff;
3490  return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3491  }
3492  #else
3493  static char_t* parse_skip_bom(char_t* s)
3494  {
3495  return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3496  }
3497  #endif
3498 
      // Body of the helper that parse() invokes as has_element_node_siblings(node); the signature line
      // is not visible in this listing.
      // Follows the next_sibling chain starting at `node` (inclusive) and reports whether any node
      // on it has type node_element. A null `node` yields false.
3500  {
3501  while (node)
3502  {
3503  if (PUGI__NODETYPE(node) == node_element) return true;
3504 
3505  node = node->next_sibling;
3506  }
3507 
3508  return false;
3509  }
3510 
      // Entry point: parses buffer[0..length) into children of `root`, allocating through `xmldoc`.
      // The last character of the buffer is overwritten with a zero terminator before parsing; its
      // original value is handed to the parser as `endch`.
      // Returns the status together with the offset of the error position relative to `buffer`.
      // NOTE(review): the statement controlled by the `length == 0` check (numbering 3515) is not visible
      // in this listing - confirm against the upstream source.
3511  static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3512  {
3513  // early-out for empty documents
3514  if (length == 0)
3516 
3517  // get last child of the root before parsing
      // (remembered so that nodes appended by this parse can be told apart from existing ones)
3518  xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3519 
3520  // create parser on stack
3521  xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3522 
3523  // save last character and make buffer zero-terminated (speeds up parsing)
3524  char_t endch = buffer[length - 1];
3525  buffer[length - 1] = 0;
3526 
3527  // skip BOM to make sure it does not end up as part of parse output
3528  char_t* buffer_data = parse_skip_bom(buffer);
3529 
3530  // perform actual parsing
      // (the returned pointer is not used; the outcome is read from the parser's error fields)
3531  parser.parse_tree(buffer_data, root, optmsk, endch);
3532 
3533  xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3534  assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3535 
      // presumably xml_parse_result converts to true when the status is status_ok - TODO confirm
3536  if (result)
3537  {
3538  // since we removed last character, we have to handle the only possible false positive (stray <)
3539  if (endch == '<')
3540  return make_parse_result(status_unrecognized_tag, length - 1);
3541 
3542  // check if there are any element nodes parsed
3543  xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3544 
3545  if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3546  return make_parse_result(status_no_document_element, length - 1);
3547  }
3548  else
3549  {
3550  // roll back offset if it occurs on a null terminator in the source buffer
3551  if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3552  result.offset--;
3553  }
3554 
3555  return result;
3556  }
3557  };
3558 
3559  // Output facilities
      // Body of a function whose signature line is not visible in this listing
      // (presumably get_write_native_encoding() - TODO confirm against the upstream source).
      // Yields get_wchar_encoding() when PUGIXML_WCHAR_MODE is defined and encoding_utf8 otherwise.
3561  {
3562  #ifdef PUGIXML_WCHAR_MODE
3563  return get_wchar_encoding();
3564  #else
3565  return encoding_utf8;
3566  #endif
3567  }
3568 
      // Body of a function whose signature line is not visible in this listing
      // (presumably get_write_encoding(xml_encoding encoding) - TODO confirm against the upstream source).
      // Resolves a requested encoding to a concrete one: encoding_wchar becomes get_wchar_encoding(),
      // any other explicit encoding is returned as given, and encoding_auto falls back to encoding_utf8.
      // NOTE(review): the statements belonging to the utf16 / utf32 comments (numbering 3575 and 3578)
      // are not visible in this listing.
3570  {
3571  // replace wchar encoding with utf implementation
3572  if (encoding == encoding_wchar) return get_wchar_encoding();
3573 
3574  // replace utf16 encoding with utf16 with specific endianness
3576 
3577  // replace utf32 encoding with utf32 with specific endianness
3579 
3580  // only do autodetection if no explicit encoding is requested
3581  if (encoding != encoding_auto) return encoding;
3582 
3583  // assume utf8 encoding
3584  return encoding_utf8;
3585  }
3586 
3587  template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3588  {
3589  PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3590 
3591  typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3592 
3593  return static_cast<size_t>(end - dest) * sizeof(*dest);
3594  }
3595 
3596  template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3597  {
3598  PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3599 
3600  typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3601 
3602  if (opt_swap)
3603  {
3604  for (typename T::value_type i = dest; i != end; ++i)
3605  *i = endian_swap(*i);
3606  }
3607 
3608  return static_cast<size_t>(end - dest) * sizeof(*dest);
3609  }
3610 
3611 #ifdef PUGIXML_WCHAR_MODE
3612  PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3613  {
3614  if (length < 1) return 0;
3615 
3616  // discard last character if it's the lead of a surrogate pair
3617  return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3618  }
3619 
// Wide-character build: converts `length` characters from `data` into `encoding`.
// The output goes to whichever of r_char / r_u8 / r_u16 / r_u32 matches the target unit size.
// Returns the number of bytes produced; asserts and returns 0 for an encoding not handled below.
3620  PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3621  {
3622  // only endian-swapping is required
3623  if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3624  {
3625  convert_wchar_endian_swap(r_char, data, length);
3626 
3627  return length * sizeof(char_t);
3628  }
3629 
3630  // convert to utf8
3631  if (encoding == encoding_utf8)
3632  return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3633 
3634  // convert to utf16
3635  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3636  {
// NOTE(review): `native_encoding` is used below but its declaration (listing line 3637) is not visible here - confirm against the upstream source
3638 
// the last argument requests a byte swap when the target byte order differs from native_encoding
3639  return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3640  }
3641 
3642  // convert to utf32
3643  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3644  {
// NOTE(review): `native_encoding` declaration (listing line 3645) is likewise not visible here - confirm
3646 
3647  return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3648  }
3649 
3650  // convert to latin1
3651  if (encoding == encoding_latin1)
3652  return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3653 
3654  assert(false && "Invalid encoding"); // unreachable
3655  return 0;
3656  }
3657 #else
3658  PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3659  {
3660  if (length < 5) return 0;
3661 
3662  for (size_t i = 1; i <= 4; ++i)
3663  {
3664  uint8_t ch = static_cast<uint8_t>(data[length - i]);
3665 
3666  // either a standalone character or a leading one
3667  if ((ch & 0xc0) != 0x80) return length - i;
3668  }
3669 
3670  // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3671  return length;
3672  }
3673 
// Non-wchar build: converts `length` characters from `data` (decoded with utf8_decoder) into `encoding`.
// The first parameter is unused in this build (its name is commented out).
// Output goes to r_u16, r_u32 or r_u8 depending on the target; returns the number of bytes produced.
// Asserts and returns 0 for an encoding not handled below.
3674  PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3675  {
3676  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3677  {
// NOTE(review): `native_encoding` is used below but its declaration (listing line 3678) is not visible here - confirm against the upstream source
3679 
// the last argument requests a byte swap when the target byte order differs from native_encoding
3680  return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3681  }
3682 
3683  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3684  {
// NOTE(review): `native_encoding` declaration (listing line 3685) is likewise not visible here - confirm
3686 
3687  return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3688  }
3689 
3690  if (encoding == encoding_latin1)
3691  return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3692 
3693  assert(false && "Invalid encoding"); // unreachable
3694  return 0;
3695  }
3696 #endif
3697 
3699  {
3701  xml_buffered_writer& operator=(const xml_buffered_writer&);
3702 
3703  public:
3704  xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3705  {
3706  PUGI__STATIC_ASSERT(bufcapacity >= 8);
3707  }
3708 
3709  size_t flush()
3710  {
3711  flush(buffer, bufsize);
3712  bufsize = 0;
3713  return 0;
3714  }
3715 
3716  void flush(const char_t* data, size_t size)
3717  {
3718  if (size == 0) return;
3719 
3720  // fast path, just write data
3721  if (encoding == get_write_native_encoding())
3722  writer.write(data, size * sizeof(char_t));
3723  else
3724  {
3725  // convert chunk
3726  size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3727  assert(result <= sizeof(scratch));
3728 
3729  // write data
3730  writer.write(scratch.data_u8, result);
3731  }
3732  }
3733 
3734  void write_direct(const char_t* data, size_t length)
3735  {
3736  // flush the remaining buffer contents
3737  flush();
3738 
3739  // handle large chunks
3740  if (length > bufcapacity)
3741  {
3742  if (encoding == get_write_native_encoding())
3743  {
3744  // fast path, can just write data chunk
3745  writer.write(data, length * sizeof(char_t));
3746  return;
3747  }
3748 
3749  // need to convert in suitable chunks
3750  while (length > bufcapacity)
3751  {
3752  // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3753  // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3754  size_t chunk_size = get_valid_length(data, bufcapacity);
3755  assert(chunk_size);
3756 
3757  // convert chunk and write
3758  flush(data, chunk_size);
3759 
3760  // iterate
3761  data += chunk_size;
3762  length -= chunk_size;
3763  }
3764 
3765  // small tail is copied below
3766  bufsize = 0;
3767  }
3768 
3769  memcpy(buffer + bufsize, data, length * sizeof(char_t));
3770  bufsize += length;
3771  }
3772 
3773  void write_buffer(const char_t* data, size_t length)
3774  {
3775  size_t offset = bufsize;
3776 
3777  if (offset + length <= bufcapacity)
3778  {
3779  memcpy(buffer + offset, data, length * sizeof(char_t));
3780  bufsize = offset + length;
3781  }
3782  else
3783  {
3784  write_direct(data, length);
3785  }
3786  }
3787 
3788  void write_string(const char_t* data)
3789  {
3790  // write the part of the string that fits in the buffer
3791  size_t offset = bufsize;
3792 
3793  while (*data && offset < bufcapacity)
3794  buffer[offset++] = *data++;
3795 
3796  // write the rest
3797  if (offset < bufcapacity)
3798  {
3799  bufsize = offset;
3800  }
3801  else
3802  {
3803  // backtrack a bit if we have split the codepoint
3804  size_t length = offset - bufsize;
3805  size_t extra = length - get_valid_length(data - length, length);
3806 
3807  bufsize = offset - extra;
3808 
3809  write_direct(data - extra, strlength(data) + extra);
3810  }
3811  }
3812 
3813  void write(char_t d0)
3814  {
3815  size_t offset = bufsize;
3816  if (offset > bufcapacity - 1) offset = flush();
3817 
3818  buffer[offset + 0] = d0;
3819  bufsize = offset + 1;
3820  }
3821 
3822  void write(char_t d0, char_t d1)
3823  {
3824  size_t offset = bufsize;
3825  if (offset > bufcapacity - 2) offset = flush();
3826 
3827  buffer[offset + 0] = d0;
3828  buffer[offset + 1] = d1;
3829  bufsize = offset + 2;
3830  }
3831 
3832  void write(char_t d0, char_t d1, char_t d2)
3833  {
3834  size_t offset = bufsize;
3835  if (offset > bufcapacity - 3) offset = flush();
3836 
3837  buffer[offset + 0] = d0;
3838  buffer[offset + 1] = d1;
3839  buffer[offset + 2] = d2;
3840  bufsize = offset + 3;
3841  }
3842 
3843  void write(char_t d0, char_t d1, char_t d2, char_t d3)
3844  {
3845  size_t offset = bufsize;
3846  if (offset > bufcapacity - 4) offset = flush();
3847 
3848  buffer[offset + 0] = d0;
3849  buffer[offset + 1] = d1;
3850  buffer[offset + 2] = d2;
3851  buffer[offset + 3] = d3;
3852  bufsize = offset + 4;
3853  }
3854 
3855  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3856  {
3857  size_t offset = bufsize;
3858  if (offset > bufcapacity - 5) offset = flush();
3859 
3860  buffer[offset + 0] = d0;
3861  buffer[offset + 1] = d1;
3862  buffer[offset + 2] = d2;
3863  buffer[offset + 3] = d3;
3864  buffer[offset + 4] = d4;
3865  bufsize = offset + 5;
3866  }
3867 
3868  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3869  {
3870  size_t offset = bufsize;
3871  if (offset > bufcapacity - 6) offset = flush();
3872 
3873  buffer[offset + 0] = d0;
3874  buffer[offset + 1] = d1;
3875  buffer[offset + 2] = d2;
3876  buffer[offset + 3] = d3;
3877  buffer[offset + 4] = d4;
3878  buffer[offset + 5] = d5;
3879  bufsize = offset + 6;
3880  }
3881 
3882  // utf8 maximum expansion: x4 (-> utf32)
3883  // utf16 maximum expansion: x2 (-> utf32)
3884  // utf32 maximum expansion: x1
// Buffer sizing: bufcapacitybytes is the byte budget (PUGIXML_MEMORY_OUTPUT_STACK when defined, else 10240).
// bufcapacity is the number of characters that budget allows when each character costs
// sizeof(char_t) bytes in `buffer` plus 4 bytes in `scratch`.
3885  enum
3886  {
3887  bufcapacitybytes =
3888  #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3889  PUGIXML_MEMORY_OUTPUT_STACK
3890  #else
3891  10240
3892  #endif
3893  ,
3894  bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3895  };
3896 
// pending output in the native character type; bufsize counts the characters currently held
3897  char_t buffer[bufcapacity];
3898 
// conversion target for flush(data, size); the members are differently typed views of the same storage,
// one per output unit size, and the converted bytes are written out through data_u8
3899  union
3900  {
3901  uint8_t data_u8[4 * bufcapacity];
3902  uint16_t data_u16[2 * bufcapacity];
3903  uint32_t data_u32[bufcapacity];
3904  char_t data_char[bufcapacity];
3905  } scratch;
3906 
// destination of all flushed data
3907  xml_writer& writer;
// number of characters currently stored in `buffer`
3908  size_t bufsize;
3910  };
3911 
// Writes the NUL-terminated string `s`, replacing & < > " with their named entities and
// any other stopping character with a two-digit decimal character reference.
// `type` is presumably the character class that decides which characters need escaping - confirm
// (its only use would be in the scan statement that is missing from this listing, see below).
3912  PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3913  {
3914  while (*s)
3915  {
3916  const char_t* prev = s;
3917 
3918  // While *s is a usual symbol
// NOTE(review): the statement that advances s past ordinary characters (listing line 3919) is not visible here;
// as shown, s still equals prev at the write_buffer call below - confirm against the upstream source
3920 
// emit the run of unescaped characters [prev, s) in one go
3921  writer.write_buffer(prev, static_cast<size_t>(s - prev));
3922 
3923  switch (*s)
3924  {
3925  case 0: break;
3926  case '&':
3927  writer.write('&', 'a', 'm', 'p', ';');
3928  ++s;
3929  break;
3930  case '<':
3931  writer.write('&', 'l', 't', ';');
3932  ++s;
3933  break;
3934  case '>':
3935  writer.write('&', 'g', 't', ';');
3936  ++s;
3937  break;
3938  case '"':
3939  writer.write('&', 'q', 'u', 'o', 't', ';');
3940  ++s;
3941  break;
3942  default: // s is not a usual symbol
3943  {
3944  unsigned int ch = static_cast<unsigned int>(*s++);
3945  assert(ch < 32);
3946 
// ch < 32, so two decimal digits are always enough: &#NN;
3947  writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3948  }
3949  }
3950  }
3951  }
3952 
3953  PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3954  {
3955  if (flags & format_no_escapes)
3956  writer.write_string(s);
3957  else
3958  text_output_escaped(writer, s, type);
3959  }
3960 
3961  PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3962  {
3963  do
3964  {
3965  writer.write('<', '!', '[', 'C', 'D');
3966  writer.write('A', 'T', 'A', '[');
3967 
3968  const char_t* prev = s;
3969 
3970  // look for ]]> sequence - we can't output it as is since it terminates CDATA
3971  while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3972 
3973  // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3974  if (*s) s += 2;
3975 
3976  writer.write_buffer(prev, static_cast<size_t>(s - prev));
3977 
3978  writer.write(']', ']', '>');
3979  }
3980  while (*s);
3981  }
3982 
3983  PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3984  {
3985  switch (indent_length)
3986  {
3987  case 1:
3988  {
3989  for (unsigned int i = 0; i < depth; ++i)
3990  writer.write(indent[0]);
3991  break;
3992  }
3993 
3994  case 2:
3995  {
3996  for (unsigned int i = 0; i < depth; ++i)
3997  writer.write(indent[0], indent[1]);
3998  break;
3999  }
4000 
4001  case 3:
4002  {
4003  for (unsigned int i = 0; i < depth; ++i)
4004  writer.write(indent[0], indent[1], indent[2]);
4005  break;
4006  }
4007 
4008  case 4:
4009  {
4010  for (unsigned int i = 0; i < depth; ++i)
4011  writer.write(indent[0], indent[1], indent[2], indent[3]);
4012  break;
4013  }
4014 
4015  default:
4016  {
4017  for (unsigned int i = 0; i < depth; ++i)
4018  writer.write_buffer(indent, indent_length);
4019  }
4020  }
4021  }
4022 
4023  PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4024  {
4025  writer.write('<', '!', '-', '-');
4026 
4027  while (*s)
4028  {
4029  const char_t* prev = s;
4030 
4031  // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4032  while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4033 
4034  writer.write_buffer(prev, static_cast<size_t>(s - prev));
4035 
4036  if (*s)
4037  {
4038  assert(*s == '-');
4039 
4040  writer.write('-', ' ');
4041  ++s;
4042  }
4043  }
4044 
4045  writer.write('-', '-', '>');
4046  }
4047 
4048  PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4049  {
4050  while (*s)
4051  {
4052  const char_t* prev = s;
4053 
4054  // look for ?> sequence - we can't output it since ?> terminates PI
4055  while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4056 
4057  writer.write_buffer(prev, static_cast<size_t>(s - prev));
4058 
4059  if (*s)
4060  {
4061  assert(s[0] == '?' && s[1] == '>');
4062 
4063  writer.write('?', ' ', '>');
4064  s += 2;
4065  }
4066  }
4067  }
4068 
// Writes every attribute of `node` as name="value". Each attribute is preceded either by a newline
// plus indentation at depth + 1 or by a single space. Attributes without a name are written as
// ":anonymous"; attribute values go through text_output with ctx_special_attr.
4069  PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4070  {
4071  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4072 
4073  for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4074  {
// NOTE(review): the condition selecting between the two branches below (listing line 4075) is not visible here;
// presumably it tests the attribute-indentation flag in `flags` - confirm against the upstream source
4076  {
4077  writer.write('\n');
4078 
4079  text_output_indent(writer, indent, indent_length, depth + 1);
4080  }
4081  else
4082  {
4083  writer.write(' ');
4084  }
4085 
4086  writer.write_string(a->name ? a->name + 0 : default_name);
4087  writer.write('=', '"');
4088 
// an attribute without a value is written with an empty quoted string
4089  if (a->value)
4090  text_output(writer, a->value, ctx_special_attr, flags);
4091 
4092  writer.write('"');
4093  }
4094  }
4095 
4096  PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4097  {
4098  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4099  const char_t* name = node->name ? node->name + 0 : default_name;
4100 
4101  writer.write('<');
4102  writer.write_string(name);
4103 
4104  if (node->first_attribute)
4105  node_output_attributes(writer, node, indent, indent_length, flags, depth);
4106 
4107  // element nodes can have value if parse_embed_pcdata was used
4108  if (!node->value)
4109  {
4110  if (!node->first_child)
4111  {
4112  if (flags & format_no_empty_element_tags)
4113  {
4114  writer.write('>', '<', '/');
4115  writer.write_string(name);
4116  writer.write('>');
4117 
4118  return false;
4119  }
4120  else
4121  {
4122  if ((flags & format_raw) == 0)
4123  writer.write(' ');
4124 
4125  writer.write('/', '>');
4126 
4127  return false;
4128  }
4129  }
4130  else
4131  {
4132  writer.write('>');
4133 
4134  return true;
4135  }
4136  }
4137  else
4138  {
4139  writer.write('>');
4140 
4141  text_output(writer, node->value, ctx_special_pcdata, flags);
4142 
4143  if (!node->first_child)
4144  {
4145  writer.write('<', '/');
4146  writer.write_string(name);
4147  writer.write('>');
4148 
4149  return false;
4150  }
4151  else
4152  {
4153  return true;
4154  }
4155  }
4156  }
4157 
4159  {
4160  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4161  const char_t* name = node->name ? node->name + 0 : default_name;
4162 
4163  writer.write('<', '/');
4164  writer.write_string(name);
4165  writer.write('>');
4166  }
4167 
4168  PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4169  {
4170  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4171 
4172  switch (PUGI__NODETYPE(node))
4173  {
4174  case node_pcdata:
4175  text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4176  break;
4177 
4178  case node_cdata:
4179  text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4180  break;
4181 
4182  case node_comment:
4183  node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4184  break;
4185 
4186  case node_pi:
4187  writer.write('<', '?');
4188  writer.write_string(node->name ? node->name + 0 : default_name);
4189 
4190  if (node->value)
4191  {
4192  writer.write(' ');
4193  node_output_pi_value(writer, node->value);
4194  }
4195 
4196  writer.write('?', '>');
4197  break;
4198 
4199  case node_declaration:
4200  writer.write('<', '?');
4201  writer.write_string(node->name ? node->name + 0 : default_name);
4202  node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4203  writer.write('?', '>');
4204  break;
4205 
4206  case node_doctype:
4207  writer.write('<', '!', 'D', 'O', 'C');
4208  writer.write('T', 'Y', 'P', 'E');
4209 
4210  if (node->value)
4211  {
4212  writer.write(' ');
4213  writer.write_string(node->value);
4214  }
4215 
4216  writer.write('>');
4217  break;
4218 
4219  default:
4220  assert(false && "Invalid node type"); // unreachable
4221  }
4222  }
4223 
4225  {
4228  };
4229 
// Serializes the subtree rooted at `root` to `writer` without recursion: the tree is walked
// through first_child / next_sibling / parent links. `depth` is the indentation level of `root`.
// indent_flags carries, from one node to the next, whether a newline and/or indentation has to be
// written before the next piece of markup; it is cleared after text so that text stays inline.
4230  PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4231  {
// indentation is only in effect when an indent flag is set and format_raw is not
4232  size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
// the first node gets indentation but no leading newline
4233  unsigned int indent_flags = indent_indent;
4234 
4235  xml_node_struct* node = root;
4236 
4237  do
4238  {
4239  assert(node);
4240 
4241  // begin writing current node
4242  if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4243  {
4244  node_output_simple(writer, node, flags);
4245 
// no newline/indent after text content
4246  indent_flags = 0;
4247  }
4248  else
4249  {
4250  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4251  writer.write('\n');
4252 
4253  if ((indent_flags & indent_indent) && indent_length)
4254  text_output_indent(writer, indent, indent_length, depth);
4255 
4256  if (PUGI__NODETYPE(node) == node_element)
4257  {
4258  indent_flags = indent_newline | indent_indent;
4259 
// node_output_start returns true when children remain to be written
4260  if (node_output_start(writer, node, indent, indent_length, flags, depth))
4261  {
4262  // element nodes can have value if parse_embed_pcdata was used
4263  if (node->value)
4264  indent_flags = 0;
4265 
// descend; `continue` re-evaluates the loop condition, which holds because node is now below root
4266  node = node->first_child;
4267  depth++;
4268  continue;
4269  }
4270  }
4271  else if (PUGI__NODETYPE(node) == node_document)
4272  {
4273  indent_flags = indent_indent;
4274 
// the document node writes nothing itself and does not increase depth
4275  if (node->first_child)
4276  {
4277  node = node->first_child;
4278  continue;
4279  }
4280  }
4281  else
4282  {
4283  node_output_simple(writer, node, flags);
4284 
4285  indent_flags = indent_newline | indent_indent;
4286  }
4287  }
4288 
4289  // continue to the next node
// climb until a sibling is found or root is reached, closing each element that is left
4290  while (node != root)
4291  {
4292  if (node->next_sibling)
4293  {
4294  node = node->next_sibling;
4295  break;
4296  }
4297 
4298  node = node->parent;
4299 
4300  // write closing node
4301  if (PUGI__NODETYPE(node) == node_element)
4302  {
4303  depth--;
4304 
4305  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4306  writer.write('\n');
4307 
4308  if ((indent_flags & indent_indent) && indent_length)
4309  text_output_indent(writer, indent, indent_length, depth);
4310 
4311  node_output_end(writer, node);
4312 
4313  indent_flags = indent_newline | indent_indent;
4314  }
4315  }
4316  }
4317  while (node != root);
4318 
// trailing newline after the last piece of markup, unless raw output is requested
4319  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4320  writer.write('\n');
4321  }
4322 
4324  {
4325  for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4326  {
4327  xml_node_type type = PUGI__NODETYPE(child);
4328 
4329  if (type == node_declaration) return true;
4330  if (type == node_element) return false;
4331  }
4332 
4333  return false;
4334  }
4335 
4336  PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4337  {
4338  for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4339  if (a == attr)
4340  return true;
4341 
4342  return false;
4343  }
4344 
4346  {
4347  return parent == node_element || parent == node_declaration;
4348  }
4349 
4351  {
4352  if (parent != node_document && parent != node_element) return false;
4353  if (child == node_document || child == node_null) return false;
4354  if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4355 
4356  return true;
4357  }
4358 
4359  PUGI__FN bool allow_move(xml_node parent, xml_node child)
4360  {
4361  // check that child can be a child of parent
4362  if (!allow_insert_child(parent.type(), child.type()))
4363  return false;
4364 
4365  // check that node is not moved between documents
4366  if (parent.root() != child.root())
4367  return false;
4368 
4369  // check that new parent is not in the child subtree
4370  xml_node cur = parent;
4371 
4372  while (cur)
4373  {
4374  if (cur == child)
4375  return false;
4376 
4377  cur = cur.parent();
4378  }
4379 
4380  return true;
4381  }
4382 
// Copies the string `source` into the empty slot `dest`.
// When `alloc` is non-null and the source string is not flagged by `header_mask` in source_header,
// the pointer itself is shared instead of duplicating the characters; otherwise the text is copied
// with strcpy_insitu. A null `source` leaves `dest` untouched.
4383  template <typename String, typename Header>
4384  PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4385  {
// precondition: the destination slot is empty and not marked by header_mask
4386  assert(!dest && (header & header_mask) == 0);
4387 
4388  if (source)
4389  {
4390  if (alloc && (source_header & header_mask) == 0)
4391  {
4392  dest = source;
4393 
4394  // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
// NOTE(review): only source_header is marked in the visible code; the statement marking the destination
// header (listing line 4395) is not visible here - confirm against the upstream source
4396  source_header |= xml_memory_page_contents_shared_mask;
4397  }
4398  else
4399  strcpy_insitu(dest, header, header_mask, source, strlength(source));
4400  }
4401  }
4402 
4404  {
4405  node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4406  node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4407 
4408  for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4409  {
4410  xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4411 
4412  if (da)
4413  {
4414  node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4415  node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4416  }
4417  }
4418  }
4419 
4421  {
4422  xml_allocator& alloc = get_allocator(dn);
4423  xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4424 
4425  node_copy_contents(dn, sn, shared_alloc);
4426 
4427  xml_node_struct* dit = dn;
4428  xml_node_struct* sit = sn->first_child;
4429 
4430  while (sit && sit != sn)
4431  {
4432  // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4433  if (sit != dn)
4434  {
4435  xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4436 
4437  if (copy)
4438  {
4439  node_copy_contents(copy, sit, shared_alloc);
4440 
4441  if (sit->first_child)
4442  {
4443  dit = copy;
4444  sit = sit->first_child;
4445  continue;
4446  }
4447  }
4448  }
4449 
4450  // continue to the next node
4451  do
4452  {
4453  if (sit->next_sibling)
4454  {
4455  sit = sit->next_sibling;
4456  break;
4457  }
4458 
4459  sit = sit->parent;
4460  dit = dit->parent;
4461  }
4462  while (sit != sn);
4463  }
4464  }
4465 
4466  PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4467  {
4468  xml_allocator& alloc = get_allocator(da);
4469  xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4470 
4471  node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4472  node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4473  }
4474 
4475  inline bool is_text_node(xml_node_struct* node)
4476  {
4477  xml_node_type type = PUGI__NODETYPE(node);
4478 
4479  return type == node_pcdata || type == node_cdata;
4480  }
4481 
4482  // get value with conversion functions
// Parses an integer from `value` into the unsigned type U: optional leading whitespace, an optional
// sign, then either a "0x"/"0X"-prefixed hexadecimal number or a decimal number. Parsing stops at the
// first character that is not a digit. The result saturates: negative input is clamped to `minv`,
// non-negative input to `maxv`. For negative input `minv` is the two's-complement bit pattern of the
// lower bound and the result is returned as 0 - magnitude.
4483  template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4484  {
4485  U result = 0;
4486  const char_t* s = value;
4487 
4488  while (PUGI__IS_CHARTYPE(*s, ct_space))
4489  s++;
4490 
4491  bool negative = (*s == '-');
4492 
// skip a single sign character, if present
4493  s += (*s == '+' || *s == '-');
4494 
4495  bool overflow = false;
4496 
// (c | ' ') maps an ASCII upper-case letter to lower case, so this accepts both 0x and 0X
4497  if (s[0] == '0' && (s[1] | ' ') == 'x')
4498  {
4499  s += 2;
4500 
4501  // since overflow detection relies on length of the sequence skip leading zeros
4502  while (*s == '0')
4503  s++;
4504 
4505  const char_t* start = s;
4506 
// accumulate hex digits; the arithmetic may wrap, overflow is decided from the digit count below
4507  for (;;)
4508  {
4509  if (static_cast<unsigned>(*s - '0') < 10)
4510  result = result * 16 + (*s - '0');
4511  else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4512  result = result * 16 + ((*s | ' ') - 'a' + 10);
4513  else
4514  break;
4515 
4516  s++;
4517  }
4518 
4519  size_t digits = static_cast<size_t>(s - start);
4520 
// every hex digit is 4 bits, so U holds at most sizeof(U) * 2 significant digits
4521  overflow = digits > sizeof(U) * 2;
4522  }
4523  else
4524  {
4525  // since overflow detection relies on length of the sequence skip leading zeros
4526  while (*s == '0')
4527  s++;
4528 
4529  const char_t* start = s;
4530 
// accumulate decimal digits; the arithmetic may wrap, overflow is decided below
4531  for (;;)
4532  {
4533  if (static_cast<unsigned>(*s - '0') < 10)
4534  result = result * 10 + (*s - '0');
4535  else
4536  break;
4537 
4538  s++;
4539  }
4540 
4541  size_t digits = static_cast<size_t>(s - start);
4542 
4543  PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4544 
// max_digits10: digit count of the largest value of U; max_lead: its leading digit
4545  const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4546  const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4547  const size_t high_bit = sizeof(U) * 8 - 1;
4548 
// fewer digits than max_digits10 always fit. With exactly max_digits10 digits the value fits when the
// leading digit is below max_lead, or equals max_lead and the accumulated result still has its top bit
// set (a wrapped result with that leading digit has the top bit clear).
4549  overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4550  }
4551 
4552  if (negative)
4553  {
4554  // Workaround for crayc++ CC-3059: Expected no overflow in routine.
4555  #ifdef _CRAYC
4556  return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4557  #else
// 0 - minv is the magnitude of the lower bound in unsigned arithmetic
4558  return (overflow || result > 0 - minv) ? minv : 0 - result;
4559  #endif
4560  }
4561  else
4562  return (overflow || result > maxv) ? maxv : result;
4563  }
4564 
4565  PUGI__FN int get_value_int(const char_t* value)
4566  {
4567  return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4568  }
4569 
4570  PUGI__FN unsigned int get_value_uint(const char_t* value)
4571  {
4572  return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4573  }
4574 
4575  PUGI__FN double get_value_double(const char_t* value)
4576  {
4577  #ifdef PUGIXML_WCHAR_MODE
4578  return wcstod(value, 0);
4579  #else
4580  return strtod(value, 0);
4581  #endif
4582  }
4583 
4584  PUGI__FN float get_value_float(const char_t* value)
4585  {
4586  #ifdef PUGIXML_WCHAR_MODE
4587  return static_cast<float>(wcstod(value, 0));
4588  #else
4589  return static_cast<float>(strtod(value, 0));
4590  #endif
4591  }
4592 
4593  PUGI__FN bool get_value_bool(const char_t* value)
4594  {
4595  // only look at first char
4596  char_t first = *value;
4597 
4598  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4599  return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4600  }
4601 
4602 #ifdef PUGIXML_HAS_LONG_LONG
4603  PUGI__FN long long get_value_llong(const char_t* value)
4604  {
4605  return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4606  }
4607 
4608  PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4609  {
4610  return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4611  }
4612 #endif
4613 
4614  template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4615  {
4616  char_t* result = end - 1;
4617  U rest = negative ? 0 - value : value;
4618 
4619  do
4620  {
4621  *result-- = static_cast<char_t>('0' + (rest % 10));
4622  rest /= 10;
4623  }
4624  while (rest);
4625 
4626  assert(result >= begin);
4627  (void)begin;
4628 
4629  *result = '-';
4630 
4631  return result + !negative;
4632  }
4633 
4634  // set value with conversion functions
4635  template <typename String, typename Header>
4636  PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4637  {
4638  #ifdef PUGIXML_WCHAR_MODE
4639  char_t wbuf[128];
4640  assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4641 
4642  size_t offset = 0;
4643  for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4644 
4645  return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4646  #else
4647  return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4648  #endif
4649  }
4650 
4651  template <typename U, typename String, typename Header>
4652  PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4653  {
4654  char_t buf[64];
4655  char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4656  char_t* begin = integer_to_string(buf, end, value, negative);
4657 
4658  return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4659  }
4660 
4661  template <typename String, typename Header>
4662  PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4663  {
4664  char buf[128];
4665  PUGI__SNPRINTF(buf, "%.9g", value);
4666 
4667  return set_value_ascii(dest, header, header_mask, buf);
4668  }
4669 
4670  template <typename String, typename Header>
4671  PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4672  {
4673  char buf[128];
4674  PUGI__SNPRINTF(buf, "%.17g", value);
4675 
4676  return set_value_ascii(dest, header, header_mask, buf);
4677  }
4678 
4679  template <typename String, typename Header>
4680  PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4681  {
4682  return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4683  }
4684 
4685  PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4686  {
4687  // check input buffer
4688  if (!contents && size) return make_parse_result(status_io_error);
4689 
4690  // get actual encoding
4691  xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4692 
4693  // get private buffer
4694  char_t* buffer = 0;
4695  size_t length = 0;
4696 
4697  if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4698 
4699  // delete original buffer if we performed a conversion
4700  if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4701 
4702  // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4703  if (own || buffer != contents) *out_buffer = buffer;
4704 
4705  // store buffer for offset_debug
4706  doc->buffer = buffer;
4707 
4708  // parse
4709  xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4710 
4711  // remember encoding
4712  res.encoding = buffer_encoding;
4713 
4714  return res;
4715  }
4716 
4717  // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4718  PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4719  {
4720  #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4721  // there are 64-bit versions of fseek/ftell, let's use them
4722  typedef __int64 length_type;
4723 
4724  _fseeki64(file, 0, SEEK_END);
4725  length_type length = _ftelli64(file);
4726  _fseeki64(file, 0, SEEK_SET);
4727  #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4728  // there are 64-bit versions of fseek/ftell, let's use them
4729  typedef off64_t length_type;
4730 
4731  fseeko64(file, 0, SEEK_END);
4732  length_type length = ftello64(file);
4733  fseeko64(file, 0, SEEK_SET);
4734  #else
4735  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4736  typedef long length_type;
4737 
4738  fseek(file, 0, SEEK_END);
4739  length_type length = ftell(file);
4740  fseek(file, 0, SEEK_SET);
4741  #endif
4742 
4743  // check for I/O errors
4744  if (length < 0) return status_io_error;
4745 
4746  // check for overflow
4747  size_t result = static_cast<size_t>(length);
4748 
4749  if (static_cast<length_type>(result) != length) return status_out_of_memory;
4750 
4751  // finalize
4752  out_result = result;
4753 
4754  return status_ok;
4755  }
4756 
4757  // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4758  PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4759  {
4760  // We only need to zero-terminate if encoding conversion does not do it for us
4761  #ifdef PUGIXML_WCHAR_MODE
4762  xml_encoding wchar_encoding = get_wchar_encoding();
4763 
4764  if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4765  {
4766  size_t length = size / sizeof(char_t);
4767 
4768  static_cast<char_t*>(buffer)[length] = 0;
4769  return (length + 1) * sizeof(char_t);
4770  }
4771  #else
4772  if (encoding == encoding_utf8)
4773  {
4774  static_cast<char*>(buffer)[size] = 0;
4775  return size + 1;
4776  }
4777  #endif
4778 
4779  return size;
4780  }
4781 
4782  PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4783  {
4784  if (!file) return make_parse_result(status_file_not_found);
4785 
4786  // get file size (can result in I/O errors)
4787  size_t size = 0;
4788  xml_parse_status size_status = get_file_size(file, size);
4789  if (size_status != status_ok) return make_parse_result(size_status);
4790 
4791  size_t max_suffix_size = sizeof(char_t);
4792 
4793  // allocate buffer for the whole file
4794  char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4795  if (!contents) return make_parse_result(status_out_of_memory);
4796 
4797  // read file in memory
4798  size_t read_size = fread(contents, 1, size, file);
4799 
4800  if (read_size != size)
4801  {
4802  xml_memory::deallocate(contents);
4804  }
4805 
4806  xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4807 
4808  return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4809  }
4810 
4811  PUGI__FN void close_file(FILE* file)
4812  {
4813  fclose(file);
4814  }
4815 
4816 #ifndef PUGIXML_NO_STL
4817  template <typename T> struct xml_stream_chunk
4818  {
4820  {
4821  void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4822  if (!memory) return 0;
4823 
4824  return new (memory) xml_stream_chunk();
4825  }
4826 
4827  static void destroy(xml_stream_chunk* chunk)
4828  {
4829  // free chunk chain
4830  while (chunk)
4831  {
4832  xml_stream_chunk* next_ = chunk->next;
4833 
4834  xml_memory::deallocate(chunk);
4835 
4836  chunk = next_;
4837  }
4838  }
4839 
4840  xml_stream_chunk(): next(0), size(0)
4841  {
4842  }
4843 
4845  size_t size;
4846 
4847  T data[xml_memory_page_size / sizeof(T)];
4848  };
4849 
4850  template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4851  {
4853 
4854  // read file to a chunk list
4855  size_t total = 0;
4856  xml_stream_chunk<T>* last = 0;
4857 
4858  while (!stream.eof())
4859  {
4860  // allocate new chunk
4862  if (!chunk) return status_out_of_memory;
4863 
4864  // append chunk to list
4865  if (last) last = last->next = chunk;
4866  else chunks.data = last = chunk;
4867 
4868  // read data to chunk
4869  stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4870  chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4871 
4872  // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4873  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4874 
4875  // guard against huge files (chunk size is small enough to make this overflow check work)
4876  if (total + chunk->size < total) return status_out_of_memory;
4877  total += chunk->size;
4878  }
4879 
4880  size_t max_suffix_size = sizeof(char_t);
4881 
4882  // copy chunk list to a contiguous buffer
4883  char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4884  if (!buffer) return status_out_of_memory;
4885 
4886  char* write = buffer;
4887 
4888  for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4889  {
4890  assert(write + chunk->size <= buffer + total);
4891  memcpy(write, chunk->data, chunk->size);
4892  write += chunk->size;
4893  }
4894 
4895  assert(write == buffer + total);
4896 
4897  // return buffer
4898  *out_buffer = buffer;
4899  *out_size = total;
4900 
4901  return status_ok;
4902  }
4903 
4904  template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4905  {
4906  // get length of remaining data in stream
4907  typename std::basic_istream<T>::pos_type pos = stream.tellg();
4908  stream.seekg(0, std::ios::end);
4909  std::streamoff length = stream.tellg() - pos;
4910  stream.seekg(pos);
4911 
4912  if (stream.fail() || pos < 0) return status_io_error;
4913 
4914  // guard against huge files
4915  size_t read_length = static_cast<size_t>(length);
4916 
4917  if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4918 
4919  size_t max_suffix_size = sizeof(char_t);
4920 
4921  // read stream data into memory (guard against stream exceptions with buffer holder)
4922  auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4923  if (!buffer.data) return status_out_of_memory;
4924 
4925  stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4926 
4927  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4928  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4929 
4930  // return buffer
4931  size_t actual_length = static_cast<size_t>(stream.gcount());
4932  assert(actual_length <= read_length);
4933 
4934  *out_buffer = buffer.release();
4935  *out_size = actual_length * sizeof(T);
4936 
4937  return status_ok;
4938  }
4939 
4940  template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4941  {
4942  void* buffer = 0;
4943  size_t size = 0;
4944  xml_parse_status status = status_ok;
4945 
4946  // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4947  if (stream.fail()) return make_parse_result(status_io_error);
4948 
4949  // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4950  if (stream.tellg() < 0)
4951  {
4952  stream.clear(); // clear error flags that could be set by a failing tellg
4953  status = load_stream_data_noseek(stream, &buffer, &size);
4954  }
4955  else
4956  status = load_stream_data_seek(stream, &buffer, &size);
4957 
4958  if (status != status_ok) return make_parse_result(status);
4959 
4960  xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4961 
4962  return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4963  }
4964 #endif
4965 
4966 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4967  PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4968  {
4969  return _wfopen(path, mode);
4970  }
4971 #else
4972  PUGI__FN char* convert_path_heap(const wchar_t* str)
4973  {
4974  assert(str);
4975 
4976  // first pass: get length in utf8 characters
4977  size_t length = strlength_wide(str);
4978  size_t size = as_utf8_begin(str, length);
4979 
4980  // allocate resulting string
4981  char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4982  if (!result) return 0;
4983 
4984  // second pass: convert to utf8
4985  as_utf8_end(result, size, str, length);
4986 
4987  // zero-terminate
4988  result[size] = 0;
4989 
4990  return result;
4991  }
4992 
4993  PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4994  {
4995  // there is no standard function to open wide paths, so our best bet is to try utf8 path
4996  char* path_utf8 = convert_path_heap(path);
4997  if (!path_utf8) return 0;
4998 
4999  // convert mode to ASCII (we mirror _wfopen interface)
5000  char mode_ascii[4] = {0};
5001  for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5002 
5003  // try to open the utf8 path
5004  FILE* result = fopen(path_utf8, mode_ascii);
5005 
5006  // free dummy buffer
5007  xml_memory::deallocate(path_utf8);
5008 
5009  return result;
5010  }
5011 #endif
5012 
5013  PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5014  {
5015  if (!file) return false;
5016 
5017  xml_writer_file writer(file);
5018  doc.save(writer, indent, flags, encoding);
5019 
5020  return ferror(file) == 0;
5021  }
5022 
5024  {
5026  char_t* name;
5027 
5028  name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5029  {
5030  node->name = 0;
5031  }
5032 
5034  {
5035  node->name = name;
5036  }
5037  };
5039 
5040 namespace pugi
5041 {
5042  PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5043  {
5044  }
5045 
5046  PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5047  {
5048  size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5049  (void)!result; // unfortunately we can't do proper error handling here
5050  }
5051 
5052 #ifndef PUGIXML_NO_STL
5053  PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5054  {
5055  }
5056 
5057  PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5058  {
5059  }
5060 
5061  PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5062  {
5063  if (narrow_stream)
5064  {
5065  assert(!wide_stream);
5066  narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5067  }
5068  else
5069  {
5070  assert(wide_stream);
5071  assert(size % sizeof(wchar_t) == 0);
5072 
5073  wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5074  }
5075  }
5076 #endif
5077 
5079  {
5080  }
5081 
5083  {
5084  }
5085 
5087  {
5088  return _depth;
5089  }
5090 
5092  {
5093  return true;
5094  }
5095 
5097  {
5098  return true;
5099  }
5100 
5102  {
5103  }
5104 
5106  {
5107  }
5108 
5110  {
5111  }
5112 
5113  PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5114  {
5116  }
5117 
5119  {
5120  return !_attr;
5121  }
5122 
5124  {
5125  return (_attr == r._attr);
5126  }
5127 
5129  {
5130  return (_attr != r._attr);
5131  }
5132 
5134  {
5135  return (_attr < r._attr);
5136  }
5137 
5139  {
5140  return (_attr > r._attr);
5141  }
5142 
5144  {
5145  return (_attr <= r._attr);
5146  }
5147 
5149  {
5150  return (_attr >= r._attr);
5151  }
5152 
5154  {
5156  }
5157 
5159  {
5161  }
5162 
5163  PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5164  {
5165  return (_attr && _attr->value) ? _attr->value + 0 : def;
5166  }
5167 
5168  PUGI__FN int xml_attribute::as_int(int def) const
5169  {
5170  return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5171  }
5172 
5173  PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5174  {
5175  return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5176  }
5177 
5178  PUGI__FN double xml_attribute::as_double(double def) const
5179  {
5180  return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5181  }
5182 
5183  PUGI__FN float xml_attribute::as_float(float def) const
5184  {
5185  return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5186  }
5187 
5188  PUGI__FN bool xml_attribute::as_bool(bool def) const
5189  {
5190  return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5191  }
5192 
5193 #ifdef PUGIXML_HAS_LONG_LONG
5194  PUGI__FN long long xml_attribute::as_llong(long long def) const
5195  {
5196  return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5197  }
5198 
5199  PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5200  {
5201  return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5202  }
5203 #endif
5204 
5206  {
5207  return !_attr;
5208  }
5209 
5210  PUGI__FN const char_t* xml_attribute::name() const
5211  {
5212  return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5213  }
5214 
5215  PUGI__FN const char_t* xml_attribute::value() const
5216  {
5217  return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5218  }
5219 
5221  {
5222  return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5223  }
5224 
5226  {
5227  return _attr;
5228  }
5229 
5231  {
5232  set_value(rhs);
5233  return *this;
5234  }
5235 
5237  {
5238  set_value(rhs);
5239  return *this;
5240  }
5241 
5243  {
5244  set_value(rhs);
5245  return *this;
5246  }
5247 
5249  {
5250  set_value(rhs);
5251  return *this;
5252  }
5253 
5255  {
5256  set_value(rhs);
5257  return *this;
5258  }
5259 
5261  {
5262  set_value(rhs);
5263  return *this;
5264  }
5265 
5267  {
5268  set_value(rhs);
5269  return *this;
5270  }
5271 
5273  {
5274  set_value(rhs);
5275  return *this;
5276  }
5277 
5278 #ifdef PUGIXML_HAS_LONG_LONG
5280  {
5281  set_value(rhs);
5282  return *this;
5283  }
5284 
5285  PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5286  {
5287  set_value(rhs);
5288  return *this;
5289  }
5290 #endif
5291 
5292  PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5293  {
5294  if (!_attr) return false;
5295 
5297  }
5298 
5299  PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5300  {
5301  if (!_attr) return false;
5302 
5304  }
5305 
5307  {
5308  if (!_attr) return false;
5309 
5310  return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5311  }
5312 
5313  PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5314  {
5315  if (!_attr) return false;
5316 
5317  return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5318  }
5319 
5321  {
5322  if (!_attr) return false;
5323 
5324  return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5325  }
5326 
5327  PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5328  {
5329  if (!_attr) return false;
5330 
5331  return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5332  }
5333 
5335  {
5336  if (!_attr) return false;
5337 
5339  }
5340 
5342  {
5343  if (!_attr) return false;
5344 
5346  }
5347 
5349  {
5350  if (!_attr) return false;
5351 
5353  }
5354 
5355 #ifdef PUGIXML_HAS_LONG_LONG
5356  PUGI__FN bool xml_attribute::set_value(long long rhs)
5357  {
5358  if (!_attr) return false;
5359 
5360  return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5361  }
5362 
5363  PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5364  {
5365  if (!_attr) return false;
5366 
5367  return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5368  }
5369 #endif
5370 
5371 #ifdef __BORLANDC__
5372  PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5373  {
5374  return (bool)lhs && rhs;
5375  }
5376 
5377  PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5378  {
5379  return (bool)lhs || rhs;
5380  }
5381 #endif
5382 
5384  {
5385  }
5386 
5388  {
5389  }
5390 
5392  {
5393  }
5394 
5395  PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5396  {
5397  return _root ? unspecified_bool_xml_node : 0;
5398  }
5399 
5401  {
5402  return !_root;
5403  }
5404 
5406  {
5407  return iterator(_root ? _root->first_child + 0 : 0, _root);
5408  }
5409 
5411  {
5412  return iterator(0, _root);
5413  }
5414 
5416  {
5417  return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5418  }
5419 
5421  {
5422  return attribute_iterator(0, _root);
5423  }
5424 
5426  {
5428  }
5429 
5431  {
5433  }
5434 
5436  {
5438  }
5439 
5441  {
5442  return (_root == r._root);
5443  }
5444 
5446  {
5447  return (_root != r._root);
5448  }
5449 
5451  {
5452  return (_root < r._root);
5453  }
5454 
5456  {
5457  return (_root > r._root);
5458  }
5459 
5461  {
5462  return (_root <= r._root);
5463  }
5464 
5466  {
5467  return (_root >= r._root);
5468  }
5469 
5471  {
5472  return !_root;
5473  }
5474 
5475  PUGI__FN const char_t* xml_node::name() const
5476  {
5477  return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5478  }
5479 
5481  {
5482  return _root ? PUGI__NODETYPE(_root) : node_null;
5483  }
5484 
5485  PUGI__FN const char_t* xml_node::value() const
5486  {
5487  return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5488  }
5489 
5490  PUGI__FN xml_node xml_node::child(const char_t* name_) const
5491  {
5492  if (!_root) return xml_node();
5493 
5494  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5495  if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5496 
5497  return xml_node();
5498  }
5499 
5500  PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5501  {
5502  if (!_root) return xml_attribute();
5503 
5505  if (i->name && impl::strequal(name_, i->name))
5506  return xml_attribute(i);
5507 
5508  return xml_attribute();
5509  }
5510 
5511  PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5512  {
5513  if (!_root) return xml_node();
5514 
5515  for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5516  if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5517 
5518  return xml_node();
5519  }
5520 
5522  {
5523  return _root ? xml_node(_root->next_sibling) : xml_node();
5524  }
5525 
5526  PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5527  {
5528  if (!_root) return xml_node();
5529 
5531  if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5532 
5533  return xml_node();
5534  }
5535 
5536  PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5537  {
5538  xml_attribute_struct* hint = hint_._attr;
5539 
5540  // if hint is not an attribute of node, behavior is not defined
5541  assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5542 
5543  if (!_root) return xml_attribute();
5544 
5545  // optimistically search from hint up until the end
5546  for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5547  if (i->name && impl::strequal(name_, i->name))
5548  {
5549  // update hint to maximize efficiency of searching for consecutive attributes
5550  hint_._attr = i->next_attribute;
5551 
5552  return xml_attribute(i);
5553  }
5554 
5555  // wrap around and search from the first attribute until the hint
5556  // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5557  for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5558  if (j->name && impl::strequal(name_, j->name))
5559  {
5560  // update hint to maximize efficiency of searching for consecutive attributes
5561  hint_._attr = j->next_attribute;
5562 
5563  return xml_attribute(j);
5564  }
5565 
5566  return xml_attribute();
5567  }
5568 
5570  {
5571  if (!_root) return xml_node();
5572 
5574  else return xml_node();
5575  }
5576 
5578  {
5579  return _root ? xml_node(_root->parent) : xml_node();
5580  }
5581 
5583  {
5585  }
5586 
5588  {
5589  return xml_text(_root);
5590  }
5591 
5592  PUGI__FN const char_t* xml_node::child_value() const
5593  {
5594  if (!_root) return PUGIXML_TEXT("");
5595 
5596  // element nodes can have value if parse_embed_pcdata was used
5598  return _root->value;
5599 
5600  for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5601  if (impl::is_text_node(i) && i->value)
5602  return i->value;
5603 
5604  return PUGIXML_TEXT("");
5605  }
5606 
5607  PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5608  {
5609  return child(name_).child_value();
5610  }
5611 
5613  {
5614  return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5615  }
5616 
5618  {
5620  }
5621