Orcus
sax_parser_base.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
9 #define INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
10 
11 #include "env.hpp"
12 #include "cell_buffer.hpp"
13 #include "parser_global.hpp"
14 #include "parser_base.hpp"
15 
16 #include <cassert>
17 #include <cstdlib>
18 #include <exception>
19 #include <sstream>
20 #include <memory>
21 
22 #define ORCUS_DEBUG_SAX_PARSER 0
23 
24 #if ORCUS_DEBUG_SAX_PARSER
25 #include <iostream>
26 using std::cout;
27 using std::endl;
28 #endif
29 
30 namespace orcus { namespace sax {
31 
32 class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error
33 {
34 public:
35  malformed_xml_error() = delete;
36  malformed_xml_error(const std::string& msg, std::ptrdiff_t offset);
37  virtual ~malformed_xml_error() throw();
38 };
39 
45 {
46  enum class keyword_type { dtd_public, dtd_private };
47 
48  keyword_type keyword;
49  std::string_view root_element;
50  std::string_view fpi;
51  std::string_view uri;
52 };
53 
// Declaration only; the definition lives outside this header.
//
// Takes a character sequence given as a pointer p and a length n, and returns
// a single char.
//
// NOTE(review): judging by the name, p/n presumably hold the name of an XML
// character entity (such as "amp" or "lt") and the return value is the
// character it stands for - confirm against the definition, including what is
// returned when the sequence is not recognised.
65 ORCUS_PSR_DLLPUBLIC char decode_xml_encoded_char(const char* p, size_t n);
66 
// Declaration only; the definition lives outside this header.
//
// Takes a character sequence given as a pointer p and a length n, and returns
// a std::string by value.
//
// NOTE(review): judging by the name, p/n presumably hold a numeric character
// reference and the returned string is its encoded form - confirm the expected
// input syntax, the output encoding and the failure result against the
// definition.
78 ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n);
79 
85 {
86  std::string_view ns; // element namespace (optional)
87  std::string_view name; // element name
88  std::ptrdiff_t begin_pos; // position of the opening brace '<'.
89  std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
90 };
91 
100 {
101  std::string_view ns; // attribute namespace (optional)
102  std::string_view name; // attribute name
103  std::string_view value; // attribute value
104  bool transient; // whether or not the attribute value is on a temporary buffer.
105 };
106 
107 class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
108 {
109  struct impl;
110  std::unique_ptr<impl> mp_impl;
111 
112  parser_base() = delete;
113  parser_base(const parser_base&) = delete;
114  parser_base& operator=(const parser_base&) = delete;
115 protected:
116  size_t m_nest_level;
117  size_t m_buffer_pos;
118  bool m_root_elem_open:1;
119 
120 protected:
121  parser_base(const char* content, size_t size, bool transient_stream);
122  ~parser_base();
123 
124  void next_check()
125  {
126  next();
127  if (!has_char())
128  throw malformed_xml_error("xml stream ended prematurely.", offset());
129  }
130 
131  void nest_up() { ++m_nest_level; }
132  void nest_down()
133  {
134  if (m_nest_level == 0)
135  throw malformed_xml_error("incorrect nesting in xml stream", offset());
136 
137  --m_nest_level;
138  }
139 
140  void inc_buffer_pos();
141  void reset_buffer_pos() { m_buffer_pos = 0; }
142 
143  void has_char_throw(const char* msg) const
144  {
145  if (!has_char())
146  throw malformed_xml_error(msg, offset());
147  }
148 
156  inline size_t remains() const
157  {
158 #if ORCUS_DEBUG_SAX_PARSER
159  if (mp_char >= mp_end)
160  throw malformed_xml_error("xml stream ended prematurely.", offset());
161 #endif
162  return mp_end - mp_char;
163  }
164 
165  char cur_char_checked() const
166  {
167  if (!has_char())
168  throw malformed_xml_error("xml stream ended prematurely.", offset());
169 
170  return *mp_char;
171  }
172 
173  char next_and_char()
174  {
175  next();
176 #if ORCUS_DEBUG_SAX_PARSER
177  if (mp_char >= mp_end)
178  throw malformed_xml_error("xml stream ended prematurely.", offset());
179 #endif
180  return *mp_char;
181  }
182 
183  char next_char_checked()
184  {
185  next();
186  if (!has_char())
187  throw malformed_xml_error("xml stream ended prematurely.", offset());
188 
189  return *mp_char;
190  }
191 
192  cell_buffer& get_cell_buffer();
193 
194  void comment();
195 
199  void skip_bom();
200 
201  void expects_next(const char* p, size_t n);
202 
203  void parse_encoded_char(cell_buffer& buf);
204  void value_with_encoded_char(cell_buffer& buf, std::string_view& str, char quote_char);
205 
214  bool value(std::string_view& str, bool decode);
215 
216  void name(std::string_view& str);
217  void element_name(parser_element& elem, std::ptrdiff_t begin_pos);
218  void attribute_name(std::string_view& attr_ns, std::string_view& attr_name);
219  void characters_with_encoded_char(cell_buffer& buf);
220 };
221 
222 }}
223 
224 #endif
225 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: cell_buffer.hpp:22
Definition: parser_base.hpp:27
Definition: parser_base.hpp:41
Definition: sax_parser_base.hpp:33
Definition: sax_parser_base.hpp:108
size_t remains() const
Definition: sax_parser_base.hpp:156
bool value(std::string_view &str, bool decode)
Definition: sax_parser_base.hpp:45
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85