Orcus
sax_ns_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9 #define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10 
11 #include "sax_parser.hpp"
12 #include "xml_namespace.hpp"
13 #include "global.hpp"
14 
15 #include <unordered_set>
16 #include <vector>
17 #include <memory>
18 #include <algorithm>
19 
20 namespace orcus {
21 
23 {
24  xmlns_id_t ns; // element namespace
25  std::string_view ns_alias; // element namespace alias
26  std::string_view name; // element name
27  std::ptrdiff_t begin_pos; // position of the opening brace '<'.
28  std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
29 };
30 
32 {
33  xmlns_id_t ns; // attribute namespace
34  std::string_view ns_alias; // attribute namespace alias
35  std::string_view name; // attribute name
36  std::string_view value; // attribute value
37  bool transient; // whether or not the attribute value is transient.
38 };
39 
40 namespace __sax {
41 
/**
 * (namespace alias, local name) pair identifying an attribute within one
 * element.  Stored in a hash set to detect duplicate attributes.
 *
 * Both members are non-owning views; the viewed text must outlive the
 * object.
 */
struct entity_name
{
    std::string_view ns;
    std::string_view name;

    entity_name(std::string_view _ns, std::string_view _name) :
        ns(_ns), name(_name) {}

    /** Two names are equal only when both the alias and the local name match. */
    bool operator== (const entity_name& other) const
    {
        return other.ns == ns && other.name == name;
    }

    /** Hash functor for use with unordered containers. */
    struct hash
    {
        size_t operator() (const entity_name& v) const
        {
            std::hash<std::string_view> hasher;

            // Mix the two hashes in an order-sensitive way.  A plain sum is
            // commutative, so (a, b) and (b, a) would always collide.
            size_t seed = hasher(v.ns);
            seed ^= hasher(v.name) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
};
64 
65 typedef std::unordered_set<std::string_view> ns_keys_type;
66 typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
67 
68 struct elem_scope
69 {
70  xmlns_id_t ns;
71  std::string_view name;
72  ns_keys_type ns_keys;
73 };
74 
75 typedef std::vector<std::unique_ptr<elem_scope>> elem_scopes_type;
76 
78 {
79  xmlns_context& m_cxt;
80 public:
81  pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {}
82  void operator() (std::string_view key)
83  {
84  m_cxt.pop(key);
85  }
86 };
87 
88 }
89 
91 {
92 public:
93  void doctype(const orcus::sax::doctype_declaration& /*dtd*/) {}
94 
95  void start_declaration(std::string_view /*decl*/) {}
96 
97  void end_declaration(std::string_view /*decl*/) {}
98 
99  void start_element(const orcus::sax_ns_parser_element& /*elem*/) {}
100 
101  void end_element(const orcus::sax_ns_parser_element& /*elem*/) {}
102 
103  void characters(std::string_view /*val*/, bool /*transient*/) {}
104 
105  void attribute(std::string_view /*name*/, std::string_view /*val*/) {}
106 
107  void attribute(const orcus::sax_ns_parser_attribute& /*attr*/) {}
108 };
109 
113 template<typename _Handler>
115 {
116 public:
117  typedef _Handler handler_type;
118 
119  sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler);
120  sax_ns_parser(const char* content, const size_t size, bool transient_stream,
121  xmlns_context& ns_cxt, handler_type& handler);
122  ~sax_ns_parser();
123 
124  void parse();
125 
126 private:
131  class handler_wrapper
132  {
133  __sax::elem_scopes_type m_scopes;
134  __sax::ns_keys_type m_ns_keys;
135  __sax::entity_names_type m_attrs;
136 
137  sax_ns_parser_element m_elem;
139 
140  xmlns_context& m_ns_cxt;
141  handler_type& m_handler;
142 
143  bool m_declaration;
144 
145  public:
146  handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
147 
148  void doctype(const sax::doctype_declaration& dtd)
149  {
150  m_handler.doctype(dtd);
151  }
152 
153  void start_declaration(std::string_view name)
154  {
155  m_declaration = true;
156  m_handler.start_declaration(name);
157  }
158 
159  void end_declaration(std::string_view name)
160  {
161  m_declaration = false;
162  m_handler.end_declaration(name);
163  }
164 
165  void start_element(const sax::parser_element& elem)
166  {
167  m_scopes.push_back(std::make_unique<__sax::elem_scope>());
168  __sax::elem_scope& scope = *m_scopes.back();
169  scope.ns = m_ns_cxt.get(elem.ns);
170  scope.name = elem.name;
171  scope.ns_keys.swap(m_ns_keys);
172 
173  m_elem.ns = scope.ns;
174  m_elem.ns_alias = elem.ns;
175  m_elem.name = scope.name;
176  m_elem.begin_pos = elem.begin_pos;
177  m_elem.end_pos = elem.end_pos;
178  m_handler.start_element(m_elem);
179 
180  m_attrs.clear();
181  }
182 
183  void end_element(const sax::parser_element& elem)
184  {
185  __sax::elem_scope& scope = *m_scopes.back();
186  if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
187  throw sax::malformed_xml_error("mis-matching closing element.", -1);
188 
189  m_elem.ns = scope.ns;
190  m_elem.ns_alias = elem.ns;
191  m_elem.name = scope.name;
192  m_elem.begin_pos = elem.begin_pos;
193  m_elem.end_pos = elem.end_pos;
194  m_handler.end_element(m_elem);
195 
196  // Pop all namespaces declared in this scope.
197  std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt));
198 
199  m_scopes.pop_back();
200  }
201 
202  void characters(std::string_view val, bool transient)
203  {
204  m_handler.characters(val, transient);
205  }
206 
207  void attribute(const sax::parser_attribute& attr)
208  {
209  if (m_declaration)
210  {
211  // XML declaration attribute. Pass it through to the handler without namespace.
212  m_handler.attribute(attr.name, attr.value);
213  return;
214  }
215 
216  if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0)
218  "You can't define two attributes of the same name in the same element.", -1);
219 
220  m_attrs.insert(__sax::entity_name(attr.ns, attr.name));
221 
222  if (attr.ns.empty() && attr.name == "xmlns")
223  {
224  // Default namespace
225  m_ns_cxt.push(std::string_view{}, attr.value);
226  m_ns_keys.insert(std::string_view{});
227  return;
228  }
229 
230  if (attr.ns == "xmlns")
231  {
232  // Namespace alias
233  if (!attr.name.empty())
234  {
235  m_ns_cxt.push(attr.name, attr.value);
236  m_ns_keys.insert(attr.name);
237  }
238  return;
239  }
240 
241  m_attr.ns = attr.ns.empty() ? XMLNS_UNKNOWN_ID : m_ns_cxt.get(attr.ns);
242  m_attr.ns_alias = attr.ns;
243  m_attr.name = attr.name;
244  m_attr.value = attr.value;
245  m_attr.transient = attr.transient;
246  m_handler.attribute(m_attr);
247  }
248  };
249 
250 private:
251  handler_wrapper m_wrapper;
253 };
254 
255 template<typename _Handler>
257  const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) :
258  m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper)
259 {
260 }
261 
262 template<typename _Handler>
263 sax_ns_parser<_Handler>::sax_ns_parser(
264  const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) :
265  m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper)
266 {
267 }
268 
269 template<typename _Handler>
270 sax_ns_parser<_Handler>::~sax_ns_parser()
271 {
272 }
273 
274 template<typename _Handler>
275 void sax_ns_parser<_Handler>::parse()
276 {
277  m_parser.parse();
278 }
279 
280 }
281 
282 #endif
283 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: sax_ns_parser.hpp:78
Definition: sax_parser_base.hpp:33
Definition: sax_ns_parser.hpp:91
Definition: sax_ns_parser.hpp:115
Definition: xml_namespace.hpp:82
xmlns_id_t get(std::string_view key) const
Definition: sax_ns_parser.hpp:69
Definition: sax_ns_parser.hpp:56
Definition: sax_ns_parser.hpp:43
Definition: sax_parser_base.hpp:45
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85
Definition: sax_ns_parser.hpp:32
Definition: sax_ns_parser.hpp:23