bes  Updated for version 3.20.10
DmrppParserSax2.h
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2012 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #ifndef dmrpp_parser_sax2_h
27 #define dmrpp_parser_sax2_h
28 
29 #define ATTR 1
30 
31 #include <cstring>
32 
33 #include <memory>
34 #include <string>
35 #include <iostream>
36 #include <map>
37 #include <unordered_map>
38 #include <stack>
39 
40 #include <libxml/parserInternals.h>
41 
42 #include <libdap/Type.h> // from libdap
43 #include "BESRegex.h"
44 #include "url_impl.h"
45 #include "EffectiveUrlCache.h"
46 
47 #define CRLF "\r\n"
48 #define D4_PARSE_BUFF_SIZE 1048576
49 
50 namespace libdap {
51 class DMR;
52 class BaseType;
53 class D4BaseTypeFactory;
54 class D4Group;
55 class D4Attributes;
56 class D4EnumDef;
57 class D4Dimension;
58 }
59 
60 namespace dmrpp {
67 {
68 private:
// The set of states the parser can be in. Values of this type are kept on
// a stack and manipulated with push_state(), get_state() and pop_state().
// Do not reorder: the enumerators take their numeric values from position.
enum ParseState {
    parser_start,

    inside_dataset,

    // Entered right after the start of a Group element has been parsed.
    inside_group,

    inside_attribute_container,
    inside_attribute,
    inside_attribute_value,
    inside_other_xml_attribute,

    inside_enum_def,
    inside_enum_const,

    inside_dim_def,

    // One state shared by Byte, ..., Url, Opaque.
    inside_simple_type,

    // inside_array,
    inside_dim,
    inside_map,

    inside_constructor,

    not_dap4_element,
    inside_dmrpp_object,
    inside_dmrpp_chunkDimensionSizes_element,
    inside_dmrpp_compact_element,

    parser_unknown,
    parser_error,
    parser_fatal_error,

    parser_end
};
109 
110  char d_parse_buffer[D4_PARSE_BUFF_SIZE+1]; // Buff size plus one byte for NULL termination.
111 
112  xmlSAXHandler dmrpp_sax_parser;
113 
114  // The results of the parse operation are stored in these fields.
115  // This is passed into the parser using the intern() methods.
116  libdap::DMR *d_dmr; // dump DMR here
117  libdap::DMR *dmr() const { return d_dmr; }
118 
119  // These stacks hold the state of the parse as it progresses.
120  std::stack<ParseState> s; // Current parse state
121  void push_state(DmrppParserSax2::ParseState state) { s.push(state); }
122  DmrppParserSax2::ParseState get_state() const { return s.top(); }
123  void pop_state() { s.pop(); }
124  bool empty_state() const { return s.empty(); }
125 
126  std::stack<libdap::BaseType*> btp_stack; // current variable(s)
127  void push_basetype(libdap::BaseType *btp) { btp_stack.push(btp); }
128  libdap::BaseType *top_basetype() const { return btp_stack.top(); }
129  void pop_basetype() { btp_stack.pop(); }
130  bool empty_basetype() const { return btp_stack.empty(); }
131 
132  std::stack<libdap::D4Group*> grp_stack; // current groups(s)
133  void push_group(libdap::D4Group *grp) { grp_stack.push(grp); }
134  libdap::D4Group *top_group() const { return grp_stack.top(); }
135  void pop_group() { grp_stack.pop(); }
136  bool empty_group() const { return grp_stack.empty(); }
137 
138  std::stack<libdap::D4Attributes*> d_attrs_stack; // DAP4 Attributes
139  void push_attributes(libdap::D4Attributes *attr) { d_attrs_stack.push(attr); }
140  libdap::D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
141  void pop_attributes() { d_attrs_stack.pop(); }
142  bool empty_attributes() const { return d_attrs_stack.empty(); }
143 
144  libdap::D4EnumDef *d_enum_def;
145  libdap::D4EnumDef *enum_def();
146  void clear_enum_def() { d_enum_def = 0; }
147 
148  libdap::D4Dimension *d_dim_def;
149  libdap::D4Dimension *dim_def();
150  void clear_dim_def() { d_dim_def = 0; }
151 
152  // Accumulate stuff inside an 'OtherXML' DAP attribute here
153  std::string other_xml;
154 
155  // When we're parsing unknown XML, how deeply is it nested? This is used
156  // for the OtherXML DAP attributes.
157  unsigned int other_xml_depth;
158  unsigned int unknown_depth;
159 
160  // These are used for processing errors.
161  std::string error_msg; // Error message(s), if any.
162  xmlParserCtxtPtr context; // used for error message line numbers
163 
164  // These hold temporary values read during the parse.
165  std::string dods_attr_name; // DAP4 attributes, not XML attributes
166  std::string dods_attr_type; // ... not XML ...
167  std::string char_data; // char data in value elements; null after use
168  std::string root_ns; // What is the namespace of the root node (Group)
169 
170  bool d_strict;
171 
172  std::shared_ptr<http::url> dmrpp_dataset_href;
173 
174  class XMLAttribute {
175  public:
176  std::string prefix;
177  std::string nsURI;
178  std::string value;
179 
180  void clone(const XMLAttribute &src) {
181  prefix = src.prefix;
182  nsURI = src.nsURI;
183  value = src.value;
184  }
185 
186  XMLAttribute() : prefix(""), nsURI(""), value("") {}
187  XMLAttribute(const std::string &p, const std::string &ns, const std::string &v)
188  : prefix(p), nsURI(ns), value(v) {}
189  // 'attributes' as passed from libxml2 is a five element array but this
190  // ctor gets the back four elements.
191  XMLAttribute(const xmlChar **attributes/*[4]*/) {
192  prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
193  nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
194  value = std::string((const char *)attributes[2], (const char *)attributes[3]);
195  }
196  XMLAttribute(const XMLAttribute &rhs) {
197  clone(rhs);
198  }
199  XMLAttribute &operator=(const XMLAttribute &rhs) {
200  if (this == &rhs)
201  return *this;
202  clone(rhs);
203  return *this;
204  }
205  };
206 
207  typedef std::unordered_map<std::string, XMLAttribute> XMLAttrMap;
208  XMLAttrMap xml_attrs; // dump XML attributes here
209 
210  XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
211 
212  XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); }
213 
214  std::map<std::string, std::string> namespace_table;
215 
216  void cleanup_parse();
217 
224 #if 0
225  void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
226 #endif
227  std::string get_attribute_val(const std::string &name, const xmlChar **attributes, int num_attributes);
228  void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
229  bool check_required_attribute(const std::string &attr);
230  bool check_required_attribute(const std::string &attr, const xmlChar **attributes, int num_attributes);
231  bool check_attribute(const std::string & attr);
232  bool check_attribute(const std::string &name, const xmlChar **attributes, int num_attributes);
233  void process_variable_helper(libdap::Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
234 
235  void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
236  void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
237 
238  bool process_dmrpp_compact_start(const char *name);
239  void process_dmrpp_compact_end(const char *localname);
240  bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
241  bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
242  bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
243  bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
244  bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
245  bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
246  bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
247  bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
248  bool process_dmrpp_object(const char *name, const xmlChar **attrs, int nb_attributes);
249 
250  void finish_variable(const char *tag, libdap::Type t, const char *expected);
252 
253  friend class DmrppParserSax2Test;
254 
255 public:
256  DmrppParserSax2() :
257  d_dmr(0), d_enum_def(0), d_dim_def(0),
258  other_xml(""), other_xml_depth(0), unknown_depth(0),
259  error_msg(""), context(0),
260  dods_attr_name(""), dods_attr_type(""),
261  char_data(""), root_ns(""), d_strict(true),
262  dmrpp_dataset_href(nullptr)
263  {
264  //xmlSAXHandler ddx_sax_parser;
265  memset(&dmrpp_sax_parser, 0, sizeof(xmlSAXHandler));
266 
267  dmrpp_sax_parser.getEntity = &DmrppParserSax2::dmr_get_entity;
268  dmrpp_sax_parser.startDocument = &DmrppParserSax2::dmr_start_document;
269  dmrpp_sax_parser.endDocument = &DmrppParserSax2::dmr_end_document;
270  dmrpp_sax_parser.characters = &DmrppParserSax2::dmr_get_characters;
271  dmrpp_sax_parser.ignorableWhitespace = &DmrppParserSax2::dmr_ignoreable_whitespace;
272  dmrpp_sax_parser.cdataBlock = &DmrppParserSax2::dmr_get_cdata;
273  dmrpp_sax_parser.warning = &DmrppParserSax2::dmr_error;
274  dmrpp_sax_parser.error = &DmrppParserSax2::dmr_error;
275  dmrpp_sax_parser.fatalError = &DmrppParserSax2::dmr_fatal_error;
276  dmrpp_sax_parser.initialized = XML_SAX2_MAGIC;
277  dmrpp_sax_parser.startElementNs = &DmrppParserSax2::dmr_start_element;
278  dmrpp_sax_parser.endElementNs = &DmrppParserSax2::dmr_end_element;
279  }
280 
281  ~DmrppParserSax2(){}
282 
283  void intern(std::istream &f, libdap::DMR *dest_dmr);
284  void intern(const std::string &document, libdap::DMR *dest_dmr);
285  void intern(const char *buffer, int size, libdap::DMR *dest_dmr);
286 
299  void set_strict(bool s) { d_strict = s; }
303  bool get_strict() const { return d_strict; }
306  static void dmr_start_document(void *parser);
307  static void dmr_end_document(void *parser);
308 
309  static void dmr_start_element(void *parser,
310  const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
311  int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
312  int nb_defaulted, const xmlChar **attributes);
313  static void dmr_end_element(void *parser, const xmlChar *localname,
314  const xmlChar *prefix, const xmlChar *URI);
315 
316  static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
317  static void dmr_ignoreable_whitespace(void *parser,
318  const xmlChar * ch, int len);
319  static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
320 
321  static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
322  static void dmr_fatal_error(void *parser, const char *msg, ...);
323  static void dmr_error(void *parser, const char *msg, ...);
324 
325  };
326 
327 } // namespace dmrpp
328 
329 #endif // dmrpp_parser_sax2_h
void intern(std::istream &f, libdap::DMR *dest_dmr)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
bool get_strict() const
Get the setting of the 'strict' mode.
void set_strict(bool s)
Set the 'strict' mode to true or false.
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_fatal_error(void *parser, const char *msg,...)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_start_document(void *parser)
Type
Type of JSON value.
Definition: rapidjson.h:664