// origin    : root/cutl/xml/parser.hxx, blob b61d26dd3c8f81d09c4c71ec9a6e607ebdd7af1f (plain)
// file      : cutl/xml/parser.hxx
// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
// license   : MIT; see accompanying LICENSE file

#ifndef CUTL_XML_PARSER_HXX
#define CUTL_XML_PARSER_HXX

#include <string>
#include <vector>
#include <iosfwd>
#include <cstddef> // std::size_t

#include <cutl/details/config.hxx> // LIBCUTL_EXTERNAL_EXPAT

#ifndef LIBCUTL_EXTERNAL_EXPAT
#  include <cutl/details/expat/expat.h>
#else
#  include <expat.h>
#endif

// We only support UTF-8 expat.
//
#ifdef XML_UNICODE
#  error UTF-16 expat (XML_UNICODE defined) is not supported
#endif

#include <cutl/xml/qname.hxx>
#include <cutl/xml/exception.hxx>

#include <cutl/details/export.hxx>

namespace cutl
{
  namespace xml
  {
    // Exception type describing an XML parsing error. It carries a name,
    // a line/column position, and a textual description, each available
    // through the accessor of the same name.
    //
    // NOTE(review): the name is presumably the document name that is passed
    // to the parser constructor -- confirm against the throw sites.
    //
    struct LIBCUTL_EXPORT parsing: exception
    {
      virtual
      ~parsing () throw ();

      // Defined out of line. The arguments correspond one-to-one to the
      // name(), line(), column(), and description() accessors below.
      //
      parsing (const std::string& name,
               unsigned long long line,
               unsigned long long column,
               const std::string& description);

      // The returned reference refers to a member of this object.
      //
      const std::string&
      name () const {return name_;}

      unsigned long long
      line () const {return line_;}

      unsigned long long
      column () const {return column_;}

      // The returned reference refers to a member of this object.
      //
      const std::string&
      description () const {return description_;}

      // Defined out of line.
      //
      // NOTE(review): presumably returns the contents of what_ -- confirm
      // in the implementation.
      //
      virtual const char*
      what () const throw ();

    private:
      std::string name_;
      unsigned long long line_;
      unsigned long long column_;
      std::string description_;
      std::string what_; // Presumably the message behind what(); confirm.
    };

    class LIBCUTL_EXPORT parser
    {
    public:
      ~parser ();

      typedef xml::qname qname_type;
      typedef unsigned short feature_type;

      static const feature_type receive_elements = 0x0001;
      static const feature_type receive_characters = 0x0002;
      static const feature_type receive_attributes = 0x0004;
      static const feature_type receive_namespace_decls = 0x0008;

      static const feature_type receive_default = receive_elements |
                                                  receive_characters |
                                                  receive_attributes;

      // Parse std::istream. Name is used in diagnostics to identify the
      // document being parsed. std::ios_base::failure exception is used
      // to report io errors (badbit and failbit).
      //
      parser (std::istream&,
              const std::string& name,
              feature_type = receive_default);

      enum event_type
      {
        // If adding new events, also update the stream insertion operator.
        //
        start_element,
        end_element,
        start_attribute,
        end_attribute,
        characters,
        start_namespace_decl,
        end_namespace_decl,
        eof
      };

      event_type
      next ();

      const qname_type& qname () const {return *pqname_;}

      const std::string& namespace_ () const {return pqname_->namespace_ ();}
      const std::string& name () const {return pqname_->name ();}
      const std::string& prefix () const {return pqname_->prefix ();}

      const std::string& value () const {return *pvalue_;}

      unsigned long long line () const {return line_;}
      unsigned long long column () const {return column_;}

      // Optional content processing.
      //
    public:
      enum content_type
      {
                 //  element   characters  whitespaces
        empty,   //    no          no        ignored
        simple,  //    no          yes       preserved
        complex, //    yes         no        ignored
        mixed    //    yes         yes       preserved
      };

      void
      content (content_type c)
      {
        if (!content_.empty () && content_.back ().depth == depth_)
          content_.back ().content = c;
        else
          content_.push_back (content_entry (depth_, c));
      }

      content_type
      content () const
      {
        return !content_.empty () && content_.back ().depth == depth_
          ? content_.back ().content : mixed;
      }

    private:
      static void XMLCALL
      start_element_ (void*, const XML_Char*, const XML_Char**);

      static void XMLCALL
      end_element_ (void*, const XML_Char*);

      static void XMLCALL
      characters_ (void*, const XML_Char*, int);

      static void XMLCALL
      start_namespace_decl_ (void*, const XML_Char*, const XML_Char*);

      static void XMLCALL
      end_namespace_decl_ (void*, const XML_Char*);

    private:
      event_type
      next_ ();

      void
      handle_error ();

    private:
      std::istream& is_;
      const std::string name_;
      feature_type feature_;

      XML_Parser p_;
      std::size_t depth_;
      event_type event_;
      event_type queue_;

      qname_type qname_;
      std::string value_;

      // These are used to avoid copying when we are handling attributes
      // and namespace decls.
      //
      const qname_type* pqname_;
      const std::string* pvalue_;

      unsigned long long line_;
      unsigned long long column_;

      // Attributes.
      //
      struct attribute
      {
        qname_type qname;
        std::string value;
      };

      typedef std::vector<attribute> attributes;

      attributes attr_;
      attributes::size_type attr_i_; // Index of the current attribute.

      // Namespace declarations.
      //
      typedef std::vector<qname_type> namespace_decls;

      namespace_decls start_ns_;
      namespace_decls::size_type start_ns_i_; // Index of the current decl.

      namespace_decls end_ns_;
      namespace_decls::size_type end_ns_i_; // Index of the current decl.

      // Content.
      //
      struct content_entry
      {
        content_entry (std::size_t d, content_type c)
            : depth (d), content (c) {}

        std::size_t depth;
        content_type content;
      };

      std::vector<content_entry> content_;
    };

    // Stream insertion operator for parser events. Defined out of line.
    // It has to be updated whenever a new enumerator is added to
    // parser::event_type.
    //
    // NOTE(review): presumably writes a textual representation of the
    // event -- confirm the exact output in the implementation.
    //
    LIBCUTL_EXPORT
    std::ostream&
    operator<< (std::ostream&, parser::event_type);
  }
}

#endif // CUTL_XML_PARSER_HXX