Orcus
css_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9 #define INCLUDED_ORCUS_CSS_PARSER_HPP
10 
11 #define ORCUS_DEBUG_CSS 0
12 
13 #include "parser_global.hpp"
14 #include "css_parser_base.hpp"
15 #include "global.hpp"
16 
17 #include <cassert>
18 #include <algorithm>
19 
20 #if ORCUS_DEBUG_CSS
21 #include <iostream>
22 using std::cout;
23 using std::endl;
24 #endif
25 
26 namespace orcus {
27 
33 {
34 public:
35  void at_rule_name(const char* p, size_t n)
36  {
37  (void)p; (void)n;
38  }
39 
40  void simple_selector_type(const char* p, size_t n)
41  {
42  (void)p; (void)n;
43  }
44 
45  void simple_selector_class(const char* p, size_t n)
46  {
47  (void)p; (void)n;
48  }
49 
50  void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
51  {
52  (void)pe;
53  }
54 
55  void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
56  {
57  (void)pc;
58  }
59 
60  void simple_selector_id(const char* p, size_t n)
61  {
62  (void)p; (void)n;
63  }
64 
65  void end_simple_selector() {}
66 
67  void end_selector() {}
68 
69  void combinator(orcus::css::combinator_t combinator)
70  {
71  (void)combinator;
72  }
73 
80  void property_name(const char* p, size_t n)
81  {
82  (void)p; (void)n;
83  }
84 
91  void value(const char* p, size_t n)
92  {
93  (void)p; (void)n;
94  }
95 
103  void rgb(uint8_t red, uint8_t green, uint8_t blue)
104  {
105  (void)red; (void)green; (void)blue;
106  }
107 
117  void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
118  {
119  (void)red; (void)green; (void)blue; (void)alpha;
120  }
121 
129  void hsl(uint8_t hue, uint8_t sat, uint8_t light)
130  {
131  (void)hue; (void)sat; (void)light;
132  }
133 
143  void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
144  {
145  (void)hue; (void)sat; (void)light; (void)alpha;
146  }
147 
154  void url(const char* p, size_t n)
155  {
156  (void)p; (void)n;
157  }
158 
162  void begin_parse() {}
163 
167  void end_parse() {}
168 
173  void begin_block() {}
174 
179  void end_block() {}
180 
184  void begin_property() {}
185 
189  void end_property() {}
190 };
191 
192 template<typename _Handler>
194 {
195 public:
196  typedef _Handler handler_type;
197 
198  css_parser(const char* p, size_t n, handler_type& hdl);
199  void parse();
200 
201 private:
202  // Handlers - at the time a handler is called the current position is
203  // expected to point to the first unprocessed non-blank character, and
204  // each handler must set the current position to the next unprocessed
205  // non-blank character when it finishes.
206  void rule();
207  void at_rule_name();
208  void simple_selector_name();
209  void property_name();
210  void property();
211  void quoted_value(char c);
212  void value();
213  void function_value(std::string_view v);
214  void function_rgb(bool alpha);
215  void function_hsl(bool alpha);
216  void function_url();
217  void name_sep();
218  void property_sep();
219  void block();
220 
221  handler_type& m_handler;
222 };
223 
224 template<typename _Handler>
225 css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) :
226  css::parser_base(p, n), m_handler(hdl) {}
227 
228 template<typename _Handler>
229 void css_parser<_Handler>::parse()
230 {
231  shrink_stream();
232 
233 #if ORCUS_DEBUG_CSS
234  std::cout << "compressed: '";
235  const char* p = mp_char;
236  for (; p != mp_end; ++p)
237  std::cout << *p;
238  std::cout << "'" << std::endl;
239 #endif
240  m_handler.begin_parse();
241  while (has_char())
242  rule();
243  m_handler.end_parse();
244 }
245 
246 template<typename _Handler>
247 void css_parser<_Handler>::rule()
248 {
249  // <selector name> , ... , <selector name> <block>
250  while (has_char())
251  {
252  if (skip_comment())
253  continue;
254 
255  char c = cur_char();
256  if (is_alpha(c))
257  {
258  simple_selector_name();
259  continue;
260  }
261 
262  switch (c)
263  {
264  case '>':
265  set_combinator(c, css::combinator_t::direct_child);
266  break;
267  case '+':
268  set_combinator(c, css::combinator_t::next_sibling);
269  break;
270  case '.':
271  case '#':
272  case '@':
273  simple_selector_name();
274  break;
275  case ',':
276  name_sep();
277  break;
278  case '{':
279  reset_before_block();
280  block();
281  break;
282  default:
283  css::parse_error::throw_with("rule: failed to parse '", c, "'");
284  }
285  }
286 }
287 
288 template<typename _Handler>
289 void css_parser<_Handler>::at_rule_name()
290 {
291  assert(has_char());
292  assert(cur_char() == '@');
293  next();
294  char c = cur_char();
295  if (!is_alpha(c))
296  throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet.");
297 
298  const char* p;
299  size_t len;
300  identifier(p, len);
301  skip_blanks();
302 
303  m_handler.at_rule_name(p, len);
304 #if ORCUS_DEBUG_CSS
305  std::string foo(p, len);
306  std::cout << "at-rule name: " << foo.c_str() << std::endl;
307 #endif
308 }
309 
310 template<typename _Handler>
311 void css_parser<_Handler>::simple_selector_name()
312 {
313  assert(has_char());
314  char c = cur_char();
315  if (c == '@')
316  {
317  // This is the name of an at-rule.
318  at_rule_name();
319  return;
320  }
321 
322  if (m_simple_selector_count)
323  {
324 #if ORCUS_DEBUG_CSS
325  cout << "combinator: " << m_combinator << endl;
326 #endif
327  m_handler.combinator(m_combinator);
328  m_combinator = css::combinator_t::descendant;
329  }
330  assert(is_alpha(c) || c == '.' || c == '#');
331 
332  const char* p = nullptr;
333  size_t n = 0;
334 
335 #if ORCUS_DEBUG_CSS
336  cout << "simple_selector_name: (" << m_simple_selector_count << ")";
337 #endif
338 
339  if (c != '.' && c != '#')
340  {
341  identifier(p, n);
342 #if ORCUS_DEBUG_CSS
343  std::string s(p, n);
344  cout << " type=" << s;
345 #endif
346  m_handler.simple_selector_type(p, n);
347  }
348 
349  bool in_loop = true;
350  while (in_loop && has_char())
351  {
352  switch (cur_char())
353  {
354  case '.':
355  {
356  next();
357  identifier(p, n);
358  m_handler.simple_selector_class(p, n);
359 #if ORCUS_DEBUG_CSS
360  std::string s(p, n);
361  std::cout << " class=" << s;
362 #endif
363  }
364  break;
365  case '#':
366  {
367  next();
368  identifier(p, n);
369  m_handler.simple_selector_id(p, n);
370 #if ORCUS_DEBUG_CSS
371  std::string s(p, n);
372  std::cout << " id=" << s;
373 #endif
374  }
375  break;
376  case ':':
377  {
378  // This could be either a pseudo element or pseudo class.
379  next();
380  if (cur_char() == ':')
381  {
382  // pseudo element.
383  next();
384  identifier(p, n);
385  css::pseudo_element_t elem = css::to_pseudo_element({p, n});
386  if (!elem)
387  css::parse_error::throw_with(
388  "selector_name: unknown pseudo element '", p, n, "'");
389 
390  m_handler.simple_selector_pseudo_element(elem);
391  }
392  else
393  {
394  // pseudo class (or pseudo element in the older version of CSS).
395  identifier(p, n);
396  css::pseudo_class_t pc = css::to_pseudo_class({p, n});
397  if (!pc)
398  css::parse_error::throw_with(
399  "selector_name: unknown pseudo class '", p, n, "'");
400 
401  m_handler.simple_selector_pseudo_class(pc);
402  }
403  }
404  break;
405  default:
406  in_loop = false;
407  }
408  }
409 
410  m_handler.end_simple_selector();
411  skip_comments_and_blanks();
412 
413  ++m_simple_selector_count;
414 
415 #if ORCUS_DEBUG_CSS
416  std::cout << std::endl;
417 #endif
418 }
419 
420 template<typename _Handler>
421 void css_parser<_Handler>::property_name()
422 {
423  // <identifier>
424 
425  assert(has_char());
426  char c = cur_char();
427  if (!is_alpha(c) && c != '.')
428  css::parse_error::throw_with(
429  "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'");
430 
431  const char* p;
432  size_t len;
433  identifier(p, len);
434  skip_comments_and_blanks();
435 
436  m_handler.property_name(p, len);
437 #if ORCUS_DEBUG_CSS
438  std::string foo(p, len);
439  std::cout << "property name: " << foo.c_str() << std::endl;
440 #endif
441 }
442 
443 template<typename _Handler>
444 void css_parser<_Handler>::property()
445 {
446  // <property name> : <value> , ... , <value>
447 
448  m_handler.begin_property();
449  property_name();
450  if (cur_char() != ':')
451  throw css::parse_error("property: ':' expected.");
452  next();
453  skip_comments_and_blanks();
454 
455  bool in_loop = true;
456  while (in_loop && has_char())
457  {
458  value();
459  char c = cur_char();
460  switch (c)
461  {
462  case ',':
463  {
464  // separated by commas.
465  next();
466  skip_comments_and_blanks();
467  }
468  break;
469  case ';':
470  case '}':
471  in_loop = false;
472  break;
473  default:
474  ;
475  }
476  }
477 
478  skip_comments_and_blanks();
479  m_handler.end_property();
480 }
481 
482 template<typename _Handler>
483 void css_parser<_Handler>::quoted_value(char c)
484 {
485  // Parse until the the end quote is reached.
486  const char* p = nullptr;
487  size_t len = 0;
488  literal(p, len, c);
489  next();
490  skip_blanks();
491 
492  m_handler.value(p, len);
493 #if ORCUS_DEBUG_CSS
494  std::string foo(p, len);
495  std::cout << "quoted value: " << foo.c_str() << std::endl;
496 #endif
497 }
498 
499 template<typename _Handler>
500 void css_parser<_Handler>::value()
501 {
502  assert(has_char());
503  char c = cur_char();
504  if (c == '"' || c == '\'')
505  {
506  quoted_value(c);
507  return;
508  }
509 
510  std::string_view v = parse_value();
511  if (v.empty())
512  return;
513 
514  if (cur_char() == '(')
515  {
516  function_value(v);
517  return;
518  }
519 
520  m_handler.value(v.data(), v.size());
521 
522  skip_comments_and_blanks();
523 
524 #if ORCUS_DEBUG_CSS
525  std::cout << "value: " << v << std::endl;
526 #endif
527 }
528 
529 template<typename _Handler>
530 void css_parser<_Handler>::function_value(std::string_view v)
531 {
532  assert(cur_char() == '(');
533  css::property_function_t func = css::to_property_function(v);
534  if (func == css::property_function_t::unknown)
535  css::parse_error::throw_with("function_value: unknown function '", v, "'");
536 
537  // Move to the first character of the first argument.
538  next();
539  skip_comments_and_blanks();
540 
541  switch (func)
542  {
543  case css::property_function_t::rgb:
544  function_rgb(false);
545  break;
546  case css::property_function_t::rgba:
547  function_rgb(true);
548  break;
549  case css::property_function_t::hsl:
550  function_hsl(false);
551  break;
552  case css::property_function_t::hsla:
553  function_hsl(true);
554  break;
555  case css::property_function_t::url:
556  function_url();
557  break;
558  default:
559  css::parse_error::throw_with("function_value: unhandled function '", v, "'");
560  }
561 
562  char c = cur_char();
563  if (c != ')')
564  css::parse_error::throw_with("function_value: ')' expected but '", c, "' found.");
565 
566  next();
567  skip_comments_and_blanks();
568 }
569 
570 template<typename _Handler>
571 void css_parser<_Handler>::function_rgb(bool alpha)
572 {
573  // rgb(num, num, num) rgba(num, num, num, float)
574 
575  uint8_t vals[3];
576  uint8_t* p = vals;
577  const uint8_t* plast = p + 2;
578  char c = 0;
579 
580  for (; ; ++p)
581  {
582  *p = parse_uint8();
583 
584  skip_comments_and_blanks();
585 
586  if (p == plast)
587  break;
588 
589  c = cur_char();
590 
591  if (c != ',')
592  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
593 
594  next();
595  skip_comments_and_blanks();
596  }
597 
598  if (alpha)
599  {
600  c = cur_char();
601  if (c != ',')
602  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
603 
604  next();
605  skip_comments_and_blanks();
606 
607  double alpha_val = parse_double_or_throw();
608 
609  alpha_val = std::clamp(alpha_val, 0.0, 1.0);
610  m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
611  }
612  else
613  m_handler.rgb(vals[0], vals[1], vals[2]);
614 
615 #if ORCUS_DEBUG_CSS
616  std::cout << "rgb";
617  if (alpha)
618  std::cout << 'a';
619  std::cout << '(';
620  p = vals;
621  const uint8_t* pend = plast + 1;
622  for (; p != pend; ++p)
623  std::cout << ' ' << (int)*p;
624  std::cout << " )" << std::endl;
625 #endif
626 }
627 
628 template<typename _Handler>
629 void css_parser<_Handler>::function_hsl(bool alpha)
630 {
631  // hsl(num, percent, percent) hsla(num, percent, percent, float)
632 
633  double hue = parse_double_or_throw(); // casted to uint8_t eventually.
634  hue = std::clamp(hue, 0.0, 360.0);
635  skip_comments_and_blanks();
636 
637  char c = cur_char();
638  if (c != ',')
639  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
640 
641  next();
642  skip_comments_and_blanks();
643 
644  double sat = parse_percent();
645  sat = std::clamp(sat, 0.0, 100.0);
646  skip_comments_and_blanks();
647 
648  c = cur_char();
649  if (c != ',')
650  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
651 
652  next();
653  skip_comments_and_blanks();
654 
655  double light = parse_percent();
656  light = std::clamp(light, 0.0, 100.0);
657  skip_comments_and_blanks();
658 
659  if (!alpha)
660  {
661  m_handler.hsl(hue, sat, light);
662  return;
663  }
664 
665  c = cur_char();
666  if (c != ',')
667  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
668 
669  next();
670  skip_comments_and_blanks();
671 
672  double alpha_val = parse_double_or_throw();
673  alpha_val = std::clamp(alpha_val, 0.0, 1.0);
674  skip_comments_and_blanks();
675  m_handler.hsla(hue, sat, light, alpha_val);
676 }
677 
678 template<typename _Handler>
679 void css_parser<_Handler>::function_url()
680 {
681  char c = cur_char();
682 
683  if (c == '"' || c == '\'')
684  {
685  // Quoted URL value.
686  const char* p;
687  size_t len;
688  literal(p, len, c);
689  next();
690  skip_comments_and_blanks();
691  m_handler.url(p, len);
692 #if ORCUS_DEBUG_CSS
693  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
694 #endif
695  return;
696  }
697 
698  // Unquoted URL value.
699  const char* p;
700  size_t len;
701  skip_to_or_blank(p, len, ORCUS_ASCII(")"));
702  skip_comments_and_blanks();
703  m_handler.url(p, len);
704 #if ORCUS_DEBUG_CSS
705  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
706 #endif
707 }
708 
709 template<typename _Handler>
710 void css_parser<_Handler>::name_sep()
711 {
712  assert(cur_char() == ',');
713 #if ORCUS_DEBUG_CSS
714  std::cout << "," << std::endl;
715 #endif
716  next();
717  skip_blanks();
718  m_handler.end_selector();
719 }
720 
721 template<typename _Handler>
722 void css_parser<_Handler>::property_sep()
723 {
724 #if ORCUS_DEBUG_CSS
725  std::cout << ";" << std::endl;
726 #endif
727  next();
728  skip_comments_and_blanks();
729 }
730 
731 template<typename _Handler>
732 void css_parser<_Handler>::block()
733 {
734  // '{' <property> ';' ... ';' <property> ';'(optional) '}'
735 
736  assert(cur_char() == '{');
737 #if ORCUS_DEBUG_CSS
738  std::cout << "{" << std::endl;
739 #endif
740  m_handler.end_selector();
741  m_handler.begin_block();
742 
743  next();
744  skip_comments_and_blanks();
745 
746  // parse properties.
747  while (has_char())
748  {
749  property();
750  if (cur_char() != ';')
751  break;
752  property_sep();
753  if (cur_char() == '}')
754  // ';' after the last property. This is optional but allowed.
755  break;
756  }
757 
758  if (cur_char() != '}')
759  throw css::parse_error("block: '}' expected.");
760 
761  m_handler.end_block();
762 
763  next();
764  skip_comments_and_blanks();
765 
766 #if ORCUS_DEBUG_CSS
767  std::cout << "}" << std::endl;
768 #endif
769 }
770 
771 }
772 
773 #endif
774 
775 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:32
Definition: css_parser.hpp:33
void end_parse()
Definition: css_parser.hpp:167
void end_block()
Definition: css_parser.hpp:179
void hsl(uint8_t hue, uint8_t sat, uint8_t light)
Definition: css_parser.hpp:129
void value(const char *p, size_t n)
Definition: css_parser.hpp:91
void end_property()
Definition: css_parser.hpp:189
void begin_parse()
Definition: css_parser.hpp:162
void begin_block()
Definition: css_parser.hpp:173
void url(const char *p, size_t n)
Definition: css_parser.hpp:154
void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
Definition: css_parser.hpp:117
void property_name(const char *p, size_t n)
Definition: css_parser.hpp:80
void rgb(uint8_t red, uint8_t green, uint8_t blue)
Definition: css_parser.hpp:103
void begin_property()
Definition: css_parser.hpp:184
void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
Definition: css_parser.hpp:143
Definition: css_parser.hpp:194
Definition: parser_base.hpp:41