URY playd
C++ minimalist audio player
tokeniser.cpp
Go to the documentation of this file.
1 // This file is part of playd.
2 // playd is licensed under the MIT licence: see LICENSE.txt.
3 
10 #include <algorithm>
11 #include <cassert>
12 #include <cctype>
13 #include <cstdint>
14 
15 #include "response.hpp"
16 
17 #include "tokeniser.hpp"
18 
20  : escape_next(false), in_word(false), quote_type(Tokeniser::QuoteType::NONE)
21 {
22 }
23 
24 std::vector<std::vector<std::string>> Tokeniser::Feed(const std::string &raw)
25 {
26  // The list of ready lines should be cleared by any previous Feed.
27  assert(this->ready_lines.empty());
28 
29  for (char c : raw) {
30  if (this->escape_next) {
31  this->Push(c);
32  continue;
33  }
34 
35  switch (this->quote_type) {
36  case QuoteType::SINGLE:
37  if (c == '\'') {
39  } else {
40  this->Push(c);
41  }
42  break;
43 
44  case QuoteType::DOUBLE:
45  switch (c) {
46  case '\"':
47  this->quote_type =
49  break;
50 
51  case '\\':
52  this->escape_next = true;
53  break;
54 
55  default:
56  this->Push(c);
57  break;
58  }
59  break;
60 
61  case QuoteType::NONE:
62  switch (c) {
63  case '\n':
64  this->Emit();
65  break;
66 
67  case '\'':
68  this->in_word = true;
69  this->quote_type =
71  break;
72 
73  case '\"':
74  this->in_word = true;
75  this->quote_type =
77  break;
78 
79  case '\\':
80  this->escape_next = true;
81  break;
82 
83  default:
84  isspace(c) ? this->EndWord()
85  : this->Push(c);
86  break;
87  }
88  break;
89  }
90  }
91 
92  auto lines = this->ready_lines;
93  this->ready_lines.clear();
94 
95  return lines;
96 }
97 
98 void Tokeniser::Push(const char c)
99 {
100  assert(this->escape_next ||
101  !(this->quote_type == QuoteType::NONE && isspace(c)));
102  this->in_word = true;
103  this->current_word.push_back(c);
104  this->escape_next = false;
105  assert(!this->current_word.empty());
106 }
107 
109 {
110  // Don't add a word unless we're in one.
111  if (!this->in_word) return;
112  this->in_word = false;
113 
114  this->words.push_back(this->current_word);
115 
116  this->current_word.clear();
117 }
118 
120 {
121  // Since we assume these, we don't need to set them later.
122  assert(this->quote_type == QuoteType::NONE);
123  assert(!this->escape_next);
124 
125  // We might still be in a word, in which case we treat the end of a
126  // line as the end of the word too.
127  this->EndWord();
128 
129  this->ready_lines.push_back(this->words);
130 
131  this->words.clear();
132 
133  // The state should now be clean and ready for another command.
134  assert(this->quote_type == QuoteType::NONE);
135  assert(!this->escape_next);
136  assert(this->current_word.empty());
137 }
A string tokeniser.
Definition: tokeniser.hpp:24
bool escape_next
Whether the next character is to be interpreted as an escape code.
Definition: tokeniser.hpp:61
void Push(char c)
Pushes a raw character onto the end of the current word.
Definition: tokeniser.cpp:98
QuoteType quote_type
The type of quotation currently being used in this Tokeniser.
Definition: tokeniser.hpp:67
Tokeniser()
Constructs a new Tokeniser.
Definition: tokeniser.cpp:19
void EndWord()
Finishes the current word, adding it to the tokenised line.
Definition: tokeniser.cpp:108
void Emit()
Finishes the current word and appends the completed line to the list of ready lines.
Definition: tokeniser.cpp:119
bool in_word
Whether the tokeniser is currently in a word.
Definition: tokeniser.hpp:64
QuoteType
Enumeration of quotation types.
Definition: tokeniser.hpp:42
std::vector< std::vector< std::string > > Feed(const std::string &raw)
Feeds a string into a Tokeniser.
Definition: tokeniser.cpp:24
In single quotes ('').
std::vector< std::string > words
The current vector of completed, tokenised words.
Definition: tokeniser.hpp:53
Declaration of classes pertaining to responses to the client.
Not currently in a quote pair.
std::vector< std::vector< std::string > > ready_lines
The current vector of completed, tokenised lines.
Definition: tokeniser.hpp:50
Declaration of the Tokeniser class.
In double quotes ("").
std::string current_word
The current, incomplete word to which new characters should be added.
Definition: tokeniser.hpp:57