SciLex
A header-only C++20 lexer built on REAL
Loading...
Searching...
No Matches
layout.hpp
Go to the documentation of this file.
1
35#ifndef SCILEX_LAYOUT_HPP
36#define SCILEX_LAYOUT_HPP
37
38#include <cstddef>
39#include <limits>
40#include <span>
41#include <stdexcept>
42#include <string>
43#include <vector>
44
45#include "token.hpp"
46
47namespace scilex {
48
/// Reserved kind: end of a logical line.
inline constexpr int newline = std::numeric_limits<int>::min() + 1;

/// Reserved kind: indentation increased (start of a deeper block).
inline constexpr int indent = std::numeric_limits<int>::min() + 2;

/// Reserved kind: indentation decreased (end of a block).
inline constexpr int dedent = std::numeric_limits<int>::min() + 3;
55
56 // The reserved kinds (this family plus end_of_input and error) must stay mutually distinct — they
57 // share the low end of the int range, and a collision would make two reserved kinds indistinguishable.
58 static_assert(end_of_input != newline && newline != indent && indent != dedent
59 && end_of_input != error && newline != error && indent != error && dedent != error,
60 "reserved token kinds must be distinct");
61
65 class layout_error : public std::runtime_error
66 {
67 public:
68
70 layout_error(const std::string& message,
72 : std::runtime_error(message),
74 {}
75
77 [[nodiscard]] position where() const noexcept
78 {
79 return where_;
80 }
81
82 private:
83
85 };
86
102 [[nodiscard]] inline std::vector<token> layout(std::span<const token> tokens,
103 const std::vector<bool>& mode_significant = {})
104 {
105 std::vector<token> out;
106 std::vector<std::size_t> levels {0};
107 bool started {false};
108 std::size_t previous_line {0}; // last *significant* line seen
109 position end_position {0, 1, 1};
110
111 for (const token& current : tokens) {
112 if (current.kind == end_of_input) {
113 end_position = current.start; // remember; emit our own terminal at the end
114 continue;
115 }
116 // A token shapes layout unless its mode is marked insignificant. An empty
117 // policy makes every token significant — identical to the positional pass.
118 const bool significant {mode_significant.empty()
119 || current.mode_id >= mode_significant.size()
120 || mode_significant[current.mode_id]};
121 if (significant && (!started || current.start.line != previous_line)) {
122 if (started) {
123 out.push_back(token {newline, {}, current.start});
124 }
125 const std::size_t width {current.start.column - 1};
126 if (width > levels.back()) {
127 levels.push_back(width);
128 out.push_back(token {indent, {}, current.start});
129 }
130 else {
131 while (width < levels.back()) {
132 levels.pop_back();
133 out.push_back(token {dedent, {}, current.start});
134 }
135 if (width != levels.back()) {
136 throw layout_error("inconsistent indentation", current.start);
137 }
138 }
139 previous_line = current.start.line; // only significant lines advance this
140 started = true;
141 }
142 out.push_back(current); // every token is kept, significant or not
143 }
144
145 if (started) {
146 out.push_back(token {newline, {}, end_position});
147 }
148 while (levels.back() > 0) {
149 levels.pop_back();
150 out.push_back(token {dedent, {}, end_position});
151 }
152 out.push_back(token {end_of_input, {}, end_position});
153 return out;
154 }
155} // namespace scilex
156
157#endif // SCILEX_LAYOUT_HPP
Thrown when a line's indentation matches no enclosing level.
Definition layout.hpp:66
position where() const noexcept
Returns the position of the offending line.
Definition layout.hpp:77
position where_
Where the indentation was inconsistent.
Definition layout.hpp:84
layout_error(const std::string &message, position where)
Builds the error.
Definition layout.hpp:70
The SciLex public API (scilex::lexer, scilex::rule, scilex::token).
Definition layout.hpp:47
constexpr int error
Reserved token kind for a lexical-error run under scilex::error_policy::token.
Definition token.hpp:37
constexpr int indent
Reserved kind: indentation increased (start of a deeper block).
Definition layout.hpp:52
constexpr int dedent
Reserved kind: indentation decreased (end of a block).
Definition layout.hpp:54
std::vector< token > layout(std::span< const token > tokens, const std::vector< bool > &mode_significant={})
Rewrites tokens with NEWLINE / INDENT / DEDENT inserted.
Definition layout.hpp:102
constexpr int newline
Reserved kind: end of a logical line.
Definition layout.hpp:50
constexpr int end_of_input
Reserved token kind for the synthetic end-of-input token.
Definition token.hpp:26
A location in the source text.
Definition token.hpp:48
std::size_t column
1-based byte column within the line.
Definition token.hpp:51
The token produced by the lexer and its source position.