1
1
package main
2
2
3
3
import (
4
- "bufio"
5
- "bytes"
6
- "io"
7
- "regexp"
8
- "strings"
4
+ "bufio"
5
+ "bytes"
6
+ "io"
7
+ "regexp"
8
+ "strings"
9
9
)
10
10
11
11
// Scanner represents a lexical scanner.
//
// It holds the buffered reader that the read and unread helpers operate on.
type Scanner struct {
	r *bufio.Reader // buffered source of input runes
}
15
15
16
16
// NewScanner returns a new instance of Scanner.
17
17
func NewScanner (r io.Reader ) * Scanner {
18
- return & Scanner {r : bufio .NewReader (r )}
18
+ return & Scanner {r : bufio .NewReader (r )}
19
19
}
20
20
21
21
// Scan returns the next token and literal value.
22
22
func (s * Scanner ) Scan () (tok Token , lit string ) {
23
- // Read the next rune.
24
- ch := s .read ()
25
-
26
- // If we see whitespace then consume all contiguous whitespace.
27
- // If we see a letter then consume as an ident or reserved word.
28
- // If we see a digit then consume as a number.
29
- if isWhitespace (ch ) {
30
- s .unread ()
31
- return s .scanWhitespace ()
32
- } else if ( ch == eof ) {
33
- return EOF , ""
34
- } else if ( ch == ',' ) {
35
- return COMMA , string (ch )
36
- } else if ( isValidCh (ch ) ) {
37
- s .unread ()
38
- return s .scanToken ()
39
- }
40
-
41
- return ILLEGAL , string (ch )
23
+ // Read the next rune.
24
+ ch := s .read ()
25
+
26
+ // If we see whitespace then consume all contiguous whitespace.
27
+ // If we see a letter then consume as an ident or reserved word.
28
+ // If we see a digit then consume as a number.
29
+ if isWhitespace (ch ) {
30
+ s .unread ()
31
+ return s .scanWhitespace ()
32
+ } else if ch == eof {
33
+ return EOF , ""
34
+ } else if ch == ',' {
35
+ return COMMA , string (ch )
36
+ } else if isValidCh (ch ) {
37
+ s .unread ()
38
+ return s .scanToken ()
39
+ }
40
+
41
+ return ILLEGAL , string (ch )
42
42
}
43
43
44
44
// scanWhitespace consumes the current rune and all contiguous whitespace.
45
45
func (s * Scanner ) scanWhitespace () (tok Token , lit string ) {
46
- // Create a buffer and read the current character into it.
47
- var buf bytes.Buffer
48
- buf .WriteRune (s .read ())
49
-
50
- // Read every subsequent whitespace character into the buffer.
51
- // Non-whitespace characters and EOF will cause the loop to exit.
52
- for {
53
- if ch := s .read (); ch == eof {
54
- break
55
- } else if ! isWhitespace (ch ) {
56
- s .unread ()
57
- break
58
- } else {
59
- buf .WriteRune (ch )
60
- }
61
- }
62
-
63
- return WS , buf .String ()
46
+ // Create a buffer and read the current character into it.
47
+ var buf bytes.Buffer
48
+ buf .WriteRune (s .read ())
49
+
50
+ // Read every subsequent whitespace character into the buffer.
51
+ // Non-whitespace characters and EOF will cause the loop to exit.
52
+ for {
53
+ if ch := s .read (); ch == eof {
54
+ break
55
+ } else if ! isWhitespace (ch ) {
56
+ s .unread ()
57
+ break
58
+ } else {
59
+ buf .WriteRune (ch )
60
+ }
61
+ }
62
+
63
+ return WS , buf .String ()
64
64
}
65
65
66
66
// scanToken consumes the current rune and all contiguous ident runes.
67
67
func (s * Scanner ) scanToken () (tok Token , lit string ) {
68
- // Create a buffer and read the current character into it.
69
- var buf bytes.Buffer
70
- buf .WriteRune (s .read ())
71
-
72
- // Read every subsequent ident character into the buffer.
73
- // Non-ident characters and EOF will cause the loop to exit.
74
- for {
75
- if ch := s .read (); ch == eof {
76
- break
77
- } else if isWhitespace (ch ) || ! isValidCh (ch ) {
78
- s .unread ()
79
- break
80
- } else {
81
- _ , _ = buf .WriteRune (ch )
82
- }
83
- }
84
-
85
- // If the string matches a keyword then return that keyword.
86
- switch strings .ToUpper (buf .String ()) {
87
- case "MAP" :
88
- return MAP , buf .String ()
89
- case "REDUCE" :
90
- return REDUCE , buf .String ()
91
- case "ON" :
92
- return ON , buf .String ()
93
- case "WHERE" :
94
- return WHERE , buf .String ()
95
- case "AND" :
96
- return AND , buf .String ()
97
- case "IN" :
98
- return IN , buf .String ()
99
- case "SUM" :
100
- return SUM , buf .String ()
101
- }
102
-
103
- // Match operators
104
- switch buf .String () {
105
- case "<" :
106
- return LT , buf .String ()
107
- case "<=" :
108
- return LTE , buf .String ()
109
- case ">" :
110
- return GT , buf .String ()
111
- case ">=" :
112
- return GTE , buf .String ()
113
- case "=" :
114
- return EQ , buf .String ()
115
- case "!=" :
116
- return NOT_EQ , buf .String ()
117
- }
118
-
119
- // Check for string literal or ident
120
- var str string = buf .String ()
121
- if str [0 ] == '"' && str [len (str )- 1 ] == '"' {
122
- str = str [1 :len (str )- 1 ]
123
- return STRING , str
124
- }
125
-
126
- // Check for number
127
- if match , _ := regexp .MatchString ("([0-9.]+)" , buf .String ()); match {
128
- return NUMBER , buf .String ()
129
- }
130
-
131
- return IDENT , buf .String ()
68
+ // Create a buffer and read the current character into it.
69
+ var buf bytes.Buffer
70
+ buf .WriteRune (s .read ())
71
+
72
+ // Read every subsequent ident character into the buffer.
73
+ // Non-ident characters and EOF will cause the loop to exit.
74
+ for {
75
+ if ch := s .read (); ch == eof {
76
+ break
77
+ } else if isWhitespace (ch ) || ! isValidCh (ch ) {
78
+ s .unread ()
79
+ break
80
+ } else {
81
+ _ , _ = buf .WriteRune (ch )
82
+ }
83
+ }
84
+
85
+ // If the string matches a keyword then return that keyword.
86
+ switch strings .ToUpper (buf .String ()) {
87
+ case "MAP" :
88
+ return MAP , buf .String ()
89
+ case "REDUCE" :
90
+ return REDUCE , buf .String ()
91
+ case "ON" :
92
+ return ON , buf .String ()
93
+ case "WHERE" :
94
+ return WHERE , buf .String ()
95
+ case "AND" :
96
+ return AND , buf .String ()
97
+ case "IN" :
98
+ return IN , buf .String ()
99
+ case "SUM" :
100
+ return SUM , buf .String ()
101
+ case "COUNT" :
102
+ return COUNT , buf .String ()
103
+ }
104
+
105
+ // Match operators
106
+ switch buf .String () {
107
+ case "<" :
108
+ return LT , buf .String ()
109
+ case "<=" :
110
+ return LTE , buf .String ()
111
+ case ">" :
112
+ return GT , buf .String ()
113
+ case ">=" :
114
+ return GTE , buf .String ()
115
+ case "=" :
116
+ return EQ , buf .String ()
117
+ case "!=" :
118
+ return NOT_EQ , buf .String ()
119
+ }
120
+
121
+ // Check for string literal or ident
122
+ var str string = buf .String ()
123
+ if str [0 ] == '"' && str [len (str )- 1 ] == '"' {
124
+ str = str [1 : len (str )- 1 ]
125
+ return STRING , str
126
+ }
127
+
128
+ // Check for number
129
+ if match , _ := regexp .MatchString ("([0-9.]+)" , buf .String ()); match {
130
+ return NUMBER , buf .String ()
131
+ }
132
+
133
+ return IDENT , buf .String ()
132
134
}
133
135
134
136
// read reads the next rune from the bufferred reader.
135
137
// Returns the rune(0) if an error occurs (or io.EOF is returned).
136
138
func (s * Scanner ) read () rune {
137
- ch , _ , err := s .r .ReadRune ()
138
- if err != nil {
139
- return eof
140
- }
141
- return ch
139
+ ch , _ , err := s .r .ReadRune ()
140
+ if err != nil {
141
+ return eof
142
+ }
143
+ return ch
142
144
}
143
145
144
146
// unread places the previously read rune back on the reader.
@@ -153,10 +155,12 @@ func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && c
153
155
// isDigit returns true if the rune is a digit.
154
156
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9'
}
155
157
156
// isOperator reports whether ch is one of the operator runes
// '<', '>', '=', '+', '-', '/' or '*'.
func isOperator(ch rune) bool {
	switch ch {
	case '<', '>', '=', '+', '-', '/', '*':
		return true
	}
	return false
}
157
161
158
- func isValidCh (ch rune ) bool {
159
- return isWhitespace (ch ) || isLetter (ch ) || isDigit (ch ) || isOperator (ch ) || ch == '_' || ch == '"' || ch == '.' || ch == '@' || ch == '!'
162
+ func isValidCh (ch rune ) bool {
163
+ return isWhitespace (ch ) || isLetter (ch ) || isDigit (ch ) || isOperator (ch ) || ch == '_' || ch == '"' || ch == '.' || ch == '@' || ch == '!'
160
164
}
161
165
162
166
// eof represents a marker rune for the end of the reader.
0 commit comments