Download as pdf or txt
Download as pdf or txt
You are on page 1 of 6

1 using System;

2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.Threading.Tasks;
6
7 namespace KenChessPGNCoreObjects
8 {
9 public static class PGNTokenizer
10 {
/// <summary>
/// The 62 ASCII characters that may start a PGN symbol token:
/// upper-case letters, then lower-case letters, then decimal digits.
/// </summary>
public const string lettersAndDigits =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
    "abcdefghijklmnopqrstuvwxyz" +
    "0123456789";
12
/*
 * Design: The first step in reading a PGN file (or similar content on the Clipboard) is to
 * map the input stream to a stream of tokens.
 * Consider the method below, tokenizeDataFromRawPGNMultigameText, as the lexer which creates
 * that stream of tokens.
 * The tokens are defined in the Standard: Portable Game Notation Specification and
 * Implementation Guide.
 */
/// <summary>
/// Tokenizes raw PGN multigame text. Scans the input left to right and, at each
/// position, recognizes the token that starts there; characters that start no
/// token (whitespace and anything unrecognized) are skipped.
/// </summary>
/// <param name="entireContent">The complete PGN text (one or more games).</param>
/// <returns>The tokens in the order they appear in the input.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="entireContent"/> is null.
/// </exception>
/// <exception cref="Exception">
/// Thrown when more than 250 game termination tokens are found.
/// </exception>
public static List<PGNToken> tokenizeDataFromRawPGNMultigameText(string entireContent)
{
    if (entireContent == null)
    {
        throw new ArgumentNullException("entireContent");
    }

    const int maxGames = 250;
    List<PGNToken> tokenizedData = new List<PGNToken>();
    int ctrGames = 0;
    int currentPosition = 0; // zero-based index into entireContent
    char prevChar = '\n';    // start-of-input counts as start-of-line for the '%' escape

    // At the top of each iteration we are always looking for the START of the next token.
    while (currentPosition < entireContent.Length)
    {
        char nextChar = entireContent[currentPosition];

        // Must check for a game termination token before checking for an integer token,
        // because "1-0", "0-1" and "1/2-1/2" all start with a digit.
        // The helper returns null when currentPosition is not the start of such a token.
        PGNToken gameTerminationToken = getGameTerminationToken(currentPosition, entireContent);
        if (gameTerminationToken != null)
        {
            tokenizedData.Add(gameTerminationToken);
            currentPosition += gameTerminationToken.tokenContent.Length;
            ctrGames++;
            if (ctrGames > maxGames)
            {
                throw new Exception("Error! KenChessPGNUtilities does not allow more than 250 games in a single pgn file.");
            }
        }
        // Special escape mechanism: '%' in the first column of a line (see PGN Spec).
        else if (prevChar == '\n' && nextChar == '%')
        {
            PGNToken pgnToken = getEscapeMechanismToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        // Comment to end of line.
        else if (nextChar == ';')
        {
            PGNToken pgnToken = getCommentToEOLToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        // Comment between braces.
        else if (nextChar == '{')
        {
            PGNToken pgnToken = getBracedCommentToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        // String token.
        else if (nextChar == '"')
        {
            PGNToken pgnToken = getStringToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        // Self-terminating single character tokens.
        else if (".[]()".IndexOf(nextChar) >= 0)
        {
            tokenizedData.Add(getSingleCharacterPGNToken(nextChar));
            currentPosition++;
        }
        // Move suffix annotations are stored as their equivalent NAG:
        // "!" = $1, "?" = $2, "!!" = $3, "??" = $4, "!?" = $5, "?!" = $6.
        else if (nextChar == '!' || nextChar == '?')
        {
            char lookAheadChar = currentPosition < entireContent.Length - 1
                ? entireContent[currentPosition + 1]
                : ' ';
            string nag;
            if (lookAheadChar == '!' || lookAheadChar == '?')
            {
                if (nextChar == '!')
                {
                    nag = lookAheadChar == '!' ? "$3" : "$5";
                }
                else
                {
                    nag = lookAheadChar == '!' ? "$6" : "$4";
                }
                currentPosition += 2;
            }
            else
            {
                // Any other follower (space, newline, ')', end of input, ...) means a
                // one-character annotation. Always advancing here is what guarantees
                // the loop makes progress.
                nag = nextChar == '!' ? "$1" : "$2";
                currentPosition++;
            }
            tokenizedData.Add(new PGNToken(PGNTokenType.NAGToken, nag));
        }
        // Explicit NAG, e.g. "$14".
        else if (nextChar == '$')
        {
            PGNToken pgnToken = getNAGToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        else if (Char.IsDigit(nextChar))
        {
            PGNToken pgnToken = getIntegerToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        else if (lettersAndDigits.IndexOf(nextChar) >= 0)
        {
            PGNToken pgnToken = getSymbolToken(currentPosition, entireContent);
            tokenizedData.Add(pgnToken);
            currentPosition += pgnToken.tokenContent.Length;
        }
        else
        {
            // Whitespace or an unrecognized character: skip it.
            currentPosition++;
        }

        // Every branch above consumed at least one character, so this index is valid.
        prevChar = entireContent[currentPosition - 1];
    }
    return tokenizedData;
}
// Game Termination Token: one of "*", "1-0", "0-1" or "1/2-1/2".
// Tricky! Returns null if currentPosition is not the start of a Game Termination Token.
private static PGNToken getGameTerminationToken(int currentPosition, string entireContent)
{
    // The asterisk is tested first and needs only a single character.
    string firstChar = entireContent.Substring(currentPosition, 1);
    if (firstChar.Equals("*"))
    {
        return new PGNToken(PGNTokenType.GameTerminationToken, "*");
    }

    // The result markers are tried in this fixed order; a marker longer than
    // what is left of the input cannot match and is skipped.
    int charsLeft = entireContent.Length - currentPosition;
    string[] resultMarkers = { "1-0", "0-1", "1/2-1/2" };
    foreach (string marker in resultMarkers)
    {
        if (charsLeft < marker.Length)
        {
            continue;
        }
        string candidate = entireContent.Substring(currentPosition, marker.Length);
        if (candidate.Equals(marker))
        {
            return new PGNToken(PGNTokenType.GameTerminationToken, marker);
        }
    }

    return null;
}
177
// Single character tokens: maps '.', '[', ']', '(' and ')' to their token type.
// Any other character yields a token of type UNKNOWN. The token content is
// always the character itself.
private static PGNToken getSingleCharacterPGNToken(char tokenChar)
{
    PGNTokenType kind;
    if (tokenChar == '.')
    {
        kind = PGNTokenType.PeriodToken;
    }
    else if (tokenChar == '[')
    {
        kind = PGNTokenType.LeftBracketToken;
    }
    else if (tokenChar == ']')
    {
        kind = PGNTokenType.RightBracketToken;
    }
    else if (tokenChar == '(')
    {
        kind = PGNTokenType.LeftParenthesisToken;
    }
    else if (tokenChar == ')')
    {
        kind = PGNTokenType.RightParenthesisToken;
    }
    else
    {
        kind = PGNTokenType.UNKNOWN;
    }

    return new PGNToken(kind, tokenChar.ToString());
}
201
// NAG = Numeric Annotation Glyph.
// The returned token will be "$" + all the consecutive digit characters that
// follow currentPosition (the caller guarantees currentPosition is at the '$').
// Scanning stops at the first non-digit or at the end of entireContent, so a
// NAG that ends the input no longer reads past the end of the string.
private static PGNToken getNAGToken(int currentPosition, string entireContent)
{
    int digitsStart = currentPosition + 1;
    int end = digitsStart;
    while (end < entireContent.Length && Char.IsDigit(entireContent[end]))
    {
        end++;
    }

    string tokenContent = "$" + entireContent.Substring(digitsStart, end - digitsStart);
    return new PGNToken(PGNTokenType.NAGToken, tokenContent);
}
222
// The returned token will be percent (%) + all characters up to and including
// the new line character (the caller guarantees currentPosition is at the '%').
// If the input ends before a new line is found, the token runs to the end of
// entireContent instead of reading past it.
private static PGNToken getEscapeMechanismToken(int currentPosition, string entireContent)
{
    int restStart = currentPosition + 1;
    int newlineIndex = entireContent.IndexOf('\n', restStart);
    // Exclusive end of the token: just past the '\n', or end of input when there is none.
    int end = newlineIndex < 0 ? entireContent.Length : newlineIndex + 1;

    string tokenContent = "%" + entireContent.Substring(restStart, end - restStart);
    return new PGNToken(PGNTokenType.EscapeMechanismToken, tokenContent);
}
240
// The returned token will be semicolon + all characters up to and including
// the new line character (the caller guarantees currentPosition is at the ';').
// If the input ends before a new line is found (comment on an unterminated last
// line), the token runs to the end of entireContent instead of reading past it.
private static PGNToken getCommentToEOLToken(int currentPosition, string entireContent)
{
    int restStart = currentPosition + 1;
    int newlineIndex = entireContent.IndexOf('\n', restStart);
    // Exclusive end of the token: just past the '\n', or end of input when there is none.
    int end = newlineIndex < 0 ? entireContent.Length : newlineIndex + 1;

    string tokenContent = ";" + entireContent.Substring(restStart, end - restStart);
    return new PGNToken(PGNTokenType.CommentToEOLToken, tokenContent);
}
258
259
// The returned token will be left brace + all characters up to and including
// the first right brace (the caller guarantees currentPosition is at the '{').
// Every carriage return and line feed inside the token is replaced with a blank
// space. The replacement is one-for-one on purpose: the caller advances by the
// token's length, so the token must stay as long as the text it covers.
// If the input ends before a right brace is found, the token runs to the end of
// entireContent instead of reading past it.
private static PGNToken getBracedCommentToken(int currentPosition, string entireContent)
{
    int restStart = currentPosition + 1;
    int closingIndex = entireContent.IndexOf('}', restStart);
    // Exclusive end of the token: just past the '}', or end of input when there is none.
    int end = closingIndex < 0 ? entireContent.Length : closingIndex + 1;

    string rest = entireContent.Substring(restStart, end - restStart);
    string tokenContent = "{" + rest.Replace('\n', ' ').Replace('\r', ' ');
    return new PGNToken(PGNTokenType.CommentBetweenBracesToken, tokenContent);
}
287
288
// The returned token will be left double-quote + all characters up to and
// including the closing double-quote (the caller guarantees currentPosition is
// at the opening quote). The content is kept raw: escape backslashes stay in.
// Tricky! A backslash escapes the character after it, so \" is a quote inside
// the string (ex "From book, \"Chess Strategy\" at page 11.") and \\ is a
// literal backslash. Tracking the escape state, rather than only looking at the
// previous character, makes a string ending in \\ close at the right quote.
// If the input ends before a closing quote is found, the token runs to the end
// of entireContent instead of reading past it.
private static PGNToken getStringToken(int currentPosition, string entireContent)
{
    int restStart = currentPosition + 1;
    int end = restStart;      // exclusive end of the token
    bool escaped = false;     // true when the previous character was an unescaped backslash
    while (end < entireContent.Length)
    {
        char current = entireContent[end];
        end++;
        if (escaped)
        {
            // This character is taken literally, whatever it is.
            escaped = false;
        }
        else if (current == '\\')
        {
            escaped = true;
        }
        else if (current == '"')
        {
            break;
        }
    }

    string tokenContent = "\"" + entireContent.Substring(restStart, end - restStart);
    return new PGNToken(PGNTokenType.StringToken, tokenContent);
}
309
// The returned token will be the character at currentPosition (the caller has
// already checked it is a digit) + all the consecutive digit characters after it.
// Scanning stops at the first non-digit or at the end of entireContent, so an
// integer that ends the input no longer reads past the end of the string.
private static PGNToken getIntegerToken(int currentPosition, string entireContent)
{
    int end = currentPosition + 1;
    while (end < entireContent.Length && Char.IsDigit(entireContent[end]))
    {
        end++;
    }

    string tokenContent = entireContent.Substring(currentPosition, end - currentPosition);
    return new PGNToken(PGNTokenType.IntegerToken, tokenContent);
}
330
331
// A symbol token starts with a letter or digit character and is immediately
// followed by a sequence of zero or more symbol continuation characters. These
// continuation characters are letter characters ("A-Za-z"), digit characters
// ("0-9"), the underscore ("_"), the plus sign ("+"), the octothorpe sign ("#"),
// the equal sign ("="), the colon (":"), and the hyphen ("-").
// The returned token is the character at currentPosition plus every
// continuation character after it. Scanning stops at the first other character
// or at the end of entireContent, so a symbol that ends the input (for example
// a last move with no trailing newline) no longer reads past the end of the string.
private static PGNToken getSymbolToken(int currentPosition, string entireContent)
{
    // symbol continuation characters
    string scc = lettersAndDigits + "_+#=:-";

    int end = currentPosition + 1;
    while (end < entireContent.Length && scc.IndexOf(entireContent[end]) >= 0)
    {
        end++;
    }

    string tokenContent = entireContent.Substring(currentPosition, end - currentPosition);
    return new PGNToken(PGNTokenType.SymbolToken, tokenContent);
}
356
357
358 }
359 }
360

You might also like