forked from matteson/parse-pgn-files
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pgn_parser.py
158 lines (125 loc) · 4.81 KB
/
pgn_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import logging
from parsita import lit, opt, reg, rep, repsep
"""
PGN grammar parser.

Includes support for:
- Annotations
- Extended move syntax (LiChess's {comment} syntax)
- Move decomposition
"""
# Module-level logger named after this module; handle_move() reports
# parse results it cannot unpack through it.
logger = logging.getLogger(__name__)
def formatannotations(annotations):
    """Fold a sequence of parsed annotations into a single dict.

    Every item is read positionally: ``item[0]`` becomes the key and
    ``item[1]`` the value. When a key occurs more than once, the last
    occurrence wins.
    """
    collected = {}
    for pair in annotations:
        key = pair[0]
        value = pair[1]
        collected[key] = value
    return collected
def formatgame(game):
    """Label the two positional parts of a parsed game.

    ``game[0]`` is exposed under ``'moves'`` and ``game[1]`` under
    ``'outcome'``.
    """
    moves = game[0]
    outcome = game[1]
    return dict(moves=moves, outcome=outcome)
def formatentry(entry):
    """Label the two positional parts of a parsed PGN entry.

    ``entry[0]`` is exposed under ``'annotations'`` and ``entry[1]`` under
    ``'game'``.
    """
    annotations_part = entry[0]
    game_part = entry[1]
    return dict(annotations=annotations_part, game=game_part)
def handle_optional(optionalmove):
    """Unwrap an optional parse result.

    Returns the first element of ``optionalmove`` when it has any elements,
    and ``None`` when it is empty.
    """
    # Guard clause: an empty container means the optional part was absent.
    if len(optionalmove) == 0:
        return None
    return optionalmove[0]
def _half_move_tuple(half):
    """Flatten one player's parsed half-move into ``(move, annotation, comment)``.

    ``half`` is indexed as ``[[move_text, annotations], comments]``. The
    first annotation and the first comment are used when the respective
    list is non-empty; otherwise ``None`` takes their place.
    """
    body = half[0]
    comments = half[1]
    annotation = body[1][0] if body[1] else None
    comment = comments[0] if comments else None
    return (body[0], annotation, comment)


def handle_move(move):
    """Convert one parsed turn into a dict describing both players' moves.

    Two input shapes are handled.

    Normal move:
        [1, [['e4', []], ['{ [%clk 0:03:00] }']],
            [['c5', []], ['{ [%clk 0:03:00] }']]]

    Split move (Black's half carries its own move number):
        [1, [['d4', []], ['{ [%eval 0.08] [%clk 0:05:00] }']],
            [1, [['d5', []], ['{ [%eval 0.07] [%clk 0:05:00] }']]]]

    Index paths used for a split move:
        move number           @ [0]
        White algebraic move  @ [1][0][0]
        White comment         @ [1][1][0]
        Black move number     @ [2][0]
        Black algebraic move  @ [2][1][0][0]
        Black comment         @ [2][1][1][0]

    Returns:
        ``{'num': ..., 'white': (move, annotation, comment),
        'black': (move, annotation, comment) or None}``.
        ``None`` is returned when the move number or White's half cannot be
        read. If only Black's half fails, the dict built so far (with
        ``'black': None``) is returned.

    Errors while unpacking are logged with a traceback and never propagated.
    """
    # TODO: param 'move' is ugly and convoluted; restructuring it would
    #       generally mean more function calls, which tend to decrease
    #       performance.
    res = None
    try:
        # Base response with White's move; Black is filled in below.
        res = {
            'num': move[0],
            'white': _half_move_tuple(move[1]),
            'black': None,
        }
        black = move[2]
        # Split move: [move_number, half_move_or_None]
        if black and isinstance(black[0], int) and black[1]:
            res['black'] = _half_move_tuple(black[1])
        # Regular move: the half-move itself, [[move, annotations], comments]
        elif black and isinstance(black[0], list):
            res['black'] = _half_move_tuple(black)
    except Exception:
        # Deliberately best-effort: one malformed turn must not abort the
        # parse. logger.exception keeps the traceback the old call dropped.
        logger.exception("Error handling move: %s", move)
    return res
# tokens
# A literal double-quote character; delimits annotation values below.
quote = lit(r'"')
# Exactly one space or one newline. Runs of whitespace must be expressed
# with rep(whitespace) at the use site.
whitespace = lit(' ') | lit('\n')
# Tag name: one or more characters from '!' (U+0021), '#'..'Z'
# (U+0023-U+005A) and '^'..'~' (U+005E-U+007E). Nothing outside those ranges
# matches, so space, '"', '[', '\' and ']' are all excluded.
tag = reg(r'[\u0021-\u0021\u0023-\u005A\u005E-\u007E]+')
# Tag value: the same ranges as `tag`, widened to include space (U+0020) and
# everything from U+005E up to U+10FFFF. '"', '[', '\' and ']' remain
# excluded, so a value containing any of them will not match.
string = reg(r'[\u0020-\u0021\u0023-\u005A\u005E-\U0010FFFF]+')
# Annotations: [Foo "Super Awesome Information"]
# Python binds `>>` / `<<` tighter than `&`, so the expression groups as
#   (("[" >> tag) << " ") & ((quote >> string << quote) << "]")
# With parsita's operators (`a >> b` keeps b, `a << b` keeps a, `&` builds a
# sequence) the brackets, the separating space and the quotes are dropped,
# leaving the [tag, string] pair that formatannotations indexes.
annotation = "[" >> tag << " " & (quote >> string << quote) << "]"
# Annotations separated by a single '\n'; `> formatannotations` passes the
# parsed list through formatannotations to produce a dict.
annotations = repsep(annotation, '\n') > formatannotations
# Moves are more complicated
# Any non-empty run of file letters, rank digits, piece letters, 'x' and '='.
# No chess legality or shape checking is done here.
regularmove = reg(
    r'[a-h1-8NBRQKx=]+')  # Matches more than just chess moves
longcastle = reg(
    r'O-O-O')  # match first to avoid castle matching spuriously
castle = reg(r'O-O')
nullmove = lit('--')  # Illegal move rarely used in annotations
# Not a move: used by `outcome` as the result marker of an unfinished game.
unfinished = lit('*')  # Game unfinished
# Zero or more suffix symbols after a move; the result is a (possibly
# empty) list of the matched literals.
move_annotation = rep(
    lit('+') |  # check
    lit('#') |  # checkmate
    lit('!!') |  # brilliant—and usually surprising—move
    lit('!?') |  # interesting move that may not be the best
    lit('!') |  # very good move
    lit('?!') |  # dubious move that is not easily refutable
    lit('??') |  # blunder
    lit('?') |  # bad move; a mistake
    lit('⌓') |  # better move than the one played
    lit('□') |  # forced move; the only reasonable move, or the only move available
    lit('TN') | lit('NA')  # literal 'TN' or 'NA' (presumably a novelty marker - TODO confirm)
)  # > move_annotation_handler
# LiChess annotations
# A '{' ... '}' comment. The body may span newlines but cannot contain '}',
# so nested braces are not supported.
move_comment = reg(r'\{[^}]*\}')
# Possible move types
# A move is one of the move forms followed by its optional annotation list.
# NOTE(review): handle_move reads this result as [move_text, annotations];
# confirm the exact nesting produced by opt(rep(...)) against parsita.
move = (regularmove | longcastle | castle | nullmove) & opt(move_annotation)
# Build up the game
# "12. " -> 12: digits, a single '.', then exactly one whitespace character;
# only the digits are kept and converted with int.
move_number = (reg(r'[0-9]+') << '.' << whitespace) > int
# "12... " -> 12: same, but with the three-dot form that introduces Black's
# half of a split turn.
move_number_ellipsis = (reg(r'[0-9]+') << '...' << whitespace) > int
# Standard turn: move number, White's half, then an optional Black half.
# `<<` binds tighter than `&`, so each half groups as
#   (move << whitespace) & opt(move_comment << whitespace)
# i.e. a move and its optional comment, each followed by one whitespace
# character. handle_optional turns the optional Black half into the
# half-move itself or None.
standard_turn = move_number & (
    move << whitespace & (opt(move_comment << whitespace))) & (
    opt(move << whitespace & (
        opt(move_comment << whitespace))) > handle_optional)
# Split turn: like a standard turn, but Black's half is introduced by its
# own "N... " move number, giving [number, half_move_or_None] in third place.
split_turn = move_number & (
    move << whitespace & (opt(move_comment << whitespace))) & (
    move_number_ellipsis & (
        opt(move << whitespace & (
            opt(move_comment << whitespace))) > handle_optional))
# Either turn shape is normalised into a dict by handle_move.
turn = (standard_turn | split_turn) > handle_move
# Result markers that terminate a game.
draw = lit('1/2-1/2')
white = lit('1-0')
black = lit('0-1')
outcome = draw | white | black | unfinished
# Zero or more turns followed by a result marker, labelled by formatgame.
game = (rep(turn) & outcome) > formatgame
# A PGN entry is annotations and the game
# Any amount of whitespace may follow the annotation section and the game;
# formatentry labels the two parts.
entry = ((annotations << rep(whitespace)) & (
    game << rep(whitespace))) > formatentry
# A file is repeated entries
pgn_file = rep(entry)