-
Notifications
You must be signed in to change notification settings - Fork 1
/
bot_base.pyx
303 lines (266 loc) · 12.7 KB
/
bot_base.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
from libc.math cimport ceil, sqrt
from cython import ccall, cclass, locals, returns, wraparound
from numpy import asarray, empty, newaxis, ones, prod, repeat
from numpy.random import randint
from interface cimport c_get_score
from interface import reset_level
@cclass
class BaseBot:
"""
Abstract bot class, defines the interface.
Exposes level and params info and provides generic implementations of
cloning, parameter generation and mutation. Can handle emphases, phases
(or multiple parameter sets in general), parameter mapping, freezing and
selecting specific distributions for parameter mutation and variation.
The base constructor precalculates parameter array sizes and their total
count of entries. The only thing that a subclass has to provide is a static
shapes() method that returns a map from param keys to array shape tuples.
Three attributes are used for the generic parameter generation and
mutation: param_sizes maps param keys to array sizes, param_entries is
the total count of parameters (array entries), and param_multipliers
can be used to scale one parameter array with respect to another (by
default nothing is scaled, but some subclasses update the multipliers).
Frozen parameters are removed from these dictionaries after generation.
The base class manages a choices array. Each entry in this array is the
number of the parameters' set that should be used at the said time.
If a bot declares that it wants to use multiple values for parameter
entries, parameter arrays get an additional inner-most axis collecting
the values from all parameter sets. If parameters without the additional
axis are loaded, the array is repeated with the same entry for each set.
All arrays are guaranteed to be C-contiguous, so you may avoid stride
calculations for the last index (use ::1 in Cython).
"""
multi = False
@locals(level='dict', params='dict', param_map='dict',
param_freeze='tuple', param_scale='dict', dists='dict',
emphases='tuple', phases='float[:]',
steps='int', step='int', phase='int', phase_end='float',
key='str', target_key='str', shape='tuple', target_shape='tuple',
param='object', scale='float',
source_choices='int', target_choices='int', reps='float')
@wraparound(True)
def __init__(self, level, params={}, param_map={}, param_freeze=(),
param_scale={}, dists=None, emphases=None, phases=None):
"""
Prepares the bot for the given level, fixing some parameters.
You may provide a dictionary of (bot-specific) parameters or give the
info needed to generate a new random set: a dict of distributions for
entries randomization and a tuple of state feature emphases.
The param_map dict serves to translate arrays from the input seed to
other arrays in bot's parameters. Source array keys should be mapped
to lists of target keys.
Listing params in param_freeze prevents them from being varied.
Loaded params may be prescaled to fit them for another bot, by using
the param_scale dict.
If both params, and dists and emphases, are given, params may contain
just a subset of needed parameter arrays, the rest will be randomized
(as if parameters were newly generated).
Phases may be a list of phase ends as level time fractions, overriding
phase splits stored under a _phases key in the parameters. For a multi-
valued bot the choices array is initialized. For instance, for phases
(.33, .67, 1.) it is set to [0, ..., 0, 1, ..., 1, 2, ..., 2].
"""
self.level = level
self.param_shapes = self.shapes(level['steps'], level['actions'],
level['features'])
if self.multi:
steps = level['steps']
if phases is None:
phases = params.get('_phases', ones(1, dtype='f4'))
self.param_choices = len(phases)
self.choices = empty(steps, dtype='i4')
step = 0
for phase, phase_end in enumerate(phases):
while step < phase_end * steps:
self.choices[step] = phase
step += 1
for key in self.param_shapes:
self.param_shapes[key] += (self.param_choices,)
else:
self.param_choices = 1
self.choices = None
self.param_sizes = {k: prod(s) for k, s in self.param_shapes.items()}
self.param_entries = sum(self.param_sizes.values())
self.param_multipliers = {k: 1 for k in self.param_shapes.keys()}
if dists is None or emphases is None:
self.params = {}
else:
self.params = self.new_params(dists, emphases)
for key, param in params.items():
target_keys = param_map.get(key, []) + [key]
for target_key in target_keys:
try:
target_shape = self.param_shapes[target_key]
except KeyError:
pass
else:
if param.shape != target_shape:
if self.multi:
if param.ndim == len(target_shape) - 1:
# Single-set promoted to a one-choice multi.
param = param[..., newaxis]
if param.shape[:-1] == target_shape[:-1]:
# M choices replicated / cropped to N choices.
source_choices = param.shape[-1]
target_choices = target_shape[-1]
reps = float(target_choices) / source_choices
param = repeat(param, int(ceil(reps)), axis=-1)
param = param[..., :target_choices]
else:
raise ValueError(
"Wrong parameter shape (multi " +
"'{}', got {}, expected {})".format(
target_key, param.shape, target_shape))
else:
if param.shape[:-1] == target_shape:
# Multi-set clipped to a single-set.
param = param[..., 0]
else:
raise ValueError(
"Wrong parameter shape (single " +
"'{}', got {}, expected {})".format(
target_key, param.shape, target_shape))
# NumPy's ndarray.copy() ensures C contiguity.
self.params[target_key] = param.copy()
for key, scale in param_scale.items():
self.params[key] *= scale
if self.multi:
self.params['_phases'] = asarray(phases)
for key in param_freeze:
self.param_entries -= self.param_sizes[key]
del self.param_shapes[key]
del self.param_sizes[key]
del self.multipliers[key]
self.last_action = -1
@ccall
@returns('BaseBot')
@locals(state='bint', bot='BaseBot')
def clone(self, state=True):
"""
Returns a copy of this bot, one that can vary parameters independently.
If state is true, the current bot's state should be cloned, otherwise,
bot's state should be initialized as if the bot was created anew.
"""
bot = self.__new__(type(self), self.level)
bot.level = self.level
bot.param_shapes = self.param_shapes
bot.param_sizes = self.param_sizes
bot.param_entries = self.param_entries
bot.param_multipliers = self.param_multipliers
bot.param_choices = self.param_choices
bot.choices = self.choices
bot.params = {k: p.copy() for k, p in self.params.items()}
if state:
bot.last_action = self.last_action
else:
bot.last_action = -1
return bot
@ccall
@returns('dict')
@locals(dists='dict', emphases='tuple',
features='int', feature='int', feature0='int', feature1='int',
multipliers='dict', params='dict', key='str', shape='tuple',
dist='object', coeffs='object', emp='float')
@wraparound(True)
def new_params(self, dists, emphases):
"""
Generates a new set of real parameters according to param_shapes,
taking emphases into account for typical parameters.
Parameter arrays with keys containing "state" have emphases applied to
their entries as if they were linear or quadratic state coefficients.
"""
features = self.level['features']
multipliers = self.param_multipliers
params = {}
for key, shape in self.param_shapes.items():
dist = dists['new'].get(key, dists['real'])
coeffs = dist.rvs(size=shape).astype('f4') * multipliers[key]
if 'state' in key or 'diffs' in key:
if key[-1] == 'l':
for feature in range(features):
emp = emphases[feature]
if self.multi:
coeffs[..., feature, :] *= emp
else:
coeffs[..., feature] *= emp
elif key[-1] == 'q':
for feature0 in range(features):
for feature1 in range(features):
emp = sqrt(emphases[feature0] * emphases[feature1])
if self.multi:
coeffs[..., feature0, feature1, :] *= emp
else:
coeffs[..., feature0, feature1] *= emp
params[key] = coeffs
return params
@ccall
@returns('void')
@locals(dists='dict', emphases='tuple', change='float', variations='int',
variation='int')
def vary_params(self, dists, emphases, change, variations):
"""
Makes a couple of variations in a single call.
"""
for variation in range(variations):
self.vary_param(dists, emphases, change)
@ccall
@returns('void')
@locals(dists='dict', emphases='tuple', change='float',
features='int', feature0='int', feature1='int', entry='int',
key='str', size='int', array='object', dist='object',
multiplier='float', coeff='float')
@wraparound(True)
def vary_param(self, dists, emphases, change):
"""
Randomly varies a single entry from parameter arrays.
The change should be in (0, 1], where 1 means that a parameter should
be redrawn, while .001 makes a very small variation.
"""
features = self.level['features']
multiplier = 1
entry = randint(self.param_entries)
for key, size in self.param_sizes.items():
if entry < size:
array = self.params[key].flat
dist = dists['vary'].get(key, dists['real'])
multiplier = self.param_multipliers[key]
break
else:
entry -= size
if 'state' in key or 'diffs' in key:
if key[-1] == 'l':
multiplier *= emphases[entry // self.param_choices % features]
elif key[-1] == 'q':
feature0, feature1 = divmod(
entry // self.param_choices % features ** 2, features)
multiplier *= sqrt(emphases[feature0] * emphases[feature1])
coeff = dist.rvs() * multiplier
array[entry] += change * (coeff - array[entry])
@ccall
@returns('float')
@locals(runs='int', run='int', score='float')
def evaluate(self, runs):
"""
Estimates the value of the current parameters by running the bot
through a complete level.
If runs is given, repeats the level a couple of times and averages the
score -- to account for non-determinism in bots behavior.
"""
score = 0
for run in range(runs):
reset_level()
self.act(self.level['steps'])
score += c_get_score()
return score / runs
@ccall
@returns('void')
@locals(steps='int')
def act(self, steps):
"""
The main bot function, performs actions not minding any consequences.
Takes the number of actions to do. Some bots have limitations on the
number of actions that can be played.
The bot is responsible for storing the last action it made (-1 if it
did not do any) as self.last_action (for collectors use).
"""
raise NotImplementedError()