sPyNNaker neural_modelling 7.3.1
local_only_conv_impl.c
/*
 * Copyright (c) 2021 The University of Manchester
 * based on work Copyright (c) The University of Sussex,
 * Garibaldi Pineda Garcia, James Turner, James Knight and Thomas Nowotny
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
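//! \file
//! \brief Implementation of "local-only" spike processing for convolutional
//!     connectivity: 2D kernel weights are applied to incoming spikes and the
//!     resulting weights are added directly into the ring buffers.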

#include "local_only_impl.h"
#include "local_only_2d_common.h"
#include <stdlib.h>
#include <debug.h>
#include <stdfix-full-iso.h>
#include "../population_table/population_table.h"
#include "../neuron.h"

// One per connector
// (typedef struct { ... } connector; definition elided in this listing)

// One per source
// (typedef struct { ... } source_info; definition elided in this listing)

typedef struct {
    lc_coord_t post_start;
    lc_coord_t post_end;
    lc_shape_t post_shape;
    uint32_t n_sources;
    uint32_t n_connectors_total;
    uint32_t n_weights_total;
    source_info sources[];
    // In SDRAM, after sources[n_sources] is the following:
    // connector connectors[n_connectors_total];
    // lc_weight_t[n_weights_total] weights;
} conv_config;

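// Note: the configuration (including its trailing sources[] array) is copied
// into a single DTCM block, while the connectors and weights that follow it
// in SDRAM are copied into the separate DTCM arrays declared below.
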
// The main configuration data
static conv_config *config;

static connector *connectors;

static lc_weight_t *weights;

static inline void log_div_const(const char *name, div_const d) {
    log_debug(" %s=(m: %u, sh1: %u, sh2: %u)", name, d.m, d.sh1, d.sh2);
}

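//! \brief Initialise the convolution processing: copy the configuration,
//!     the connectors and the kernel weights from SDRAM into DTCM.
//! \param[in] address: Address of the configuration region in SDRAM
//! \return true if initialisation succeeded, false otherwise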
bool local_only_impl_initialise(void *address){
    log_info("+++++++++++++++++ CONV init ++++++++++++++++++++");
    conv_config* sdram_config = address;
    uint32_t n_bytes = sizeof(conv_config) +
            (sizeof(source_info) * sdram_config->n_sources);
    config = spin1_malloc(n_bytes);
    if (config == NULL) {
        log_error("Can't allocate memory for config!");
        return false;
    }
    spin1_memcpy(config, sdram_config, n_bytes);

    log_info("post_start = %u, %u, post_end = %u, %u, post_shape = %u, %u",
            config->post_start.col, config->post_start.row,
            config->post_end.col, config->post_end.row,
            config->post_shape.width, config->post_shape.height);
    log_info("num sources = %u", config->n_sources);

    if (config->n_sources == 0) {
        log_error("No sources!");
        return false;
    }

    // The connectors come after the sources in SDRAM
    connector *sdram_connectors =
            (connector *) &(sdram_config->sources[config->n_sources]);
    uint32_t n_connector_bytes = sizeof(connector) * config->n_connectors_total;
    connectors = spin1_malloc(n_connector_bytes);
    if (connectors == NULL) {
        log_error("Can't allocate %u bytes of memory for %u connectors!",
                n_connector_bytes, config->n_connectors_total);
        return false;
    }
    spin1_memcpy(connectors, sdram_connectors, n_connector_bytes);

    // The weights come after the connectors in SDRAM
    lc_weight_t *kernel_weights =
            (lc_weight_t *) &(sdram_connectors[config->n_connectors_total]);
    uint32_t n_weight_bytes = sizeof(lc_weight_t) * config->n_weights_total;
    weights = spin1_malloc(n_weight_bytes);
    if (weights == NULL) {
        log_error("Can't allocate %u bytes of memory for %u weights!",
                n_weight_bytes, config->n_weights_total);
        return false;
    }
    spin1_memcpy(weights, kernel_weights, n_weight_bytes);

    // Print what we have
    for (uint32_t i = 0; i < config->n_sources; i++) {
        source_info *s_info = &(config->sources[i]);
        log_debug("Source %u: key=0x%08x, mask=0x%08x, start=%u, count=%u",
                i, s_info->key_info.key, s_info->key_info.mask,
                s_info->key_info.start, s_info->key_info.count);
        log_debug(" core_mask=0x%08x, mask_shift=0x%08x",
                s_info->key_info.core_mask, s_info->key_info.mask_shift);
        log_debug(" height_per_core=%u, width_per_core=%u",
                s_info->source_height_per_core, s_info->source_width_per_core);
        log_debug(" height_last_core=%u, width_last_core=%u",
                s_info->source_height_last_core, s_info->source_width_last_core);
        log_debug(" cores_per_height=%u, cores_per_width=%u",
                s_info->cores_per_source_height, s_info->cores_per_source_width);
        log_div_const("source_width_div", s_info->source_width_div);
        log_div_const("source_width_last_div", s_info->source_width_last_div);
        log_div_const("cores_per_width_div", s_info->cores_per_width_div);
    }

    for (uint32_t i = 0; i < config->n_connectors_total; i++) {
        connector *conn = &(connectors[i]);
        log_debug("Connector %u: kernel size=%u, %u", i, conn->kernel.width,
                conn->kernel.height);
        log_debug(" delay=%u, delay_stage=%u", conn->delay, conn->delay_stage);
    }

    return true;
}

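//! \brief Map a pre-synaptic (source) coordinate to the corresponding
//!     post-synaptic coordinate, applying the pooling stride, the kernel
//!     half-size, the padding and the stride of the connector.  The kernel
//!     is then applied around this post coordinate.
//! \param[in] connector: The connector giving strides, pooling and padding
//! \param[in] pre: The pre-synaptic coordinate (row, column)
//! \param[in] half_kh: Half of the kernel height
//! \param[in] half_kw: Half of the kernel width
//! \return The corresponding post-synaptic coordinate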
static inline lc_coord_t map_pre_to_post(connector *connector, lc_coord_t pre,
        int16_t half_kh, int16_t half_kw) {
    lc_coord_t post = pre;
    post.row = div_by_const(post.row, connector->pool_stride_height_div);
    post.col = div_by_const(post.col, connector->pool_stride_width_div);
    post.row = post.row - half_kh + connector->padding.height;
    post.col = post.col - half_kw + connector->padding.width;
    post.row = div_by_const(post.row, connector->stride_height_div);
    post.col = div_by_const(post.col, connector->stride_width_div);
    return post;
}
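
// Illustrative example (values assumed, not taken from any particular model):
// with no pooling, no padding, a stride of 1 and a 3x3 kernel
// (half_kh = half_kw = 1), a pre neuron at (row 5, col 7) maps to post
// (row 4, col 6); the loop in do_convolution_operation then updates post
// rows 3..5 and columns 5..7.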
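//! \brief Perform the convolution for one incoming spike: map the pre
//!     coordinate to a post coordinate, then add the kernel weights for every
//!     in-range post neuron into the ring buffers, using the positive or
//!     negative synapse type depending on the sign of each weight.
//! \param[in] time: The current simulation time step
//! \param[in] pre_coord: The coordinate of the spiking pre neuron
//! \param[in] connector: The connector being processed
//! \param[in] ring_buffers: The ring buffers to update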
static inline void do_convolution_operation(
        uint32_t time, lc_coord_t pre_coord, connector *connector,
        uint16_t *ring_buffers) {
    int32_t half_kh = connector->kernel.height / 2;
    int32_t half_kw = connector->kernel.width / 2;
    lc_coord_t post_coord = map_pre_to_post(connector, pre_coord, half_kh, half_kw);
    log_debug("pre row %d, col %d AS post row %d, col %d",
            pre_coord.row, pre_coord.col, post_coord.row, post_coord.col);
    lc_weight_t *connector_weights = &weights[connector->kernel_index];
    uint32_t sat_flag = 0xFFFF0000;
    uint32_t sat_value = 0xFFFF;

    int32_t kw = connector->kernel.width;
    for (int32_t r = -half_kh, kr = 0; r <= half_kh; r++, kr++) {
        int32_t tmp_row = post_coord.row + r;
        if ((tmp_row < config->post_start.row) || (tmp_row > config->post_end.row)) {
            continue;
        }
        for (int32_t c = -half_kw, kc = 0; c <= half_kw; c++, kc++) {
            int32_t tmp_col = post_coord.col + c;
            if ((tmp_col < config->post_start.col) || (tmp_col > config->post_end.col)) {
                continue;
            }

            // This is the neuron id relative to the neurons on this core
            uint32_t post_index =
                    ((tmp_row - config->post_start.row) * config->post_shape.width)
                    + (tmp_col - config->post_start.col);
            uint32_t k = (kr * kw) + kc;
            lc_weight_t weight = connector_weights[k];
            if (weight == 0) {
                continue;
            }
            uint32_t rb_index = 0;
            if (weight > 0) {
                rb_index = synapse_row_get_ring_buffer_index(time + connector->delay,
                        connector->positive_synapse_type, post_index,
                        synapse_type_index_bits, synapse_index_bits,
                        synapse_delay_mask);
            } else {
                rb_index = synapse_row_get_ring_buffer_index(time + connector->delay,
                        connector->negative_synapse_type, post_index,
                        synapse_type_index_bits, synapse_index_bits,
                        synapse_delay_mask);
                weight = -weight;
            }
            log_debug("Updating ring_buffers[%u] for post neuron %u = %u, %u, with weight %u",
                    rb_index, post_index, tmp_col, tmp_row, weight);

            // Add weight to current ring buffer value, avoiding saturation
            uint32_t accumulation = ring_buffers[rb_index] + weight;
            uint32_t sat_test = accumulation & sat_flag;
            if (sat_test) {
                accumulation = sat_value;
            }
            ring_buffers[rb_index] = accumulation;
        }
    }
}

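// Helpers to turn the core index extracted from a spike key into its
// (row, column) position within the grid of cores covering the source
// population, and to detect the last core on a row or in a column (which may
// cover a different number of source neurons from the other cores).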
static inline uint32_t get_core_row(uint32_t core_id, source_info *s_info) {
    return div_by_const(core_id, s_info->cores_per_width_div);
}

static inline uint32_t get_core_col(uint32_t core_id, uint32_t core_row,
        source_info *s_info) {
    return core_id - (core_row * s_info->cores_per_source_width);
}

static inline bool is_last_core_on_row(uint32_t core_col, source_info *s_info) {
    return core_col == (uint32_t) (s_info->cores_per_source_width - 1);
}

static inline bool is_last_core_in_col(uint32_t core_row, source_info *s_info) {
    return core_row == (uint32_t) (s_info->cores_per_source_height - 1);
}

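//! \brief Find the source whose key matches an incoming spike by checking
//!     (spike & mask) == key for each source in turn.
//! \param[in] spike: The incoming spike key
//! \param[out] rs_info: Set to the matching source information, if found
//! \return true if a source matched, false otherwise
//
// For example (values assumed for illustration only), a source with
// key = 0x00010000 and mask = 0xFFFF0000 matches any spike 0x0001xxxx.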
static inline bool key_to_index_lookup(uint32_t spike, source_info **rs_info) {
    for (uint32_t i = 0; i < config->n_sources; i++) {
        source_info *s_info = &(config->sources[i]);
        // We have a match on key
        if ((spike & s_info->key_info.mask) == s_info->key_info.key) {
            *rs_info = s_info;
            return true;
        }
    }
    return false;
}

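//! \brief Process an incoming spike: find its source, work out which core of
//!     the source population it came from and the spiking neuron's coordinate
//!     within that core, then perform the convolution for every connector of
//!     that source whose delay stage matches.
//! \param[in] time: The current simulation time step
//! \param[in] spike: The incoming spike key
//! \param[in] ring_buffers: The ring buffers to add weights into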
void local_only_impl_process_spike(
        uint32_t time, uint32_t spike, uint16_t* ring_buffers) {

    // Lookup the spike, and if found, get the appropriate parts
    source_info *s_info;
    if (!key_to_index_lookup(spike, &s_info)) {
        log_debug("Spike %x didn't match any connectors!", spike);
        return;
    }

    uint32_t core_id = get_core_id(spike, s_info->key_info);
    uint32_t core_row = get_core_row(core_id, s_info);
    uint32_t core_col = get_core_col(core_id, core_row, s_info);
    bool last_core_on_row = is_last_core_on_row(core_col, s_info);
    bool last_core_in_col = is_last_core_in_col(core_row, s_info);
    uint32_t source_height = 0;
    uint32_t source_width = 0;
    div_const source_width_d;
    if (last_core_on_row) {
        source_width = s_info->source_width_last_core;
        source_width_d = s_info->source_width_last_div;
    } else {
        source_width = s_info->source_width_per_core;
        source_width_d = s_info->source_width_div;
    }
    if (last_core_in_col) {
        source_height = s_info->source_height_last_core;
    } else {
        source_height = s_info->source_height_per_core;
    }
    uint32_t local_id = get_local_id(spike, s_info->key_info);
    uint32_t neurons_per_core = source_width * source_height;

    log_debug("Spike %x, on core %u (%u, %u), is last (%u, %u), local %u",
            spike, core_id, core_col, core_row, last_core_on_row, last_core_in_col,
            local_id);

    // compute the population-based coordinates
    uint32_t end = s_info->key_info.start + s_info->key_info.count;
    for (uint32_t i = s_info->key_info.start; i < end; i++) {
        connector *connector = &(connectors[i]);

        // Ignore the neuron if the delay does not match
        uint32_t first_neuron = neurons_per_core * connector->delay_stage;
        uint32_t last_neuron = first_neuron + neurons_per_core;
        log_debug("Connector %u, delay stage = %u, first = %u, last = %u",
                i, connector->delay_stage, first_neuron, last_neuron);
        if (local_id < first_neuron || local_id >= last_neuron) {
            continue;
        }

        uint32_t local_neuron_id = local_id - first_neuron;
        uint32_t local_row = div_by_const(local_neuron_id, source_width_d);
        uint32_t local_col = local_neuron_id - (local_row * source_width);

        lc_coord_t pre_coord = {
            // The x-coordinate is the remainder of the "division"
            .col = (core_col * s_info->source_width_per_core) + local_col,
            // The y-coordinate is the integer part of the "division".
            .row = (core_row * s_info->source_height_per_core) + local_row
        };

        log_debug("Local coord = %u, %u, Pre coord = %u, %u",
                local_col, local_row, pre_coord.col, pre_coord.row);

        // Compute the convolution
        do_convolution_operation(time, pre_coord, connector, ring_buffers);
    }
}
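
// A minimal sketch of how this implementation is typically driven (the caller
// shape below is an assumption for illustration, not the actual local_only.c
// code); only the two functions defined above are real:
//
//     uint16_t *ring_buffers = ...;   // provided by the neuron core
//     if (!local_only_impl_initialise(config_region_address)) {
//         return false;               // set-up failed, e.g. out of DTCM
//     }
//     // then, for each multicast packet received during a time step:
//     local_only_impl_process_spike(time, spike_key, ring_buffers);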