sPyNNaker neural_modelling 7.1.1
Loading...
Searching...
No Matches
local_only_conv_impl.c
1/*
2 * Copyright (c) 2021 The University of Manchester
3 * based on work Copyright (c) The University of Sussex,
4 * Garibaldi Pineda Garcia, James Turner, James Knight and Thomas Nowotny
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
19
20#include "local_only_impl.h"
21#include "local_only_2d_common.h"
22#include <stdlib.h>
23#include <debug.h>
24#include <stdfix-full-iso.h>
25#include "../population_table/population_table.h"
26#include "../neuron.h"
27
28// One per connector
55
78
//! Overall configuration for this core; in SDRAM it is followed by the
//! connectors and then the kernel weights (see comment at end of struct).
// NOTE(review): the closing "} conv_config;" of this typedef was lost in
// extraction; the fields below are as visible in SOURCE.
typedef struct {
    //! First post-population coordinate handled by this core (used as the
    //! lower bound when clipping in do_convolution_operation)
    lc_coord_t post_start;
    //! Last post-population coordinate handled by this core (upper bound)
    lc_coord_t post_end;
    //! Shape of the post population on this core; width is the row stride
    //! when computing post_index
    lc_shape_t post_shape;
    //! Number of entries in sources[]
    uint32_t n_sources;
    //! Total number of connectors stored after sources[] in SDRAM
    uint32_t n_connectors_total;
    //! Total number of weights stored after the connectors in SDRAM
    uint32_t n_weights_total;
    //! Per-source information (flexible array member)
    source_info sources[];
    // In SDRAM, after sources[n_sources] is the following:
    // connector connectors[n_connectors_total];
    // lc_weight_t[n_weights_total] weights;
91
92// The main configuration data
93static conv_config *config;
94
95static connector *connectors;
96
97static lc_weight_t *weights;
98
99static inline void log_div_const(const char *name, div_const d) {
100 log_debug(" %s=(m: %u, sh1: %u, sh2: %u)", name, d.m, d.sh1, d.sh2);
101}
102
104bool local_only_impl_initialise(void *address){
105 log_info("+++++++++++++++++ CONV init ++++++++++++++++++++");
106 conv_config* sdram_config = address;
107 uint32_t n_bytes = sizeof(conv_config) +
108 (sizeof(source_info) * sdram_config->n_sources);
109 config = spin1_malloc(n_bytes);
110 if (config == NULL) {
111 log_error("Can't allocate memory for config!");
112 return false;
113 }
115
116 log_info("post_start = %u, %u, post_end = %u, %u, post_shape = %u, %u",
117 config->post_start.col, config->post_start.row,
118 config->post_end.col, config->post_end.row,
119 config->post_shape.width, config->post_shape.height);
120 log_info("num sources = %u", config->n_sources);
121
122 if (config->n_sources == 0) {
123 log_error("No sources!");
124 return false;
125 }
126
127 // The connectors come after the sources in SDRAM
128 connector *sdram_connectors =
129 (connector *) &(sdram_config->sources[config->n_sources]);
130 uint32_t n_connector_bytes = sizeof(connector) * config->n_connectors_total;
131 connectors = spin1_malloc(n_connector_bytes);
132 if (connectors == NULL) {
133 log_error("Can't allocate %u bytes of memory for %u connectors!",
134 n_connector_bytes, config->n_connectors_total);
135 return false;
136 }
137 spin1_memcpy(connectors, sdram_connectors, n_connector_bytes);
138
139 // The weights come after the connectors in SDRAM
140 lc_weight_t *kernel_weights =
141 (lc_weight_t *) &(sdram_connectors[config->n_connectors_total]);
142 uint32_t n_weight_bytes = sizeof(lc_weight_t) * config->n_weights_total;
143 weights = spin1_malloc(n_weight_bytes);
144 if (weights == NULL) {
145 log_error("Can't allocate %u bytes of memory for %u weights!",
146 n_weight_bytes, config->n_weights_total);
147 return false;
148 }
149 spin1_memcpy(weights, kernel_weights, n_weight_bytes);
150
151 // Print what we have
152 for (uint32_t i = 0; i < config->n_sources; i++) {
153 source_info *s_info = &(config->sources[i]);
154 log_debug("Source %u: key=0x%08x, mask=0x%08x, start=%u, count=%u",
155 i, s_info->key_info.key, s_info->key_info.mask,
156 s_info->key_info.start, s_info->key_info.count);
157 log_debug(" core_mask=0x%08x, mask_shift=0x%08x",
158 s_info->key_info.core_mask, s_info->key_info.mask_shift);
159 log_debug(" height_per_core=%u, width_per_core=%u",
161 log_debug(" height_last_core=%u, width_last_core=%u",
163 log_debug(" cores_per_height=%u, cores_per_width=%u",
165 log_div_const("source_width_div", s_info->source_width_div);
166 log_div_const("source_width_last_div", s_info->source_width_last_div);
167 log_div_const("cores_per_width_div", s_info->cores_per_width_div);
168 }
169
170 for (uint32_t i = 0; i < config->n_connectors_total; i++) {
171 connector *conn = &(connectors[i]);
172 log_debug("Connector %u: kernel size=%u, %u", i, conn->kernel.width,
173 conn->kernel.height);
174 log_debug(" delay=%u, delay_stage=%u", conn->delay, conn->delay_stage);
175 }
176
177 return true;
178}
179
184static inline lc_coord_t map_pre_to_post(connector *connector, lc_coord_t pre,
185 int16_t half_kh, int16_t half_kw) {
186 lc_coord_t post = pre;
187 post.row = div_by_const(post.row, connector->pool_stride_height_div);
188 post.col = div_by_const(post.col, connector->pool_stride_width_div);
189 post.row = post.row - half_kh + connector->padding.height;
190 post.col = post.col - half_kw + connector->padding.width;
191 post.row = div_by_const(post.row, connector->stride_height_div);
192 post.col = div_by_const(post.col, connector->stride_width_div);
193 return post;
194}
195
196
200static inline void do_convolution_operation(
201 uint32_t time, lc_coord_t pre_coord, connector *connector,
202 uint16_t *ring_buffers) {
203 int32_t half_kh = connector->kernel.height / 2;
204 int32_t half_kw = connector->kernel.width / 2;
205 lc_coord_t post_coord = map_pre_to_post(connector, pre_coord, half_kh, half_kw);
206 log_debug("pre row %d, col %d AS post row %d, col %d",
207 pre_coord.row, pre_coord.col, post_coord.row, post_coord.col);
208 lc_weight_t *connector_weights = &weights[connector->kernel_index];
209
210 int32_t kw = connector->kernel.width;
211 for (int32_t r = -half_kh, kr = 0; r <= half_kh; r++, kr++) {
212 int32_t tmp_row = post_coord.row + r;
213 if ((tmp_row < config->post_start.row) || (tmp_row > config->post_end.row)) {
214 continue;
215 }
216 for (int32_t c = -half_kw, kc = 0; c <= half_kw; c++, kc++) {
217 int32_t tmp_col = post_coord.col + c;
218 if ((tmp_col < config->post_start.col) || (tmp_col > config->post_end.col)) {
219 continue;
220 }
221
222 // This the neuron id relative to the neurons on this core
223 uint32_t post_index =
224 ((tmp_row - config->post_start.row) * config->post_shape.width)
225 + (tmp_col - config->post_start.col);
226 uint32_t k = (kr * kw) + kc;
227 lc_weight_t weight = connector_weights[k];
228 if (weight == 0) {
229 continue;
230 }
231 uint32_t rb_index = 0;
232 if (weight > 0) {
234 connector->positive_synapse_type, post_index,
237 } else {
239 connector->negative_synapse_type, post_index,
242 weight = -weight;
243 }
244 log_debug("Updating ring_buffers[%u] for post neuron %u = %u, %u, with weight %u",
245 rb_index, post_index, tmp_col, tmp_row, weight);
246
247 // Add weight to current ring buffer value, avoiding saturation
248 uint32_t accumulation = ring_buffers[rb_index] + weight;
249 uint32_t sat_test = accumulation & 0x10000;
250 if (sat_test) {
251 accumulation = sat_test - 1;
252 }
253 ring_buffers[rb_index] = accumulation;
254 }
255 }
256}
257
258static inline uint32_t get_core_row(uint32_t core_id, source_info *s_info) {
259 return div_by_const(core_id, s_info->cores_per_width_div);
260}
261
262static inline uint32_t get_core_col(uint32_t core_id, uint32_t core_row,
263 source_info *s_info) {
264 return core_id - (core_row * s_info->cores_per_source_width);
265}
266
267static inline bool is_last_core_on_row(uint32_t core_col, source_info *s_info) {
268 return core_col == (uint32_t) (s_info->cores_per_source_width - 1);
269}
270
271static inline bool is_last_core_in_col(uint32_t core_row, source_info *s_info) {
272 return core_row == (uint32_t) (s_info->cores_per_source_height - 1);
273}
274
275static inline bool key_to_index_lookup(uint32_t spike, source_info **rs_info) {
276 for (uint32_t i = 0; i < config->n_sources; i++) {
277 source_info *s_info = &(config->sources[i]);
278 // We have a match on key
279 if ((spike & s_info->key_info.mask) == s_info->key_info.key) {
280 *rs_info = s_info;
281 return true;
282 }
283 }
284 return false;
285}
286
293void local_only_impl_process_spike(
294 uint32_t time, uint32_t spike, uint16_t* ring_buffers) {
295
296 // Lookup the spike, and if found, get the appropriate parts
297 source_info *s_info;
298 if (!key_to_index_lookup(spike, &s_info)) {
299 log_debug("Spike %x didn't match any connectors!", spike);
300 return;
301 }
302
303 uint32_t core_id = get_core_id(spike, s_info->key_info);
304 uint32_t core_row = get_core_row(core_id, s_info);
305 uint32_t core_col = get_core_col(core_id, core_row, s_info);
306 bool last_core_on_row = is_last_core_on_row(core_col, s_info);
307 bool last_core_in_col = is_last_core_in_col(core_row, s_info);
308 uint32_t source_height = 0;
309 uint32_t source_width = 0;
310 div_const source_width_d;
311 if (last_core_on_row) {
312 source_width = s_info->source_width_last_core;
313 source_width_d = s_info->source_width_last_div;
314 } else {
315 source_width = s_info->source_width_per_core;
316 source_width_d = s_info->source_width_div;
317 }
318 if (last_core_in_col) {
319 source_height = s_info->source_height_last_core;
320 } else {
321 source_height = s_info->source_height_per_core;
322 }
323 uint32_t local_id = get_local_id(spike, s_info->key_info);
324 uint32_t neurons_per_core = source_width * source_height;
325
326 log_debug("Spike %x, on core %u (%u, %u), is last (%u, %u), local %u",
327 spike, core_id, core_col, core_row, last_core_on_row, last_core_in_col,
328 local_id);
329
330 // compute the population-based coordinates
331 uint32_t end = s_info->key_info.start + s_info->key_info.count;
332 for (uint32_t i = s_info->key_info.start; i < end; i++) {
333 connector *connector = &(connectors[i]);
334
335 // Ignore the neuron if the delay does not match
336 uint32_t first_neuron = neurons_per_core * connector->delay_stage;
337 uint32_t last_neuron = first_neuron + neurons_per_core;
338 log_debug("Connector %u, delay stage = %u, first = %u, last = %u",
339 i, connector->delay_stage, first_neuron, last_neuron);
340 if (local_id < first_neuron || local_id >= last_neuron) {
341 continue;
342 }
343
344 uint32_t local_neuron_id = local_id - first_neuron;
345 uint32_t local_row = div_by_const(local_neuron_id, source_width_d);
346 uint32_t local_col = local_neuron_id - (local_row * source_width);
347
348 lc_coord_t pre_coord = {
349 // The x-coordinate is the remainder of the "division"
350 .col = (core_col * s_info->source_width_per_core) + local_col,
351 // The y-coordinate is the integer part of the "division".
352 .row = (core_row * s_info->source_height_per_core) + local_row
353 };
354
355 log_debug("Local coord = %u, %u, Pre coord = %u, %u",
356 local_col, local_row, pre_coord.col, pre_coord.col);
357
358 // Compute the convolution
359 do_convolution_operation(time, pre_coord, connector, ring_buffers);
360 }
361}
static weight_t * ring_buffers
The ring buffers to be used in the simulation.
Definition c_main.c:118
void log_error(const char *message,...)
void log_debug(const char *message,...)
void log_info(const char *message,...)
uint32_t synapse_delay_mask
The mask to get the synaptic delay from a "synapse".
Definition local_only.c:71
uint32_t synapse_type_index_bits
The number of bits used by the synapse type and post-neuron index.
Definition local_only.c:74
uint32_t synapse_index_bits
The number of bits used by just the post-neuron index.
Definition local_only.c:77
div_const source_width_last_div
Division by last core width.
key_info key_info
Information about the key.
uint32_t source_height_last_core
The source population height on the last core in a column.
uint32_t cores_per_source_width
Number of cores in a width of the source.
uint32_t source_height_per_core
The source population height per core.
uint32_t cores_per_source_height
The number cores in a height of the source.
div_const cores_per_width_div
Division by cores per source width.
uint32_t source_width_per_core
The source population width per core.
div_const source_width_div
Used to calculate division by the source width per core efficiently.
uint32_t source_width_last_core
The source population width on the last core on a row.
A region of SDRAM used to transfer synapses.
Collection of rates to apply over time to a particular spike source.
void spin1_memcpy(void *dst, void const *src, uint len)
div_const pool_stride_width_div
1 / pooling stride width
div_const stride_width_div
1 / stride width;
uint16_t kernel_index
The index of the weights for the kernel.
lc_shape_t padding
The shape of the padding.
div_const stride_height_div
1 / stride height
uint16_t delay
The delay in time steps.
uint16_t positive_synapse_type
The index of the synapse for positive weights.
lc_shape_t kernel
The shape of the kernel.
uint16_t negative_synapse_type
The index of the synapse for negative weights.
uint16_t delay_stage
The delay stage.
div_const pool_stride_height_div
1 / pooling stride height
Structure for constants for precise constant integer division (see div_by_const)
uint32_t start
The index into connectors for this entry.
uint32_t count
The number of entries in connectors for this entry.
uint32_t key
The key to match against the incoming message.
uint32_t mask_shift
The shift to apply to the key to get the core part.
uint32_t mask
The mask to select the relevant bits of key for matching.
uint32_t core_mask
The mask to apply to the key once shifted to get the core index.
A coordinate in terms of rows and columns (y and x)
A shape in terms of height and width.
static index_t synapse_row_get_ring_buffer_index(uint32_t simulation_timestep, uint32_t synapse_type_index, uint32_t neuron_index, uint32_t synapse_type_index_bits, uint32_t synapse_index_bits, uint32_t synapse_delay_mask)
Get the index of the ring buffer for a given timestep, synapse type and neuron index.