// Direction tests on a PE connectivity byte.
//
// Each macro isolates one bit of `conn` and evaluates to a non-zero value
// (the bit itself, not necessarily 1) when that bit is set, 0 otherwise.
// Bit layout, least-significant bit first: N, E, S, W, NW, NE, SE, SW.
//
// The argument is parenthesized so that expression arguments such as
// `a | b` are masked as a whole; `&` binds tighter than `|`, so without the
// parentheses only the right-hand operand would be masked.
#define has_input_from_N(conn)  ((conn) & 0b00000001)
#define has_input_from_E(conn)  ((conn) & 0b00000010)
#define has_input_from_S(conn)  ((conn) & 0b00000100)
#define has_input_from_W(conn)  ((conn) & 0b00001000)
#define has_input_from_NW(conn) ((conn) & 0b00010000)
#define has_input_from_NE(conn) ((conn) & 0b00100000)
#define has_input_from_SE(conn) ((conn) & 0b01000000)
#define has_input_from_SW(conn) ((conn) & 0b10000000)
// Lookup table: ones_in_4b[n] is the number of set bits in the 4-bit value n.
const unsigned char ones_in_4b[] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4};

// Number of set bits in the low 8 bits of `v`, computed as the sum of the
// table entries for the low and the high nibble.
//
// - `v` is parenthesized so expression arguments (e.g. `a | b`) are
//   evaluated as a whole before masking/shifting.
// - The high nibble is masked as well, so bits above bit 7 are ignored
//   instead of producing an out-of-range table index.
// - `v` is evaluated twice; do not pass expressions with side effects.
#define get_ones_in_8b(v) (ones_in_4b[(v) & 0x0f] + ones_in_4b[((v) >> 4) & 0x0f])
38 #define is_corner_pe(r, c, rows, cols) (\
39 (r == 0 && c == 0) ||\
40 (r == 0 && c == cols-1) ||\
41 (r == rows-1 && c == 0) ||\
42 (r == rows-1 && c == cols-1)\
44 #define is_side_pe(r, c, rows, cols) (\
// Position predicates for the PE at (row `r`, column `c`) of a grid with
// `rows` x `cols` elements. All macros share the same four-parameter
// signature so they can be used interchangeably; parameters a given test
// does not need are simply ignored.
//
// Every argument is parenthesized so that expression arguments keep their
// meaning: without this, `is_top_pe(x & 1, ...)` would expand to
// `x & (1 == 0)` and `cols-1` would mis-associate for arguments such as
// `1 << n`.
#define is_top_leftmost_pe(r, c, rows, cols)  ((r) == 0 && (c) == 0)
#define is_top_rightmost_pe(r, c, rows, cols) ((r) == 0 && (c) == (cols) - 1)
#define is_bot_leftmost_pe(r, c, rows, cols)  ((r) == (rows) - 1 && (c) == 0)
#define is_bot_rightmost_pe(r, c, rows, cols) ((r) == (rows) - 1 && (c) == (cols) - 1)
#define is_top_pe(r, c, rows, cols)           ((r) == 0)
#define is_bot_pe(r, c, rows, cols)           ((r) == (rows) - 1)
#define is_leftmost_pe(r, c, rows, cols)      ((c) == 0)
#define is_rightmost_pe(r, c, rows, cols)     ((c) == (cols) - 1)
// Boundary masks for a PE connectivity byte.
//
// Each macro clears the direction bits that point outside the grid for a PE
// at the named border position and keeps the rest. With the bit layout
// N, E, S, W, NW, NE, SE, SW (bit 0 .. bit 7) the kept directions are:
//   top-left     : E, S, SE          top-right    : S, W, SW
//   bottom-left  : N, E, NE          bottom-right : N, W, NW
//   top          : E, S, W, SE, SW   bottom       : N, E, W, NW, NE
//   leftmost     : N, E, S, NE, SE   rightmost    : N, S, W, NW, SW
//
// The argument is parenthesized so that expression arguments such as
// `a | b` are masked as a whole rather than only their last operand.
#define mask_top_leftmost_pe(conn)  ((conn) & 0b01000110)
#define mask_top_rightmost_pe(conn) ((conn) & 0b10001100)
#define mask_bot_leftmost_pe(conn)  ((conn) & 0b00100011)
#define mask_bot_rightmost_pe(conn) ((conn) & 0b00011001)
#define mask_top_pe(conn)           ((conn) & 0b11001110)
#define mask_bot_pe(conn)           ((conn) & 0b00111011)
#define mask_leftmost_pe(conn)      ((conn) & 0b01100111)
#define mask_rightmost_pe(conn)     ((conn) & 0b10011101)
78 else if (
is_side_pe(r, c, rows, cols)) count++;
83 if (r == 0 && c == 0) {
90 }
else if (r == 0 && c == (cols - 1)) {
97 }
else if (r == (rows - 1) && c == 0) {
104 }
else if (r == (rows - 1) && c == (cols - 1)) {
124 }
else if (r == (rows - 1)) {
129 }
else if (c == (cols - 1)) {
143 const int cols = args.
getInt(
"cols");
144 const int rows = args.
getInt(
"rows");
145 const int toroid = args.
getInt(
"toroid");
146 const int hetero_fu_type = args.
getInt(
"fu_type");
147 const int fu_latency = args.
getInt(
"fu_latency");
148 const int fu_II = args.
getInt(
"fu_II");
149 const int rf_cols = args.
getInt(
"rf_cols");
150 const int rf_rows = args.
getInt(
"rf_rows");
151 const int num_const_addresses = args.
getInt(
"num_const_addresses");
152 const bool use_op_div = args.
getBool(
"op_div");
153 const bool extra_mem = args.
getBool(
"extra_mem");
154 const int pe_conn = args.
getInt(
"pe_conn");
155 const int II = args.
getInt(
"II");
156 const bool pred = args.
getBool(
"pred");
157 const bool reg_bypass = args.
getBool(
"reg_bypass");
158 const std::string pred_scheme = args.
getString(
"pred_scheme");
161 if (rows < 2 || cols < 2)
162 throw cgrame_error(
"ADRES Arch only accepts rows/cols >= 2");
165 if (((rows - 1) % rf_rows) != 0)
166 throw cgrame_error(
"rows-1 must be divisible by rf_rows");
168 if ((cols % rf_cols) != 0)
169 throw cgrame_error(
"cols must be divisible by rf_rows");
171 const unsigned RF_SIZE = rf_rows * rf_cols;
172 auto cgra_storage = std::make_unique<CGRA>();
173 Module* result = &cgra_storage->getTopLevelModule();
182 double moduleW = 1.0/(2*cols+1);
183 double moduleH = 1.0/(2*rows+1);
188 for (
unsigned int i = 0; i < cols; i++)
200 for (
unsigned int i = 0; i < rows; i++)
203 result->
addSubModule(
new MemPort(
"mem_" +
std::to_string(i), loc, cols, 32, num_const_addresses, pred, II), 0, (i+1)*moduleH, moduleW, moduleH);
205 result->
addSubModule(
new MemPort(
"mem_right_" +
std::to_string(i), loc, cols, 32, num_const_addresses, pred, II), 0, (i+1)*moduleH, moduleW, moduleH);
220 for (
unsigned int c = 0; c < cols; c++)
222 for (
unsigned int r = 0; r < rows; r++)
225 std::string fu_type =
"cga";
226 if ((r == 0 || hetero_fu_type == 0) ||
227 (hetero_fu_type == 1 && ((r % 2) == 0)) ||
228 (hetero_fu_type == 2 && ((c % 2) == 0))) {
237 count + ((r == 0)?2:1) + extra_mem,
247 ), (2*c+2)*moduleW, (2*r+2)*moduleH, moduleW, moduleH);
257 for (
unsigned int c = 0; c < cols; c++)
266 for (
unsigned int r = 0; r < rows; r++)
271 for (
unsigned int c = 0; c < cols; c++)
275 result->
addConnection(mem_n +
".out", blk_n + ((r == 0)?
".in1":
".in0"));
278 result->
addConnection(mem_right_n +
".out", blk_n + ((r == 0)?
".in2":
".in1"));
285 result->
addSubModule(
new RegisterFile(
"drf", drf_loc, reg_bypass? cols * 2 : cols, 2*cols, 3, 32, II), 0.3, moduleH, 0.5, moduleH);
288 result->
addConnection(
"context_cell.Context_Used",
"context_counter_drf.Context_Used",
false);
289 result->
addConnection(
"context_counter_drf.Context",
"drf.Context",
false);
291 for (
unsigned int c = 0; c < cols; c++)
303 for (
unsigned int c = 0; c < cols; c += rf_cols)
305 for (
unsigned int r = 1; r < rows; r += rf_rows)
311 result->
addSubModule(
new RegisterFile(rf, rf_loc, RF_SIZE + reg_bypass, 2, RF_SIZE, 32, II), (2*c+1)*moduleW, (2*r+1)*moduleH, moduleW, moduleH);
316 result->
addConnection(
"context_cell.Context_Used",
"context_counter_" + rf +
".Context_Used",
false);
317 result->
addConnection(
"context_counter_" + rf +
".Context", rf +
".Context",
false);
323 for (
unsigned int c = 0; c < cols; c++)
325 for (
unsigned int r = 1; r < rows; r++)
327 int c_mod = c % rf_cols;
328 int r_mod = (r - 1) % rf_rows;
329 int c_rf = c - c_mod;
330 int r_rf = r - r_mod;
331 int rf_in_i = 2 * r_mod + c_mod;
335 result->
addConnection(blk +
".fu_to_rf", rf +
".in" + rf_in);
343 for (
unsigned int c = 0; c < cols; c++)
345 for (
unsigned int r = 0; r < rows; r++)
358 unsigned char true_conn = pe_conn;
372 }
else if (
is_bot_pe(r, c, rows, cols)) {
382 int index = ((r == 0)?2:1) + extra_mem;
452 if (!pred)
return cgra_storage;
460 if (pred_scheme.find(
"event") != std::string::npos){
464 result->
addConnection(
"event_mux.out",
"EventTransitionTable.current_event");
465 for (
int i = 0; i < cols; i++ ){
469 }
else if (pred_scheme.find(
"partial") != std::string::npos){
472 throw cgrame_error(
"Cannot recognize the predication scheme");
476 for (
unsigned int i = 0; i < cols; i++)
483 for (
unsigned int c = 0; c < cols; c++)
491 for (
unsigned int r = 0; r < rows; r++)
496 for (
unsigned int c = 0; c < cols; c++)
506 Location pred_drf_loc = {1, UINT_MAX};
509 result->
addSubModule(
new RegisterFile(
"drf_pred", pred_drf_loc, reg_bypass? cols * 2 : cols, 2*cols, 3, size, II), 0.3, moduleH, 0.5, moduleH);
511 result->
addConnection(
"context_counter_drf.Context",
"drf_pred.Context",
false);
513 for (
unsigned int c = 0; c < cols; c++)
525 for (
unsigned int c = 0; c < cols; c += rf_cols)
527 for (
unsigned int r = 1; r < rows; r += rf_rows)
534 result->
addSubModule(
new RegisterFile(rf, loc, RF_SIZE + reg_bypass, 2, RF_SIZE, size, II), (2*c+1)*moduleW, (2*r+1)*moduleH, moduleW, moduleH);
535 result->
addConnection(rf +
".out0", blk +
".rf_to_muxa_pred");
536 result->
addConnection(rf +
".out1", blk +
".rf_to_muxout_pred");
538 result->
addConnection(
"context_counter_" + rf_org +
".Context", rf +
".Context",
false);
544 for (
unsigned int c = 0; c < cols; c++)
546 for (
unsigned int r = 1; r < rows; r++)
548 int c_mod = c % rf_cols;
549 int r_mod = (r - 1) % rf_rows;
550 int c_rf = c - c_mod;
551 int r_rf = r - r_mod;
552 int rf_in_i = 2 * r_mod + c_mod;
556 result->
addConnection(blk +
".fu_to_rf_pred", rf +
".in" + rf_in);
558 result->
addConnection(blk +
".bypass_to_rf_pred", rf +
".in1");
564 for (
unsigned int c = 0; c < cols; c++)
566 for (
unsigned int r = 0; r < rows; r++)
579 unsigned char true_conn = pe_conn;
593 }
else if (
is_bot_pe(r, c, rows, cols)) {
603 int index = ((r == 0)?2:1) + extra_mem;