CGRA-ME
TMPack.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * The software programs comprising "CGRA-ME" and the documentation provided
3  * with them are copyright by its authors and the University of Toronto. Only
4  * non-commercial, not-for-profit use of this software is permitted without ex-
5  * plicit permission. This software is provided "as is" with no warranties or
6  * guarantees of support. See the LICENCE for more details. You should have re-
7  * ceived a copy of the full licence along with this software. If not, see
8  * <http://cgra-me.ece.utoronto.ca/license/>.
9  ******************************************************************************/
10 
11 #include <CGRA/ClusteredMapper.h>
12 #include <CGRA/Exception.h>
13 #include <CGRA/GraphAlgorithms.h>
14 #include <CGRA/MRRGProcedures.h>
17 #include <CGRA/dotparse.h>
18 
19 
20 #include <algorithm>
21 #include <utility>
22 #include <functional>
23 #include <cmath>
24 #include <fstream>
25 
27  ClusteredMapperDriver& driver,
28  Module* topLevelModule,
29  std::shared_ptr<OpGraph> opgraph,
30  const MRRG& mrrg,
31  int rows, int cols,
32  int II, bool isElastic)
33  : ClusteredMapperDriver(driver)
34  , l_opgraph(opgraph)
35  , l_mrrg(mrrg)
36  , l_rows(rows+2)
37  , l_cols(cols+2)
38  , l_reschedule(false)
39  , l_II(II)
40  , l_isElastic(isElastic) {
41  localMRRG = topLevelModule->createMRRG(1);
42 
43  std::vector<std::vector<std::vector<MRRGNodeDesc>>> l_routing_nodes_3d (l_rows, std::vector<std::vector<MRRGNodeDesc>>(l_cols, std::vector<MRRGNodeDesc>{} ));
44  std::vector<std::vector<std::vector<MRRGNodeDesc>>> l_function_nodes_3d(l_rows, std::vector<std::vector<MRRGNodeDesc>>(l_cols, std::vector<MRRGNodeDesc>{}));
45  const auto classes = computeNodeClassListsByCycle(*localMRRG);
46  for (const auto& mrrg_node : classes.routing_nodes[0]) {
47  uint x_loc = mrrg_node->parent->loc.x_coord;
48  uint y_loc = mrrg_node->parent->loc.y_coord;
49  if (x_loc == UINT_MAX && y_loc != UINT_MAX) {
50  for (int i = 1; i < l_cols - 1; i++) {
51  l_routing_nodes_3d[i][y_loc].push_back(mrrg_node);
52  }
53  } else if (x_loc != UINT_MAX && y_loc == UINT_MAX) {
54  for (int i = 1; i < l_rows - 1; i++) {
55  l_routing_nodes_3d[x_loc][i].push_back(mrrg_node);
56  }
57 
58  } else if (x_loc != UINT_MAX && y_loc != UINT_MAX) {
59  l_routing_nodes_3d[x_loc][y_loc].push_back(mrrg_node);
60  } else {
61  throw cgrame_error("Clusterting: x & y locations are both assigned to uint max for module " + mrrg_node->parent->getName());
62  }
63  }
64 
65  for (const auto& mrrg_node : classes.function_nodes[0]) {
66  uint x_loc = mrrg_node->parent->loc.x_coord;
67  uint y_loc = mrrg_node->parent->loc.y_coord;
68  if (x_loc == UINT_MAX && y_loc != UINT_MAX) {
69  for (int i = 1; i < l_cols - 1; i++) {
70  l_function_nodes_3d[i][y_loc].push_back(mrrg_node);
71  }
72  } else if (x_loc != UINT_MAX && y_loc == UINT_MAX) {
73  for (int i = 1; i < l_rows - 1; i++) {
74  l_function_nodes_3d[x_loc][i].push_back(mrrg_node);
75  }
76  } else if (x_loc != UINT_MAX && y_loc != UINT_MAX) {
77  l_function_nodes_3d[x_loc][y_loc].push_back(mrrg_node);
78  } else {
79  throw cgrame_error("Clusterting: x & y locations are both assigned to uint max for module " + mrrg_node->parent->getName());
80  }
81  }
82 
83  for (int i = 0; i < l_rows; i++) {
84  for (int j = 0; j < l_cols; j++) {
85  int function_node_size = l_function_nodes_3d[i][j].size();
86  int routing_node_size = l_routing_nodes_3d[i][j].size();
87  if (function_node_size == 0 && routing_node_size == 0) continue;
88  std::string genericName = "func_" + std::to_string(function_node_size) + "_route_" + std::to_string(routing_node_size);
89  if (l_function_nodes.find(genericName) != l_function_nodes.end()) {
90  bool same_functions = true;
91  // Check if not homogenous
92  continue;
93  } else {
94  l_function_nodes[genericName] = l_function_nodes_3d[i][j];
95  l_routing_nodes[genericName] = l_routing_nodes_3d[i][j];
96  setPortsForSubmodule(genericName);
97  }
98  }
99  }
100 
101 }
102 
103 void TMPack::clusterMemoryOperations(std::set<OpGraphOpDesc>* visited) {
104  // Check if the memory ports are limited. If it is then
105  // set certain number of memories to be clustered
106  if (l_ram_ports <= 0) return;
107  static int runs = 0;
108  std::map<std::string, std::vector<OpGraphOp*>> l_mem_name_ops;
109 
110  // Getting all the load and store operations and placing them with the similar
111  // named operations
112  for (auto& op : l_opgraph->opNodes()) {
113  if (op->getOpCode() == OpCode::LOAD || op->getOpCode() == OpCode::STORE) {
114  if (!op->getName().empty())
115  l_mem_name_ops[op->getMemName()].push_back(op);
116  }
117  }
118 
119  for (auto mem_name_op : l_mem_name_ops) {
120  // Checking if the size of the differnt operations could fit
121  // the CGRA
122  if (mem_name_op.second.size() > (l_ram_ports * l_II)) {
123  throw cgrame_error("Err: Memory: " + mem_name_op.first + " has "
124  + std::to_string(mem_name_op.second.size()) +
125  " load/store operations. That could not fit within a CGRA connected to a RAM with "
126  + std::to_string(l_ram_ports) + " ports and II of " + std::to_string(l_II) );
127  }
128  if (mem_name_op.second.size() == l_ram_ports) continue;
129 
130  int num_of_elements_per_port =
131  ceil(static_cast<double>(mem_name_op.second.size())/static_cast<double>(l_ram_ports));
132 
133  for (int i = 0; i < mem_name_op.second.size(); i+= num_of_elements_per_port) {
134  std::vector<OpGraphOpDesc> ops_v;
135  // std::set<int> used_contexts;
136  std::map<int, OpGraphOp*> used_contexts;
137  for (int j = 0; j < num_of_elements_per_port; j++) {
138  int context = l_schedule[mem_name_op.second[i+j]]%l_II;
139  // if (used_contexts.count(schedule[mem_name_op.second[i+j]]%l_II) != 0) {
140  if (used_contexts.find(context) != used_contexts.end()) {
141  if (runs > 5) {
142  throw cgrame_error
143  ("ERR: Two memory operations that are scheduled at the same context are scheduled to be mapped together"); // NOLINT
144  }
145  OpGraphOp* op_mem_1 = mem_name_op.second[i+j];
146  OpGraphOp* op_mem_2 = used_contexts[context];
147  std::cout << "Adding a new contraint " << op_mem_1->getName() << " " << op_mem_2->getName() << std::endl;
148  opPair oper_pair = {op_mem_2, op_mem_1};
149  extended_sched_const.emplace(oper_pair,
150  std::abs(l_schedule[used_contexts[context]] - l_schedule[mem_name_op.second[i+j]]) + 1);
151  l_reschedule = true;
152  // std::abs(schedule[used_contexts[context]] - schedule[mem_name_op.second[i+j]]));
153  }
154  ops_v.push_back(mem_name_op.second[i+j]);
155  visited->emplace(mem_name_op.second[i+j]);
156  l_clustered_ops.emplace(mem_name_op.second[i+j]);
157  used_contexts.emplace(l_schedule[mem_name_op.second[i+j]]%l_II, mem_name_op.second[i+j]);
158  }
159  l_clusters.push_back(ops_v);
160  }
161  }
162  runs++;
163  return;
164 }
165 
166 void TMPack::setPortsForSubmodule(std::string submodule) {
167  std::set<MRRGNodeDesc> input_ports;
168  std::set<MRRGNodeDesc> output_ports;
169  for (auto& nlist : {&l_function_nodes[submodule], &l_routing_nodes[submodule]}) {
170  for (auto& node : *nlist) {
171  for (auto& fanout : node->fanout) {
172  if (std::find(l_function_nodes[submodule].begin(), l_function_nodes[submodule].end(), fanout) == l_function_nodes[submodule].end() &&
173  std::find(l_routing_nodes[submodule].begin(), l_routing_nodes[submodule].end(), fanout) == l_routing_nodes[submodule].end()) {
174 
175  uint x_loc = node->parent->loc.x_coord;
176  uint y_loc = node->parent->loc.y_coord;
177  output_ports.emplace(node);
178  continue;
179  }
180  }
181  for (auto& fanin : node->fanin) {
182  if (std::find(l_function_nodes[submodule].begin(), l_function_nodes[submodule].end(), fanin) == l_function_nodes[submodule].end() &&
183  std::find(l_routing_nodes[submodule].begin(), l_routing_nodes[submodule].end(), fanin) == l_routing_nodes[submodule].end()) {
184  input_ports.emplace(node);
185  continue;
186  }
187  }
188  }
189  }
190  l_submodule_input_ports.emplace(submodule, input_ports);
191  l_submodule_output_ports.emplace(submodule, output_ports);
192 }
193 
194 void TMPack::getFirstOps(std::queue<OpGraphOpDesc>& operations) {
195  // Get the first operations with no input operations
196  for (auto &op : l_opgraph->opNodes()) {
197  bool hasInput = false;
198  for (auto input_val : op->input) {
199  if (input_val->input && (input_val->input != op)) {
200  hasInput = true;
201  break;
202  }
203  }
204  if (!hasInput) operations.emplace(op);
205  }
206  return;
207 }
208 
209 void TMPack::getModulesToMapOp(OpGraphOpDesc op, std::map<std::string, MRRGNodeDesc>& modules_could_map_op) {
210  for (auto& subModule : l_function_nodes) {
211  for (auto& function_node : subModule.second) {
212  if (function_node->canMapOp(op)) {
213  modules_could_map_op.emplace(subModule.first, function_node);
214  break;
215  }
216  }
217  }
218  return;
219 }
220 
222  std::vector<OpGraphOpDesc> ops, std::vector<MRRGNodeDesc> &used_nodes,
223  std::vector<OpGraphOpDesc> &used_ops, std::set<OpGraphOpDesc> &visited_ops, std::string submodule) {
224  std::vector<OpGraphOpDesc> next_ops;
225 
226  for (auto& next_op : ops) {
227  if (std::find(used_ops.begin(), used_ops.end(), next_op) != used_ops.end()) continue;
228  if (visited_ops.count(next_op) != 0) continue;
229 
230  for (auto& function_node : l_function_nodes[submodule]) {
231  if (std::find(used_nodes.begin(), used_nodes.end(), function_node) != used_nodes.end()) continue;
232  if (!function_node->canMapOp(next_op)) continue;
233  addNextOps(&next_ops, next_op);
234 
235  used_nodes.push_back(function_node);
236  used_ops.push_back(next_op);
237  break;
238  }
239  }
240 
241  if (next_ops.empty()) return;
242  opsCouldBeClustered(next_ops, used_nodes, used_ops, visited_ops, submodule);
243 
244  return;
245 }
246 
247 bool TMPack::checkIfOutputPortsNeeded(OpGraphValDesc val, std::vector<OpGraphOpDesc> &ops) {
248  // Check if there are any output ports needed for this operation
249  for (auto& sink_op : val->output) {
250  if (std::find(ops.begin(), ops.end(), sink_op) == ops.end()) {
251  return true;
252  }
253  }
254  return false;
255 }
256 
257 bool TMPack::checkIfInputPortsNeeded(OpGraphOpDesc source_op, std::vector<OpGraphOpDesc> &ops) {
258  for (auto& in_val : source_op->input) {
259  OpGraphOpDesc connected_op = in_val->input;
260  if (std::find(ops.begin(), ops.end(), connected_op) == ops.end()) {
261  return true;
262  }
263  }
264  return false;
265 }
266 
267 void TMPack::setSourceOpNodes(std::vector<MRRGNodeDesc>& op_mrrg_nodes,
268  OpGraphOpDesc op, std::string submodule) {
269  op_mrrg_nodes.clear();
270  // Get mrrg nodes that could map this operation
271  for (auto& function_node : l_function_nodes[submodule]) {
272  if (function_node->canMapOp(op)) {
273  op_mrrg_nodes.push_back(function_node);
274  }
275  }
276 }
277 
278 void TMPack::setReachesOutputPorts(std::map<MRRGNodeDesc, std::set<MRRGNodeDesc>>& connected_nodes,
279  std::vector<MRRGNodeDesc>& op_mrrg_nodes, std::string submodule) {
280  // Check if any of the source mrrg nodes reaches an output port
281  for (auto& op_node : op_mrrg_nodes) {
282  for (auto& output_node : l_submodule_output_ports[submodule]) {
283  //std::cout << *output_node << " " << *op_node << std::endl;
284  if (output_node->bitwidth < op_node->bitwidth) continue;
285 
286  bool reached_output_port = isReachable(op_node, output_node, submodule);
287  //std::cout << *output_node << " " << *op_node << " " << reached_output_port << std::endl;
288  if (reached_output_port) {
289  connected_nodes[op_node].emplace(output_node);
290  }
291  }
292  }
293  return;
294 }
295 
296 void TMPack::setReachesInputPorts(std::map<MRRGNodeDesc, std::set<MRRGNodeDesc>>& connected_nodes,
297  std::vector<MRRGNodeDesc>& op_mrrg_nodes, std::string submodule) {
298  // Check if any of the source mrrg nodes reaches an output port
299  for (auto& op_mrrg_node : op_mrrg_nodes) {
300  for (auto& input_node : l_submodule_input_ports[submodule]) {
301  bool reached_input_port = isReachable(input_node, op_mrrg_node, submodule);
302  if (reached_input_port) {
303  connected_nodes[op_mrrg_node].emplace(input_node);
304  }
305  }
306  }
307  return;
308 }
309 
310 bool TMPack::checkPortConn(std::vector<std::pair<OpGraphOpDesc, OpGraphOpDesc>>& global_port_connection, int i,
311  std::set<MRRGNodeDesc> ports_used,
312  std::set<OpGraphOpDesc> out_port_found,
313  std::map<OpGraphOpDesc, std::set<MRRGNodeDesc>>& op_to_port_nodes,
314  std::string submodule, bool isInput) {
315  if (global_port_connection.size() == i) {
316  return true;
317  }
318  if ((ports_used == l_submodule_input_ports[submodule] & isInput) | (ports_used == l_submodule_output_ports[submodule] & !isInput)) {
319  return false;
320  }
321  std::set<MRRGNodeDesc>& ports_of_op = op_to_port_nodes[global_port_connection[i].first];
322  // Since outputs could have multi-fanout signals we need to check if the op connects to a single output
323  // While input needs a unique input for each signal
324  if (out_port_found.count(global_port_connection[i].first) != 0) {
325  return checkPortConn(global_port_connection, ++i, ports_used, out_port_found, op_to_port_nodes, submodule, isInput);
326  }
327  for (auto& mrrg_node : ports_of_op) {
328  if (ports_used.count(mrrg_node) == 0) {
329  ports_used.emplace(mrrg_node);
330  if (!isInput) out_port_found.emplace(global_port_connection[i].first);
331  if (checkPortConn(global_port_connection, ++i, ports_used, out_port_found, op_to_port_nodes, submodule, isInput)) {
332  return true;
333  } else {
334  ports_used.erase(mrrg_node);
335  }
336  }
337  }
338  return false;
339 }
340 
341 bool TMPack::checkPortsOfOperation(std::vector<std::pair<OpGraphOpDesc, OpGraphOpDesc>> global_port_connection,
342  std::map<OpGraphOpDesc, std::vector<MRRGNodeDesc>>& ops_mrrg_nodes,
343  std::set<OpGraphOpDesc> exclusive_port_operation, std::string submodule, bool isInput) {
344 
345  std::map<MRRGNodeDesc, std::set<MRRGNodeDesc>> connected_nodes;
346  std::map<OpGraphOpDesc, std::set<MRRGNodeDesc>> op_to_port_nodes;
347  for (const auto& op_pair : global_port_connection) {
348  if (isInput) {
349  setReachesInputPorts(connected_nodes, ops_mrrg_nodes[op_pair.first], submodule);
350  } else {
351  setReachesOutputPorts(connected_nodes, ops_mrrg_nodes[op_pair.first], submodule);
352  }
353  // Check if there a connection exist for each of the mrrg nodes of the operation
354  // otherwise remove the node from the list
355  std::vector<MRRGNodeDesc> nodes_to_remove;
356  for (auto& mrrg_node : ops_mrrg_nodes[op_pair.first]) {
357  // Remove node if it cannout be found within the connection list
358  if (connected_nodes.find(mrrg_node) == connected_nodes.end()) {
359  nodes_to_remove.push_back(mrrg_node);
360  continue;
361  }
362  // For the connected nodes set the opgraphop of the node
363  if (connected_nodes[mrrg_node].size() != 0) {
364  for (auto& input_node : connected_nodes[mrrg_node]) {
365  op_to_port_nodes[op_pair.first].emplace(input_node);
366  }
367  }
368  }
369  // remove the nodes that could not be connected
370  for (auto& mrrg_node : nodes_to_remove) {
371  ops_mrrg_nodes[op_pair.first].erase(remove(ops_mrrg_nodes[op_pair.first].begin(), ops_mrrg_nodes[op_pair.first].end(), mrrg_node), ops_mrrg_nodes[op_pair.first].end());
372  }
373  // If the operation has an input/output from within and outside of the cluster and cannot reach any of the nodes
374  // then apply re-computation.
375  if (op_to_port_nodes[op_pair.first].size() == 0 && exclusive_port_operation.count(op_pair.first) == 0) {
376  if (isInput) {
377  resolveReachability(op_pair.second, op_pair.first, op_pair.second->output);
378  } else {
379  resolveReachability(op_pair.first, op_pair.second, op_pair.first->output);
380  }
381  // if we added an operation we need to break and repeat
382  if (l_reschedule) return false;
383  } else if (op_to_port_nodes[op_pair.first].size() == 0){
384  if (isInput) {
385  throw cgrame_error("Input ERR: cannot connect operation: " + op_pair.second->getName() +
386  " to operation: " + op_pair.first->getName() + ". \n");
387  } else {
388  throw cgrame_error("Output ERR: cannot connect operation: " + op_pair.first->getName() +
389  " to operation: " + op_pair.second->getName() + ". \n");
390  }
391  }
392  }
393  if (op_to_port_nodes.size() == 1 ) return true;
394  // Making sure that there is not more ports than ops and that for each port there exist an input/output
395  bool reachable = checkPortConn(global_port_connection, 0, {}, {}, op_to_port_nodes, submodule, isInput);
396  if (!reachable) {
397  //Place operations in different clusters
398  if (op_to_port_nodes.size() < 2) {
399  throw cgrame_error("ERR: operation: could not connect all output/input ports. \n");
400  }
401  // Find the port with the minimum nodes to connect to then set it to be clustered alone
402  OpGraphOpDesc op_to_cluster_alone = NULL;
403  int min_inputs = INT_MAX;
404  for (auto& op_port_nodes : op_to_port_nodes) {
405  if (min_inputs >= op_port_nodes.second.size()) {
406  min_inputs = op_port_nodes.second.size();
407  op_to_cluster_alone = op_port_nodes.first;
408  }
409  }
410  auto& val = op_to_cluster_alone->output;
411  if (!val) {
412  throw cgrame_error("ERR: operation: " + op_to_cluster_alone->name +
413  " could not not be placed. \n");
414  }
415  for (auto& output_op : val->output){
416  l_ops_not_clustered.insert({op_to_cluster_alone, output_op});
417  }
418  for (auto& in_val : op_to_cluster_alone->input) {
419  OpGraphOpDesc connected_op = in_val->input;
420  l_ops_not_clustered.insert({connected_op, op_to_cluster_alone});
421  }
422  }
423  return reachable;
424 }
425 
426 bool TMPack::checkConnectedOperations(std::pair<OpGraphOpDesc, OpGraphOpDesc> op_pair, std::map<OpGraphOpDesc, std::vector<MRRGNodeDesc>>& ops_mrrg_nodes,
427  std::string submodule) {
428  std::vector<MRRGNodeDesc> nodes_to_remove;
429  Latency cycles_to_sink = getCyclesToSink(op_pair.first, op_pair.second);
430  for (auto source_mrrg_node : ops_mrrg_nodes[op_pair.first]) {
431  bool reached_sink_port = false;
432  for (auto& sink_mrrg_node : ops_mrrg_nodes[op_pair.second]) {
433  reached_sink_port = isReachable(source_mrrg_node, sink_mrrg_node, submodule, cycles_to_sink);
434  if (reached_sink_port) {
435  break;
436  }
437  }
438  if (!reached_sink_port) {
439  nodes_to_remove.push_back(source_mrrg_node);
440  }
441  }
442 
443  // remove the nodes that could not be connected
444  for (auto& mrrg_node : nodes_to_remove) {
445  ops_mrrg_nodes[op_pair.first].erase(remove(ops_mrrg_nodes[op_pair.first].begin(), ops_mrrg_nodes[op_pair.first].end(), mrrg_node), ops_mrrg_nodes[op_pair.first].end());
446  }
447  if (ops_mrrg_nodes[op_pair.first].empty()) return false;
448 
449  return true;
450 
451  // Now I need to check the conditions at which I cannot cluster together operations
452 }
453 
454 bool TMPack::checkReachbitlity(std::vector<OpGraphOpDesc> &ops, std::string submodule) {
455  if (ops.size() == 1) return true;
456  std::map<OpGraphOpDesc, std::vector<MRRGNodeDesc>> ops_mrrg_nodes;
457  bool is_input_reachable = false;
458  bool is_output_reachable = false;
459  bool is_connected_reachable = false;
460  std::set<OpGraphOpDesc> exclusive_input_operation;
461  std::set<OpGraphOpDesc> exclusive_output_operation;
462  std::set<OpGraphOpDesc> non_exclusive_port_operation;
463  std::vector<std::pair<OpGraphOpDesc, OpGraphOpDesc>> global_input_connection;
464  std::vector<std::pair<OpGraphOpDesc, OpGraphOpDesc>> connected_ops;
465  std::vector<std::pair<OpGraphOpDesc, OpGraphOpDesc>> global_output_connection;
466  for (auto& operation: ops) {
467  // Get the mrrg nodes that could map this op
468  std::vector<MRRGNodeDesc> mrrg_nodes_to_map_op;
469  setSourceOpNodes(mrrg_nodes_to_map_op, operation, submodule);
470  ops_mrrg_nodes[operation] = mrrg_nodes_to_map_op;
471 
472  // check if the operations has inputs outside of the interconnect
473  for (auto& in_val : operation->input) {
474  OpGraphOpDesc connected_op = in_val->input;
475  if (std::find(ops.begin(), ops.end(), connected_op) == ops.end()) {
476  global_input_connection.push_back({operation, connected_op});
477  exclusive_input_operation.emplace(operation);
478  }
479  }
480 
481  // Getting the output
482  auto& val = operation->output;
483  if (!val) return true;
484  // Get output of the operation and check if it needs to connect to the global interconnect or
485  // the connection is within the cluster
486  for (auto& sink_op : val->output) {
487  if (std::find(ops.begin(), ops.end(), sink_op) == ops.end()) {
488  global_output_connection.push_back({operation, sink_op});
489  exclusive_output_operation.emplace(operation);
490  }
491  }
492  for (auto& sink_op: val->output) {
493  if (operation == sink_op) continue;
494  if (std::find(ops.begin(), ops.end(), sink_op) != ops.end()) {
495  connected_ops.push_back({operation, sink_op});
496  if (exclusive_input_operation.count(sink_op) != 0) {
497  exclusive_input_operation.erase(sink_op);
498  non_exclusive_port_operation.emplace(sink_op);
499  }
500  if (exclusive_output_operation.count(operation) != 0) {
501  exclusive_output_operation.erase(operation);
502  non_exclusive_port_operation.emplace(operation);
503  }
504  }
505  }
506 
507  }
508  is_input_reachable = checkPortsOfOperation(global_input_connection, ops_mrrg_nodes, exclusive_input_operation, submodule, true /*isInput*/);
509  if (!is_input_reachable) return false;
510  is_output_reachable = checkPortsOfOperation(global_output_connection, ops_mrrg_nodes, exclusive_output_operation, submodule, false /*isInput*/);
511  if (!is_output_reachable) return false;
512  for (auto& op_pair : connected_ops) {
513  is_connected_reachable = checkConnectedOperations(op_pair, ops_mrrg_nodes, submodule);
514  if (!is_connected_reachable && non_exclusive_port_operation.count(op_pair.first) == 0 && non_exclusive_port_operation.count(op_pair.second) == 0) {
515  throw cgrame_error("ERR: cannot connect operation: " + op_pair.first->getName() +
516  " to operation: " + op_pair.second->getName() + ". \n");
517  }
518  else if (!is_connected_reachable) {
519  l_ops_not_clustered.insert({op_pair.first, op_pair.second});
520  l_ops_not_clustered.insert({op_pair.second, op_pair.first});
521 
522  return false;
523  }
524  }
525  for (auto& op_mrrg_nodes : ops_mrrg_nodes) {
526  std::vector<MRRGNodeDesc> mrrg_nodes_to_map_op;
527  setSourceOpNodes(mrrg_nodes_to_map_op, op_mrrg_nodes.first, submodule);
528 
529  if (mrrg_nodes_to_map_op.size() != op_mrrg_nodes.second.size()) {
530  for (auto& node : op_mrrg_nodes.second) {
531  if (node->type != MRRG_NODE_FUNCTION && node->type != MRRG_NODE_ROUTING_FUNCTION) throw cgrame_error ("Trying to tie an operation " + op_mrrg_nodes.first->name + " to none function node " + node->name);
532  std::string name = node->name;
533  std::vector<std::string> v;
534  int start, end;
535  start = end = 0;
536  // defining the delimitation character
537  char dl = '.';
538 
539  while ((start = name.find_first_not_of(dl, end))
540  != std::string::npos) {
541  end = name.find(dl, start);
542  v.push_back(name.substr(start, end - start));
543  }
544  int vec_size = v.size();
545  std::string module_operation = v[vec_size -2] + '.' + v[vec_size -1];
546  l_ops_to_node_names[op_mrrg_nodes.first].push_back(module_operation);
547  }
548  }
549  }
550  return true;
551 }
552 
554  // If this is the only output of the value and the sink and source are not clustered together then error out.
555  if (val->output.size() <= 1) {
556  throw cgrame_error("Op: " + source->getName() + " and Op: " + sink->getName() + " could not be clustered together or routed to one another."); //NOLINT
557  }
558 
559  // Create a new operation with the correct connections
560  l_opgraph->unLink(val, sink);
561  auto new_op = l_opgraph->insert(source->propertyClone(source->name + "_new"));
562  l_opgraph->link(new_op, sink, new_op->getName() + "_out", val->bitwidth, 0, EdgeKind::kDataFlow);
563  for (auto& source_inputs : source->input) {
564  l_opgraph->link(source_inputs, new_op, source_inputs->getOperandForOutput(source));
565  }
566  for (auto val : l_opgraph->valNodes()) {
567  std::cout << val->getName() << std::endl;
568  std::cout << "OUTPUTS" << std::endl;
569  for (auto output : val->output) {
570  std::cout << output->getName() << std::endl;
571  }
572  }
573  // if this point is reached we added a new operation and we need to repeat the
574  // clustering process from the start
575  l_reschedule = true;
576  return;
577 }
578 
579 void TMPack::addNextOps(std::vector<OpGraphOpDesc>* next_ops, OpGraphOpDesc op) {
580  if (op->output) {
581  for (auto& nxt_op : op->output->output) {
582  if (l_ops_not_clustered.count({op, nxt_op}) != 0) continue;
583  next_ops->push_back(nxt_op);
584  }
585  }
586 
587  for (auto& input : op->input) {
588  if (!input->input) continue;
589  if (l_ops_not_clustered.count({input->input, op}) != 0) continue;
590  next_ops->push_back(input->input);
591  }
592  return;
593 }
594 
595 bool TMPack::isReachable(MRRGNodeDesc source, MRRGNodeDesc sink, std::string submodule, Latency cycles) {
596  if (source == sink) return false;
597  // Stores the current node cost and the path to this node
598  struct VertexData {
599  std::vector<const MRRGNode*> fanin;
600  Latency num_of_cycles;
601  bool operator==(const VertexData& rhs) const {
602  return this->fanin == rhs.fanin && num_of_cycles == rhs.num_of_cycles;
603  }
604  };
605  using Vertex = std::pair<MRRGNodeDesc, Latency>;
606  // Stores the nodes and the visited ones
607  std::queue<Vertex> to_visit;
608  // Stores the node and if it is visited
609  std::unordered_map<Vertex, bool, pair_hash> visited;
610  // Data stores the current mrrgnode and its vertex data
611  auto data = std::unordered_map<Vertex, VertexData, pair_hash>();
612 
613  Latency c = (cycles == kUndefLatency)? kUndefLatency : 0;
614  // Initialize the visit and data to the source
615  to_visit.push({source, c});
616  data.insert({{source, 0}, {{source}, 0}});
617  bool found = false;
618  while (!to_visit.empty() && !found) {
619  // Get the first element from the queue
620  const auto explore_curr = to_visit.front().first;
621  Latency cycles_curr = to_visit.front().second;
622  to_visit.pop();
623 
624  // Check if this node has been explored and check if it is the lowest known cost
625  if (visited[{explore_curr, cycles_curr}]) continue;
626 
627  // Reached the sink need to check if the tag is correct otherwise go to the
628  // next element
629  if (!found && sink == explore_curr) {
630  if (cycles_curr == kUndefLatency || cycles_curr == cycles) {
631  auto data_curr = data[{explore_curr, cycles_curr}];
632 
633  //printNodes(data_curr.fanin);
634  found = true;
635  continue;
636  }
637  }
638 
639  for (const auto& fanout : explore_curr->fanout) {
640  if (std::find(l_function_nodes[submodule].begin(), l_function_nodes[submodule].end(), fanout) == l_function_nodes[submodule].end() &&
641  std::find(l_routing_nodes[submodule].begin(), l_routing_nodes[submodule].end(), fanout) == l_routing_nodes[submodule].end()) {
642 
643  continue;
644 
645  }
646  //std::cout << "FANOUT " << *fanout << std::endl;
647  // Ignore if the node is the source node or is a function node other than the sink
648  if (fanout == source) continue;
649  if (fanout->type == MRRGNode_Type::MRRG_NODE_FUNCTION && fanout != sink) continue;
650  if (fanout->bitwidth < sink->bitwidth && fanout->bitwidth < source->bitwidth) continue;
651  auto data_curr = data[{explore_curr, cycles_curr}];
652  // Avoiding loop back
653  if (std::find(data_curr.fanin.begin(), data_curr.fanin.end(), fanout)
654  != data_curr.fanin.end()) continue;
655 
656  Latency cycles_fanout = (cycles == kUndefLatency)? kUndefLatency : cycles_curr + explore_curr->latency;
657  if (fanout->parent->loc.x_coord != explore_curr->parent->loc.x_coord && fanout->parent->loc.y_coord != explore_curr->parent->loc.y_coord) continue;
658  if (visited[{fanout, cycles_fanout}]) continue;
659  data[{fanout, cycles_fanout}] = {data[{explore_curr, cycles_curr}].fanin, cycles_fanout};
660  data[{fanout, cycles_fanout}].fanin.push_back(explore_curr);
661  to_visit.push({fanout, cycles_fanout});
662  }
663  // Update the visited and clean up the data such
664  // that we do not save all the paths
665  visited[{explore_curr, cycles_curr}] = true;
666  data.erase({explore_curr, cycles_curr});
667  }
668  return found;
669 }
670 
672  int i = 0;
673  for (auto &op : l_opgraph->opNodes()) {
674  std::vector<OpGraphOpDesc> clust;
675  l_op_cluster_index[op] = i;
676  i++;
677  clust.push_back(op);
678  l_clusters.push_back(clust);
679  }
680  return;
681 }
682 
683 bool TMPack::clusterPEs(bool cluster) {
684  // If no clustering is set then set each operation
685  // as it's own cluster
686  if (!cluster) {
688  return false;
689  }
690 
691  std::queue<OpGraphOpDesc> operations;
692  std::set<OpGraphOpDesc> visited;
693  std::map<std::string, MRRGNodeDesc> modules_to_map_op;
694 
695  int latency = 0;
696  // Get Operations wiht no inputs
697  getFirstOps(operations);
698  // Cluster operations depending on the number of ram ports assigned
699  clusterMemoryOperations(&visited);
700  // Set the operations that are already clustered as first operations
701  for (auto op : visited) {
702  if (op->output) {
703  for (auto& next_op : op->output->output) {
704  operations.emplace(next_op);
705  }
706  }
707  }
708 
709  // Check the modules that could map this operation
710  while (!operations.empty()) {
711  // get the first op and check if it is visited
712  auto op = operations.front();
713  operations.pop();
714  if (visited.count(op) != 0) continue;
715  // Get all modules that could map this op
716  getModulesToMapOp(op, modules_to_map_op);
717 
718  if (modules_to_map_op.empty()) {
719  throw cgrame_mapper_error("No submodule could map op: " + op->getName());
720  }
721 
722  // Get all connected ops to this module
723  std::vector<OpGraphOpDesc> next_ops;
724  addNextOps(&next_ops, op);
725 
726  int max = 0;
727  std::string submodule_max_ops = "";
728  std::vector<OpGraphOpDesc> max_used_ops;
729 
730  // From all the submodules that could map op get the one with the largest cluster
731  // TODO (raghebom) : save the different clusters and submodules to run reachibilty
732  // check on them from largest generated cluster to the smallest
733  for (auto& op_submodules : modules_to_map_op) {
734  std::vector<MRRGNodeDesc> used_nodes;
735  std::vector<OpGraphOpDesc> used_ops;
736  used_nodes.push_back(op_submodules.second);
737  used_ops.push_back(op);
738  opsCouldBeClustered(next_ops, used_nodes, used_ops,
739  visited, op_submodules.first);
740  if (max < used_ops.size()) {
741  max = used_ops.size();
742  max_used_ops = used_ops;
743  submodule_max_ops = op_submodules.first;
744  } else if (max == used_ops.size()) {
745  if (l_routing_nodes[op_submodules.first] < l_routing_nodes[submodule_max_ops]) {
746  max_used_ops = used_ops;
747  submodule_max_ops = op_submodules.first;
748  }
749  }
750  }
751  modules_to_map_op.clear();
752  // make clusters for each op to place clusters not ops
753  bool reachable = checkReachbitlity(max_used_ops, submodule_max_ops);
754  if (l_reschedule) break;
755  if (!reachable) {
756  operations.push(op);
757  continue;
758  }
759 
760  // update the visited list and add the next operations to be clustered
761  visited.emplace(op);
762  if (op->output) {
763  for (auto& next_op : op->output->output) {
764  operations.emplace(next_op);
765  }
766  }
767  for (auto& visited_op : max_used_ops) {
768  visited.emplace(visited_op);
769  if (!visited_op->output) continue;
770  for (auto& next_op : visited_op->output->output) {
771  operations.emplace(next_op);
772  }
773  }
774  // Add cluster
775  l_clusters.push_back(max_used_ops);
776 
777  // for sanity check will remove later
778  for (auto& clustered_op : max_used_ops) {
779  l_op_cluster_index[clustered_op] = l_clusters.size() - 1;
780  l_clustered_ops.emplace(clustered_op);
781  }
782 
783  modules_to_map_op.clear();
784  }
785  if (!l_reschedule) {
786  // Sanity check
788  } else {
789  // restart the clustering process
790  l_clustered_ops.clear();
791  for (auto& cluster : l_clusters) {
792  cluster.clear();
793  }
794  l_clusters.clear();
795  while (!operations.empty())
796  operations.pop();
797  visited.clear();
798  modules_to_map_op.clear();
799  }
800 
801  return l_reschedule;
802 }
803 
805  for (const auto& op : l_opgraph->opNodes()) {
806  if (l_clustered_ops.count(op) == 0) {
807  std::cout << "Final Result" << std::endl;
808  for (auto& cluster : l_clusters) {
809  std::cout << "cluster" << std::endl;
810  for (auto& op : cluster) {
811  std::cout << *op << std::endl;
812  }
813  }
814  throw cgrame_mapper_error("Not all ops are included in the clusters missing: " + op->getName());
815  }
816  }
817 }
818 
819 void TMPack::printOps (std::vector<OpGraphOpDesc>& ops) {
820  std::cout << "Ops packed together: " << std::endl;
821  for (auto& op : ops) {
822  std::cout << op->getName() << " ";
823  }
824  std::cout << std::endl;
825 }
826 void TMPack::printNodes (std::vector<MRRGNodeDesc>& mrrg_nodes) {
827  std::cout << "MRRG Nodes: " << std::endl;
828  for (auto& node : mrrg_nodes) {
829  std::cout << node->getHierarchyQualifiedName() << " ";
830  }
831  std::cout << std::endl;
832 }
OpGraphVal::output
std::vector< OpGraphOp * > output
Definition: OpGraph.h:196
TMPack::TMPack
TMPack(ClusteredMapperDriver &driver, Module *topLevelModule, std::shared_ptr< OpGraph > opgraph, const MRRG &mrrg, int rows, int cols, int II, bool isElastic=false)
Definition: TMPack.cpp:26
MRRG_NODE_ROUTING_FUNCTION
@ MRRG_NODE_ROUTING_FUNCTION
Definition: MRRG.h:47
CodeProfiling.h
OpGraphOp::output
OpGraphVal * output
Definition: OpGraph.h:166
TMPack::l_opgraph
std::shared_ptr< OpGraph > l_opgraph
Definition: ClusteredMapper.h:239
TMPack::extended_sched_const
std::unordered_map< opPair, int, pair_hash > extended_sched_const
Definition: ClusteredMapper.h:230
MRRG
Definition: MRRG.h:216
dotparse.h
ClusteredMapperDriver::l_schedule
std::unordered_map< OpGraphOpDesc, int > l_schedule
Definition: ClusteredMapper.h:90
GraphAlgorithms.h
ClusteredMapperDriver
Proxy class that makes a new instance of the for ever call. Holds constant configuration info,...
Definition: ClusteredMapper.h:41
Latency
int Latency
Definition: ClusteredMapper.h:35
TMPack::l_mem_name_ops
std::map< std::string, std::vector< std::vector< OpGraphOpDesc > > > l_mem_name_ops
Definition: ClusteredMapper.h:248
begin
auto begin(const SingleItemImmutableSet< VertexID > &siis)
Definition: Collections.h:137
TMPack::l_ops_not_clustered
std::set< OpsNotToCluster > l_ops_not_clustered
Definition: ClusteredMapper.h:241
TMPack::printNodes
void printNodes(std::vector< MRRGNodeDesc > &mrrg_nodes)
Definition: TMPack.cpp:826
OpCode::STORE
@ STORE
TMPack::l_submodule_input_ports
std::map< std::string, std::set< MRRGNodeDesc > > l_submodule_input_ports
Definition: ClusteredMapper.h:246
TMPack::localMRRG
MRRG * localMRRG
Definition: ClusteredMapper.h:234
TMPack::getFirstOps
void getFirstOps(std::queue< OpGraphOpDesc > &operations)
Definition: TMPack.cpp:194
ClusteredMapperDriver::l_ram_ports
const int l_ram_ports
Definition: ClusteredMapper.h:78
TMPack::l_submodule_output_ports
std::map< std::string, std::set< MRRGNodeDesc > > l_submodule_output_ports
Definition: ClusteredMapper.h:247
computeNodeClassListsByCycle
MRRGNodeClassListsByCycle computeNodeClassListsByCycle(const MRRG &mrrg)
Definition: MRRGProcedures.cpp:138
TMPack::checkIfAllOpsAreClustered
void checkIfAllOpsAreClustered()
Definition: TMPack.cpp:804
ClusteredMapperDriver::l_op_cluster_index
std::map< OpGraphOpDesc, int > l_op_cluster_index
Definition: ClusteredMapper.h:88
TMPack::l_II
int l_II
Definition: ClusteredMapper.h:235
to_string
const std::string & to_string(const OpGraphOpCode &opcode)
Definition: OpGraph.cpp:111
TMPack::setSourceOpNodes
void setSourceOpNodes(std::vector< MRRGNodeDesc > &source_op_nodes, OpGraphOpDesc source_op, std::string submodule)
Definition: TMPack.cpp:267
OpCode::LOAD
@ LOAD
OpGraphOp::input
std::vector< OpGraphVal * > input
Definition: OpGraph.h:164
TMPack::createSingleOpClusters
void createSingleOpClusters()
Definition: TMPack.cpp:671
TMPack::setPortsForSubmodule
void setPortsForSubmodule(std::string)
Definition: TMPack.cpp:166
Exception.h
TMPack::clusterPEs
bool clusterPEs(bool cluster)
Definition: TMPack.cpp:683
TMPack::opPair
std::pair< OpGraphOp *, OpGraphOp * > opPair
Definition: ClusteredMapper.h:229
operator==
bool operator==(const OpGraph &lhs, const OpGraph &rhs)
Definition: OpGraph.cpp:1159
TMPack::l_routing_nodes
std::map< std::string, std::vector< MRRGNodeDesc > > l_routing_nodes
Definition: ClusteredMapper.h:245
OpGraphNode::name
std::string name
Definition: OpGraph.h:123
ClusteredMapperDriver::l_ops_to_node_names
std::unordered_map< OpGraphOpDesc, std::vector< std::string > > l_ops_to_node_names
Definition: ClusteredMapper.h:89
MRRG_NODE_FUNCTION
@ MRRG_NODE_FUNCTION
Definition: MRRG.h:46
UserModules.h
ClusteredMapperDriver::getCyclesToSink
const Latency getCyclesToSink(OpGraphOpDesc source, OpGraphOpDesc sink)
Definition: ClusteredMapper.cpp:217
Module
Definition: Module.h:163
OpGraphOp::propertyClone
OpGraphOp propertyClone() const
Definition: OpGraph.h:149
TMPack::checkConnectedOperations
bool checkConnectedOperations(std::pair< OpGraphOpDesc, OpGraphOpDesc >, std::map< OpGraphOpDesc, std::vector< MRRGNodeDesc >> &ops_mrrg_nodes, std::string submodule)
Definition: TMPack.cpp:426
TMPack::checkPortsOfOperation
bool checkPortsOfOperation(std::vector< std::pair< OpGraphOpDesc, OpGraphOpDesc >> global_port_connection, std::map< OpGraphOpDesc, std::vector< MRRGNodeDesc >> &ops_mrrg_nodes, std::set< OpGraphOpDesc > exlusive_port_operation, std::string submodule, bool isInput)
Definition: TMPack.cpp:341
TMPack::resolveReachability
void resolveReachability(OpGraphOpDesc source, OpGraphOpDesc sink, OpGraphVal *val)
Definition: TMPack.cpp:553
TMPack::isReachable
bool isReachable(MRRGNodeDesc source, MRRGNodeDesc sink, std::string, Latency cycles=-1)
Definition: TMPack.cpp:595
TMPack::l_function_nodes
std::map< std::string, std::vector< MRRGNodeDesc > > l_function_nodes
Definition: ClusteredMapper.h:244
TMPack::l_cols
int l_cols
Definition: ClusteredMapper.h:237
OpGraphVal
Definition: OpGraph.h:178
TMPack::addNextOps
void addNextOps(std::vector< OpGraphOpDesc > *next_ops, OpGraphOpDesc op)
Definition: TMPack.cpp:579
end
auto end(const SingleItemImmutableSet< VertexID > &siis)
Definition: Collections.h:138
OpGraphOp
Definition: OpGraph.h:131
MRRGNode
Definition: MRRG.h:60
TMPack::kUndefLatency
static constexpr Latency kUndefLatency
Definition: ClusteredMapper.h:172
EdgeKind::kDataFlow
@ kDataFlow
TMPack::printOps
void printOps(std::vector< OpGraphOpDesc > &ops)
Definition: TMPack.cpp:819
TMPack::setReachesOutputPorts
void setReachesOutputPorts(std::map< MRRGNodeDesc, std::set< MRRGNodeDesc >> &connected_nodes, std::vector< MRRGNodeDesc > &op_mrrg_nodes, std::string submodule)
Definition: TMPack.cpp:278
TMPack::checkIfInputPortsNeeded
bool checkIfInputPortsNeeded(OpGraphOpDesc source_op, std::vector< OpGraphOpDesc > &ops)
Definition: TMPack.cpp:257
Module::createMRRG
virtual MRRG * createMRRG(unsigned contexts)
Definition: Module.cpp:1520
TMPack::setReachesInputPorts
void setReachesInputPorts(std::map< MRRGNodeDesc, std::set< MRRGNodeDesc >> &connected_nodes, std::vector< MRRGNodeDesc > &op_mrrg_nodes, std::string submodule)
Definition: TMPack.cpp:296
MRRGNode::bitwidth
unsigned int bitwidth
Definition: MRRG.h:129
TMPack::l_clustered_ops
std::unordered_set< OpGraphOpDesc > l_clustered_ops
Definition: ClusteredMapper.h:243
MRRGProcedures.h
if
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0) set(CMAKE_CXX_FLAGS "$
Definition: CMakeLists.txt:7
TMPack::clusterMemoryOperations
void clusterMemoryOperations(std::set< OpGraphOpDesc > *)
Definition: TMPack.cpp:103
OpGraphVal::bitwidth
int bitwidth
Definition: OpGraph.h:200
TMPack::checkPortConn
bool checkPortConn(std::vector< std::pair< OpGraphOpDesc, OpGraphOpDesc >> &global_port_connection, int i, std::set< MRRGNodeDesc > ports_used, std::set< OpGraphOpDesc > out_port_found, std::map< OpGraphOpDesc, std::set< MRRGNodeDesc >> &op_to_port_nodes, std::string submodule, bool isInput)
Definition: TMPack.cpp:310
TMPack::l_rows
int l_rows
Definition: ClusteredMapper.h:236
cgrame_mapper_error
Definition: Exception.h:27
ClusteredMapperDriver::l_clusters
std::vector< std::vector< OpGraphOpDesc > > l_clusters
Definition: ClusteredMapper.h:87
ClusteredMapper.h
TMPack::checkReachbitlity
bool checkReachbitlity(std::vector< OpGraphOpDesc > &, std::string)
Definition: TMPack.cpp:454
cgrame_error
Definition: Exception.h:20
TMPack::getModulesToMapOp
void getModulesToMapOp(OpGraphOpDesc op, std::map< std::string, MRRGNodeDesc > &modules_could_map_op)
Definition: TMPack.cpp:209
TMPack::checkIfOutputPortsNeeded
bool checkIfOutputPortsNeeded(OpGraphValDesc val, std::vector< OpGraphOpDesc > &ops)
Definition: TMPack.cpp:247
TMPack::l_reschedule
bool l_reschedule
Definition: ClusteredMapper.h:233
TMPack::opsCouldBeClustered
void opsCouldBeClustered(std::vector< OpGraphOpDesc > nextOps, std::vector< MRRGNodeDesc > &used_nodes, std::vector< OpGraphOpDesc > &used_ops, std::set< OpGraphOpDesc > &visitedOps, std::string subModule)
Definition: TMPack.cpp:221
OpGraphNode::getName
const std::string & getName() const
Definition: OpGraph.h:121