20 #include <catch2/catch.hpp>
24 Module god_module(
"top",
"");
33 bool connectivity_available;
34 bool possible_to_balance_latency;
35 bool requires_recomputation;
36 std::string cgra_description;
37 std::string opgraph_description;
38 std::function<
MRRG()> mrrg_generator;
39 std::function<
OpGraph()> opgraph_generator;
42 struct MappingApproch {
43 std::string description;
44 std::string mapper_id;
46 bool recomputation_enabled;
49 MRRG makeMRRG_112x21DAG(
int latency_of_d1) {
64 MRRG makeMRRG_221DAG(
int mrrg_ii,
int latency_of_c1) {
66 const int cyc2 = latency_of_c1 % mrrg_ii;
80 MRRG makeMRRG_222DAG(
int mrrg_ii,
int latency_of_c1) {
82 const int cyc2 = latency_of_c1 % mrrg_ii;
85 const auto c1 = mrrg.insertMultiFanin({a1},
MRRGNode::make_routing( &god_module, 32, 0,
"c1", latency_of_c1)).first;
98 MRRG makeMRRG_131DAG(
int mrrg_ii,
int latency_of_c1) {
102 const auto c1 = mrrg.insertMultiFanin({b1},
MRRGNode::make_routing( &god_module, 32, 0,
"c1", latency_of_c1)).first;
115 MRRG makeMRRG_121DAG(
int mrrg_ii,
int latency_of_c1) {
119 const auto c1 = mrrg.insertMultiFanin({b1},
MRRGNode::make_routing( &god_module, 32, 0,
"c1", latency_of_c1)).first;
130 MRRG makeMRRG_231DAG(
int mrrg_ii,
int latency_of_c1) {
132 const int cyc2 = latency_of_c1 % mrrg_ii;
136 const auto b2 = mrrg.insertMultiFanin({a2},
MRRGNode::make_routing( &god_module, 32, cyc2,
"b2", 0)).first;
137 const auto c1 = mrrg.insertMultiFanin({b1},
MRRGNode::make_routing( &god_module, 32, 0,
"c1", latency_of_c1)).first;
138 const auto c2 = mrrg.insertMultiFanin({b2},
MRRGNode::make_routing( &god_module, 32, cyc2,
"c2", 0)).first;
139 const auto c3 = mrrg.insertMultiFanin({b2},
MRRGNode::make_routing( &god_module, 32, cyc2,
"c3", 0)).first;
143 mrrg.insertMultiFanin({f1},
MRRGNode::make_routing(&god_module, 32, cyc2,
"f1_dummy_fanout_for_ILPMapper", 0));
155 SCENARIO(
"General Mapper Tests -- Tiny MRRGs",
"") {
162 const auto& test = GENERATE_REF(values<MapperTest>({
163 {
true,
true,
false,
"3x1t1 ortho",
"linear 3-node",
164 []{
return makeMRRG_Simple(1, {{
"width",1},{
"height",3}});},
167 {
true,
true,
false,
"1x3t1 ortho",
"linear 3-node",
168 []{
return makeMRRG_Simple(1, {{
"width",3},{
"height",1}});},
172 {
false,
false,
false,
"1x2t1 ortho",
"linear 3-node",
173 []{
return makeMRRG_Simple(1, {{
"width",2},{
"height",1}});},
177 {
false,
false,
false,
"1x1t1 ortho without self-feedback",
"1 node self-loop",
178 []{
return makeMRRG_Simple(1, {{
"width",1},{
"height",1},{
"fu_self_feedback",
false}});},
182 {
true,
false,
false,
"1x1t1 ortho with self-feedback, but no FU latency",
"1 node self-loop",
183 []{
return makeMRRG_Simple(1, {{
"width",1},{
"height",1}});},
187 {
true,
true,
false,
"1x1t1 ortho with self-feedback and FU latency of 1",
"1 node self-loop",
188 []{
return makeMRRG_Simple(1, {{
"width",1},{
"height",1},{
"fu_latency",1}});},
192 {
true,
true,
false,
"2x2t1 flow ortho with self-feedback and FU latency of 1",
"Converging 121",
193 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",2},{
"fu_latency",1}});},
197 {
true,
false,
false,
"2x2t1 flow ortho with self-feedback and FU latency of 1 except for FU 01",
"Converging 121",
198 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",2},{
"fu_latency",1},{
xyName(
"fu_latency",{0,1}),0}});},
202 {
true,
true,
true,
"2x1t2 flow ortho without self-feedback",
"Tree 12",
203 []{
return makeMRRG_SimpleFlow(2, {{
"width",2},{
"height",1},{
"fu_self_feedback",
false}});},
207 {
true,
true,
false,
"2x2t1 flow ortho supporting only commutative ops",
"Converging 21 with only commutative operands",
208 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",2},{
"operand_tags",
"only_commutative"}});},
212 {
false,
true,
false,
"2x2t1 flow ortho supporting only commutative ops",
"Converging 21 with only non-commutative operands",
213 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",2},{
"operand_tags",
"only_commutative"}});},
217 {
true,
true,
false,
"2x2t1 flow ortho",
"Converging 21 with only non-commutative operands",
218 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",2}});},
222 {
true,
true,
false,
"3x3t1 flow ortho",
"Converging 21 with only untagged operands",
223 []{
return makeMRRG_SimpleFlow(1, {{
"width",3},{
"height",3}});},
227 {
false,
false,
false,
"1x2t1 flow ortho, op support backwards",
"Linear 2-node, hetero ops",
228 []{
return makeMRRG_SimpleFlow(1, {
229 {
"width",1},{
"height",2},{
"fu_arity",1},{
"fu_self_feedback",
false},{
xyName(
"fu_ops",{0,0}),
"add"},{
xyName(
"fu_ops",{0,1}),
"mul"}});},
233 {
true,
true,
false,
"2x1t1 flow ortho",
"Multi-edge 11, no operand tags",
234 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1}});},
237 {
true,
true,
false,
"2x1t1 flow ortho",
"Multi-edge 11, non-commutative",
238 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1}});},
241 {
true,
true,
false,
"2x1t1 flow ortho",
"Multi-edge 11, commutative",
242 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1}});},
245 {
true,
true,
false,
"2x1t1 flow ortho",
"Multi-edge 11, mixed_lhs",
246 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1}});},
249 {
true,
true,
false,
"2x1t1 flow ortho",
"Multi-edge 11, mixed_any",
250 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1}});},
254 {
true,
true,
false,
"2x1t1 flow ortho only_one_any operands",
"Multi-edge 11, mixed_lhs",
255 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"operand_tags",
"only_one_any"}});},
258 {
false,
false,
false,
"2x1t1 flow ortho only_commutative operands",
"Multi-edge 11, mixed_lhs",
259 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"operand_tags",
"only_commutative"}});},
262 {
true,
true,
false,
"2x1t1 flow ortho only_one_any operands",
"Multi-edge 11, mixed_any",
263 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"operand_tags",
"only_one_any"}});},
271 {
false,
false,
false,
"2x1t1 flow ortho, fu_arity 1",
"Multi-edge 11, no operands",
272 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"fu_arity",1},{
"operand_tags",
"none"},{
"fu_self_feedback",
false}});},
276 {
false,
false,
false,
"2x1t1 flow ortho, fu_arity 1, all_lhs",
"Multi-edge 11, no operands",
277 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"fu_arity",1},{
"operand_tags",
"all_lhs"},{
"fu_self_feedback",
false}});},
281 {
false,
false,
false,
"2x1t1 flow ortho, fu_arity 1, all_lhs",
"Multi-edge 11, one-lhs-one-untagged",
282 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"fu_arity",1},{
"operand_tags",
"none"},{
"fu_self_feedback",
false}});},
286 {
false,
false,
false,
"2x1t1 flow ortho, fu_arity 1, all_lhs",
"Multi-edge 11, one-lhs-one-untagged",
287 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"fu_arity",1},{
"operand_tags",
"all_lhs"},{
"fu_self_feedback",
false}});},
291 {
false,
false,
false,
"2x1t1 flow ortho, fu_arity 2",
"Multi-edge 11, all lhs",
292 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"fu_arity",2},{
"operand_tags",
"all"},{
"fu_self_feedback",
false}});},
296 {
true,
true,
false,
"2x1t1 flow ortho, fu_arity 2 all_lhs",
"Multi-edge 11, all lhs",
297 []{
return makeMRRG_SimpleFlow(1, {{
"width",2},{
"height",1},{
"fu_arity",2},{
"operand_tags",
"all_lhs"},{
"fu_self_feedback",
false}});},
301 {
false,
false,
false,
"112x21DAG: fu_arity 2, crossbar at input, lhs-rhs",
"Multi-edge 11, all_lhs",
302 []{
return makeMRRG_112x21DAG(0);},
306 {
true,
true,
false,
"112x21DAG: fu_arity 2, crossbar at input, lhs-rhs",
"Multi-edge 11 lhs-rhs",
307 []{
return makeMRRG_112x21DAG(0);},
311 {
true,
false,
false,
"112x21DAG: fu_arity 2, crossbar at input, lhs-rhs, d1 latency 1",
"Multi-edge 11 untagged",
312 []{
return makeMRRG_112x21DAG(1);},
316 {
true,
false,
false,
"112x21DAG: fu_arity 2, crossbar at input, lhs-rhs, d1 latency 1",
"Multi-edge 11 lhs-rhs",
317 []{
return makeMRRG_112x21DAG(1);},
321 {
false,
false,
false,
"221DAG: II=2, c1 latenecy 0, lhs-rhs",
"Multi-edge 11 all_lhs",
322 []{
return makeMRRG_221DAG(2, 0);},
326 {
true,
true,
true,
"221DAG: II=2, c1 latenecy 0, lhs-rhs",
"Multi-edge 11 lhs-rhs",
327 []{
return makeMRRG_221DAG(2, 0);},
331 {
false,
false,
false,
"221DAG: II=2, c1 latenecy 1, lhs-rhs",
"Multi-edge 11 all_lhs",
332 []{
return makeMRRG_221DAG(2, 1);},
336 {
true,
true,
true,
"221DAG: II=2, c1 latenecy 1, lhs-rhs",
"Multi-edge 11 lhs-rhs",
337 []{
return makeMRRG_221DAG(2, 1);},
341 {
true,
true,
true,
"221DAG: II=2, c1 latenecy 3, lhs-rhs",
"Multi-edge 11 lhs-rhs",
342 []{
return makeMRRG_221DAG(2, 3);},
346 {
false,
false,
false,
"222DAG: II=1, c1 latenecy 0",
"Multi-edge 11 lhs-rhs",
347 []{
return makeMRRG_222DAG(1, 0);},
351 {
true,
true,
false,
"222DAG: II=1, c1 latenecy 0",
"Parallel 11-11",
352 []{
return makeMRRG_222DAG(1, 0);},
356 {
true,
true,
true,
"231DAG: II=1, c1 latenecy 0",
"Multi-edge 11 commutative",
357 []{
return makeMRRG_231DAG(1, 0);},
361 {
false,
false,
false,
"121DAG: II=1, c1 latenecy 0",
"Triple-edge 11",
362 []{
return makeMRRG_121DAG(1, 0);},
366 {
true,
true,
false,
"131DAG: II=1, c1 latenecy 0",
"Triple-edge 11",
367 []{
return makeMRRG_131DAG(1, 0);},
372 const auto approach = GENERATE_REF(values<MappingApproch>({
373 {
"Exact ILP with partial latency balancing",
"ILPMapper",
true,
false},
374 {
"Exact ILP with latency balancing",
"ILPMapper",
true,
false},
375 {
"Heuristic ILP with latency balancing",
"ILPHeuristicMapper",
true,
false},
376 {
"Heuristic ILP without latency balancing",
"ILPHeuristicMapper",
false,
false},
377 {
"Heuristic ILP with re-computation and latency balancing",
"ILPHeuristicMapper",
true,
true},
378 {
"Heuristic ILP with re-computation but without latency balancing",
"ILPHeuristicMapper",
false,
true},
386 auto args = mapper_registry.getAllRegisteredArgDefaults();
389 if (not approach.balance_latency && approach.mapper_id ==
"ILPHeuristicMapper") {
390 args.setBool(approach.mapper_id +
".allow_unbalanced_latency",
true);
392 if (approach.description ==
"Exact ILP with latency balancing") {
393 args.setBool(approach.mapper_id +
".enable_topological_order_mode",
true);
395 if (approach.recomputation_enabled) {
396 args.setBool(approach.mapper_id +
".allow_recomputation",
true);
399 const bool expect_to_map = test.connectivity_available
400 && (not approach.balance_latency || test.possible_to_balance_latency)
401 && (approach.recomputation_enabled || not test.requires_recomputation);
407 GIVEN(
"A '" << test.cgra_description <<
"' CGRA") {
409 if (approach.mapper_id ==
"ILPMapper") {
410 remove_useless_routing_nodes_config.
setBool(
"fus_must_have_fanout",
true);
412 const auto& mrrg = removeUselessRoutingNodes(test.mrrg_generator(), remove_useless_routing_nodes_config);
413 const auto II = mrrg.initiationInterval();
414 auto cgra = std::make_shared<CGRA>();
416 GIVEN(
"A '" << test.opgraph_description <<
"' DFG") {
417 const auto opgraph = std::make_shared<OpGraph>(test.opgraph_generator());
419 WHEN (
"Mapped with a '" << approach.description <<
"' Mapper") {
420 std::unordered_map<std::string, std::string> fix_port;
421 const auto mapping = mapper_registry.createMapper(approach.mapper_id, cgra, 60, args)->mapOpGraph(opgraph, II, mrrg, fix_port);
423 THEN (
"Mapping should " << (expect_to_map ?
"succeed" :
"fail")) {
425 mrrg.printDot(std::cout);
426 opgraph->serialize(std::cout);
427 if (mapping.isMapped()) { mapping.outputMapping(std::cout); }
431 {approach.mapper_id +
".verbosity",
"3"},
435 }
else if (mapping.isMapped()) {
436 AND_WHEN(
"Making a MappingGraph") {
439 THEN(
"Latency should " << (test.possible_to_balance_latency ?
"" :
"not ") <<
"be balanced") {
441 const auto latency_check_result =
latencyCheck(cmg_result.mg, mrrg, cmg_result.toMRRG);
442 CHECKED_ELSE(latency_check_result.first == test.possible_to_balance_latency) {
443 mrrg.printDot(std::cout);
444 opgraph->serialize(std::cout);
445 mapping.outputMapping(std::cout);
446 cmg_result.mg.printDot(std::cout, mrrg, *opgraph, cmg_result.toMRRG,
ConfigStore());
447 INFO(
"The previous check failed, and it dumped its inputs");
464 auto cgra = std::make_shared<CGRA>();
467 const auto args = mapper_registry.getAllRegisteredArgDefaults();
468 const auto mapper = mapper_registry.createMapper(
"ILPMapper", cgra, 60, args);
469 std::unordered_map<std::string, std::string> fix_port;
470 REQUIRE_THROWS_WITH(mapper->mapOpGraph(opgraph, mrrg.initiationInterval(), mrrg, fix_port), Catch::Matchers::Contains(
"FU does not have exactly one fanout"));
476 if (options.getIntOr(
"fu_arity", 0) == 1) {
477 options.addString(
"operand_tags",
"none");
482 {
"fu_self_feedback",
true},
483 {
"ortho_connections",
true},
484 {
"diag_connections",
false},
485 {
"flow_conections_only",
false},
488 {
"fu_ops",
"add,sub"},
491 {
"operand_tags",
"all"},
493 const auto inter_cluster_offsets = [&](){
494 auto result = std::vector<XY>{};
495 if (options.getBool(
"ortho_connections")) {
496 if (not options.getBool(
"flow_conections_only")) {
497 result.emplace_back(-1, 0);
498 result.emplace_back( 0,-1);
500 result.emplace_back( 1, 0);
501 result.emplace_back( 0, 1);
504 if (options.getBool(
"diag_connections")) {
505 if (not options.getBool(
"flow_conections_only")) {
506 result.emplace_back(-1,-1);
507 result.emplace_back(-1, 1);
508 result.emplace_back( 1,-1);
510 result.emplace_back( 1, 1);
513 if (options.getBool(
"fu_self_feedback")) {
514 result.emplace_back( 0, 0);
520 const auto wrap_logic = [&](
XY local_xy,
XY offset) ->
XY {
522 local_xy.first + offset.first,
523 local_xy.second + offset.second,
527 auto mrrg =
MRRG{II};
532 std::vector<MRRG::NodeDescriptor> inputs;
535 std::map<int, std::map<XY,PEData>> pe_data;
538 for (
int ii = 0; ii < II; ++ii) {
539 for (
int x = 0; x < options.getInt(
"width"); ++x) {
540 for (
int y = 0; y < options.getInt(
"height"); ++y) {
541 const auto xy =
XY{x,y};
542 const auto fu_name =
xyName(
"fu", xy);
543 const auto fu_latency = options.getIntOr(
xyName(
"fu_latency",xy), options.getInt(
"fu_latency"));
545 const auto fu_op_csv = options.getStringOr(
xyName(
"fu_ops",xy), options.getString(
"fu_ops"));
546 std::vector<OpCode> fu_ops;
547 for (std::size_t next_pos = 0, pos = 0; next_pos != fu_op_csv.size(); pos = next_pos + 1) {
548 next_pos = std::min(fu_op_csv.find_first_of(
',', pos), fu_op_csv.size());
552 auto& pe = pe_data[ii].insert({ xy, {
561 mrrg.link(pe.fu, pe.output);
563 const auto& operand_option = options.getString(
"operand_tags");
564 const auto fu_arity = options.getInt(
"fu_arity");
566 std::vector<MRRGNode::SupportedOpTags> operand_tags(fu_arity);
567 if (operand_option ==
"all") {
571 }
else throw std::runtime_error(
"unsupported arity");
572 }
else if (operand_option ==
"none") {
574 }
else if (operand_option ==
"only_lhs") {
577 operand_tags.at(1) = {};
578 }
else throw std::runtime_error(
"unsupported arity");
579 }
else if (operand_option ==
"all_lhs") {
580 for (
auto& tag_list : operand_tags) {
583 }
else if (operand_option ==
"only_one_any") {
585 operand_tags.at(0) = {};
587 }
else throw std::runtime_error(
"unsupported arity");
588 }
else if (operand_option ==
"only_commutative") {
592 }
else if (fu_arity == 3) {
596 }
else throw std::runtime_error(
"unsupported arity");
598 throw std::runtime_error(
"unsupported operand_tags option: " + operand_option);
601 for (
int i = 0; i < fu_arity; ++i) {
603 &god_module, 32, ii,
xyidName(
"pe_in", xy, i), operand_tags.at(i))).first);
604 mrrg.link(pe.inputs.back(), pe.fu);
611 for (
int ii = 0; ii < II; ++ii) {
612 for (
int x = 0; x < options.getInt(
"width"); ++x) {
613 for (
int y = 0; y < options.getInt(
"height"); ++y) {
614 const auto xy =
XY{x,y};
615 const auto& pe = pe_data.at(ii).at(xy);
616 for (
const auto offset : inter_cluster_offsets) {
617 const auto remote_xy = wrap_logic(xy, offset);
618 const auto find_results = pe_data.at(ii).find(remote_xy);
619 if (find_results == pe_data.at(ii).end()) {
continue; }
620 for (
const auto& remote_input : find_results->second.inputs) {
624 mrrg.getNodeRef(pe.output).getHierarchyQualifiedName()
625 +
"-to-" + mrrg.getNodeRef(remote_input).getHierarchyQualifiedName()
627 mrrg.link(pe.output, intermediate);
628 mrrg.link(intermediate, remote_input);
641 {
"flow_conections_only",
"yes"}
643 return makeMRRG_Simple(II, options);
648 {
"fus_must_have_fanout",
false},
652 for (
bool made_changes =
true; made_changes;) {
653 made_changes =
false;
656 const auto& node = name_and_ptr.second.get();
659 bool driven_by_fu =
false;
660 for (
const auto fanin : mrrg.
fanin(node)) {
662 if (driven_by_fu)
break;
664 if (options.
getBool(
"fus_must_have_fanout") && driven_by_fu)
continue;
665 if (mrrg.
fanin(node).empty() || mrrg.
fanout(node).empty()) {