18 std::pair<OpGraph::Walk, OpGraph::Walk> reverse_and_canonicalize
19 (std::pair<OpGraph::Walk, OpGraph::Walk>&& line_trail_pair) {
20 std::reverse(line_trail_pair.first.begin(), line_trail_pair.first.end());
21 std::reverse(line_trail_pair.second.begin(), line_trail_pair.second.end());
22 if (line_trail_pair.second < line_trail_pair.first) {
23 std::swap(line_trail_pair.first, line_trail_pair.second);
25 return line_trail_pair;
29 std::reverse(cyclic_trail.begin(), cyclic_trail.end());
30 auto new_begin = std::min_element(cyclic_trail.begin(), cyclic_trail.end(), [&](
const auto& lhs,
const auto& rhs) {
31 return opgraph.getNodeRef(lhs.val).getName() < opgraph.getNodeRef(rhs.val).getName();
33 std::rotate(cyclic_trail.begin(), new_begin, cyclic_trail.end());
48 std::set<OpDesc> ops_visited;
52 std::set<EdgeDesc> nte_and_convergence_edges_found;
60 for (OpDesc op : opgraph.
opNodes()) {
61 if (ops_visited.find(op) != ops_visited.end()) {
continue; }
65 {std::set<OpDesc> ancestors;
67 const auto& fanin = opgraph.
inputVals(op);
68 if (fanin.empty() || not ancestors.insert(op).second) {
break; }
69 op = opgraph.
inputOp(*fanin.begin());
73 const OpGraph* opgraph =
nullptr;
74 std::set<OpDesc> examined_op_targets = {};
75 std::set<EdgeDesc> non_tree_edges = {};
79 if (not examined_op_targets.insert(opgraph->
targetOfEdge(e)).second) {
80 non_tree_edges.insert(e);
84 visitor.opgraph = &opgraph;
85 visitor.examined_op_targets.
insert(op);
92 const auto determineSearchTreeAncestors = [&](
const auto& e) {
93 auto traceback = galgos.singleTraceback(std::set<EdgeDesc>(), e, search_tree);
94 std::reverse(traceback.path.begin(), traceback.path.end());
95 return std::move(traceback.path);
99 const auto determineSearchTreeEdgeWithSameTarget = [&](
const auto& e) {
101 if (visitor.non_tree_edges.find(edge_to_target) != visitor.non_tree_edges.end()) {
continue; }
102 if (search_tree.find(edge_to_target) == search_tree.end()) {
continue; }
103 return edge_to_target;
105 throw make_from_stream<cgrame_error>([&](
auto&& s) {
106 s <<
"couldn't find the search tree edge with same target as " << e;
110 const auto same_driver = [&](
auto&& e1,
auto&& e2) {
return opgraph.
inputOp(e1.val) == opgraph.
inputOp(e2.val); };
114 for (
const auto nte : visitor.non_tree_edges) {
116 if (not nte_and_convergence_edges_found.insert(nte).second) {
continue; }
120 const auto reverse_search_tree = galgos.wavedBreadthFirstVisit(
121 makeReversedGraphFromFaninLists<EdgeDesc>(&search_tree), {nte},
125 if (found) { cycle_edge = e; }
130 auto traceback = galgos.singleTraceback(
singleItemSet(nte), cycle_edge, reverse_search_tree);
131 if (not traceback.success) {
throw make_from_stream<cgrame_error>([fname=__func__](
auto&& s) {
132 s << fname <<
" couldn't find original non-tree edge when searching a reverse search tree";
134 if (nte == cycle_edge) {
135 traceback.path.pop_back();
138 if (result.
cyclic_trails.insert(reverse_and_canonicalize(std::move(traceback.path), opgraph)).second) {
144 const auto convergence_edge = determineSearchTreeEdgeWithSameTarget(nte);
146 if (not nte_and_convergence_edges_found.insert(convergence_edge).second) {
continue; }
147 const auto nte_ancestors = determineSearchTreeAncestors(nte);
148 const auto ce_ancestors = determineSearchTreeAncestors(convergence_edge);
150 if (same_driver_search.first != nte_ancestors.end()) {
152 {nte_ancestors.begin(), std::next(same_driver_search.first)},
153 {ce_ancestors.begin(), std::next(same_driver_search.second)},
158 throw make_from_stream<cgrame_error>([nte,fname=__func__](
auto&& s) {
159 s << fname <<
" couldn't construct a constraint from the non-tree edge " << nte
160 <<
". It is neither a cycle or a re-convergence!";
165 std::copy(visitor.examined_op_targets.begin(), visitor.examined_op_targets.end(), std::inserter(ops_visited, ops_visited.end()));
172 os <<
"Trails to balance: {\n";
173 os <<
" Line trail pairs to be balanced: ";
180 os <<
"\n Cycle trails to be balanced: ";
189 auto cg_vert_to_op = std::map<ConfigGraph::VertexID, OpGraph::OpDescriptor>{};
190 for (
const auto& v : config.
allNodes()) {
193 const bool has_input = v_attrs.
hasKey(
"input");
194 const bool has_output = v_attrs.hasKey(
"output");
195 const bool has_opcode = v_attrs.hasKey(
"opcode");
196 const int bitwidth = v_attrs.getIntOr(
"bitwidth", 32);
197 const std::string node_config = v_attrs.getStringOr(
"config",
"");
198 if ((
int)has_input + (
int)has_output + (
int)has_opcode > 1) {
199 throw make_from_stream<cgrame_error>([&](
auto&& s) {
200 s <<
"ambiguous: op " << config.
name(v)
201 <<
" specifies more than one of 'input', 'output', and 'opcode'";
209 :
throw make_from_stream<cgrame_error>([&](
auto&& s) {
210 s <<
"op: " << config.
name(v) <<
" does not specify an opcode!";
214 const auto src = cg_vert_to_op[v] = result.
insert({
218 v_attrs.getInt(
"constVal")
221 const auto src = cg_vert_to_op[v] = result.
insert({
225 v_attrs.getString(
"cmpMode"),
230 std::vector<BitSetting> bitSet;
231 for (
char bit : node_config) {
234 bitConfig->
add(bitSet, 0);
235 const auto src = cg_vert_to_op[v] = result.
insert({
242 const auto src = cg_vert_to_op[v] = result.
insert({
246 v_attrs.getStringOr(
"memName",
""),
250 const auto src = cg_vert_to_op[v] = result.
insert({
258 for (
const auto& v : config.
allNodes()) {
259 for (
const auto& out_edge : config.
outEdges(v)) {
260 const auto& e_attrs = config.
attributes(out_edge);
262 cg_vert_to_op.at(v), cg_vert_to_op.at(config.
target(out_edge)),
263 e_attrs.getStringOr(
"operand",
""),
264 e_attrs.getIntOr(
"bitwidth", 32),
265 e_attrs.getIntOr(
"dist", 0),
267 e_attrs.getBoolOr(
"predicate",
false));
279 OpGraph removeTheseNodesAndAnyNowUnusedFaninRecursively(
OpGraph opgraph, std::vector<OpGraph::OpDescriptor> ops_to_erase) {
281 std::set<OpGraph::OpDescriptor> will_be_or_is_erased{ops_to_erase.begin(), ops_to_erase.end()};
284 while (not ops_to_erase.empty()) {
286 const auto op = ops_to_erase.back();
287 ops_to_erase.pop_back();
289 const auto op_inputs = opgraph.
inputOps(op);
293 for (
const auto& input_op : op_inputs) {
294 if (not opgraph.
outputOps(input_op).empty()) {
continue; }
295 if (not will_be_or_is_erased.insert(input_op).second) {
continue; }
296 ops_to_erase.push_back(input_op);
310 return removeTheseNodesAndAnyNowUnusedFaninRecursively(std::move(opgraph), ops_to_erase);
315 const auto opcode = og.getNodeRef(op).getOpCode();
316 return og.inputVals(op).size() == 1 &&
317 (opcode == OpGraphOpCode::NOP
318 || opcode == OpGraphOpCode::SEXT
319 || opcode == OpGraphOpCode::ZEXT
320 || opcode == OpGraphOpCode::TRUNC);
323 for (
const auto& cast_op : orig_casts) {
325 for (
const auto& edge : og.
outEdges(cast_op)) {
335 const auto initial_op_nodes = opgraph.
opNodes();
337 auto erased_opnodes = std::set<OpGraph::OpDescriptor>{};
339 opgraph.
erase(op_desc);
340 erased_opnodes.insert(op_desc);
343 for (
const auto& op_desc : initial_op_nodes) {
344 const auto& opnode = opgraph.
getNodeRef(op_desc);
345 if (erased_opnodes.find(op_desc) != erased_opnodes.end()) {
continue; }
346 if (opnode.getOpCode() !=
OpCode::PHI) {
continue; }
349 const auto is_external_edge = [&](
const auto& in_edge) {
350 const auto& in_opcode = opgraph.
getNodeRef(opgraph.
inputOp(in_edge.val)).getOpCode();
354 const auto internal_in_edges =
filter_collection(opgraph.
inEdges(op_desc), [&](
auto& e) { return !is_external_edge(e); });
355 if (external_in_edges.empty()) {
throw std::logic_error(
"could not find external input edges to phi node '" + opnode.getName() +
"'!"); }
356 if (internal_in_edges.empty()) {
throw std::logic_error(
"could not find internal input edges to phi node '" + opnode.getName() +
"'!"); }
357 if (internal_in_edges.size() > 1) {
throw std::logic_error(
"multiple internal input edges to phi node '" + opnode.getName() +
"'... does this loop have multiple basic blocks?"); }
360 for (
const auto& int_in_edge : internal_in_edges) {
361 for (
const auto& out_edge : opgraph.
outEdges(op_desc)) {
367 opgraph = removeTheseNodesAndAnyNowUnusedFaninRecursively(std::move(opgraph), {op_desc});
374 bool made_changes =
true;
375 while (made_changes) {
376 made_changes =
false;
377 for (
const auto& op : og.
opNodes()) {
380 if (op_ref.getOpCode() == OpGraphOpCode::MUL) {
381 std::int64_t product_of_constants = 1;
382 int num_const_inputs = 0;
385 for (
const auto& in_edge : og.
inEdges(op)) {
387 if (in_op_ref.getOpCode() == OpGraphOpCode::CONST) {
389 num_const_inputs += 1;
393 if (num_const_inputs == (std::ptrdiff_t)og.
inEdges(op).size() || product_of_constants == 0) {
395 const auto new_const = og.
emplace(op_ref.getName() +
"_as_const",
396 op_ref.bitwidth, OpGraphOpCode::CONST, product_of_constants);
397 for (
const auto& out_edge : og.
outEdges(op)) {
400 og = removeTheseNodesAndAnyNowUnusedFaninRecursively(std::move(og), {op});
410 if (op_ref.getOpCode() == OpGraphOpCode::ADD) {
411 int64_t sum_of_constants = 0;
412 int num_const_inputs = 0;
416 for (
const auto& in_edge : og.
inEdges(op)) {
418 if (in_op_ref.getOpCode() == OpGraphOpCode::CONST) {
420 num_const_inputs += 1;
422 a_non_const_input = og.
inputOp(in_edge.val);
428 const auto num_op_inputs = (std::ptrdiff_t)og.
inEdges(op).size();
429 if (num_const_inputs == num_op_inputs) {
431 replacement_op = og.
emplace(op_ref.getName() +
"_as_const",
432 op_ref.bitwidth, OpGraphOpCode::CONST, sum_of_constants);
433 }
else if (num_const_inputs + 1 == num_op_inputs && sum_of_constants == 0) {
435 replacement_op = a_non_const_input;
438 if (replacement_op) {
439 for (
const auto& out_edge : og.
outEdges(op)) {
442 og = removeTheseNodesAndAnyNowUnusedFaninRecursively(std::move(og), {op});
448 if (made_changes) {
break; }
456 for (
const auto& op : og.
opNodes()) {
458 if (op_ref.getOpCode() != OpGraphOpCode::CONST) {
continue; }
462 const auto out_edges = og.
outEdges(op);
464 if (out_edges.size() <= 1) {
break; }
466 const auto& out_edge = out_edges.back();
468 auto new_name = op_ref.getName() +
"_dup" +
std::to_string(dup_number);