CGRA-ME
RIKENPE_Elastic.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * The software programs comprising "CGRA-ME" and the documentation provided
3  * with them are copyright by its authors and the University of Toronto. Only
4  * non-commercial, not-for-profit use of this software is permitted without ex-
5  * plicit permission. This software is provided "as is" with no warranties or
6  * guarantees of support. See the LICENCE for more details. You should have re-
7  * ceived a copy of the full licence along with this software. If not, see
8  * <http://cgra-me.ece.utoronto.ca/license/>.
9  ******************************************************************************/
10 
19 
// Lookup table: ones_in_4b[x] is the number of set bits in the 4-bit value x (0..15).
static const unsigned char ones_in_4b[] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4};
// Macro for calculating the number of ones in the low 8 bits of v.
//  - v is parenthesized so that expression arguments (e.g. `a | b`) are
//    evaluated as a whole before the mask/shift is applied.
//  - the high nibble is masked as well, so bits above bit 7 are ignored
//    instead of indexing past the end of the 16-entry table.
//  - v is expanded twice: do not pass an argument with side effects.
#define get_ones_in_8b(v) (ones_in_4b[(v) & 0x0f] + ones_in_4b[((v) >> 4) & 0x0f])
23 
25 {
  // Body of RIKEN_PE_Elastic::GenericName() (per this page's cross-reference
  // index, the definition starts at listing line 24, which is not shown here).
  // Returns "elastic_riken_in<pe_conn_in>_out<pe_conn_out>", with both
  // connectivity members rendered in decimal by std::to_string.
26  std::string name;
27 
28  name = "elastic_riken_in" + std::to_string(pe_conn_in) + "_out" + std::to_string(pe_conn_out);
29 
30  return name;
31 }
32 
// ALU selector tested by the constructor below via `if (!FP)`:
//   FP == 0 -> an ElasticFuncUnit is instantiated as "FuncUnit" (integer ALU case)
//   FP != 0 -> an ElasticFPUnit is instantiated as "FuncUnit" (floating point ALU case)
33 #define FP 1 // janders selectable FP or INT ALU
34 
// Constructs one elastic RIKEN processing element (PE): instantiates its
// submodules, adds their configuration cells, creates the tile ports and
// wires everything together.
//
// Parameters (as used in the body below):
//   name         - forwarded to the Module base constructor.
//   pe_conn_in   - bit mask; bit i set => input port "in<i>" and register "reg<i>" are created.
//   pe_conn_out  - bit mask; bit i set => output port "out<i>" is created.
//                  Only bits 0..7 of either mask are scanned (num_Possible_Ports == 8).
//   II           - must be 1; any other value throws cgrame_error.
//   latency      - forwarded to the function unit constructor.
//   loc          - forwarded to the Module base and to every submodule.
//   type         - forwarded to the ElasticCrossbar constructor.
//   buffer_depth - stored in the member and forwarded to the three ElasticBufferFifo instances.
//   eb_enable    - forwarded to the FIFOs; when true, "enable" config cells are added
//                  for the three FIFOs and for every input register.
//   contexts     - forwarded to every addConfig call.
//   pred         - when true, adds "cond_fork" and "trunc_res" and feeds crossbar.pred_in.
//   pred_type    - forwarded to the ElasticCrossbar constructor.
//
// Throws: cgrame_error when II != 1.
35 RIKEN_PE_Elastic::RIKEN_PE_Elastic(std::string name, int pe_conn_in, int pe_conn_out, int II, int latency, Location loc, int type, int buffer_depth, bool eb_enable, int contexts, bool pred, std::string pred_type)
36  : Module(name, loc, 32)
37  , pe_conn_in(pe_conn_in)
38  , pe_conn_out(pe_conn_out)
39  , buffer_depth(buffer_depth)
40 {
41 
42  // Figure out Word Length
43  int size = getSize();
44  isElastic = true; // RIKEN CGRA is dataflow
45  if (II != 1)
46  throw cgrame_error ("RIKEN CGRA must be single context.\n");
47 
48  int num_ALU_inputs = 2;
49  // Figure out number of inputs, and outputs using pe_conn
  // (one port per set bit; the masks are expected to fit in 8 bits, matching num_Possible_Ports below)
50  int num_outPorts = get_ones_in_8b(pe_conn_out);
51  int num_inPorts = get_ones_in_8b(pe_conn_in);
52 
53  // TODO: Calculate total number of possible inputs/outputs, based on the length of pe_conn
54  int num_Possible_Ports = 8;
55 
56  // ****************************************** INSTANTIATE SUBMODULES OF THE PE ************
57 
58  // Add the "guts" of the PE, e.g., the elastic buffers, constant reg, ALU, etc.
  // NOTE(review): buffA, buffB and buffOut (below) are all added with the same four trailing
  // numeric arguments (0.0, 0.4, 0.5, 0.1) -- presumably layout hints; confirm the overlap is intended.
59  addSubModule(new ElasticBufferFifo("buffA", loc, buffer_depth, 32, eb_enable), 0.0, 0.4, 0.5, 0.1);
60  addSubModule(new ElasticBufferFifo("buffB", loc, buffer_depth, 32, eb_enable), 0.0, 0.4, 0.5, 0.1);
61  addSubModule(new ConstUnit("Imm", loc, 32, 1, isElastic), 0.0, 0.15, 1, 0.125);
62  addSubModule(new ElasticEagerFork("Imm_fork", loc, 32, num_ALU_inputs, num_ALU_inputs, isElastic), 0.25, 0, 0.5, 0.1);
  // The condition fork only exists when predication is requested.
63  if (pred) {
64  addSubModule(new ElasticEagerFork("cond_fork", loc, 32, 2, 2, isElastic), 0.25, 0, 0.5, 0.1);
65  }
  // Two-input operand muxes: in0 is wired to the operand FIFO, in1 to the immediate fork (see connections below).
66  addSubModule(new Multiplexer("muxA", loc, 2, size, isElastic), 0.0, 0.25, 0.25, 0.125);
67  addSubModule(new Multiplexer("muxB", loc, 2, size, isElastic), 0.5, 0.25, 0.25, 0.125);
68 
69  addSubModule(new ElasticBufferFifo("buffOut", loc, buffer_depth, 32, eb_enable), 0.0, 0.4, 0.5, 0.1);
70 
  // Both branches register their unit under the same name, "FuncUnit", so the wiring below is shared.
  // NOTE(review): the opcode initializer lists are only partially visible in this listing
  // (listing line numbers skip inside both lists).
  // The integer branch passes the literal 1 where the FP branch passes II; II == 1 is enforced above.
71  if (!FP) { // integer ALU case
72  addSubModule(new ElasticFuncUnit("FuncUnit", loc,
73  {
78  OpCode::OR,
83  }, size, 1, latency, pred, isElastic), 0, 0.75, 0.5, 0.25);
84  }
85  else { // floating point ALU case
86  addSubModule(new ElasticFPUnit("FuncUnit", loc, {
90  OpCode::SQRT,}, size, II, latency, pred, isElastic), 0.5, 0.4, 0.5, 0.1);
91  }
92 
93  // Crossbar (RIKEN paper calls this SB == switch block)
94  int numCrossbarInputs = num_inPorts + 1; // Add 1 for ALU output (from FIFO)
95  int numCrossbarOutputs = num_outPorts + num_ALU_inputs; // Add 2 for ALU in_a and in_b
96  addSubModule(new ElasticCrossbar("crossbar", loc, numCrossbarInputs, numCrossbarOutputs, size, type, pred, pred_type), 0.5, 0.375, 0.4, 0.4);
97 
98  // Input elastic registers in front of the crossbar inputs
  // (one "reg<i>" per set bit i of pe_conn_in)
99  for (int i = 0; i < num_Possible_Ports; i++)
100  {
101  if (pe_conn_in & (0b00000001<<i))
102  {
103  addSubModule(new Register("reg" + std::to_string(i), loc, size, isElastic), static_cast<double>(i)/num_Possible_Ports, 0.5, 1.0/num_Possible_Ports, 0.125);
104  }
105  }
106 
107  // ****************************************** CONFIGURATION BITS ************
108 
109  // Now, add the necessary configuration bits for the above elements
  // FIFO enable cells are only added when eb_enable is set.
110  if (eb_enable){
111  addConfig("EBEnable", {"buffOut.enable"}, contexts, isElastic);
112  addConfig("buffAEnable", {"buffA.enable"}, contexts, isElastic);
113  addConfig("buffBEnable", {"buffB.enable"}, contexts, isElastic);
114  }
115 
116  // Config bits attached to FF enables (FFs on the inputs to the crossbar)
117  for(int i = 0; i < num_Possible_Ports; i++)
118  {
119  if (pe_conn_in & (0b00000001<<i))
120  {
121  // Input registers
  // (cell "Reg<i>config" drives "reg<i>.enable"; skipped entirely when eb_enable is false)
122  if (eb_enable){
123  addConfig("Reg" + std::to_string(i) + "config", {"reg" + std::to_string(i) + ".enable"}, contexts, isElastic);
124  }
125  }
126  }
127 
128  // Config bits for the ALU muxes, const fork, immediate value
129  addConfig("ALUMuxAconfig", {"muxA.select"}, contexts, isElastic);
130  addConfig("ALUMuxBconfig", {"muxB.select"}, contexts, isElastic);
131  addConfig("ConstForkConfig", {"Imm_fork.enable_downstream"}, contexts, isElastic);
132  if (pred) {
133  addConfig("CondForkConfig", {"cond_fork.enable_downstream"}, contexts, isElastic);
134  }
135 
136 
137  // ****************************************** CONNECT THE PIECES TOGETHER ************
138 
139  // Add inputs ports to the PE tile
  // (port names keep the bit index: bit i of pe_conn_in -> "in<i>")
140  for(int i = 0; i < num_Possible_Ports; i++)
141  {
142  if (pe_conn_in & (0b00000001<<i)) {
143  addPort("in" + std::to_string(i), PORT_INPUT, size, isElastic);
144  }
145  }
146 
147  // Add output ports to the PE tile
  // (port names keep the bit index: bit i of pe_conn_out -> "out<i>")
148  for (int i = 0; i < num_Possible_Ports; i++)
149  {
150  if (pe_conn_out & (0b00000001<<i)) {
151  addPort("out" + std::to_string(i), PORT_OUTPUT, size, isElastic);
152  }
153  }
154 
155  // Connect the inputs of the PE to the registers on the crossbar inputs
156  for(int i = 0; i < num_Possible_Ports; i++)
157  {
158  if (pe_conn_in & (0b00000001<<i))
159  { // InPorts to Registers
160  connectPorts("this.in" + std::to_string(i), "reg" + std::to_string(i) + ".in", isElastic);
161  }
162  }
163 
164  // Connect the registers on the crossbar inputs to the crossbar itself
  // Crossbar inputs are numbered densely (0..num_inPorts-1) in increasing bit order,
  // while register names keep the sparse bit index.
165  int crossbarInputCounter = 0; // Needed because the crossbar and mux indices may not be the same
166  for(int i = 0; i < num_Possible_Ports; i++)
167  {
168  if (pe_conn_in & (0b00000001<<i))
169  {
170  connectPorts("reg" + std::to_string(i) + ".out", "crossbar.in" + std::to_string(crossbarInputCounter), isElastic);
171  crossbarInputCounter += 1;
172  }
173  }
174 
175  // Connect the crossbar outputs to the PE tile outputs
  // Same dense-vs-sparse numbering as for the inputs: crossbar.out0..out<num_outPorts-1>
  // feed the tile outputs in increasing bit order.
176  int crossbarOutputCounter = 0; // Needed because the crossbar and mux indices may not be the same
177  for(int i = 0; i < num_Possible_Ports; i++)
178  {
179  if (pe_conn_out & (0b00000001<<i))
180  {
181  connectPorts("crossbar.out" + std::to_string(crossbarOutputCounter), "this.out" + std::to_string(i), isElastic);
182  crossbarOutputCounter += 1;
183  }
184  }
185 
186  // Crossbar to buffA and buffB
  // (the two crossbar outputs after the tile outputs: indices num_outPorts and num_outPorts + 1)
187  connectPorts("crossbar.out" + std::to_string(num_outPorts), "buffA.data_in", isElastic);
188  connectPorts("crossbar.out" + std::to_string(num_outPorts + 1), "buffB.data_in", isElastic);
189 
190  // buffA and buffB to muxA and muxB
191  connectPorts("buffA.data_out", "muxA.in0", isElastic);
192  connectPorts("buffB.data_out", "muxB.in0", isElastic);
193 
194  // Imm to Imm_fork
195  connectPorts("Imm.out", "Imm_fork.in", isElastic);
196 
197  // Imm_fork to muxA and muxB
198  connectPorts("Imm_fork.out0", "muxA.in1", isElastic);
199  connectPorts("Imm_fork.out1", "muxB.in1", isElastic);
200 
201  // muxA and muxB to Func Unit
202  connectPorts("muxA.out", "FuncUnit.in_a", isElastic);
203  connectPorts("muxB.out", "FuncUnit.in_b", isElastic);
204 
205 
206  // and here is the messy part -- connect FuncUnit to ALU or FPALU wrapper
207  // Omar no longer messy :)
208  connectPorts("FuncUnit.out", "buffOut.data_in", isElastic);
209 
  // The result FIFO feeds the last crossbar input (index num_inPorts). With predication it
  // first passes through cond_fork, whose second output goes via trunc_res to crossbar.pred_in.
210  if (pred) {
211  // connect buffOut to the crossbar (SB)
  // NOTE(review): unlike every other addSubModule call here, trunc_res is added without the
  // four trailing numeric arguments.
212  addSubModule(new TruncateInput("trunc_res", loc, 1, 32, isElastic));
213 
214  connectPorts("buffOut.data_out", "cond_fork.in", isElastic);
215  connectPorts("cond_fork.out0", "crossbar.in" + std::to_string(num_inPorts), isElastic);
216  connectPorts("cond_fork.out1", "trunc_res.in", isElastic);
217  connectPorts("trunc_res.out0", "crossbar.pred_in", isElastic);
218  } else {
219  connectPorts("buffOut.data_out", "crossbar.in" + std::to_string(num_inPorts), isElastic);
220  }
221 }
ElasticBufferFifo
Definition: ModuleElastic.h:10
ElasticEagerFork
Definition: ModuleElastic.h:34
ElasticFuncUnit
Definition: ModuleElastic.h:260
Module::name
std::string name
Definition: Module.h:341
TruncateInput
Zero-cycle latency split input.
Definition: Module.h:621
Multiplexer
Zero-cycle latency multiplexer.
Definition: Module.h:592
RIKEN_PE_Elastic::buffer_depth
int buffer_depth
Definition: UserModules.h:35
Location
Definition: Module.h:156
ElasticFPUnit
Definition: ModuleElastic.h:267
OpCode::ASHR
@ ASHR
OpCode::OR
@ OR
RIKEN_PE_Elastic::pe_conn_out
int pe_conn_out
Definition: UserModules.h:34
Register
A simple latency element with an enable signal; a data flip-flop.
Definition: Module.h:566
OpCode::ADD
@ ADD
Module::addPort
void addPort(std::string portname, port_type pt, unsigned size)
Definition: Module.cpp:1354
get_ones_in_8b
#define get_ones_in_8b(v)
Definition: RIKENPE_Elastic.cpp:22
to_string
const std::string & to_string(const OpGraphOpCode &opcode)
Definition: OpGraph.cpp:111
ElasticCrossbar
Definition: ModuleElastic.h:275
OpCode::LSHR
@ LSHR
OpCode::XOR
@ XOR
UserModules.h
RIKEN_PE_Elastic::GenericName
virtual std::string GenericName()
Definition: RIKENPE_Elastic.cpp:24
ConstUnit
Definition: Module.h:540
Module
Definition: Module.h:163
PORT_INPUT
@ PORT_INPUT
Definition: Module.h:63
OpCode::FMUL
@ FMUL
FP
#define FP
Definition: RIKENPE_Elastic.cpp:33
OpCode::SHL
@ SHL
OpCode::FDIV
@ FDIV
Module::addConfig
void addConfig(ConfigCell *c, std::vector< std::string > ConnectTo)
Definition: Module.cpp:1087
OpCode::SQRT
@ SQRT
Module::getSize
int getSize() const
Definition: Module.h:246
Module::addSubModule
void addSubModule(Module *m)
Definition: Module.cpp:1124
OpCode::AND
@ AND
Module::isElastic
bool isElastic
Definition: Module.h:236
OpCode::SUB
@ SUB
OpCode::MUL
@ MUL
Module::loc
Location loc
Definition: Module.h:239
RIKEN_PE_Elastic::pe_conn_in
int pe_conn_in
Definition: UserModules.h:33
ones_in_4b
static const unsigned char ones_in_4b[]
Definition: RIKENPE_Elastic.cpp:21
Module::connectPorts
void connectPorts(std::string src, std::string dst, bool isElastic)
Definition: Module.cpp:1232
OpCode::FADD
@ FADD
RIKEN_PE_Elastic::RIKEN_PE_Elastic
RIKEN_PE_Elastic(std::string name, int pe_conn_in, int pe_conn_out, int II, int latency, Location Loc, int type, int buffer_depth=2, bool eb_enable=true, int contexts=1, bool pred=false, std::string pred_type="full")
Definition: RIKENPE_Elastic.cpp:35
cgrame_error
Definition: Exception.h:20
PORT_OUTPUT
@ PORT_OUTPUT
Definition: Module.h:64