Skip to content

Commit 59327a4

Browse files
committed
[rtl] debug for laneScale.
1 parent 9b06847 commit 59327a4

19 files changed

+394
-238
lines changed

t1/src/Bundles.scala

+8-8
Original file line numberDiff line numberDiff line change
@@ -625,8 +625,8 @@ class SlotRequestToVFU(parameter: LaneParameter) extends Bundle {
625625
val vSew: UInt = UInt(2.W)
626626
val shifterSize: UInt = UInt((log2Ceil(parameter.eLen) * (parameter.datapathWidth / 8)).W)
627627
val rem: Bool = Bool()
628-
val executeIndex: UInt = UInt(2.W)
629-
val popInit: UInt = UInt(parameter.vlMaxBits.W)
628+
val executeIndex: UInt = UInt(log2Ceil(parameter.datapathWidth / 8).W)
629+
val popInit: UInt = UInt((parameter.datapathWidth / parameter.eLen * parameter.vlMaxBits).W)
630630
val groupIndex: UInt = UInt(parameter.groupNumberBits.W)
631631
val laneIndex: UInt = UInt(parameter.laneNumberBits.W)
632632
val complete: Bool = Bool()
@@ -645,9 +645,9 @@ class VFUResponseToSlot(parameter: LaneParameter) extends Bundle {
645645
val data: UInt = UInt(parameter.datapathWidth.W)
646646
val executeIndex: UInt = UInt(2.W)
647647
val clipFail: Bool = Bool()
648-
val ffoSuccess: Bool = Bool()
648+
val ffoSuccess: UInt = UInt((parameter.datapathWidth / parameter.eLen).W)
649649
val divBusy: Bool = Bool()
650-
val adderMaskResp: UInt = UInt(4.W)
650+
val adderMaskResp: UInt = UInt((parameter.datapathWidth / 8).W)
651651
val vxsat: UInt = UInt(4.W)
652652
// float flag
653653
val exceptionFlags: UInt = UInt(5.W)
@@ -710,7 +710,7 @@ class MaskUnitReadState(parameter: T1Parameter) extends Bundle {
710710
val accessLane: Vec[UInt] = Vec(parameter.laneNumber, UInt(log2Ceil(parameter.laneNumber).W))
711711
// 3: log2Ceil(8); 8: Use up to 8 registers
712712
val vsGrowth: Vec[UInt] = Vec(parameter.laneNumber, UInt(3.W))
713-
val executeGroup: UInt = UInt((parameter.laneParam.groupNumberBits + 2).W)
713+
val executeGroup: UInt = UInt((parameter.laneParam.groupNumberBits + log2Ceil(parameter.datapathWidth / 8)).W)
714714
val readDataOffset: UInt = UInt((log2Ceil(parameter.datapathWidth / 8) * parameter.laneNumber).W)
715715
val last: Bool = Bool()
716716
}
@@ -740,9 +740,9 @@ class MaskUnitExeReq(parameter: LaneParameter) extends Bundle {
740740
// source2, read offset
741741
val source2: UInt = UInt(parameter.datapathWidth.W)
742742
val index: UInt = UInt(parameter.instructionIndexBits.W)
743-
val ffo: Bool = Bool()
743+
val ffo: UInt = UInt((parameter.datapathWidth / parameter.eLen).W)
744744
// Is there a valid element?
745-
val fpReduceValid: Option[Bool] = Option.when(parameter.fpuEnable)(Bool())
745+
val fpReduceValid: Option[UInt] = Option.when(parameter.fpuEnable)(UInt((parameter.datapathWidth / parameter.eLen).W))
746746
}
747747

748748
class MaskUnitExeResponse(parameter: LaneParameter) extends Bundle {
@@ -773,7 +773,7 @@ class MaskUnitReadQueue(parameter: T1Parameter) extends Bundle {
773773
}
774774

775775
class MaskUnitWaitReadQueue(parameter: T1Parameter) extends Bundle {
776-
val executeGroup: UInt = UInt((parameter.laneParam.groupNumberBits + 2).W)
776+
val executeGroup: UInt = UInt((parameter.laneParam.groupNumberBits + log2Ceil(parameter.datapathWidth / 8)).W)
777777
val sourceValid: UInt = UInt(parameter.laneNumber.W)
778778
val replaceVs1: UInt = UInt(parameter.laneNumber.W)
779779
val needRead: UInt = UInt(parameter.laneNumber.W)

t1/src/Lane.scala

+16-10
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,9 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
10021002
entranceControl.executeIndex := 0.U
10031003
entranceControl.instructionFinished :=
10041004
// vl is too small, don't need to use this lane.
1005-
(((laneIndex ## 0.U(2.W)) >> csrInterface.vSew).asUInt >= csrInterface.vl || maskLogicCompleted) &&
1005+
(((laneIndex ## 0.U(
1006+
parameter.dataPathByteBits.W
1007+
)) >> csrInterface.vSew).asUInt >= csrInterface.vl || maskLogicCompleted) &&
10061008
// for 'nr' type instructions, they will need another complete signal.
10071009
!(laneRequest.bits.decodeResult(Decoder.nr) || laneRequest.bits.lsWholeReg)
10081010
// indicate if this is the mask type.
@@ -1016,25 +1018,27 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
10161018
val lastElementIndex: UInt = (csrInterface.vl - csrInterface.vl.orR)(parameter.vlMaxBits - 2, 0)
10171019
val requestVSew1H: UInt = UIntToOH(csrInterface.vSew)
10181020

1021+
val dataPathScaleBit: Int = log2Ceil(parameter.datapathWidth / parameter.eLen)
1022+
10191023
/** For an instruction, the last group is not executed by all lanes, here is the last group of the instruction xxxxx
10201024
* xxx xx -> vsew = 0 xxxxxx xxx x -> vsew = 1 xxxxxxx xxx -> vsew = 2
10211025
*/
10221026
val lastGroupForInstruction: UInt = Mux1H(
10231027
requestVSew1H(2, 0),
10241028
Seq(
1025-
lastElementIndex(parameter.vlMaxBits - 2, parameter.laneNumberBits + 2),
1026-
lastElementIndex(parameter.vlMaxBits - 2, parameter.laneNumberBits + 1),
1027-
lastElementIndex(parameter.vlMaxBits - 2, parameter.laneNumberBits)
1029+
lastElementIndex(parameter.vlMaxBits - 2, parameter.laneNumberBits + 2 + dataPathScaleBit),
1030+
lastElementIndex(parameter.vlMaxBits - 2, parameter.laneNumberBits + 1 + dataPathScaleBit),
1031+
lastElementIndex(parameter.vlMaxBits - 2, parameter.laneNumberBits + dataPathScaleBit)
10281032
)
10291033
)
10301034

10311035
/** Which lane the last element is in. */
10321036
val lastLaneIndex: UInt = Mux1H(
10331037
requestVSew1H(2, 0),
10341038
Seq(
1035-
lastElementIndex(parameter.laneNumberBits + 2 - 1, 2),
1036-
lastElementIndex(parameter.laneNumberBits + 1 - 1, 1),
1037-
lastElementIndex(parameter.laneNumberBits - 1, 0)
1039+
lastElementIndex(parameter.laneNumberBits + 2 - 1 + dataPathScaleBit, 2 + dataPathScaleBit),
1040+
lastElementIndex(parameter.laneNumberBits + 1 - 1 + dataPathScaleBit, 1 + dataPathScaleBit),
1041+
lastElementIndex(parameter.laneNumberBits - 1 + dataPathScaleBit, dataPathScaleBit)
10381042
)
10391043
)
10401044

@@ -1057,11 +1061,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
10571061
val isLastLaneForMaskLogic: Bool = lastLaneIndexForMaskLogic === laneIndex
10581062
val lastGroupCountForMaskLogic: UInt = (maskeDataGroup >> parameter.laneNumberBits).asUInt -
10591063
((vlBody.orR || dataPathMisaligned) && (laneIndex > lastLaneIndexForMaskLogic))
1064+
val vlTailWidth: Int = log2Ceil(parameter.datapathWidth / 8)
10601065
val misalignedForOther: Bool = Mux1H(
1061-
requestVSew1H(1, 0),
1066+
requestVSew1H(2, 0),
10621067
Seq(
1063-
csrInterface.vl(1, 0).orR,
1064-
csrInterface.vl(0)
1068+
csrInterface.vl(vlTailWidth - 1, 0).orR,
1069+
csrInterface.vl(vlTailWidth - 2, 0).orR,
1070+
if (vlTailWidth - 3 >= 0) csrInterface.vl(vlTailWidth - 3, 0).orR else false.B
10651071
)
10661072
)
10671073

t1/src/LaneFloat.scala

+7-7
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ case class LaneFloatParam(eLen: Int, latency: Int, laneScale: Int)
2424
val datapathWidth: Int = eLen * laneScale
2525
val decodeField: BoolField = Decoder.float
2626
val inputBundle = new LaneFloatRequest(datapathWidth)
27-
val outputBundle = new LaneFloatResponse(datapathWidth, laneScale)
27+
val outputBundle = new LaneFloatResponse(datapathWidth)
2828
override val NeedSplit: Boolean = false
2929
}
3030

@@ -53,9 +53,9 @@ class LaneFloatRequest(datapathWidth: Int) extends VFUPipeBundle {
5353
val executeIndex: UInt = UInt(2.W)
5454
}
5555

56-
class LaneFloatResponse(datapathWidth: Int, laneScale: Int) extends VFUPipeBundle {
56+
class LaneFloatResponse(datapathWidth: Int) extends VFUPipeBundle {
5757
val data = UInt(datapathWidth.W)
58-
val adderMaskResp = UInt(laneScale.W)
58+
val adderMaskResp = UInt((datapathWidth / 8).W)
5959
val exceptionFlags = UInt(5.W)
6060
val executeIndex: UInt = UInt(2.W)
6161
}
@@ -69,7 +69,7 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule with Serializab
6969
val omInstance: Instance[LaneFloatOM] = Instantiate(new LaneFloatOM(parameter))
7070
omInstance.retimeIn.foreach(_ := Property(Path(clock)))
7171

72-
val response: LaneFloatResponse = Wire(new LaneFloatResponse(parameter.datapathWidth, parameter.laneScale))
72+
val response: LaneFloatResponse = Wire(new LaneFloatResponse(parameter.datapathWidth))
7373
val request: LaneFloatRequest = connectIO(response, true.B).asTypeOf(parameter.inputBundle)
7474

7575
val responseVec: Seq[(UInt, UInt)] = Seq.tabulate(parameter.laneScale) { index =>
@@ -114,7 +114,7 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule with Serializab
114114

115115
val mulAddRecFN = Module(new MulAddRecFN(8, 24))
116116
val fmaIn0 = Mux(sub, recIn1, recIn0)
117-
val fmaIn1 = Mux(addsub, (BigInt(1) << (parameter.datapathWidth - 1)).U, Mux(rmaf, recIn2, recIn1))
117+
val fmaIn1 = Mux(addsub, (BigInt(1) << (parameter.eLen - 1)).U, Mux(rmaf, recIn2, recIn1))
118118
val fmaIn2 = Mux(
119119
sub,
120120
recIn0,
@@ -123,7 +123,7 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule with Serializab
123123
recIn2,
124124
Mux(
125125
maf && subRequest.floatMul,
126-
((subRequest.src(0) ^ subRequest.src(1)) & (BigInt(1) << (parameter.datapathWidth - 1)).U) << 1,
126+
((subRequest.src(0) ^ subRequest.src(1)) & (BigInt(1) << (parameter.eLen - 1)).U) << 1,
127127
recIn1
128128
)
129129
)
@@ -317,7 +317,7 @@ class LaneFloat(val parameter: LaneFloatParam) extends VFUModule with Serializab
317317
(result, flags)
318318
}
319319

320-
response.adderMaskResp := VecInit(responseVec.map(_._1(0))).asUInt
320+
response.adderMaskResp := VecInit(responseVec.map(_._1(parameter.eLen / 8 - 1, 0))).asUInt
321321
response.data := VecInit(responseVec.map(_._1)).asUInt
322322
response.exceptionFlags := VecInit(responseVec.map(_._2)).reduce(_ | _)
323323
response.executeIndex := request.executeIndex

t1/src/OtherUnit.scala

+11-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,17 @@ class OtherUnit(val parameter: OtherUnitParam) extends VFUModule with Serializab
111111
val differentSign: Bool = request.sign && roundSignBits && !request.src(1)(parameter.datapathWidth - 1)
112112
val clipResult = Mux(roundResultOverlap || differentSign, largestClipResult, roundResult)
113113

114-
val indexRes: UInt = ((request.groupIndex ## request.laneIndex ## request.executeIndex) >> request.vSew).asUInt
114+
val indexRes: UInt = Mux1H(
115+
vSewOH(2, 0),
116+
Seq(
117+
request.groupIndex ## request.laneIndex ## request.executeIndex,
118+
request.groupIndex ## request.laneIndex ## request.executeIndex(log2Ceil(parameter.dataPathByteWidth) - 2, 0),
119+
if (log2Ceil(parameter.dataPathByteWidth) > 2)
120+
request.groupIndex ## request.laneIndex ## request.executeIndex(log2Ceil(parameter.dataPathByteWidth) - 3, 0)
121+
else
122+
request.groupIndex ## request.laneIndex
123+
)
124+
)
115125

116126
val extendSign: Bool =
117127
request.sign && Mux1H(vSewOH, Seq(request.src.head(7), request.src.head(15), request.src.head(31)))

t1/src/T1.scala

+6-5
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,8 @@ case class T1Parameter(
232232
/** vLen in Byte. */
233233
val vlenb: Int = vLen / 8
234234

235-
/** The hardware width of [[datapathWidth]]. */
236-
val dataPathWidthBits: Int = log2Ceil(datapathWidth)
235+
/** Number of bits needed to index a byte within the datapath: log2Ceil([[datapathWidth]] / 8). */
236+
val dataPathByteBits: Int = log2Ceil(datapathWidth / 8)
237237

238238
/** 1 in MSB for instruction order. */
239239
val instructionIndexBits: Int = log2Ceil(chainingSize) + 1
@@ -336,6 +336,7 @@ case class T1Parameter(
336336
datapathWidth = datapathWidth,
337337
chainingSize = chainingSize,
338338
vLen = vLen,
339+
eLen = eLen,
339340
laneNumber = laneNumber,
340341
paWidth = xLen,
341342
// TODO: configurable for each LSU
@@ -720,14 +721,14 @@ class T1(val parameter: T1Parameter)
720721
val evlForLane: UInt = Mux(
721722
decodeResult(Decoder.nr),
722723
// evl for Whole Vector Register Move -> (instruction(17, 15) + 1) * (vLen / eLen)
723-
(requestRegDequeue.bits.instruction(17, 15) +& 1.U) ## 0.U(log2Ceil(parameter.vLen / parameter.datapathWidth).W),
724+
(requestRegDequeue.bits.instruction(17, 15) +& 1.U) ## 0.U(log2Ceil(parameter.vLen / parameter.eLen).W),
724725
requestReg.bits.issue.vl
725726
)
726727

727-
val vSewForLsu: UInt = Mux(lsWholeReg, 2.U, requestRegDequeue.bits.instruction(13, 12))
728+
val vSewForLsu: UInt = Mux(lsWholeReg, log2Ceil(parameter.eLen / 8).U, requestRegDequeue.bits.instruction(13, 12))
728729
val evlForLsu: UInt = Mux(
729730
lsWholeReg,
730-
(requestRegDequeue.bits.instruction(31, 29) +& 1.U) ## 0.U(log2Ceil(parameter.vLen / parameter.datapathWidth).W),
731+
(requestRegDequeue.bits.instruction(31, 29) +& 1.U) ## 0.U(log2Ceil(parameter.vLen / parameter.eLen).W),
731732
requestReg.bits.issue.vl
732733
)
733734

t1/src/VectorFunctionUnit.scala

+72-9
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,14 @@ object VFUInstantiateParameter {
162162
(
163163
SerializableModuleGenerator(
164164
classOf[OtherUnit],
165-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
165+
OtherUnitParam(
166+
32,
167+
log2Ceil(vLen) + 1,
168+
log2Ceil(vLen * 8 / dLen),
169+
log2Ceil(dLen / 32 / laneScale),
170+
4 * laneScale,
171+
1
172+
)
166173
),
167174
Seq.tabulate(chainingSize) { i => i }
168175
)
@@ -208,7 +215,14 @@ object VFUInstantiateParameter {
208215
(
209216
SerializableModuleGenerator(
210217
classOf[OtherUnit],
211-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
218+
OtherUnitParam(
219+
32,
220+
log2Ceil(vLen) + 1,
221+
log2Ceil(vLen * 8 / dLen),
222+
log2Ceil(dLen / 32 / laneScale),
223+
4 * laneScale,
224+
1
225+
)
212226
),
213227
Seq.tabulate(chainingSize) { i => i }
214228
)
@@ -248,7 +262,14 @@ object VFUInstantiateParameter {
248262
(
249263
SerializableModuleGenerator(
250264
classOf[OtherUnit],
251-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
265+
OtherUnitParam(
266+
32,
267+
log2Ceil(vLen) + 1,
268+
log2Ceil(vLen * 8 / dLen),
269+
log2Ceil(dLen / 32 / laneScale),
270+
4 * laneScale,
271+
1
272+
)
252273
),
253274
Seq(i)
254275
)
@@ -288,7 +309,14 @@ object VFUInstantiateParameter {
288309
(
289310
SerializableModuleGenerator(
290311
classOf[OtherUnit],
291-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
312+
OtherUnitParam(
313+
32,
314+
log2Ceil(vLen) + 1,
315+
log2Ceil(vLen * 8 / dLen),
316+
log2Ceil(dLen / 32 / laneScale),
317+
4 * laneScale,
318+
1
319+
)
292320
),
293321
Seq(i)
294322
)
@@ -325,7 +353,14 @@ object VFUInstantiateParameter {
325353
(
326354
SerializableModuleGenerator(
327355
classOf[OtherUnit],
328-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
356+
OtherUnitParam(
357+
32,
358+
log2Ceil(vLen) + 1,
359+
log2Ceil(vLen * 8 / dLen),
360+
log2Ceil(dLen / 32 / laneScale),
361+
4 * laneScale,
362+
1
363+
)
329364
),
330365
Seq(i)
331366
)
@@ -370,7 +405,14 @@ object VFUInstantiateParameter {
370405
(
371406
SerializableModuleGenerator(
372407
classOf[OtherUnit],
373-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
408+
OtherUnitParam(
409+
32,
410+
log2Ceil(vLen) + 1,
411+
log2Ceil(vLen * 8 / dLen),
412+
log2Ceil(dLen / 32 / laneScale),
413+
4 * laneScale,
414+
1
415+
)
374416
),
375417
Seq.tabulate(chainingSize) { i => i }
376418
)
@@ -410,7 +452,14 @@ object VFUInstantiateParameter {
410452
(
411453
SerializableModuleGenerator(
412454
classOf[OtherUnit],
413-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
455+
OtherUnitParam(
456+
32,
457+
log2Ceil(vLen) + 1,
458+
log2Ceil(vLen * 8 / dLen),
459+
log2Ceil(dLen / 32 / laneScale),
460+
4 * laneScale,
461+
1
462+
)
414463
),
415464
Seq.tabulate(chainingSize) { i => i }
416465
)
@@ -451,7 +500,14 @@ object VFUInstantiateParameter {
451500
(
452501
SerializableModuleGenerator(
453502
classOf[OtherUnit],
454-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
503+
OtherUnitParam(
504+
32,
505+
log2Ceil(vLen) + 1,
506+
log2Ceil(vLen * 8 / dLen),
507+
log2Ceil(dLen / 32 / laneScale),
508+
4 * laneScale,
509+
1
510+
)
455511
),
456512
Seq.tabulate(chainingSize) { i => i }
457513
)
@@ -492,7 +548,14 @@ object VFUInstantiateParameter {
492548
(
493549
SerializableModuleGenerator(
494550
classOf[OtherUnit],
495-
OtherUnitParam(32, log2Ceil(vLen) + 1, log2Ceil(vLen * 8 / dLen), log2Ceil(dLen / 32), 4, 1)
551+
OtherUnitParam(
552+
32,
553+
log2Ceil(vLen) + 1,
554+
log2Ceil(vLen * 8 / dLen),
555+
log2Ceil(dLen / 32 / laneScale),
556+
4 * laneScale,
557+
1
558+
)
496559
),
497560
Seq.tabulate(chainingSize) { i => i }
498561
)

0 commit comments

Comments
 (0)