Skip to content

Commit 5fa9b5d

Browse files
Amanda Shi authored and committed
change the gpu requant write addr
1 parent b15decb commit 5fa9b5d

2 files changed

Lines changed: 21 additions & 13 deletions

File tree

src/main/scala/gemmini/MxRequantizer.scala

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,8 @@ class MxRequantizer[T <: Data](
175175
dontTouch(quantized_buffer)
176176
dontTouch( should_compute)
177177
should_compute := false.B
178-
178+
val quant_half_counter = RegInit(false.B)
179+
val first_half_buf = RegInit(0.U(128.W))
179180

180181
val total_bits_per_element = WireDefault(0.U(5.W))
181182
total_bits_per_element := 1.U +& exp_bits +& mant_bits
@@ -213,7 +214,13 @@ class MxRequantizer[T <: Data](
213214
data_buffer_counter := ~data_buffer_counter
214215
}
215216

216-
217+
val gpu_addr = RegInit(0.U((32).W))
218+
when(io.requant_data_in_gpu.fire && data_buffer_counter === 0.U && total_bits_per_element === 8.U){
219+
gpu_addr := (io.requant_data_in_gpu.bits.address >> 1)
220+
}.elsewhen(io.requant_data_in_gpu.fire && data_buffer_counter === 0.U && total_bits_per_element =/= 8.U && !quant_half_counter){
221+
gpu_addr := (io.requant_data_in_gpu.bits.address >> 2)
222+
}
223+
217224
when(io.mxacc_req.mx_data_in.fire) {
218225
pipe_in.valid := true.B
219226
pipe_in.bits.mx_mode := io.mxacc_req.mx_mode
@@ -232,9 +239,11 @@ class MxRequantizer[T <: Data](
232239
pipe_in.bits.out.full_mx_data_out := VecInit(combined.reverse).asTypeOf(half_acc_row_t)
233240
pipe_in.bits.out.fromDMA := false.B
234241
pipe_in.bits.is_gpu := true.B
235-
pipe_in.bits.gpu_addr := io.requant_data_in_gpu.bits.address
242+
pipe_in.bits.gpu_addr := gpu_addr
236243
}
237244

245+
246+
238247
// for (i <- 1 until pipelineLatency) {
239248
// pipelined_out(i) := pipelined_out(i-1)
240249
// }
@@ -276,8 +285,7 @@ class MxRequantizer[T <: Data](
276285
// }
277286

278287
// Two-cycle accumulation registers for FP4 / FP6:
279-
val quant_half_counter = RegInit(false.B)
280-
val first_half_buf = RegInit(0.U(128.W))
288+
281289

282290

283291
val fp6_lut_out = Cat(quantLut.io.projected_data.bits.reverse)
@@ -369,7 +377,7 @@ class MxRequantizer[T <: Data](
369377

370378

371379
final_pipe_out.ready := Mux(final_pipe_out.bits.is_gpu,
372-
io.requant_data_out.ready,
380+
io.requant_data_out.ready || final_pipe_out.bits.out.is_garbage,
373381
io.mxacc_req.mx_data_out.ready)
374382
oldest_pipe_out.ready := final_pipe_out.ready
375383

@@ -378,33 +386,33 @@ class MxRequantizer[T <: Data](
378386
val helding_flag = RegInit(0.U)
379387
when (io.requant_data_out.fire){
380388
helding_flag := 0.U
381-
}.elsewhen(final_pipe_out.bits.is_gpu && final_pipe_out.valid && !io.requant_data_out.ready){
389+
}.elsewhen(final_pipe_out.bits.is_gpu && final_pipe_out.valid && !io.requant_data_out.ready && !final_pipe_out.bits.out.is_garbage){
382390
helding_flag := 1.U
383391
}
384392

385-
when(final_pipe_out.bits.is_gpu && final_pipe_out.valid){
393+
when(final_pipe_out.bits.is_gpu && final_pipe_out.valid && !final_pipe_out.bits.out.is_garbage){
386394
when(helding_flag === 1.U && io.requant_data_out.ready){
387395
io.requant_data_out.bits.data := gpu_out_held
388396
io.requant_data_out.valid := true.B
389397
io.requant_data_out.bits.dataType := RequantizerDataType(format_reg)
390-
io.requant_data_out.bits.address := Mux(format_reg===0.U, final_pipe_out.bits.gpu_addr, final_pipe_out.bits.gpu_addr >>1)
398+
io.requant_data_out.bits.address := final_pipe_out.bits.gpu_addr
391399
}.elsewhen(helding_flag === 0.U && io.requant_data_out.ready){
392400
io.requant_data_out.bits.data := final_pipe_out.bits.out.quant_mx_data_out.asUInt
393401
io.requant_data_out.valid := true.B
394402
io.requant_data_out.bits.dataType := RequantizerDataType(format_reg)
395-
io.requant_data_out.bits.address := Mux(format_reg===0.U, final_pipe_out.bits.gpu_addr, final_pipe_out.bits.gpu_addr >> 1)
403+
io.requant_data_out.bits.address := final_pipe_out.bits.gpu_addr
396404
}.otherwise{
397405
gpu_out_held := final_pipe_out.bits.out.quant_mx_data_out.asUInt
398406
io.requant_data_out.bits.data := 0.U
399407
io.requant_data_out.valid := false.B
400408
io.requant_data_out.bits.dataType := RequantizerDataType(format_reg)
401-
io.requant_data_out.bits.address := Mux(format_reg===0.U, final_pipe_out.bits.gpu_addr, final_pipe_out.bits.gpu_addr >> 1)
409+
io.requant_data_out.bits.address := final_pipe_out.bits.gpu_addr
402410
}
403411
}.otherwise{
404412
io.requant_data_out.bits.data := 0.U
405413
io.requant_data_out.valid := false.B
406414
io.requant_data_out.bits.dataType := RequantizerDataType(format_reg)
407-
io.requant_data_out.bits.address := Mux(format_reg===0.U, final_pipe_out.bits.gpu_addr, final_pipe_out.bits.gpu_addr >> 1)
415+
io.requant_data_out.bits.address := final_pipe_out.bits.gpu_addr
408416
}
409417

410418
should_compute := false.B

0 commit comments

Comments
 (0)