Skip to content

Commit d1eecd4

Browse files
committed
add shifting unit
1 parent 03fceae commit d1eecd4

2 files changed

Lines changed: 105 additions & 6 deletions

File tree

rtl/warp_defines.v

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,9 @@
9898
`define CANONICAL_NOP 32'h00000013
9999
`define BUNDLE_SIZE 68
100100

101-
`endif /* WARP_DEFINES */
101+
`define XSHIFT_OP_SHL 2'b00
102+
`define XSHIFT_OP_SHR 2'b01
103+
`define XSHIFT_OP_ROL 2'b10
104+
`define XSHIFT_OP_ROR 2'b11
105+
106+
`endif

rtl/warp_integer.v

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22

33
// `include "warp_defines.v"
44

5-
`define XSHIFT_OP_SHL 2'b00
6-
`define XSHIFT_OP_SHR 2'b01
7-
`define XSHIFT_OP_ROL 2'b10
8-
`define XSHIFT_OP_ROR 2'b11
9-
105
// scalar integer arithmetic unit - add/sub, set less than, min/max, branch
116
// latency: 1 cycle
127
// initiation interval: 1 cycle
@@ -383,6 +378,105 @@ module warp_xshift (
383378
input wire i_word,
384379
output wire [63:0] o_result
385380
);
381+
// Internal wires for each shift stage
382+
wire [63:0] stage0_rol, stage1_rol, stage2_rol, stage3_rol, stage4_rol, stage5_rol;
383+
wire [63:0] stage0_ror, stage1_ror, stage2_ror, stage3_ror, stage4_ror, stage5_ror;
384+
wire [63:0] stage0_shl, stage1_shl, stage2_shl, stage3_shl, stage4_shl, stage5_shl;
385+
wire [63:0] stage0_shr, stage1_shr, stage2_shr, stage3_shr, stage4_shr, stage5_shr;
386+
387+
// Per-stage wires (stages 0-4) for the separate 32-bit rotate path, plus its selected and sign-extended results
388+
wire [31:0] stage0_rol_32, stage1_rol_32, stage2_rol_32, stage3_rol_32, stage4_rol_32;
389+
wire [31:0] stage0_ror_32, stage1_ror_32, stage2_ror_32, stage3_ror_32, stage4_ror_32;
390+
wire [31:0] rotated_32_result;
391+
wire [63:0] rotated_32;
392+
393+
wire [63:0] operand_in;
394+
wire [63:0] o_result_tmp;
395+
396+
// When i_word is set, output either the 32-bit rotate result (for ROL/ROR) or the sign-extended low word of the shift result; otherwise output the full 64-bit result
397+
assign o_result = (i_word) ? (
398+
((i_opsel == `XSHIFT_OP_ROL) | (i_opsel == `XSHIFT_OP_ROR)) ? rotated_32 :
399+
{{32{o_result_tmp[31]}}, o_result_tmp[31:0]}
400+
) : o_result_tmp;
401+
402+
// Word-mode operand extension before the 64-bit shifter stages.
// The upper 32 bits are filled with bit 31 only when i_arithmetic is set.
// For a logical shift they are zero, so a right shift does not pull copies
// of bit 31 into the low word (the low word is what o_result keeps when
// i_word is set). Left shifts are unaffected: their low word never depends
// on the upper half. Word-mode rotates use the separate 32-bit rotate path.
assign operand_in = i_word ? {{32{i_arithmetic & i_operand[31]}}, i_operand[31:0]} : i_operand;
404+
405+
// Barrel shifter stages for 64-bit operations
406+
// Stage 0: shift or rotate by 1
407+
assign stage0_shl = (i_amount[0]) ? ({operand_in[62:0], 1'b0}) : operand_in;
408+
assign stage0_shr = (i_amount[0]) ? ({(i_arithmetic & operand_in[63]), operand_in[63:1]}) : operand_in;
409+
assign stage0_rol = (i_amount[0]) ? ({operand_in[62:0], operand_in[63]}) : operand_in;
410+
assign stage0_ror = (i_amount[0]) ? ({operand_in[0], operand_in[63:1]}) : operand_in;
411+
412+
// Stage 1: shift or rotate by 2
413+
assign stage1_shl = (i_amount[1]) ? ({stage0_shl[61:0], 2'b0}) : stage0_shl;
414+
assign stage1_shr = (i_amount[1]) ? ({{2{i_arithmetic & stage0_shr[63]}}, stage0_shr[63:2]}) : stage0_shr;
415+
assign stage1_rol = (i_amount[1]) ? ({stage0_rol[61:0], stage0_rol[63:62]}) : stage0_rol;
416+
assign stage1_ror = (i_amount[1]) ? ({stage0_ror[1:0], stage0_ror[63:2]}) : stage0_ror;
417+
418+
// Stage 2: shift or rotate by 4
419+
assign stage2_shl = (i_amount[2]) ? ({stage1_shl[59:0], 4'b0}) : stage1_shl;
420+
assign stage2_shr = (i_amount[2]) ? ({{4{i_arithmetic & stage1_shr[63]}}, stage1_shr[63:4]}) : stage1_shr;
421+
assign stage2_rol = (i_amount[2]) ? ({stage1_rol[59:0], stage1_rol[63:60]}) : stage1_rol;
422+
assign stage2_ror = (i_amount[2]) ? ({stage1_ror[3:0], stage1_ror[63:4]}) : stage1_ror;
423+
424+
// Stage 3: shift or rotate by 8
425+
assign stage3_shl = (i_amount[3]) ? ({stage2_shl[55:0], 8'b0}) : stage2_shl;
426+
assign stage3_shr = (i_amount[3]) ? ({{8{i_arithmetic & stage2_shr[63]}}, stage2_shr[63:8]}) : stage2_shr;
427+
assign stage3_rol = (i_amount[3]) ? ({stage2_rol[55:0], stage2_rol[63:56]}) : stage2_rol;
428+
assign stage3_ror = (i_amount[3]) ? ({stage2_ror[7:0], stage2_ror[63:8]}) : stage2_ror;
429+
430+
// Stage 4: shift or rotate by 16
431+
assign stage4_shl = (i_amount[4]) ? ({stage3_shl[47:0], 16'b0}) : stage3_shl;
432+
assign stage4_shr = (i_amount[4]) ? ({{16{i_arithmetic & stage3_shr[63]}}, stage3_shr[63:16]}) : stage3_shr;
433+
assign stage4_rol = (i_amount[4]) ? ({stage3_rol[47:0], stage3_rol[63:48]}) : stage3_rol;
434+
assign stage4_ror = (i_amount[4]) ? ({stage3_ror[15:0], stage3_ror[63:16]}) : stage3_ror;
435+
436+
// Stage 5: shift or rotate by 32 (shifts bypass this stage when i_word is set)
437+
assign stage5_shl = (i_amount[5] & ~i_word) ? ({stage4_shl[31:0], 32'b0}) : stage4_shl;
438+
assign stage5_shr = (i_amount[5] & ~i_word) ? ({{32{i_arithmetic & stage4_shr[63]}}, stage4_shr[63:32]}) : stage4_shr;
439+
assign stage5_rol = (i_amount[5]) ? ({stage4_rol[31:0], stage4_rol[63:32]}) : stage4_rol;
440+
assign stage5_ror = (i_amount[5]) ? ({stage4_ror[31:0], stage4_ror[63:32]}) : stage4_ror;
441+
442+
// 32-bit rotation stages - using separate wires for each operation
443+
// Stage 0: rotate by 1 (32 bit)
444+
assign stage0_rol_32 = (i_amount[0]) ? {i_operand[30:0], i_operand[31]} : i_operand[31:0];
445+
assign stage0_ror_32 = (i_amount[0]) ? {i_operand[0], i_operand[31:1]} : i_operand[31:0];
446+
447+
// Stage 1: rotate by 2 (32 bit)
448+
assign stage1_rol_32 = (i_amount[1]) ? {stage0_rol_32[29:0], stage0_rol_32[31:30]} : stage0_rol_32;
449+
assign stage1_ror_32 = (i_amount[1]) ? {stage0_ror_32[1:0], stage0_ror_32[31:2]} : stage0_ror_32;
450+
451+
// Stage 2: rotate by 4 (32 bit)
452+
assign stage2_rol_32 = (i_amount[2]) ? {stage1_rol_32[27:0], stage1_rol_32[31:28]} : stage1_rol_32;
453+
assign stage2_ror_32 = (i_amount[2]) ? {stage1_ror_32[3:0], stage1_ror_32[31:4]} : stage1_ror_32;
454+
455+
// Stage 3: rotate by 8 (32 bit)
456+
assign stage3_rol_32 = (i_amount[3]) ? {stage2_rol_32[23:0], stage2_rol_32[31:24]} : stage2_rol_32;
457+
assign stage3_ror_32 = (i_amount[3]) ? {stage2_ror_32[7:0], stage2_ror_32[31:8]} : stage2_ror_32;
458+
459+
// Stage 4: rotate by 16 (32 bit)
460+
assign stage4_rol_32 = (i_amount[4]) ? {stage3_rol_32[15:0], stage3_rol_32[31:16]} : stage3_rol_32;
461+
assign stage4_ror_32 = (i_amount[4]) ? {stage3_ror_32[15:0], stage3_ror_32[31:16]} : stage3_ror_32;
462+
463+
// Select the appropriate 32-bit rotation result based on operation
464+
assign rotated_32_result = (i_opsel == `XSHIFT_OP_ROL) ? stage4_rol_32 : stage4_ror_32;
465+
466+
// Sign extend the 32-bit result to 64 bits
467+
assign rotated_32 = {{32{rotated_32_result[31]}}, rotated_32_result};
468+
469+
// Final operation select for the 64-bit datapath.
// o_result_tmp is already declared as a wire near the top of this module,
// so it is driven here with a continuous assignment. Declaring it a second
// time as a reg would be a duplicate declaration of the same identifier.
// The 64'hx fallback mirrors a case default: it is only reached if i_opsel
// matches none of the four XSHIFT_OP_* encodings.
assign o_result_tmp =
    (i_opsel == `XSHIFT_OP_SHL) ? stage5_shl :
    (i_opsel == `XSHIFT_OP_SHR) ? stage5_shr :
    (i_opsel == `XSHIFT_OP_ROL) ? stage5_rol :
    (i_opsel == `XSHIFT_OP_ROR) ? stage5_ror :
                                  64'hx;
386480
endmodule
387481

388482
// multiplies two 64 bit operands and outputs the lower 64 bits of

0 commit comments

Comments
 (0)