|
2 | 2 |
|
3 | 3 | // `include "warp_defines.v" |
4 | 4 |
|
5 | | -`define XSHIFT_OP_SHL 2'b00 |
6 | | -`define XSHIFT_OP_SHR 2'b01 |
7 | | -`define XSHIFT_OP_ROL 2'b10 |
8 | | -`define XSHIFT_OP_ROR 2'b11 |
9 | | - |
10 | 5 | // scalar integer arithmetic unit - add/sub, set less than, min/max, branch |
11 | 6 | // latency: 1 cycle |
12 | 7 | // initiation interval: 1 cycle |
@@ -383,6 +378,105 @@ module warp_xshift ( |
383 | 378 | input wire i_word, |
384 | 379 | output wire [63:0] o_result |
385 | 380 | ); |
// Per-stage outputs of the 64-bit barrel shifter, grouped by stage.
// Each stage conditionally moves the data by a power-of-two bit count.
wire [63:0] stage0_shl, stage0_shr, stage0_rol, stage0_ror;
wire [63:0] stage1_shl, stage1_shr, stage1_rol, stage1_ror;
wire [63:0] stage2_shl, stage2_shr, stage2_rol, stage2_ror;
wire [63:0] stage3_shl, stage3_shr, stage3_rol, stage3_ror;
wire [63:0] stage4_shl, stage4_shr, stage4_rol, stage4_ror;
wire [63:0] stage5_shl, stage5_shr, stage5_rol, stage5_ror;

// Per-stage outputs of the 32-bit rotator (stages 0 to 4 only).
wire [31:0] stage0_rol_32, stage0_ror_32;
wire [31:0] stage1_rol_32, stage1_ror_32;
wire [31:0] stage2_rol_32, stage2_ror_32;
wire [31:0] stage3_rol_32, stage3_ror_32;
wire [31:0] stage4_rol_32, stage4_ror_32;

// Selected 32-bit rotate result and its 64-bit extension.
wire [31:0] rotated_32_result;
wire [63:0] rotated_32;

// Operand fed to the 64-bit stages, and the selected 64-bit stage result.
wire [63:0] operand_in;
wire [63:0] o_result_tmp;
| 395 | + |
// Output select.
//   i_word = 0 : the 64-bit result o_result_tmp is passed through.
//   i_word = 1 : rotates return rotated_32; every other operation returns the
//                low word of o_result_tmp, extended from its bit 31.
wire        word_is_rotate    = (i_opsel == `XSHIFT_OP_ROL) || (i_opsel == `XSHIFT_OP_ROR);
wire [63:0] word_shift_result = {{32{o_result_tmp[31]}}, o_result_tmp[31:0]};

assign o_result = i_word ? (word_is_rotate ? rotated_32 : word_shift_result)
                         : o_result_tmp;
| 401 | + |
// Operand for the 64-bit shifter stages.
// In word mode only the low 32 bits of i_operand are used. Bits [63:32] are
// filled with bit 31 only when i_arithmetic is set, so that a logical right
// shift brings zeros (not copies of bit 31) into the low word. Left shifts
// never read the upper half, and word rotates take i_operand[31:0] directly,
// so only word-mode right shifts are affected by the fill value.
// NOTE(review): assumes i_arithmetic is asserted only for arithmetic right
// shifts - confirm against the decoder.
assign operand_in = i_word ? {{32{i_arithmetic & i_operand[31]}}, i_operand[31:0]} : i_operand;
| 404 | + |
// 64-bit barrel shifter.
// Stage k moves the data by 2^k positions when i_amount[k] is set and passes
// its input through unchanged otherwise. Right shifts fill the vacated bits
// with (i_arithmetic & bit 63 of the stage input).

// Stage 0: 1 position
assign stage0_shl = i_amount[0] ? (operand_in << 1) : operand_in;
assign stage0_shr = i_amount[0] ? {i_arithmetic & operand_in[63], operand_in[63:1]} : operand_in;
assign stage0_rol = i_amount[0] ? ((operand_in << 1) | (operand_in >> 63)) : operand_in;
assign stage0_ror = i_amount[0] ? ((operand_in >> 1) | (operand_in << 63)) : operand_in;

// Stage 1: 2 positions
assign stage1_shl = i_amount[1] ? (stage0_shl << 2) : stage0_shl;
assign stage1_shr = i_amount[1] ? {{2{i_arithmetic & stage0_shr[63]}}, stage0_shr[63:2]} : stage0_shr;
assign stage1_rol = i_amount[1] ? ((stage0_rol << 2) | (stage0_rol >> 62)) : stage0_rol;
assign stage1_ror = i_amount[1] ? ((stage0_ror >> 2) | (stage0_ror << 62)) : stage0_ror;

// Stage 2: 4 positions
assign stage2_shl = i_amount[2] ? (stage1_shl << 4) : stage1_shl;
assign stage2_shr = i_amount[2] ? {{4{i_arithmetic & stage1_shr[63]}}, stage1_shr[63:4]} : stage1_shr;
assign stage2_rol = i_amount[2] ? ((stage1_rol << 4) | (stage1_rol >> 60)) : stage1_rol;
assign stage2_ror = i_amount[2] ? ((stage1_ror >> 4) | (stage1_ror << 60)) : stage1_ror;

// Stage 3: 8 positions
assign stage3_shl = i_amount[3] ? (stage2_shl << 8) : stage2_shl;
assign stage3_shr = i_amount[3] ? {{8{i_arithmetic & stage2_shr[63]}}, stage2_shr[63:8]} : stage2_shr;
assign stage3_rol = i_amount[3] ? ((stage2_rol << 8) | (stage2_rol >> 56)) : stage2_rol;
assign stage3_ror = i_amount[3] ? ((stage2_ror >> 8) | (stage2_ror << 56)) : stage2_ror;

// Stage 4: 16 positions
assign stage4_shl = i_amount[4] ? (stage3_shl << 16) : stage3_shl;
assign stage4_shr = i_amount[4] ? {{16{i_arithmetic & stage3_shr[63]}}, stage3_shr[63:16]} : stage3_shr;
assign stage4_rol = i_amount[4] ? ((stage3_rol << 16) | (stage3_rol >> 48)) : stage3_rol;
assign stage4_ror = i_amount[4] ? ((stage3_ror >> 16) | (stage3_ror << 48)) : stage3_ror;

// Stage 5: 32 positions. The shifts skip this stage when i_word is set;
// the rotates are gated by i_amount[5] alone.
wire shift32_en = i_amount[5] & ~i_word;
assign stage5_shl = shift32_en ? (stage4_shl << 32) : stage4_shl;
assign stage5_shr = shift32_en ? {{32{i_arithmetic & stage4_shr[63]}}, stage4_shr[63:32]} : stage4_shr;
assign stage5_rol = i_amount[5] ? ((stage4_rol << 32) | (stage4_rol >> 32)) : stage4_rol;
assign stage5_ror = i_amount[5] ? ((stage4_ror >> 32) | (stage4_ror << 32)) : stage4_ror;
| 441 | + |
// 32-bit rotator used for word-mode rotates. It works on i_operand[31:0] and
// uses i_amount[4:0]; stage k rotates by 2^k positions when i_amount[k] is set.
wire [31:0] word_src = i_operand[31:0];

// Stage 0: 1 position
assign stage0_rol_32 = i_amount[0] ? ((word_src << 1) | (word_src >> 31)) : word_src;
assign stage0_ror_32 = i_amount[0] ? ((word_src >> 1) | (word_src << 31)) : word_src;

// Stage 1: 2 positions
assign stage1_rol_32 = i_amount[1] ? ((stage0_rol_32 << 2) | (stage0_rol_32 >> 30)) : stage0_rol_32;
assign stage1_ror_32 = i_amount[1] ? ((stage0_ror_32 >> 2) | (stage0_ror_32 << 30)) : stage0_ror_32;

// Stage 2: 4 positions
assign stage2_rol_32 = i_amount[2] ? ((stage1_rol_32 << 4) | (stage1_rol_32 >> 28)) : stage1_rol_32;
assign stage2_ror_32 = i_amount[2] ? ((stage1_ror_32 >> 4) | (stage1_ror_32 << 28)) : stage1_ror_32;

// Stage 3: 8 positions
assign stage3_rol_32 = i_amount[3] ? ((stage2_rol_32 << 8) | (stage2_rol_32 >> 24)) : stage2_rol_32;
assign stage3_ror_32 = i_amount[3] ? ((stage2_ror_32 >> 8) | (stage2_ror_32 << 24)) : stage2_ror_32;

// Stage 4: 16 positions (the two halves swap, so both directions use the same form)
assign stage4_rol_32 = i_amount[4] ? ((stage3_rol_32 << 16) | (stage3_rol_32 >> 16)) : stage3_rol_32;
assign stage4_ror_32 = i_amount[4] ? ((stage3_ror_32 >> 16) | (stage3_ror_32 << 16)) : stage3_ror_32;

// Pick the rotate-left result for XSHIFT_OP_ROL, the rotate-right result otherwise.
assign rotated_32_result = (i_opsel != `XSHIFT_OP_ROL) ? stage4_ror_32 : stage4_rol_32;

// Extend the 32-bit result to 64 bits by replicating its bit 31.
assign rotated_32 = {{32{rotated_32_result[31]}}, rotated_32_result};
| 468 | + |
// Final operation select for the 64-bit datapath.
// o_result_tmp is already declared as a wire earlier in this module, so it
// must not be declared again as a reg here. The case result is held in a
// dedicated reg and driven onto that wire with a continuous assignment.
reg [63:0] opsel_result;
always @(*) begin
    case (i_opsel)
        `XSHIFT_OP_SHL: opsel_result = stage5_shl;
        `XSHIFT_OP_SHR: opsel_result = stage5_shr;
        `XSHIFT_OP_ROL: opsel_result = stage5_rol;
        `XSHIFT_OP_ROR: opsel_result = stage5_ror;
        // Only reached for an i_opsel value not listed above (including x/z
        // in simulation); also guarantees no latch is inferred.
        default:        opsel_result = 64'hx;
    endcase
end
assign o_result_tmp = opsel_result;
386 | 480 | endmodule |
387 | 481 |
|
388 | 482 | // multiplies two 64 bit operands and outputs the lower 64 bits of |
|
0 commit comments