Skip to content

Commit ceb3861

Browse files
authored
Merge pull request #4173 from oharboe/mock-cpu
asap7/mock-cpu: fix fabricated hold budget at async-FIFO macro boundary
2 parents 26b7130 + f0a31fd commit ceb3861

4 files changed

Lines changed: 117 additions & 52 deletions

File tree

flow/designs/asap7/mock-cpu/config.mk

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,22 @@ export VERILOG_FILES = $(wildcard $(DESIGN_HOME)/src/fifo/*.v)
77
export SDC_FILE = $(DESIGN_HOME)/$(PLATFORM)/$(DESIGN_NICKNAME)/constraint.sdc
88
export SDC_FILE_EXTRA = $(DESIGN_HOME)/src/mock-array/util.tcl
99

10+
# The SDC references fifo_in/<pin> and fifo_out/<pin> directly. Two
11+
# knobs must agree to make those pin paths resolve in OpenSTA:
12+
#
13+
# 1. Yosys must keep the fifo1 module boundary through flattening.
14+
# SYNTH_KEEP_MODULES doesn't work here because hierarchy elaboration
15+
# specializes fifo1 into $paramod$<hash>\fifo1 before the flow's
16+
# keep loop runs — instead we use an (* keep_hierarchy *) RTL
17+
# attribute on the module itself (see src/fifo/fifo1.v).
18+
#
19+
# 2. OpenROAD must link the netlist hierarchically, otherwise
20+
# link_design flattens the fifo_in / fifo_out instances even though
21+
# Yosys preserved them. OPENROAD_HIERARCHICAL=1 switches link_design
22+
# to -hier mode. (Same mechanism used by asap7/mock-alu, cva6,
23+
# swerv_wrapper.)
24+
export OPENROAD_HIERARCHICAL = 1
25+
1026
export CORE_UTILIZATION = 40
1127
export CORE_ASPECT_RATIO = 1
1228
export CORE_MARGIN = 2
Lines changed: 78 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
1-
# https://gist.github.com/brabect1/7695ead3d79be47576890bbcd61fe426
1+
# mock-cpu: multi-clock async-FIFO bridge macro.
2+
#
3+
# PR #4170 idiom (multi-clock variant): optimization targets use
4+
# set_max_delay -ignore_clock_latency so hold-fixing does not invent
5+
# phantom budgets against the deep clock tree's insertion delay. See
6+
# flow/platforms/asap7/constraints.sdc lines 1-56 for the single-clock
7+
# rationale; this file can't `source` that template because mock-cpu
8+
# has two async clocks.
9+
#
10+
# The IO optimization targets below are deliberately surgical:
11+
# set_max_delay from top-level input ports to fifo_in/<pin> and
12+
# fifo_out/rinc rather than -to [all_registers]; outputs keep -from
13+
# [all_registers] (STA-1554, below). Functionally equivalent for this topology
14+
# (all IO paths begin/end at the FIFO), but it exercises more flow
15+
# features — (* keep_hierarchy *) hierarchy preservation, hierarchical
16+
# get_pins selection, and io2fifo path grouping. Intentional
17+
# regression coverage; do not "simplify" back to [all_registers].
218
#
3-
# This fifo is from http://www.sunburst-design.com/papers/CummingsSNUG2002SJ_FIFO1.pdf
19+
# (* keep_hierarchy *) on the fifo1 module (src/fifo/fifo1.v) preserves
20+
# the FIFO instance boundary through Yosys flattening so the fifo_in/<pin>
21+
# and fifo_out/<pin> paths below resolve. An RTL attribute is used rather
22+
# than SYNTH_KEEP_MODULES because the latter matches exact module names
23+
# and hierarchy elaboration specializes fifo1 into $paramod$<hash>\fifo1
24+
# before SYNTH_KEEP_MODULES runs.
25+
#
26+
# FIFO RTL: Cummings SNUG 2002 — gray-coded pointers, 2-FF synchronizers
27+
# (sync_r2w, sync_w2r). Metastability handled by construction.
28+
# https://gist.github.com/brabect1/7695ead3d79be47576890bbcd61fe426
429

530
source $::env(SDC_FILE_EXTRA)
631

@@ -10,57 +35,73 @@ set clk_period 333
1035
set clk2_period 1000
1136

1237
set clk1_name clk
13-
create_clock -name $clk1_name -period $clk_period -waveform \
14-
[list 0 [expr $clk_period/2]] [get_ports $clk1_name]
38+
create_clock -name $clk1_name -period $clk_period \
39+
-waveform [list 0 [expr $clk_period/2]] [get_ports $clk1_name]
1540
set_clock_uncertainty 10 [get_clocks $clk1_name]
1641

1742
set clk2_name clk_uncore
18-
create_clock -name $clk2_name -period $clk2_period -waveform \
19-
[list 0 [expr $clk_period/2]] [get_ports $clk2_name]
43+
create_clock -name $clk2_name -period $clk2_period \
44+
-waveform [list 0 [expr $clk2_period/2]] [get_ports $clk2_name]
2045
set_clock_uncertainty 10 [get_clocks $clk2_name]
46+
2147
set_clock_groups -group $clk1_name -group $clk2_name -asynchronous -allow_paths
2248

49+
# Async reset distribution.
2350
set_false_path -from [get_ports *rst_n]
2451
set_false_path -to [get_ports *rst_n]
2552

26-
# The mock-cpu is a macro connecting to a slower peripheral bus and possibly DRAM.
27-
# Avoid using set_input/output_delay here.
28-
# Register-to-register paths are checked at the mock-cpu level or from the mock-cpu
29-
# .lib file to an external register.
30-
# Timing closure is ensured at the SoC level where the mock-cpu is connected.
31-
# Instead, set strict optimization targets for inputs and outputs to ensure
32-
# constraints are not too loose.
33-
set non_clk_inputs {}
34-
set clock_ports [list [get_ports $clk1_name] [get_ports $clk2_name]]
35-
foreach input [all_inputs] {
36-
if { [lsearch -exact $clock_ports $input] == -1 } {
37-
lappend non_clk_inputs $input
38-
}
39-
}
53+
# Timing firewall: surgical port <-> FIFO boundary optimization targets.
54+
# Internal 1024-stage pipeline is reg2reg, constrained by the clock
55+
# period alone. IO paths end/begin at the FIFO boundary — no further.
56+
set io_target 80
4057

41-
set_max_delay 80 -from $non_clk_inputs -to [all_outputs]
42-
group_path -name in2out -from $non_clk_inputs -to [all_outputs]
58+
set fifo_in_wdata [get_pins fifo_in/wdata[*]]
59+
set fifo_in_winc [get_pins fifo_in/winc]
60+
set fifo_out_rinc [get_pins fifo_out/rinc]
4361

44-
set all_register_outputs [get_pins -of_objects [all_registers] -filter {direction == output}]
45-
set_max_delay 80 -from $non_clk_inputs -to [all_registers]
46-
set_max_delay 80 -from $all_register_outputs -to [all_outputs]
47-
group_path -name in2reg -from $non_clk_inputs -to [all_registers]
62+
# Port -> FIFO. -to on a hierarchical instance input pin is accepted:
63+
# OpenSTA traverses into the instance and finds the leaf endpoint.
64+
set_max_delay -ignore_clock_latency $io_target \
65+
-from [get_ports wdata*] -to $fifo_in_wdata
66+
set_max_delay -ignore_clock_latency $io_target \
67+
-from [get_ports winc] -to $fifo_in_winc
68+
set_max_delay -ignore_clock_latency $io_target \
69+
-from [get_ports rinc] -to $fifo_out_rinc
70+
71+
# FIFO -> Port. The symmetric surgical form -from $fifo_out_<pin>
72+
# hits STA-1554 ("not a valid start point") because a hierarchical
73+
# instance output pin has no implicit launch clock. Use
74+
# [all_registers] instead — OPENROAD_HIERARCHICAL=1 plus the fifo1
75+
# keep_hierarchy makes [all_registers] enumerate leaf flops inside
76+
# fifo_out (fifomem and pointer-sync flops) whose Q pins are valid
77+
# start points, matching the platform template's single-clock form.
78+
# rdata is excluded here; it's false_path'd at the bottom.
79+
set_max_delay -ignore_clock_latency $io_target \
80+
-from [all_registers] -to [get_ports rempty]
81+
set_max_delay -ignore_clock_latency $io_target \
82+
-from [all_registers] -to [get_ports wfull]
83+
84+
group_path -name io2fifo \
85+
-from [all_inputs -no_clocks] \
86+
-to [list $fifo_in_wdata $fifo_in_winc $fifo_out_rinc]
4887
group_path -name reg2out -from [all_registers] -to [all_outputs]
4988
group_path -name reg2reg -from [all_registers] -to [all_registers]
5089

51-
## Dual clock fifo timing constraints
52-
# Using fastest clock as constaint
90+
# Dual-clock FIFO CDC: bound combinational delay on pointer-sync paths
91+
# (sync_r2w, sync_w2r) to the fastest clock period, ignore clock
92+
# latency (deep tree), and declare hold false — gray-coded pointers
93+
# and 2-FF synchronizers handle metastability by construction.
5394
set cdc_max_delay $clk_period
54-
55-
# rdata from fifo_out goes directly to I/O-pins so we need special handling of this case
56-
# to ignore timing path from wclk -> rdata for this special case
57-
# In normal cases fifo output (rdata) will most likely have a FF on I/O output signal
58-
set_false_path -from $clk1_name -to [match_pins rdata* output 0]
59-
60-
# Set timing constraint between clock domains
6195
set_max_delay $cdc_max_delay -from $clk1_name -to $clk2_name -ignore_clock_latency
6296
set_max_delay $cdc_max_delay -from $clk2_name -to $clk1_name -ignore_clock_latency
63-
64-
# Hold times between clock domain makes no sense, and should just be ignored
6597
set_false_path -hold -from $clk1_name -to $clk2_name
6698
set_false_path -hold -from $clk2_name -to $clk1_name
99+
100+
# rdata port has no launch FF on the IO side. It's driven
101+
# combinationally by fifo_out.fifomem (mem[raddr]):
102+
# clk-clocked fifomem flops -> rdata (wclk launch path)
103+
# clk_uncore-clocked rbin -> raddr -> mem mux -> rdata (rclk launch)
104+
# Both are "valid when rempty is low" by FIFO protocol, not a
105+
# single-cycle timing requirement. Declare every path to rdata as false — normal
106+
# FIFO deployments would put an FF on rdata in the consumer domain.
107+
set_false_path -to [get_ports rdata*]

flow/designs/asap7/mock-cpu/rules-base.json

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,38 @@
11
{
22
"synth__design__instance__area__stdcell": {
3-
"value": 7302.54,
3+
"value": 7400.0,
44
"compare": "<="
55
},
66
"constraints__clocks__count": {
77
"value": 2,
88
"compare": "=="
99
},
1010
"placeopt__design__instance__area": {
11-
"value": 7379,
11+
"value": 7471,
1212
"compare": "<="
1313
},
1414
"placeopt__design__instance__count__stdcell": {
15-
"value": 46274,
15+
"value": 47171,
1616
"compare": "<="
1717
},
1818
"detailedplace__design__violations": {
1919
"value": 0,
2020
"compare": "=="
2121
},
2222
"cts__design__instance__count__setup_buffer": {
23-
"value": 530,
23+
"value": 4102,
2424
"compare": "<="
2525
},
2626
"cts__design__instance__count__hold_buffer": {
27-
"value": 530,
27+
"value": 4102,
2828
"compare": "<="
2929
},
3030
"cts__timing__setup__ws": {
31-
"value": -89.5,
31+
"value": -16.6,
3232
"compare": ">="
3333
},
3434
"cts__timing__setup__tns": {
35-
"value": -1730.0,
35+
"value": -66.6,
3636
"compare": ">="
3737
},
3838
"cts__timing__hold__ws": {
@@ -48,11 +48,11 @@
4848
"compare": "<="
4949
},
5050
"globalroute__timing__setup__ws": {
51-
"value": -90.9,
51+
"value": -16.6,
5252
"compare": ">="
5353
},
5454
"globalroute__timing__setup__tns": {
55-
"value": -2160.0,
55+
"value": -66.6,
5656
"compare": ">="
5757
},
5858
"globalroute__timing__hold__ws": {
@@ -64,7 +64,7 @@
6464
"compare": ">="
6565
},
6666
"detailedroute__route__wirelength": {
67-
"value": 50994,
67+
"value": 55508,
6868
"compare": "<="
6969
},
7070
"detailedroute__route__drc_errors": {
@@ -80,23 +80,23 @@
8080
"compare": "<="
8181
},
8282
"finish__timing__setup__ws": {
83-
"value": -84.6,
83+
"value": -16.6,
8484
"compare": ">="
8585
},
8686
"finish__timing__setup__tns": {
87-
"value": -2050.0,
87+
"value": -66.6,
8888
"compare": ">="
8989
},
9090
"finish__timing__hold__ws": {
91-
"value": -16.6,
91+
"value": -17.4,
9292
"compare": ">="
9393
},
9494
"finish__timing__hold__tns": {
95-
"value": -66.6,
95+
"value": -67.3,
9696
"compare": ">="
9797
},
9898
"finish__design__instance__area": {
99-
"value": 7617,
99+
"value": 8049,
100100
"compare": "<="
101101
}
102102
}

flow/designs/src/fifo/fifo1.v

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
// (* keep_hierarchy *) preserves the fifo1 instance boundary (fifo_in,
2+
// fifo_out in mock_cpu) through Yosys flattening so the SDC can
3+
// reference fifo_in/<pin> and fifo_out/<pin> directly. SYNTH_KEEP_MODULES
4+
// doesn't work here because hierarchy elaboration specializes fifo1 into
5+
// $paramod$<hash>\fifo1 variants before the flow's SYNTH_KEEP_MODULES
6+
// loop runs. An RTL attribute rides through elaboration onto each
7+
// specialized clone.
8+
(* keep_hierarchy *)
19
module fifo1 #(
210
parameter DSIZE = 8,
311
parameter ASIZE = 4

0 commit comments

Comments
 (0)