diff --git a/.github/workflows/accelsim.yml b/.github/workflows/accelsim.yml
index 39ae46df0..08a65be11 100644
--- a/.github/workflows/accelsim.yml
+++ b/.github/workflows/accelsim.yml
@@ -15,7 +15,8 @@ on:
# By default regress against accel-sim's dev branch
env:
- ACCELSIM_BRANCH: dev
+ ACCELSIM_REPO: https://github.com/purdue-aalp/accel-sim-framework-public.git
+ ACCELSIM_BRANCH: dev-uvm
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
diff --git a/README.md b/README.md
index 84cfcd02c..90dd928e8 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,18 @@ Complex Dynamics in Many-Core Accelerator Architectures, In Proceedings of the
IEEE International Symposium on Performance Analysis of Systems and Software
(ISPASS), pp. 164-174, White Plains, NY, March 28-30, 2010.
+If you use prefetchers and page eviction policies, please cite:
+
+Debashis Ganguly, Ziyu Zhang, Jun Yang, and Rami Melhem, Interplay between hardware prefetcher and page eviction policy in CPU-GPU unified virtual memory, In Proceedings of the 46th International Symposium on Computer Architecture (ISCA '19), New York, NY, USA, 2019.
+
+If you use access counter-based delayed migration, LFU eviction, cold vs hot data structure classification, and page migration and pinning, please cite:
+
+Debashis Ganguly, Ziyu Zhang, Jun Yang, and Rami Melhem, Adaptive Page Migration for Irregular Data-intensive Applications under GPU Memory Oversubscription, In Proceedings of the 34th IEEE International Parallel & Distributed Processing Symposium (IPDPS 2020), New Orleans, Louisiana, USA, 2020.
+
+If you use adaptive runtime to detect pattern in CPU-GPU interconnect traffic, and policy engine to choose and dynamically employ memory management policies, please cite:
+
+Debashis Ganguly, Rami Melhem, and Jun Yang, An Adaptive Framework for Oversubscription Management in CPU-GPU Unified Memory, In 2021 Design, Automation & Test in Europe Conference & Exhibition (DATE 2021).
+
This file contains instructions on installing, building and running GPGPU-Sim.
Detailed documentation on what GPGPU-Sim models, how to configure it, and a
guide to the source code can be found here: .
diff --git a/configs/tested-cfgs/SM2_GTX480/gpgpusim.config b/configs/tested-cfgs/SM2_GTX480/gpgpusim.config
index 47637286e..bc01821db 100644
--- a/configs/tested-cfgs/SM2_GTX480/gpgpusim.config
+++ b/configs/tested-cfgs/SM2_GTX480/gpgpusim.config
@@ -56,8 +56,8 @@
# In Fermi, the cache and shared memory can be configured to 16kb:48kb(default) or 48kb:16kb
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# Note: Hashing set index function (H) only applies to a set size of 32 or 64.
-gpgpu_cache:dl1 N:32:128:4,L:L:m:N:H,S:64:8,8
-gpgpu_shmem_size 49152
diff --git a/configs/tested-cfgs/SM3_KEPLER_TITAN/gpgpusim.config b/configs/tested-cfgs/SM3_KEPLER_TITAN/gpgpusim.config
index 9cb328f5d..ef47ddfd9 100644
--- a/configs/tested-cfgs/SM3_KEPLER_TITAN/gpgpusim.config
+++ b/configs/tested-cfgs/SM3_KEPLER_TITAN/gpgpusim.config
@@ -99,8 +99,8 @@
# Greedy then oldest scheduler
-gpgpu_scheduler gto
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# Note: Hashing set index function (H) only applies to a set size of 32 or 64.
# The defulat is to disable the L1 cache, unless cache modifieres are used
-gpgpu_cache:dl1 S:4:128:32,L:L:s:N:L,A:256:8,16:0,32
diff --git a/configs/tested-cfgs/SM6_GTX1080Ti/config_fermi_islip.icnt b/configs/tested-cfgs/SM6_GTX1080Ti/config_fermi_islip.icnt
new file mode 100644
index 000000000..2a69ddd4c
--- /dev/null
+++ b/configs/tested-cfgs/SM6_GTX1080Ti/config_fermi_islip.icnt
@@ -0,0 +1,70 @@
+//21*1 fly with 32 flits per packet under gpgpusim injection mode
+use_map = 0;
+flit_size = 32;
+
+// currently we do not use this, see subnets below
+network_count = 2;
+
+// Topology
+topology = fly;
+k = 50;
+n = 1;
+
+// Routing
+
+routing_function = dest_tag;
+
+// Flow control
+
+num_vcs = 1;
+vc_buf_size = 8;
+
+wait_for_tail_credit = 0;
+
+// Router architecture
+
+vc_allocator = islip; //separable_input_first;
+sw_allocator = islip; //separable_input_first;
+alloc_iters = 1;
+
+credit_delay = 0;
+routing_delay = 0;
+vc_alloc_delay = 1;
+sw_alloc_delay = 1;
+
+input_speedup = 2;
+output_speedup = 1;
+internal_speedup = 1.0;
+
+// Traffic, GPGPU-Sim does not use this
+
+traffic = uniform;
+packet_size ={{1,2,3,4},{10,20}};
+packet_size_rate={{1,1,1,1},{2,1}};
+
+// Simulation - Don't change
+
+sim_type = gpgpusim;
+//sim_type = latency;
+injection_rate = 0.1;
+
+subnets = 2;
+
+// Always use read and write no matter following line
+//use_read_write = 1;
+
+
+read_request_subnet = 0;
+read_reply_subnet = 1;
+write_request_subnet = 0;
+write_reply_subnet = 1;
+
+read_request_begin_vc = 0;
+read_request_end_vc = 0;
+write_request_begin_vc = 0;
+write_request_end_vc = 0;
+read_reply_begin_vc = 0;
+read_reply_end_vc = 0;
+write_reply_begin_vc = 0;
+write_reply_end_vc = 0;
+
diff --git a/configs/tested-cfgs/SM6_GTX1080Ti/gpgpusim.config b/configs/tested-cfgs/SM6_GTX1080Ti/gpgpusim.config
new file mode 100644
index 000000000..03854ff27
--- /dev/null
+++ b/configs/tested-cfgs/SM6_GTX1080Ti/gpgpusim.config
@@ -0,0 +1,289 @@
+# This config models the Volta
+# For more info about volta architecture:
+# http://images.nvidia.com/content/volta-architecture/pdf/volta-architecture-whitepaper.pdf
+# https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8344474&tag=1#
+# http://on-demand.gputechconf.com/gtc/2018/presentation/s8122-dissecting-the-volta-gpu-architecture-through-microbenchmarking.pdf
+# https://en.wikipedia.org/wiki/Volta_(microarchitecture)
+# https://www.hotchips.org/wp-content/uploads/hc_archives/hc29/HC29.21-Monday-Pub/HC29.21.10-GPU-Gaming-Pub/HC29.21.132-Volta-Choquette-NVIDIA-Final3.pdf
+# https://devblogs.nvidia.com/inside-volta/
+# http://on-demand.gputechconf.com/gtc/2017/presentation/s7798-luke-durant-inside-volta.pdf
+
+# functional simulator specification
+-gpgpu_ptx_instruction_classification 0
+-gpgpu_ptx_sim_mode 0
+-gpgpu_ptx_force_max_capability 70
+
+# Device Limits
+-gpgpu_stack_size_limit 1024
+-gpgpu_heap_size_limit 8388608
+-gpgpu_runtime_sync_depth_limit 2
+-gpgpu_runtime_pending_launch_count_limit 2048
+-gpgpu_kernel_launch_latency 5000
+-gpgpu_TB_launch_latency 0
+
+# Compute Capability
+-gpgpu_compute_capability_major 7
+-gpgpu_compute_capability_minor 0
+
+# PTX execution-driven
+-gpgpu_ptx_convert_to_ptxplus 0
+-gpgpu_ptx_save_converted_ptxplus 0
+
+# high level architecture configuration
+-gpgpu_n_clusters 80
+-gpgpu_n_cores_per_cluster 1
+-gpgpu_n_mem 32
+-gpgpu_n_sub_partition_per_mchannel 2
+
+# volta clock domains
+#-gpgpu_clock_domains :::
+-gpgpu_clock_domains 1132.0:1132.0:1132.0:850.0
+# boost mode
+# -gpgpu_clock_domains 1628.0:1628.0:1628.0:850.0
+
+# shader core pipeline config
+-gpgpu_shader_registers 65536
+-gpgpu_registers_per_block 65536
+-gpgpu_occupancy_sm_number 70
+
+# This implies a maximum of 64 warps/SM
+-gpgpu_shader_core_pipeline 2048:32
+-gpgpu_shader_cta 32
+-gpgpu_simd_model 1
+
+# Pipeline widths and number of FUs
+# ID_OC_SP,ID_OC_DP,ID_OC_INT,ID_OC_SFU,ID_OC_MEM,OC_EX_SP,OC_EX_DP,OC_EX_INT,OC_EX_SFU,OC_EX_MEM,EX_WB,ID_OC_TENSOR_CORE,OC_EX_TENSOR_CORE
+## Volta GV100 has 4 SP SIMD units, 4 SFU units, 4 DP units per core, 4 Tensor core units
+## we need to scale the number of pipeline registers to be equal to the number of SP units
+-gpgpu_pipeline_widths 4,4,4,4,4,4,4,4,4,4,8,4,4
+-gpgpu_num_sp_units 4
+-gpgpu_num_sfu_units 4
+-gpgpu_num_dp_units 4
+-gpgpu_num_int_units 4
+-gpgpu_tensor_core_avail 1
+-gpgpu_num_tensor_core_units 4
+
+# Instruction latencies and initiation intervals
+# "ADD,MAX,MUL,MAD,DIV"
+# All Div operations are executed on SFU unit
+-ptx_opcode_latency_int 4,13,4,5,145,21
+-ptx_opcode_initiation_int 2,2,2,2,8,4
+-ptx_opcode_latency_fp 4,13,4,5,39
+-ptx_opcode_initiation_fp 2,2,2,2,4
+-ptx_opcode_latency_dp 8,19,8,8,330
+-ptx_opcode_initiation_dp 4,4,4,4,130
+-ptx_opcode_latency_sfu 100
+-ptx_opcode_initiation_sfu 8
+-ptx_opcode_latency_tesnor 64
+-ptx_opcode_initiation_tensor 64
+
+# Volta has sub core model, in which each scheduler has its own register file and EUs
+# i.e. schedulers are isolated
+-gpgpu_sub_core_model 1
+# disable specialized operand collectors and use generic operand collectors instead
+-gpgpu_enable_specialized_operand_collector 0
+-gpgpu_operand_collector_num_units_gen 8
+-gpgpu_operand_collector_num_in_ports_gen 8
+-gpgpu_operand_collector_num_out_ports_gen 8
+# volta has 8 banks, 4 schedulers, two banks per scheduler
+# we increase #banks to 16 to mitigate the effect of Register File Cache (RFC) which we do not implement in the current version
+-gpgpu_num_reg_banks 16
+-gpgpu_reg_file_port_throughput 2
+
+# shared memory bankconflict detection
+-gpgpu_shmem_num_banks 32
+-gpgpu_shmem_limited_broadcast 0
+-gpgpu_shmem_warp_parts 1
+-gpgpu_coalesce_arch 60
+
+# Volta has four schedulers per core
+-gpgpu_num_sched_per_core 4
+# Greedy then oldest scheduler
+-gpgpu_scheduler gto
+## In Volta, a warp scheduler can issue 1 inst per cycle
+-gpgpu_max_insn_issue_per_warp 1
+-gpgpu_dual_issue_diff_exec_units 1
+
+## L1/shared memory configuration
+# ::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
+# Default config is 32KB DL1 and 96KB shared memory
+# In Volta, we assign the remaining shared memory to L1 cache
+# if the assigned shd mem = 0, then L1 cache = 128KB
+# For more info, see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-7-x
+# disable this mode in case of multi kernels/apps execution
+-gpgpu_adaptive_cache_config 1
+# Volta unified cache has four banks
+-gpgpu_l1_banks 4
+-gpgpu_cache:dl1 S:1:128:256,L:L:s:N:L,A:256:8,16:0,32
+-gpgpu_shmem_size 98304
+-gpgpu_shmem_sizeDefault 98304
+-gpgpu_shmem_per_block 65536
+-gpgpu_gmem_skip_L1D 0
+-gpgpu_n_cluster_ejection_buffer_size 32
+-gpgpu_l1_latency 20
+-gpgpu_smem_latency 20
+-gpgpu_flush_l1_cache 1
+
+# 32 sets, each 128 bytes 24-way for each memory sub partition (96 KB per memory sub partition). This gives us 6MB L2 cache
+-gpgpu_cache:dl2 S:32:128:24,L:B:m:L:P,A:192:4,32:0,32
+-gpgpu_cache:dl2_texture_only 0
+-gpgpu_dram_partition_queues 64:64:64:64
+-gpgpu_perf_sim_memcpy 1
+-gpgpu_memory_partition_indexing 2
+
+# 128 KB Inst.
+-gpgpu_cache:il1 N:64:128:16,L:R:f:N:L,S:2:48,4
+-gpgpu_inst_fetch_throughput 4
+# 128 KB Tex
+# Note: TEX is deprecated in Volta; it is used for legacy apps only. Use L1D cache instead with .nc modifier or __ldg method
+-gpgpu_tex_cache:l1 N:4:128:256,L:R:m:N:L,T:512:8,128:2
+# 64 KB Const
+-gpgpu_const_cache:l1 N:128:64:8,L:R:f:N:L,S:2:64,4
+-gpgpu_perfect_inst_const_cache 1
+
+# interconnection
+#-network_mode 1
+#-inter_config_file config_volta_islip.icnt
+# use built-in local xbar
+-network_mode 2
+-icnt_in_buffer_limit 512
+-icnt_out_buffer_limit 512
+-icnt_subnets 2
+-icnt_flit_size 40
+-icnt_arbiter_algo 1
+
+# memory partition latency config
+-gpgpu_l2_rop_latency 160
+-dram_latency 100
+
+# dram model config
+-gpgpu_dram_scheduler 1
+-gpgpu_frfcfs_dram_sched_queue_size 64
+-gpgpu_dram_return_queue_size 192
+
+# for HBM, three stacks, 24 channels, each (128 bits) 16 bytes width
+-gpgpu_n_mem_per_ctrlr 1
+-gpgpu_dram_buswidth 16
+-gpgpu_dram_burst_length 2
+-dram_data_command_freq_ratio 2 # HBM is DDR
+-gpgpu_mem_address_mask 1
+-gpgpu_mem_addr_mapping dramid@8;00000000.00000000.00000000.00000000.0000RRRR.RRRRRRRR.RBBBCCCB.CCCSSSSS
+
+# HBM timings are adopted from the Hynix JESD235 standard and NVIDIA HPCA 2017 paper (http://www.cs.utah.edu/~nil/pubs/hpca17.pdf)
+# Timing for 1 GHZ
+# tRRDl and tWTR are missing, need to be added
+#-gpgpu_dram_timing_opt "nbk=16:CCD=1:RRD=4:RCD=14:RAS=33:RP=14:RC=47:
+# CL=14:WL=2:CDLR=3:WR=12:nbkgrp=4:CCDL=2:RTPL=4"
+
+# Timing for 850 MHZ, V100 HBM runs at 850 MHZ
+-gpgpu_dram_timing_opt "nbk=16:CCD=1:RRD=3:RCD=12:RAS=28:RP=12:RC=40:
+ CL=12:WL=2:CDLR=3:WR=10:nbkgrp=4:CCDL=2:RTPL=3"
+
+# HBM has dual bus interface, in which it can issue two col and row commands at a time
+-dram_dual_bus_interface 1
+# select lower bits for bnkgrp to increase bnkgrp parallelism
+-dram_bnk_indexing_policy 0
+-dram_bnkgrp_indexing_policy 1
+
+#-dram_seperate_write_queue_enable 1
+#-dram_write_queue_size 64:56:32
+
+# stat collection
+-gpgpu_memlatency_stat 14
+-gpgpu_runtime_stat 500
+-enable_ptx_file_line_stats 1
+-visualizer_enabled 0
+
+# power model configs, disable it until we create a real energy model for Volta
+-power_simulation_enabled 0
+
+# tracing functionality
+#-trace_enabled 1
+#-trace_components WARP_SCHEDULER,SCOREBOARD,LIVENESS
+#-trace_sampling_core 0
+
+### items for functional and timing simulation of UVM ###
+
+# gddr size should be less than or equal to 1GB, in the unit of MB/GB
+-gddr_size 1GB
+
+# size of gddr page, only 4KB and 2MB available
+-page_size 4KB
+
+# number of tlb entries per SM
+-tlb_size 4096
+
+# average page table walk latency (in core cycle)
+# for 4K page, set to 100 and for 2M page, set to 66
+-page_table_walk_latency 100
+
+# page eviction policy
+# 0 - lru 2MB (default)
+# 1 - lru tree-based neighborhood
+# 2 - lru sequential locality 64K
+# 3 - random 4KB
+# 4 - LFU 2MB
+# 5 - lru 4KB
+-eviction_policy 0
+
+# invalidate clean pages directly instead of writing back
+-invalidate_clean 0
+
+# reserve percentage (e.g. 10 or 20) of accessed pages from eviction in hope that they will be accessed in next iteration
+-reserve_accessed_page_percent 0
+
+# percentage of free page buffer to trigger the page eviction (e.g. 5 or 10)
+-percentage_of_free_page_buffer 0
+
+# pcie bandwidth per direction
+-pcie_bandwidth 16.0GB/s
+
+# enable/disable GMMU statistics profiling for UVM
+-sim_prof_enable 1
+
+# disable deadlock check for UVM
+-gpgpu_deadlock_detect 0
+
+# latency to process kernel launch (150us or 222150 core cycles)
+#-gpgpu_kernel_launch_latency 222150
+
+# hardware prefetcher
+# 0 - disabled
+# 1 - tree-based neighborhood (default)
+# 2 - sequential locality 64K
+# 3 - random 4 K
+-hardware_prefetch 1
+
+# hardware prefetcher under over-subscription
+# 0 - disable upon eviction (default)
+# 1 - tree-based neighborhood
+# 2 - sequential locality 64K
+# 3 - random 4 K
+-hwprefetch_oversub 1
+
+# latency in core cycle to handle page fault (45us)
+# encompass the overhead of stalling threads, deciding memory address, page table walk, maintaining page flags, transfer chunks and orders
+-page_fault_latency 66645
+
+# enabling accurate simulation for stalling warps and serializing accesses for page fault handling (default 0)
+-enable_accurate_simulation 0
+
+# Enable direct CPU-memory access from GPU
+# 0 - disable
+# 1 - adaptive
+# 2 - always
+# 3 - after oversubscription
+-enable_dma 0
+
+# Access counter threshold for migrating the page from cpu to gpu
+-migrate_threshold 8
+
+# Oversubscription Multiplicative Penalty Factor for Adaptive DMA
+-multiply_dma_penalty 2
+
+# enabling access pattern detection, policy engine, and adaptive memory management
+-enable_smart_runtime 0
+
+# enabling skip cycles when all warps stall and wait for page fault come back
+-skip_cycles_enable 1
diff --git a/configs/tested-cfgs/SM6_TITANX/gpgpusim.config b/configs/tested-cfgs/SM6_TITANX/gpgpusim.config
index 882630e76..7d3e2d47e 100644
--- a/configs/tested-cfgs/SM6_TITANX/gpgpusim.config
+++ b/configs/tested-cfgs/SM6_TITANX/gpgpusim.config
@@ -123,8 +123,8 @@
-gpgpu_dual_issue_diff_exec_units 1
## L1/shared memory configuration
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# Note: Hashing set index function (H) only applies to a set size of 32 or 64.
# The defulat is to disable the L1 cache, unless cache modifieres are used
-gpgpu_l1_banks 2
diff --git a/configs/tested-cfgs/SM75_RTX2060/gpgpusim.config b/configs/tested-cfgs/SM75_RTX2060/gpgpusim.config
index 8cc3ed6bf..6ff4b6c08 100644
--- a/configs/tested-cfgs/SM75_RTX2060/gpgpusim.config
+++ b/configs/tested-cfgs/SM75_RTX2060/gpgpusim.config
@@ -83,8 +83,8 @@
-gpgpu_dual_issue_diff_exec_units 1
## L1/shared memory configuration
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# In adaptive cache, we adaptively assign the remaining shared memory to L1 cache
# For more info, see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-7-x
-gpgpu_adaptive_cache_config 1
diff --git a/configs/tested-cfgs/SM75_RTX2060_S/gpgpusim.config b/configs/tested-cfgs/SM75_RTX2060_S/gpgpusim.config
index 290c08d6c..08ac75277 100644
--- a/configs/tested-cfgs/SM75_RTX2060_S/gpgpusim.config
+++ b/configs/tested-cfgs/SM75_RTX2060_S/gpgpusim.config
@@ -128,8 +128,8 @@
-gpgpu_num_reg_banks 16
-gpgpu_reg_file_port_throughput 2
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
-gpgpu_adaptive_cache_config 0
-gpgpu_l1_banks 4
-gpgpu_cache:dl1 S:1:128:512,L:L:s:N:L,A:256:8,16:0,32
diff --git a/configs/tested-cfgs/SM7_GV100/gpgpusim.config b/configs/tested-cfgs/SM7_GV100/gpgpusim.config
index 4887be8cf..26ce0eb58 100644
--- a/configs/tested-cfgs/SM7_GV100/gpgpusim.config
+++ b/configs/tested-cfgs/SM7_GV100/gpgpusim.config
@@ -137,8 +137,8 @@
-gpgpu_dual_issue_diff_exec_units 1
## L1/shared memory configuration
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# Defualt config is 32KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
diff --git a/configs/tested-cfgs/SM7_QV100/gpgpusim.config b/configs/tested-cfgs/SM7_QV100/gpgpusim.config
index a14ae7567..b3384afcb 100644
--- a/configs/tested-cfgs/SM7_QV100/gpgpusim.config
+++ b/configs/tested-cfgs/SM7_QV100/gpgpusim.config
@@ -137,8 +137,8 @@
-gpgpu_dual_issue_diff_exec_units 1
## L1/shared memory configuration
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# Defualt config is 32KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
diff --git a/configs/tested-cfgs/SM7_TITANV/gpgpusim.config b/configs/tested-cfgs/SM7_TITANV/gpgpusim.config
index b48f37006..c37aaf053 100644
--- a/configs/tested-cfgs/SM7_TITANV/gpgpusim.config
+++ b/configs/tested-cfgs/SM7_TITANV/gpgpusim.config
@@ -107,8 +107,8 @@
-gpgpu_dual_issue_diff_exec_units 1
## L1/shared memory configuration
-# :::,::::,::,:**,
-# ** Optional parameter - Required when mshr_type==Texture Fifo, set to 0 if not used
+# :::,::::,::,:**
+# ** Optional parameter - Required when mshr_type==Texture Fifo
# Defualt config is 32KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
diff --git a/configs/tested-cfgs/SM86_RTX3070/gpgpusim.config b/configs/tested-cfgs/SM86_RTX3070/gpgpusim.config
index c884541bb..d26b1a621 100644
--- a/configs/tested-cfgs/SM86_RTX3070/gpgpusim.config
+++ b/configs/tested-cfgs/SM86_RTX3070/gpgpusim.config
@@ -83,8 +83,8 @@
-gpgpu_dual_issue_diff_exec_units 1
## L1/shared memory configuration
-# :::,: