Skip to content

Commit 3437c80

Browse files
Rewrite assembly to not need codegen
The original assembly relied on a build-time generated "gen_defines.asm" that contains various values and struct offsets needed by the assembly. This creates a lot of friction, as this build step must account for the numerous build environments and configurations that are possible, on top of needing an alternative path to handle cross-compilation. By modifying the assembly to now use extern variables, asm_offsets.c can define all the necessary values as compile-time constants, such that at link time the values are available to the assembly code. This does slightly increase runtime cost, as we are replacing constants with variables, but the benefit of simplifying the build step outweighs it. Doing the above required splitting the marmasm source into separate files, one for 32-bit and the other for 64-bit. This is because there is no straightforward support for multiple architectures in a single file. Without codegen, the easiest way to determine which architecture to use was to create separate files.
1 parent 40fdef4 commit 3437c80

9 files changed

Lines changed: 1197 additions & 557 deletions

loader/CMakeLists.txt

Lines changed: 7 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ endif()
6868
set(NORMAL_LOADER_SRCS
6969
allocation.c
7070
allocation.h
71+
asm_offsets.c
7172
cJSON.c
7273
cJSON.h
7374
debug_utils.c
@@ -215,30 +216,12 @@ end
215216
endif()
216217

217218
if(ASM_COMPILER_WORKS)
218-
add_executable(asm_offset asm_offset.c)
219-
target_link_libraries(asm_offset PRIVATE loader_specific_options)
220-
# If am emulator is provided (Like Wine), or running on native, run asm_offset to generate gen_defines.asm
221-
if (CMAKE_CROSSCOMPILING_EMULATOR OR NOT CMAKE_CROSSCOMPILING)
222-
add_custom_command(OUTPUT gen_defines.asm DEPENDS asm_offset COMMAND asm_offset ${LOADER_ASM_DIALECT})
223-
else()
224-
# Forces compiler to write the intermediate asm file, needed so that we can get sizeof/offset of info out of it.
225-
target_compile_options(asm_offset PRIVATE "/Fa$<TARGET_FILE_DIR:asm_offset>/asm_offset.asm" /FA)
226-
# Force off optimization so that the output assembly includes all the necessary info - optimizer would get rid of it otherwise.
227-
target_compile_options(asm_offset PRIVATE /Od)
228-
229-
find_package(Python3 REQUIRED QUIET)
230-
# Run parse_asm_values.py on asm_offset's assembly file to generate the gen_defines.asm, which the asm code depends on
231-
add_custom_command(TARGET asm_offset POST_BUILD
232-
COMMAND Python3::Interpreter ${PROJECT_SOURCE_DIR}/scripts/parse_asm_values.py "${CMAKE_CURRENT_BINARY_DIR}/gen_defines.asm"
233-
"$<TARGET_FILE_DIR:asm_offset>/asm_offset.asm" "${LOADER_ASM_DIALECT}" "${CMAKE_C_COMPILER_ID}" "${SYSTEM_PROCESSOR}"
234-
BYPRODUCTS gen_defines.asm
235-
)
236-
endif()
237-
add_custom_target(loader_asm_gen_files DEPENDS gen_defines.asm)
238-
set_target_properties(loader_asm_gen_files PROPERTIES FOLDER ${LOADER_HELPER_FOLDER})
239-
240219
if(SYSTEM_PROCESSOR MATCHES "arm")
241-
list(APPEND OPT_LOADER_SRCS unknown_ext_chain_marmasm.asm)
220+
if(SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
221+
list(APPEND OPT_LOADER_SRCS unknown_ext_chain_marmasm64.asm)
222+
else()
223+
list(APPEND OPT_LOADER_SRCS unknown_ext_chain_marmasm32.asm)
224+
endif()
242225
else()
243226
list(APPEND OPT_LOADER_SRCS unknown_ext_chain_masm.asm)
244227
endif()
@@ -296,67 +279,14 @@ elseif(UNIX OR MINGW OR (WIN32 AND USE_GAS)) # i.e.: Linux & Apple & MinGW & Win
296279
endif()
297280
endif()
298281

299-
# When compiling for x86 on x64, we can't use CMAKE_SYSTEM_PROCESSOR to determine which architecture to use,
300-
# Instead, check the size of void* and if its 4, set ASM_OFFSET_SYSTEM_PROCESSOR to x86 if we aren't on arm
301-
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
302-
set(ASM_OFFSET_SYSTEM_PROCESSOR ${SYSTEM_PROCESSOR}) # x86_64 or aarch64/arm64
303-
string(REPLACE amd64 x86_64 ASM_OFFSET_SYSTEM_PROCESSOR "${ASM_OFFSET_SYSTEM_PROCESSOR}")
304-
else()
305-
if(${SYSTEM_PROCESSOR} MATCHES "86")
306-
set(ASM_OFFSET_SYSTEM_PROCESSOR "x86")
307-
else()
308-
set(ASM_OFFSET_SYSTEM_PROCESSOR ${SYSTEM_PROCESSOR})
309-
endif()
310-
endif()
311-
312282
if(ASSEMBLER_WORKS)
313-
add_executable(asm_offset asm_offset.c)
314-
target_link_libraries(asm_offset loader_specific_options)
315-
# If not cross compiling, run asm_offset to generate gen_defines.asm
316-
if (NOT CMAKE_CROSSCOMPILING)
317-
add_custom_command(OUTPUT gen_defines.asm DEPENDS asm_offset COMMAND asm_offset GAS)
318-
else()
319-
# Forces compiler to write the intermediate asm file, needed so that we can get sizeof/offset of info out of it.
320-
# If with lto, compiler will output IR instead of asm, so we need to explicitly disable lto here.
321-
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
322-
target_compile_options(asm_offset PRIVATE -save-temps=obj -fno-lto)
323-
elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
324-
target_compile_options(asm_offset PRIVATE -save-temps=obj -fno-lto -fno-whole-program-vtables -fno-virtual-function-elimination)
325-
else()
326-
target_compile_options(asm_offset PRIVATE -save-temps=obj)
327-
endif()
328-
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
329-
set(ASM_OFFSET_EXECUTABLE_LOCATION "$<TARGET_FILE_DIR:asm_offset>/gen_defines.asm")
330-
set(ASM_OFFSET_INTERMEDIATE_LOCATION "$<TARGET_FILE_DIR:asm_offset>/CMakeFiles/asm_offset.dir/asm_offset.c.s")
331-
elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
332-
set(ASM_OFFSET_EXECUTABLE_LOCATION "$<TARGET_FILE_DIR:asm_offset>/gen_defines.asm")
333-
set(ASM_OFFSET_INTERMEDIATE_LOCATION "$<TARGET_FILE_DIR:asm_offset>/CMakeFiles/asm_offset.dir/asm_offset.s")
334-
elseif(CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
335-
# Need to use the current binary dir since the asm_offset.s file is in that folder rather than the bundle
336-
set(ASM_OFFSET_EXECUTABLE_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/gen_defines.asm")
337-
set(ASM_OFFSET_INTERMEDIATE_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/asm_offset.dir/asm_offset.s")
338-
else()
339-
message(FATAL_ERROR "C_COMPILER_ID not supported!")
340-
endif()
341-
message(STATUS "CMAKE_CROSSCOMPILING FALSE")
342-
343-
find_package(Python3 REQUIRED QUIET)
344-
# Run parse_asm_values.py on asm_offset's assembly file to generate the gen_defines.asm, which the asm code depends on
345-
add_custom_command(TARGET asm_offset POST_BUILD
346-
COMMAND Python3::Interpreter ${PROJECT_SOURCE_DIR}/scripts/parse_asm_values.py "${ASM_OFFSET_EXECUTABLE_LOCATION}"
347-
"${ASM_OFFSET_INTERMEDIATE_LOCATION}" "GAS" "${CMAKE_C_COMPILER_ID}" "${ASM_OFFSET_SYSTEM_PROCESSOR}"
348-
BYPRODUCTS gen_defines.asm
349-
)
350-
endif()
351-
add_custom_target(loader_asm_gen_files DEPENDS gen_defines.asm)
352-
353283
if (APPLE)
354284
set(MODIFY_UNKNOWN_FUNCTION_DECLS ON)
355285
endif()
356286
set(UNKNOWN_FUNCTIONS_SUPPORTED ON)
357287
else()
358288
if(USE_GAS)
359-
message(WARNING "Could not find working ${ASM_OFFSET_SYSTEM_PROCESSOR} GAS assembler\n${ASM_FAILURE_MSG}")
289+
message(WARNING "Could not find working ${SYSTEM_PROCESSOR} GAS assembler\n${ASM_FAILURE_MSG}")
360290
else()
361291
message(WARNING "Assembly sources have been disabled\n${ASM_FAILURE_MSG}")
362292
endif()
@@ -455,10 +385,6 @@ else()
455385
add_library(vulkan-framework SHARED)
456386
target_sources(vulkan-framework PRIVATE ${NORMAL_LOADER_SRCS} ${FRAMEWORK_HEADERS})
457387

458-
if (UNKNOWN_FUNCTIONS_SUPPORTED)
459-
add_dependencies(vulkan-framework loader_asm_gen_files)
460-
endif()
461-
462388
target_link_libraries(vulkan-framework ${CMAKE_DL_LIBS} Threads::Threads -lm "-framework CoreFoundation")
463389
target_link_libraries(vulkan-framework loader_specific_options)
464390

@@ -514,7 +440,6 @@ add_library(Vulkan::Loader ALIAS vulkan)
514440

515441
if (UNKNOWN_FUNCTIONS_SUPPORTED)
516442
target_compile_definitions(vulkan PRIVATE UNKNOWN_FUNCTIONS_SUPPORTED)
517-
add_dependencies(vulkan loader_asm_gen_files)
518443
endif()
519444

520445
if (BUILD_TESTS)

loader/asm_offset.c

Lines changed: 0 additions & 172 deletions
This file was deleted.

loader/asm_offsets.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright (c) 2017-2024 The Khronos Group Inc.
3+
* Copyright (c) 2017-2024 Valve Corporation
4+
* Copyright (c) 2017-2024 LunarG, Inc.
5+
* Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
6+
*
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*
19+
* Author: Charles Giessen <charles@lunarg.com>
20+
*/
21+
22+
#include <stddef.h>
23+
#include "loader_common.h"
24+
#include "log.h"
25+
26+
#if VK_USE_64_BIT_PTR_DEFINES == 1
27+
#define INT_TYPE uint64_t
28+
#else
29+
#define INT_TYPE uint32_t
30+
#endif
31+
32+
// Apple's ABI is to prefix the symbol with an underscore. Because we are using the symbols in assembly, we don't want to have to
33+
// worry about it. The fix is to use `foo __asm__("foo")` in order to change the symbol name as it appears to the linker.
34+
#if defined(__APPLE__)
35+
#define DEF(x) x __asm__(#x)
36+
#else
37+
#define DEF(x) x
38+
#endif
39+
40+
const INT_TYPE DEF(VULKAN_LOADER_ERROR_BIT_VALUE) = VULKAN_LOADER_ERROR_BIT;
41+
const INT_TYPE DEF(FUNCTION_OFFSET_INSTANCE) = offsetof(struct loader_instance, phys_dev_ext_disp_functions);
42+
const INT_TYPE DEF(PHYS_DEV_OFFSET_INST_DISPATCH) = offsetof(struct loader_instance_dispatch_table, phys_dev_ext);
43+
const INT_TYPE DEF(PHYS_DEV_OFFSET_PHYS_DEV_TRAMP) = offsetof(struct loader_physical_device_tramp, phys_dev);
44+
const INT_TYPE DEF(ICD_TERM_OFFSET_PHYS_DEV_TERM) = offsetof(struct loader_physical_device_term, this_icd_term);
45+
const INT_TYPE DEF(PHYS_DEV_OFFSET_PHYS_DEV_TERM) = offsetof(struct loader_physical_device_term, phys_dev);
46+
const INT_TYPE DEF(INSTANCE_OFFSET_ICD_TERM) = offsetof(struct loader_icd_term, this_instance);
47+
const INT_TYPE DEF(DISPATCH_OFFSET_ICD_TERM) = offsetof(struct loader_icd_term, phys_dev_ext);
48+
const INT_TYPE DEF(EXT_OFFSET_DEVICE_DISPATCH) = offsetof(struct loader_dev_dispatch_table, ext_dispatch);

0 commit comments

Comments
 (0)