Skip to content

Commit 1c89c79

Browse files
committed
Rename PyInit syms to avoid clashes
Built extensions in packages often have common names like speedups, utils, _objects, cpython, etc., which are unique only within their package's namespace. Each compiled extension exports a PyInit_<module> symbol, which needs to be renamed to PyInit_<pkg>_<module> to avoid clashes when the extensions are combined into a static binary. Fixes #169
1 parent 38179ef commit 1c89c79

5 files changed

Lines changed: 193 additions & 5 deletions

File tree

ci/azure-pipelines-template.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ jobs:
4949

5050
- ${{ if ne(parameters.name, 'Windows') }}:
5151
- script: |
52-
cargo run --bin pyoxidizer -- init --python-code 'print("hello, world")' ~/pyapp
52+
cargo run --bin pyoxidizer -- init --pip-install markupsafe --pip-install simplejson --python-code 'import markupsafe; import simplejson; print("hello, world")' ~/pyapp
5353
cargo run --bin pyoxidizer -- run ~/pyapp
5454
displayName: Build Oxidized Application
5555
5656
- ${{ if eq(parameters.name, 'Windows') }}:
5757
- script: |
58-
cargo run --bin pyoxidizer -- init --python-code 'print("hello, world")' %HOME%/pyapp
58+
cargo run --bin pyoxidizer -- init --pip-install markupsafe --pip-install simplejson --python-code 'print("hello, world")' %HOME%/pyapp
5959
cargo run --bin pyoxidizer -- run %HOME%/pyapp
6060
displayName: Build Oxidized Application (Windows)

pyoxidizer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ hex = "0.3"
3636
itertools = "0.8"
3737
lazy_static = "1.3"
3838
libc = "0.2"
39+
object = { version = "0.16.0", features = ["read", "std", "write"] }
3940
regex = "1"
4041
reqwest = "0.9"
4142
rustc_version = "0.2"

pyoxidizer/src/py_packaging/libpython.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::path::{Path, PathBuf};
1313
use super::bytecode::{BytecodeCompiler, CompileMode};
1414
use super::distribution::{ExtensionModule, LicenseInfo, ParsedPythonDistribution};
1515
use super::embedded_resource::EmbeddedPythonResources;
16+
use super::object::rename_init;
1617
use super::resource::BuiltExtensionModule;
1718

1819
pub const PYTHON_IMPORTER: &[u8] = include_bytes!("memoryimporter.py");
@@ -107,23 +108,44 @@ pub fn make_config_c(
107108
}
108109

109110
for em in built_extension_modules.values() {
110-
lines.push(format!("extern PyObject* {}(void);", em.init_fn));
111+
let ambiguous_line = format!("extern PyObject* {}(void);", em.init_fn);
112+
113+
if lines.contains(&ambiguous_line) {
114+
lines.push(format!(
115+
"extern PyObject* PyInit_{}(void);",
116+
em.name.replace(".", "_")
117+
));
118+
} else {
119+
lines.push(ambiguous_line);
120+
}
111121
}
112122

113123
lines.push(String::from("struct _inittab _PyImport_Inittab[] = {"));
114124

125+
let mut ambiguous_init_fns: Vec<String> = Vec::new();
126+
115127
for em in extension_modules.values() {
116128
if let Some(init_fn) = &em.init_fn {
117129
if init_fn == "NULL" {
118130
continue;
119131
}
120132

121133
lines.push(format!("{{\"{}\", {}}},", em.module, init_fn));
134+
ambiguous_init_fns.push(init_fn.to_string());
122135
}
123136
}
124137

125138
for em in built_extension_modules.values() {
126-
lines.push(format!("{{\"{}\", {}}},", em.name, em.init_fn));
139+
if ambiguous_init_fns.contains(&em.init_fn) {
140+
lines.push(format!(
141+
"{{\"{}\", PyInit_{}}},",
142+
em.name,
143+
em.name.replace(".", "_")
144+
));
145+
} else {
146+
lines.push(format!("{{\"{}\", {}}},", em.name, em.init_fn));
147+
ambiguous_init_fns.push(em.init_fn.clone());
148+
}
127149
}
128150

129151
lines.push(String::from("{0, 0}"));
@@ -265,12 +287,20 @@ pub fn link_libpython(
265287
// TODO handle static/dynamic libraries.
266288
}
267289

290+
let mut ambiguous_init_fns: Vec<String> = Vec::new();
291+
268292
warn!(
269293
logger,
270294
"resolving inputs for {} extension modules...",
271295
extension_modules.len() + built_extension_modules.len()
272296
);
273297
for (name, em) in extension_modules {
298+
if let Some(init_fn) = &em.init_fn {
299+
if init_fn != "NULL" {
300+
ambiguous_init_fns.push(init_fn.to_string());
301+
}
302+
}
303+
274304
if em.builtin_default {
275305
continue;
276306
}
@@ -319,10 +349,21 @@ pub fn link_libpython(
319349
em.object_file_data.len(),
320350
name
321351
);
352+
322353
for (i, object_data) in em.object_file_data.iter().enumerate() {
323354
let out_path = temp_dir_path.join(format!("{}.{}.o", name, i));
324355

325-
fs::write(&out_path, object_data).expect("unable to write object file");
356+
if i == em.object_file_data.len() - 1 && ambiguous_init_fns.contains(&em.init_fn) {
357+
match rename_init(logger, name, object_data) {
358+
Ok(val) => fs::write(&out_path, val).expect("unable to write object file"),
359+
Err(_) => {
360+
fs::write(&out_path, object_data).expect("unable to write object file")
361+
}
362+
};
363+
} else {
364+
fs::write(&out_path, object_data).expect("unable to write object file");
365+
}
366+
326367
build.object(&out_path);
327368
}
328369

@@ -331,6 +372,10 @@ pub fn link_libpython(
331372
needed_libraries_external.insert(&library);
332373
}
333374

375+
if !ambiguous_init_fns.contains(&em.init_fn) {
376+
ambiguous_init_fns.push(em.init_fn.clone());
377+
}
378+
334379
// TODO do something with library_dirs.
335380
}
336381

pyoxidizer/src/py_packaging/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ pub mod distutils;
99
pub mod embedded_resource;
1010
pub mod fsscan;
1111
pub mod libpython;
12+
pub mod object;
1213
pub mod pyembed;
1314
pub mod resource;
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
use object::{write, Object, ObjectSection, RelocationTarget, SectionKind, SymbolKind};
6+
use slog::{info, warn};
7+
use std::collections::HashMap;
8+
use std::error::Error;
9+
use std::fmt;
10+
11+
#[derive(Debug, Clone)]
12+
pub struct NoRewriteError;
13+
14+
impl fmt::Display for NoRewriteError {
15+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16+
write!(f, "no object rewriting was performed")
17+
}
18+
}
19+
20+
impl Error for NoRewriteError {
21+
fn source(&self) -> Option<&(dyn Error + 'static)> {
22+
// Generic error, underlying cause isn't tracked.
23+
None
24+
}
25+
}
26+
27+
/// Rename object syn PyInit_foo to PyInit_<full_name> to avoid clashes
28+
pub fn rename_init(
29+
logger: &slog::Logger,
30+
name: &String,
31+
object_data: &[u8],
32+
) -> Result<Vec<u8>, NoRewriteError> {
33+
let mut rewritten = false;
34+
35+
let name_prefix = name.split('.').next().unwrap();
36+
37+
let in_object = match object::File::parse(object_data) {
38+
Ok(object) => object,
39+
Err(err) => {
40+
warn!(logger, "Failed to parse compiled object for {}: {}", name, err);
41+
return Err(NoRewriteError);
42+
}
43+
};
44+
45+
let mut out_object = write::Object::new(in_object.format(), in_object.architecture());
46+
47+
let mut out_sections = HashMap::new();
48+
for in_section in in_object.sections() {
49+
if in_section.kind() == SectionKind::Metadata {
50+
continue;
51+
}
52+
let section_id = out_object.add_section(
53+
in_section.segment_name().unwrap_or("").as_bytes().to_vec(),
54+
in_section.name().unwrap_or("").as_bytes().to_vec(),
55+
in_section.kind(),
56+
);
57+
let out_section = out_object.section_mut(section_id);
58+
if out_section.is_bss() {
59+
out_section.append_bss(in_section.size(), in_section.align());
60+
} else {
61+
out_section.set_data(in_section.uncompressed_data().into(), in_section.align());
62+
}
63+
out_sections.insert(in_section.index(), section_id);
64+
}
65+
66+
let mut out_symbols = HashMap::new();
67+
for (symbol_index, in_symbol) in in_object.symbols() {
68+
if in_symbol.kind() == SymbolKind::Null {
69+
continue;
70+
}
71+
let (section, value) = match in_symbol.section_index() {
72+
Some(index) => (
73+
Some(*out_sections.get(&index).unwrap()),
74+
in_symbol.address() - in_object.section_by_index(index).unwrap().address(),
75+
),
76+
None => (None, in_symbol.address()),
77+
};
78+
let in_sym_name = in_symbol.name().unwrap_or("");
79+
let sym_name = if in_sym_name.contains("PyInit_") && !in_sym_name.contains(name_prefix) {
80+
match out_object.mangling.global_prefix() {
81+
Some(prefix) => format!("{}PyInit_{}", prefix as char, name.replace(".", "_")),
82+
None => format!("PyInit_{}", name.replace(".", "_"))
83+
}
84+
} else {
85+
String::from(in_sym_name)
86+
};
87+
if sym_name != in_sym_name {
88+
warn!(
89+
logger,
90+
"rewrote object symbol name {} to {}", in_sym_name, sym_name,
91+
);
92+
93+
rewritten = true;
94+
}
95+
96+
let out_symbol = write::Symbol {
97+
name: sym_name.as_bytes().to_vec(),
98+
value,
99+
size: in_symbol.size(),
100+
kind: in_symbol.kind(),
101+
scope: in_symbol.scope(),
102+
weak: in_symbol.is_weak(),
103+
section,
104+
};
105+
let symbol_id = out_object.add_symbol(out_symbol);
106+
out_symbols.insert(symbol_index, symbol_id);
107+
}
108+
109+
if !rewritten {
110+
info!(logger, "no symbol name rewriting occurred for {}", name,);
111+
return Err(NoRewriteError);
112+
}
113+
114+
for in_section in in_object.sections() {
115+
if in_section.kind() == SectionKind::Metadata {
116+
continue;
117+
}
118+
let out_section = *out_sections.get(&in_section.index()).unwrap();
119+
for (offset, in_relocation) in in_section.relocations() {
120+
let symbol = match in_relocation.target() {
121+
RelocationTarget::Symbol(symbol) => *out_symbols.get(&symbol).unwrap(),
122+
RelocationTarget::Section(section) => {
123+
out_object.section_symbol(*out_sections.get(&section).unwrap())
124+
}
125+
};
126+
let out_relocation = write::Relocation {
127+
offset,
128+
size: in_relocation.size(),
129+
kind: in_relocation.kind(),
130+
encoding: in_relocation.encoding(),
131+
symbol,
132+
addend: in_relocation.addend(),
133+
};
134+
out_object
135+
.add_relocation(out_section, out_relocation)
136+
.unwrap();
137+
}
138+
}
139+
140+
Ok(out_object.write().unwrap())
141+
}

0 commit comments

Comments
 (0)