Skip to content

Commit 0ca173b

Browse files
committed
Rename PyInit syms to avoid clashes
Built extension modules in packages often have common names such as speedups, utils, _objects, cpython, etc., which reside inside the package namespace. Each compiled extension exports a PyInit_<module> symbol, which needs to be renamed to PyInit_<pkg>_<module> to avoid clashes when the extensions are combined into a static binary. Fixes #169
1 parent c99b271 commit 0ca173b

6 files changed

Lines changed: 233 additions & 5 deletions

File tree

ci/azure-pipelines-template.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ jobs:
4949

5050
- ${{ if ne(parameters.name, 'Windows') }}:
5151
- script: |
52-
cargo run --bin pyoxidizer -- init --pip-install appdirs==1.4.3 --pip-install zero-buffer==0.5.1 ~/pyapp
52+
cargo run --bin pyoxidizer -- init --pip-install appdirs==1.4.3 --pip-install zero-buffer==0.5.1 --pip-install markupsafe==1.1.1 --pip-install simplejson==3.17.0 ~/pyapp
5353
cat ci/pyapp.py | cargo run --bin pyoxidizer -- run ~/pyapp
5454
displayName: Build Oxidized Application
5555
5656
- ${{ if eq(parameters.name, 'Windows') }}:
5757
- script: |
58-
cargo run --bin pyoxidizer -- init --pip-install appdirs==1.4.3 --pip-install zero-buffer==0.5.1 %USERPROFILE%/pyapp
58+
cargo run --bin pyoxidizer -- init --pip-install appdirs==1.4.3 --pip-install zero-buffer==0.5.1 --pip-install markupsafe==1.1.1 --pip-install simplejson==3.17.0 %USERPROFILE%/pyapp
5959
cat ci/pyapp.py | cargo run --bin pyoxidizer -- run %USERPROFILE%/pyapp
6060
displayName: Build Oxidized Application (Windows)

ci/pyapp.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,10 @@
88
except AttributeError:
99
pass
1010

11+
import markupsafe._speedups
12+
import simplejson._speedups
13+
14+
import markupsafe
15+
import simplejson
16+
1117
print("hello, world")

pyoxidizer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ hex = "0.3"
3737
itertools = "0.8"
3838
lazy_static = "1.3"
3939
libc = "0.2"
40+
object = { version = "0.16.0", features = ["read", "std", "write"] }
4041
regex = "1"
4142
reqwest = "0.9"
4243
rustc_version = "0.2"

pyoxidizer/src/py_packaging/libpython.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::path::{Path, PathBuf};
1313
use super::bytecode::{BytecodeCompiler, CompileMode};
1414
use super::distribution::{ExtensionModule, LicenseInfo, ParsedPythonDistribution};
1515
use super::embedded_resource::EmbeddedPythonResources;
16+
use super::object::rename_init;
1617
use super::resource::BuiltExtensionModule;
1718

1819
pub const PYTHON_IMPORTER: &[u8] = include_bytes!("memoryimporter.py");
@@ -107,23 +108,44 @@ pub fn make_config_c(
107108
}
108109

109110
for em in built_extension_modules.values() {
110-
lines.push(format!("extern PyObject* {}(void);", em.init_fn));
111+
let ambiguous_line = format!("extern PyObject* {}(void);", em.init_fn);
112+
113+
if lines.contains(&ambiguous_line) {
114+
lines.push(format!(
115+
"extern PyObject* PyInit_{}(void);",
116+
em.name.replace(".", "_")
117+
));
118+
} else {
119+
lines.push(ambiguous_line);
120+
}
111121
}
112122

113123
lines.push(String::from("struct _inittab _PyImport_Inittab[] = {"));
114124

125+
let mut ambiguous_init_fns: Vec<String> = Vec::new();
126+
115127
for em in extension_modules.values() {
116128
if let Some(init_fn) = &em.init_fn {
117129
if init_fn == "NULL" {
118130
continue;
119131
}
120132

121133
lines.push(format!("{{\"{}\", {}}},", em.module, init_fn));
134+
ambiguous_init_fns.push(init_fn.to_string());
122135
}
123136
}
124137

125138
for em in built_extension_modules.values() {
126-
lines.push(format!("{{\"{}\", {}}},", em.name, em.init_fn));
139+
if ambiguous_init_fns.contains(&em.init_fn) {
140+
lines.push(format!(
141+
"{{\"{}\", PyInit_{}}},",
142+
em.name,
143+
em.name.replace(".", "_")
144+
));
145+
} else {
146+
lines.push(format!("{{\"{}\", {}}},", em.name, em.init_fn));
147+
ambiguous_init_fns.push(em.init_fn.clone());
148+
}
127149
}
128150

129151
lines.push(String::from("{0, 0}"));
@@ -265,12 +287,20 @@ pub fn link_libpython(
265287
// TODO handle static/dynamic libraries.
266288
}
267289

290+
let mut ambiguous_init_fns: Vec<String> = Vec::new();
291+
268292
warn!(
269293
logger,
270294
"resolving inputs for {} extension modules...",
271295
extension_modules.len() + built_extension_modules.len()
272296
);
273297
for (name, em) in extension_modules {
298+
if let Some(init_fn) = &em.init_fn {
299+
if init_fn != "NULL" {
300+
ambiguous_init_fns.push(init_fn.to_string());
301+
}
302+
}
303+
274304
if em.builtin_default {
275305
continue;
276306
}
@@ -319,10 +349,21 @@ pub fn link_libpython(
319349
em.object_file_data.len(),
320350
name
321351
);
352+
322353
for (i, object_data) in em.object_file_data.iter().enumerate() {
323354
let out_path = temp_dir_path.join(format!("{}.{}.o", name, i));
324355

325-
fs::write(&out_path, object_data).expect("unable to write object file");
356+
if i == em.object_file_data.len() - 1 && ambiguous_init_fns.contains(&em.init_fn) {
357+
match rename_init(logger, name, object_data) {
358+
Ok(val) => fs::write(&out_path, val).expect("unable to write object file"),
359+
Err(_) => {
360+
fs::write(&out_path, object_data).expect("unable to write object file")
361+
}
362+
};
363+
} else {
364+
fs::write(&out_path, object_data).expect("unable to write object file");
365+
}
366+
326367
build.object(&out_path);
327368
}
328369

@@ -331,6 +372,10 @@ pub fn link_libpython(
331372
needed_libraries_external.insert(&library);
332373
}
333374

375+
if !ambiguous_init_fns.contains(&em.init_fn) {
376+
ambiguous_init_fns.push(em.init_fn.clone());
377+
}
378+
334379
// TODO do something with library_dirs.
335380
}
336381

pyoxidizer/src/py_packaging/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub mod distutils;
99
pub mod embedded_resource;
1010
pub mod fsscan;
1111
pub mod libpython;
12+
pub mod object;
1213
pub mod pip;
1314
pub mod pyembed;
1415
pub mod resource;
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
use object::{write, Object, ObjectSection, RelocationTarget, SectionKind, SymbolKind};
6+
use slog::{info, warn};
7+
use std::collections::HashMap;
8+
use std::error::Error;
9+
use std::fmt;
10+
11+
/// Error reported when an object file was not rewritten.
///
/// This is a unit type: it carries no information about the underlying
/// cause, only the fact that no rewritten object is available.
#[derive(Clone, Debug)]
pub struct NoRewriteError;

impl fmt::Display for NoRewriteError {
    /// Writes the fixed, human-readable description of this error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("no object rewriting was performed")
    }
}

// The provided `Error::source` implementation already returns `None`, which
// is what this generic error wants: the underlying cause isn't tracked.
impl Error for NoRewriteError {}
26+
27+
/// Rename object syn PyInit_foo to PyInit_<full_name> to avoid clashes
28+
pub fn rename_init(
29+
logger: &slog::Logger,
30+
name: &String,
31+
object_data: &[u8],
32+
) -> Result<Vec<u8>, NoRewriteError> {
33+
let mut rewritten = false;
34+
35+
let name_prefix = name.split('.').next().unwrap();
36+
37+
let in_object = match object::File::parse(object_data) {
38+
Ok(object) => object,
39+
Err(err) => {
40+
let magic = [
41+
object_data[0],
42+
object_data[1],
43+
object_data[2],
44+
object_data[3],
45+
];
46+
warn!(
47+
logger,
48+
"Failed to parse compiled object for {} (magic {:x?}): {}", name, magic, err
49+
);
50+
return Err(NoRewriteError);
51+
}
52+
};
53+
54+
let mut out_object = write::Object::new(in_object.format(), in_object.architecture());
55+
56+
let mut out_sections = HashMap::new();
57+
for in_section in in_object.sections() {
58+
if in_section.kind() == SectionKind::Metadata {
59+
continue;
60+
}
61+
let section_id = out_object.add_section(
62+
in_section.segment_name().unwrap_or("").as_bytes().to_vec(),
63+
in_section.name().unwrap_or("").as_bytes().to_vec(),
64+
in_section.kind(),
65+
);
66+
let out_section = out_object.section_mut(section_id);
67+
if out_section.is_bss() {
68+
out_section.append_bss(in_section.size(), in_section.align());
69+
} else {
70+
out_section.set_data(in_section.uncompressed_data().into(), in_section.align());
71+
}
72+
out_sections.insert(in_section.index(), section_id);
73+
}
74+
75+
let mut out_symbols = HashMap::new();
76+
for (symbol_index, in_symbol) in in_object.symbols() {
77+
if in_symbol.kind() == SymbolKind::Null {
78+
// This is normal in ELF
79+
info!(
80+
logger,
81+
"object symbol name kind 'null' discarded",
82+
);
83+
continue;
84+
}
85+
let in_sym_name = in_symbol.name().unwrap_or("");
86+
if in_symbol.kind() == SymbolKind::Unknown {
87+
warn!(
88+
logger,
89+
"object symbol name {} kind 'unknown' encountered", in_sym_name,
90+
);
91+
}
92+
let (section, value) = match in_symbol.section_index() {
93+
Some(index) => (
94+
Some(*out_sections.get(&index).unwrap()),
95+
in_symbol.address() - in_object.section_by_index(index).unwrap().address(),
96+
),
97+
None => (None, in_symbol.address()),
98+
};
99+
let sym_name = if !in_sym_name.starts_with("$")
100+
&& in_sym_name.contains("PyInit_")
101+
&& !in_sym_name.contains(name_prefix)
102+
{
103+
"PyInit_".to_string() + &name.replace(".", "_")
104+
} else {
105+
String::from(in_sym_name)
106+
};
107+
if sym_name != in_sym_name {
108+
warn!(
109+
logger,
110+
"renaming object symbol name {} to {}", in_sym_name, sym_name,
111+
);
112+
113+
rewritten = true;
114+
}
115+
116+
let out_symbol = write::Symbol {
117+
name: sym_name.as_bytes().to_vec(),
118+
value,
119+
size: in_symbol.size(),
120+
kind: in_symbol.kind(),
121+
scope: in_symbol.scope(),
122+
weak: in_symbol.is_weak(),
123+
section,
124+
};
125+
126+
let symbol_id = out_object.add_symbol(out_symbol);
127+
out_symbols.insert(symbol_index, symbol_id);
128+
info!(
129+
logger,
130+
"added object symbol name {} kind {:?}", sym_name, in_symbol,
131+
);
132+
}
133+
134+
if !rewritten {
135+
warn!(logger, "no symbol name rewriting occurred for {}", name);
136+
return Err(NoRewriteError);
137+
}
138+
139+
for in_section in in_object.sections() {
140+
if in_section.kind() == SectionKind::Metadata {
141+
continue;
142+
}
143+
let out_section = *out_sections.get(&in_section.index()).unwrap();
144+
for (offset, in_relocation) in in_section.relocations() {
145+
let symbol = match in_relocation.target() {
146+
RelocationTarget::Symbol(symbol) => *out_symbols.get(&symbol).unwrap(),
147+
RelocationTarget::Section(section) => {
148+
out_object.section_symbol(*out_sections.get(&section).unwrap())
149+
}
150+
};
151+
let out_relocation = write::Relocation {
152+
offset,
153+
size: in_relocation.size(),
154+
kind: in_relocation.kind(),
155+
encoding: in_relocation.encoding(),
156+
symbol,
157+
addend: in_relocation.addend(),
158+
};
159+
out_object
160+
.add_relocation(out_section, out_relocation)
161+
.unwrap();
162+
}
163+
}
164+
165+
info!(logger, "serialising object for {} ..", name);
166+
167+
match out_object.write() {
168+
Ok(obj) => Ok(obj),
169+
Err(err) => {
170+
warn!(logger, "object {} serialisation failed: {}", name, err);
171+
172+
Err(NoRewriteError)
173+
}
174+
}
175+
}

0 commit comments

Comments
 (0)