Skip to content

Commit c3034d3

Browse files
EliahKagan and claude committed
feat: include workspace members with different license or authorship
Workspace members that share the root package's license and author set are covered by the top-level LICENSE-MIT / LICENSE-APACHE files and need no separate attribution. But some workspace crates have a different license (`gix-imara-diff` and `gix-imara-diff-01` are vendored from upstream under Apache-2.0, not the root's MIT-or-Apache-2.0) or different authors (`gix-config` by Edward Shen, `gix-hashtable` by Pascal Kuthe, and several others with co-authors). Omitting these from the manifest is a compliance gap — Apache-2.0 §4(a) and §4(c) require distributing the license and retaining copyright notices, and MIT requires preserving the copyright notice. Detect these automatically: for each workspace member in the resolved dependency graph that is reachable from the `gitoxide` package (so test-only crates like `gix-config-tests` are excluded), compare its `license` field (SPDX-normalized, order-independent) and `authors` set against the root package's. If either differs, include it in the manifest and scan its source tree for LICENSE / NOTICE / AUTHORS files exactly as we do for third-party crates. No per-crate configuration is needed; vendoring a crate into the workspace with different metadata is sufficient for it to be discovered. Tests assert that `gix-imara-diff` (different license) and `gix-config` (different author) appear, while `gix-config-tests` (not linked) and `gitoxide` itself (the root) do not. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent c4e6677 commit c3034d3

2 files changed

Lines changed: 135 additions & 5 deletions

File tree

build.rs

Lines changed: 93 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ mod spdx_texts;
5252
#[path = "gitoxide-core/src/licenses/build_support.rs"]
5353
mod build_support;
5454

55-
use std::collections::HashSet;
55+
use std::collections::{HashMap, HashSet, VecDeque};
5656
use std::path::{Path, PathBuf};
5757
use std::process::Command;
5858
use std::time::{SystemTime, UNIX_EPOCH};
@@ -153,14 +153,36 @@ fn collect_manifest() -> Result<Manifest, String> {
153153
let metadata = cmd.exec().map_err(|e| format!("cargo metadata failed: {e}"))?;
154154

155155
let workspace_members: HashSet<_> = metadata.workspace_members.iter().cloned().collect();
156-
let mut third_party: Vec<&cargo_metadata::Package> = metadata
156+
157+
// Determine which packages are reachable from the `gitoxide` binary's
158+
// dependency graph (as opposed to other workspace members' graphs).
159+
// This matters because some workspace members (e.g. test crates) are
160+
// never linked into gix/ein and shouldn't appear in the manifest.
161+
let reachable = reachable_from_root(&metadata, "gitoxide")?;
162+
163+
let root_pkg = metadata
157164
.packages
158165
.iter()
159-
.filter(|p| !workspace_members.contains(&p.id) && p.source.is_some())
166+
.find(|p| p.name == "gitoxide")
167+
.ok_or("gitoxide package not found in metadata")?;
168+
169+
// Collect third-party deps (non-workspace, sourced) as before, PLUS
170+
// workspace members whose license or authorship differs from the root
171+
// package and that are actually linked into the binary.
172+
let mut to_attribute: Vec<&cargo_metadata::Package> = metadata
173+
.packages
174+
.iter()
175+
.filter(|p| {
176+
if workspace_members.contains(&p.id) {
177+
reachable.contains(&p.id) && needs_separate_attribution(p, root_pkg)
178+
} else {
179+
p.source.is_some()
180+
}
181+
})
160182
.collect();
161-
third_party.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.version.cmp(&b.version)));
183+
to_attribute.sort_by(|a, b| a.name.cmp(&b.name).then_with(|| a.version.cmp(&b.version)));
162184

163-
let crates: Vec<CrateLicense> = third_party.into_iter().map(build_crate_entry).collect();
185+
let crates: Vec<CrateLicense> = to_attribute.into_iter().map(build_crate_entry).collect();
164186

165187
Ok(Manifest {
166188
crates,
@@ -204,6 +226,72 @@ fn build_crate_entry(p: &cargo_metadata::Package) -> CrateLicense {
204226
}
205227
}
206228

229+
/// BFS from the named root package through the resolve graph, returning
230+
/// every package ID reachable from it. This lets us distinguish workspace
231+
/// members that are linked into the binary from those that only exist as
232+
/// standalone workspace members (e.g. test crates).
233+
fn reachable_from_root(
234+
metadata: &cargo_metadata::Metadata,
235+
root_name: &str,
236+
) -> Result<HashSet<cargo_metadata::PackageId>, String> {
237+
let resolve = metadata
238+
.resolve
239+
.as_ref()
240+
.ok_or("cargo metadata produced no resolve graph")?;
241+
242+
let root_id = metadata
243+
.packages
244+
.iter()
245+
.find(|p| p.name == root_name)
246+
.map(|p| &p.id)
247+
.ok_or_else(|| format!("package `{root_name}` not found in metadata"))?;
248+
249+
let deps_of: HashMap<&cargo_metadata::PackageId, Vec<&cargo_metadata::PackageId>> = resolve
250+
.nodes
251+
.iter()
252+
.map(|n| (&n.id, n.deps.iter().map(|d| &d.pkg).collect()))
253+
.collect();
254+
255+
let mut reachable = HashSet::new();
256+
let mut queue = VecDeque::new();
257+
queue.push_back(root_id.clone());
258+
while let Some(id) = queue.pop_front() {
259+
if !reachable.insert(id.clone()) {
260+
continue;
261+
}
262+
if let Some(deps) = deps_of.get(&id) {
263+
for dep_id in deps {
264+
queue.push_back((*dep_id).clone());
265+
}
266+
}
267+
}
268+
Ok(reachable)
269+
}
270+
271+
/// Return `true` if a workspace member's license or authorship differs
272+
/// from the root package's and so requires its own attribution entry in the
273+
/// manifest. The comparison normalises SPDX expressions (so `MIT OR
274+
/// Apache-2.0` and `Apache-2.0 OR MIT` are treated as equivalent) and
275+
/// compares author sets order-independently.
276+
fn needs_separate_attribution(pkg: &cargo_metadata::Package, root: &cargo_metadata::Package) -> bool {
277+
// Compare normalized SPDX id sets.
278+
match (&pkg.license, &root.license) {
279+
(Some(pkg_lic), Some(root_lic)) => {
280+
let pkg_ids = build_support::parse_spdx_ids(pkg_lic);
281+
let root_ids = build_support::parse_spdx_ids(root_lic);
282+
if pkg_ids != root_ids {
283+
return true;
284+
}
285+
}
286+
(None, None) => {}
287+
_ => return true, // one declares a license, the other doesn't
288+
}
289+
// Compare author sets (order-independent).
290+
let pkg_authors: HashSet<&str> = pkg.authors.iter().map(String::as_str).collect();
291+
let root_authors: HashSet<&str> = root.authors.iter().map(String::as_str).collect();
292+
pkg_authors != root_authors
293+
}
294+
207295
fn enabled_top_level_features() -> Vec<String> {
208296
// Cargo exposes each enabled feature as `CARGO_FEATURE_<NAME>` where the
209297
// name is uppercased and hyphens become underscores. These are features of

src/licenses/embedded.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,48 @@ mod tests {
145145
);
146146
}
147147

148+
/// The manifest must list workspace members whose license or authorship
/// differs from the root package, next to the third-party dependencies.
///
/// * `gix-imara-diff` is expected to be present (the commit describes it
///   as Apache-2.0 only, with a different author).
/// * `gix-config` is expected to be present (same license, different
///   sole author).
/// * `gix-config-tests` must be absent: it is a test crate that is not
///   linked into the binary.
/// * `gitoxide`, the root package, must be absent.
#[test]
fn workspace_members_with_different_attribution_are_included() {
    let manifest = load().expect("load manifest");
    // Membership check by crate name over the loaded manifest entries.
    let is_listed = |krate: &str| manifest.crates.iter().any(|entry| entry.name.as_str() == krate);

    // Different license AND different author.
    assert!(
        is_listed("gix-imara-diff"),
        "vendored workspace member `gix-imara-diff` (Apache-2.0, different author) \
         must appear in the manifest",
    );

    // Same license, different sole author.
    assert!(
        is_listed("gix-config"),
        "workspace member `gix-config` (different author: Edward Shen) \
         must appear in the manifest",
    );

    // Test-only crate: differing author, but never linked into the binary.
    assert!(
        !is_listed("gix-config-tests"),
        "test workspace member `gix-config-tests` must NOT appear in \
         the manifest (not linked into the binary)",
    );

    // The root package never attributes itself.
    assert!(
        !is_listed("gitoxide"),
        "root package `gitoxide` must not appear in its own manifest",
    );
}
189+
148190
/// `build.rs` derives `feature_profile` from the `CARGO_FEATURE_*` env
149191
/// set at build time. The test binary is compiled with the same feature
150192
/// set (via `cargo test --features X`), so `cfg!(feature = X)` here must

0 commit comments

Comments
 (0)