def test_format_number():
    """
    Check ``util.format_number`` for ints, floats, numeric strings, an explicit
    number of significant digits, a custom separator and a non-numeric string.
    """
    # The default separator is a thin space. It is written as an explicit
    # \u2009 escape because a literal thin space is indistinguishable from an
    # ordinary space in most editors and diffs.
    assert util.format_number(0) == "0"
    assert util.format_number("1.23") == "1.23"
    assert util.format_number(3216546.34) == "3\u2009216\u2009546.3"
    assert util.format_number(3216546.34, 9) == "3\u2009216\u2009546.34"
    assert util.format_number(-3456.23) == "-3\u2009456.23"
    assert util.format_number(-3456.23, sep=",") == "-3,456.23"

    # ``match`` makes the message check part of the raises context itself, so
    # it cannot be silently skipped by sitting after the call that raises.
    with pytest.raises(
        TypeError, match="The string cannot be converted to a number"
    ):
        util.format_number("bad")
def format_number(number, sig_digits=8, sep="\u2009"):
    """
    Format a number with a separator to indicate thousands.

    Integral values (any ``numbers.Integral`` and strings that parse as
    integers) are rendered exactly, so counts and IDs are never rounded.
    All other values are rendered with up to ``sig_digits`` significant
    digits using the 'g' format, which switches to exponent notation once
    the magnitude reaches ``10**sig_digits``.

    :param number: An int, a float, or a numeric string.
    :param int sig_digits: The number of significant digits to display for
        non-integral values.
    :param str sep: The separator to use for thousands; the default is a
        thin space.
    :return: The formatted number.
    :rtype: str
    :raises TypeError: If ``number`` is a string that cannot be converted
        to a number.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import numbers

    if isinstance(number, str):
        text = number
        try:
            # Try int first so that integer strings keep every digit.
            number = int(text)
        except ValueError:
            try:
                number = float(text)
            except ValueError as err:
                raise TypeError("The string cannot be converted to a number") from err

    if isinstance(number, numbers.Integral):
        # The 'g' format would convert to float and round the value to
        # sig_digits (e.g. 123456789 -> 1.2345679e+08); use plain grouping.
        formatted = format(int(number), ",")
    else:
        formatted = format(number, f",.{sig_digits}g")
    # The format mini-language only offers "," or "_" for grouping, so any
    # other separator is substituted afterwards.
    return formatted.replace(",", sep)