Skip to content

Commit 59541d1

Browse files
authored
Use independent seeds in makeSizedByteStrings (#7735)
* Use independent seeds in makeSizedByteStrings

  `makeSizedByteStrings` used the same `H.Seed` for every element, so each generated ByteString was a prefix of the same deterministic byte sequence. Use `unfoldr (Just . Seed.split)` to produce a stream of independent SplitMix seeds instead, giving uncorrelated content across sizes.

* Extract splitSeeds and apply to Text/Utf8 generators

  `makeSizedTextStrings` and `makeSizedUtf8ByteStrings` had the same bug as `makeSizedByteStrings`: they mapped over the sizes with a single shared seed, producing correlated inputs for the string and decodeUtf8 benchmarks. Extract the seed stream into a named top-level binding `splitSeeds` and reuse it across all three sized-list helpers.
1 parent 5f79449 commit 59541d1

1 file changed

Lines changed: 11 additions & 5 deletions

File tree

plutus-core/cost-model/budgeting-bench/Generators.hs

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,14 @@ import Control.Monad
1212
import Data.Bits
1313
import Data.ByteString (ByteString)
1414
import Data.Int (Int64)
15-
import Data.List as List (foldl')
15+
import Data.List as List (foldl', unfoldr)
1616
import Data.Text (Text)
1717
import Data.Word (Word64)
1818

1919
import Hedgehog qualified as H
2020
import Hedgehog.Internal.Gen qualified as G
2121
import Hedgehog.Internal.Range qualified as R
22+
import Hedgehog.Internal.Seed qualified as Seed
2223
import Hedgehog.Internal.Tree qualified as T
2324
import System.IO.Unsafe (unsafePerformIO)
2425
import System.Random (StdGen, randomR)
@@ -77,9 +78,12 @@ makeSizedIntegers g (n : ns) =
7778
makeSizedByteString :: H.Seed -> Int -> ByteString
7879
makeSizedByteString seed n = genSample seed (G.bytes (R.singleton (8 * n)))
7980

80-
-- FIXME: this is terrible
81+
-- | Infinite stream of independent seeds derived from a root seed.
82+
splitSeeds :: H.Seed -> [H.Seed]
83+
splitSeeds = unfoldr (Just . Seed.split)
84+
8185
makeSizedByteStrings :: H.Seed -> [Int] -> [ByteString]
82-
makeSizedByteStrings seed l = map (makeSizedByteString seed) l
86+
makeSizedByteStrings seed sizes = zipWith makeSizedByteString (splitSeeds seed) sizes
8387

8488
-- TODO: don't use Hedgehog's 'sample' below: it silently resizes the generator
8589
-- to size 30, so listOfByteStringsOfLength and listOfByteStrings are biased
@@ -105,7 +109,8 @@ makeSizedTextString :: H.Seed -> Int -> Text
105109
makeSizedTextString seed n = genSample seed (G.text (R.singleton (2 * n)) G.unicode)
106110

107111
makeSizedTextStrings :: H.Seed -> [Integer] -> [Text]
108-
makeSizedTextStrings seed sizes = fmap (makeSizedTextString seed . fromInteger) sizes
112+
makeSizedTextStrings seed sizes =
113+
zipWith makeSizedTextString (splitSeeds seed) (fmap fromInteger sizes)
109114

110115
{-| Generate a valid UTF-8 bytestring with memory usage approximately n for
111116
benchmarking decodeUtf8. We use the 'unicode' generator because that gives
@@ -115,7 +120,8 @@ makeSizedUtf8ByteString :: H.Seed -> Int -> ByteString
115120
makeSizedUtf8ByteString seed n = genSample seed (G.utf8 (R.singleton (2 * n)) G.unicode)
116121

117122
makeSizedUtf8ByteStrings :: H.Seed -> [Integer] -> [ByteString]
118-
makeSizedUtf8ByteStrings seed sizes = (makeSizedUtf8ByteString seed . fromInteger) <$> sizes
123+
makeSizedUtf8ByteStrings seed sizes =
124+
zipWith makeSizedUtf8ByteString (splitSeeds seed) (fmap fromInteger sizes)
119125

120126
---------------- Data (QuickCheck) ----------------
121127

0 commit comments

Comments
 (0)