|
4 | 4 |
|
5 | 5 | using System.Collections.Generic; |
6 | 6 | using System.Globalization; |
| 7 | +using System.Threading; |
7 | 8 | using BenchmarkDotNet.Attributes; |
8 | 9 | using BenchmarkDotNet.Engines; |
9 | 10 | using Microsoft.ML.Data; |
@@ -75,39 +76,51 @@ private TransformerChain<MulticlassPredictionTransformer<MaximumEntropyModelPara |
75 | 76 | [Benchmark] |
76 | 77 | public void TrainSentiment() |
77 | 78 | { |
78 | | - // Pipeline |
79 | | - var arguments = new TextLoader.Options() |
| 79 | + // BDN 0.16 installs BenchmarkDotNetSynchronizationContext in the child process. |
| 80 | + // ML.NET's ApplyWordEmbedding downloads a pretrained model using sync-over-async |
| 81 | + // I/O that deadlocks on the single-threaded SyncCtx. Clear it for this benchmark. |
| 82 | + var savedCtx = SynchronizationContext.Current; |
| 83 | + SynchronizationContext.SetSynchronizationContext(null); |
| 84 | + try |
80 | 85 | { |
81 | | - Columns = new TextLoader.Column[] |
| 86 | + // Pipeline |
| 87 | + var arguments = new TextLoader.Options() |
82 | 88 | { |
83 | | - new TextLoader.Column("Label", DataKind.Single, new[] { new TextLoader.Range() { Min = 0, Max = 0 } }), |
84 | | - new TextLoader.Column("SentimentText", DataKind.String, new[] { new TextLoader.Range() { Min = 1, Max = 1 } }) |
85 | | - }, |
86 | | - HasHeader = true, |
87 | | - AllowQuoting = false, |
88 | | - AllowSparse = false |
89 | | - }; |
90 | | - |
91 | | - var loader = mlContext.Data.LoadFromTextFile(_sentimentDataPath, arguments); |
92 | | - var text = mlContext.Transforms.Text.FeaturizeText("WordEmbeddings", new TextFeaturizingEstimator.Options |
| 89 | + Columns = new TextLoader.Column[] |
| 90 | + { |
| 91 | + new TextLoader.Column("Label", DataKind.Single, new[] { new TextLoader.Range() { Min = 0, Max = 0 } }), |
| 92 | + new TextLoader.Column("SentimentText", DataKind.String, new[] { new TextLoader.Range() { Min = 1, Max = 1 } }) |
| 93 | + }, |
| 94 | + HasHeader = true, |
| 95 | + AllowQuoting = false, |
| 96 | + AllowSparse = false |
| 97 | + }; |
| 98 | + |
| 99 | + var loader = mlContext.Data.LoadFromTextFile(_sentimentDataPath, arguments); |
| 100 | + var text = mlContext.Transforms.Text.FeaturizeText("WordEmbeddings", new TextFeaturizingEstimator.Options |
| 101 | + { |
| 102 | + OutputTokensColumnName = "WordEmbeddings_TransformedText", |
| 103 | + KeepPunctuations = false, |
| 104 | + StopWordsRemoverOptions = new StopWordsRemovingEstimator.Options(), |
| 105 | + Norm = TextFeaturizingEstimator.NormFunction.None, |
| 106 | + CharFeatureExtractor = null, |
| 107 | + WordFeatureExtractor = null, |
| 108 | + }, "SentimentText").Fit(loader).Transform(loader); |
| 109 | + |
| 110 | + var trans = mlContext.Transforms.Text.ApplyWordEmbedding("Features", "WordEmbeddings_TransformedText", |
| 111 | + WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) |
| 112 | + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) |
| 113 | + .Fit(text).Transform(text); |
| 114 | + |
| 115 | + // Train |
| 116 | + var trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(); |
| 117 | + var predicted = trainer.Fit(trans); |
| 118 | + _consumer.Consume(predicted); |
| 119 | + } |
| 120 | + finally |
93 | 121 | { |
94 | | - OutputTokensColumnName = "WordEmbeddings_TransformedText", |
95 | | - KeepPunctuations = false, |
96 | | - StopWordsRemoverOptions = new StopWordsRemovingEstimator.Options(), |
97 | | - Norm = TextFeaturizingEstimator.NormFunction.None, |
98 | | - CharFeatureExtractor = null, |
99 | | - WordFeatureExtractor = null, |
100 | | - }, "SentimentText").Fit(loader).Transform(loader); |
101 | | - |
102 | | - var trans = mlContext.Transforms.Text.ApplyWordEmbedding("Features", "WordEmbeddings_TransformedText", |
103 | | - WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) |
104 | | - .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) |
105 | | - .Fit(text).Transform(text); |
106 | | - |
107 | | - // Train |
108 | | - var trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(); |
109 | | - var predicted = trainer.Fit(trans); |
110 | | - _consumer.Consume(predicted); |
| 122 | + SynchronizationContext.SetSynchronizationContext(savedCtx); |
| 123 | + } |
111 | 124 | } |
112 | 125 |
|
113 | 126 | [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })] |
|
0 commit comments