Skip to content

Commit cd2c54c

Browse files
committed
fixed bug
1 parent b96e728 commit cd2c54c

12 files changed

Lines changed: 643 additions & 31 deletions

File tree

docs/internal-report.html

Lines changed: 383 additions & 15 deletions
Large diffs are not rendered by default.

src/Tender.Crawler/Parsing/AngleSharpTenderParser.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ public IReadOnlyList<TenderItem> Parse(string html, DateTimeOffset now)
8585
?? tenderCell.TextContent.Trim();
8686
}
8787

88+
// "(更正公告)" 在同一個 <td> 內以獨立 <span> 呈現,不在 pageCode2Img 裡
89+
if (HasCorrectionNotice(tenderCell) && !tenderName.Contains("(更正公告)"))
90+
tenderName += "(更正公告)";
91+
8892
// detail URL 從 <a href="..."> 取得
8993
var detailUrl = tenderCell.QuerySelector("a")?.GetAttribute("href") ?? string.Empty;
9094
detailUrl = NormalizeUrl(detailUrl);
@@ -143,6 +147,29 @@ public IReadOnlyList<TenderItem> Parse(string html, DateTimeOffset now)
143147
};
144148
}
145149

150+
/// <summary>
/// Reports whether a table cell carries the "更正公告" (correction notice) marker
/// somewhere other than inside its link or script content.
/// </summary>
/// <param name="cell">The cell whose direct child nodes are inspected.</param>
/// <returns>
/// <c>true</c> when a direct child element (other than <c>script</c> or <c>a</c>)
/// or a direct text node contains the marker text; otherwise <c>false</c>.
/// </returns>
private static bool HasCorrectionNotice(IElement cell)
{
    // Per the original author's note, the site renders the marker in a <span>
    // (or another inline element) next to the link, or as bare text in the cell,
    // so only the cell's immediate children need to be examined.
    foreach (var child in cell.ChildNodes)
    {
        // Elements qualify unless they are the script or the anchor itself.
        var isEligibleElement = child is IElement element
            && element.LocalName != "script"
            && element.LocalName != "a";

        // Text placed directly in the cell, outside the anchor, qualifies too.
        var isDirectText = child.NodeType == AngleSharp.Dom.NodeType.Text;

        if ((isEligibleElement || isDirectText) && child.TextContent.Contains("更正公告"))
        {
            return true;
        }
    }

    return false;
}
172+
146173
private static string ExtractTenderName(IElement cell)
147174
{
148175
// 找 <script> 標籤內容
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
using System.Globalization;
2+
using System.Windows.Data;
3+
4+
namespace Tender.Desktop.Converters;
5+
6+
/// <summary>
/// One-way value converter that maps a boolean flag to the correction-notice suffix text.
/// </summary>
public sealed class BoolToCorrectionTextConverter : IValueConverter
{
    /// <summary>
    /// Produces the suffix shown after a tender name.
    /// </summary>
    /// <param name="value">The bound value; only a boxed <c>true</c> yields the suffix.</param>
    /// <param name="targetType">Not used.</param>
    /// <param name="parameter">Not used.</param>
    /// <param name="culture">Not used.</param>
    /// <returns>
    /// The suffix text (with a leading space) when <paramref name="value"/> is <c>true</c>;
    /// an empty string for <c>false</c>, <c>null</c>, or any non-boolean value.
    /// </returns>
    public object Convert(object? value, Type targetType, object? parameter, CultureInfo culture)
    {
        if (value is bool flag && flag)
        {
            return " (更正公告)";
        }

        return string.Empty;
    }

    /// <summary>
    /// Not supported; this converter only converts from source to target.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public object ConvertBack(object? value, Type targetType, object? parameter, CultureInfo culture)
    {
        throw new NotSupportedException();
    }
}

src/Tender.Desktop/ViewModels/TenderItemViewModel.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,13 @@ public partial class TenderItemViewModel : ObservableObject
2323
[ObservableProperty]
2424
private string _note = string.Empty;
2525

26+
private const string CorrectionTag = "(更正公告)";
27+
2628
public string SourcePk => Item.SourcePk;
2729
public string AgencyName => Item.AgencyName;
2830
public string TenderName => Item.TenderName;
31+
public bool IsCorrection => Item.TenderName.Contains(CorrectionTag);
32+
public string DisplayTenderName => Item.TenderName.Replace(CorrectionTag, "").Trim();
2933
public string TenderMethod => Item.TenderMethod;
3034
public string? ProcurementType => Item.ProcurementType;
3135
public string AnnouncementDate => Item.AnnouncementDate;

src/Tender.Desktop/Views/DailyQueryView.xaml

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
<conv:ListJoinConverter x:Key="ListJoinConverter" />
1111
<conv:ColorStringToColorConverter x:Key="ColorStringToColor" />
1212
<conv:SortLabelConverter x:Key="SortLabel" />
13+
<conv:BoolToCorrectionTextConverter x:Key="BoolToCorrectionText" />
1314
<conv:DateOnlyToDateTimeConverter x:Key="DateOnlyToDateTime" />
1415
<conv:ZeroToVisibilityConverter x:Key="ZeroToVis" />
1516

@@ -572,7 +573,17 @@
572573
</DataGridTemplateColumn.CellTemplate>
573574
</DataGridTemplateColumn>
574575
<DataGridTextColumn Header="機關名稱" Binding="{Binding AgencyName}" Width="180" MinWidth="140" />
575-
<DataGridTextColumn Header="標案名稱" Binding="{Binding TenderName}" Width="*" MinWidth="320" />
576+
<DataGridTemplateColumn Header="標案名稱" Width="*" MinWidth="320" SortMemberPath="TenderName">
577+
<DataGridTemplateColumn.CellTemplate>
578+
<DataTemplate>
579+
<TextBlock TextTrimming="CharacterEllipsis" VerticalAlignment="Center">
580+
<Run Text="{Binding DisplayTenderName, Mode=OneWay}" />
581+
<Run Text="{Binding IsCorrection, Mode=OneWay, Converter={StaticResource BoolToCorrectionText}}"
582+
Foreground="Red" FontWeight="Bold" />
583+
</TextBlock>
584+
</DataTemplate>
585+
</DataGridTemplateColumn.CellTemplate>
586+
</DataGridTemplateColumn>
576587
<DataGridTextColumn Header="招標方式" Binding="{Binding TenderMethod}" Width="120" MinWidth="100" />
577588
<DataGridTextColumn Header="採購性質" Binding="{Binding ProcurementType}" Width="80" MinWidth="70" />
578589
<DataGridTextColumn Header="公告日期" Binding="{Binding AnnouncementDate}" Width="90" MinWidth="80" />
@@ -611,9 +622,13 @@
611622
<TextBlock Text="📋 標案詳情" FontSize="14" FontWeight="SemiBold" Foreground="{StaticResource TextPrimaryBrush}" />
612623
</Border>
613624
<StackPanel Visibility="{Binding SelectedItem, Converter={StaticResource NullToVis}}">
614-
<TextBlock Text="{Binding SelectedItem.TenderName}" FontWeight="SemiBold" FontSize="14"
625+
<TextBlock FontWeight="SemiBold" FontSize="14"
615626
TextWrapping="Wrap" Margin="0,0,0,12"
616-
Foreground="{StaticResource TextPrimaryBrush}" />
627+
Foreground="{StaticResource TextPrimaryBrush}">
628+
<Run Text="{Binding SelectedItem.DisplayTenderName, Mode=OneWay}" />
629+
<Run Text="{Binding SelectedItem.IsCorrection, Mode=OneWay, Converter={StaticResource BoolToCorrectionText}}"
630+
Foreground="Red" />
631+
</TextBlock>
617632

618633
<Grid>
619634
<Grid.ColumnDefinitions>

src/Tender.Storage/Repositories/TenderRepository.cs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,16 @@ public async Task<MergeResult> MergeDailySnapshotAsync(
6565
int updatedCount = 0;
6666
int skippedCount = 0;
6767

68-
var merged = new Dictionary<string, TenderItem>(existingDict, StringComparer.Ordinal);
68+
// Replace 策略:最終快照只包含本次爬蟲結果,不保留舊資料中本次未出現的記錄。
69+
// 仍比對 existingDict 以保留 CreatedAt 並計算 insert/update/skip 數量。
70+
var merged = new Dictionary<string, TenderItem>(incomingItems.Count, StringComparer.Ordinal);
6971

7072
foreach (var incoming in incomingItems)
7173
{
7274
if (existingDict.TryGetValue(incoming.SourcePk, out var existingItem))
7375
{
74-
// 判斷是否有變更(比較關鍵欄位)
7576
if (HasChanges(existingItem, incoming))
7677
{
77-
// 更新:保留 CreatedAt,更新 LastSeenAt 與其他欄位
7878
merged[incoming.SourcePk] = incoming with
7979
{
8080
CreatedAt = existingItem.CreatedAt,
@@ -84,14 +84,12 @@ public async Task<MergeResult> MergeDailySnapshotAsync(
8484
}
8585
else
8686
{
87-
// 略過:保留既有,只更新 LastSeenAt
8887
merged[incoming.SourcePk] = existingItem with { LastSeenAt = now };
8988
skippedCount++;
9089
}
9190
}
9291
else
9392
{
94-
// 新增
9593
merged[incoming.SourcePk] = incoming with
9694
{
9795
CreatedAt = now,

tests/Tender.Crawler.Tests/AngleSharpTenderParserTests.cs

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ private static string LoadFixture(string name)
2727
// ---- 正常解析 ----
2828

2929
[Fact]
30-
public void Parse_SampleHtml_Returns3Items()
30+
public void Parse_SampleHtml_Returns4Items()
3131
{
3232
var html = LoadFixture("tender_list_sample.html");
3333
var items = _parser.Parse(html, _now);
34-
items.Should().HaveCount(3);
34+
items.Should().HaveCount(4);
3535
}
3636

3737
[Fact]
@@ -77,6 +77,63 @@ public void Parse_SampleHtml_CreatedAtAndLastSeenAt_SetToNow()
7777
}
7878
}
7979

80+
// ---- 更正公告 ----
81+
82+
// The sample fixture row whose SourcePk is "Y29ycmVjdDAxMQ==" must keep its
// original name prefix and also carry the correction tag.
[Fact]
public void Parse_SampleHtml_CorrectionItemHasCorrectionTag()
{
    var parsed = _parser.Parse(LoadFixture("tender_list_sample.html"), _now);

    var correctedName = parsed.First(item => item.SourcePk == "Y29ycmVjdDAxMQ==").TenderName;

    correctedName.Should().Contain("(更正公告)")
        .And.StartWith("台北市道路養護工程");
}
92+
93+
// The first two parsed items of the sample fixture must not be tagged as corrections.
[Fact]
public void Parse_SampleHtml_NormalItemsDoNotHaveCorrectionTag()
{
    var parsed = _parser.Parse(LoadFixture("tender_list_sample.html"), _now);

    // Only indexes 0 and 1 are checked, matching the original assertions.
    for (var index = 0; index < 2; index++)
    {
        parsed[index].TenderName.Should().NotContain("更正公告");
    }
}
102+
103+
// A correction <span> placed next to the link inside the same <td> must be
// appended to the name extracted from the pageCode2Img script call.
[Fact]
public void Parse_CorrectionSpanInTd_AppendsCorrectionTag()
{
    // Minimal one-row table: the tender cell holds the anchor (with the script)
    // followed by the correction span.
    const string markup = """
        <html><body>
        <table id="tpam">
        <thead><tr><th>1</th><th>2</th><th>3</th><th>4</th><th>5</th><th>6</th><th>7</th><th>8</th><th>9</th></tr></thead>
        <tbody>
        <tr>
        <td>1</td>
        <td>測試機關</td>
        <td>
        <a href="https://web.pcc.gov.tw/prkms/urlSelector/common/tpam?pk=CORRPK01">
        <script>Geps3.CNS.pageCode2Img("道路工程");</script>
        </a>
        <span class="red">(更正公告)</span>
        </td>
        <td>T-001</td>
        <td>公開招標</td>
        <td>工程類</td>
        <td>115/05/28</td>
        <td>115/06/15</td>
        <td>5,000,000</td>
        </tr>
        </tbody>
        </table>
        </body></html>
        """;

    var parsed = _parser.Parse(markup, _now);

    parsed.Should().HaveCount(1);
    parsed[0].TenderName.Should().Be("道路工程(更正公告)");
}
136+
80137
// ---- 空表格 ----
81138

82139
[Fact]

tests/Tender.Crawler.Tests/CrawlerOrchestratorTests.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,12 +252,12 @@ public async Task RunAsync_Dedup_MergesCorrectly()
252252
var orchestrator = CreateOrchestrator();
253253
var run = await orchestrator.RunAsync(new CrawlerArgs { Mode = CrawlerMode.Manual, TargetDate = targetDate });
254254

255-
// 總數 = 50 + 3 新 = 53
255+
// Replace 策略:只保留本次爬蟲的 7 筆
256256
var snapshot = await _tenderRepo.LoadAsync(targetDate);
257-
snapshot!.Items.Should().HaveCount(53);
257+
snapshot!.Items.Should().HaveCount(7);
258258

259259
// 每個 SourcePk 唯一
260-
snapshot.Items.Select(i => i.SourcePk).Distinct().Should().HaveCount(53);
260+
snapshot.Items.Select(i => i.SourcePk).Distinct().Should().HaveCount(7);
261261

262262
// Run 紀錄
263263
run.InsertedCount.Should().Be(3);

tests/Tender.Crawler.Tests/Fixtures/tender_list_sample.html

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<head><meta charset="utf-8"><title>查詢結果</title></head>
44
<body>
55
<div id="queryResult">
6-
<p>共找到 <strong>3</strong> 筆資料</p>
6+
<p>共找到 <strong>4</strong> 筆資料</p>
77
<table id="tpam">
88
<thead>
99
<tr>
@@ -64,6 +64,22 @@
6464
<td>115/06/05</td>
6565
<td>3,500,000</td>
6666
</tr>
67+
<tr>
68+
<td>4</td>
69+
<td>台北市政府工務局</td>
70+
<td>
71+
<a href="https://web.pcc.gov.tw/prkms/urlSelector/common/tpam?pk=Y29ycmVjdDAxMQ==&amp;tenderType=TENDER_DECLARATION">
72+
<script>Geps3.CNS.pageCode2Img("台北市道路養護工程");</script>
73+
</a>
74+
<span class="red">(更正公告)</span>
75+
</td>
76+
<td>TPE-PW-114-007</td>
77+
<td>公開招標</td>
78+
<td>工程類</td>
79+
<td>115/05/08</td>
80+
<td>115/06/10</td>
81+
<td>8,000,000</td>
82+
</tr>
6783
</tbody>
6884
</table>
6985
</div>
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
using System.Globalization;
2+
using FluentAssertions;
3+
using Tender.Desktop.Converters;
4+
using Xunit;
5+
6+
namespace Tender.Desktop.Tests;
7+
8+
/// <summary>
/// Unit tests for <see cref="BoolToCorrectionTextConverter.Convert"/>.
/// </summary>
public sealed class BoolToCorrectionTextConverterTests
{
    private readonly BoolToCorrectionTextConverter _sut = new();

    // true maps to the suffix text, including its leading space.
    [Fact]
    public void Convert_True_ReturnsCorrectionText()
        => ConvertToText(true).Should().Be(" (更正公告)");

    // false maps to an empty string.
    [Fact]
    public void Convert_False_ReturnsEmpty()
        => ConvertToText(false).Should().Be(string.Empty);

    // A null input is treated like false.
    [Fact]
    public void Convert_Null_ReturnsEmpty()
        => ConvertToText(null).Should().Be(string.Empty);

    // Invokes the converter with the fixed target type, parameter and culture shared by every test.
    private object ConvertToText(object? input)
        => _sut.Convert(input, typeof(string), null, CultureInfo.InvariantCulture);
}

0 commit comments

Comments
 (0)