Skip to content

Commit d1f22a2

Browse files
authored
Fix duplicate issue detection logic (#1273)
* Fix duplicate issue detection logic
* Added logs
1 parent 402f632 commit d1f22a2

1 file changed

Lines changed: 19 additions & 9 deletions

File tree

.github/workflows/duplicate_issue_detector.yaml

Lines changed: 19 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ jobs:
4545
body: issue.body || ""
4646
},
4747
others: upstreamIssues
48-
.filter(i => !i.pull_request)
48+
.filter(i => !i.pull_request && i.number !== issue.number)
4949
.map(i => ({
5050
number: i.number,
5151
title: i.title,
@@ -66,6 +66,7 @@ jobs:
6666
6767
THRESHOLD = 0.82
6868
MAX_RESULTS = 3
69+
print(f"Threshold = {THRESHOLD}")
6970
7071
with open("issues.json") as f:
7172
data = json.load(f)
@@ -77,6 +78,8 @@ jobs:
7778
7879
current_text = text(data["current"])
7980
others = data["others"]
81+
print(f"Current issue: #{data['current']['number']}")
82+
print(f"Candidate issues: {len(others)}")
8083
8184
if not others:
8285
with open("matches.json", "w") as f:
@@ -94,15 +97,22 @@ jobs:
9497
sims = cosine_similarity([current_vec], other_vecs)[0]
9598
9699
matches = []
100+
97101
for issue, score in zip(others, sims):
98-
if score >= THRESHOLD:
99-
matches.append({
100-
"number": issue["number"],
101-
"title": issue["title"],
102-
"url": issue["url"],
103-
"state": issue["state"],
104-
"score": round(float(score) * 100, 1)
105-
})
102+
print(
103+
f"Issue #{issue['number']} | "
104+
f"Score={score:.4f} | "
105+
f"Title={issue['title']}"
106+
)
107+
108+
if score >= THRESHOLD:
109+
matches.append({
110+
"number": issue["number"],
111+
"title": issue["title"],
112+
"url": issue["url"],
113+
"state": issue["state"],
114+
"score": round(float(score) * 100, 1)
115+
})
106116
107117
matches = sorted(matches, key=lambda x: x["score"], reverse=True)[:MAX_RESULTS]
108118

0 commit comments

Comments
 (0)