Skip to content

Commit d9a29d4

Browse files
authored
[ThreadsBridge] Add timestamp support using taken_at from profile JSON (#4916)
1 parent ae9e342 commit d9a29d4

1 file changed

Lines changed: 24 additions & 9 deletions

File tree

bridges/ThreadsBridge.php

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -73,15 +73,26 @@ public function collectData()
7373

7474
$jsonBlobs = $html->find('script[type="application/json"]');
7575

76-
$gatheredCodes = [];
76+
$gatheredPosts = [];
7777
$limit = $this->getInput('limit');
7878
foreach ($jsonBlobs as $jsonBlob) {
79-
// The structure of the JSON document is likely to change, but we're looking for a "code" inside a "post"
80-
foreach ($this->recursiveFind($this->recursiveFind(json_decode($jsonBlob->innertext), 'post'), 'code') as $candidateCode) {
79+
// The structure of the JSON document is likely to change, but we're looking for "post" objects
80+
foreach ($this->recursiveFind(json_decode($jsonBlob->innertext), 'post') as $post) {
81+
if (!is_object($post) && !is_array($post)) {
82+
continue;
83+
}
84+
$post = (array)$post;
85+
if (!isset($post['code'])) {
86+
continue;
87+
}
88+
$candidateCode = $post['code'];
8189
// code should be like CzZk4-USq1O or Cy3m1VnRiwP or Cywjyrdv9T6 or CzZk4-USq1O
82-
if (grapheme_strlen($candidateCode) == 11 and !in_array($candidateCode, $gatheredCodes)) {
83-
$gatheredCodes[] = $candidateCode;
84-
if (count($gatheredCodes) >= $limit) {
90+
if (grapheme_strlen($candidateCode) == 11 and !isset($gatheredPosts[$candidateCode])) {
91+
$gatheredPosts[$candidateCode] = [
92+
'code' => $candidateCode,
93+
'taken_at' => $post['taken_at'] ?? null,
94+
];
95+
if (count($gatheredPosts) >= $limit) {
8596
break 2;
8697
}
8798
}
@@ -91,10 +102,10 @@ public function collectData()
91102
$this->feedName = html_entity_decode($html->find('meta[property=og:title]', 0)->content);
92103
// todo: meta[property=og:description] could populate the feed description
93104

94-
foreach ($gatheredCodes as $postCode) {
105+
foreach ($gatheredPosts as $postData) {
95106
$item = [];
96107
// post URL is like: https://www.threads.net/@zuck/post/Czrr520PZfh
97-
$item['uri'] = $this->getURI() . '/post/' . $postCode;
108+
$item['uri'] = $this->getURI() . '/post/' . $postData['code'];
98109
$articleHtml = getSimpleHTMLDOMCached($item['uri'], 15778800); // cache time: six months
99110

100111
// Relying on meta tags ought to be more reliable.
@@ -111,7 +122,11 @@ public function collectData()
111122
}
112123

113124
// todo: parse hashtags out of content for $item['categories']
114-
// todo: try to scrape out a timestamp for $item['timestamp'], it's not in the meta tags
125+
126+
// Extract timestamp from profile JSON data (taken_at is a Unix timestamp)
127+
if (isset($postData['taken_at']) && $postData['taken_at']) {
128+
$item['timestamp'] = $postData['taken_at'];
129+
}
115130

116131
$this->items[] = $item;
117132
}

0 commit comments

Comments
 (0)