-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathparse-tweets.js
118 lines (91 loc) · 3.06 KB
/
parse-tweets.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
const fs = require('fs');
const data = require('./tweets.json');

// Build script: reads ./tweets.json (presumably a Twitter archive export whose
// items each carry a `tweet` object — confirm against the data file), rewrites
// each tweet's text into HTML, and writes the result, newest first, to
// src/site/_data/tweets.json.

// IDs of tweets that are skipped entirely. A Set gives O(1) membership checks.
const exclude = new Set([
  "1596182186503606272",
  "1594367560375574528",
  "1594985668073005056",
  "1592471685252644864",
  "1594772054875967488",
  "1592471053699551233",
  "1592179278002917378",
  "1592176704641957888",
  "1592471237657505793",
  "1592471159689580544",
  "1592471105771827200",
  "1592177093106102272",
  "1592528967852257282"
]);

/**
 * Escapes every character that has a special meaning in a regular expression.
 * @param {string} text - literal text to embed in a RegExp source
 * @returns {string} the text, safe to interpolate into `new RegExp(...)`
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Wraps every `@screen_name` occurrence in a link to the user's profile.
 * @param {string} text - tweet text
 * @param {object} mentions - the tweet's `entities.user_mentions` collection
 * @returns {string} text with mentions turned into anchors
 */
function linkifyMentions(text, mentions) {
  const names = [...new Set(
    Object.values(mentions).map((mention) => mention.screen_name).filter(Boolean)
  )];
  if (names.length === 0) {
    return text;
  }
  // One pass over the text with all handles at once: a second pass would match
  // the "@name" inside an anchor that was just inserted and nest the links.
  // The lookahead stops "@foo" from matching the start of "@foobar".
  const pattern = new RegExp(
    `@(${names.map(escapeRegExp).join('|')})(?![A-Za-z0-9_])`,
    'g'
  );
  // A function replacement keeps "$" sequences from being interpreted.
  return text.replace(
    pattern,
    (_match, name) => `<a href="https://twitter.com/${name}">@${name}</a>`
  );
}

/**
 * Replaces each shortened URL in the text with a link to its expanded URL.
 * @param {string} text - tweet text
 * @param {object} urls - the tweet's `entities.urls` collection
 * @returns {string} text with shortened URLs expanded into anchors
 */
function expandLinks(text, urls) {
  // short URL -> expanded URL; a later duplicate overrides an earlier one.
  const links = new Map();
  for (const { url, expanded_url: expandedUrl } of Object.values(urls)) {
    if (url && expandedUrl) {
      links.set(url, expandedUrl);
    }
  }
  let result = text;
  for (const [shortUrl, expandedUrl] of links) {
    // split/join replaces every occurrence literally: no regex metacharacters
    // and no "$" replacement patterns to worry about.
    result = result.split(shortUrl).join(`<a href="${expandedUrl}">${expandedUrl}</a>`);
  }
  return result;
}

/**
 * Removes the shortened URLs that point at the tweet's own media.
 * @param {string} text - tweet text
 * @param {object} mediaEntities - the tweet's `entities.media` collection
 * @returns {string} text without the media links
 */
function stripMediaLinks(text, mediaEntities) {
  const shortUrls = new Set(
    Object.values(mediaEntities).map((item) => item.url).filter(Boolean)
  );
  let result = text;
  for (const shortUrl of shortUrls) {
    result = result.split(shortUrl).join('');
  }
  return result;
}

const tweets = [];
for (const { tweet } of data) {
  if (exclude.has(tweet.id)) {
    continue;
  }
  const { entities } = tweet;
  let fullText = tweet.full_text;

  // linkify mentions
  if (entities?.user_mentions) {
    fullText = linkifyMentions(fullText, entities.user_mentions);
  }
  // remove twitter url shortener
  if (entities?.urls) {
    fullText = expandLinks(fullText, entities.urls);
  }
  // collect embedded media and drop their links from the text
  const media = [];
  if (entities?.media) {
    for (const item of Object.values(entities.media)) {
      media.push(item.media_url);
    }
    fullText = stripMediaLinks(fullText, entities.media);
  }

  tweets.push({
    id: tweet.id,
    platform: "twitter",
    created_at: new Date(tweet.created_at),
    full_text: fullText,
    urls: entities?.urls || null,
    in_reply_to_status_id: tweet.in_reply_to_status_id || null,
    in_reply_to_screen_name: tweet.in_reply_to_screen_name || null,
    // Always an array; empty when the tweet has no media.
    media: { url: media }
  });
}

// Newest first. Subtracting the dates yields 0 for equal timestamps, which
// keeps the comparator consistent.
tweets.sort((a, b) => b.created_at - a.created_at);

fs.writeFile('src/site/_data/tweets.json', JSON.stringify(tweets), err => {
  if (err) {
    console.error(err);
    return;
  }
  // Only report success when the file was actually written.
  console.error("Notes saved");
});