-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathProject_Objects.R
More file actions
164 lines (124 loc) · 7.96 KB
/
Project_Objects.R
File metadata and controls
164 lines (124 loc) · 7.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
########
## Most critical reused objects
########
######
## Main search queries
######
# Topic queries, one entry per event type. The three spill queries end with the
# same water-body filter, so each is assembled from fragments; paste() joins
# the fragments with a single space.
# Groups written as -(-a -b ...) are presumably the negated-term way of
# expressing "a OR b OR ..." -- confirm against the search API's operator rules.
main.queries <- local({
  water.body <- "-(-ocean -beach -beaches -bay -gulf -sea -lake -river -creek -waterway)"

  red.tide <- "(red-algae OR red-tides OR red-tide OR #redtide OR karenia-brevis OR kbrevis OR #kareniabrevis)"
  blue.green <- "(blue-green-algae OR blue-green-algal OR #bluegreenalgae OR cyanobacteria OR cyanotoxins)"

  oil.spill <- paste(
    "(oil OR crude OR petroleum OR tar_ball OR tar_balls)",
    "-(-leak -leaks -leaked -leaking -spill -spills -spilled -spilling)",
    water.body
  )

  sewage.spill <- paste(
    "(sewer OR sewers OR sewage OR septic OR stormwater OR storm_water)",
    "-(-untreated -raw -overflow -discharge -pump -pumps -pumping -pumped -leak -leaks -leaked -leaking -spill -spills -spilled -spilling -dump -dumps -dumped -dumping)",
    water.body
  )

  # NOTE(review): unlike OilSpill, the leak/spill terms here are nested one
  # level deeper: -(-(-leak ...) -(-spill ...)). If "-" is plain negation this
  # reads as NOT(leak-term AND spill-term) rather than
  # (leak-term OR spill-term) -- confirm this is intended.
  industrial.spill <- paste(
    "(wastewater OR contaminants OR contamination OR contaminating OR chemical OR chemicals)",
    "-(-(-leak -leaks -leaked -leaking) -(-spill -spills -spilled -spilling))",
    water.body
  )

  list(
    RedTide = red.tide,
    BlueGreen = blue.green,
    OilSpill = oil.spill,
    SewageSpill = sewage.spill,
    IndustrialSpill = industrial.spill
  )
})
#####
## Geographical reference terms, by area
#####
# Location filter for the Tampa area (stored under `Tampa` in area.terms).
# Each vector element is one line of the query; the elements are joined with
# "\n", so the resulting string spans several lines.
Tampa.query.chunk <- paste(
  c(
    "-(-Tampa -Tampas -#TampaBay -TB-area -((Hillsborough OR HillsboroughCounty) (FL OR Florida))",
    "-Apollo-Beach -#ApolloBeach -Wimauma",
    "-((Gibsonton OR Ruskin OR Sun-City) (FL OR Florida)) -Hillsborough-Bay",
    "-Davis-Islands -Alafia-River -McKay-Bay",
    "-lake-thonotosassa)"
  ),
  collapse = "\n"
)
# Character count of the chunk; presumably a manual check against a query
# length limit -- confirm.
nchar(Tampa.query.chunk)
# Location filter for the Pinellas / Clearwater area (stored under
# `Pinellas.Clearwater` in area.terms). One vector element per query line,
# joined with "\n".
Pinellas.Clearwater.query.chunk <- paste(
  c(
    "-(-Pinellas -PinellasCounty -((Clearwater OR Dunedin) (FL OR Florida)) -Clearwater-Beach -#ClearwaterBeach",
    "-Indian-Rocks-Beach -#IndianRocksBeach -Tarpon-Springs -#TarponSprings -Belleair -Palm-Harbor -#PalmHarbor -Safety-Harbor -#SafetyHarbor",
    "-Honeymoon-Island -Sand-Key",
    "-Caladesi -Lake-Tarpon)"
  ),
  collapse = "\n"
)
# Character count of the chunk; presumably a manual check against a query
# length limit -- confirm.
nchar(Pinellas.Clearwater.query.chunk)
# -\"Redington Beach\" -\"Redington Shores\" => "Redington" could just be added to Treasure Island/Tierra Verde in "FL OR Florida" chunk
# Tierra Verde could be dropped if need be for space. Not super-popular, but included just in case.
# Location filter for the St. Petersburg area (stored under `Pinellas.StPete`
# in area.terms). One vector element per query line, joined with "\n".
StPete.query.chunk <- paste(
  c(
    "-(-StPetersburg -St-Petersburg -St-Pete -StPete -#StPeteBeach",
    "-Madeira-Beach -#MadeiraBeach -((Treasure-Island OR Tierra-Verde) (FL OR Florida)) -Sunshine-Skyway",
    "-Fort-De-Soto -Fort-DeSoto -Redington-Beach -Redington-Shores -Pass-a-grille -Boca-Ciega-Bay",
    "-Egmont-Key -Weedon-Island)"
  ),
  collapse = "\n"
)
# Character count of the chunk; presumably a manual check against a query
# length limit -- confirm.
nchar(StPete.query.chunk)
# The last few - Terra Ceia, Palma Sola Bay, Bishop Harbor - have been really low mentions.
# Could easily drop them for space if need be.
# Location filter for the Manatee County area (stored under `Manatee` in
# area.terms). One vector element per query line, joined with "\n".
Manatee.query.chunk <- paste(
  c(
    "-(-Manatee-county -Manatee-counties -ManateeCounty -Bradenton -Bradentons -#BradentonBeach",
    "-Anna-Maria-Island -#AnnaMariaIsland -Longboat-Key -#LongboatKey -Holmes-Beach -#HolmesBeach",
    # Coquina-Beach is paired with -NC -Carolina inside its own group.
    "-Manatee-River -#ManateeRiver -Port-Manatee -(Coquina-Beach -NC -Carolina)",
    "-(Terra-Ceia (FL OR Florida))",
    "-Palma-Sola-Bay -Bishop-Harbor",
    "-lake-manatee)"
  ),
  collapse = "\n"
)
# Character count of the chunk; presumably a manual check against a query
# length limit -- confirm.
nchar(Manatee.query.chunk)
# #ManasotaKey, #CaseyKey, #LemonBay hashes are not that popular, could drop for space.
# Watch for 'Sarasota Herald-Tribune' in post-processing: seemingly the only reference to the area.
# Location filter for the Sarasota area (stored under `Sarasota` in
# area.terms). One vector element per query line, joined with "\n".
Sarasota.query.chunk <- paste(
  c(
    "-(-Sarasota -Sarasotas -SarasotaCounty",
    "-Siesta-Key-Beach -#SiestaKeyBeach",
    "-((Venice OR Englewood OR North-Port OR #NorthPort OR Lido-Beach) (FL OR Florida)) -Casey-Key -#CaseyKey -Nokomis -Lemon-Bay -#LemonBay -St-Armands -#StArmands",
    "-Manasota-Key -#ManasotaKey -Manasota-Beach -Englewood-Beach -Lido-Key",
    "-Caspersen-Beach -Stump-Pass -#SarasotaBay)"
  ),
  collapse = "\n"
)
# Character count of the chunk; presumably a manual check against a query
# length limit -- confirm.
nchar(Sarasota.query.chunk)
# That last line of places (Werner-Boyce, Three Rooker, Beacon) - very low mentions.. could easily drop if space is needed.
# Elfers & Shady Hills are both pretty low on mentions - could also drop.
# Pithlachascotee is really low mentions, but at least it's a water body (river).. could still drop it though.
# Location filter for the Pasco County area (stored under `Pasco` in
# area.terms). One vector element per query line, joined with "\n".
Pasco.query.chunk <- paste(
  c(
    "-(-(Pasco (county OR counties OR Florida OR FL)) -PascoCounty",
    "-Port-Richey -#PortRichey",
    "-((Bayonet-Point OR Anclote OR Elfers OR Shady-Hills) (FL OR Florida))",
    "-Cotee-River -Pithlachascotee -Jasmine-Estates -Key-Vista -Aripeka",
    "-Werner-Boyce -Three-Rooker-Island -Beacon-Square",
    # The group closes on its own line, followed by one excluded account handle.
    ") -@Lou_Port_Richey"
  ),
  collapse = "\n"
)
# Character count of the chunk; presumably a manual check against a query
# length limit -- confirm.
nchar(Pasco.query.chunk)
# Lookup of the per-area location filters, keyed by area name.
# Values and names are listed in matching order.
area.terms <- setNames(
  list(
    Tampa.query.chunk,
    Pinellas.Clearwater.query.chunk,
    StPete.query.chunk,
    Manatee.query.chunk,
    Sarasota.query.chunk,
    Pasco.query.chunk
  ),
  c(
    "Tampa",
    "Pinellas.Clearwater",
    "Pinellas.StPete",
    "Manatee",
    "Sarasota",
    "Pasco"
  )
)
#####
## !!! LANG:EN - DELETES TWEETS THAT ARE ONLY LINKS or ONLY MEDIA....
## https://twittercommunity.com/t/unkown-language-code-qht-returned-by-api/172819/2
##
## lang:qam 3 for tweets with mentions only (works for tweets since 2022-06-14)
## lang:qct 1 for tweets with cashtags only (works for tweets since 2022-06-14)
## ... etc
## SOLUTION: WILL HAVE TO USE "(x OR y OR z)" format.. at least for ONE of the (main, geographical) query chunks
####
# Plain search words for the Tampa area, one per line. Presumably combined
# elsewhere into an "(x OR y OR z)" style query -- confirm against the caller.
Tampa.query.words <- c(
  "Tampa",
  "Tampas",
  "#TampaBay",
  "TB area",
  # County names
  "Hillsborough",
  "HillsboroughCounty",
  "Apollo Beach",
  "#ApolloBeach",
  "Wimauma",
  "Gibsonton",
  "Ruskin",
  "Sun City",
  "Hillsborough Bay",
  "Davis Islands",
  "Alafia River",
  "McKay Bay",
  "lake thonotosassa"
)
# Plain search words for the Pinellas / Clearwater area, one per line.
# Presumably combined elsewhere into an "(x OR y OR z)" style query -- confirm
# against the caller.
Pinellas.Clearwater.query.words <- c(
  "Pinellas",
  "PinellasCounty",
  "Clearwater",
  "Dunedin",
  "Clearwater Beach",
  "#ClearwaterBeach",
  "Indian Rocks Beach",
  "#IndianRocksBeach",
  "Tarpon Springs",
  "#TarponSprings",
  "Belleair",
  "Palm Harbor",
  "#PalmHarbor",
  "Safety Harbor",
  "#SafetyHarbor",
  "Honeymoon Island",
  "Sand Key",
  "Caladesi",
  "Lake Tarpon"
)
# -\"Redington Beach\" -\"Redington Shores\" => "Redington" could just be added to Treasure Island/Tierra Verde in "FL OR Florida" chunk
# Tierra Verde could be dropped if need be for space. Not super-popular, but included just in case.
# Plain search words for the St. Petersburg area, one per line. Presumably
# combined elsewhere into an "(x OR y OR z)" style query -- confirm against
# the caller.
StPete.query.words <- c(
  # Spelling variants of the city name
  "StPetersburg",
  "St Petersburg",
  "St Pete",
  "StPete",
  "#StPeteBeach",
  "Madeira Beach",
  "#MadeiraBeach",
  "Treasure Island",
  "Tierra Verde",
  "Sunshine Skyway",
  "Fort De Soto",
  "Fort DeSoto",
  "Redington Beach",
  "Redington Shores",
  "Pass a grille",
  "Boca Ciega Bay",
  "Egmont Key",
  "Weedon Island"
)
# The last few - Terra Ceia, Palma Sola Bay, Bishop Harbor - have been really low mentions.
# Could easily drop them for space if need be.
# Plain search words for the Manatee County area, one per line. Presumably
# combined elsewhere into an "(x OR y OR z)" style query -- confirm against
# the caller.
Manatee.query.words <- c(
  "Manatee county",
  "Manatee counties",
  "ManateeCounty",
  "Bradenton",
  "Bradentons",
  "#BradentonBeach",
  "Anna Maria Island",
  "#AnnaMariaIsland",
  "Longboat Key",
  "#LongboatKey",
  "Holmes Beach",
  "#HolmesBeach",
  "Manatee River",
  "#ManateeRiver",
  "Port Manatee",
  "Coquina Beach",
  "Terra Ceia",
  "Palma Sola Bay",
  "Bishop Harbor",
  "lake manatee"
)
# #ManasotaKey, #CaseyKey, #LemonBay hashes are not that popular, could drop for space.
# Watch for 'Sarasota Herald Tribune' in post processing: seemingly the only reference to the area.
# Plain search words for the Sarasota area, one per line. Presumably combined
# elsewhere into an "(x OR y OR z)" style query -- confirm against the caller.
Sarasota.query.words <- c(
  "Sarasota",
  "Sarasotas",
  "SarasotaCounty",
  "Siesta Key Beach",
  "#SiestaKeyBeach",
  "Venice",
  "Englewood",
  "North Port",
  "#NorthPort",
  "Lido Beach",
  "Casey Key",
  "#CaseyKey",
  "Nokomis",
  "Lemon Bay",
  "#LemonBay",
  "St Armands",
  "#StArmands",
  "Manasota Key",
  "#ManasotaKey",
  "Manasota Beach",
  "Englewood Beach",
  "Lido Key",
  "Caspersen Beach",
  "Stump Pass",
  "#SarasotaBay"
)
# That last line of places (Werner-Boyce, Three Rooker, Beacon) - very low mentions.. could easily drop if space is needed.
# Elfers & Shady Hills are both pretty low on mentions - could also drop.
# Pithlachascotee is really low mentions, but at least it's a water body (river).. could still drop it though.
# Plain search words for the Pasco County area, one per line. Presumably
# combined elsewhere into an "(x OR y OR z)" style query -- confirm against
# the caller.
Pasco.query.words <- c(
  "Pasco county",
  "Pasco counties",
  "PascoCounty",
  "Port Richey",
  "#PortRichey",
  "Bayonet Point",
  "Anclote",
  "Elfers",
  "Shady Hills",
  "Cotee River",
  "Pithlachascotee",
  "Jasmine Estates",
  "Key Vista",
  "Aripeka",
  "Werner Boyce",
  "Three Rooker Island",
  "Beacon Square"
)
# Per-area search-word vectors, keyed by the same area names and in the same
# order as area.terms, so both lists can be indexed by area name
# (e.g. area.words[["Tampa"]]). Positional indexing (area.words[[1]], ...)
# still works.
area.words <- list(
  Tampa = Tampa.query.words,
  Pinellas.Clearwater = Pinellas.Clearwater.query.words,
  Pinellas.StPete = StPete.query.words,
  Manatee = Manatee.query.words,
  Sarasota = Sarasota.query.words,
  Pasco = Pasco.query.words
)