-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAnalisador.py
150 lines (121 loc) · 6.5 KB
/
Analisador.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
"""Collect tweets about ride-hailing brands (Uber, Cabify, 99pop), classify
their sentiment with a Naive Bayes model and persist results via the DAO."""
import os
import time

import tweepy
from geopy.geocoders import Nominatim
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

import persistencia.DataBase as dao

# Labelled training corpus (tweet text + sentiment class).
BUSCAR_TEXTO_TWEETS = "select texto,classe from voudeque.tweet,voudeque.sentimento where voudeque.tweet.id_sentimento=voudeque.sentimento.id;"
# Tracked hashtags, one query per brand.
BUSCA_HASHTAGS_UBER = "select texto from voudeque.hashtag,voudeque.marca where hashtag.id_marca = marca.id and marca.nome = 'Uber';"
BUSCA_HASHTAGS_CABIFY = "select texto from voudeque.hashtag,voudeque.marca where hashtag.id_marca = marca.id and marca.nome = 'Cabify';"
BUSCA_HASHTAGS_99POP = "select texto from voudeque.hashtag,voudeque.marca where hashtag.id_marca = marca.id and marca.nome = '99pop';"

# SECURITY: these Twitter credentials were committed to source control in
# plain text. They are now read from the environment; the literal values are
# kept only as a backward-compatible fallback — rotate these keys and delete
# the fallback strings.
OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '865998132517236736-Bn4F0J8agczPJOSE9CzTOzqkrvuTp75')
OAUTH_SECRET = os.environ.get('TWITTER_OAUTH_SECRET', 'ziX22stOPOkCZi4vQnVLQXOWoUDGeOBaNMu64mDVdxgcq')
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', 'Jk1j59W1pzteRMVy85SRVKQZN')
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', 'C437hqnxUEhlLfcikKK0aOJjENEDyQ0mhg3xMxt2r9QLZIZK8U')

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_SECRET)
api = tweepy.API(auth)
# geopy requires an explicit user_agent (mandatory since geopy 2.0).
geolocator = Nominatim(user_agent="voudeque-analisador")
listaTweet = []
listaClassificacao = []
# Shared classifier + vectorizer, trained by treinar() and used by
# pegarTweetsNovos() to score incoming tweets.
modelo = MultinomialNB()
vectorizer = CountVectorizer(analyzer="word")
def treinar(model,vector):
    """Fit the sentiment classifier on the labelled tweets stored in the DB.

    model:  a scikit-learn classifier exposing fit().
    vector: a CountVectorizer; fit_transform() is called on it in place, so
            it stays usable for transforming new tweets afterwards.
    """
    linhas = dao.Busca_SQL(BUSCAR_TEXTO_TWEETS)
    textos = [linha[0] for linha in linhas]
    rotulos = [linha[1] for linha in linhas]
    # Build the term-frequency matrix, then train on it.
    frequencias = vector.fit_transform(textos)
    model.fit(frequencias, rotulos)
def buscarHashtags():
    """Return the tracked hashtag texts, one list per brand.

    Returns a list of three lists, in this fixed order:
    [uber_hashtags, cabify_hashtags, 99pop_hashtags].

    The original implementation repeated the same fetch-and-unwrap loop three
    times; the queries are now data-driven, preserving the query order and
    the returned shape exactly.
    """
    consultas = (BUSCA_HASHTAGS_UBER, BUSCA_HASHTAGS_CABIFY, BUSCA_HASHTAGS_99POP)
    # Each DB row is a 1-tuple; unwrap to the bare hashtag text.
    return [[linha[0] for linha in dao.Busca_SQL(consulta)] for consulta in consultas]
def pegarTweetsNovos():
    """Fetch recent tweets for each brand's hashtags, classify their
    sentiment, and persist user / place / tweet / hashtag rows; finally prune
    users and places left without any tweet.

    NOTE(review): every statement below builds SQL by string concatenation
    from tweet/user text — a SQL-injection risk. As used here the dao API
    accepts only a raw SQL string; switch to parameterized queries if
    dao supports them.
    """
    hashtags_por_marca = buscarHashtags()
    id_marca = 0
    for grupo in hashtags_por_marca:
        id_marca += 1
        # Position in buscarHashtags() output determines the brand.
        if id_marca == 1:
            marca = "uber"
        elif id_marca == 2:
            marca = "cabify"
        elif id_marca == 3:
            marca = "99pop"
        elif id_marca == 4:
            # Defensive wrap-around; unreachable while buscarHashtags()
            # returns exactly three groups.
            marca = "uber"
            id_marca = 1
        for termo_busca in grupo:
            resultados = tweepy.Cursor(api.search, q=str(termo_busca), lang="pt").items(5)
            time.sleep(10)  # throttle to respect Twitter rate limits
            for twe in resultados:
                try:
                    # Strip newlines and double quotes so the values survive
                    # the string-built SQL below.
                    nome_usuario = str(twe.user.name).replace("\n", " ").replace('"', '')
                    screen_name_usuario = str(twe.user.screen_name).replace("\n", " ").replace('"', '')
                    lugar_usuario = twe.user.location.replace("\n", " ").replace('"', '')
                    id_tweet = twe.id
                    texto_tweet = str(twe.text).replace("\n", " ").replace('"', '')
                    # Score the tweet with the globally trained model.
                    freq_testes = vectorizer.transform([texto_tweet])
                    sent = modelo.predict(freq_testes)[0]
                    if sent == "Positivo":
                        sentimento = 1
                    elif sent == "Negativo":
                        sentimento = 2
                    else:
                        sentimento = 3
                    created_at_tweet = twe.created_at
                    # Renamed from `hashtags`: the original rebound the outer
                    # loop's variable name, which obscured the code.
                    hashtags_do_tweet = twe.entities.get('hashtags')
                    if lugar_usuario != "":
                        dao.Executa_SQL("insert into voudeque.usuario(nome, username, nome_lugar) values('" + str(nome_usuario) + "', '" + str(screen_name_usuario) + "', '" + str(lugar_usuario) + "');")
                    else:
                        dao.Executa_SQL("insert into voudeque.usuario(nome, username) values('" + str(nome_usuario) + "', '" + str(screen_name_usuario) + "');")
                    id_usuario = dao.Busca_SQL("select id from voudeque.usuario where usuario.nome = '" + str(nome_usuario) + "';")[0][0]
                    id_lugar = "null"  # SQL literal spliced in when no place is attached
                    if twe.place is not None:
                        # bounding_box corner is [longitude, latitude]; reverse
                        # geocoding takes "lat, lon".
                        coordenadas = twe.place.bounding_box.coordinates[0][0]
                        local = geolocator.reverse(query=str(coordenadas[1]) + ", " + str(coordenadas[0]),
                                                   language="pt")
                        cidade = local.raw["address"]["city"]
                        dao.Executa_SQL("insert into voudeque.lugar(latitude, longitude, nome_lugar) values('" + str(coordenadas[1]) + "', '" + str(coordenadas[0]) + "', '" + str(
                            cidade).lower() + "');")
                        id_lugar = dao.Busca_SQL("select id from voudeque.lugar where nome_lugar = '" + str(cidade).lower() + "'" + " and latitude = " + "'" + str(
                            coordenadas[1]) + "'" + "and longitude = " + "'" + str(coordenadas[0]) + "';")[0][0]
                    dao.Executa_SQL(
                        "insert into voudeque.tweet(id, texto, dataHora, id_sentimento, id_usuario, id_lugar) values('" + str(id_tweet) + "', '" + str(texto_tweet) + "', '" + str(created_at_tweet) + "', '" + str(sentimento)
                        + "', '" + str(id_usuario) + "', " + str(id_lugar) + ");")
                    # NOTE(review): this inserts the brand *name* into a column
                    # called id_marca — looks suspicious; confirm the schema.
                    dao.Executa_SQL(
                        "insert into voudeque.tweet_marca(id_tweet, id_marca) values('" + str(id_tweet) + "', '" + str(marca) + "');")
                    for entidade in hashtags_do_tweet:
                        texto_hashtag = str(entidade.get("text").lower())
                        # BUG FIX: the original inserted j.get("text") — j was
                        # the outer search term (a plain str with no .get()),
                        # so these inserts always raised AttributeError and
                        # were silently swallowed by the bare except. Use the
                        # hashtag entity from the tweet instead.
                        if marca in texto_hashtag:
                            dao.Executa_SQL("insert into voudeque.hashtag(texto,id_marca) values('" + texto_hashtag + "', '" + str(id_marca) + "');")
                        else:
                            dao.Executa_SQL("insert into voudeque.hashtag(texto) values('" + texto_hashtag + "');")
                except Exception as exc:
                    # Report what actually failed instead of a bare except
                    # printing a constant string.
                    print("Exception: " + repr(exc))
    # Prune users and places that ended up with no tweet referencing them.
    dao.Executa_SQL(
        'DELETE FROM voudeque.usuario WHERE (id) IN (select * from(select usuario.id from voudeque.usuario left join voudeque.tweet on voudeque.usuario.id = voudeque.tweet.id_usuario where voudeque.tweet.id_usuario is null) as p);')
    dao.Executa_SQL(
        'DELETE FROM voudeque.lugar WHERE (id) IN (select * from(select lugar.id from voudeque.lugar left join voudeque.tweet on voudeque.lugar.id = voudeque.tweet.id_lugar where voudeque.tweet.id_lugar is null) as p);')
def iniciar(tempo):
    """Run the collection loop forever: retrain the classifier, harvest new
    tweets, then sleep.

    tempo: seconds to sleep between collection cycles.
    """
    while True:
        treinar(modelo, vectorizer)
        pegarTweetsNovos()
        time.sleep(tempo)


if __name__ == "__main__":
    # Guarded entry point: the original called iniciar(40) unconditionally,
    # so merely importing this module started an infinite collection loop.
    iniciar(40)