List Index Out of Range - Tweepy/Twitter API into Geodatabase - twitter

Soooo I have been working on a script I took from ArcGIS Blueprints:
http://file.allitebooks.com/20151230/ArcGIS%20Blueprints.pdf
It should convert geolocated tweets into a geodatabase. I have the Twitter Streaming API already operational, and been playing with different ways to extract x/y, but keep coming back to this script, every so often, hoping I can get it running with no luck. I am stuck on a "List Index Out of Range" error. If anyone is gracious enough to offer some ideas on how I can get by this error I will be forever grateful. If nothing else this endeavor has exploited my shortcomings with Python and Arcpy, and hopefully it will round me out in the long run. For right now, I sure would like to get some mileage out of this script and the work Ive invested into it. Thank you!
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import arcpy
import sys
import time
consumer_key = 'xxx'
consumer_secret = 'xxx'
token_key = 'xxx'
token_secret = 'xxx'
class StdOutListener(StreamListener):
def __init__(self, start_time, featureClass, time_limit):
super(StdOutListener, self).__init__()
self.time = start_time
self.limit = time_limit
self.featureClass = featureClass
def on_status(self, status):
while (time.time() - self.time) < self.limit:
if status.geo is not None:
dictCoords = status.geo
listCoords = dictCoords['coordinates']
latitude = listCoords[0]
longitude = listCoords[1]
cursor =arcpy.da.InsertCursor(self.featureClass,"SHAPE#XY"))
cursor.insertRow([(longitude,latitude)])
print(str(listCoords[0]) + "," + str(listCoords[1]))
return True
else:
print "No coordinates found"
return True
start_time = time.time()
arcpy.env.workspace = "c:\ArcGIS_Blueprint_Python\data\Twitter\TweetInformation.gdb" "
def main():
try: #new
featureClass = sys.argv[1]
monitorTime = sys.argv[2]
monitorTime = monitorTime * 3600
sr = arcpy.SpatialReference(4326)
arcpy.env.overwriteOutput = True
arcpy.CreateFeatureclass_management(arcpy.env.workspace,
featureClass, "POINT", spatial_reference=sr)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(token_key, token_secret)
stream = Stream(auth, StdOutListener(start_time, featureClass,
time_limit=monitorTime)) #172800
stream.filter(track=['car'])
except Exception as e:
print(e.message)
if __name__ == '__main__':
main()

Related

Scrapy Splash - I am not able to get the value

I am trying to scrape this page: https://simple.ripley.com.pe/laptop-lenovo-ideapad-5-amd-ryzen-7-16gb-ram-256gb-ssd-14-2004286061746p?s=o
All ok, but I am not able to get the values in this xpath:
//*[#id="panel-Especificaciones"]/div/div/table/tbody/tr[19]/td[2]
I think it loads dynamically. It's a table with many rows inside. I would like to get those values.
Image: page section i can't scrape
This is my spider code:
import scrapy
from scrapy_splash import SplashRequest
from numpy import nan
LUA_SCRIPT = """
function main(splash)
splash.private_mode_enabled = false
splash:go(splash.args.url)
splash:wait(2)
html = splash:html()
splash.private_mode_enabled = true
return html
end
"""
class RipleySpider(scrapy.Spider):
name = "ripley"
def start_requests(self):
url = 'https://simple.ripley.com.pe/tecnologia/computacion/laptops?facet%5B%5D=Procesador%3AIntel+Core+i7'
yield SplashRequest(url=url, callback=self.parse)
def parse(self, response):
for link in response.xpath("//div[#class='catalog-container']/div/a/#href"):
yield response.follow(link.get(), callback=self.parse_products)
# for href in response.xpath("//ul[#class='pagination']/li[last()]/a/#href").getall():
# yield SplashRequest(response.urljoin(href), callback=self.parse)
def parse_products(self, response):
titulo = response.css("h1::text").get()
link = response.request.url
sku = response.css(".sku-value::text").get()
precio = response.css(".product-price::text").getall()
if len(precio)==1:
precio_normal = nan
precio_internet = precio[0]
precio_tarjeta_ripley = nan
elif len(precio)==2:
precio_normal = precio[0]
precio_internet = precio[1]
precio_tarjeta_ripley = nan
elif len(precio)==4:
precio_normal = precio[0]
precio_internet = precio[1]
precio_tarjeta_ripley = precio[-1]
try:
# descripcion = response.css(".product-short-description::text").get()
descripcion = response.xpath('//*[#id="panel-Especificaciones"]/div/div/table/tbody/tr[1]/td[2]/text()').get()
except:
descripcion = 'sin valor'
yield {
'Título': titulo,
'Link': link,
'SKU': sku,
'Precio Normal': precio_normal,
'Precio Internet': precio_internet,
'Precio Tarjeta Ripley': precio_tarjeta_ripley,
'Descripción': descripcion,
}
Please, what solutions does scrapy offer? Thanks in advance for your help.
P.D.: I'm using Docker with Splash in localhost:8050. settings.py according to documentation.

eval() arg 1 must be a string, bytes or code object Traceback (most recent call last)

So I try to deploy a machine Learning model to streaml1t Using Flask. But as we know from the title, the error gave me that 'eval() args1 must be a string
------This is The Code For The Back End------
from flask import Flask, request,jsonify
import pickle
app = Flask(__name__)
with open ('forest_opt.pkl', 'rb') as model_file:
model = pickle.load(model_file)
#app.route('/')
def model_prediction():
age = eval(request.args.get('age'))
internship = eval(request.args.get('internship'))
cgpa = eval(request.args.get('cgpa'))
hostel = eval(request.args.get('hostel'))
history = eval(request.args.get('history'))
new_data = [age, internship, cgpa, hostel, history]
res = model.predict([new_data])
classes = ['No','Yes']
response = {'status': 'success',
'code' : 200,
'data' : {'result':classes(res[0])}
}
return jsonify(response)
#app.route('/predict', methods=['POST'])
def predict_post():
content = request.json
data = [content['age'],
content['internship'],
content['cgpa'],
content['hostel'],
content['history']
]
res = model.predict([data])
response = {'status': 'success',
'code' : 200,
'data' : {'result':str(res[0])}
}
return jsonify(response)
app.run(debug=True)
------This is The Code For The Front End------
import streamlit as st
import requests
URL = 'http://127.0.0.1:5000/'
st.title('App for Detecting Chance of Getting a Job')
age = st.number_input('age')
internship = st.number_input('Internship (0,1,2,3)')
cgpa = st.number_input('cgpa')
hostel = st.number_input('hostel')
history = st.number_input('history')
data = {'age':age,
'internship':internship,
'cgpa':cgpa,
'hostel':hostel,
'history':history}
r = requests.post(URL, json=data)
res = r.json()
st.write (f" Predict The Result: {res['data']['result']}")
The error keep saying this 'age = eval(request.args.get('age'))' I don't know why inside eval() have no information. Please help I kind of new to this one. Thank you !

How to deploy a model having no predict attribute?

app.py
from flask import Flask, jsonify, request,render_template
import pickle
app = Flask(__name__,template_folder='template')
# load model
model = pickle.load(open("model.pkl",'rb'))
# app
#app.route('/')
def home():
return render_template('recommendation.html')
# routes
#app.route('/api', methods=['POST'])
def predict():
result=request.form
query_user_name=result["user name"]
user_input = {'query':query_user_name}
output_data=model(query_user_name)
print(output_data)
# send back to browser
output ={output_data}
return f'<html><body><h1>{output_data}</h1><form action="/"><button type="submit">back </button> </form></body></html>'
if __name__ == '__main__':
app.run(debug=True)
I am deploying it using a model made from following function,not a pre-existing model having predict attribute.
def model(user):
recommended_list=[]
top_list=[]
x = data.iloc[data.loc[data.Users == user].index[0],2:]
similar = np.array([(data.iloc[i,0],weight_factor(x,data.iloc[i, 2:])) for i in range(0,data.shape[0],1)])
index= np.argsort( similar[:,1] )
index=index[::-1]
similar=similar[index]
neighbours = similar[similar[:,1].astype(float) > 0.6] #Taking threshold as 0.6
for i in range(0,len(neighbours),1):
for j in range(2,len(data.columns),1):
if data.iloc[data.loc[data.Users == neighbours[i][0]].index[0],j]==1 and data.iloc[data.loc[data.Users == user].index[0],j]==0:
recommended_list.append(data.columns[j])
if (len(neighbours)>10):
for i in range(0,10,1): #Top 10 neighbours
top_list.append(neighbours[i][0])
else:
for i in range(len(neighbours)):
top_list.append(neighbours[i][0])
if user in top_list: #Remove the user of which we are asked to find neighbours,each user is always strongly correlated with itself and its of no use to us.
top_list.remove(user)
print(" ")
print("Top users similar to this user are:")
print(" ")
for i in range(0,len(top_list),1):
print(top_list[i])
print(" ")
print("Users similar to this user liked these products too:")
print(" ")
recommended_array=np.unique(np.array(recommended_list))
for i in range(0,len(recommended_array),1):
print(recommended_array[i])
How to deploy it using flask,my output is not being shown in the window,though on deploying the home page is being shown and input is being taken.
your function "model(user)" must return somethings
example:
def model(user):
# code
return somethings

Basic Twitter Data Mining Causing Problem

This is my first attempt to extract tweets using twitter api and tweepy. When I execute my code it keep printing 401 every time in a new line. What am I doing wrong is I am not able to figure out. Any help is appreciated.
import tweepy
import json
access_token = ""
access_token_secret = ""
consumer_key = ""
consumer_secret = ""
auth = tweepy.OAuthHandler(consumer_key,consumer_secret)
auth.set_access_token(access_token,access_token_secret)
class MyStreamListener(tweepy.StreamListener):
def __init__(self, api=None):
super(MyStreamListener, self).__init__()
self.num_tweets = 0
self.file = open("tweets.txt", "w")
def on_status(self, status):
tweet = status._json
self.file.write( json.dumps(tweet) + '\n' )
self.num_tweets += 1
if self.num_tweets < 100:
return True
else:
return False
self.file.close()
def on_error(self, status):
print(status)
l = MyStreamListener()
stream=tweepy.Stream(auth,l)
stream.filter()
tweets_data_path = 'tweets.txt'
tweets_file = open(tweets_data_path, "r")
tweets_data = []
for line in tweets_file:
tweet = json.loads(line)
tweets_data.append(tweet)
tweets_file.close()
print(tweets_data[0].keys())
Go to your twitter account settings and change timezone to that as of your computer. Then, go to twitter app settings and generate new consumer key and new access token. These newly generated keys and tokens you should use to avoid 401 error.

Not able to collect any tweets

This code was working fine few moments ago but now its not working?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import time
import random
consumer_key = ""
consumer_secret = ""
access_token_key = ""
access_token_secret = ""
Coords = dict()
Place = dict()
PlaceCoords = dict()
XY = []
class StdOutListener(StreamListener):
""" A listener handles tweets that are the received from the stream.
This is a basic listener that inserts tweets into MySQLdb.
"""
def on_status(self, status):
#print "Tweet Text: ",status.text
text = status.text
#print "Time Stamp: ",status.created_at
try:
Coords.update(status.coordinates)
XY = (Coords.get('coordinates')) #Place the coordinates values into a list 'XY'
#print "X: ", XY[0]
#print "Y: ", XY[1]
except:
#Often times users opt into 'place' which is neighborhood size polygon
#Calculate center of polygon
Place.update(status.place)
PlaceCoords.update(Place['bounding_box'])
Box = PlaceCoords['coordinates'][0]
XY = [(Box[0][0] + Box[2][0])/2, (Box[0][1] + Box[2][1])/2]
#print "X: ", XY[0]
#print "Y: ", XY[1]
pass
# Comment out next 4 lines to avoid MySQLdb to simply read stream at console
#print {"status_id":status.id_str,"timestamp":status.created_at,"location X":XY[0],"location Y":XY[1],"text":text}
print status.id_str,status.created_at,XY[0],XY[1],text
def main():
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)
stream = Stream(auth, l, timeout=30)
#sleep
nsecs = 2
#Only records 'locations' OR 'tracks', NOT 'tracks (keywords) with locations'
while True:
try:
# Call tweepy's userstream method
#stream.filter(locations=[-122.75,36.8,-121.75,37.8,-74,40,-73,41],languages=['es','tr','ko','fr','ru','de','ja','it','pt'], async=False)##These coordinates are approximate bounding box around USA
stream.filter(locations=[-180,-90,180,90],async=False)
#stream.filter()
#stream.filter(track=['obama'])## This will feed the stream all mentions of 'keyword'
break
except Exception, e:
print Exception , e
# Abnormal exit: Reconnect
#nsecs=random.randint(30)
print "Reconnecting ",nsecs
time.sleep(nsecs)
if __name__ == '__main__':
main()
Is there any other way to collect streaming data based on locations?
No, using the locations parameter is the only way. However, I would strongly advise against your catch-all except statement in the StdOutListener. It is most likely catching a different type of error than the one you expect, masking a problem.

Resources