Basic Twitter Data Mining Causing Problem

This is my first attempt to extract tweets using the Twitter API and Tweepy. When I execute my code, it keeps printing 401, each time on a new line. I am not able to figure out what I am doing wrong. Any help is appreciated.
import tweepy
import json

access_token = ""
access_token_secret = ""
consumer_key = ""
consumer_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

class MyStreamListener(tweepy.StreamListener):
    def __init__(self, api=None):
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0
        self.file = open("tweets.txt", "w")

    def on_status(self, status):
        tweet = status._json
        self.file.write(json.dumps(tweet) + '\n')
        self.num_tweets += 1
        if self.num_tweets < 100:
            return True
        else:
            self.file.close()  # close the file before disconnecting the stream
            return False

    def on_error(self, status):
        print(status)

l = MyStreamListener()
stream = tweepy.Stream(auth, l)
stream.filter()  # filter() normally needs at least one of track/follow/locations

tweets_data_path = 'tweets.txt'
tweets_file = open(tweets_data_path, "r")
tweets_data = []
for line in tweets_file:
    tweet = json.loads(line)
    tweets_data.append(tweet)
tweets_file.close()

print(tweets_data[0].keys())

Go to your Twitter account settings and change the time zone to match your computer's. Then go to your Twitter app settings and generate a new consumer key and a new access token. Use these newly generated keys and tokens to avoid the 401 error.
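Once you have regenerated the keys, you can confirm they work before opening the stream by calling verify_credentials(). A minimal sketch, assuming tweepy 3.x (where verify_credentials() returns False on a 401) and placeholder credentials:

import tweepy

# Placeholder credentials - substitute the newly generated values
auth = tweepy.OAuthHandler("new_consumer_key", "new_consumer_secret")
auth.set_access_token("new_access_token", "new_access_token_secret")

api = tweepy.API(auth)
if api.verify_credentials():
    print("Credentials OK, safe to start streaming")
else:
    print("Still 401 - the keys or the system clock are still wrong")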

Related

Is there a Bitbucket API to search if a repository variable is defined in all of my workspace's repos?

Instead of defining a Bitbucket Cloud workspace variable that all the repos in the workspace can use, someone defined it in each repo individually, but not in all of the repos in the workspace. Now I want to remove the variable from the individual repos and define it at the workspace level.
Is there a Bitbucket API that would do the equivalent of this pseudo-code?
def bb = Bitbucket.getInstance()
String workspace = "MyWorkspace"
String myVariable = "NEXUS_USER"
List<Repository> reposInWorkspace = bb.getWorkspace(workspace).getAllReposInWorkspace()
reposInWorkspace.each { repo ->
    if (repo.hasVariable(myVariable)) {
        println repo.name
    }
}
I opened a Bitbucket support ticket, and a sharp Atlassian support person gave me this Python 3 script:
from requests import Session
from time import sleep

username = 'your_username_not_email'
password = 'app_pw_not_bb_user_pw'
workspace = 'your_workspace'
variable_name = 'your_variable'

URL = f'https://api.bitbucket.org/2.0/repositories/{workspace}'

session = Session()
session.auth = (username, password)

def get_repos(page=None):
    while True:
        params = {'page': page, 'pagelen': 100}
        r = session.get(URL, params=params)
        while r.status_code == 429:
            print("Hit the API rate limit. Sleeping for 10 sec...")
            sleep(10)
            print("Resuming...")
            r = session.get(URL, params=params)
        r_data = r.json()
        for repo in r_data.get('values'):
            yield repo.get('slug')
        if not r_data.get('next'):
            return
        if page is None:
            page = 1
        page += 1

def get_variables(repo, page=None):
    while True:
        params = {'page': page, 'pagelen': 100}
        r = session.get(f'{URL}/{repo}/pipelines_config/variables/', params=params)
        while r.status_code == 429:
            print("Hit the API rate limit. Sleeping for 10 sec...")
            sleep(10)
            print("Resuming...")
            # retry the same per-repo variables URL after backing off
            r = session.get(f'{URL}/{repo}/pipelines_config/variables/', params=params)
        r_data = r.json()
        for var in r_data.get('values'):
            yield var.get('key')
        if not r_data.get('next'):
            return
        if page is None:
            page = 1
        page += 1

def has_variable(var):
    return var == variable_name

def main():
    for repo in get_repos():
        for var in get_variables(repo):
            if has_variable(var):
                print(f'{repo}')

if __name__ == '__main__':
    main()
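The script only lists the repos that still define the variable; moving it is a separate step. A sketch of that follow-up, assuming the repo-level delete endpoint addresses a variable by its uuid (which you could capture by yielding var.get('uuid') alongside the key in get_variables) and that the workspace-level endpoint is /2.0/workspaces/{workspace}/pipelines-config/variables per the Bitbucket Cloud REST docs; the helper names here are mine, not from the support script:

def delete_repo_variable(repo, var_uuid):
    # remove the repo-level copy of the variable
    r = session.delete(f'{URL}/{repo}/pipelines_config/variables/{var_uuid}')
    r.raise_for_status()

def create_workspace_variable(key, value, secured=True):
    # define the variable once at the workspace level
    ws_url = f'https://api.bitbucket.org/2.0/workspaces/{workspace}/pipelines-config/variables'
    r = session.post(ws_url, json={'key': key, 'value': value, 'secured': secured})
    r.raise_for_status()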

eval() arg 1 must be a string, bytes or code object Traceback (most recent call last)

So I am trying to deploy a machine learning model to Streamlit using Flask. But as you can tell from the title, the error I get is 'eval() arg 1 must be a string'.
------This is The Code For The Back End------
from flask import Flask, request, jsonify
import pickle

app = Flask(__name__)

with open('forest_opt.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

@app.route('/')
def model_prediction():
    age = eval(request.args.get('age'))
    internship = eval(request.args.get('internship'))
    cgpa = eval(request.args.get('cgpa'))
    hostel = eval(request.args.get('hostel'))
    history = eval(request.args.get('history'))
    new_data = [age, internship, cgpa, hostel, history]
    res = model.predict([new_data])
    classes = ['No', 'Yes']
    response = {'status': 'success',
                'code': 200,
                'data': {'result': classes[res[0]]}}
    return jsonify(response)

@app.route('/predict', methods=['POST'])
def predict_post():
    content = request.json
    data = [content['age'],
            content['internship'],
            content['cgpa'],
            content['hostel'],
            content['history']]
    res = model.predict([data])
    response = {'status': 'success',
                'code': 200,
                'data': {'result': str(res[0])}}
    return jsonify(response)

app.run(debug=True)
------This is The Code For The Front End------
import streamlit as st
import requests

URL = 'http://127.0.0.1:5000/'

st.title('App for Detecting Chance of Getting a Job')
age = st.number_input('age')
internship = st.number_input('Internship (0,1,2,3)')
cgpa = st.number_input('cgpa')
hostel = st.number_input('hostel')
history = st.number_input('history')

data = {'age': age,
        'internship': internship,
        'cgpa': cgpa,
        'hostel': hostel,
        'history': history}

r = requests.post(URL, json=data)
res = r.json()
st.write(f"Predict The Result: {res['data']['result']}")
The error keeps pointing at the line 'age = eval(request.args.get('age'))'. I don't know why eval() receives nothing there. Please help, I am kind of new to this. Thank you!
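For what it's worth, request.args.get('age') returns None whenever the query string has no age parameter, and eval(None) raises exactly this TypeError; note also that the front end posts JSON to '/', while the JSON handler is registered at '/predict'. A minimal sketch of a safer pattern (the helper name is mine), assuming the inputs are numeric:

from flask import request

def get_numeric_arg(name):
    # Fetch a query parameter and parse it explicitly,
    # failing loudly instead of passing None to eval()
    raw = request.args.get(name)
    if raw is None:
        raise ValueError(f"missing query parameter: {name}")
    return float(raw)  # float() instead of eval(): safer and clearer

# inside the route:
# age = get_numeric_arg('age')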

List Index Out of Range - Tweepy/Twitter API into Geodatabase

Soooo I have been working on a script I took from ArcGIS Blueprints:
http://file.allitebooks.com/20151230/ArcGIS%20Blueprints.pdf
It should convert geolocated tweets into a geodatabase. I already have the Twitter Streaming API operational and have been playing with different ways to extract x/y, but I keep coming back to this script every so often, hoping I can get it running, with no luck. I am stuck on a "List Index Out of Range" error. If anyone is gracious enough to offer some ideas on how I can get past this error, I will be forever grateful. If nothing else, this endeavor has exposed my shortcomings with Python and ArcPy, and hopefully it will round me out in the long run. For now, I sure would like to get some mileage out of this script and the work I've invested in it. Thank you!
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import arcpy
import sys
import time

consumer_key = 'xxx'
consumer_secret = 'xxx'
token_key = 'xxx'
token_secret = 'xxx'

class StdOutListener(StreamListener):
    def __init__(self, start_time, featureClass, time_limit):
        super(StdOutListener, self).__init__()
        self.time = start_time
        self.limit = time_limit
        self.featureClass = featureClass

    def on_status(self, status):
        while (time.time() - self.time) < self.limit:
            if status.geo is not None:
                dictCoords = status.geo
                listCoords = dictCoords['coordinates']
                latitude = listCoords[0]
                longitude = listCoords[1]
                # "SHAPE@XY" is the arcpy token for writing point geometry
                cursor = arcpy.da.InsertCursor(self.featureClass, "SHAPE@XY")
                cursor.insertRow([(longitude, latitude)])
                print(str(listCoords[0]) + "," + str(listCoords[1]))
                return True
            else:
                print("No coordinates found")
                return True

start_time = time.time()
arcpy.env.workspace = r"c:\ArcGIS_Blueprint_Python\data\Twitter\TweetInformation.gdb"

def main():
    try:
        featureClass = sys.argv[1]
        monitorTime = float(sys.argv[2])  # argv values are strings; convert before doing math
        monitorTime = monitorTime * 3600
        sr = arcpy.SpatialReference(4326)
        arcpy.env.overwriteOutput = True
        arcpy.CreateFeatureclass_management(arcpy.env.workspace,
                                            featureClass, "POINT",
                                            spatial_reference=sr)
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(token_key, token_secret)
        stream = Stream(auth, StdOutListener(start_time, featureClass,
                                             time_limit=monitorTime))  # 172800
        stream.filter(track=['car'])
    except Exception as e:
        print(e)

if __name__ == '__main__':
    main()
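The most likely source of the "list index out of range" error, assuming the script is launched without the two expected command-line arguments, is sys.argv[1]: argv then contains only the script name. A small guard sketch, which also converts the hour count since argv values arrive as strings:

import sys

def parse_args():
    # sys.argv[0] is the script name, so two more values are required;
    # indexing sys.argv[1] without them raises IndexError: list index out of range
    if len(sys.argv) < 3:
        sys.exit("usage: script.py <featureClass> <monitorTimeHours>")
    feature_class = sys.argv[1]
    monitor_time = float(sys.argv[2]) * 3600  # convert hours to seconds
    return feature_class, monitor_time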

Not able to collect any tweets

This code was working fine a few moments ago, but now it's not working. Why?
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import time
import random

consumer_key = ""
consumer_secret = ""
access_token_key = ""
access_token_secret = ""

Coords = dict()
Place = dict()
PlaceCoords = dict()
XY = []

class StdOutListener(StreamListener):
    """A listener handles tweets that are received from the stream.
    This is a basic listener that inserts tweets into MySQLdb.
    """
    def on_status(self, status):
        #print "Tweet Text: ", status.text
        text = status.text
        #print "Time Stamp: ", status.created_at
        try:
            Coords.update(status.coordinates)
            XY = Coords.get('coordinates')  # Place the coordinate values into a list 'XY'
            #print "X: ", XY[0]
            #print "Y: ", XY[1]
        except:
            # Often users opt into 'place', which is a neighborhood-size polygon.
            # Calculate the center of the polygon:
            Place.update(status.place)
            PlaceCoords.update(Place['bounding_box'])
            Box = PlaceCoords['coordinates'][0]
            XY = [(Box[0][0] + Box[2][0]) / 2, (Box[0][1] + Box[2][1]) / 2]
            #print "X: ", XY[0]
            #print "Y: ", XY[1]
            pass
        # Comment out the next line to avoid MySQLdb and simply read the stream at the console
        #print {"status_id": status.id_str, "timestamp": status.created_at, "location X": XY[0], "location Y": XY[1], "text": text}
        print status.id_str, status.created_at, XY[0], XY[1], text

def main():
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token_key, access_token_secret)
    stream = Stream(auth, l, timeout=30)
    # sleep
    nsecs = 2
    # Only records 'locations' OR 'tracks', NOT 'tracks (keywords) with locations'
    while True:
        try:
            # Call tweepy's userstream method
            #stream.filter(locations=[-122.75,36.8,-121.75,37.8,-74,40,-73,41], languages=['es','tr','ko','fr','ru','de','ja','it','pt'], async=False)  # These coordinates are an approximate bounding box around the USA
            stream.filter(locations=[-180,-90,180,90], async=False)
            #stream.filter()
            #stream.filter(track=['obama'])  # This will feed the stream all mentions of a keyword
            break
        except Exception, e:
            print Exception, e
            # Abnormal exit: reconnect
            #nsecs = random.randint(30)
            print "Reconnecting ", nsecs
            time.sleep(nsecs)

if __name__ == '__main__':
    main()
Is there any other way to collect streaming data based on locations?
No, using the locations parameter is the only way. However, I would strongly advise against the catch-all except statement in StdOutListener: it is most likely catching a different type of error than the one you expect, masking a problem.
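As a sketch of what narrowing that handler could look like, keeping the question's Python 2 style and assuming the dict-style access to status.place that the original code already relies on:

def on_status(self, status):
    text = status.text
    if status.coordinates is not None:
        XY = status.coordinates['coordinates']
    elif status.place is not None:
        # fall back to the center of the 'place' bounding box
        box = status.place['bounding_box']['coordinates'][0]
        XY = [(box[0][0] + box[2][0]) / 2.0, (box[0][1] + box[2][1]) / 2.0]
    else:
        return  # nothing usable; skip the tweet instead of swallowing arbitrary errors
    print status.id_str, status.created_at, XY[0], XY[1], text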

How do I use the Bitly API to shorten a list of URLs?

I have an account with Bitly which personalizes my URL shortening. How can I use the API to sign in and shorten a list of URLs?
Here is my solution in Python, using the requests library:
import base64
import requests
import json

credentials = 'USERNAME:PASSWORD'
urls = ['www.google.com', 'www.google.co.uk', 'www.google.fr']

def getShortURLs(urls):
    token = auth()
    return shortenURLs(token, urls)

def auth():
    base_auth = "https://api-ssl.bitly.com/oauth/access_token"
    headers = {'Authorization': 'Basic ' + base64.b64encode(credentials)}
    resp = requests.post(base_auth, headers=headers)
    return resp.content

def shortenURLs(token, long_urls):
    base = 'https://api-ssl.bitly.com/v3/shorten'
    short_urls = []
    for long_url in long_urls:
        if long_url:
            params = {'access_token': token, 'longUrl': 'https://' + long_url}
            response = requests.get(base, params=params)
            r = json.loads(response.content)
            short_urls.append(r['data']['url'])
    return short_urls
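A quick usage sketch; note that under Python 3, base64.b64encode expects bytes, so the credentials string would need encoding first (base64.b64encode(credentials.encode()).decode()):

# shorten the list and pair each long URL with its short form
short_urls = getShortURLs(urls)
for long_url, short_url in zip(urls, short_urls):
    print(long_url, '->', short_url)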
