When requesting YouTube search results, the page tokens change between requests (look at the prev and next tokens). The code for the loop is as follows:
done = "N"
while (done == "N") :
request = youtube.search().list(
part="snippet"
, q="crime|airport delay|traffic accident|home invasion"
, publishedBefore =publish_end_date
, publishedAfter =publish_start_date
, maxResults=50
, pageToken=page_token
, type="video"
)
response = request.execute()
print ('Total results: ' + str(response["pageInfo"]["totalResults"]))
if 'prevPageToken' in response:
print ('prevPageToken: ' + response["prevPageToken"])
else:
print ('prevPageToken: NO_MORE_PAGES')
if 'nextPageToken' in response:
page_token = str(response["nextPageToken"])
print ('nextPageToken: ' + page_token)
else:
page_token = ""
done = "Y"
print ('nextPageToken: NO_MORE_PAGES')
num_posts = response["pageInfo"]["resultsPerPage"]
if num_posts > batch_size:
num_posts=batch_size
print ('Number of posts to download: ' + str(num_posts))
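For reference, the tokens are expected to differ on every page: each nextPageToken points at the page after the one just fetched. The client library can also chain the requests itself via list_next(); a minimal sketch, assuming the same authorized youtube client and search parameters:

request = youtube.search().list(
    part="snippet",
    q="crime|airport delay|traffic accident|home invasion",
    maxResults=50,
    type="video",
)
while request is not None:
    response = request.execute()
    for item in response.get("items", []):
        print(item["id"].get("videoId"))
    # list_next() builds the follow-up request from nextPageToken,
    # returning None once the last page has been fetched.
    request = youtube.search().list_next(request, response)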
I'm coding a Discord bot in Lua and I thought it would be fun to implement OpenAI's API somehow. I've got everything right except that I keep getting a 401 error. Here's a portion of my code:
coroutine.wrap(function()
    local s, e = pcall(function()
        local Headers = {
            ["Authorization"] = "Bearer " .. key,
            ["Content-Type"] = "application/json",
        }
        local Body = json.encode({
            model = "text-davinci-002",
            prompt = "Human: " .. table.concat(Args, " ") .. "\n\nAI:",
            temperature = 0.9,
            max_tokens = 47, --150
            top_p = 1,
            frequency_penalty = 0.0,
            presence_penalty = 0.6,
            stop = { " Human:", " AI:" },
        })
        res, body = coro.request("POST", link, Headers, Body, 5000)
        if res == nil then
            Message:reply("didnt return anything")
            return
        end
        if res.code < 200 or res.code >= 300 then
            Message:reply("Failed to send request: " .. res.reason); return -- Always ends up here: "Failed to send request: Unauthorized"
        end
        Message:reply("Request sent successfully!")
    end)
end)()
The "key" is the API key I got from the website. I feel like the mistake is simple and stupid but regardless I'm stuck
It's good code, though I'd do some checks on the types before you validate the response codes.
Another possible reason: some domains may require a proxy rather than a direct connection.
coroutine.resume(coroutine.create(function()
    local headers = {
        Authorization = "Bearer " .. key,
        ["Content-Type"] = "application/json",
    }
    local body = json.encode({
        model = "text-davinci-002",
        prompt = "Human: " .. table.concat(Args, " ") .. "\n\nAI:",
        temperature = 0.9,
        max_tokens = 47, --150
        top_p = 1,
        frequency_penalty = 0.0,
        presence_penalty = 0.6,
        stop = { " Human:", " AI:" },
    })
    local success, http_result, http_body = pcall(coro.request, "POST", link, headers, body, 5e3)
    if success ~= true then
        return error(http_result, 0)
    elseif type(http_result) ~= "table" or type(http_result.code) ~= "number"
            or http_result.code < 200 or http_result.code >= 300 then
        -- Note the parentheses around the fallback: `..` binds tighter than
        -- `==`, so without them the reason text would be silently dropped.
        local reason = type(http_result) == "table" and type(http_result.reason) == "string"
            and http_result.reason or "No reason provided."
        return Message:reply("Failed to send request: " .. reason)
    end
    return Message:reply("Request sent successfully!")
end))
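If the 401 persists after that, it can help to replay the request outside the bot to rule out the Lua side entirely. A minimal Python sketch; the completions URL here is an assumption, since the value of link isn't shown above:

import requests

key = "sk-..."  # the same API key the bot uses
resp = requests.post(
    "https://api.openai.com/v1/completions",  # assumed value of `link`
    headers={"Authorization": "Bearer " + key,
             "Content-Type": "application/json"},
    json={"model": "text-davinci-002",
          "prompt": "Human: hi\n\nAI:",
          "max_tokens": 47},
)
print(resp.status_code, resp.text)  # a 401 here too means the key itself is bad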
I'm using the code from here to scrape the tweets of a few users and export it as a .csv: https://towardsdatascience.com/tweepy-for-beginners-24baf21f2c25
I want to ideally get all the tweets of each user, but it seems to be limited to only the most recent 3200 tweets. Here's the exact code I'm using, with Trump as an example:
import datetime
import time

import pandas as pd
import tweepy as tw

ids = ['realDonaldTrump']

def extract_hashtags(hashtag_list):
    final_hashtag = ''
    for hashtag in hashtag_list:
        final_hashtag = final_hashtag + ' ' + hashtag['text']
    return final_hashtag.strip()

# from https://towardsdatascience.com/tweepy-for-beginners-24baf21f2c25
class TweetMiner(object):
    result_limit = 20
    data = []
    api = False
    twitter_keys = {}  # redacted

    def __init__(self, keys_dict=twitter_keys, api=api, result_limit=20):
        self.twitter_keys = keys_dict
        auth = tw.OAuthHandler(keys_dict['consumer_key'], keys_dict['consumer_secret'])
        auth.set_access_token(keys_dict['access_token_key'], keys_dict['access_token_secret'])
        self.api = tw.API(auth)
        self.twitter_keys = keys_dict
        self.result_limit = result_limit

    def mine_user_tweets(self, user, mine_retweets=False, max_pages=5):
        data = []
        last_tweet_id = False
        page = 1
        while page <= max_pages:
            if last_tweet_id:
                statuses = self.api.user_timeline(screen_name=user,
                                                  count=self.result_limit,
                                                  max_id=last_tweet_id - 1,
                                                  tweet_mode='extended',
                                                  include_retweets=True)
            else:
                statuses = self.api.user_timeline(screen_name=user,
                                                  count=self.result_limit,
                                                  tweet_mode='extended',
                                                  include_retweets=True)
            for item in statuses:
                mined = {
                    'tweet_id': item.id,
                    'name': item.user.name,
                    'screen_name': item.user.screen_name,
                    'retweet_count': item.retweet_count,
                    'text': item.full_text,
                    'mined_at': datetime.datetime.now(),
                    'created_at': item.created_at,
                    #'time_zone': item._json['time_zone'],
                    'favourite_count': item.favorite_count,
                    'hashtags': extract_hashtags(item.entities['hashtags']),
                    #'links': extract_
                    'status_count': item.user.statuses_count,
                    'location': item.place,
                    'source_device': item.source
                }
                try:
                    mined['retweet_text'] = item.retweeted_status.full_text
                except:
                    mined['retweet_text'] = 'None'
                try:
                    mined['quote_text'] = item.quoted_status.full_text
                    mined['quote_screen_name'] = item.quoted_status.user.screen_name
                except:
                    mined['quote_text'] = 'None'
                    mined['quote_screen_name'] = 'None'
                last_tweet_id = item.id
                data.append(mined)
            page += 1
        return data

# result_limit * max_pages is the number of tweets for each id
miner = TweetMiner(result_limit=460)  # 200
counter = 0
counter2 = 0
for id in ids:
    try:
        print("Fetching tweets of " + id + " now...")
        mined_tweets = miner.mine_user_tweets(user=id, max_pages=460)  # 100
        mined_tweets_df = pd.DataFrame(mined_tweets)
        counter2 = counter2 + 1
        # after 40 tries, pause for 15 mins
        if counter2 % 40 == 0:  # 5
            print("Couldn't fetch, sleeping for 15 mins")
            time.sleep(900)  # 15 minute sleep time
    except:
        print(id, 'is invalid or locked')
    if counter > 0:
        final_df = pd.concat([final_df, mined_tweets_df], ignore_index=True)
        print("Fetched and added!")
    else:
        final_df = mined_tweets_df
        print("Fetched and added!")
    counter += 1
print(final_df)
final_df.to_csv('tweets.csv', encoding='UTF-8')
The number of tweets returned should be 460 × 460 = 211,600 for each user in ids, but it only returns a total of 3200 tweets per id. Is this a strict limit built into the API, and if so, is there any way to get more than 3200 tweets per user?
This is a limit built into the Twitter API. The user timeline can only return a maximum of the most recent 3200 Tweets (at 200 Tweets per "page"). To retrieve more, you would need to use the premium or enterprise full-archive search API.
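Within that cap, tweepy's Cursor can replace the manual max_id bookkeeping from the question. A minimal sketch, assuming api is an authenticated tweepy.API instance:

import tweepy

tweets = []
for status in tweepy.Cursor(api.user_timeline,
                            screen_name='realDonaldTrump',
                            count=200,               # maximum page size
                            tweet_mode='extended').items():
    tweets.append(status.full_text)
# Stops around 3200 Tweets no matter how long it pages.
print(len(tweets))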
I'm trying to make a web app using the LinkedIn REST API.
I'm following these instructions and have done step 1.
I have created an application on LinkedIn and got a Client ID and Client Secret for that app.
I'm stuck with step 2. How do I get USER_TOKEN and USER_SECRET for my app? Any help would be appreciated.
Try following this, for the Python LinkedIn lib:
import oauth2 as oauth
import urlparse

consumer = oauth.Consumer(consumer_key, consumer_secret)
client = oauth.Client(consumer)

request_token_url = 'https://api.linkedin.com/uas/oauth/requestToken'
resp, content = client.request(request_token_url, "POST")
if resp['status'] != '200':
    raise Exception("Invalid response %s." % resp['status'])
print content

request_token = dict(urlparse.parse_qsl(content))
print "Request Token:", "\n"
print "- oauth_token        = %s" % request_token['oauth_token'], "\n"
print "- oauth_token_secret = %s" % request_token['oauth_token_secret'], "\n"

authorize_url = 'https://api.linkedin.com/uas/oauth/authorize'
print "Go to the following link in your browser:", "\n"
print "%s?oauth_token=%s" % (authorize_url, request_token['oauth_token']), "\n"

accepted = 'n'
while accepted.lower() == 'n':
    accepted = raw_input('Have you authorized me? (y/n) ')
oauth_verifier = raw_input('What is the PIN? ')

access_token_url = 'https://api.linkedin.com/uas/oauth/accessToken'
token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])
token.set_verifier(oauth_verifier)
client = oauth.Client(consumer, token)
resp, content = client.request(access_token_url, "POST")

access_token = dict(urlparse.parse_qsl(content))
print "Access Token:", "\n"
print "- oauth_token        = %s" % access_token['oauth_token'], "\n"
print "- oauth_token_secret = %s" % access_token['oauth_token_secret']
print "You may now access protected resources using the access tokens above."
I am following this: https://dev.twitter.com/oauth/application-only
def get_bearer_token(self):
    data = {"grant_type": "client_credentials"}
    headers = {"Authorization": "Basic " + self.encoded_cred}
    r = requests.post("https://api.twitter.com/oauth2/token",
                      data=data, headers=headers)
    res = r.json()
    if res.get('token_type') != 'bearer':
        raise Exception("invalid type")
    self.access_token = res['access_token']

def search(self, q):
    data = {"count": 100, "q": q}
    headers = {"Authorization": "Bearer " + self.access_token}
    r = requests.get("https://api.twitter.com/1.1/search/tweets.json",
                     data=data, headers=headers)
    print r.status_code
    print r.text
I was able to get an access_token, but the search API call returns 400 with no content. Any idea?
Similar to this https://stackoverflow.com/questions/24102409/twitter-application-only-auth-returning-empty-response but with no answer yet.
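One thing worth checking: requests.get() sends data= as a request body, while this endpoint expects everything in the query string, so the parameters are likely never reaching the API. A sketch of the usual fix, passing them via params= instead:

import requests

def search(access_token, q):
    headers = {"Authorization": "Bearer " + access_token}
    # params= puts count and q into the URL query string;
    # data= would put them in the (ignored) request body.
    r = requests.get("https://api.twitter.com/1.1/search/tweets.json",
                     params={"count": 100, "q": q}, headers=headers)
    print(r.status_code)
    print(r.text)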
Quick facts: I got this function from http://lua-users.org/wiki/SplitJoin (at the very bottom) and am attempting to use it in the Corona SDK, though I doubt that's important.
function string:split(sSeparator, nMax, bRegexp)
    assert(sSeparator ~= '')
    assert(nMax == nil or nMax >= 1)
    local aRecord = {}
    if self:len() > 0 then
        local bPlain = not bRegexp
        nMax = nMax or -1
        local nField, nStart = 1, 1
        local nFirst, nLast = self:find(sSeparator, nStart, bPlain)
        while nFirst and nMax ~= 0 do
            aRecord[nField] = self:sub(nStart, nFirst - 1)
            nField = nField + 1
            nStart = nLast + 1
            nFirst, nLast = self:find(sSeparator, nStart, bPlain)
            nMax = nMax - 1
        end
        aRecord[nField] = self:sub(nStart)
    end
    return aRecord
end
The input: "1316982303 Searching server"
msglist = string.split(msg, ' ')
gives me the error in the title. Any ideas? I'm fairly certain the function is just out of date.
Edit: lots more code
Here's some more from the main.lua file:
multiplayer = pubnub.new({
    publish_key = "demo",
    subscribe_key = "demo",
    secret_key = nil,
    ssl = nil,                      -- ENABLE SSL?
    origin = "pubsub.pubnub.com"    -- PUBNUB CLOUD ORIGIN
})
multiplayer:subscribe({
    channel = "MBPocketChange",
    callback = function(msg)
        -- MESSAGE RECEIVED!!!
        print(msg)
        msglist = string.split(msg, ' ')
        local recipient = msglist[0]    -- Get the value
        table.remove(msglist, 0)        -- Remove the value from the table.
        local cmdarg = msglist[0]
        table.remove(msglist, 0)
        arglist = string.split(cmdarg, ',')
        local command = arglist[0]
        table.remove(arglist, 0)
        argCount = 1
        while #arglist > 0 do
            argname = "arg" .. argCount
            _G[argname] = arglist[0]
            table.remove(arglist, 0)
            argCount = argCount + 1
        end
Server.py:
This is the multiplayer server that sends the necessary info to clients.
import sys
import tornado
import os
from Pubnub import Pubnub

## Initiate class
pubnub = Pubnub('demo', 'demo', None, False)

## Subscribe Example
def receive(message):
    test = str(message)
    msglist = test.split()
    recipient = msglist.pop(0)
    msg = msglist.pop(0)
    id = msglist.pop(0)
    if id != "server":
        print id
        print msg
        commandHandler(msg, id)
    return True

def commandHandler(cmd, id):
    global needOp
    needOp = False
    global matchListing
    if server is True:
        cmdArgList = cmd.split(',')
        cmd = cmdArgList.pop(0)
        while len(cmdArgList) > 0:
            argument = 1
            locals()["arg" + str(argument)] = cmdArgList.pop(0)
            argument += 1
        if cmd == "Seeking":
            if needOp != False and needOp != id:
                needOp = str(needOp)
                id = str(id)
                pubnub.publish({
                    'channel': 'MBPocketChange',
                    # Message order is, and should remain:
                    # ---------- Recipient, Command,Arguments, Sender
                    'message': needOp + " FoundOp," + id + " server"
                })
                print("Attempting to match " + id + " with " + needOp + ".")
                needOp = False
                matchListing[needOp] = id
            else:
                needOp = id
                pubnub.publish({
                    'channel': 'MBPocketChange',
                    # Message order is, and should remain:
                    # ---------- Recipient, Command,Arguments, Sender
                    'message': id + ' Searching server'
                })
                print "Finding a match for: " + id
        elif cmd == "Confirm":
            if matchListing[id] == arg1:
                pubnub.publish({
                    'channel': 'MBPocketChange',
                    # Message order is, and should remain:
                    # ---------- Recipient, Command,Arguments, Sender
                    'message': arg1 + ' FoundCOp,' + id + ' server'
                })
                matchListing[arg1] = id
            else:
                pass  # Cheater.
        elif cmd == "SConfirm":
            if matchListing[id] == arg1 and matchListing[arg1] == id:
                os.system('python server.py MBPocketChange' + arg1)
                # Here, the argument tells both players what room to join.
                # The room is created from the first player's ID.
                pubnub.publish({
                    'channel': 'MBPocketChange',
                    # Message order is, and should remain:
                    # ---------- Recipient, Command,Arguments, Sender
                    'message': id + ' GameStart,' + arg1 + ' server'
                })
                pubnub.publish({
                    'channel': 'MBPocketChange',
                    # Message order is, and should remain:
                    # ---------- Recipient, Command,Arguments, Sender
                    'message': arg1 + ' GameStart,' + arg1 + ' server'
                })
            else:
                pass  # hax
        else:
            pass

def connected():
    pass

try:
    channel = sys.argv[1]
    server = False
    print("Listening for messages on '%s' channel..." % channel)
    pubnub.subscribe({
        'channel': channel,
        'connect': connected,
        'callback': receive
    })
except:
    channel = "MBPocketChange"
    server = True
    print("Listening for messages on '%s' channel..." % channel)
    pubnub.subscribe({
        'channel': channel,
        'connect': connected,
        'callback': receive
    })

tornado.ioloop.IOLoop.instance().start()
This error message happens if you run:
string.split(nil, ' ')
Double-check your inputs to be sure you are really passing in a string.
Edit: in particular, msglist[0] is not the first position in the array; in Lua, arrays start at 1.
As an aside, this function was written with the intention that you'd use the colon syntactic sugar, e.g.
msglist = msg:split(' ')