can't retrieving files from pubmed using biopython - biopython

I am using this script to get data about covid-19 from pubmed
from Bio import Entrez
def search(query):
Entrez.email = 'your.email#example.com'
handle = Entrez.esearch(db='pubmed',
sort='relevance',
retmax='20',
retmode='xml',
term=query)
results = Entrez.read(handle)
return results
def fetch_details(id_list):
ids = ','.join(id_list)
Entrez.email = 'your.email#example.com'
handle = Entrez.efetch(db='pubmed',
retmode='xml',
id=ids)
results = Entrez.read(handle)
return results
if __name__ == '__main__':
results = search('covid-19')
id_list = results['IdList']
papers = fetch_details(id_list)
for i, paper in enumerate(papers['PubmedArticle']):
print("{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))
I get results in console but what I want is to automatically download files like XML files or text files of articles, any suggestions please on how to do that I googled it but nothing found

You can add this code at the end to save to a JSON file
#write to file
import json
with open('file.json', 'w') as json_file:
json.dump(papers, json_file)

Related

ChildProcess close with all studio code 1

I was trying to deploy ml model using node_js with help of ChildProcess package ,while running __predict(), it is taking too long and end with code_1 error.
Here I share all related code to decode the issue :
Model python code -->
import keras
import time
start = time.time()
encoder = keras.models.load_model('enc', compile = False)
decoder = keras.models.load_model('dec', compile = False)
import numpy as np
from flask import Flask, request, jsonify , render_template
import tensorflow as tf
import pickle
import string
import re
from keras_preprocessing.sequence import pad_sequences
def initialize_hidden_state():
return tf.zeros((1, 1024))
eng_tokenizer , hin_tokenizer = pickle.load( open('tokenizer.pkl','rb'))
def clean(text):
text = text.lower()
special_char = set(string.punctuation+'ред') # Set of all special characters
# Remove all the special characters
text = ''.join(word for word in text if word not in special_char)
seq = eng_tokenizer.texts_to_sequences([text])
seq = pad_sequences(seq, maxlen=23, padding='post')
return seq
def __predict(data):
# Get the data from the POST request.
#data = request.get_json(force=True)
clean_input = clean(data)
# Make prediction using model loaded from disk as per the data.
hidden_enc = initialize_hidden_state()
enc_out, enc_hidden = encoder(clean_input, hidden_enc)
result = ''
dec_hidden = enc_hidden
dec_input = tf.expand_dims(hin_tokenizer.texts_to_sequences(['<Start>'])[0], 0)
#------------------------------------------------------------------
for t in range(25):
predictions, dec_hidden, attention_weights = decoder(dec_input, dec_hidden, enc_out)
predicted_id = tf.argmax(predictions[0]).numpy()
x = hin_tokenizer.sequences_to_texts([[predicted_id]])[0]
if x == 'end':
break
result += x + ' '
# the predicted ID is fed back into the model
dec_input = tf.expand_dims([predicted_id], 0)
CLEANR = re.compile(r"([A-Za-z])", re.DOTALL)
result = re.sub(CLEANR, '', result)
return result
# import json
# with open('data.json', 'r') as openfile:
# json_object = json.load(openfile).get('data')
data =__predict("file")
end= time.time()
# print(start-end)
data1 = data +"abcd"
print(data1)
# print("abcd")
# dictionary = {
# "data": data,
# }
# json_object = json.dumps(dictionary, indent=2)
# with open("result.json", "w") as outfile:
# outfile.write(json_object)
When I type print("abcd") or print(start-end), it is giving result ,ending with code_0. But when I type print("data") not giving any result and ending with code_1 .
Here is the childProcess code -->
app.get('/', (req, res) => {
let dataToSend
let largeDataSet = []
// spawn new child process to call the python script
const python = spawn('python', ['app.py'])
// console.log(python);
// collect data from script
python.stdout.on('data', function (data) {
console.log('Pipe data from python script ...')
//dataToSend = data;
largeDataSet.push(data)
})
// in close event we are sure that stream is from child process is closed
python.on('close', (code) => {
console.log(`child process close all stdio with code ${code}`)
// send data to browser
// largeDataSet = []
console.log(largeDataSet.join(''));
res.send(largeDataSet.join(''))
})
})
Here is the error --->
child process close all stdio with code 1
Pls help , I tried to understand the problem but failed severely even in understanding it.
Thanks in advance !!!

Outcput of Chi2 is showing as in dataframe fromat:

I am trying loop the ChiSquare test and outcome is not shown as required, that is in Dataframe.
All columns are coming one row..
Please help
# Import the function
from scipy.stats import chi2_contingency
chi2_check = []
for i in df_clean.select_dtypes(['object']):
if chi2_contingency(pd.crosstab(df_clean['Final_Comments'], df_clean[i]))[1] < 0.05:
chi2_check.append('Reject Null Hypothesis')
else:
chi2_check.append('Fail to Reject Null Hypothesis')
res = pd.DataFrame(data = [df_clean.select_dtypes(['object']), chi2_check]
).T
res.columns = ['Column', 'Hypothesis']
print(res)
res.columns

Reading Pointcloud from .csv to ROS PointCloud2

I have a .csv file which has /raw_points rostopic, and i'm trying to convert that file into PointCloud2 data(http://docs.ros.org/en/api/sensor_msgs/html/msg/PointCloud2.html).
import csv
import sys
csv.field_size_limit(sys.maxsize)
file = open("points_raw.csv")
csvreader = csv.reader(file)
header = next(csvreader)
print(header)
This is my header:
['Time', 'header.seq', 'header.stamp.secs', 'header.stamp.nsecs', 'header.frame_id', 'height', 'width', 'fields', 'is_bigendian', 'point_step', 'row_step', 'data', 'is_dense']
These information match the CloudPoint2, but I'm not sure how to convert it to this type.
You need to simply iterate over each row and for every row store the relative fields in a PointCloud2 message and publish it out. For example:
import rospy
import csv
from sensor_msgs.msg import PointCloud2
def main():
#Setup ros param/init here
some_pub = rospy.Publisher('output_topic', PointCloud2, queue_size=10)
with open('some_file.csv', 'r') as f:
reader = csv.reader(f)
for line in reader:
split_line = line.split(',')
new_msg = PointCloud2()
new_msg.header.seq = split_line[1]
new_msg.header.stamp.secs = split_line[2]
#Iterate over the rest
new_msg.data = split_line[11]
new_msg.is_dense = split_line[12]
some_pub.publish(new_msg)
rospy.Rate(10).sleep() #Sleep at 10Hz

Write Twitter Frequency analysis to a CSV using python

How do I write the output of my code to a csv?
Here is what I'm trying, the frequency analysis works, but I can't get the csv to write. Pretty new to python, so I am sure that I am doing something wrong.
# This Python file uses the following encoding: utf-8
import os, sys
import re
import csv
filename = 'TweetsCSV_ORIGINAL.txt'
word_list = re.split('\s+', file(filename).read().lower())
print 'Words in text:', len(word_list)
freq_dic = {}
punctuation = re.compile(r'[.?!,":;]')
for word in word_list:
word = punctuation.sub("", word)
try:
freq_dic[word] += 1
except:
freq_dic[word] = 1
print 'Unique words:', len(freq_dic)
freq_list = freq_dic.items()
freq_list.sort()
for word, freq in freq_list:
print word, freq
#write to CSV
res = [word, freq]
csvfile = "tweetfreq.csv"
#Assuming res is a flat list
with open(csvfile, "w") as output:
writer = csv.writer(output, lineterminator='\n')
for val in res:
writer.writerow([val])
This snippet will append a line to the end of your CSV file.
with open('tweetfreq.csv', 'a') as csvfile:
csv_writer = csv.writer(csvfile)
csv_writer.writerow([word,freq])

Getting {"errors":[{"message":"Bad Authentication data","code":215}]} using python-twitter API v1.0

I am using python-twitter API v1.0. It says that it works with v1.1 of the twitter API.
This is my code :
import json
import urllib2
import urllib
import csv
file1 = open("somez.csv", "wb")
fname="tw1.txt"
file=open(fname,"r")
ins = open( "tw1.txt", "r" )
array = []
for line in ins:
array.append( line )
s = 'https://api.twitter.com/1.1/statuses/show/' + line[:-1] + '.json'
print s
try:
data=urllib2.urlopen(s)
except:
print "Not Found"
continue
print data
json_format=json.load(data)
js=json_format
print line[:-1]
print js[('user')]['id']
print js[('user')]['created_at']
print js['retweet_count']
print js['text']
# js = js.decode('utf-8')
one = line[:-1].encode('utf-8')
thr = js['user']['id']
two = js['user']['created_at'].encode('utf-8')
four = js['retweet_count']
five = js['text'].encode('utf-8')
rw = [one,two,thr,four,five];
spamWriter = csv.writer(file1 , delimiter=',')
spamWriter.writerow(rw)
file1.close()
I am not able to retrieve any data. It is saying "Not Found". When I open one of the URLs, I get this error:
{"errors":[{"message":"Bad Authentication data","code":215}]}
Can anyone suggest what the problem might be?

Resources