Write Twitter Frequency analysis to a CSV using python

Write Twitter Frequency analysis to a CSV using python - twitter

How do I write the output of my code to a csv?
Here is what I'm trying. The frequency analysis works, but I can't get the CSV to write. I'm pretty new to Python, so I am sure that I am doing something wrong.
# This Python file uses the following encoding: utf-8
import os, sys
import re
import csv
# Count how often each word occurs in the tweet dump, print the counts and
# save every (word, count) pair to a CSV file.
filename = 'TweetsCSV_ORIGINAL.txt'
csvfile = "tweetfreq.csv"

# Read the whole input inside a context manager so the handle is closed.
with open(filename) as source:
    word_list = re.split(r'\s+', source.read().lower())
print('Words in text: %d' % len(word_list))

# Strip common punctuation from each token before counting it.
punctuation = re.compile(r'[.?!,":;]')
freq_dic = {}
for word in word_list:
    word = punctuation.sub("", word)
    # dict.get avoids a bare except around the first occurrence of a word.
    freq_dic[word] = freq_dic.get(word, 0) + 1
print('Unique words: %d' % len(freq_dic))

# sorted() works on both a Python 2 list and a Python 3 items view.
freq_list = sorted(freq_dic.items())

# Write to CSV: the row must be written inside the loop over freq_list,
# otherwise only the values left over from the final iteration are saved.
with open(csvfile, "w") as output:
    writer = csv.writer(output, lineterminator='\n')
    for word, freq in freq_list:
        print('%s %s' % (word, freq))
        writer.writerow([word, freq])

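This snippet appends one line to the end of your CSV file. Place it inside the `for word, freq in freq_list:` loop so that every word/frequency pair is written, not just the last one.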
# Open the CSV in append mode and add a single [word, freq] row at the end.
with open('tweetfreq.csv', 'a') as out_file:
    csv.writer(out_file).writerow([word, freq])

Related

ChildProcess close with all stdio code 1

I was trying to deploy an ML model using Node.js with the help of the ChildProcess package. While running __predict(), it takes too long and ends with exit code 1.
Here I share all related code to decode the issue :
Model python code -->
import keras
import time
start = time.time()
encoder = keras.models.load_model('enc', compile = False)
decoder = keras.models.load_model('dec', compile = False)
import numpy as np
from flask import Flask, request, jsonify , render_template
import tensorflow as tf
import pickle
import string
import re
from keras_preprocessing.sequence import pad_sequences
def initialize_hidden_state():
    """Return an all-zero tensor of shape (1, 1024)."""
    hidden_shape = (1, 1024)
    return tf.zeros(hidden_shape)
# Load the tokenizer pair; the context manager closes the file handle.
# NOTE(review): pickle.load can execute arbitrary code from the file, so only
# load a tokenizer.pkl that you produced yourself.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    eng_tokenizer, hin_tokenizer = pickle.load(tokenizer_file)
def clean(text):
    """Lower-case *text*, drop special characters and return a padded sequence.

    Every character found in ``string.punctuation`` plus the danda ('।') is
    removed.  The remaining text is converted with
    ``eng_tokenizer.texts_to_sequences`` and passed through ``pad_sequences``
    with ``maxlen=23`` and ``padding='post'``.
    """
    blocked = set(string.punctuation + '।')  # Set of all special characters
    kept_chars = [ch for ch in text.lower() if ch not in blocked]
    token_ids = eng_tokenizer.texts_to_sequences([''.join(kept_chars)])
    return pad_sequences(token_ids, maxlen=23, padding='post')
def __predict(data):
    """Run *data* through the encoder and decoder and return the decoded text.

    The input is prepared with ``clean`` and encoded once.  The decoder is then
    called for at most 25 steps, each step feeding the previously predicted id
    back in, and stops early when the decoded token is ``'end'``.  ASCII
    letters are removed from the joined tokens before they are returned.
    """
    encoder_input = clean(data)
    enc_out, enc_hidden = encoder(encoder_input, initialize_hidden_state())

    # Seed the decoder with the id sequence of the '<Start>' token.
    start_ids = hin_tokenizer.texts_to_sequences(['<Start>'])[0]
    dec_input = tf.expand_dims(start_ids, 0)
    dec_hidden = enc_hidden

    pieces = []
    for _ in range(25):
        predictions, dec_hidden, _attention = decoder(dec_input, dec_hidden, enc_out)
        predicted_id = tf.argmax(predictions[0]).numpy()
        token = hin_tokenizer.sequences_to_texts([[predicted_id]])[0]
        if token == 'end':
            break
        pieces.append(token + ' ')
        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)

    # Strip every ASCII letter from the decoded text.
    ascii_letters = re.compile(r"([A-Za-z])", re.DOTALL)
    return re.sub(ascii_letters, '', ''.join(pieces))
# import json
# with open('data.json', 'r') as openfile:
# json_object = json.load(openfile).get('data')
import sys

# When stdout is a pipe (e.g. the script is spawned from Node.js), Python
# encodes print() output with the platform's default encoding.  If that
# encoding cannot represent the predicted text, print() raises
# UnicodeEncodeError and the process exits with code 1.  Forcing UTF-8 avoids
# that.  NOTE(review): presumably the prediction contains non-ASCII text,
# since __predict strips all ASCII letters - confirm from the stderr traceback.
if hasattr(sys.stdout, "reconfigure"):  # available on Python 3.7+
    sys.stdout.reconfigure(encoding="utf-8")

data = __predict("file")
end = time.time()
# print(end - start)  # elapsed seconds; start - end would be negative
data1 = data + "abcd"
print(data1)
# print("abcd")
# dictionary = {
# "data": data,
# }
# json_object = json.dumps(dictionary, indent=2)
# with open("result.json", "w") as outfile:
# outfile.write(json_object)
When I use print("abcd") or print(start-end), it prints the result and ends with exit code 0. But when I use print(data), it prints nothing and ends with exit code 1.
Here is the childProcess code -->
// GET / : run app.py in a child process and return whatever it prints.
// On failure the Python traceback (stderr) is logged and a 500 is returned,
// instead of silently sending an empty 200 response.
app.get('/', (req, res) => {
  const stdoutChunks = []
  const stderrChunks = []
  let responded = false // 'error' and 'close' can both fire; reply only once

  // spawn new child process to call the python script
  const python = spawn('python', ['app.py'])

  // collect data from script
  python.stdout.on('data', (chunk) => {
    console.log('Pipe data from python script ...')
    stdoutChunks.push(chunk)
  })

  // keep stderr too: this is where the Python traceback is written
  python.stderr.on('data', (chunk) => {
    stderrChunks.push(chunk)
  })

  // emitted when the process could not be spawned at all (e.g. python not on PATH)
  python.on('error', (err) => {
    console.error(`failed to start python script: ${err.message}`)
    if (!responded) {
      responded = true
      res.status(500).send('Failed to start the Python script')
    }
  })

  // in close event we are sure that stream from child process is closed
  python.on('close', (code) => {
    console.log(`child process close all stdio with code ${code}`)
    if (responded) return
    responded = true

    // Concatenate the raw Buffers before decoding so that a multi-byte
    // UTF-8 character split across two chunks is not corrupted.
    const output = Buffer.concat(stdoutChunks).toString('utf8')
    if (code !== 0) {
      console.error(Buffer.concat(stderrChunks).toString('utf8'))
      res.status(500).send(`Python script exited with code ${code}`)
      return
    }

    // send data to browser
    console.log(output)
    res.send(output)
  })
})
Here is the error --->
child process close all stdio with code 1
Please help. I tried to understand the problem but could not make sense of it.
Thanks in advance !!!

Reading Pointcloud from .csv to ROS PointCloud2

I have a .csv file which contains the /raw_points ROS topic, and I'm trying to convert that file into PointCloud2 data (http://docs.ros.org/en/api/sensor_msgs/html/msg/PointCloud2.html).
import csv
import sys
# Raise the per-field size limit so very large fields can be parsed.
csv.field_size_limit(sys.maxsize)
# Use a context manager so the file is closed, and avoid shadowing `file`.
with open("points_raw.csv") as csv_file:
    csvreader = csv.reader(csv_file)
    header = next(csvreader)  # first row holds the column names
    print(header)
    # Read the remaining rows from csvreader here, while the file is open.
This is my header:
['Time', 'header.seq', 'header.stamp.secs', 'header.stamp.nsecs', 'header.frame_id', 'height', 'width', 'fields', 'is_bigendian', 'point_step', 'row_step', 'data', 'is_dense']
This information matches the PointCloud2 message, but I'm not sure how to convert it to this type.

You need to simply iterate over each row and for every row store the relative fields in a PointCloud2 message and publish it out. For example:
import rospy
import csv
from sensor_msgs.msg import PointCloud2
def main():
    """Publish each data row of the CSV as a PointCloud2 on 'output_topic'.

    The first row is treated as the column header and skipped.  Columns are
    read by position (1 = header.seq, 2 = header.stamp.secs, 11 = data,
    12 = is_dense) and one message is published per row at 10 Hz.
    """
    #Setup ros param/init here
    some_pub = rospy.Publisher('output_topic', PointCloud2, queue_size=10)
    rate = rospy.Rate(10)  # create once so the loop keeps a steady 10 Hz
    with open('some_file.csv', 'r') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            # csv.reader already yields a list of column strings, so the row
            # must not be split again.
            new_msg = PointCloud2()
            # CSV values are text; the message fields are numeric.
            new_msg.header.seq = int(row[1])
            new_msg.header.stamp.secs = int(row[2])
            #Iterate over the rest
            # NOTE(review): 'data' (and 'fields') arrive as text; how to turn
            # them back into bytes / PointField objects depends on the tool
            # that exported the CSV - confirm the format before publishing.
            new_msg.data = row[11]
            new_msg.is_dense = row[12].strip().lower() in ('true', '1')
            some_pub.publish(new_msg)
            rate.sleep() #Sleep at 10Hz

can't retrieving files from pubmed using biopython

I am using this script to get data about covid-19 from pubmed
from Bio import Entrez
def search(query, retmax=20):
    """Search PubMed for *query* and return the parsed ESearch result.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        retmax: Maximum number of ids to request (default 20).

    Returns:
        The structure produced by ``Entrez.read``; the matching ids are under
        the ``'IdList'`` key.
    """
    # Placeholder address - replace it with your own e-mail before running.
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=retmax,
                            retmode='xml',
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Always release the network handle, even if parsing fails.
        handle.close()
def fetch_details(id_list):
    """Fetch the PubMed records for *id_list* and return the parsed result.

    Args:
        id_list: Iterable of PubMed id strings; they are joined with commas
            for a single ``Entrez.efetch`` request.

    Returns:
        The structure produced by ``Entrez.read`` for the fetched XML.
    """
    ids = ','.join(id_list)
    # Placeholder address - replace it with your own e-mail before running.
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Always release the network handle, even if parsing fails.
        handle.close()
if __name__ == '__main__':
    # Search for 'covid-19', fetch the matching records and list their titles.
    results = search('covid-19')
    id_list = results['IdList']
    papers = fetch_details(id_list)
    for number, paper in enumerate(papers['PubmedArticle'], start=1):
        title = paper['MedlineCitation']['Article']['ArticleTitle']
        print("{}) {}".format(number, title))
I get results in the console, but what I want is to automatically download the articles as files, such as XML or text files. Any suggestions on how to do that? I googled it but found nothing.

You can add this code at the end to save to a JSON file
#write to file
import json

# Serialise the fetched records to a JSON string and store it in file.json.
serialized = json.dumps(papers)
with open('file.json', 'w') as json_file:
    json_file.write(serialized)

h5py reading raw data with escape characters

Hi, I want to read the HDF5 file data exactly as it was written.
But when I read it with the following code, I get the output shown below.
Code
# Copy the "BinaryData_metadata" dataset from Json.h5 into Test.json.
hf = h5py.File('Json.h5', 'r')
data_read = hf.get("BinaryData_metadata")
rmdwrite = open("Test.json", "w")
# str() of a NumPy array gives its printable form (brackets, b'' prefix and
# escape sequences), and that text is what ends up in the file.
as_array = np.array(data_read)
rmdwrite.write(str(as_array))
rmdwrite.close()
hf.close()
Output
[b'{\n\t"TestReport": {\n\t\t"TestName": "XYZ",\n\t\t"Description"................
How to get the exact output with the same formatting in my output file?
When I print with this
# repr() of the array's string form: escapes the backslashes a second time.
Data_arr = repr(str(np.array(data_read)))
I get
'[b\'{\\n\\t"TestReport": {\\n\\t\\t"Te................
Okay, this is how I am writing the data via C++:
// Write file_datastring into a new dataset as one fixed-length UTF-8 string.
// dimsf, order, WriteDataSet and Hdf5::fileObject are declared outside this fragment.
// Rank-1 dataspace whose extent comes from dimsf.
DataSpace dataspace(1, dimsf); //Creating Dataspace
// Start from the predefined C_S1 string type; its size is set below.
StrType datatype(PredType::C_S1); //Creating Datatype of type char
datatype.setOrder(order); //Data Store Order
// Size the string type to the length of the payload.
datatype.setSize(file_datastring.length()); //Datalength
// Mark the stored characters as UTF-8.
datatype.setCset(H5T_CSET_UTF8);
DataSet dataset = Hdf5::fileObject.createDataSet(WriteDataSet, datatype, dataspace); //Create dataset
// NOTE(review): nothing in these calls escapes the string, so the extra
// backslashes presumably come from how the reader prints it - confirm.
dataset.write(file_datastring, datatype); //Write to dataset
Is there something here that is appending that extra \?

The solution which I found was
# Copy the dataset's raw bytes to the output file with no str()/repr()
# formatting.  Both files are closed by the context managers, even on error.
with h5py.File(H5FileName, 'r') as hf, open(OutFileName, "wb") as FileObj:
    # dataset[...] reads the whole dataset as a NumPy array; it replaces the
    # `.value` attribute, which was removed in h5py 3.0.  The output is opened
    # in binary mode because tofile() writes raw bytes.
    hf[H5DataSetName][...].tofile(FileObj)
This works perfectly
Regards.
Siddharth

NLTK Word Extraction

So I am trying to read a txt file, process it by taking out the stop words, and then output that result into a new file. However, I keep getting the following error:
TypeError: expected a string or other character buffer object
This is my code:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Read the input text; the context manager closes the file.  Plain 'r' is
# used because the 'U' mode flag no longer exists in current Python 3.
with open('tess1.txt', 'r') as f:
    raw = f.read()

stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(raw)
text = nltk.Text(word_tokens)

# Keep every token that is not an English stop word.
filtered_sentence = [w for w in word_tokens if w not in stop_words]

# write() needs a string, not a list, so join the tokens with spaces.
with open("tess12.txt", "w") as K:
    K.write(' '.join(filtered_sentence))
print(filtered_sentence)

The solution is to write a string, not a list, to the file:
# Join the tokens into one space-separated string; str(list) would write the
# list's repr, including brackets and quotes.
K.write(' '.join(filtered_sentence))

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

Write Twitter Frequency analysis to a CSV using python - twitter

This snippet will append a line to the end of your CSV file. with open('tweetfreq.csv', 'a') as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow([word,freq])

Related

ChildProcess close with all studio code 1

Reading Pointcloud from .csv to ROS PointCloud2

can't retrieving files from pubmed using biopython

h5py reading raw data with escape characters

NLTK Word Extraction

Categories

Resources