NLTK Word Extraction - machine-learning

So I am trying to read a txt file, process it by taking out the stop words, and then output that result into a new file. However, I keep getting the following error:
TypeError: expected a string or other character buffer object
This is my code:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Open the input text in universal-newline read mode.
# NOTE(review): this handle is never closed in the listing.
f=open('tess1.txt','rU')
# English stop words, held in a set for membership tests.
stop_words = set(stopwords.words('english'))
raw=f.read()
word_tokens = word_tokenize(raw)
# NOTE(review): `text` is not used again in this listing.
text = nltk.Text(word_tokens)
# Keep only the tokens that are not stop words.
filtered_sentence = [w for w in word_tokens if not w in stop_words]
# NOTE(review): indentation appears to have been lost in this listing; the
# append below presumably belongs under this `if` - confirm against the
# original post.
if w not in stop_words:
filtered_sentence.append(w)
K = open("tess12.txt", "w")
# filtered_sentence is a list, but write() expects a string: this is the call
# that raises the TypeError quoted above.
K.write(filtered_sentence)
K.close()
print(filtered_sentence)

The solution is to pass a string to write(); filtered_sentence is a list, so convert it to a string first:
# write() needs one string. Joining the tokens with spaces writes the filtered
# text itself, whereas str(filtered_sentence) would write the list's repr
# (brackets, quotes and commas included).
K.write(" ".join(filtered_sentence))

Related

Unable to take user input from a Python file through a Jenkins pipeline

import os
import sys
import json
import requests
import isodate
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
# Remove pandas' display limits so printed DataFrames are not truncated.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# Buffer size passed to the open() calls below.
bufsize=1024
# HTTP session with trust_env disabled, so requests does not pick up
# environment-provided settings such as proxy variables.
session = requests.Session()
session.trust_env=False
path = os.getcwd()
print(path)
# NOTE(review): the value returned by format() is discarded, so this line has
# no effect.
format(os.getcwd())
# Relative paths used below resolve against /raj from here on.
os.chdir('/raj')
# NOTE(review): despite its name, file_name holds the DataFrame returned by
# read_excel, not a file name.
file_name = pd.read_excel('repos_desc12_p61qr.xlsx')
# "w+" truncates (or creates) each output file.
# NOTE(review): neither handle is closed anywhere in this listing.
file2 = open("commit_details.csv", "w+", buffering=bufsize)
file3 = open("merge_details.csv", "w+", buffering=bufsize)
hostname = "https://bsp-os.git.visteon.com"
# NOTE(review): this looks like a hard-coded access token; if so it should be
# injected (e.g. from a Jenkins credential / environment variable) rather than
# committed to source - confirm and rotate.
private = "-rBpd_x15GRTmFkk_T9H"
def excel_parser(meExcel):
    """Walk the rows of the repository spreadsheet.

    Args:
        meExcel: pandas DataFrame with ``REPO``, ``BranchName`` and
            ``StartDate`` columns.

    Raises:
        KeyError: if a row lacks one of the three expected columns.

    Only the per-row field extraction is visible in this excerpt; whatever
    processing follows it is not shown here.
    """
    # 'records' is the documented orient value; the 'record' shorthand was
    # deprecated and is rejected by pandas 2.x.
    dict_format = meExcel.to_dict(orient='records')
    for repo_detail in dict_format:
        parsed_repo_path = repo_detail["REPO"]
        parsed_branch_name = repo_detail["BranchName"]
        parsed_duration = repo_detail["StartDate"]
When I run this script from a Jenkins pipeline and it tries to read user input, I get "EOFError: EOF when reading a line". I tried catching the exception, but that did not work. Please help me get user input into the Python script when it runs through a Jenkins pipeline.

Reading Pointcloud from .csv to ROS PointCloud2

I have a .csv file containing data recorded from the /raw_points ROS topic, and I'm trying to convert that file into PointCloud2 messages (http://docs.ros.org/en/api/sensor_msgs/html/msg/PointCloud2.html).
import csv
import sys
# Point-cloud rows carry very large `data` fields, so lift the csv module's
# default per-field size limit.
csv.field_size_limit(sys.maxsize)
# `with` guarantees the file is closed; newline="" is what the csv module
# expects for files it reads. Any further row processing must happen inside
# this block, while the file is still open.
with open("points_raw.csv", newline="") as csv_file:
    csvreader = csv.reader(csv_file)
    header = next(csvreader)
print(header)
This is my header:
['Time', 'header.seq', 'header.stamp.secs', 'header.stamp.nsecs', 'header.frame_id', 'height', 'width', 'fields', 'is_bigendian', 'point_step', 'row_step', 'data', 'is_dense']
These fields match the PointCloud2 message, but I'm not sure how to convert the rows to that type.
You simply need to iterate over the rows and, for every row, store the relevant fields in a PointCloud2 message and publish it. For example:
import rospy
import csv
from sensor_msgs.msg import PointCloud2
def main():
    """Publish one PointCloud2 message per CSV data row at 10 Hz.

    Column positions follow the header shown in the question
    (1 = header.seq, 2 = header.stamp.secs, 11 = data, 12 = is_dense).
    """
    #Setup ros param/init here
    some_pub = rospy.Publisher('output_topic', PointCloud2, queue_size=10)
    # Build the rate limiter once; constructing a new Rate on every iteration
    # resets its timing reference.
    rate = rospy.Rate(10)
    with open('some_file.csv', 'r') as f:
        reader = csv.reader(f)
        # The first row holds the column names, not data.
        next(reader, None)
        # csv.reader already yields each row as a list of strings, so no
        # further split(',') is needed (a list has no split method).
        for row in reader:
            new_msg = PointCloud2()
            # CSV values are text; the header fields are integers.
            new_msg.header.seq = int(row[1])
            new_msg.header.stamp.secs = int(row[2])
            #Iterate over the rest
            # NOTE(review): data is copied as text here; PointCloud2.data is a
            # uint8[] buffer, so decode it according to how the CSV was
            # exported - confirm the format.
            new_msg.data = row[11]
            # NOTE(review): assumes the flag is written as True/False or 1/0.
            new_msg.is_dense = row[12].strip().lower() in ('true', '1')
            some_pub.publish(new_msg)
            rate.sleep() #Sleep at 10Hz

AttributeError: 'list' object has no attribute 'tolist'

It's a two part question,
import face_recognition
import os
import json
# Loaded images and the face encoding taken from each, index-aligned.
loadarr=[]
encodearr=[]
# Processes brad1.jpg through brad4.jpg.
# NOTE(review): indentation appears to have been lost in this listing; the two
# appends presumably form the loop body - confirm against the original post.
for i in range(0, 4):
loadarr.append(face_recognition.load_image_file( "brad"+str(i+1)+".jpg"))
# Keep only the first encoding returned for each image.
encodearr.append(face_recognition.face_encodings(loadarr[i])[0])
# encodearr is a plain Python list, which has no .tolist(); this is the line
# that raises the AttributeError in the title.
encodearr = encodearr.tolist()
# print(encodearr)
encodedDic = {"des": encodearr}
with open("sample.json", "w") as outfile:
json.dump(encodedDic,outfile)
When I used the list encodearr directly as the value of the key "des" (without .tolist()), I got
TypeError: Object of type ndarray is not JSON serializable. Then I added .tolist() to encodearr as shown, and now I get AttributeError: 'list' object has no attribute 'tolist'. brad1 to brad5 are the jpg files in the directory.
I did a workaround using numpy.
import face_recognition
import os
import json
import numpy as np
# Number of images to encode (brad1.jpg .. brad4.jpg).
total_images = 4
encodearr = []
for i in range(total_images):
    load = face_recognition.load_image_file("brad" + str(i + 1) + ".jpg")
    # Keep the first face found in each image.
    encodearr.append(face_recognition.face_encodings(load)[0])
# Each face encoding is a vector of 128 values, so the stacked result has
# shape (total_images, 128).
reshapped_array = np.reshape(encodearr, (total_images, 128))
# json cannot serialize an ndarray; tolist() converts it (and every nested
# element) to plain Python lists and floats.
encodedDic = {"des": reshapped_array.tolist()}
with open("sample.json", "w") as outfile:
    json.dump(encodedDic, outfile)

How to convert a text file by word2vec using python

I am a beginner in Python, natural language processing, deep learning, and neural networks. I want to run a program that converts a text file into vectors using word2vec in Python. Could someone please help me?
import math
import nltk
# Path handed to loadGloveModel() below.
file = "/home/stephy/Demo/textfile.txt"
import numpy as np
def loadGloveModel(gloveFile):
    """Load word vectors from a GloVe-format text file.

    Each non-empty line must be ``word v1 v2 ... vn`` separated by whitespace.

    Args:
        gloveFile: path to the UTF-8 encoded vector file.

    Returns:
        dict mapping each word to a 1-D numpy float array of its values.

    Raises:
        ValueError: if a value after the word is not a number, which is what
            happens when the file is ordinary text rather than embeddings.
    """
    model = {}
    with open(gloveFile, encoding="utf8") as f:
        # Stream the file line by line instead of loading it all at once.
        for line in f:
            splitLine = line.split()
            if not splitLine:
                # Blank line: nothing to index.
                continue
            word = splitLine[0]
            embedding = np.array([float(val) for val in splitLine[1:]])
            model[word] = embedding
    print ("Done.",len(model)," words loaded!")
    return model
# Build the word -> vector dict from the path stored in `file`.
model= loadGloveModel(file)
# NOTE(review): this looks up the literal word 'file' in the model, not the
# `file` path variable; it raises KeyError if that word is not a key.
print (model['file'])

Write Twitter Frequency analysis to a CSV using python

How do I write the output of my code to a csv?
Here is what I'm trying, the frequency analysis works, but I can't get the csv to write. Pretty new to python, so I am sure that I am doing something wrong.
# This Python file uses the following encoding: utf-8
import os, sys
import re
import csv
# Python 2 script: counts word frequencies in a text file, prints them, then
# attempts to write them to a CSV file.
# NOTE(review): indentation appears to have been lost in this listing, so the
# extent of each loop/try body below is inferred - confirm against the
# original post.
filename = 'TweetsCSV_ORIGINAL.txt'
# Read the whole file, lower-case it, and split it on runs of whitespace.
word_list = re.split('\s+', file(filename).read().lower())
print 'Words in text:', len(word_list)
# Maps each word to the number of times it occurs.
freq_dic = {}
punctuation = re.compile(r'[.?!,":;]')
for word in word_list:
# Strip the listed punctuation characters before counting.
word = punctuation.sub("", word)
# The += fails for a word not yet in the dict; the except branch then seeds
# its count at 1.
try:
freq_dic[word] += 1
except:
freq_dic[word] = 1
print 'Unique words:', len(freq_dic)
# Sort the (word, count) pairs in place.
freq_list = freq_dic.items()
freq_list.sort()
for word, freq in freq_list:
print word, freq
#write to CSV
# NOTE(review): if the lines below run after the loop rather than inside it,
# res holds only the last (word, freq) pair, and the writer emits each of its
# two elements as a separate single-column row.
res = [word, freq]
csvfile = "tweetfreq.csv"
#Assuming res is a flat list
with open(csvfile, "w") as output:
writer = csv.writer(output, lineterminator='\n')
for val in res:
writer.writerow([val])
This snippet will append a line to the end of your CSV file.
# Append one (word, freq) row to the CSV. Run this where `word` and `freq` are
# in scope, e.g. inside the loop over freq_list.
with open('tweetfreq.csv', 'a') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow([word, freq])

Resources