Log file ruby testing - ruby-on-rails

I have this ruby script and I have a problem on the line 57..
please let me know where is the error
#!/usr/bin/ruby
class CommonLog
# init method takes log filename, computes frequency counts
# for ips, urls & statuses
def initialize(log)
f = File.open(log)
#filename = log
#filesize = f.size
#ip_counts = Hash.new(0)
#url_counts = Hash.new(0)
#status_counts = Hash.new(0)
#total_records = 0
f.readlines.each do |line|
tokens = line.split(' ')
ip = tokens[0]
url = tokens[-4]
status = tokens[-2]
#ip_counts[ip] += 1
#url_counts[url] += 1
#status_counts[status] += 1
#total_records += 1
end
f.close
end
# displays filename and bytesize
def file_info
"Filename: #{#filename}, Bytes Transferred: #{#filesize}"
end
# draws ip histogram
def ip_hist
#ip_counts.each do |ip, freq|
puts "#{ip}: #{'*'*freq}"
end
puts file_info
end
# draws url histogram
def url_hist
#url_counts.each do |url,freq|
puts "#{url} #{'*'*freq}"
end
puts file_info
end
# draws list of statuses
def status_list
msg = ""
sorted_status_codes = #status_counts.keys.sort
sorted_status_codes do |code|
count = #status_counts[code]
pct = ((count.to_f/#total_records)*100).to_i
msg += "#{code}: #{percentage}%\n"
end
puts msg
puts file_info
end
end
def test
log = CommonLog.new('**TEST LOG FILE PATH**')
log.ip_histogram
log.url_histogram
log.status_codes
end
test()
Thank you for your help in advance.. your help would be much appreciated..
The log file is on the server, so I removed the path because you won't be able to reach it ..
Here are the message I get back after I run the file
/1.rb:57:in `status_list': undefined method `keys' for nil:NilClass (NoMethodError)
from ./1.rb:73:in `test'
from ./1.rb:75:in `<main>'

Related

Ruby on Rails Encoding::UndefinedConversionError ("\xF8" from ASCII-8BIT to UTF-8)

I saw a bunch of question with a similar topic but I couldn't find a solution to my problem. Hopefully someone can help.
I have a Ruby on Rails app. In this app, I have some base64 data that I want to decode and write in a file. When I have a small script that I call through ruby myFile.rb, the program behaves as expeted. However when I run the same code with rails c. I have the following error:
Traceback (most recent call last):
7: from (irb):1:in `<main>'
6: from app/models/node.rb:1:in `<main>'
5: from app/models/node_manager.rb:317:in `<main>'
4: from app/models/node_manager.rb:255:in `convert_base64_to_file'
3: from app/models/node_manager.rb:255:in `open'
2: from app/models/node_manager.rb:255:in `block in convert_base64_to_file'
1: from app/models/node_manager.rb:255:in `write'
Encoding::UndefinedConversionError ("\xF8" from ASCII-8BIT to UTF-8)
Here are my files:
class I wrote in node_manager.rb
class NodeManager
class MacaroonInterceptor < GRPC::ClientInterceptor
attr_reader :macaroon
def initialize(macaroon)
#macaroon = macaroon
super
end
def request_response(request:, call:, method:, metadata:)
metadata['macaroon'] = macaroon
yield
end
def server_streamer(request:, call:, method:, metadata:)
metadata['macaroon'] = macaroon
yield
end
end
def initialize(tls_path, macaroon_path, node_ip)
#tls_path = tls_path
#macaroon_path = macaroon_path
#node_ip = node_ip
#node = connect(#tls_path, #macaroon_path, #node_ip)
end
def connect(tls_path, macaroon_path, node_ip)
certificate = File.read(File.expand_path(tls_path))
credentials = GRPC::Core::ChannelCredentials.new(certificate)
macaroon_binary = File.read(File.expand_path(macaroon_path))
macaroon = macaroon_binary.each_byte.map { |b| b.to_s(16).rjust(2,'0') }.join
if stub = Lnrpc::Lightning::Stub.new(
node_ip,
credentials,
interceptors: [NodeManager::MacaroonInterceptor.new(macaroon)],
channel_args: {"grpc.max_receive_message_length" => 1024 * 1024 * 50}
)
return stub
else
puts 'error'
raise "Could not connect to the node"
end
end
def get_info()
begin
node_info = #node.get_info(Lnrpc::GetInfoRequest.new())
raise LoadError if !node_info
return #node.get_info(Lnrpc::GetInfoRequest.new())
rescue
puts "could not connect to the node"
end
end
# Class methods
def self.convert_base64_to_file(directory: nil, file_name:, base64data:)
if directory
file_path = [directory, file_name].join('/')
else
file_path = file_name
end
file = File.open("file_name", 'w') {|f| f.write(Base64.decode64(base64data)) }
return file_path
end
end
script I run - works when I call ruby node_manager.rb and not when I run it with rails c
admin_tls_base64 = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNKekNDQWMyZ0F3SUJBZ0lSQUlWYzdlNHkxVGlpcHpmNWtHd09BUkF3Q2dZSUtvWkl6ajBFQXdJd01URWYKTUIwR0ExVUVDaE1XYkc1a0lHRjFkRzluWlc1bGNtRjBaV1FnWTJWeWRERU9NQXdHQTFVRUF4TUZZV3hwWTJVdwpIaGNOTWpFd056QTRNVEF5TmpFMFdoY05Nakl3T1RBeU1UQXlOakUwV2pBeE1SOHdIUVlEVlFRS0V4WnNibVFnCllYVjBiMmRsYm1WeVlYUmxaQ0JqWlhKME1RNHdEQVlEVlFRREV3VmhiR2xqWlRCWk1CTUdCeXFHU000OUFnRUcKQ0NxR1NNNDlBd0VIQTBJQUJMQ3B2eDdZb1ptZURIQVdjN0ozdmZTTUhPNEJhRnpMV0hJUXhJM2swaVJRS2xVVQpaSkdUcnFCQm1kU3AzcnNRMWsvSmtqOVlLZEVRMHVmcTdNeDBzcEdqZ2NVd2djSXdEZ1lEVlIwUEFRSC9CQVFECkFnS2tNQk1HQTFVZEpRUU1NQW9HQ0NzR0FRVUZCd01CTUE4R0ExVWRFd0VCL3dRRk1BTUJBZjh3SFFZRFZSME8KQkJZRUZJWlUrUHlTYVB0eFdOV0RqckJ5SUZuVXdTL0JNR3NHQTFVZEVRUmtNR0tDQldGc2FXTmxnZ2xzYjJOaApiR2h2YzNTQ0JXRnNhV05sZ2c1d2IyeGhjaTF1TWkxaGJHbGpaWUlFZFc1cGVJSUtkVzVwZUhCaFkydGxkSUlIClluVm1ZMjl1Ym9jRWZ3QUFBWWNRQUFBQUFBQUFBQUFBQUFBQUFBQUFBWWNFckJJQUFqQUtCZ2dxaGtqT1BRUUQKQWdOSUFEQkZBaUVBbDhnOERONStpRTA5c1ZZS2RoblExUmZWeWhhaTdLMG9xK2puV3NyNHpVNENJR2t6Nko4eQprTk4rSTRHT040UGNIWGN1QjRjWEt1aDJxVWFvd2IvZVBxV3YKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo="
admin_mac_base64= "AgEDbG5kAvgBAwoQ4MUVu75H5pCIGbg7qzbRfBIBMBoWCgdhZGRyZXNzEgRyZWFkEgV3cml0ZRoTCgRpbmZvEgRyZWFkEgV3cml0ZRoXCghpbnZvaWNlcxIEcmVhZBIFd3JpdGUaIQoIbWFjYXJvb24SCGdlbmVyYXRlEgRyZWFkEgV3cml0ZRoWCgdtZXNzYWdlEgRyZWFkEgV3cml0ZRoXCghvZmZjaGFpbhIEcmVhZBIFd3JpdGUaFgoHb25jaGFpbhIEcmVhZBIFd3JpdGUaFAoFcGVlcnMSBHJlYWQSBXdyaXRlGhgKBnNpZ25lchIIZ2VuZXJhdGUSBHJlYWQAAAYgzSSZTiO2yEt9aP+zP95czvfNNPgQXhyLNto2X1onfqQ="
admin_tls = NodeManager.convert_base64_to_file(file_name: "test_tls.cert", base64data: admin_tls_base64)
admin_mac = NodeManager.convert_base64_to_file(file_name: "test.macaroon", base64data: admin_mac_base64)
admin_node = NodeManager.new(admin_tls, admin_mac, '127.0.0.1:10001')
puts admin_node.get_info
Thank you for your help,
A simple solution was to give the File.write function the 'wb' rights
def self.convert_base64_to_file(directory: nil, file_name:, base64data:)
if directory
file_path = [directory, file_name].join('/')
else
file_path = file_name
end
file = File.open("file_name", 'w') {|f| f.write(Base64.decode64(base64data)) }
return file_path
end
I hope that'll help someone!

Scraping data in rails using thread

I am doing scraping to fetch the data from the website to my database in rails.I am fetching the 32000 record with this script there isn't any issue but i want to fetch the data faster so i apply the thread in my rake task but then there is a issue while running the rake task some of the data is fetching then the rake task getting aborted.
I am not aware of what to do task if any help can be done i am really grateful . Here is my rake task code for the scraping.
task scratch_to_database: :environment do
time2 = Time.now
puts "Current Time : " + time2.inspect
client = Mechanize.new
giftcard_types=Giftcard.card_types
find_all_merchant=Merchant.all.pluck(:id, :name).to_h
#first index page of the merchant
index_page = client.get('https://www.twitter.com//')
document_page_index = Nokogiri::HTML::Document.parse(index_page.body)
#set all merchant is deteled true
# set_merchant_as_deleted = Merchant.update_all(is_deleted: true) if Merchant.exists?
# set_giftcard_as_deleted = Giftcard.update_all(is_deleted: true) if Giftcard.exists?
update_all_merchant_record = []
update_all_giftcard_record = []
threads = []
#Merchant inner page pagination loop
page_no_merchant = document_page_index.css('.pagination.pagination-centered ul li:nth-last-child(2) a').text.to_i
1.upto(page_no_merchant) do |page_number|
threads << Thread.new do
client.get("https://www.twitter.com/buy-gift-cards?page=#{page_number}") do |page|
document = Nokogiri::HTML::Document.parse(page.body)
#Generate the name of the merchant and image of the merchant loop
document.css('.product-source').each do |item|
merchant_name= item.children.css('.name').text.gsub("Gift Cards", "")
href = item.css('a').first.attr('href')
image_url=item.children.css('.img img').attr('data-src').text.strip
#image url to parse the url of the image
image_url=URI.parse(image_url)
#saving the record of the merchant
# #merchant=Merchant.create(name: merchant_name , image_url:image_url)
if find_all_merchant.has_value?(merchant_name)
puts "this if"
merchant_id=find_all_merchant.key(merchant_name)
puts merchant_id
else
#merchant= Merchant.create(name: merchant_name , image_url:image_url)
update_all_merchant_record << #merchant.id
merchant_id=#merchant.id
end
# #merchant.update_attribute(:is_deleted, false)
#set all giftcard is deteled true
# set_giftcard_as_deleted = Giftcard.where(merchant_id: #merchant.id).update_all(is_deleted: true) if Giftcard.where(merchant_id: #merchant.id).exists?
#first page of the giftcard details page
first_page = client.get("https://www.twitter.com#{href}")
document_page = Nokogiri::HTML::Document.parse(first_page.body)
page_no = document_page.css('.pagination.pagination-centered ul li:nth-last-child(2) a').text.to_i
hrefextra =document_page.css('.dropdown-menu li a').last.attr('href')
#generate the giftcard details loop with the pagination
# update_all_record = []
find_all_giftcard=Giftcard.where(merchant_id:merchant_id).pluck(:row_id)
puts merchant_name
# puts find_all_giftcard.inspect
card_page = client.get("https://www.twitter.com#{hrefextra}")
document_page = Nokogiri::HTML::Document.parse(card_page.body)
#table details to generate the details of the giftcard with price ,per_off and final value of the giftcard
document_page.xpath('//table/tbody/tr[#class="toggle-details"]').collect do |row|
type1=[]
row_id = row.attr("id").to_i
row.at("td[2] ul").children.each do |typeli|
type = typeli.text.strip if typeli.text.strip.length != 0
type1 << type if typeli.text.strip.length != 0
end
value = row.at('td[3]').text.strip
value = value.to_s.tr('$', '').to_f
per_discount = row.at('td[4]').text.strip
per_discount = per_discount.to_s.tr('%', '').to_f
final_price = row.at('td[5] strong').text.strip
final_price = final_price.to_s.tr('$', '').to_f
type1.each do |type|
if find_all_giftcard.include?(row_id)
update_all_giftcard_record<<row_id
puts "exists"
else
puts "new"
#giftcard= Giftcard.create(card_type: giftcard_types.values_at(type.to_sym)[0], card_value:value, per_off:per_discount, card_price: final_price, merchant_id: merchant_id , row_id: row_id )
update_all_giftcard_record << #giftcard.row_id
end
end
#saving the record of the giftcard
# #giftcard=Giftcard.create(card_type:1, card_value:value, per_off:per_discount, card_price: final_price, merchant_id: #merchant.id , gift_card_type: type1)
end
# Giftcard.where(:id =>update_all_record).update_all(:is_deleted => false)
#delete all giftcard which is not present
# giftcard_deleted = Giftcard.where(:is_deleted => true,:merchant_id => #merchant.id).destroy_all if Giftcard.where(merchant_id: #merchant.id).exists?
time2 = Time.now
puts "Current Time : " + time2.inspect
end
end
end
end
threads.each(&:join)
puts "-------"
puts threads
# merchant_deleted = Merchant.where(:is_deleted => true).destroy_all if Merchant.exists?
merchant_deleted = Merchant.where('id NOT IN (?)',update_all_merchant_record).destroy_all if Merchant.exists?
giftcard_deleted = Giftcard.where('row_id NOT IN (?)',update_all_giftcard_record).destroy_all if Giftcard.exists?
end
end
Error i am receiving:
ActiveRecord::ConnectionTimeoutError: could not obtain a connection from the pool within 5.000 seconds (waited 5.001 seconds); all pooled connections were in use
Each thread requires a separate connection to your database. You need to increase the connection pool size that your application can use in your database.yml file.
But your database should also be capable of handling the incoming connections. If you are using mysql you can check this by running select ##MAX_CONNECTIONS on your console.

Ruby each block next when exception or error is raised

I have the following method in my rake task.
def call
orders = Spree::Order.complete.where('completed_at >= :last_day', last_day: Time.now - 30.days)
orders.each do |order|
order_tracking = order.shipments.first.tracking
next if order_tracking.nil?
shipment = order.shipments.first
results = fedex.track(tracking_number: order_tracking)
tracking_info = results.first
status = tracking_info.status.to_s
delivery_date = tracking_info.delivery_at
shipment.is_delivered = delivered?(status)
shipment.date_delivered = delivery_date
shipment.save
puts "-> Shipping status was updated for #{order.number}"
end
end
If there is an order with no tracking number I skipping it with next on line 5.
My question: How would I do next if a tracking number is invalid and the following error is raised:
Fedex::RateError: Invalid tracking number.
Ideally I would like to change line 5 to:
next if order_tracking.nil? || order_tracking.raised(Fedex::RateError) # something like that
Thank you in advance.
Also RateError is raised here:
def process_request
api_response = self.class.post(api_url, :body => build_xml)
puts api_response if #debug == true
response = parse_response(api_response)
if success?(response)
options = response[:track_reply][:track_details]
if response[:track_reply][:duplicate_waybill].downcase == 'true'
shipments = []
[options].flatten.map do |details|
options = {:tracking_number => #package_id, :uuid => details[:tracking_number_unique_identifier]}
shipments << Request::TrackingInformation.new(#credentials, options).process_request
end
shipments.flatten
else
[options].flatten.map do |details|
Fedex::TrackingInformation.new(details)
end
end
else
error_message = if response[:track_reply]
response[:track_reply][:notifications][:message]
else
"#{api_response["Fault"]["detail"]["fault"]["reason"]}\n--#{api_response["Fault"]["detail"]["fault"]["details"]["ValidationFailureDetail"]["message"].join("\n--")}"
end rescue $1
raise RateError, error_message
end
end
added:
private
def fedex_track(tracking)
fedex.track(tracking_number: tracking)
end
And changed results on line 7 to:
results = fedex_track(order_tracking) rescue next

A ruby script and a rails model: Same code - different behaviour

I have coded the following ruby script:
require 'open-uri'
require 'Nokogiri'
require 'anemone'
class JobFox
attr_accessor :company_url,
:jobs_page,
:max_words,
:jobs_part,
:jobs_container,
:element_score,
:max_score,
:jobs
def calc_element_score(element)
self.element_score += (element['class'].to_s.scan(/job|career|position|opening/).count + element['id'].to_s.scan(/job|career|position|opening/).count) * 100
self.element_score += element.to_s.scan(/job|career|position|opening/).count * 5
element.css('a').each do |a|
self.element_score += a.to_s.scan(/job|career|position|opening/).count * 7
end
element.css('li').each do |li|
self.element_score += li.to_s.scan(/job|career|position|opening/).count * 5
end
element.css('h').each do |h|
self.element_score += h.to_s.scan(/job|career|position|opening/).count * 3
end
if self.element_score > self.max_score
self.max_score = self.element_score
self.jobs_part = element
end
if element.children.count == 0
self.element_score = 0
end
end
end
fox = JobFox.new
fox.company_url = 'http://www.website.com'
fox.max_words = 0
fox.jobs = []
# CRAWL THE WEBSITE TO FIND THE JOBS LINK
Anemone.crawl(fox.company_url, :depth_limit => 3) do |anemone|
anemone.on_pages_like(/job|jobs|career|careers|team|about/) do |page|
begin
puts "SCANNING: " + page.url.to_s
# SCAN THE HTML AND FIND THE OCCURENCES OF THE WORD "JOB"
source_html = open(page.url).read
job_occurences = source_html.scan(/job|jobs|work|position/).count
# IF MORE OCCURENCES THAN BEFORE, WE KEEP THE PAGE URL
if job_occurences > fox.max_words
fox.max_words = job_occurences
fox.jobs_page = page.url
end
rescue Exception => e
puts e
end
end
end
fox.jobs_container = Nokogiri::HTML(open(fox.jobs_page))
fox.element_score = fox.max_score = 0
fox.jobs_container.css('div, section').each do |container|
container.traverse do |element|
fox.calc_element_score(element)
end
end
fox.jobs_part.traverse do |element|
element.css('a').each do |job|
fox.jobs << job.text
end
end
# REMOVE POSSIBLE DUPLICATE ENTRIES
fox.jobs = fox.jobs.uniq
puts fox.jobs
and I am trying to port it to a rails application - not as a script/task but as a model function:
require 'anemone'
require 'open-uri'
require 'Nokogiri'
class Company < ActiveRecord::Base
has_many :jobs
accepts_nested_attributes_for :jobs
# CALCULATE THE RELATEDNESS OF EACH HTML ELEMENT
def calculate_element_score(element)
#jobs_expression = '/job|career|position|opening/'
#element_score += (element['class'].to_s.scan(#jobs_expression).count + element['id'].to_s.scan(#jobs_expression).count) * 100
#element_score += element.to_s.scan(#jobs_expression).count * 5
element.css('a').each do |a|
#element_score += a.to_s.scan(#jobs_expression).count * 7
end
element.css('li').each do |li|
#element_score += li.to_s.scan(#jobs_expression).count * 5
end
element.css('h').each do |h|
#element_score += h.to_s.scan(#jobs_expression).count * 3
end
if #element_score > #max_score
#max_score = #element_score
#jobs_part = element
end
if element.children.count == 0
#element_score = 0
end
end
# CRAWL THE WEBSITE TO FIND THE JOBS PAGE
def find_jobs_page
max_words = 0
Anemone.crawl(self.website, :depth_limit => 3) do |anemone|
anemone.on_pages_like(/job|jobs|career|careers|team|about/) do |page|
begin
# SCAN THE HTML AND FIND OCCURENCES OF RELEVANT WORDS
source_html = open(page.url).read
job_occurences = source_html.scan(/job|jobs|work|position/).count
# IF MORE OCCURENCES THAN BEFORE, KEEP THE PAGE URL
if job_occurences > max_words
max_words = job_occurences
self.jobs_page = page.url
end
rescue Exception => e
puts e
end
end
end
end
# FIND THE CONTAINER THAT HAS THE JOB LISTINGS
def find_jobs_container
jobs_container = Nokogiri::HTML(open(self.jobs_page))
#element_score = #max_score = 0
#jobs_expression = '/job|career|position|opening/'
jobs_container.css('div, section').each do |container|
container.traverse do |element|
self.calculate_element_score(element)
end
end
end
# ADD THE JOBS FROM THE PAGE TO THE COMPANY ASSOCIATION
def extract_jobs
#jobs_part.traverse do |element|
element.css('a').each do |job|
j = JOBS.new()
j.title = job.text
j.url = job
self.jobs << j
end
end
end
# THE METHOD TO FIND ALL THE JOBS FOR A COMPANY
def find_jobs
self.find_jobs_page
self.find_jobs_container
self.extract_jobs
end
end
Everything works just fine apart from the calculate_element_score method - #elements_score is always 0. Have I understood something entirely wrong regarding global variables?

Gem Resque Error - Undefined "method perform" after Overriding it form the super class

First of all Thanks for you all for helping programmers like me with your valuable inputs in solving day to day issues.
This is my first question in stack overflow as I am experiencing this problems from almost one week.
WE are building a crawler which crawls the specific websites and extract the contents from it, we are using mechanize to acheive this , as it was taking loads of time we decided to run the crawling process as a background task using resque with redis gem , but while sending the process to background I am experiencing the error as the title saying,
my code in lib/parsers/home.rb
require 'resque'
require File.dirname(__FILE__)+"/../index"
class Home < Index
Resque.enqueue(Index , :page )
def self.perform(page)
super (page)
search_form = page.form_with :name=>"frmAgent"
resuts_page = search_form.submit
total_entries = resuts_page.parser.xpath('//*[#id="PagingTable"]/tr[2]/td[2]').text
if total_entries =~ /(\d+)\s*$/
total_entries = $1
else
total_entries = "unknown"
end
start_res_idx = 1
while true
puts "Found #{total_entries} entries"
detail_links = resuts_page.parser.xpath('//*[#id="MainTable"]/tr/td/a')
detail_links.each do |d_link|
if d_link.attribute("class")
next
else
data_page = #agent.get d_link.attribute("href")
fields = get_fields_from_page data_page
save_result_page page.uri.to_s, fields
#break
end
end
site_done
rescue Exception => e
puts "error: #{e}"
end
end
and the superclass in lib/index.rb is
require 'resque'
require 'mechanize'
require 'mechanize/form'
class Index
#queue = :Index_queue
def initialize(site)
#site = site
#agent = Mechanize.new
#agent.user_agent = Mechanize::AGENT_ALIASES['Windows Mozilla']
#agent.follow_meta_refresh = true
#rows_parsed = 0
#rows_total = 0
rescue Exception => e
log "Unable to login: #{e.message}"
end
def run
log "Parsing..."
url = "unknown"
if #site.url
url = #site.url
log "Opening #{url} as a data page"
#page = #agent.get(url)
#perform method should be override in subclasses
#data = self.perform(#page)
else
#some sites do not have "datapage" URL
#for example after login you're already on your very own datapage
#this is to be addressed in 'perform' method of subclass
#data = self.perform(nil)
end
rescue Exception=>e
puts "Failed to parse URL '#{url}', exception=>"+e.message
set_site_status("error "+e.message)
end
#overriding method
def self.perform(page)
end
def save_result_page(url, result_params)
result = Result.find_by_sql(["select * from results where site_id = ? AND ref_code = ?", #site.id, utf8(result_params[:ref_code])]).first
if result.nil?
result_params[:site_id] = #site.id
result_params[:time_crawled] = DateTime.now().strftime "%Y-%m-%d %H:%M:%S"
result_params[:link] = url
result = Result.create result_params
else
result.result_fields.each do |f|
f.delete
end
result.link = url
result.time_crawled = DateTime.now().strftime "%Y-%m-%d %H:%M:%S"
result.html = result_params[:html]
fields = []
result_params[:result_fields_attributes].each do |f|
fields.push ResultField.new(f)
end
result.result_fields = fields
result.save
end
#rows_parsed +=1
msg = "Saved #{#rows_parsed}"
msg +=" of #{#rows_total}" if #rows_total.to_i > 0
log msg
return result
end
end
What's Wrong with this code?
Thanks

Resources