Delete a part of a code with ruby with a variable - ruby-on-rails

I have this code
# Controller action: fetches the latest CoinMarketCap listings for the view.
#
# Sets @coins to the whole parsed response (it carries both a "status" and a
# "data" key) and @my_coins to the ticker symbols of interest.
def index
  require 'net/http'
  require 'json'

  @url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?start=1&limit=100&CMC_PRO_API_KEY=mykey'
  @uri = URI(@url)
  @response = Net::HTTP.get(@uri)
  @coins = JSON.parse(@response)
  @my_coins = ["BTC", "XRP", "ADA", "ETH", "USDT"]
end
The url brings
{"status"=>{"timestamp"=>"2021-02-16T03:55:40.727Z", "error_code"=>0, "error_message"=>nil, "elapsed"=>21, "credit_count"=>1, "notice"=>nil, "total_count"=>4078}, "data"=>[{"id"=>1, "name"=>"
Using that variable (@coins), how can I delete everything up to the "data" key, so that only the part starting at "data"=> remains?

# Controller action: fetches the latest CoinMarketCap listings and keeps only
# the "data" part of the response.
#
# Sets @coins to the hash returned by get_coins and @my_coins to the ticker
# symbols of interest.
def index
  require 'net/http'
  require 'json'

  @url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?start=1&limit=100&CMC_PRO_API_KEY=mykey'
  @uri = URI(@url)
  @response = Net::HTTP.get(@uri)
  @coins = get_coins(@response)
  @my_coins = ["BTC", "XRP", "ADA", "ETH", "USDT"]
end
# Parses the raw JSON body and returns a new hash that holds only its
# "data" entry (an empty hash when the body has no such key).
#
# @param response [String] JSON text of the API response
# @return [Hash] the "data" key and its value, nothing else
def get_coins(response)
  JSON.parse(response).slice('data')
end
This gives you only the 'data' part, because 'data' is a key of the parsed hash, just like 'status'.
slice does not modify the hash it is called on; it returns a new hash containing only the requested keys.
Alternatively, you can remove the unwanted key in place with delete, after which @coins holds only the remaining keys:
# Hash#delete removes the key in place, so @coins itself changes.
@coins.delete('status')
puts @coins # => {"data"=>[{"id"=>1, "name"=>"somename"}]}
# Controller action: fetches the latest CoinMarketCap listings and drops the
# "status" envelope in place, leaving the remaining keys in @coins.
def index
  require 'net/http'
  require 'json'

  @url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?start=1&limit=100&CMC_PRO_API_KEY=mykey'
  @uri = URI(@url)
  @response = Net::HTTP.get(@uri)
  # Parse the instance variable; there is no local named `response` in this method.
  @coins = JSON.parse(@response)
  @coins.delete('status')
  @my_coins = ["BTC", "XRP", "ADA", "ETH", "USDT"]
end

Related

nil is not a symbol nor a string

I'm trying to use Kimurai to scrape a website. I run into this error when I request /scrape.
# Controller action: runs the restaurants scraper and reports the outcome
# through the flash.
def scrape
  url = "https://www.tripadvisor.com/Restaurants-g31892-Rogers_Arkansas.html"
  # `response` is still nil here: a local variable that is being assigned
  # evaluates to nil inside its own right-hand side, so parse! receives nil
  # as its first argument.
  response = RestaurantsScraper.parse!(response, url, data: {})
  if response[:status] == :completed && response[:error].nil?
    flash.now[:notice] = "Successfully scraped url"
  else
    flash.now[:alert] = response[:error]
  end
end
Here is my scraper class
# Kimurai spider for a TripAdvisor restaurant listing page.
class RestaurantsScraper < Kimurai::Base
  @name = "restaurants_scraper"
  @driver = :selenium_chrome
  @start_urls = ["https://www.tripadvisor.com/Restaurants-g31892-Rogers_Arkansas.html"]

  # Queues a parse_repo_page request for every matched element on the page.
  def parse(response, url:, data: {})
    # The class value must be a quoted XPath string; unquoted it is read as
    # the name of a child element and the predicate never matches.
    response.xpath("//div[@class='_1llCuDZj']").each do |a|
      request_to :parse_repo_page, url: absolute_url(a[:href], base: url)
    end
  end

  # Builds an attribute hash from the page and creates the Restaurant record
  # unless one with exactly these attributes already exists.
  def parse_repo_page(response, url:, data: {})
    item = {}
    # NOTE(review): the gsub pattern is a String, so only the literal text
    # "[^0-9]." is removed; confirm whether a Regexp was intended.
    item["title"] = response.css('a._15_ydu6b')&.text&.squish&.gsub('[^0-9].', '')
    item["type"] = response.css('span._1p0FLy4t')&.text&.squish
    item["reviews"] = response.css('span.w726Ki5B').text&.squish
    # NOTE(review): the space makes `_3mPt7dFq` a descendant *element* name;
    # if both are classes of the same <a>, it should be 'a._2uEVo25r._3mPt7dFq'.
    item["top_reviews"] = response.css('a._2uEVo25r _3mPt7dFq').text&.squish
    Restaurant.where(item).first_or_create
  end
end
Here is the error im getting
It's because response from RestaurantsScraper.parse!(response, url, data: {}) isn't defined.
From the kimurai docs it says you need to pass a Nokogiri::HTML::Document object.
I haven't used Kimurai and it feels like there is definitely a better way to do this, but something like the following may be enough to get you to the next step:
# Controller action: downloads the listing page, hands the parsed document to
# the scraper and reports the outcome through the flash.
def scrape
  require 'open-uri'
  url = "https://www.tripadvisor.com/Restaurants-g31892-Rogers_Arkansas.html"
  # URI.open is the open-uri entry point; Kernel#open no longer opens URLs
  # since Ruby 3.0.
  html = Nokogiri.parse(URI.open(url))
  # NOTE(review): Kimurai's README appears to show parse! taking a handler
  # name and a url: keyword (parse!(:parse, url: url)) - confirm against the docs.
  response = RestaurantsScraper.parse!(html, url, data: {})
  if response[:status] == :completed && response[:error].nil?
    flash.now[:notice] = "Successfully scraped url"
  else
    flash.now[:alert] = response[:error]
  end
end

Get Input from form for API url Request in rails?

I'm new to Rails and I'm trying to build a simple weather lookup that gets the weather by zipcode from an API.
Is there a way to get the zipcode from user input in a simple form? This is just for learning, so I'm not trying to set up Devise or a users model.
require 'net/http'
require 'json'
# Double quotes are required for #{zipcode} to be interpolated into the URL.
@url = "http://api.openweathermap.org/data/2.5/weather?zip=#{zipcode}&appid=APIKEY"
@uri = URI(@url)
@response = Net::HTTP.get(@uri)
@output = JSON.parse(@response)
actually I figured it out, i needed to add
# Controller action: looks up the current temperature for the submitted zipcode.
#
# Reads params[:zipcode] and sets:
#   @zip_query    - the submitted zipcode, or a prompt when it was left blank
#   @output       - the parsed API response
#   @name         - the "name" field of the response
#   @final_output - temperature converted from Kelvin to Fahrenheit, rounded
#                   to 2 decimals, or "Error" when the response has no usable
#                   temperature or is not valid JSON
# Nothing beyond @zip_query is set when no zipcode parameter was sent.
def zipcode
  require 'net/http'
  require 'json'

  @zip_query = params[:zipcode]
  return if @zip_query.nil?

  if @zip_query == ''
    @zip_query = 'Hey you forgot to enter a zipcode!'
    return
  end

  # encode_www_form escapes the user input so it cannot add or alter
  # query parameters.
  @uri = URI('http://api.openweathermap.org/data/2.5/weather')
  @uri.query = URI.encode_www_form(zip: @zip_query, appid: 'APIKEY')
  @url = @uri.to_s
  @response = Net::HTTP.get(@uri)
  @output = JSON.parse(@response)

  # Error payloads have no "main"/"temp", so look the value up defensively.
  weather = @output.is_a?(Hash) ? @output : {}
  @name = weather['name']
  main = weather['main']
  kelvin = main.is_a?(Hash) ? main['temp'] : nil
  @final_output = kelvin.is_a?(Numeric) ? ((kelvin - 273.15) * 9 / 5 + 32).round(2) : 'Error'
rescue JSON::ParserError
  @final_output = 'Error'
end
in the controller.rb file
and add
# The form POSTs to /zipcode, handled by the home controller's zipcode action.
post 'zipcode', to: 'home#zipcode'
get 'home/zipcode'
in the routes file
but I'm sure this is not the best practice

How to scrape multiple pages using nokogiri and how to scrape fast with rails

I am trying to scrape certain elements from 99 pages of a web page. I cannot for the life of me figure out how to do it.
Here is my code:
require 'open-uri'
require 'nokogiri'
# Collect the titles shown on the first page of the list.
@title = []
# The URL must be one unbroken literal; URI.open is the open-uri entry point
# (Kernel#open no longer opens URLs since Ruby 3.0).
html_content = URI.open("https://www.imdb.com/list/ls057823854/?sort=list_order,asc&st_dt=&mode=detail&page=1").read
doc = Nokogiri::HTML(html_content)
doc.search(".lister-item-header/a").each do |title|
  @title << title.text.strip
end
If you want to collect the titles from every page, here is the scraper code.
require 'open-uri'
require 'nokogiri'
require 'json'
# Collect the titles from every page of the list and save them as JSON.
@title = []
url = "https://www.imdb.com/list/ls057823854/?sort=list_order,asc&st_dt=&mode=detail&page="
titles_per_page = 100

# Downloads and parses the given page number.
# URI.open is the open-uri entry point; Kernel#open no longer opens URLs since Ruby 3.0.
fetch_page = ->(page) { Nokogiri::HTML(URI.open(url + page.to_s).read) }
# Appends every title found in a parsed page to @title.
collect_titles = lambda do |page_doc|
  page_doc.search(".lister-item-header/a").each { |title| @title << title.text.strip }
end

doc = fetch_page.call(1)
# Takes the number after "of" in the pagination text and drops thousands separators.
total = doc.search(".pagination-range").first.text.split("of")[1].delete(",").strip.to_i
# Round up, so a total that is an exact multiple of the page size does not
# trigger a request for one extra, empty page.
max = (total + titles_per_page - 1) / titles_per_page

collect_titles.call(doc)
(2..max).each do |page|
  collect_titles.call(fetch_page.call(page))
  sleep(1) # pause between requests
end

File.open("imdb-titles.json", "w") do |f|
  f.write(JSON.pretty_generate(@title))
end

Why are my scripts from lib/assets directory in a Rails application not executing, or erring out?

I'm trying to use the ConnectWise APIs in my Rails 3 application. I've built and tested an API client in plain Ruby and it works. Now I'm trying to add this script to lib/assets in my Rails app, but it seems either not to execute at all, or to execute and fail without giving me any error output.
Here are the successful Ruby scripts:
CompanyApis Class:
require 'builder'
# Builds SOAP request bodies for the ConnectWise CompanyApi.
class CompanyApis
  # Builds the GetCompany envelope with the integrator credentials and the
  # company id, and returns the result of the outermost builder call
  # (the caller assigns it to the HTTP request body).
  def self.get_company
    company_id = 21920
    login_company = 'COMPANY_ID'
    login_id = 'INTERGRATOR_LOGIN'
    login_password = 'INTEGRATOR PW'

    envelope_attrs = {
      :'xmlns:soap' => 'http://schemas.xmlsoap.org/soap/envelope/',
      :xmlns => 'http://connectwise.com'
    }

    xml = Builder::XmlMarkup.new(indent: 2)
    xml.instruct!
    xml.tag!('soap:Envelope', envelope_attrs) do
      xml.tag!('soap:Body') do
        xml.tag!('GetCompany') do
          xml.tag!('credentials') do
            xml.CompanyId(login_company)
            xml.IntegratorLoginId(login_id)
            xml.IntegratorPassword(login_password)
          end
          xml.id(company_id)
        end
      end
    end
  end
end
CwIntegrator Class:
require 'net/https'
require 'uri'
require 'nokogiri'
require './company_api'
# Requests one company from the ConnectWise CompanyApi SOAP endpoint and
# prints its details.
#
# All of the work sits directly in the class body, so it runs once, when this
# file is loaded.
class CwIntegrator
  cw_company_id = 21920
  cw_hostname = 'cw.host.com'
  companyapi_url = "https://#{cw_hostname}/v4_6_release/apis/2.0/CompanyApi.asmx"
  uri = URI.parse(companyapi_url)
  # Use for proxying to Kali
  #proxy_addr = '172.16.1.149'
  #proxy_port = 8080
  request = Net::HTTP::Post.new(uri.path)
  request.add_field('Content-Type', 'text/xml; charset=utf-8')
  request.add_field('SOAPAction', 'http://connectwise.com/GetCompany')
  request.body = CompanyApis.get_company
  http = Net::HTTP.new(uri.host, uri.port)
  # Use for proxying to Kali
  #http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port)
  http.use_ssl = true
  # FIXME: VERIFY_NONE turns off certificate validation; switch to
  # OpenSSL::SSL::VERIFY_PEER once the host presents a trusted certificate.
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  response = http.start { |h| h.request(request) }

  xml_doc = Nokogiri::XML(response.body).remove_namespaces!
  result_path = '//Envelope/Body/GetCompanyResponse/GetCompanyResult'
  # Text content of the node(s) at +relative+ below the GetCompanyResult element.
  text_at = lambda { |relative| xml_doc.xpath("#{result_path}/#{relative}").text }

  # An address may have fewer than two street lines; a missing line becomes nil
  # instead of raising NoMethodError on nil.
  street_lines = xml_doc.xpath("#{result_path}/DefaultAddress/StreetLines/string")
  first_line = street_lines[0]
  second_line = street_lines[1]

  company_info = [{
    company_name: text_at.call('CompanyName'),
    cw_company_id: cw_company_id,
    company_street_addr: first_line ? first_line.text : nil,
    company_street_addr2: second_line ? second_line.text : nil,
    company_city: text_at.call('DefaultAddress/City'),
    company_state: text_at.call('DefaultAddress/State'),
    company_zip: text_at.call('DefaultAddress/Zip'),
    company_country: text_at.call('DefaultAddress/Country'),
    company_status: text_at.call('Status'),
    company_phone: text_at.call('PhoneNumber'),
    company_fax: text_at.call('FaxNumber'),
    company_www: text_at.call('WebSite')
  }]
  puts(company_info)
end
Running this script has this output:
ruby cw_integrator.rb
{:company_name=>"Orlando Fake Co, LLC.", :cw_company_id=>21920, :company_street_addr=>"STREET ADDR.", :company_street_addr2=>"Suite 400", :company_city=>"Orlando", :company_state=>"FL", :company_zip=>"32839", :company_country=>"United States", :company_status=>"Active Gold TTS", :company_phone=>"5558765309", :company_fax=>"", :company_www=>"www.myweb.com"}
So to add this to the Rails app I've added two .rb files to lib/assets, cw_apis.rb and cw_get_company_integrator.rb. Here are their contents:
CwApis class:
#!/usr/bin/env ruby
require 'builder'
# Builds SOAP request bodies for the ConnectWise CompanyApi from the stored
# integrator account.
class CwApis
  # Builds the GetCompany envelope using the first CwIntegratorAccount record
  # for the credentials and the company id, and returns the result of the
  # outermost builder call (the caller assigns it to the HTTP request body).
  def self.get_company_xml_request
    account = CwIntegratorAccount.first
    login_company = account.integrator_company_id
    login_id = account.integrator_login_id
    login_password = account.integrator_password

    envelope_attrs = {
      :'xmlns:soap' => 'http://schemas.xmlsoap.org/soap/envelope/',
      :xmlns => 'http://connectwise.com'
    }

    xml = Builder::XmlMarkup.new(indent: 2)
    xml.instruct!
    xml.tag!('soap:Envelope', envelope_attrs) do
      xml.tag!('soap:Body') do
        xml.tag!('GetCompany') do
          xml.tag!('credentials') do
            xml.CompanyId(login_company)
            xml.IntegratorLoginId(login_id)
            xml.IntegratorPassword(login_password)
          end
          xml.id(account.cw_company_id)
        end
      end
    end
  end
end
And CwGetCompanyIntegrator Class:
#!/usr/bin/env ruby
require 'net/https'
require 'uri'
require 'nokogiri'
require 'assets/cw_apis'
# Requests the company of the stored integrator account from the ConnectWise
# CompanyApi SOAP endpoint and persists its details.
#
# All of the work sits directly in the class body, so it runs once, when this
# file is first loaded - not each time the constant is referenced.
class CwGetCompanyIntegrator
  cw_integrator_account = CwIntegratorAccount.first
  cw_hostname = cw_integrator_account.cw_hostname
  company_api_url = "https://#{cw_hostname}/v4_6_release/apis/2.0/CompanyApi.asmx"
  uri = URI.parse(company_api_url)
  request = Net::HTTP::Post.new(uri.path)
  request.add_field('Content-Type', 'text/xml; charset=utf-8')
  request.add_field('SOAPAction', 'http://connectwise.com/GetCompany')
  request.body = CwApis.get_company_xml_request
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  # verify_mode takes one of the OpenSSL::SSL::VERIFY_* constants, not the
  # OpenSSL::SSL module itself; VERIFY_PEER validates the server certificate.
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER
  response = http.start { |h| h.request(request) }

  xml_doc = Nokogiri::XML(response.body).remove_namespaces!
  result_path = '//Envelope/Body/GetCompanyResponse/GetCompanyResult'
  # Text content of the node(s) at +relative+ below the GetCompanyResult element.
  text_at = lambda { |relative| xml_doc.xpath("#{result_path}/#{relative}").text }

  # An address may have fewer than two street lines; a missing line becomes nil
  # instead of raising NoMethodError on nil.
  street_lines = xml_doc.xpath("#{result_path}/DefaultAddress/StreetLines/string")
  first_line = street_lines[0]
  second_line = street_lines[1]

  # NOTE(review): create! is normally an ActiveRecord model method; confirm
  # that CompanyInfosController (rather than a model) is the intended receiver.
  CompanyInfosController.create!(
    cw_company_id: cw_integrator_account.cw_company_id,
    company_name: text_at.call('CompanyName'),
    company_street_addr: first_line ? first_line.text : nil,
    company_street_addr2: second_line ? second_line.text : nil,
    company_city: text_at.call('DefaultAddress/City'),
    company_state: text_at.call('DefaultAddress/State'),
    company_zip: text_at.call('DefaultAddress/Zip'),
    company_country: text_at.call('DefaultAddress/Country'),
    company_status: text_at.call('Status'),
    company_phone: text_at.call('PhoneNumber'),
    company_fax: text_at.call('FaxNumber'),
    company_www: text_at.call('WebSite')
  )
end
I'm trying to execute the CwGetCompanyIntegrator class in my CwIntegratorAccountsController.
Here is the code inside the create action from the CwIntegratorAccountsController I've omitted index, show, new, edit, update, and destroy:
require 'assets/cw_get_company_integrator'
class CwIntegratorAccountsController < ApplicationController
  skip_before_filter :require_company, :only => [:create,:new]
  # GET /cw_integrator_accounts
  # GET /cw_integrator_accounts.json

  # Creates the integrator account from the submitted form, but only while no
  # account exists yet. When one already exists nothing is rendered here.
  def create
    unless CwIntegratorAccount.count >= 1
      @cw_integrator_account = CwIntegratorAccount.new(params[:cw_integrator_account])
      respond_to do |format|
        if @cw_integrator_account.save
          # Run the CW Integrator
          # NOTE(review): a bare constant reference only evaluates to the
          # class object; by itself it does not invoke any code.
          CwGetCompanyIntegrator
          format.html { redirect_to root_url, notice: 'cw_integrator success' }
          #format.json { render json: @cw_integrator_account, status: :created, location: @cw_integrator_account }
        else
          format.html { render action: 'new' }
          format.json { render json: @cw_integrator_account.errors, status: :unprocessable_entity }
        end
      end
    end
  end
end
I know that I'm getting past both the if @cw_integrator_account.save check and the CwGetCompanyIntegrator line, because I get the redirect from format.html { redirect_to root_url, notice: 'cw_integrator success' } (and see it in the logs), but CwGetCompanyIntegrator neither raises an error nor executes (properly, anyway).
What is the proper way to make this class execute?
lib is not autoloaded by default. You need to add it to the autoload path - see Auto-loading lib files in Rails 4
In addition, lib/assets is used by the Asset Pipeline. You should create a different folder for your classes.
Your code in CwGetCompanyIntegrator needs to be wrapped in a method, like the following block which you'd call using CwGetCompanyIntegrator.call
class CwGetCompanyIntegrator
  class << self
    # Class-level entry point, invoked as CwGetCompanyIntegrator.call.
    def call
      # execute code here
    end
  end
end

axslx - how can I check if an array element exists and if so alter its output?

I have an XPath query that takes its labels from an array, and I write the results to a spreadsheet using Axlsx. I need to tidy up my output for certain conditions, one of which is the 'Software included' entry.
My xpath scrapes the following URL http://h10010.www1.hp.com/wwpc/ie/en/ho/WF06b/321957-321957-3329742-89318-89318-5186820-5231694.html?dnr=1
A sample of my code is below:
# Look up the value cell next to each label and write one spreadsheet row
# per label.
clues = ['Optical drive', 'Pointing device', 'Software included']
selector = "//td[text()='%s']/following-sibling::td"
data = clues.map { |clue| [clue, doc.at(format(selector, clue)).text.strip] }

Axlsx::Package.new do |package|
  package.workbook.add_worksheet do |sheet|
    data.each { |row| sheet.add_row row }
  end
  package.serialize 'output.xlsx'
end
My Current output formatting
My Desired output formatting
If you can rely on the data always using ';' for separators, have a go at this:
# Build the rows: the label sits beside the first detail, every further
# detail gets a row of its own with a blank label.
data = []
clues.each do |clue|
  xpath = selector % clue
  # Take the first piece (not the last) for the label row so the details keep
  # their original order; strip the whitespace left around each ';'.
  first_detail, *other_details = doc.at(xpath).text.strip.split(';').map(&:strip)
  data << [clue, first_detail]
  other_details.each { |detail| data << ['', detail] }
end
to generate the data before the Axlsx::Package.new block
In answer to your comment/question: you can do it with something like this ;)
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'axlsx'
# Scrapes labelled cells from a page and exports them to a spreadsheet.
#
# Every "clue" is a label; the selector template is filled with the clue to
# locate the element holding its value. A clue's value is split into details:
# the first detail goes on the same row as the label, each further detail on
# a row of its own with a blank label.
class Scraper
  # Not affected by `private`, and called as Scraper.clues, so it is declared
  # with the public interface.
  #
  # @return [Array<String>] labels to look up, in output order
  def self.clues
    @clues ||= ['Operating system', 'Processors', 'Chipset', 'Memory type', 'Hard drive', 'Graphics',
                'Ports', 'Webcam', 'Pointing device', 'Keyboard', 'Network interface', 'Wireless',
                'Power supply type', 'Energy efficiency', 'Weight', 'Minimum dimensions (W x D x H)',
                'Warranty', 'Software included', 'Product color']
  end

  # @param url [String] address of the page to scrape
  # @param selector [String] format template with one %s placeholder for the clue
  def initialize(url, selector)
    @url = url
    @selector = selector
  end

  # @return [Hash] custom parsers keyed by clue
  def hooks
    @hooks ||= {}
  end

  # Registers a custom parser for one clue.
  #
  # @param clue [String] label the parser applies to
  # @param p_roc [#call] receives the matched element; may return a single
  #   value or an Array of details
  def add_hook(clue, p_roc)
    hooks[clue] = p_roc
  end

  # Collects the rows for every clue found on the page and writes them to
  # +file_name+. Clues without a matching element are skipped.
  def export(file_name)
    Scraper.clues.each do |clue|
      details = parse_clue(clue)
      next unless details

      # Label row carries the first detail so the details keep their order.
      first_detail, *other_details = details
      output << [clue, first_detail]
      other_details.each { |datum| output << ['', datum] }
    end
    serialize(file_name)
  end

  private

  # Parsed page, fetched once and memoized.
  #
  # @raise [ArgumentError] when the page cannot be opened or parsed
  def doc
    @doc ||= begin
      # URI#open comes from open-uri; Kernel#open no longer opens URLs since Ruby 3.0.
      Nokogiri::HTML(URI.parse(@url).open)
    rescue StandardError
      raise ArgumentError, 'Invalid URL - Nothing to parse'
    end
  end

  # Rows accumulated for the worksheet.
  def output
    @output ||= []
  end

  def selector_for_clue(clue)
    @selector % clue
  end

  # @return [Array, nil] details for the clue, or nil when no element matches
  def parse_clue(clue)
    element = doc.at(selector_for_clue(clue))
    return unless element

    # map (not each) so the stripped strings are the ones returned.
    call_hook(clue, element) || element.inner_html.split('<br>').map(&:strip)
  end

  # @return [Array, nil] the hook's result wrapped in an Array, or nil when
  #   no callable hook is registered for the clue
  def call_hook(clue, element)
    hook = hooks[clue]
    return unless hook.respond_to?(:call)

    value = hook.call(element)
    value.is_a?(Array) ? value : [value]
  end

  def package
    @package ||= Axlsx::Package.new
  end

  # Adds one worksheet holding all collected rows and writes the file.
  def serialize(file_name)
    package.workbook.add_worksheet do |sheet|
      output.each { |datum| sheet.add_row datum }
    end
    package.serialize(file_name)
  end
end
scraper = Scraper.new(
  "http://h10010.www1.hp.com/wwpc/ie/en/ho/WF06b/321957-321957-3329742-89318-89318-5186820-5231694.html?dnr=1",
  "//td[text()='%s']/following-sibling::td"
)

# Custom hook for one clue: split the cell on <br>, then trim and upcase
# every piece.
os_parse = lambda do |element|
  element.inner_html.split('<br>').map { |piece| piece.strip.upcase }
end

scraper.add_hook('Operating system', os_parse)
scraper.export('foo.xlsx')
And the FINAL answer is... a gem.
http://rubydoc.info/gems/ninja2k/0.0.2/frames

Resources