so im trying to do web scraping with rails and kimurai, the problem i ran in to was that for some reason i get a single big object instead of one for each of the products im scraping, here is my code:
class ProductsSpider < Kimurai::Base
#name = "products_spider"
#engine = :mechanize
def self.process(url)
#start_urls = [url]
self.crawl!
end
def parse(response, url:, data: {})
response.xpath("//div[#class='andes-card andes-card--flat andes-card--default ui-search-result ui-search-result--core andes-card--padding-default andes-card--animated']").each do |product|
item = {}
item[:product_name] = product.xpath("//h2[#class='ui-search-item__title ui-search-item__group__element']")&.text&.squish
item[:price] = product.xpath("//span[#class='price-tag-fraction']")&.text&.squish&.delete('^0-9')to_i
item[:shipping] = product.xpath("//p[#class='ui-search-item__shipping ui-search-item__shipping--free']")&.text&.squish
Product.where(item).first_or_create
end
end
end
and here is the function on the controller:
def scrape
url = "https://computacion.mercadolibre.com.ar/componentes-pc-placas-video/msi/cordoba/placa-de-video_NoIndex_True#applied_filter_id%3Dstate%26applied_filter_name%3DUbicaci%C3%B3n%26applied_filter_order%3D13%26applied_value_id%3DTUxBUENPUmFkZGIw%26applied_value_name%3DC%C3%B3rdoba%26applied_value_order%3D11%26applied_value_results%3D120%26is_custom%3Dfalse%26view_more_flag%3Dtrue"
response = ProductsSpider.process(url)
if response[:status] == :completed && response[:error].nil?
flash.now[:notice] = "Successfully scraped url"
else
flash.now[:alert] = response[:error]
end
rescue StandardError => e
flash.now[:alert] = "Error: #{e}"
end
Im building a basic rails web scraper and im running into this error. Here is my scraper method and my scraper model too.
https://gyazo.com/d866cd8def5ac107ea8c13515faac989
class RestaurantsScraper < Kimurai::Base
#name = 'restaurants_scraper'
#engine = :mechanize
def self.process(url)
url = 'tripadvisor.com/Restaurants-g31892-Rogers_Arkansas.html'
#start_url = [url]
self.crawl!
end
def parse(response, url:, data: {})
response.xpath("//div[#class=_1llCuDZj]").each do |t|
item = {}
item[:title] = t.css('a._15_ydu6b')&.text&.squish&.gsub('[^0-9].', '')
item[:type] = t.css('span._1p0FLy4t')&.text&.squish
item[:reviews] = t.css('span.w726Ki5B').text&.squish
item[:top_reviews] = t.css('a._2uEVo25r _3mPt7dFq').text&.squish
Restaurant.where(item).first_or_create
end
end
end
def scrape
url = 'https://www.tripadvisor.com/Restaurants-g31892-Rogers_Arkansas.html'
response = RestaurantsScraper.process(url)
if response[:status] == :completed && response[:error].nil?
flash.now[:notice] = "Successfully scraped url"
else
flash.now[:alert] = response[:error]
end
rescue StandardError => e
flash.now[:alert] = "Error: #{e}"
end
I have a rails app and I'm trying to call the methods below from the controller, but I get this error:
undefined local variable or method "service" for #<EventsController:0x007fb6d27da1c8>` for this line: `api_method: service.freebusy.query
What's the problem here? Why can't the get_busy_events see the service var if it's defined above it?
controller
include GoogleCalendarApi
.....
#user = current_user
#google = #user.socials.where(provider: "google_oauth2").first
unless #google.blank?
#client = init_google_api_calendar_client(#google)
#result = open_gcal_connection(get_busy_events, #client, #google)
lib/google_api_calendar.rb
def init_google_api_calendar_client(google_account)
#method only called if google_oauth2 social exists
client = Google::APIClient.new
client.authorization.access_token = google_account.token
client.authorization.client_id = ENV['GOOGLE_API_KEY']
client.authorization.client_secret = ENV['GOOGLE_API_SECRET']
client.authorization.refresh_token = google_account.refresh_token
return client
end
def open_gcal_connection(options, initialized_client, social_object)
client = initialized_client
old_token = client.authorization.access_token
service = client.discovered_api('calendar', 'v3')
result = client.execute(options) #after execution you may get new token
# update token if the token that was sent back is expired
new_token = client.authorization.access_token
if old_token != new_token
social_object.update_attribute(token: new_token)
end
return result
end
def get_busy_events
result = open_gcal_connection(
api_method: service.freebusy.query,
body: JSON.dump({ timeMin: '2015-12-24T17:06:02.000Z',
timeMax: '2013-12-31T17:06:02.000Z',
items: social_object.email }),
headers: {'Content-Type' => 'application/json'})
#handling results
end
To answer your question(as I did in the comments):
To fix your method, you have to define the service variable in the action where you are calling it.
As for your posted link: if you look at the get_busy_events method there is a line where service = client.discovered_api('calendar', 'v3')
and it is fine, because it is in the method. The same goes for client that the service declaration depends on- you have to declare them inside the method where you use them.
You should follow the article and make the code as it is there so you would have:
def init_client
client = Google::APIClient.new
# Fill client with all needed data
client.authorization.access_token = #token #token is taken from auth table
client.authorization.client_id = #oauth2_key
client.authorization.client_secret = #oauth2_secret
client.authorization.refresh_token = #refresh_token
return client
end
which you can use to define client variable in all your other actions and then use the service method:
def get_busy_times
client = init_client
service = client.discovered_api('calendar', 'v3')
#result = client.execute(
:api_method => service.freebusy.query,
:body_object => { :timeMin => start_time, #example: DateTime.now - 1.month
:timeMax => end_time, #example: DateTime.now + 1.month
:items => items
},
:headers => {'Content-Type' => 'application/json'}})
end
EDIT No2:
Since you have a controller, where client is initialized I suggest passing it down as an argument:
include GoogleCalendarApi
.....
#user = current_user
#google = #user.socials.where(provider: "google_oauth2").first
unless #google.blank?
#client = init_google_api_calendar_client(#google)
#result = open_gcal_connection(get_busy_events(#client), #client, #google)
and changing your get_busy_events method:
def get_busy_events(client)
service = client.discovered_api('calendar', 'v3')
result = open_gcal_connection(
api_method: service.freebusy.query,
body: JSON.dump({ timeMin: '2015-12-24T17:06:02.000Z',
timeMax: '2013-12-31T17:06:02.000Z',
items: social_object.email }),
headers: {'Content-Type' => 'application/json'})
#handling results
end
Although this is a bit weird for me(nesting arguments like this) so you should look at refactoring this.
I am trying to use PayPal ExpressCheckout button with multiple items but with no success.I am using NetBeans IDE, rails 4 and MySQL db.Here is what I did so far:
In my production.rb file I have:
Rails.application.configure do
config/application.rb.
config.after_initialize do
ActiveMerchant::Billing::Base.mode = :test
paypal_options = {
:login => "xxxx",
:password => "xxxx ",
:signature => "xxxx "
}
::STANDARD_GATEWAY = ActiveMerchant::Billing::PaypalGateway.new(paypal_options)
::EXPRESS_GATEWAY = ActiveMerchant::Billing::PaypalExpressGateway.new(paypal_options)
end
In my transaction.rb model I have:
def valid_purchase
if express_token.blank?
standard_purchase
else
express_token
end
def express_purchase
# price_in_cents = total
response = EXPRESS_GATEWAY.purchase(total, express_purchase_options)
if response.success?
self.status = "processed"
else
errors.add(:transactions, "---- #{response.message}.")
end
end
def express_token=(token)
self[:express_token] = token
if new_record? && !token.blank?
details = EXPRESS_GATEWAY.details_for(token)
self.express_payer_id = details.payer_id
self.ship_to_first_name = details.params["first_name"]
self.ship_to_last_name = details.params["last_name"]
end
end
private
def express_purchase_options
{
:ip => customer_ip,
:token => express_token,
:payer_id => express_payer_id
}
end
And in my transaction_controller.rb I have:
def express_checkout
order_items =[]
postage_rate=nil
item = Hash.new
#order = Order.find(session[:order_id])
#receipts = Receipt.where(:order_id=>#order.id)
#receipts.each do |r|
postage_rate = r.postage_rate * 100
end
#cart = Cart.find(#order.cart_id)
#cart.cart_items.each do |i|
#product = Product.find(i.product_id)
item = {
name: #product.product_name,
quantity: i.amount,
description: "ORDER_ID: #{#order.id}",
amount: #product.selling_price * 100 ,
shipping: postage_rate/#cart.cart_items.size
}
order_items << item
end
price_in_cents = (#order.total_p_pr * 100).round(2)
options = {
:ip => request.remote_ip,
:return_url => url_for(:action=>:new, :only_path => false),
:cancel_return_url => catalogs_traders_url,
:currency => "USD",
:allow_guest_checkout=> true,
:items => order_items # this line outputs: [{:name=>"owl potty", :quantity=>1, :description=>"ORDER_ID: 249", :amount=>2808.0, :shipping=>332.0}, {:name=>"a bag", :quantity=>1, :description=>"ORDER_ID: 249", :amount=>1260.0, :shipping=>332.0}, {:name=>"bracelet", :quantity=>1, :description=>"ORDER_ID: 249", :amount=>120.0, :shipping=>332.0}, {:name=>"beautiful woman", :quantity=>1, :description=>"ORDER_ID: 249", :amount=>74352.0, :shipping=>332.0}]
}
#passing the cost of the order
response = EXPRESS_GATEWAY.setup_purchase(price_in_cents,options )
redirect_to EXPRESS_GATEWAY.redirect_url_for(response.token)
end
def new
#transaction = Transaction.new(:express_token => params[:token])
end
I get:
Any help will be more than welcome. Thank you!
I red this post very,very carefully
setting tax amount in Active Merchant / PayPal Express Checkout
and I understood my mistakes. Here is my corrected transaction_controller:
# to redirect to PayPay site
def express_checkout
pr = nil
tp = nil
items =[]
postage_r=[]
total_p = []
order_items =[]
postage_rate=nil
item = Hash.new
#order = Order.find(session[:order_id])
#receipts = Receipt.where(:order_id=>#order.id)
#receipts.each do |r|
total_p << r.total_price
postage_r << r.postage_rate
end
tp = total_p.inject{|sum,x| sum + x }
pr = postage_r.inject{|sum,x| sum + x }
#cart = Cart.find(#order.cart_id)
#cart.cart_items.each do |i|
#product = Product.find(i.product_id)
item = {
name: #product.product_name,
quantity: i.amount,
description: "ORDER_ID: #{#order.id}",
amount: #product.selling_price * 100 ,
}
order_items << item
end
price_in_cents = (#order.total_p_pr * 100).round(2)
options = {
:subtotal => tp * 100,
:shipping => pr * 100,
:handling => 0,
:tax => 0,
:ip => request.remote_ip,
:return_url => url_for(:action=>:new, :only_path => false),
:cancel_return_url => catalogs_traders_url,
:currency => "USD",
:allow_guest_checkout=> true,
:items => order_items
}
#passing the cost of the order
response = EXPRESS_GATEWAY.setup_purchase(price_in_cents,options )
redirect_to EXPRESS_GATEWAY.redirect_url_for(response.token)
end
It worked. I hope my post will be useful for someone who want to integrate Express Checkout button. Thank you for all your help!
have a transaction model similar to RailsCasts ActiveMerchant tutorial.
How can I create a fake response?
Tried something like the following but no luck.
response = #success=true, #params = {"ref" => "123"}, #authorization = "54321", ...
models/order_transaction.rb
class OrderTransaction < ActiveRecord::Base
belongs_to :order
serialize :params
def response=(response)
self.success = response.success?
self.authorization = response.authorization
self.message = response.message
self.params = response.params
rescue ActiveMerchant::ActiveMerchantError => e
self.success = false
self.authorization = nil
self.message = e.message
self.params = {}
end
end
you can do something like
a = OpenStruct.new
def a.success?
true
end