How can I click all of the nodes of a category tree with Playwright? - playwright

I want to use playwright to automatically click and expand all the child nodes. But my code only expands part of the nodes. How should I fix the code? Thank you.
Current:
What I want:
import json
import os
import time

from playwright.sync_api import sync_playwright

# Credentials are read from the environment so they never live in the script.
KEEPA_USERNAME = os.environ["KEEPA_USERNAME"]
KEEPA_PASSWORD = os.environ["KEEPA_PASSWORD"]
CATEGORY_TREE_URL = "https://keepa.com/#!categorytree"

p = sync_playwright().start()
browser = p.chromium.launch(headless=False, slow_mo=2000)
context = browser.new_context()
page = context.new_page()
try:
    page.goto(CATEGORY_TREE_URL, timeout=10000)

    # Log in through the dialog opened by the subscribe link.
    page.click("text=Log in / register now to subscribe")
    page.click('input[name="username"]')
    page.fill('input[name="username"]', KEEPA_USERNAME)
    page.click('input[name="password"]')
    page.fill('input[name="password"]', KEEPA_PASSWORD)
    page.click('input:has-text("Log in")')
    page.wait_for_timeout(2000)

    page.goto(CATEGORY_TREE_URL, timeout=10000)

    # Keep clicking the first matching icon. The loop has no exit condition of
    # its own: it ends when click() raises (e.g. its 5 s timeout expires),
    # which is reported by the handler below.
    while True:
        loc = page.locator(".ag-icon.ag-icon-expanded")
        print(loc.count())
        loc.first.click(timeout=5000)
        page.wait_for_timeout(2000)
except Exception as err:
    print(err)
finally:
    print("finished")
    # Release the browser and the Playwright driver started above.
    browser.close()
    p.stop()
My code only expands part of the nodes. How should I fix the code? Thank you.

I write scripts like this from time to time, but honestly, this was one of the hardest ones. It has been a real challenge.
I think it is finished.
# Import needed libs
import datetime
import os
import time

from playwright.sync_api import sync_playwright

# Credentials are read from the environment (fails fast with KeyError if they
# are missing) instead of being hard-coded in the script.
KEEPA_USERNAME = os.environ["KEEPA_USERNAME"]
KEEPA_PASSWORD = os.environ["KEEPA_PASSWORD"]
CATEGORY_TREE_URL = "https://keepa.com/#!categorytree"

# We save the time when the script starts
init = datetime.datetime.now()
print(f"{init} - Script starts")

# We initiate the playwright page
p = sync_playwright().start()
browser = p.chromium.launch(headless=False)
context = browser.new_context()
page = context.new_page()

# Navigate to Keepa and log in
page.goto(CATEGORY_TREE_URL)
page.click("text=Log in / register now to subscribe")
page.fill("#username", KEEPA_USERNAME)
page.fill("#password", KEEPA_PASSWORD)
page.click("#submitLogin", delay=200)

# The profile panel only shows up once we are logged in, so wait for it
page.wait_for_selector("#panelUsername")

# Navigate to the category tree again, now as a logged-in user
page.goto(CATEGORY_TREE_URL)
time.sleep(1)
# This function tries to click the arrow that expands a collapsed subtree
def try_click():
    """Try to expand one collapsed subtree that is currently visible.

    Uses the module-level ``page``. Collapsed nodes are checked from the last
    XPath match to the first, and the first visible one is clicked.

    Returns:
        True if a visible collapsed node was found and a click was attempted
        (even if Playwright reported the click as failed), False if none of
        the collapsed nodes is visible, so the caller has to scroll.
    """
    # Imported here so the function does not depend on edits to the file header.
    from playwright.sync_api import Error as PlaywrightError

    contracted = "//span[@class='ag-group-contracted']"
    # Number of collapsed nodes currently attached to the DOM
    count = page.locator(contracted).count()

    # XPath positions are 1-based; walk from the last match down to the first
    for position in range(count, 0, -1):
        candidate = f"({contracted})[{position}]"
        if not page.locator(candidate).is_visible():
            continue
        try:
            page.click(candidate, timeout=200)
        except PlaywrightError:
            # Playwright sometimes reports a failure although the node was
            # expanded, so a failed click still counts as an attempt.
            print(f"Error Clicking {position} but probably was clicked")
        else:
            print(f"Clicking Correct {position}. Wheel up")
            # After a successful click, scroll back up
            page.mouse.wheel(0, -500)
        return True

    # Nothing visible to click
    return False
# This function checks whether there are still collapsed trees
def there_is_still_closed_trees():
    """Return True while at least one collapsed subtree is attached to the DOM.

    Waits on the module-level ``page`` (Playwright's default timeout applies)
    for a collapsed-node span. A timeout means nothing is left to expand and
    returns False; any other Playwright error propagates instead of being
    mistaken for "finished".
    """
    # Imported here so the function does not depend on edits to the file header.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    try:
        page.wait_for_selector(
            selector="//span[@class='ag-group-contracted']", state="attached"
        )
    except PlaywrightTimeoutError:
        print("No more trees closed")
        return False
    return True
# When we navigate to the category tree page a pop-up appears, and the mouse
# has to move to make it disappear. We leave the pointer over the list because
# the wheel scrolling below must happen over the list.
page.mouse.move(400, 1000)
page.mouse.move(400, 400)

# Counts how many times we scrolled down since the last successful click
wheel_down_times = 0

# Loop until there are no more collapsed trees
while there_is_still_closed_trees():
    if not try_click():
        # No collapsed tree was visible, so scroll down to look for one
        print("Wheel down")
        page.mouse.wheel(0, 400)
        wheel_down_times += 1
        print(f"Wheel down times = {wheel_down_times}")

        # Lots of scrolling can crash the page, so pause 10 s every 100 scrolls
        if wheel_down_times % 100 == 0:
            print("Sleeping 10 secs in order to avoid page crashes")
            time.sleep(10)

        # The last entry of the whole tree is visible and we scrolled more
        # than 5 times: we are at the bottom but collapsed trees remain, so
        # jump back to the top and scan downwards again.
        if (
            page.locator("//span[text()='Walkthroughs & Tutorials']").is_visible()
            and wheel_down_times > 5
        ):
            page.mouse.wheel(0, -5000000)
    else:
        # A click was attempted, so restart the scroll counter
        print(f"Wheel down times from {wheel_down_times} to 0")
        wheel_down_times = 0

# Script finishes and shows a summary of the timing
end = datetime.datetime.now()
print(f"{end} - Script finished")
print(f"Script started at: {init}")
print(f"Script ended at: {end}")
print("There should not be any more closed trees")

# Keeps the window open so the result can be inspected; remove the sleep to
# close the page right away.
time.sleep(10000)

# Release the browser and the Playwright driver
browser.close()
p.stop()
The script takes almost 3 hours to run. I don't know how Keepa has so many categories. Awesome...

Related

getPageSource is not working for RSelenium

My goal is to retrieve information about reviews that are located on different pages. I created a CSV file with the URL of every page and use each row as a navigation target for the remote driver. I created a loop for Selenium that is supposed to visit all of these URLs and retrieve the nodes I need.
Inside that loop I extract the HTML nodes of each page and build a unified data frame by combining the result of each iteration with a data frame that has the same column names (created beforehand).
This worked two months ago, but now I only get the page source of the first page and not of the following ones. At the same time, the remote driver navigates through the pages smoothly (which means the URLs are all right!).
Example of the code:
uuu <- read.csv("/Users/uuu.csv")

# Accumulator for the scraped reviews; bind_rows() fills missing columns with NA
data <- data.frame(title = character(),
                   date_travel = character(),
                   grades = character())

#### starting selenium ####
rd <- rsDriver(remoteServerAddr = "localhost", browser = "firefox", port = 9353L)
cd <- rd$client  # the client does not change between pages, so fetch it once

# seq_len() also copes with an empty URL table (1:0 would iterate twice)
for (i in seq_len(nrow(uuu))) {
  tryCatch({
    url <- uuu$url[i]
    cd$navigate(url)
    Sys.sleep(2)

    reviews2 <- read_html(cd$getPageSource()[[1]])
    title <- reviews2 %>%
      html_nodes(".ocfR3SKN") %>%
      html_text()
    date_travel <- reviews2 %>%
      html_nodes("._34Xs-BQm") %>%
      html_text()

    # Build the record inside the same tryCatch: if data.frame() fails (e.g.
    # the two vectors differ in length) the page is skipped, instead of the
    # previous page's record_url being silently appended again.
    record_url <- data.frame(title, date_travel)
    data <- bind_rows(data, record_url)

    Sys.sleep(2)
  }, error = function(e) {
    # Report the failing page rather than hiding the error
    message("Skipping row ", i, " (", uuu$url[i], "): ", conditionMessage(e))
  })
}
The result of the parsing: it seems that it managed to get only the page source of the first URL but not the others. However, the remote driver was navigating to them.

rails multiple user editing the same record

I have a table in the front end with multiple rows that can be read and edited. Each row has an edit icon that the user will click on, and a dialog will pop up to update the fields in the row. The save button on the dialog will save the fields (call the update API), close the dialog, and reload the table by calling the list API with the same page, filters, and sort order.
To support multiple users reading and editing the same table, I want to lock a row when a user clicks on its edit icon, and unlock it when the user clicks on save or cancel in the dialog that pops up. To do this, I added a lock field to each row in the database.
When the user clicks on the edit icon, I send a lock API call:
lock_success = false
message = nil
row = Row.find(id)
# (with_lock)
# Claim the row for the current user only if nobody holds it yet.
if row.lock.nil?
  row.lock = @current_user.user_name
  row.save!
  lock_success = true
end
# (with_lock_end)
When the edit dialog is closed on save or cancel:
# Release the row by clearing its lock field.
Row.update(id, lock: nil)
But there could be a case where this follows?
(1) row = Row.find(1)
(2) row = Row.find(1)
(1) if row.lock.nil?
(2) if row.lock.nil?
(1) row.lock = @current_user.user_name
(2) row.lock = @current_user.user_name
(1) row.save!
(2) row.save!
If I wrap row.with_lock around (with_lock) and (with_lock_end), it should solve this problem right?
Lastly, can I use optimistic locking with lock_version?
User (1) loads row 1 with version 1.
User (2) loads row 1 with version 1.
User (1) updates row 1 with version 1, now row 1 is version 2.
User (2) updates row 1 with version 1, gets back stale object exception.
Then I won't need to wrap the update calls with with_lock? However, how can I keep track of who locked the row via this method?
Try doing something like this:
# Try to claim row 1 in a single UPDATE: it only matches while locked_by is
# still nil. update_all returns the number of rows it changed, so 1 means
# @current_user now holds the lock and 0 means someone else already locked it.
updated_rows = Row.where(id: 1, locked_by: nil)
                  .update_all(locked_by: @current_user.user_name, locked_at: Time.zone.now)
lock_obtained = updated_rows == 1
This way you try to lock the row and find out whether it was already locked or you succeeded, all in a single statement, so you won't run into that race condition.
Just a few suggestions:
- I would use an integer with the user id for the locked_by column instead of the name.
- I would also save the locked_at timestamp; it sounds like it could be useful.
- What do you do if the user closes the tab that locked the row? For this kind of thing you should use ActionCable, so you can also detect when the user gets disconnected.

Applescript Result (links as text) to URL

This does not seem to be as easy as I thought it would be.
My script fetches link URLs from websites.
As of now, the resulting URLs are just text, and I need them to be output as URLs (to the clipboard or a variable) so I can paste them into an email message.
I have tried various things, from first saving to an RTF file and reading/pasting it into my email message body, to copying and pasting through the clipboard.
Any help would be awesome, as I haven't been able to solve this for 2 days. Thanks
-- Prompt for the keyword (or sentence) to search for
display dialog "Keyword or Sentence" default answer "mad dog" buttons {"Done"} default button 1
set Keyword to text returned of the result
-- Build the URL filter from the keyword: the words joined with "-"
set my text item delimiters to " "
delay 0.2
set split_list to every text item of Keyword -- split into a list of everything between the spaces
set my text item delimiters to "-"
set Filter to (split_list as text) -- join, using "-" as the delimiter
-- NOTE(review): the text item delimiters are left set to "-"; they are not restored in this script
-- Open the search page in Safari
-- NOTE(review): the URL is built from the raw Keyword (spaces included), not from Filter - confirm this is intended
set site_url to "https://teespring.com/search?q=" & Keyword
tell application "Safari"
activate
open location site_url
end tell
-- Wait until the page is loaded: readyState is "complete" and the page text contains testingString
property testingString : "Help" -- text on the website to look for
set pageLoaded to false
tell application "Safari"
repeat while pageLoaded is false
set readyState to (do JavaScript "document.readyState" in document 1)
set pageText to text of document 1
if (readyState is "complete") and (pageText contains testingString) then set pageLoaded to true
delay 0.2
end repeat
end tell
-- NOTE(review): the loop above has no timeout; it repeats until both conditions hold
-- Get the number of links on the page
set theLinks to {}
tell application "Safari" to set num_links to (do JavaScript "document.links.length" in document 1)
set linkCounter to num_links - 1
-- Retrieve the href of every link (document.links is indexed from 0)
repeat with i from 0 to linkCounter
tell application "Safari" to set end of theLinks to do JavaScript "document.links[" & i & "].href" in document 1
end repeat
-- Bare expression; its value is not used by the following statements
theLinks
set nonExcludedURLs to {}
-- Keep only the URLs that contain the hyphenated keyword
repeat with i from 1 to length of theLinks
if item i of theLinks contains Filter then
set end of nonExcludedURLs to item i of theLinks
end if
end repeat
-- Bare expression naming the filtered list (presumably so it shows as the script result - confirm)
nonExcludedURLs
-- Polls Safari's front document until it has finished loading.
-- timeout_value: maximum number of readyState checks (about one second apart).
-- Returns true as soon as document.readyState is "complete", false if it never
-- gets there within timeout_value checks.
on page_loaded(timeout_value)
	-- Give the navigation a moment to start before the first check
	delay 2
	repeat with i from 1 to timeout_value
		tell application "Safari"
			set readyState to (do JavaScript "document.readyState" in document 1)
		end tell
		if readyState is "complete" then return true
		-- No point in waiting after the last check
		if i < timeout_value then delay 1
	end repeat
	return false
end page_loaded
I found the solution. Maybe I did not describe the problem very well.
I just needed to split the resulting URLs from a block of text into single lines with the following code:
-- Build one text value with each URL followed by a return character
set Single_URLs to ""
repeat with this_line in nonExcludedURLs -- the list of filtered URLs
set Single_URLs to Single_URLs & this_line & return -- append the URL and a line break
end repeat

undefined method `click' for "2":String, Rails error when using Mechanize

# Scrapes member names from the first few result pages of the site.
class ScraperController < ApplicationController
  # Last pagination page to scrape; pages 1..LAST_PAGE are visited.
  LAST_PAGE = 3

  # Collects the '.memName' text of each visited page into @names.
  def show
    mechanize = Mechanize.new
    page = mechanize.get('https://website.com/')
    @names = []

    # Locals are used instead of $globals so concurrent requests cannot
    # interfere with each other's counters.
    (1..LAST_PAGE).each do |page_number|
      @names.push(page.css('.memName').text.strip)
      break if page_number == LAST_PAGE

      # link_with returns a Mechanize::Page::Link (or nil). click must be
      # called on the link itself -- calling it on link.text.strip raises
      # "undefined method `click' for String". The link text is a String,
      # hence to_s on the page number.
      next_link = page.link_with(text: (page_number + 1).to_s)
      break if next_link.nil?

      # click returns the next page; keep it, otherwise the same page is
      # scraped on every iteration.
      page = next_link.click
    end
  end
end
I am trying to scrape 20 items off of each page and then click on the link at the bottom (1, 2, 3, 4, 5, etc.). However, I get the error seen in the title, which tells me that I cannot click the string. So it recognizes that the button '2' exists but tells me it cannot click it. Ideally, once this is sorted out, I want to use the $link_to_click variable to replace the '2' so that it increments each time, but it always comes back as nil. I have also changed it to .to_s with the same result.
If I remove the click all together, it will scrape the same page 3 times instead of moving onto the next page. I have also removed the text.strip part before the .click and it will do the same thing. I have tried many variations but have had no luck.
I would really appreciate any advice you could offer.
I ended up reviewing the articles I was referencing to solve this and came to this conclusion.
I changed the website_link to website = website.link_with(:text => $link_to_click.to_s).click (because it only worked as a string) and it printed out the first page, second and each one thereafter.
These are the articles that I was referencing to learn how to do this.
http://docs.seattlerb.org/mechanize/GUIDE_rdoc.html
and
https://readysteadycode.com/howto-scrape-websites-with-ruby-and-mechanize

PsychoPy Coder: define image duration based on frames

I have some experience in Matlab, but am very new to PsychoPy.
For now I would like to continuously switch between two images until there is a keyboard response.
Each image should stay on the screen exactly for 100ms, and I want to be able to verify that this is the case (e.g. in the log file).
I sort of got it right by using core.wait(.084) after win.flip() - on a 60Hz screen that gives approximately 100ms.
I am verifying it by writing the frame of each flip to the log file with win.logOnFlip()
But I believe I could be way more precise if I only knew how to define the duration of the image in terms of frames.
The function core.wait() only takes time in seconds, and not in frames, correct?
I would be very grateful if you could give me some tips on how to achieve (and verify) presentation of each image for 6 frames.
Thanks a ton in advance
Best
Sebastian
Here my code:
import os  # for file/folder operations

from psychopy import visual, event, core, gui, data, logging

# Ensure that relative paths start from the same directory as this script
_thisDir = os.path.dirname(os.path.abspath(__file__))
os.chdir(_thisDir)

# screen size in pixels
scrsize = (600, 400)

# gather participant info
exp_name = 'MyFirstPsychoPy'
exp_info = {
    'participant': '',
}
dlg = gui.DlgFromDict(dictionary=exp_info, title=exp_name)
# quit if the user pressed cancel
if not dlg.OK:
    core.quit()

# Get date and time
exp_info['date'] = data.getDateStr()
exp_info['exp_name'] = exp_name

# Save a log file with detailed, verbose info. The data folder is created on
# first use so that opening the log file cannot fail on a fresh checkout.
data_dir = os.path.join(_thisDir, 'data')
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)
filename = os.path.join(
    data_dir,
    '%s_%s_%s' % (exp_info['participant'], exp_name, exp_info['date']),
)
logFile = logging.LogFile(filename + '.log', level=logging.DEBUG)
logging.console.setLevel(logging.WARNING)  # outputs to the screen, not a file

# Create a small window
win = visual.Window(size=scrsize, color='white', units='pix', fullscr=False)
# or go full screen:
# win = visual.Window([1280,1024], fullscr=True, allowGUI=False, waitBlanking=True)

# record the interval between all frames
win.setRecordFrameIntervals(True)

# show instructions until space (or escape) is pressed
start_message = visual.TextStim(
    win,
    text="hello. you will see mondrians. press space to respond.",
    color='red', height=20)
event.clearEvents()
keys = event.getKeys(keyList=['space', 'escape'])  # allow only space and escape keys
while not keys:
    start_message.draw()
    win.flip()
    keys = event.getKeys(keyList=['space', 'escape'])
print(keys)  # show on output screen

# start the trial with an empty key buffer
event.clearEvents()
keys = event.getKeys(keyList=['space', 'escape'])

# define 2 pictures
bitmap1 = visual.ImageStim(win, 'Mondrians/Mask_1.bmp', size=scrsize)
bitmap2 = visual.ImageStim(win, 'Mondrians/Mask_2.bmp', size=scrsize)
bitmap = bitmap1

# Initialize clock to register response time
rt_clock = core.Clock()
rt_clock.reset()  # set rt clock to 0

# show alternating pics until response
frameN = 0
rt = None
while not keys:
    # swap to the other picture
    bitmap = bitmap2 if bitmap is bitmap1 else bitmap1
    bitmap.draw()
    # record the time of win.flip() in the log file
    win.logOnFlip(msg='frame=%i' % frameN, level=logging.DEBUG)
    win.flip()  # show image
    frameN += 1
    # 84 ms is just over 5 frames at 60 Hz, so the next flip lands on the 6th
    # frame, i.e. roughly 100 ms per image
    core.wait(.084)
    keys = event.getKeys(keyList=['space', 'escape'])  # record resp
    if keys:
        rt = rt_clock.getTime()

print('%s %s' % (keys, rt))  # show resp and rt on screen

# Write the frame intervals to their own file: writing them to filename+'.log'
# would clobber the log file opened above.
win.saveFrameIntervals(filename + '_frameIntervals.log', clear=True)
win.close()
core.quit()
Yes, there is a better way! The standard solution exploits the fact that win.flip() halts code execution until the next monitor update. So looping over win.flip() will give you exactly one frame per loop iteration. So, to switch between two ImageStims (bitmap1 and bitmap2) until there is a response:
clock = core.Clock()  # to assess timing
keepLooping = True
while keepLooping:  # continue until a key is pressed
    for thisBitmap in [bitmap1, bitmap2]:  # alternate between images
        if not keepLooping:  # do not show bitmap2 if a key was pressed on bitmap1
            break
        for frameN in range(6):  # 6 frames = 100 ms at 60 Hz
            thisBitmap.draw()
            print(clock.getTime())  # time elapsed since the previous flip
            win.callOnFlip(clock.reset)  # ... or use win.logOnFlip
            # The inner loop is timed to this flip, as long as the rest of
            # the code in here doesn't take longer than a frame.
            win.flip()
            keys = event.getKeys(keyList=['space', 'escape'])
            if keys:  # [] evaluates to False
                rt = rt_clock.getTime()
                keepLooping = False
                break
... and then the rest. I've used core.Clock() to assess the time here, but your win.logOnFlip() is just as good. Depends on what kind of output you want.
Note that event.getKeys() records the time this line was executed, not the time that the key was pressed. Therefore it adds a small delay. In this "frame-locked loop" the key response is therefore discretized to frame intervals. If you want to get real asynchronous polling of the keyboard state (i.e. if up to +16ms error in RT recording matters), use the iohub module. Many keyboards have an inherent 10-30 ms latency anyway, so you can't get rid of all latency.

Resources