Puppeteer page.goto error only with readFileSync - url

When using await page.goto('http://www.URL.edu') a PDF is generated, but when loading the same URL from a csv file, Puppeteer returns error.
I thought it might have been either a timing issue or a redirect from http to https, but the script shows that neither is the problem.
The commented-out code is where the URL is loaded from a CSV file containing a single row: "1,oldwestbury.edu,SUNY College at Old Westbury"
// Output folders for the captured screenshots and the generated PDFs.
var dir1 = './screenshots';
var dir2 = './pdfs';
const fs = require('fs');
// Create each output folder only when it is not already on disk.
[dir1, dir2].forEach((dir) => {
    if (fs.existsSync(dir)) {
        return;
    }
    fs.mkdirSync(dir);
});
/**
 * Read a CSV file and build one URL per data row.
 *
 * The second comma-separated column of each row is trimmed, lower-cased and
 * prefixed with `http://`.
 *
 * Blank lines (such as the one left by a trailing newline) and rows without
 * a second column are skipped; indexing them would yield `undefined` and
 * throw before any page is visited.
 *
 * @param {string} path - Location of the CSV file.
 * @returns {string[]} URLs in file order.
 */
function readURLFile(path) {
    return fs.readFileSync(path, 'utf-8')
        // Accept both Unix and Windows line endings.
        .split(/\r?\n/)
        .map((line) => line.split(',')[1])
        .filter((domain) => domain !== undefined && domain.trim() !== '')
        .map((domain) => `http://${domain.trim().toLowerCase()}`);
}
const puppeteer = require('puppeteer');
/**
 * Visit every URL returned by readURLFile and store a screenshot plus a
 * one-page PDF for each under ./screenshots and ./pdfs.
 */
(async () => {
    const startDate = new Date().getTime();
    // NOTE(review): declared but never applied to a page in this script.
    const USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3239.108 Safari/537.36';
    const urls = readURLFile('./UNVurls.csv');
    const browser = await puppeteer.launch({
        headless: true
    });
    try {
        for (let url of urls) {
            console.log(`Visiting url: ${url}`);
            let page = await browser.newPage();
            try {
                await page.setViewport({ width: 1440, height: 900, deviceScaleFactor: 2 });
                // Hard-coded address used for comparison; the CSV-driven call
                // is the commented-out variant below.
                await page.goto('http://www.oldwestbury.edu')
                // await page.goto(url, {
                // waitUntil: 'networkidle2',
                // timeout: 0
                // });
                // Turn the URL into a file-system friendly name.
                let fileName = url.replace(/(\.|\/|:|%|#)/g, "_");
                if (fileName.length > 100) {
                    fileName = fileName.substring(0, 100);
                }
                await page.waitForSelector('title');
                await page.screenshot({
                    path: `./screenshots/${fileName}.jpeg`,
                    omitBackground: true
                });
                await page.emulateMedia('screen');
                await page.pdf({
                    path: `./pdfs/${fileName}.pdf`,
                    pageRanges: "1",
                    format: 'A4',
                    printBackground: true
                });
            } catch (err) {
                console.log(`An error occured on url: ${url}`);
                // Print the underlying failure; without it the reason a
                // page could not be processed stays invisible.
                console.error(err);
            } finally {
                await page.close();
            }
        }
    } finally {
        // Release the browser even when opening a page throws.
        await browser.close();
    }
    console.log(`Time elapsed ${Math.round((new Date().getTime() - startDate) / 1000)} s`);
})();
Hoping to determine WHY the PDF is created when using the url direct and why the page load fails when retrieving from CSV file.

Related

How to send parallel POST requests in puppeteer?

I want to send parallel POST requests in puppeteer. I have to change the payload with every request (URL remains the same).
I tried using puppeteer cluster, but how do I change payload with every request when I queue the same request?
Using normal puppeteer
// Sequentially opens one page per entry of `Ids`, rewrites the outgoing
// request into a POST carrying that id, and appends the returned
// description/price to temp.tsv.
// NOTE(review): `puppeteer`, `Ids` and `fs` are not defined in this snippet;
// presumably they are declared elsewhere — confirm.
(async() => {
const browser = await puppeteer.launch({
args: [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-web-security",
],
executablePath: 'C:/Program Files/..',
headless: false,
});
// Pages are processed one after another: each iteration awaits its own
// navigation before the next page is created.
for(const id of Ids) {
const page = await browser.newPage();
await page.setDefaultNavigationTimeout(60000);
await page.evaluateOnNewDocument(() => {
// Some code
})
await page.setRequestInterception(true);
// Request intercept handler... will be triggered with
// each page.goto() statement
// NOTE(review): the handler is attached to every 'request' event of the
// page, so it presumably also rewrites non-navigation requests — verify.
page.on('request', interceptedRequest => {
// Here, is where you change the request method and
// add your post data
var data = {
'method': 'POST',
'postData': JSON.stringify({
....
"similarMaterialId": `${id}`,
}),
'headers': {
.....
},
};
// Request modified... finish sending!
interceptedRequest.continue(data);
});
const response = await page.goto('https://.../getProductInfo');
// Parsing happens outside the try below, so a non-JSON response rejects
// the whole async function rather than reaching the catch.
const responseBody = await response.json();
try {
let title = responseBody.description;
let price = responseBody.price;
// NOTE(review): the callback runs after the try block has finished, so
// the `throw` inside it is presumably not seen by the catch — confirm.
fs.appendFile('temp.tsv', `${title}\t${price}\n`, function (err) {
if (err) throw err;
})
}
catch {
// Log the id whose response could not be processed.
console.log(id)
}
await page.close();
}
console.log("Code ended!!")
await browser.close();
})();
I want to create many pages in parallel on a single browser.

node-fetch stream file to nodemailer, and then nodemailer to send the file as attachment

I'm trying to send a file along with other fields on the state in Nextjs using node-fetch.
The goal is not to store the file on the server (even temporarily) but to stream it from the web browser to Nodemailer as the user submits the form, and then have Nodemailer email it as an attachment along with the other information.
client:
/**
 * Upload the file picked in a file input as multipart form data.
 *
 * @param {Event} e - Change event whose `target.files` holds the selection.
 * @returns {Promise<void>|undefined} Promise that settles once the upload
 *     has been attempted, or `undefined` when no file was selected.
 */
const handleFile = (e) => {
    const file = e.target.files[0];
    // Nothing selected (for example the picker was cancelled): do not post
    // a form whose "file" field would be the string "undefined".
    if (!file) {
        return undefined;
    }
    const attachment = new FormData();
    attachment.append("file", file);
    // No explicit Content-Type header is set for the multipart body.
    return fetch(`route`, {
        method: "POST",
        headers: {},
        body: attachment,
    })
        .then((response) => {
            if (response.ok) console.log("Uploaded");
            else console.log("Error");
        })
        .catch((error) => {
            console.log(error);
        });
};
SMTP:
const nodemailer = require("nodemailer");
/**
 * Send an HTML email, attaching an uploaded file when one is present.
 *
 * Sending failures are logged and not rethrown.
 *
 * @param {string} subject - Subject line.
 * @param {string} html - HTML body.
 * @param {string} to - Recipient address.
 * @param {object} [file] - Parsed upload map; the attachment is read from
 *     `file.file[0]` (`originalFilename` and `path`).
 * @returns {Promise<void>}
 */
async function main(subject, html, to, file) {
    let transporter = nodemailer.createTransport({
        // mail server setup
    });
    // The upload map can exist without a "file" entry, so check the entry
    // itself instead of only the map.
    const upload = file && file.file && file.file[0];
    const attachment = upload
        ? [
            {
                filename: upload.originalFilename,
                path: upload.path,
            },
        ]
        : [];
    const mailOptions = {
        // NOTE(review): `from` is not defined in this snippet — presumably a
        // module-level sender address; confirm.
        from: from,
        to: to,
        subject: subject,
        html: html,
        attachments: attachment,
    };
    try {
        let info = await transporter.sendMail(mailOptions);
        console.log(info);
    } catch (error) {
        console.error(error, "fail to send email");
    }
}
API :
const express = require("express");
const router = express.Router();
const multiparty = require("multiparty");
const sendEmail = require("../../utilities/SMTP");
/**
 * POST /route — parse the multipart form and forward its fields and files
 * to the mailer.
 */
router.post("/route", (req, res) => {
    const form = new multiparty.Form();
    form.parse(req, function (err, fields, files) {
        // A malformed upload leaves `fields`/`files` unusable; report it
        // instead of mailing and answering with success.
        if (err) {
            res.status(400).send("Unable to read the submitted form");
            return;
        }
        sendEmail(
            "Career Attachment",
            contactEmail(fields),
            "to@mail.com",
            files
        );
        res.send("Your Request Sent Successfully");
    });
});
Edit: I'm able to stream the file as an attachment with the above code.
Needs improvements.
You should use FormData and append your files to it, as in the code below:
// Wrap the first selected file in a multipart form.
const file = e.target.files[0];
const formData = new FormData();
formData.append("file", file);
If you want it to be dynamic and upload multiple files, you can write your function as follows:
// Append every selected file under the same "file" field name.
const files = e.target.files;
const formData = new FormData();
Array.prototype.forEach.call(files, (selected) => {
    formData.append("file", selected);
});

Loopback 4 implementing Microsoft Graph API

I am currently building a microservice that is responsible to communicate with Microsoft Graph, I have already made one with Loopback 3 and this was not a problem.
Except now, I am trying to do the same thing but with Loopback 4, but since the language changes from JavaScript to TypeScript I don't know if it's still possible to achieve this.
This was the code I used for Loopback 3 in my root server file:
'use strict';
const express = require('express');
const erouter = require('express').Router();
var session = require('express-session');
var passport = require('passport');
var OIDCStrategy = require('passport-azure-ad').OIDCStrategy;
const request = require('request');
var querystring = require('querystring');
const graph = require('./graph.service');
const getBookings = require('./getBookings.service');
const cors = require('cors');
var compression = require('compression');
module.exports = function(server) {
// Install a `/` route that returns server status
var router = server.loopback.Router();
router.get('/', server.loopback.status());
// Configure simple-oauth2
// Client credentials and token endpoints are all read from environment
// variables; `oauth2` is used later to wrap the token parameters received
// at sign-in.
const oauth2 = require('simple-oauth2').create({
client: {
id: process.env.OAUTH_APP_ID,
secret: process.env.OAUTH_APP_PASSWORD
},
auth: {
tokenHost: process.env.OAUTH_AUTHORITY,
authorizePath: process.env.OAUTH_AUTHORIZE_ENDPOINT,
tokenPath: process.env.OAUTH_TOKEN_ENDPOINT
}
});
// Store only the Azure object id (oid) in the session. When no MSUser
// record with that oid exists yet, one is created first.
passport.serializeUser(function(user, done) {
    var MSUser = server.models.MSUser;
    var id = user.profile.oid;
    MSUser.find({ where: { oid: id } }, function(err, msu) {
        if (err) return done(err, null);
        // `find` reports matches as a list, so "no record" is an empty
        // list rather than a falsy value.
        if (!msu || msu.length === 0) {
            // Finish serialization only after the record has been stored;
            // otherwise `done` is never called on this path.
            return MSUser.create(user, function(createErr) {
                if (createErr) return done(createErr, null);
                done(null, id);
            });
        }
        done(null, id);
    });
});
// Resolve the id kept in the session back to its MSUser record.
passport.deserializeUser(function(id, done) {
    var MSUser = server.models.MSUser;
    MSUser.findById(id, function(err, user) {
        // Report failures through `done`; no `next` exists in this scope.
        if (err) return done(err, null);
        done(null, user);
    });
});
/**
 * Verify callback run after a successful Azure AD sign-in.
 *
 * Looks up the signed-in user's email via Graph, matches it to exactly one
 * AuthUser, posts the token data to the company's `msauth` endpoint and
 * makes sure an MSUser record exists. `done` is called exactly once on
 * every path.
 *
 * @param {string} iss - Not used here.
 * @param {string} sub - Not used here.
 * @param {object} profile - Profile from the strategy; must carry `oid`.
 * @param {string} accessToken - Token passed to graph.getUserDetails.
 * @param {string} refreshToken - Not used here.
 * @param {object} params - Token parameters wrapped by oauth2.
 * @param {Function} done - Callback `(err, user)`.
 */
async function signInComplete(iss, sub, profile, accessToken, refreshToken, params, done) {
    if (!profile.oid) {
        return done(new Error("No OID found in user profile."), null);
    }
    try {
        const graphUser = await graph.getUserDetails(accessToken);
        if (graphUser) {
            profile['email'] = graphUser.mail ? graphUser.mail.toLowerCase() : graphUser.userPrincipalName.toLowerCase();
        }
    } catch (err) {
        // Stop here; continuing would call `done` a second time later on.
        return done(err, null);
    }
    let oauthToken = oauth2.accessToken.create(params);
    var AuthUser = server.models.AuthUser;
    AuthUser.find({ where: { email: profile['email'] } }, function(err, au) {
        if (err) return done(err, null);
        if (au.length !== 1) return done(new Error("User was not found with that email address."), null);
        const user = au[0];
        const dataMsAuth = querystring.stringify({
            "created": new Date().toDateString(),
            "token_type": oauthToken.token.token_type,
            "expires_in": oauthToken.token.expires_in,
            "access_token": oauthToken.token.access_token,
            "scope": oauthToken.token.scope,
            "ext_expires_in": oauthToken.token.ext_expires_in,
            "refresh_token": oauthToken.token.refresh_token,
            "id_token": oauthToken.token.id_token,
            "expires_at": new Date(oauthToken.token.expires_at).toDateString()
        });
        const postMSAuth = {
            url: process.env.API_URL + "api/Companies/" + user.companyId + "/msauth",
            method: 'POST',
            body: dataMsAuth,
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded'
            }
        };
        request(postMSAuth, function(err, resp, body) {
            if (err) return done(err, null);
            var MSUser = server.models.MSUser;
            var id = profile.oid;
            var msuser = { profile, oauthToken, oid: id, email: profile.email };
            MSUser.findById(id, function(err, msu) {
                if (err) return done(err, null);
                if (msu) return done(null, msuser);
                // Complete the sign-in only once the record is stored, so a
                // lookup error can no longer be followed by a success call.
                MSUser.create(msuser, function(createErr) {
                    if (createErr) return done(createErr, null);
                    done(null, msuser);
                });
            });
        });
    });
}
// Register the OIDC strategy with signInComplete as its callback. All
// endpoints, ids and secrets come from environment variables.
passport.use(new OIDCStrategy({
identityMetadata: `${process.env.OAUTH_AUTHORITY}${process.env.OAUTH_ID_METADATA}`,
clientID: process.env.OAUTH_APP_ID,
responseType: 'code id_token',
responseMode: 'form_post',
redirectUrl: process.env.OAUTH_REDIRECT_URI,
// NOTE(review): plain-http redirect URLs are allowed and issuer validation
// is switched off below — confirm this is intended outside development.
allowHttpForRedirectUrl: true,
clientSecret: process.env.OAUTH_APP_PASSWORD,
validateIssuer: false,
passReqToCallback: false,
// OAUTH_SCOPES is expected to be a space-separated list.
scope: process.env.OAUTH_SCOPES.split(' ')
},
signInComplete
));
// Build a separate Express app carrying compression, the session, static
// files, passport and CORS, then mount it and the status router on the
// LoopBack server.
var app = express();
app.use(compression());
// The session middleware is registered before passport.session() below.
app.use(session({
secret: process.env.BOOKINGS_LOOPBACK_SECRET,
resave: false,
saveUninitialized: false,
unset: 'destroy'
}));
// Serve the contents of the `client` folder under /result.
app.use("/result", express.static('client'));
app.use(passport.initialize());
app.use(passport.session());
// NOTE(review): CORS is opened to every origin — confirm this is intended.
app.use(cors({
origin: '*'
}));
erouter.get('/API/bookings/:companyid', getBookings());
// Start the sign-in flow; the incoming `state` query value is forwarded.
erouter.get('/auth/signin',
function(req, res, next) {
passport.authenticate('azuread-openidconnect', {
response: res,
prompt: 'login',
state: req.query.state,
failureRedirect: process.env.WEBSITE_URL + 'settings?error=incorrect_request',
successRedirect: process.env.WEBSITE_URL + 'settings?auth=success'
})(req, res, next);
}
);
// Callback endpoint for the provider's POST; redirects to the website's
// settings page with the outcome in the query string.
erouter.post('/auth/callback',
function(req, res, next) {
passport.authenticate('azuread-openidconnect', {
response: res,
failureRedirect: process.env.WEBSITE_URL + 'settings?error=permission_denied',
successRedirect: process.env.WEBSITE_URL + 'settings?auth=success'
})(req, res, next);
}
);
app.use(erouter);
server.use(app);
server.use(router);
};
So my question is: "Is it possible to implement Microsoft Graph API in TypeScript using Loopback 4 or should I keep using Loopback 3 In JavaScript?"
Thanks in advance,
Billy Cottrell

How to do a POST Request with PlayWright

I have been stuck with this for a bit. I need to test a website and I need to post info in order to test if it appears on the page.
What I have so far is this
// Log in through the UI with a mocked route in place, open the Test tab
// and capture a screenshot once "Detail" is visible.
(async () => {
    const browser = await webkit.launch();
    try {
        const page = await browser.newPage();
        // Requests to this address are answered locally with `body`
        // instead of reaching the network.
        await page.route('http://100.100.100.100/', route => route.fulfill({
            status: 200,
            body: body,
        }));
        await page.goto('https://theurlofmywebsite/');
        await page.click('button')
        await page.click('text=Login with LoadTest')
        await page.fill('#Username','username')
        await page.fill('#Password','password')
        await page.click('#loginButton')
        // await page.waitForSelector('text=Dropdown');
        await page.click('css=span >> text=Test')
        // NOTE(review): the second argument is a string where click options
        // would normally go — confirm what was intended.
        await page.click('#root > div > div > header > ul.nav.navbar-nav.area-tabs > li:nth-child(6) > a','Test')
        await page.waitForSelector('text=Detail')
        await page.screenshot({ path: `example3.png` })
    } finally {
        // Close the browser even when one of the steps above fails.
        await browser.close();
    }
})();
const body = [ my json post request ]
// Give the long browser flow plenty of time before jest aborts it.
jest.setTimeout(1000000);
let browser: any;
let page: any;
// One browser for the whole file, one fresh page per test.
beforeAll(async () => {
    browser = await chromium.launch();
});
afterAll(async () => {
    await browser.close();
});
beforeEach(async () => {
    page = await browser.newPage();
});
afterEach(async () => {
    await page.close();
});
it("should work", async () => {
    // Seed the data with a plain HTTP POST. The outcome is only logged, so
    // a rejected request does not fail the test by itself.
    const seeding = fetch("http://YOUAWESOMEURL", {
        method: "post",
        body: JSON.stringify(body),
    });
    try {
        const response = await seeding;
        console.log(response);
    } catch (error) {
        console.log(error);
    }
    // Log in through the UI and open the tab under test.
    await page.goto("https://YOUAWESOMEURL");
    await page.click("button");
    await page.click("text=Login");
    await page.fill("#Username", "YOURUSERNAME");
    await page.fill("#Password", "YOURPASSWORD");
    await page.click("#loginButton");
    // await page.click("css=span >> text=Load Test");
    const testTab =
        "#root > div > div > header > ul.nav.navbar-nav.area-tabs > li:nth-child(6) > a >> text=Test";
    await page.click(testTab);
    await page.waitForSelector("text=SOMETEXTYOUWANTTOCHECKIFTHERE");
    // await page.waitForSelector(`text=SOMEOTHERTEXTYOUWANTTOCHECKIFTHERE`);
    // Another way to check for success
    // await expect(page).toHaveText(`SOMEOTHERTEXTYOUWANTTOCHECKIFTHERE`);
    console.log("test was successful!");
});
With version 1.19, it looks easy.
// Send a POST through the `request` fixture and check that the response
// reports success.
// NOTE(review): `USER` and `REPO` are not defined in this snippet —
// presumably constants declared elsewhere; confirm.
test('get respons variable form post in Playwright', async ({ request }) => {
    const responsMy = await request.post(`/repos/${USER}/${REPO}/issues`, {
        data: {
            title: '[Bug] report 1',
            body: 'Bug description',
        }
    });
    expect(responsMy.ok()).toBeTruthy();
});
See more on https://playwright.dev/docs/test-api-testing
// `test` is imported too, because the snippet calls it below.
import { expect, request, test } from '@playwright/test';
const baseApiUrl = "https://api.xxx.pro/node-api/graphql";
// Post a GraphQL query and check that the response is a 200 whose body
// does not mention 'errors'.
test('API Search', async ({ request }) => {
    const search_query = `query {me { id username}} `;
    const response = await request.post(baseApiUrl, {
        data: {
            query: search_query
        },
        headers: {
            // NOTE(review): bearer token is hard-coded in the source —
            // presumably a placeholder; load real tokens from configuration.
            authorization: `Bearer eyJhbGciOiJIUzcCI6IkpXVCJ9.eyJzd`
        }
    });
    const bodyResponse = (await response.body()).toString();
    // The stringified body doubles as the failure message of the checks.
    expect(response.ok(), `${JSON.stringify(bodyResponse)}`).toBeTruthy();
    expect(response.status()).toBe(200);
    const textResponse = JSON.stringify(bodyResponse);
    expect(textResponse, textResponse).not.toContain('errors');
});

Using puppeteer, on TimeoutError screenshot the current state

I'm trying to screenshot a website using puppeteer, and on slow sites I receive a TimeoutError.
In this case, I'd like to get the screenshot of the current page state - is this possible? if so, how?
Code sample:
const puppeteer = require('puppeteer');
let url = "http://...";
let timeout = 30000;
// Load the page and screenshot it; a navigation timeout is singled out in
// the error handler, every other failure is printed.
(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();

    // Runs after a successful navigation; screenshot failures are printed.
    const captureLoadedPage = async () => {
        await page
            .screenshot({path: 'example.png'})
            .catch(error => console.error(error));
    };

    // Receives navigation failures.
    const onNavigationFailure = (error) => {
        if (error.name !== "TimeoutError") {
            console.error(error);
            return;
        }
        // -----> calling await page.screenshot({path: 'example.png'}) gets stuck
    };

    await page
        .goto(url, {waitUntil: 'load', timeout: timeout})
        .then(captureLoadedPage)
        .catch(onNavigationFailure);
    await browser.close();
})();
Don't call browser.close while using Puppeteer in development, as this may close the browser and crash Puppeteer.
const puppeteer = require('puppeteer')
let url = "https://www.tokopedia.com";
let filename = 'timeout.png';
let timeoutNum = 30000;
// Screenshot the page after it loads; when navigation times out, screenshot
// whatever has been rendered so far instead.
(async () => {
    const browser = await puppeteer.launch({
        headless: false
    });
    const [page] = await browser.pages();
    // Wait for the viewport change so it is applied before navigating.
    await page.setViewport({ width: 1366, height: 768 });
    try {
        await page.goto(url, { waitUntil: 'networkidle0', timeout: timeoutNum });
        await page.screenshot({ path: 'example.png', fullPage: true });
    } catch (error) {
        if (error.name === "TimeoutError") {
            console.log(error.name);
            await page.screenshot({ path: filename, fullPage: true });
            // Report the file only once it has actually been written.
            console.log(`Screenshot saved as ${filename}`);
        } else {
            console.log(error);
        }
    }
    // The browser is intentionally left open, as in the original snippet.
})();

Resources