Is there some kind of limit when using IMPORTXML? [duplicate] - google-sheets

I am stuck on a scraping problem right now. Specifically, I want to extract the name of the author from a webpage into a Google spreadsheet. The function =IMPORTXML(A2,"//span[@class='author vcard meta-item']") works, but after I increase the number of links to scrape, it just keeps loading endlessly.
So I researched and found out that this problem is due to a limit imposed by Google.
Does anybody know how to get around the limit, or know of a script that I could easily copy? I really have no clue about coding.

I created a custom import function that overcomes all limits of IMPORTXML. I have a sheet using this in about 800 cells and it works great.
It makes use of Google Sheet’s custom scripts (Tools > Script editor…) and searches through content using regex instead of xpath.
/**
 * Fetches a page and returns the first capture group of a case-insensitive
 * regular expression applied to the page body.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {string} regexInput - Regular expression source; the text of its
 *     first capture group is returned.
 * @return {string} The captured text, or '' when the body or pattern is
 *     empty, nothing matches, or the pattern has no (participating) group.
 * @customfunction
 */
function importRegex(url, regexInput) {
  var output = '';
  var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
  if (fetchedUrl) {
    var html = fetchedUrl.getContentText();
    if (html.length && regexInput.length) {
      var found = html.match(new RegExp(regexInput, 'i'));
      // match() returns null when nothing matches, and index 1 is undefined
      // when the pattern has no capture group; keep '' in both cases
      // instead of throwing or returning undefined.
      if (found && found[1] !== undefined) {
        output = found[1];
      }
    }
  }
  // Grace period to not overload
  Utilities.sleep(1000);
  return output;
}
You can then use this function like any function.
=importRegex("https://example.com", "<title>(.*)<\/title>")
Of course, you can also reference cells.
=importRegex(A2, "<title>(.*)<\/title>")
If you don’t want to see HTML entities in the output, you can use this function.
// Lookup table from a named HTML entity (the text between '&' and ';')
// to the character it is replaced with.
var htmlEntities = {
  nbsp: ' ',
  cent: '¢',
  pound: '£',
  yen: '¥',
  euro: '€',
  copy: '©',
  reg: '®',
  lt: '<',
  gt: '>',
  mdash: '\u2014', // em dash
  ndash: '\u2013', // en dash
  quot: '"',
  amp: '&',
  apos: '\''
};
/**
 * Replaces HTML entities in a string with the characters they stand for.
 *
 * Named entities are looked up in `htmlEntities`; numeric references
 * (`&#65;`, `&#x41;`, `&#X41;`) are decoded directly. Anything that is not
 * recognised is left untouched.
 *
 * @param {string} str - Text that may contain HTML entities.
 * @return {string} The text with known entities decoded ('' for null/undefined).
 */
function unescapeHTML(str) {
  if (str == null) {
    return '';
  }
  return String(str).replace(/&([^;]+);/g, function (entity, entityCode) {
    // Own-property check: a plain `in` test also hits inherited keys such as
    // "constructor" or "toString" and would splice a function into the output.
    if (Object.prototype.hasOwnProperty.call(htmlEntities, entityCode)) {
      return htmlEntities[entityCode];
    }
    var hex = /^#[xX]([\da-fA-F]+)$/.exec(entityCode);
    var dec = hex ? null : /^#(\d+)$/.exec(entityCode);
    var codePoint = hex ? parseInt(hex[1], 16) : (dec ? parseInt(dec[1], 10) : -1);
    // fromCodePoint handles characters above U+FFFF and throws beyond
    // U+10FFFF, so out-of-range references are returned unchanged.
    if (codePoint >= 0 && codePoint <= 0x10FFFF) {
      return String.fromCodePoint(codePoint);
    }
    return entity;
  });
}
All together…
/**
 * Fetches a page, extracts the first capture group of a case-insensitive
 * regular expression from its body, and decodes HTML entities in the result.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {string} regexInput - Regular expression source; the text of its
 *     first capture group is returned.
 * @return {string} The decoded captured text; the empty string is decoded
 *     when the body or pattern is empty, nothing matches, or the pattern
 *     has no (participating) capture group.
 * @customfunction
 */
function importRegex(url, regexInput) {
  var output = '';
  var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
  if (fetchedUrl) {
    var html = fetchedUrl.getContentText();
    if (html.length && regexInput.length) {
      var found = html.match(new RegExp(regexInput, 'i'));
      // match() returns null when nothing matches, and index 1 is undefined
      // when the pattern has no capture group; keep '' in both cases so
      // unescapeHTML always receives a string.
      if (found && found[1] !== undefined) {
        output = found[1];
      }
    }
  }
  // Grace period to not overload
  Utilities.sleep(1000);
  return unescapeHTML(output);
}
// Lookup table from a named HTML entity (the text between '&' and ';')
// to the character it is replaced with.
var htmlEntities = {
  nbsp: ' ',
  cent: '¢',
  pound: '£',
  yen: '¥',
  euro: '€',
  copy: '©',
  reg: '®',
  lt: '<',
  gt: '>',
  mdash: '\u2014', // em dash
  ndash: '\u2013', // en dash
  quot: '"',
  amp: '&',
  apos: '\''
};
/**
 * Replaces HTML entities in a string with the characters they stand for.
 *
 * Named entities are looked up in `htmlEntities`; numeric references
 * (`&#65;`, `&#x41;`, `&#X41;`) are decoded directly. Anything that is not
 * recognised is left untouched.
 *
 * @param {string} str - Text that may contain HTML entities.
 * @return {string} The text with known entities decoded ('' for null/undefined).
 */
function unescapeHTML(str) {
  if (str == null) {
    return '';
  }
  return String(str).replace(/&([^;]+);/g, function (entity, entityCode) {
    // Own-property check: a plain `in` test also hits inherited keys such as
    // "constructor" or "toString" and would splice a function into the output.
    if (Object.prototype.hasOwnProperty.call(htmlEntities, entityCode)) {
      return htmlEntities[entityCode];
    }
    var hex = /^#[xX]([\da-fA-F]+)$/.exec(entityCode);
    var dec = hex ? null : /^#(\d+)$/.exec(entityCode);
    var codePoint = hex ? parseInt(hex[1], 16) : (dec ? parseInt(dec[1], 10) : -1);
    // fromCodePoint handles characters above U+FFFF and throws beyond
    // U+10FFFF, so out-of-range references are returned unchanged.
    if (codePoint >= 0 && codePoint <= 0x10FFFF) {
      return String.fromCodePoint(codePoint);
    }
    return entity;
  });
}

There is no script that can exceed the limits. Since the code runs on a Google machine (server), you cannot cheat.
Some limits are bound to your spreadsheet, so you could try using multiple spreadsheets, if that helps.

Related

Get term for selected autocomplete when multiple are on one page

I have a page where I am adding jquery-ui autocompletes dynamically
My .autocomplete() code includes a $.getJSON('my_url', my_payload) where, in my_payload, I am trying to send the request.term (what I typed into the jQuery UI textbox) as well as the id of the jQuery UI textbox.
The problem is, for all the dynamically added textboxes, they were just picking up the term and id of the original autocomplete.
I managed to find a way to get the id of the added (not original) autocomplete by wrapping the autocomplete in a function that has the added field passed in as a parameter, but because the 'term' is in the request, which comes from .autocomplete, I do not know how to get this for the new ones.
https://jsfiddle.net/amchugh89/1L8jvea5/4/
//=======dynamic formset script from https://medium.com/all-about-django/adding-forms-dynamically-to-a-django-formset-375f1090c2b0======
/**
 * Rewrites the "<prefix>-<number>" part of an element's `for`, `id` and
 * `name` so that it carries the given index.
 *
 * @param {Element} el - Element whose attributes are renumbered.
 * @param {string} prefix - Formset prefix that precedes the index.
 * @param {number} ndx - Index to write in place of the current one.
 */
function updateElementIndex(el, prefix, ndx) {
  var indexedPrefix = new RegExp('(' + prefix + '-\\d+)');
  var renumbered = prefix + '-' + ndx;
  var renumber = function (value) {
    return value.replace(indexedPrefix, renumbered);
  };

  var $el = $(el);
  var forTarget = $el.attr("for");
  if (forTarget) {
    $el.attr("for", renumber(forTarget));
  }
  if (el.id) {
    el.id = renumber(el.id);
  }
  if (el.name) {
    el.name = renumber(el.name);
  }
}
/**
 * Clones the row matched by `selector`, renumbers the clone's inputs and
 * labels to the next form index, inserts it after the source row, bumps the
 * TOTAL_FORMS counter and turns the "add" button of every row except the
 * last into a "remove" button.
 *
 * @param {string} selector - Selector of the row to clone.
 * @param {string} prefix - Formset prefix used in the TOTAL_FORMS field id.
 * @return {boolean} Always false.
 */
function cloneMore(selector, prefix) {
  var totalFormsSelector = '#id_' + prefix + '-TOTAL_FORMS';
  var clonedRow = $(selector).clone(true);
  var total = $(totalFormsSelector).val();
  var oldIndexToken = '-' + (total - 1) + '-';
  var newIndexToken = '-' + total + '-';

  clonedRow.find(':input:not([type=button]):not([type=submit]):not([type=reset])').each(function () {
    var $field = $(this);
    var currentName = $field.attr('name');
    if (!currentName) {
      return; // unnamed inputs are left as they are
    }
    var name = currentName.replace(oldIndexToken, newIndexToken);
    var id = 'id_' + name;
    $field.attr({'name': name, 'id': id}).val('').removeAttr('checked');
    // Inputs whose id contains "gl" get their own autocomplete.
    if (id.includes('gl')) {
      console.log(id);
      make_autocomplete($field);
    }
  });

  clonedRow.find('label').each(function () {
    var $label = $(this);
    var forValue = $label.attr('for');
    if (forValue) {
      $label.attr({'for': forValue.replace(oldIndexToken, newIndexToken)});
    }
  });

  total++;
  $(totalFormsSelector).val(total);
  $(selector).after(clonedRow);

  $('.form-row:not(:last)')
    .find('.btn.add-form-row')
    .removeClass('btn-success').addClass('btn-danger')
    .removeClass('add-form-row').addClass('remove-form-row')
    .html('<span class="glyphicon glyphicon-minus" aria-hidden="true"></span>');
  return false;
}
/**
 * Removes the form row that contains `btn` (as long as more than one form
 * remains), updates the TOTAL_FORMS counter and renumbers the inputs and
 * labels of the remaining rows so their indices are contiguous again.
 *
 * @param {string} prefix - Formset prefix used in the TOTAL_FORMS field id.
 * @param {jQuery} btn - The clicked remove button.
 * @return {boolean} Always false.
 */
function deleteForm(prefix, btn) {
  var totalFormsSelector = '#id_' + prefix + '-TOTAL_FORMS';
  // Explicit radix so the counter is always read as a decimal number.
  var total = parseInt($(totalFormsSelector).val(), 10);
  if (total > 1) {
    btn.closest('.form-row').remove();
    var forms = $('.form-row');
    $(totalFormsSelector).val(forms.length);
    for (var i = 0, formCount = forms.length; i < formCount; i++) {
      // Labels are included so their `for` attribute keeps pointing at the
      // renumbered input; updateElementIndex already handles `for`.
      $(forms.get(i)).find(':input, label').each(function () {
        updateElementIndex(this, prefix, i);
      });
    }
  }
  return false;
}
// Delegated click handlers: "add" buttons clone the last form row,
// "remove" buttons delete the row they belong to.
$(document).on('click', '.add-form-row', function (event) {
  event.preventDefault();
  cloneMore('.form-row:last', 'form');
  return false;
});

$(document).on('click', '.remove-form-row', function (event) {
  event.preventDefault();
  var clickedButton = $(this);
  deleteForm('form', clickedButton);
  return false;
});
//====================
//AUTOCOMPLETE==(that allows for multiple ACs https://stackoverflow.com/questions/24656589/using-jquery-ui-autocomplete-with-multiple-input-fields)===================================
/**
 * Applies a jQuery UI autocomplete to an input whenever it receives focus.
 *
 * Each lookup sends the typed term together with the value of the branch
 * field that belongs to the same formset row as the focused input.
 *
 * @param {jQuery} ee - One or more autocomplete inputs.
 */
function make_autocomplete(ee) {
  ee.on("focus", function () {
    // Use the focused input rather than `ee`: when `ee` wraps several
    // inputs, ee.attr('id') only ever returns the id of the first one.
    var input = $(this);
    input.autocomplete({
      minLength: 2,
      source: function (request, response) {
        // Row-specific ids: the branch field id is the autocomplete id
        // with 'gl_account' replaced by 'branch'.
        var this_formset_row_autocomplete_id = input.attr('id');
        console.log(this_formset_row_autocomplete_id);
        var corresponding_branch_html_id =
          this_formset_row_autocomplete_id.replace('gl_account', 'branch');
        var this_formset_row_branch_sym_id =
          $('#' + corresponding_branch_html_id).val();
        var appended_data = {
          term: request.term,
          this_formset_row_branch_sym_id: this_formset_row_branch_sym_id
        };
        console.log(appended_data);
        $.getJSON("{% url 'dashapp:account_autocomplete' %}", appended_data,
          function (data) {
            response(data);
          });
      }
    });
  });
}//end function make_autocomplete
// Select every element with the account_autocomplete class and attach the
// autocomplete behaviour to it.
var ee =$( ".account_autocomplete" )
make_autocomplete(ee)
//===============
You may want to try to make it more simple for testing. Something like:
/**
 * Initialises a jQuery UI autocomplete on `obj` whose lookups send the typed
 * term plus details gathered from the elements around the input.
 *
 * @param {jQuery} obj - Input(s) to turn into autocompletes.
 */
function make_autocomplete(obj) {
  obj.autocomplete({
    minLength: 2,
    source: function (req, resp) {
      // jQuery UI calls `source` with the widget instance as `this`; the
      // input itself is `this.element`. Wrapping the instance in $() yields
      // an object that is not in the DOM, so .closest() would find nothing.
      var input = this.element;
      var myData = {
        term: req.term,
        original_form_branch_id: input.closest("form").attr("id"),
        this_formset_row_branch_sym_id: input.closest(".row").find("select").val()
      };
      $.getJSON("myurl", myData, function (results) {
        resp(results);
      });
    }
  });
}
Fiddle: https://jsfiddle.net/Twisty/pywb9nhv/23/
This uses .closest() to gather details from the relative objects. Also I do not see any benefit to initializing Autocomplete on focus event.
If you would like further help, please provide Example Data that can be used in a working example.
Hope that helps a little.

Why my md-autoComplete is not displaying return values

I am using Angular Material for the first time. I am stuck with an issue with autocomplete. Below is my template:
<!-- Autocomplete bound to c.receipt. Suggestions come from
     querySearch(SearchText); each one is rendered with its GEOType and its
     GEOName (followed by " / country" when a country is present). -->
<md-autocomplete class="flex"
md-no-cache="true"
md-selected-item="c.receipt"
md-item-text="item.name"
md-search-text="SearchText"
md-items="item in querySearch(SearchText)"
md-floating-label="search">
<md-item-template>
<span><span class="search-result-type">{{item.GEOType}}</span><span md-highlight-text="SearchText">{{item.GEOName+(item.country?' / '+item.country:'')}}</span></span>
</md-item-template>
<md-not-found>No matches found.</md-not-found>
</md-autocomplete>
And in ctrl I have:
/**
 * Looks up geo names for the typed text. Queries shorter than five
 * characters are ignored.
 *
 * NOTE: the promise created by the request is not returned, so every call
 * evaluates to undefined.
 *
 * @param {string} query - Text typed into the autocomplete.
 */
$scope.querySearch = function (query) {
  if (query.length < 5) {
    return;
  }

  var endpoint = '/api/TargetSettings/RetrieveCorridorLeverValues';
  var requestUrl = endpoint + '?' + 'strGeoName=' + query;

  var onSuccess = function (geoAPIResponse) {
    console.log("GeoAPIResponse was ", geoAPIResponse);
    return geoAPIResponse.data;
  };
  var onFailure = function (geoAPIError) {
    console.log("GeoAPI call failed ", geoAPIError);
  };

  $http.get(requestUrl).then(onSuccess, onFailure);
};
With the above code I get no suggestions; only my not-found text is displayed, even though my HTTP call returns an array, which is printed in the console too. Am I missing something?
I have seen many places where people use filters with autocomplete, but I don't think that is essential.
Please advise how to make the above work.
$http returns a promise, and md-autocomplete uses that promise to display the results. In your case the promise is created but never returned from querySearch. Your code should be
/**
 * Supplies suggestions for the autocomplete.
 *
 * @param {string} query - Text typed into the autocomplete.
 * @return {Array|Promise<Array>} An empty list for queries shorter than five
 *     characters; otherwise a promise for the data returned by the geo API
 *     (an empty list when the request fails).
 */
$scope.querySearch = function (query) {
  var GeoDataAPIUrl = '/api/TargetSettings/RetrieveCorridorLeverValues';
  // Always hand back a list or a promise, never undefined.
  if (!query || query.length < 5) {
    return [];
  }
  // Encode the typed text so characters such as '&' or '#' cannot break the
  // query string.
  var GeoDataSearchUrl = GeoDataAPIUrl + '?' + 'strGeoName=' + encodeURIComponent(query);
  var promise = $http.get(GeoDataSearchUrl).then(function (geoAPIResponse) {
    console.log("GeoAPIResponse was ", geoAPIResponse);
    return geoAPIResponse.data;
  },
  function (geoAPIError) {
    console.log("GeoAPI call failed ", geoAPIError);
    // Resolve to "no suggestions" instead of undefined.
    return [];
  });
  return promise;
};
It will work now.

firefox addon, how to modify(change) the url before the request is sent (even made) by the browser?

I want to remove some parameters in a url, currently my code:
require("sdk/tabs").on("ready", removeList);
/**
 * Strips everything from "&list=" onwards from a YouTube tab's URL.
 * Tabs whose URL lacks "youtube.com" or "&list=" are left alone.
 *
 * @param {Object} tab - Tab whose `url` is inspected and rewritten.
 */
function removeList(tab) {
  var listParamAt = tab.url.indexOf("&list=");
  var isYoutube = tab.url.indexOf("youtube.com") !== -1;
  if (!isYoutube || listParamAt === -1) {
    return;
  }

  console.log(tab.url);
  var trimmedUrl = tab.url.slice(0, listParamAt);
  console.log(trimmedUrl);
  // The URL is blanked first and then set to the shortened address.
  tab.url = "";
  tab.url = trimmedUrl;
}
But it will send two urls(requests) to the server, the original one (I can see the response without the video being played) and the truncated one(as expected).
Your two options are http-on-modify-request and http-on-opening-request. The first is fine, but the second fires earlier and you lose a lot of ability. With the first method, changing the URL is fine because the server never sees the original one.
const { Ci, Cu, Cc, Cr } = require('chrome'); //const {interfaces: Ci, utils: Cu, classes: Cc, results: Cr } = Components;
Cu.import('resource://gre/modules/Services.jsm');
Cu.import('resource://gre/modules/devtools/Console.jsm');
// Observers keyed by the notification topic they listen to. Each entry has
// the observe() callback plus reg()/unreg() helpers that add or remove that
// same entry for its topic.
var observers = {
  // The key must be the topic that reg()/unreg() look up; under any other
  // key they would register `undefined` instead of this observer.
  'http-on-modify-request': {
    // Redirects YouTube requests containing "&list=" to the same URL cut
    // off just before that parameter.
    observe: function (aSubject, aTopic, aData) {
      console.info('http-on-modify-request: aSubject = ' + aSubject + ' | aTopic = ' + aTopic + ' | aData = ' + aData);
      var httpChannel = aSubject.QueryInterface(Ci.nsIHttpChannel);
      var requestUrl = httpChannel.URI.spec;
      var index = requestUrl.indexOf('&list=');
      if (requestUrl.indexOf('youtube.com') > -1 && index > -1) {
        console.log(requestUrl);
        var temp = requestUrl.slice(0, index);
        httpChannel.redirectTo(Services.io.newURI(temp, null, null));
      }
    },
    reg: function () {
      Services.obs.addObserver(observers['http-on-modify-request'], 'http-on-modify-request', false);
    },
    unreg: function () {
      Services.obs.removeObserver(observers['http-on-modify-request'], 'http-on-modify-request');
    }
  }
};
or instead of the redirectTo line you can do httpChannel.cancel(Cr.NS_BINDING_ABORTED); then get that loadContext and change the url.
To start observing
To start observing all requests, do this (for example on startup of your addon)
// Register every observer listed in `observers`.
Object.keys(observers).forEach(function (topic) {
  observers[topic].reg();
});
To stop observing
It's important to stop observing (make sure to run this at least on shutdown of the addon; you don't want to leave the observer registered, for memory reasons)
// Unregister every observer listed in `observers`.
Object.keys(observers).forEach(function (topic) {
  observers[topic].unreg();
});

Display result matching optgroup using select2

I'm using select2 with Bootstrap 3.
Now I would like to know whether it is possible to display all optgroup items if the search matches the optgroup name while still being able to search for items as well. If this is possible, how can I do it?
The above answers don't seem to work out of the box with Select2 4.0 so if you're hunting for that, check this out: https://github.com/select2/select2/issues/3034
(Use the function like this: $("#example").select2({matcher: modelMatcher});)
/**
 * Select2 4.x matcher that also matches an option when the search term
 * occurs in the text of one of its parent groups.
 *
 * @param {Object} params - Search parameters; `params.term` is the typed text.
 * @param {Object} data - Option or group being tested. Its `text` is compared
 *     and its `children` (if any) are tested recursively. `parentText`, when
 *     present, is treated as text inherited from ancestor groups.
 * @return {Object|null} `data` itself for a matching leaf or an empty term, a
 *     copy of `data` holding only the matching children for a group, or null
 *     when nothing matches.
 */
function modelMatcher (params, data) {
  // Text inherited from ancestor groups; only the copies made below carry it.
  var inheritedText = data.parentText || "";
  var ownText = data.text || "";

  // Always return the object if there is nothing to compare
  if ($.trim(params.term) === '') {
    return data;
  }

  // Do a recursive check for options with children
  if (data.children && data.children.length > 0) {
    // Clone the data object if there are children
    // This is required as we modify the object to remove any non-matches
    var match = $.extend(true, {}, data);
    var textForChildren = inheritedText + " " + ownText;

    // Check each child of the option
    for (var c = data.children.length - 1; c >= 0; c--) {
      // Test a shallow copy that carries the group text. Appending to the
      // original child instead would start from `undefined` (putting the
      // word "undefined" into the searched text) and would grow the
      // caller's data on every search.
      var child = $.extend({}, data.children[c], { parentText: textForChildren });

      // If there wasn't a match, remove the object in the array
      if (modelMatcher(params, child) == null) {
        match.children.splice(c, 1);
      }
    }

    // If any children matched, return the new object
    if (match.children.length > 0) {
      return match;
    }

    // If there were no matching children, check just the plain object
    return modelMatcher(params, match);
  }

  // If the typed-in term matches the text of this term, or the text from any
  // parent term, then it's a match.
  var original = (inheritedText + ' ' + ownText).toUpperCase();
  var term = params.term.toUpperCase();

  // Return the option when its text contains the term, otherwise nothing
  return original.indexOf(term) > -1 ? data : null;
}
Actually found the solution by modifying the matcher opt
$("#myselect").select2({
  // Matches an option when the term occurs in its own text or in the label
  // of the optgroup it belongs to (case-insensitive).
  matcher: function (term, text, opt) {
    var needle = term.toUpperCase();
    if (text.toUpperCase().indexOf(needle) >= 0) {
      return true;
    }
    // Options outside an optgroup, or groups without a label, have no label
    // text to compare; treat that as "no match" instead of throwing.
    var groupLabel = opt.parent("optgroup").attr("label");
    return typeof groupLabel === 'string' && groupLabel.toUpperCase().indexOf(needle) >= 0;
  }
});
Under the premise that the label attribute has been set in each optgroup.
Found a solution from select2/issues/3034
Tested with select2 v.4
$("select").select2({
  // Wraps the default matcher: when the term occurs in a group's own text,
  // the group is returned with all of its children rather than only the
  // children that matched individually.
  matcher(params, data) {
    const originalMatcher = $.fn.select2.defaults.defaults.matcher;
    const result = originalMatcher(params, data);
    if (!result) {
      return null;
    }
    const isFilteredGroup =
      data.children &&
      result.children &&
      data.children.length &&
      data.children.length !== result.children.length;
    if (
      isFilteredGroup &&
      data.text.toLowerCase().includes(params.term.toLowerCase())
    ) {
      result.children = data.children;
    }
    // Items without children fall through to here: whatever the default
    // matcher decided is kept, so ungrouped options are not dropped.
    return result;
  },
});
A few minor changes to the code people suggested: less repetitive, and it copes when there are no parent optgroups:
$('select').select2({
  // An option matches when the default matcher accepts its own text or,
  // if it sits inside an optgroup, the label of that group.
  matcher: function (term, text, opt) {
    var defaultMatcher = opt.parent('select').select2.defaults.matcher;
    var ownTextResult = defaultMatcher(term, text);
    if (ownTextResult) {
      return ownTextResult;
    }
    var group = opt.parent('optgroup');
    return group.length && defaultMatcher(term, group.attr("label"));
  }
});

Calling Predicate() constructor causes Knockout to throw unexplained exception.

I'm new to breeze and I can't begin to imagine what's causing this to happen. This is a two part question:
1) My function is very simple. I'm querying with two predicates:
/**
 * Queries the lower and upper values of one health metric of one user.
 *
 * @param {*} userId - Value compared against the `userId` property.
 * @param {*} healthMetricId - Value compared against `healthMetricId`.
 * @param {boolean} forceRemote - When falsy, the query is run with the
 *     FromLocalCache fetch strategy.
 * @return {*} Whatever manager.executeQuery returns for the query.
 */
var getUserHealthMetricFromId = function (userId, healthMetricId, forceRemote) {
  var p1 = new Predicate('userId', '==', userId);
  var p2 = new Predicate('healthMetricId', '==', healthMetricId);
  // select() reads a single argument (a comma-separated string or an array);
  // a second positional argument would be ignored, so both property names
  // are passed in one string.
  var query = EntityQuery.from('UserHealthMetrics')
    .select('lowerValue, upperValue')
    .where(p1.and(p2));
  if (!forceRemote) {
    query = query.using(breeze.FetchStrategy.FromLocalCache);
  }
  return manager.executeQuery(query);
};
While I'm debugging (Chrome) the first predicate declaration line, calling the Predicate ctor causes execution to jump to the following finally clause in Knockout-3.0.0.debug.js (line 1483):
finally {
ko.dependencyDetection.end();
_isBeingEvaluated = false;
}
When I execute the "_isBeingEvaluated = false" statement,
an exception is inexplicably thrown landing me here (line 2607):
} catch (ex) {
ex.message = "Unable to process binding \"" + bindingKey + ": " + bindings[bindingKey] + "\"\nMessage: " + ex.message;
throw ex;
}
Thinking this might have more to do with Knockout than with Breeze, I tested by altering the code by hardcoding the Id's so that the parameter variables (which are observables) aren't involved in calling the ctor anymore:
// Same predicates with literal ids, so the observable parameters are not
// involved in the constructor calls.
var p1 = new Predicate('userId', '==', 1);
var p2 = new Predicate('healthMetricId', '==', 4);
No dice. The same thing happens. When I try to step into Predicate() the same thing happens: it just throws me over to the Knockout debug file.
2) In the same function, the variables I'm passing in are showing up as dependentObservables() in the debug window. These values are the product of another breeze call to the server. Why would breeze render these as dependentObservables instead of plain observables (I do not declare any computeds anywhere in the code)? Here's a quick overview of my code:
In the view model:
var latestEntriesObservable = ko.observableArray(null);
/**
 * Hides the range dialog and starts the three datacontext loads (latest
 * entries, user health metrics, user health metric names) for the current
 * user.
 *
 * @return {*} The result of Q.all over the three load calls.
 */
function activate() {
  $('#rangeDialog').hide();

  var pendingLoads = [
    datacontext.getLatestEntries(latestEntriesObservable, currentUserId, false),
    datacontext.getUserHealthMetrics(userHealthMetricsObservable, currentUserId, false),
    datacontext.getUserHealthMetricNames(userHealthMetricNamesObservable, currentUserId, false)
  ];
  return Q.all(pendingLoads);
}
/**
 * Loads the latest entries into the given observable.
 *
 * Unless `forceRemote` is set, locally available 'HealthMetricValues' are
 * used when there are any; otherwise the 'LatestEntries' query is executed
 * through the manager.
 *
 * @param {Function} latestEntriesObservable - Receives the entries.
 * @param {*} userId - Not used by this function.
 * @param {boolean} forceRemote - Skip the local lookup when truthy.
 * @return {*} Q.resolve() when local data was used, otherwise the chain
 *     returned by the remote query.
 */
var getLatestEntries = function (latestEntriesObservable, userId, forceRemote) {
  var remoteQuery = EntityQuery.from('LatestEntries').withParameters({ id: 1 });

  var cached = forceRemote ? [] : getLocal('HealthMetricValues', 'healthMetricId');
  if (cached.length > 0) {
    latestEntriesObservable(cached);
    return Q.resolve();
  }

  // handle the ajax callback
  var publishResults = function (data) {
    if (latestEntriesObservable) {
      latestEntriesObservable(data.results);
    }
    log('Retrieved latest entries.', data, true);
  };

  return manager.executeQuery(remoteQuery)
    .then(publishResults)
    .fail(queryFailed);
};
/**
 * Runs a query against the manager's local data.
 *
 * @param {string} resource - Resource name passed to EntityQuery.from.
 * @param {string} orderBy - Ordering passed to the query's orderBy.
 * @return {*} The result of manager.executeQueryLocally.
 */
function getLocal(resource, orderBy) {
  var localQuery = EntityQuery.from(resource);
  localQuery = localQuery.orderBy(orderBy);
  localQuery = localQuery.withParameters({ id: 1 });
  return manager.executeQueryLocally(localQuery);
}
If I haven't provided enough code to help make a diagnosis I'll be happy to provide more upon request.
Any suggestions would be much appreciated!
Ok, I think the issue is that the class is actually breeze.Predicate. In order to save typing we often assign a local variable like this.
// Local alias so the constructor can be called without the breeze. prefix.
var Predicate = breeze.Predicate;
var p1 = new Predicate('userId', '==', 1);
or you can explicitly do this via
var p1 = new breeze.Predicate('userId', '==', 1);
or
var p1 = breeze.Predicate.create('userId', '==', 1);
Presumably, you are doing the same thing with EntityQuery, i.e.
var EntityQuery = breeze.EntityQuery;

Resources