Search Google Sheet column for matching text and print matches - google-sheets

I have a table with Long Words like 'Condemnation' and 'Income' in column A, and Shorter Words such as 'Con' and 'Come' in column B.
I'd like to create a cell to the right which will search through the 'LONG WORD' column if it contains the text of the 'SHORTER WORD' column and print them as a pair.
I only need it to return the first instance it comes across as it goes down.
I have looked at various MATCH and LOOKUP commands, but none seem quite to be able to do the 'return one matching word from a whole column' bit.
Thanks
Tardy

I've thrown together a script based solution for you. Other solutions that require a formula on every line where you might have partials will end up bogging down the sheet by quite a bit for large data sets. This should generate a range of matches after a couple seconds for data several tens of thousands of rows long.
Note: Since you opted to not provide a sample dataset, I had to assume how it's laid out. However, this will work regardless of where your columns are, as long as they are titled as Full Words, Partials, and Matches.
Link to spreadsheet (Must be signed into a google account to use the button): Google Sheet
Just click the Get Matches button to have it generate the matches.
The source is a bit more complex/dynamic than it needs to be, but I had a bunch of functions already laying around that I just reused.
Source:
//Retrieves all the necessary word matches
function GetWordMatches() {
var spreadsheet = SpreadsheetApp.openById('1s0S2iJ7L0wEXgVsKrpuK-aLysaxfHYRDQgp3ShPR8Ns').getSheetByName('Matches');
var dataRange = spreadsheet.getDataRange();
var valuesRange = dataRange.getValues();
var columns = GetColumns(valuesRange, dataRange.getNumColumns(), 0);
var fullWordsData = GetColumnAsArray(valuesRange, columns.columns['Full Words'].index, true, 1);
var partialsArray = GetColumnAsArray(valuesRange, columns.columns['Partials'].index, true, 1);
var partialsData = GeneratePartialsRegexArray(partialsArray);
var matches = GenerateMatches(fullWordsData, partialsData);
WriteMatchesToSheet(spreadsheet, columns.columns['Matches'].index, matches, partialsArray);
}
//Writes the matches to the sheet
function WriteMatchesToSheet(spreadsheet, matchesColumnIndex, matches, partialsArray){
var sortedMatches = SortByKeys(matches, partialsArray);
var dataRange = spreadsheet.getRange(2, matchesColumnIndex+1, sortedMatches.length);
dataRange.setValues(sortedMatches);
}
//Generates an array of matches for the full words and partials
function GenerateMatches(fullwordsData, partialsData){
var output = [];
var totalLoops = 0;
for(var i = 0; i < fullwordsData.length; i++){
totalLoops++;
for(var ii = 0; ii < partialsData.length; ii++){
totalLoops++;
var result = fullwordsData[i].match(partialsData[ii].regex)
if(result){
output.push([fullwordsData[i], partialsData[ii].value]);
partialsData.splice(ii, 1);
break;
}
}
}
if(partialsData.length > 0){
var missedData = GenerateMissedPartialsArray(partialsData);
output = output.concat(missedData);
}
return output;
}
//Generates a missed partials array based on the partials that found no match.
function GenerateMissedPartialsArray(partialsData){
var output = [];
for(var i = 0; i < partialsData.length; i++){
output.push(['No Match', partialsData[i].value])
}
return output;
}
//Generates the regex array for the partials
function GeneratePartialsRegexArray(partialsArray){
var output = [];
for(var i = 0; i < partialsArray.length; i++){
output.push({regex: new RegExp(partialsArray[i], 'i'), value: partialsArray[i]});
}
return output;
}
//http://stackoverflow.com/a/13305008/3547347
function SortByKeys(itemsArray, sortingArray){
var itemsMap = CreateItemsMap(itemsArray), result = [];
for (var i = 0; i < sortingArray.length; ++i) {
var key = sortingArray[i];
result.push([itemsMap[key].shift()]);
}
return result;
}
//http://stackoverflow.com/a/13305008/3547347
function CreateItemsMap(itemsArray) {
var itemsMap = {};
for (var i = 0, item; (item = itemsArray[i]); ++i) {
(itemsMap[item[1]] || (itemsMap[item[1]] = [])).push(item[0]);
}
return itemsMap;
}
//Gets a column of data as an array
function GetColumnAsArray(valuesRange, columnIndex, ignoreBlank, startRowIndex){
var output = [];
for(var i = startRowIndex; i < valuesRange.length; i++){
if(ignoreBlank){
if(valuesRange[i][columnIndex] !== ''){
output.push(valuesRange[i][columnIndex]);
}
continue;
}
output.push(valuesRange[i][columnIndex]);
}
return output;
}
//Gets a columns object for the sheet for easy indexing
function GetColumns(valuesRange, columnCount, rowIndex)
{
var columns = {
columns: {},
length: 0
}
Logger.log("Populating columns...");
for(var i = 0; i < columnCount; i++)
{
if(valuesRange[0][i] !== ''){
columns.columns[valuesRange[0][i]] = {index: i ,value: valuesRange[0][i]};
columns.length++;
}
}
return columns;
}
A note on some decisions: I opted to not use map, or other more concise array functions for the sake of performance.

This works too:
=QUERY(FILTER($D$1:$D$3,REGEXMATCH(A1,"(?i)"&$D$1:$D$3)),"limit 1")
we use REGEXMATCH and (?i) makes the search case-insensitive. limit 1 in query gives only first occurrence.

OK, I think I've found an answer. I'll post it here in case it's of use to anyone else.
To give credit where's credit's due, I found it here
This does what I was looking for:
=INDEX($D$1:$D$3,MATCH(1,COUNTIF(A1,"*"&$D$1:$D$3&"*"),0))
It does slow EVERYTHING down a lot because everything is cross-referencing like mad (I had 3000 lines on my spreadsheet), but if there's a list of words in D1-3 it will see if cell A1 contains one of those words and print the word it matches with.
Thanks to everyone who offered solutions, particularly #douglasg14b - if there is one that is less taxing in terms of memory, that would be great, but this does the trick in a slow kind of way!
Thanks
Tardy

MATCH and LOOKUP doesn't work for partial matches.
One alternative is to use SEARCH or FIND together with other functions in an array formula.
Example:
Column A contains a list of long strings
Cell B1 contain a short string
Cell C1 contain a formula that returns the first long string in column a that contains the short string in B1
=ArrayFormula(INDEX(A1:A,SORT(IF(search(B1,A1:A),ROW(A1:A),),1,TRUE)))
Data
+---+--------------+-------+-------------+
| | A | B | C |
+---+--------------+-------+-------------+
| 1 | Orange juice | apple | Apple cider |
| 2 | Apple cider | | |
| 3 | Apple pay | | |
+---+--------------+-------+-------------+

Related

Highlight near duplicate in conditional formating to highlight values with one character difference

I'm currently using this formula to highlight duplicates in my spreadsheet.
=ARRAYFORMULA(COUNTIF(A$2:$A2,$A2)>1)
Quite simple, it allows me to skip the first occurrence and only highlight 2nd, 3rd, ... occurrences.
I would like the formula to go a bit further and highlight near duplicates as well.
Meaning if there is only one character difference between 2 cells, then it should be considered as a duplicate.
For instance: "Marketing", "Marketng", "Marketingg" and "Market ing" would all be considered the same.
I've made a sample sheet in case my requirement is not straightforward to understand.
Thanks in advance.
Answer
Unfortunately, it is not possible to do this only through Formulas. Apps Scripts are need as well. The process for achieving your desired results is described below.
In Google Sheets, go to Extensions > Apps Script, paste the following code1 and save.
function TypoFinder(range, word) { // created by https://stackoverflow.com/users/19361936
if (!Array.isArray(range) || word == "") {
return false;
}
distances = range.map(row => row.map(cell => Levenshtein(cell, word))) // Iterate over range and check Levenshtein distance.
var accumulator = 0;
for (var i = 0; i < distances.length; i++) {
if (distances[i] < 2) {
accumulator++
} // Keep track of how many times there's a Levenshtein distance of 0 or 1.
}
return accumulator > 1;
}
function Levenshtein(a, b) { // created by https://stackoverflow.com/users/4269081
if (a.length == 0) return b.length;
if (b.length == 0) return a.length;
// swap to save some memory O(min(a,b)) instead of O(a)
if (a.length > b.length) {
var tmp = a;
a = b;
b = tmp;
}
var row = [];
// init the row
for (var i = 0; i <= a.length; i++) {
row[i] = i;
}
// fill in the rest
for (var i = 0; i < b.length; i++) {
var prev = i;
for (var j = 0; j < a.length; j++) {
var val;
if (b.charAt(i) == a.charAt(j)) {
val = row[j]; // match
} else {
val = Math.min(row[j] + 1, // substitution
prev + 1, // insertion
row[j + 1] + 1); // deletion
}
row[j] = prev;
prev = val;
}
row[a.length] = prev;
}
return row[a.length];
}
In cell B1, enter =TypoFinder($A$2:$A2,$A2). Autofill that formula down the column by draggin.
Create a conditional formatting rule for column A. Using Format Rules > Custom Formula, enter =B2:B.
At this point, you might wish to hide column B. To do so, right click on the column and press Hide Column.
The above explanation assumes the column you wish to highlight is Column A and the helper column is column B. Adjust appropriately.
Note that I have assumed you do not wish to highlight repeated blank columns as duplicate. If I am incorrect, remove || word == "" from line 2 of the provided snippet.
Explanation
The concept you have described is called Levenshtein Distance, which is a measure of how close together two strings are. There is no built-in way for Google Sheets to process this, so the Levenshtein() portion of the snippet above implements a custom function to do so instead. Then the TypoFinder() function is built on top of it, providing a method for evaluating a range of data against a specified "correct" word (looking for typos anywhere in the range).
Next, a helper column is used because Sheets has difficulties parsing custom formulas as part of a conditional formatting rule. Finally, the rule itself is implemented to check the helper column's determination of whether the row should be highlighted or not. Altogether, this highlights near-duplicate results in a specified column.
1 Adapted from duality's answer to a related question.

Google Sheets change value of text in a cell using Query?

I'd like to be able to change the value of cell, from a number to text - but I'm not sure how to do it?
I have two tabs, Source and Heath Hill.
Heath Hill uses Query to pull in specific cells from the source tab. Here's my formula:
=QUERY(Source!$1:$1000,"select A,AB, D,F where C = 'Heath Hill'",1)
Columns C and D in Heath Hill are numbers, I'd like to be able to replace them with text.
Column C
Replace any number 1 with Business Interest
Replace any number 2 with Representing an interest group
Replace any number 3 with Profession
Column D
Replace any number 1 with Agree
Replace any number 2 with Disagree
Replace any number 3 with Don't know
Here's a link to my example Google Sheet I've simplified it.
Solution
To achive what you are aiming here you will need to use an if function.
The formulas you would need to use are as follow:
For column C : =IF(C2=1,"Business Interest",IF(C2=2,"Representing an interest group","Profession"))
For column D : =IF(C2=1,"Agree",IF(C2=2,"Disagree","Don't know"))
Here is an example of this implementation. I have created a new column for making it easier to visualise and the I just dragged and dropped to include all the values.
I hope this has helped you. Let me know if you need anything else or if you did not understood something. :)
Its better to use App Script instead
function UpdateReplace() {
var spreadsheet = SpreadsheetApp.openById('Sheet ID');
var doc = spreadsheet.getSheetByName('Sheet_Name');
var range = doc.getRange("C2:C");
var data = range.getValues();
for (var row = 0; row < data.length; row++) {
for (var col = 0; col < data[row].length; col++) {
data[row][col] = (data[row][col]).toString().replace('1', 'Business Interest');
data[row][col] = (data[row][col]).toString().replace('2', 'Representing an interest group');
data[row][col] = (data[row][col]).toString().replace('3', 'Profession');
}
}
range.setValues(data);
Var range1 = doc.getRange("D2:D");
​Var data1 = range1.getValues();
for (var row = 0; row < data.length; row++) {
for (var col = 0; col < data[row].length; col++) {
data1[row][col] = (data1[row][col]).toString().replace('1', 'Agree');
data1[row][col] = (data1[row][col]).toString().replace('2', 'Disagree');
data1[row][col] = (data1[row][col]).toString().replace('2', 'Don't know');
}
}
range1.setValues(data1);
};

How to grab all non-empty cell data in row - Google Sheets Script Editor

I'm not sure if this is even possible, and to be quite honest, I haven't tried many things because I wasn't sure where to even start. I'm using the Script Editor from Google Sheets, btw. I know there are SpreadsheetApp.getRange() and another to get the values or something like that. But what I want is a bit specific.
Is there a way to grab all the cell data in a given row and put it into an array? The rows will vary in size, that's why I can't do an exact range.
So for example, if I were to have rows have these values:
abc | 123 | 987 | efg
blah| cat | 654
I want to be able to grab those values and place them into an array like ["abc", "123", "987, "efg"]. And then if I run the function on the next row, it'd be ["blah", "cat", "654"].
Actually, it can be placed into any data type as long as there's a delimiter I'd be able to use.
Thank you in advance!
This is easier to achieve without a script, with the formula =filter(1:1, len(1:1)) returning all values in nonempty cells in row 1, etc.
From a script, you can do something like this:
function flat_nonempty() {
var range = SpreadsheetApp.getActiveSheet().getRange("A:A"); // range here
var values = range.getValues();
var flat = values.reduce(function(acc, row) {
return acc.concat(row.filter(function(x) {
return x != "";
}));
}, []);
Logger.log(flat); // flat list of values, no blanks
}
The range here can have one row or multiple rows.
Is this useful for you?
When there are abc | 123 | 987 | efg, blah| cat | 654 and abc | | 987 | efg at row 1, row 2 and row 3, myFunction(1), myFunction(2) and myFunction(3) return [abc, 123.0, 987.0, efg], [blah, cat, 654.0] and [abc, , 987.0, efg].
function myFunction(row){
var ss = SpreadsheetApp.getActiveSheet();
var values = ss.getRange(row, 1, 1, ss.getLastColumn()).getValues();
var c = 0;
for (var c = values[0].length - 1; c >= 0; c--){
if (values[0][c] != "") break;
}
values[0].splice(c + 1, values[0].length - c - 1);
return values[0];
}
Adapted from https://developers.google.com/apps-script/reference/spreadsheet/sheet#getDataRange()
You can loop and create your array directly if you do want an array. Here I am illustrating creating for all the rows, but if you know the row you want, you could do without the outer loop (and just set i to the desired row).
function rowArr() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheets()[0];
// This represents ALL the rows
var range = sheet.getDataRange();
var values = range.getValues();
for (var i = 0; i < values.length; i++) {
var row = [];
for (var j = 0; j < values[i].length; j++) {
if (values[i][j]) {
row.push(values[i][j]);
}
}
Logger.log(row);
}
}

Google Sheets - Clone row but with only data from one cell

I'm looking to clone a row 3x, but only keeping data from one column.
So essentially I have the following [Name / Time / Booking], and each row is populated with all 3 properties, I'm trying to create 3 blank rows underneath each current row which is populated with only the persons name.
Can't work how to do it in scripting and can't find a plugin to do this. My data set is over 10,000 big so doing it manually isn't an option.
What I have:
What I want:
UPDATED code:
function duplicateRows() {
var sh, v, arr, c, b;
sh = SpreadsheetApp.getActive()
.getSheetByName('Blad1')
v = sh.getRange(1, 1, sh.getLastRow(), 40)
.getValues();
arr = [v[0]];
v.splice(1)
.forEach(function (r, i) {
arr.push(r)
c = 0
while (c < 3) {
dup = makeEmptyArrayXEl(40)
dup[0] = r[0];
arr.push(dup)
c += 1;
}
})
sh.getRange(1, 1, arr.length, arr[0].length)
.setValues(arr);
}
function makeEmptyArrayXEl(num) {
var arr = [];
for (var i = 0; i < num; i++) {
arr.push("")
}
return arr;
}
Would this work for you? It requires a free column to the left of Booking in the original data set. The formula below is a new sheet.
=ArrayFormula(sort({A2:A4,B2:B4,C2:C4;A2:A4,D2:D4,D2:D4;A2:A4,D2:D4,D2:D4;A2:A4,D2:D4,D2:D4},1,FALSE))

How to compare two column in a spreadsheet

I have 30 columns and 1000 rows, I would like to compare column1 with another column. IF the value dont match then I would like to colour it red. Below is a small dataset in my spreadsheet:
A B C D E F ...
1 name sName email
2
3
.
n
Because I have a large dataset and I want to storing my columns in a array, the first row is heading. This is what I have done, however when testing I get empty result, can someone correct me what I am doing wrong?
var index = [];
var sheet = SpreadsheetApp.getActiveSheet();
function col(){
var data = sheet.getDataRange().getValues();
for (var i = 1; i <= data.length; i++) {
te = index[i] = data[1];
Logger.log(columnIndex[i])
if (data[3] != data[7]){
// column_id.setFontColor('red'); <--- I can set the background like this
}
}
}
From the code you can see I am scanning whole spreadsheet data[1] get the heading and in if loop (data[3] != data[7]) compare two columns. I do have to work on my colour variable but that can be done once I get the data that I need.
Try to check this tutorial if it can help you with your problem. This tutorial use a Google AppsScript to compare the two columns. If differences are found, the script should point these out. If no differences are found at all, the script should put out the text "[id]". Just customize this code for your own function.
Here is the code used to achieve this kind of comparison
function stringComparison(s1, s2) {
// lets test both variables are the same object type if not throw an error
if (Object.prototype.toString.call(s1) !== Object.prototype.toString.call(s2)){
throw("Both values need to be an array of cells or individual cells")
}
// if we are looking at two arrays of cells make sure the sizes match and only one column wide
if( Object.prototype.toString.call(s1) === '[object Array]' ) {
if (s1.length != s2.length || s1[0].length > 1 || s2[0].length > 1){
throw("Arrays of cells need to be same size and 1 column wide");
}
// since we are working with an array intialise the return
var out = [];
for (r in s1){ // loop over the rows and find differences using diff sub function
out.push([diff(s1[r][0], s2[r][0])]);
}
return out; // return response
} else { // we are working with two cells so return diff
return diff(s1, s2)
}
}
function diff (s1, s2){
var out = "[ ";
var notid = false;
// loop to match each character
for (var n = 0; n < s1.length; n++){
if (s1.charAt(n) == s2.charAt(n)){
out += "–";
} else {
out += s2.charAt(n);
notid = true;
}
out += " ";
}
out += " ]"
return (notid) ? out : "[ id. ]"; // if notid(entical) return output or [id.]
}
For more information, just check the tutorial link above and this SO question on how to compare two Spreadsheets.

Resources