nikhilboorla Posted July 22, 2020 Report Posted July 22, 2020 I have two columns name and ename from two tables and I want to find the string match probability percentage for these columns as both contain employee name. For eg. If values are name ename ----- -------- Kim Gutierrez Kimberly Gutierrez then the probability percentage is around 0.3 as the percentage value will be between 0 and 1. Now I have find the % for around 500k names in the table. i have tried this query but didn't get the required output. CREATE TEMPORARY FUNCTION similarity(name STRING, ename STRING) RETURNS FLOAT64 LANGUAGE js AS """ var _extend = function(dst) { var sources = Array.prototype.slice.call(arguments, 1); for (var i=0; i<sources.length; ++i) { var src = sources[i]; for (var p in src) { if (src.hasOwnProperty(p)) dst[p] = src[p]; } } return dst; }; var Levenshtein = { get: function(str1, str2) { // base cases if (str1 === str2) return 0; if (str1.length === 0) return str2.length; if (str2.length === 0) return str1.length; // two rows var prevRow = new Array(str2.length + 1), curCol, nextCol, i, j, tmp; // initialise previous row for (i=0; i<prevRow.length; ++i) { prevRow[i] = i; } // calculate current row distance from previous row for (i=0; i<str1.length; ++i) { nextCol = i + 1; for (j=0; j<str2.length; ++j) { curCol = nextCol; // substution nextCol = prevRow[j] + ( (str1.charAt(i) === str2.charAt(j)) ? 0 : 1 ); // insertion tmp = curCol + 1; if (nextCol > tmp) { nextCol = tmp; } // deletion tmp = prevRow[j + 1] + 1; if (nextCol > tmp) { nextCol = tmp; } // copy current col value into previous (in preparation for next iteration) prevRow[j] = curCol; } // copy last col value into previous (in preparation for next iteration) prevRow[j] = nextCol; } return nextCol; } }; var the_Name1; try { the_Name1 = decodeURI(name).toLowerCase(); } catch (ex) { the_Name1 = Name1.toLowerCase(); } try { the_Name2 = decodeURI(ename).toLowerCase(); } catch (ex) { the_Name2 = Name2.toLowerCase(); } return 1 - Levenshtein.get(the_Name1, the_Name2) / the_Name1.length; """; SELECT t1.name, t2.ename, similarity(t1.name, t2.ename) similarity_percentge, FROM ratings t1 JOIN reviews t2 on t1.location_id=t2.location_id Quote
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.