Can I search countries by country_code in AdWords API v201603? - google-ads-api
I want to estimate searches for a keyword, limited to a country.
A similar question was asked about 4 years ago: "Can I search countries by country_code in AdWords API v201109?"
The accepted answer was: it is not currently possible.
We are now at version v201603, and I wonder if there is a change.
In my specific case I code in Java, but I will appreciate an answer in any language; I'll be able to find the relevant Java counterpart.
Update:
To add to the accepted answer, here is the list of the country codes extracted from AdWords CSV file.
private static HashMap<String, Long> COUNTRIES = new HashMap<String, Long>() {
{
put("ad",2020L); //,Andorra
put("ae",2784L); //,United Arab Emirates
put("af",2004L); //,Afghanistan
put("ag",2028L); //,Antigua and Barbuda
put("al",2008L); //,Albania
put("am",2051L); //,Armenia
put("ao",2024L); //,Angola
put("aq",2010L); //,Antarctica
put("ar",2032L); //,Argentina
put("as",2016L); //,American Samoa
put("at",2040L); //,Austria
put("au",2036L); //,Australia
put("az",2031L); //,Azerbaijan
put("ba",2070L); //,Bosnia and Herzegovina
put("bb",2052L); //,Barbados
put("bd",2050L); //,Bangladesh
put("be",2056L); //,Belgium
put("bf",2854L); //,Burkina Faso
put("bg",2100L); //,Bulgaria
put("bh",2048L); //,Bahrain
put("bi",2108L); //,Burundi
put("bj",2204L); //,Benin
put("bn",2096L); //,Brunei
put("bo",2068L); //,Bolivia
put("br",2076L); //,Brazil
put("bs",2044L); //,The Bahamas
put("bt",2064L); //,Bhutan
put("bw",2072L); //,Botswana
put("by",2112L); //,Belarus
put("bz",2084L); //,Belize
put("ca",2124L); //,Canada
put("cc",2166L); //,Cocos (Keeling) Islands
put("cd",2180L); //,Democratic Republic of the Congo
put("cf",2140L); //,Central African Republic
put("cg",2178L); //,Republic of the Congo
put("ch",2756L); //,Switzerland
put("ci",2384L); //,Cote d'Ivoire
put("ck",2184L); //,Cook Islands
put("cl",2152L); //,Chile
put("cm",2120L); //,Cameroon
put("cn",2156L); //,China
put("co",2170L); //,Colombia
put("cr",2188L); //,Costa Rica
put("cv",2132L); //,Cape Verde
put("cx",2162L); //,Christmas Island
put("cy",2196L); //,Cyprus
put("cz",2203L); //,Czech Republic
put("de",2276L); //,Germany
put("dj",2262L); //,Djibouti
put("dk",2208L); //,Denmark
put("dm",2212L); //,Dominica
put("do",2214L); //,Dominican Republic
put("dz",2012L); //,Algeria
put("ec",2218L); //,Ecuador
put("ee",2233L); //,Estonia
put("eg",2818L); //,Egypt
put("er",2232L); //,Eritrea
put("es",2724L); //,Spain
put("et",2231L); //,Ethiopia
put("fi",2246L); //,Finland
put("fj",2242L); //,Fiji
put("fm",2583L); //,Federated States of Micronesia
put("fr",2250L); //,France
put("ga",2266L); //,Gabon
put("gb",2826L); //,United Kingdom
put("gd",2308L); //,Grenada
put("ge",2268L); //,Georgia
put("gh",2288L); //,Ghana
put("gm",2270L); //,The Gambia
put("gn",2324L); //,Guinea
put("gq",2226L); //,Equatorial Guinea
put("gr",2300L); //,Greece
put("gs",2239L); //,South Georgia and the South Sandwich Islands
put("gt",2320L); //,Guatemala
put("gu",2316L); //,Guam
put("gw",2624L); //,Guinea-Bissau
put("gy",2328L); //,Guyana
put("hm",2334L); //,Heard Island and McDonald Islands
put("hn",2340L); //,Honduras
put("hr",2191L); //,Croatia
put("ht",2332L); //,Haiti
put("hu",2348L); //,Hungary
put("id",2360L); //,Indonesia
put("ie",2372L); //,Ireland
put("il",2376L); //,Israel
put("in",2356L); //,India
put("iq",2368L); //,Iraq
put("is",2352L); //,Iceland
put("it",2380L); //,Italy
put("jm",2388L); //,Jamaica
put("jo",2400L); //,Jordan
put("jp",2392L); //,Japan
put("ke",2404L); //,Kenya
put("kg",2417L); //,Kyrgyzstan
put("kh",2116L); //,Cambodia
put("ki",2296L); //,Kiribati
put("km",2174L); //,Comoros
put("kn",2659L); //,Saint Kitts and Nevis
put("kr",2410L); //,South Korea
put("kw",2414L); //,Kuwait
put("kz",2398L); //,Kazakhstan
put("la",2418L); //,Laos
put("lb",2422L); //,Lebanon
put("lc",2662L); //,Saint Lucia
put("li",2438L); //,Liechtenstein
put("lk",2144L); //,Sri Lanka
put("lr",2430L); //,Liberia
put("ls",2426L); //,Lesotho
put("lt",2440L); //,Lithuania
put("lu",2442L); //,Luxembourg
put("lv",2428L); //,Latvia
put("ly",2434L); //,Libya
put("ma",2504L); //,Morocco
put("mc",2492L); //,Monaco
put("md",2498L); //,Moldova
put("me",2499L); //,Montenegro
put("mg",2450L); //,Madagascar
put("mh",2584L); //,Marshall Islands
put("mk",2807L); //,Macedonia (fyroM)
put("ml",2466L); //,Mali
put("mn",2496L); //,Mongolia
put("mp",2580L); //,Northern Mariana Islands
put("mr",2478L); //,Mauritania
put("mt",2470L); //,Malta
put("mu",2480L); //,Mauritius
put("mv",2462L); //,Maldives
put("mw",2454L); //,Malawi
put("mx",2484L); //,Mexico
put("my",2458L); //,Malaysia
put("mz",2508L); //,Mozambique
put("na",2516L); //,Namibia
put("nc",2540L); //,New Caledonia
put("ne",2562L); //,Niger
put("nf",2574L); //,Norfolk Island
put("ng",2566L); //,Nigeria
put("ni",2558L); //,Nicaragua
put("nl",2528L); //,Netherlands
put("no",2578L); //,Norway
put("np",2524L); //,Nepal
put("nr",2520L); //,Nauru
put("nu",2570L); //,Niue
put("nz",2554L); //,New Zealand
put("om",2512L); //,Oman
put("pa",2591L); //,Panama
put("pe",2604L); //,Peru
put("pf",2258L); //,French Polynesia
put("pg",2598L); //,Papua New Guinea
put("ph",2608L); //,Philippines
put("pk",2586L); //,Pakistan
put("pl",2616L); //,Poland
put("pm",2666L); //,Saint Pierre and Miquelon
put("pn",2612L); //,Pitcairn Islands
put("pt",2620L); //,Portugal
put("pw",2585L); //,Palau
put("py",2600L); //,Paraguay
put("qa",2634L); //,Qatar
put("ro",2642L); //,Romania
put("rs",2688L); //,Serbia
put("ru",2643L); //,Russia
put("rw",2646L); //,Rwanda
put("sa",2682L); //,Saudi Arabia
put("sb",2090L); //,Solomon Islands
put("sc",2690L); //,Seychelles
put("se",2752L); //,Sweden
put("sg",2702L); //,Singapore
put("sh",2654L); //,Saint Helena
put("si",2705L); //,Slovenia
put("sk",2703L); //,Slovakia
put("sl",2694L); //,Sierra Leone
put("sm",2674L); //,San Marino
put("sn",2686L); //,Senegal
put("so",2706L); //,Somalia
put("sr",2740L); //,Suriname
put("st",2678L); //,Sao Tome and Principe
put("sv",2222L); //,El Salvador
put("sz",2748L); //,Swaziland
put("td",2148L); //,Chad
put("tf",2260L); //,French Southern and Antarctic Lands
put("tg",2768L); //,Togo
put("th",2764L); //,Thailand
put("tj",2762L); //,Tajikistan
put("tk",2772L); //,Tokelau
put("tl",2626L); //,Timor-Leste
put("tm",2795L); //,Turkmenistan
put("tn",2788L); //,Tunisia
put("to",2776L); //,Tonga
put("tr",2792L); //,Turkey
put("tt",2780L); //,Trinidad and Tobago
put("tv",2798L); //,Tuvalu
put("tz",2834L); //,Tanzania
put("ua",2804L); //,Ukraine
put("ug",2800L); //,Uganda
put("um",2581L); //,United States Minor Outlying Islands
put("us",2840L); //,United States
put("uy",2858L); //,Uruguay
put("uz",2860L); //,Uzbekistan
put("va",2336L); //,Vatican City
put("vc",2670L); //,Saint Vincent and the Grenadines
put("ve",2862L); //,Venezuela
put("vn",2704L); //,Vietnam
put("vu",2548L); //,Vanuatu
put("wf",2876L); //,Wallis and Futuna
put("ws",2882L); //,Samoa
put("ye",2887L); //,Yemen
put("za",2710L); //,South Africa
put("zm",2894L); //,Zambia
put("zw",2716L); //,Zimbabwe
}
};
Yes - you can use the Targeting Idea Service to specify any location code (which includes countries, regions, cities, etc) for a Search Volume based query.
I am unfamiliar with the Java client library but I am sure it will be similar to the .NET one. The C# code below outputs the search volume for the terms 'blue fedora' and 'red fedora' for queries based in Canada.
// Fetch search-volume stats for a fixed list of keywords, restricted to a single
// location (Canada), and print one line per keyword.
var targetingIdeaSvc = (TargetingIdeaService)awUser.GetService(AdWordsService.v201601.TargetingIdeaService);
var searchQueries = new string[] { "blue fedora", "red fedora" };
var ideasPg = targetingIdeaSvc.get(new TargetingIdeaSelector
{
    ideaType = IdeaType.KEYWORD,
    // STATS requests statistics for exactly the supplied queries.
    requestType = RequestType.STATS,
    requestedAttributeTypes = new AttributeType[]
    {
        AttributeType.SEARCH_VOLUME
    },
    searchParameters = new SearchParameter[]
    {
        new RelatedToQuerySearchParameter
        {
            queries = searchQueries,
        },
        new LocationSearchParameter
        {
            locations = new Location[]
            {
                new Location
                {
                    id = 2124 // This is the location id for Canada - comprehensive list of location ids is available here https://developers.google.com/adwords/api/docs/appendix/geotargeting
                }
            }
        },
    },
    paging = new Paging
    {
        numberResults = 5,
        startIndex = 0
    }
});

// Bound the loop by the entries actually returned so a page with no (or fewer)
// entries does not throw; the original indexed entries[i] unconditionally.
var resultCount = ideasPg.entries?.Length ?? 0;
for (var i = 0; i < searchQueries.Length && i < resultCount; i++)
{
    // Only SEARCH_VOLUME was requested, so the first attribute of each entry is read as the volume.
    var searchVolume = (ideasPg.entries[i].data.First().value as LongAttribute).value;
    // Interpolated verbatim string ($@): the doubled quotes emit a literal " around the term.
    Console.WriteLine($@"Search Term: ""{searchQueries[i]}"" has search volume of {searchVolume} in Canada");
}
This service uses a location id. You can look up a specific id from the AdWords Geo-Location reference page (or even access this list programmatically if you need to).
Related
Year of publication not appearing in the list of references at the end of a compiled LaTeX document using bibliographystyle{naturemag}
I'm new to LaTeX, so I am using Overleaf to create my citations. My bibliographystyle is naturemag. My references are not including the year of publication in the reference list. Here is an example of the output I am seeing. Cite these [1, 2] References [1] Claw, K. et al. A framework for enhancing ethical genomic research with indigenous communities. Nature Communications 9, 2957. URL https://doi.org/10.1038/s41467-018-05188-3. [2] Tsosie, K., Yracheta, J., Kolopenuk, J. & Smith, R. Indigenous data sovereignties and data sharing in biological anthropology. American Journal of Physical Anthropology 174, 183–186. URL https://doi.org/10.1002/ajpa2484. 1 Below is a sample of my bib.bib bibliography file: @article{claw2018a, author = {Claw, K.G. and Anderson, M.Z. and Begay, R.L. and Tsosie, K.S. and Fox, K. and Garrison, N.A. and Consortium, S.internship for In peoples in G.}, title = {A framework for enhancing ethical genomic research with Indigenous communities}, volume = {9}, pages = {2957}, url = {https://doi.org/10.1038/s41467-018-05188-3}, doi = {10.1038/s41467-018-05188-3}, language = {en}, journal = {Nature Communications}, number = {1}, date = {2018} } @article{tsosie2021b, author = {Tsosie, K.S. and Yracheta, J.M. and Kolopenuk, J.A. and Smith, R.W.}, date = {2021}, title = {Indigenous data sovereignties and data sharing in biological anthropology}, volume = {174}, pages = {183–186}, url = {https://doi.org/10.1002/ajpa2484}, doi = {10.1002/ajpa2484}, language = {en}, journal = {American Journal of Physical Anthropology}, number = {2} } Below are the LaTeX commands I am running: \documentclass{article} \usepackage[utf8]{inputenc} \title{paper} \author{author} \date{February 2022} \begin{document} \maketitle Cite these \cite{claw2018a,tsosie2021b} \bibliographystyle{naturemag} \bibliography{bib.bib} \end{document}
You are using bibtex and not biblatex, so you need to use the less flexible year field instead of the date field: @article{claw2018a, author = {Claw, K.G. and Anderson, M.Z. and Begay, R.L. and Tsosie, K.S. and Fox, K. and Garrison, N.A. and Consortium, S.internship for In peoples in G.}, title = {A framework for enhancing ethical genomic research with Indigenous communities}, volume = {9}, pages = {2957}, url = {https://doi.org/10.1038/s41467-018-05188-3}, doi = {10.1038/s41467-018-05188-3}, language = {en}, journal = {Nature Communications}, number = {1}, year = {2018} } @article{tsosie2021b, author = {Tsosie, K.S. and Yracheta, J.M. and Kolopenuk, J.A. and Smith, R.W.}, year = {2021}, title = {Indigenous data sovereignties and data sharing in biological anthropology}, volume = {174}, pages = {183–186}, url = {https://doi.org/10.1002/ajpa2484}, doi = {10.1002/ajpa2484}, language = {en}, journal = {American Journal of Physical Anthropology}, number = {2} }
Highmaps - How to remove canary islands from spain highlight
I am using highmaps with the custom world mapdata. Only Spain is being highlighted in my example. However, when you mouseover the country Spain some islands to the north of Africa highlight as well. I suspect that these are the Canary Islands. How do I prevent these Islands from being highlighted? Example: http://bastionstudio.co.za/test/spain.html
It can be solved by pushing new coordinates for Spain. http://bastionstudio.co.za/test/spain-no-canary-islands.html console.log(Highcharts.maps['custom/world'].features); Highcharts.maps['custom/world'].features.push({ "type":"Feature", "id":"ES", "properties": { "hc-group":"admin0", "hc-middle-x":0.76, "hc-middle-y":0.27, "hc-key":"es", "hc-a2":"ES","name":"Spain", "labelrank":"2", "country-abbrev":"Sp.", "subregion":"Southern Europe", "region-wb":"Europe & Central Asia", "iso-a3":"ESP","iso-a2":"ES","woe-id":"23424950", "continent":"Europe" }, "geometry":{ "type":"MultiPolygon", "coordinates":[ [ [ [4045, 7594], [4021, 7599], [4011, 7621], [3984, 7629], [3997, 7692], [3998, 7762], [4020, 7781], [4009, 7795], [3944, 7795], [3930, 7838], [3969, 7860], [3988, 7852], [4098, 7851], [4151, 7846], [4203, 7821], [4246, 7818], [4249, 7812], [4255, 7814], [4262, 7812], [4262, 7813], [4262, 7814], [4263, 7812], [4263, 7812], [4279, 7808], [4298, 7812], [4300, 7795], [4265, 7771], [4233, 7763], [4194, 7709], [4211, 7683], [4189, 7669], [4183, 7644], [4154, 7635], [4143, 7615], [4072, 7613], [4046, 7594], [4045, 7594] ] ] ] } });
You can also create a custom map: https://www.highcharts.com/docs/maps/custom-geojson-maps or remove Canary Islands from the point graphic, but then they will completely disappear from the map: chart: { events: { load: function() { var point = this.series[0].points[0], pathArr = point.graphic.attr('d').split('M'); pathArr.shift(); point.graphic.attr({ 'd': 'M' + pathArr[3] }); } } } Live demo: http://jsfiddle.net/BlackLabel/6oa08vyd/ API: https://api.highcharts.com/class-reference/Highcharts.SVGElement#attr
Automated Business Cards with Zapier
I am using a combination of Google Sheets, Google Presentation, and Zapier to automate my company's business card design workflow. My template looks like this: {{Full Name}} {{Job Title}} {{School Site}} {{Street Address}} Broken Arrow, Oklahoma {{Zip Code}} {{Office Number}}{{Print Cell/Fax}} {{Email}} {{Print Cell/Fax}} is populated by a JavaScript step in Zapier that detects whether the submitter included their cell and fax numbers (these are optional) and outputs a single variable. This script looks like this: var cell = inputData.cell var fax = inputData.fax var cellFax if (cell) { console.log('C: ', cell); cell='C: '+ inputData.cell; }else{ console.log('empty'); cell=null; } if (fax) { console.log('F: ', fax); fax='F: '+ inputData.fax; }else{ console.log('empty'); fax=null; } if (cell!=null && fax!=null) { cellFax=' / '+cell+fax; }else if(cell!=null && fax==null) { cellFax=' / '+cell; }else if(cell==null && fax!=null) { cellFax=' / '+fax; }else{ cellFax=null; } output = {cellFax: cellFax}; In the event that both cell and fax variables are empty I need it to populate my template placeholder as empty, like this: John Doe Janitor International Space Station 555 S. Milkyway St. Saturn, Vermont 11122 555-259-5555 email@email.com But what I keep getting is this: John Doe Janitor International Space Station 555 S. Milkyway St. Saturn, Vermont 11122 555-259-5555{{Print Cell/Fax}} email@email.com How do I get it to populate as empty?
Try changing just the last part of the JavaScript from: }else{ cellFax=null; } to: }else{ cellFax=' '; } so that the step outputs a blank string instead of null.
Comparing Stock Prices
I have a table full of different villages and their commodities, and the stock, buying, and selling price they have. The table looks like this: stocks = { village = { fish = { 12, 76, 0 }, silver = { 0, 220, 0 }, stone = { 0, 200, 0 }, silk = { 4, 1100, 0 }, salt = { 0, 10, 0 }, wood = { 23, 0, 49 } }, } It has three numbers in the commodity table, the first being the amount they have in stock (useful since if I'm comparing prices, I'd only want it to count if there were some in stock to buy!), second is the price they are buying for (higher the better), and the third the price they're selling it for (lower the better, and this is where stock comes in handy, being more than 0). I've tortured myself trying to come up with ways to make a nice function (like so, prices("fish")) that, when given a commodity to look for, will find the best places to buy and sell to make a profit. The coding language I'm using to do this is Lua but I'm sure this could be done in many languages with the know-how ^_^ If someone could point me in the right direction here then I'd be forever grateful!
Here's a direction... local function prices(commodity) for placeName, placeData in pairs(stocks) do local data = placeData[commodity] if (data) then print(placeName .. " has " .. data[1].. " " .. commodity) -- ... end end -- ... end I'm not clear on what results you're after but the above is one way to dig into the data structure.
How to parse freeform street/postal address out of text, and into components
We do business largely in the United States and are trying to improve user experience by combining all the address fields into a single text area. But there are a few problems: (1) the address the user types may not be correct or in a standard format; (2) the address must be separated into parts (street, city, state, etc.) to process credit card payments; (3) users may enter more than just their address (like their name or company with it); (4) Google can do this, but the Terms of Service and query limits are prohibitive, especially on a tight budget. Apparently, this is a common question: "PHP script to parse address?"; "How do I parse the free format address to save into the DataBase"; "java postal address parser"; "More efficient way to extract address components"; "How can i show a pre populated postal address in contacts screen with street, city, zip on android"; "PHP regexp US address". Is there a way to isolate an address from the text around it and break it into pieces? Is there a regular expression to parse addresses?
I saw this question a lot when I worked for an address verification company. I'm posting the answer here to make it more accessible to programmers who are searching around with the same question. The company I was at processed billions of addresses, and we learned a lot in the process. First, we need to understand a few things about addresses. Addresses are not regular This means that regular expressions are out. I've seen it all, from simple regular expressions that match addresses in a very specific format, to this: /\s+(\d{2,5}\s+)(?![a|p]m\b)(([a-zA-Z|\s+]{1,5}){1,2})?([\s|,|.]+)?(([a-zA-Z|\s+]{1,30}){1,4})(court|ct|street|st|drive|dr|lane|ln|road|rd|blvd)([\s|,|.|;]+)?(([a-zA-Z|\s+]{1,30}){1,2})([\s|,|.]+)?\b(AK|AL|AR|AZ|CA|CO|CT|DC|DE|FL|GA|GU|HI|IA|ID|IL|IN|KS|KY|LA|MA|MD|ME|MI|MN|MO|MS|MT|NC|ND|NE|NH|NJ|NM|NV|NY|OH|OK|OR|PA|RI|SC|SD|TN|TX|UT|VA|VI|VT|WA|WI|WV|WY)([\s|,|.]+)?(\s+\d{5})?([\s|,|.]+)/i ... to this where a 900+ line-class file generates a supermassive regular expression on the fly to match even more. I don't recommend these (for example, here's a fiddle of the above regex, that makes plenty of mistakes). There isn't an easy magic formula to get this to work. In theory and by theory, it's not possible to match addresses with a regular expression. USPS Publication 28 documents the many formats of addresses that are possible, with all their keywords and variations. Worst of all, addresses are often ambiguous. Words can mean more than one thing ("St" can be "Saint" or "Street") and there are words that I'm pretty sure they invented. (Who knew that "Stravenue" was a street suffix?) You'd need some code that really understands addresses, and if that code does exist, it's a trade secret. But you could probably roll your own if you're really into that. Addresses come in unexpected shapes and sizes Here are some contrived (but complete) addresses: 1) 102 main street Anytown, state 2) 400n 600e #2, 52173 3) p.o. 
#104 60203 Even these are possibly valid: 4) 829 LKSDFJlkjsdflkjsdljf Bkpw 12345 5) 205 1105 14 90210 Obviously, these are not standardized. Punctuation and line breaks not guaranteed. Here's what's going on: Number 1 is complete because it contains a street address and a city and state. With that information, there's enough to identify the address, and it can be considered "deliverable" (with some standardization). Number 2 is complete because it also contains a street address (with secondary/unit number) and a 5-digit ZIP code, which is enough to identify an address. Number 3 is a complete post office box format, as it contains a ZIP code. Number 4 is also complete because the ZIP code is unique, meaning that a private entity or corporation has purchased that address space. A unique ZIP code is for high-volume or concentrated delivery spaces. Anything addressed to ZIP code 12345 goes to General Electric in Schenectady, NY. This example won't reach anyone in particular, but the USPS would still be able to deliver it. Number 5 is also complete, believe it or not. With just those numbers, the full address can be discovered when parsed against a database of all possible addresses. Filling in the missing directionals, secondary designator, and ZIP+4 code is trivial when you see each number as a component. Here's what it looks like, fully expanded and standardized: 205 N 1105 W Apt 14 Beverly Hills CA 90210-5221 Address data is not your own In most countries that provide official address data to licensed vendors, the address data itself belongs to the governing agency. In the US, the USPS owns the addresses. The same is true for Canada Post, Royal Mail, and others, though each country enforces or defines ownership a little differently. Knowing this is important, since it usually forbids reverse-engineering the address database. You have to be careful how to acquire, store, and use the data. 
Google Maps is a common go-to for quick address fixes, but the TOS is rather prohibitive; for example, you can't use their data or APIs without showing a Google Map, and for non-commercial purposes only (unless you pay), and you can't store the data (except for temporary caching). Makes sense. Google's data is some of the best in the world. However, Google Maps does not verify the address. If an address does not exist, it will still show you where the address would be if it did exist (try it on your own street; use a house number that you know doesn't exist). This is useful sometimes, but be aware of that. Nominatim's usage policy is similarly limiting, especially for high volume and commercial use, and the data is mostly drawn from free sources, so it isn't as well maintained (such is the nature of open projects) -- however, this may still suit your needs. It is supported by a great community. The USPS itself has an API, but it goes down a lot and comes with no guarantees nor support. It might also be hard to use. Some people use it sparingly with no problems. But it's easy to miss that the USPS requires that you use their API only for confirming addresses to ship through them. People expect addresses to be hard Unfortunately, we've conditioned our society to expect addresses to be complicated. There's dozens of good UX articles all over the Internet about this, but the fact is, if you have an address form with individual fields, that's what users expect, even though it makes it harder for edge-case addresses that don't fit the format the form is expecting, or maybe the form requires a field it shouldn't. Or users don't know where to put a certain part of their address. I could go on and on about the bad UX of checkout forms these days, but instead I'll just say that combining the addresses into a single field will be a welcome change -- people will be able to type their address how they see fit, rather than trying to figure out your lengthy form. 
However, this change will be unexpected and users may find it a little jarring at first. Just be aware of that. Part of this pain can be alleviated by putting the country field out front, before the address. When they fill out the country field first, you know how to make your form appear. Maybe you have a good way to deal with single-field US addresses, so if they select United States, you can reduce your form to a single field, otherwise show the component fields. Just things to think about! Now we know why it's hard; what can you do about it? The USPS licenses vendors through a process called CASS™ Certification to provide verified addresses to customers. These vendors have access to the USPS database, updated monthly. Their software must conform to rigorous standards to be certified, and they don't often require agreement to such limiting terms as discussed above. There are many CASS-Certified companies that can process lists or have APIs: Melissa Data, Experian QAS, and SmartyStreets to name a few. (Due to getting flak for "advertising" I've truncated my answer at this point. It's up to you to find a solution that works for you.) The Truth: Really, folks, I don't work at any of these companies. It's not an advertisement.
There are many street address parsers. They come in two basic flavors - ones that have databases of place names and street names, and ones that don't. A regular expression street address parser can get up to about a 95% success rate without much trouble. Then you start hitting the unusual cases. The Perl one in CPAN, "Geo::StreetAddress::US", is about that good. There are Python and Javascript ports of that, all open source. I have an improved version in Python which moves the success rate up slightly by handling more cases. To get the last 3% right, though, you need databases to help with disambiguation. A database with 3-digit ZIP codes and US state names and abbreviations is a big help. When a parser sees a consistent postal code and state name, it can start to lock on to the format. This works very well for the US and UK. Proper street address parsing starts from the end and works backwards. That's how the USPS systems do it. Addresses are least ambiguous at the end, where country names, city names, and postal codes are relatively easy to recognize. Street names can usually be isolated. Locations on streets are the most complex to parse; there you encounter things such as "Fifth Floor" and "Staples Pavillion". That's when a database is a big help.
UPDATE: Geocode.xyz now works worldwide. For examples see https://geocode.xyz For USA, Mexico and Canada, see geocoder.ca. For example: Input: something going on near the intersection of main and arthur kill rd new york Output: <geodata> <latt>40.5123510000</latt> <longt>-74.2500500000</longt> <AreaCode>347,718</AreaCode> <TimeZone>America/New_York</TimeZone> <standard> <street1>main</street1> <street2>arthur kill</street2> <stnumber/> <staddress/> <city>STATEN ISLAND</city> <prov>NY</prov> <postal>11385</postal> <confidence>0.9</confidence> </standard> </geodata> You may also check the results in the web interface or get output as Json or Jsonp. eg. I'm looking for restaurants around 123 Main Street, New York
No code? For shame! Here is a simple JavaScript address parser. It's pretty awful for every single reason that Matt gives in his dissertation above (which I almost 100% agree with: addresses are complex types, and humans make mistakes; better to outsource and automate this - when you can afford to). But rather than cry, I decided to try: This code works OK for parsing most Esri results for findAddressCandidate and also with some other (reverse)geocoders that return single-line address where street/city/state are delimited by commas. You can extend if you want or write country-specific parsers. Or just use this as case study of how challenging this exercise can be or at how lousy I am at JavaScript. I admit I only spent about thirty mins on this (future iterations could add caches, zip validation, and state lookups as well as user location context), but it worked for my use case: End user sees form that parses geocode search response into 4 textboxes. If address parsing comes out wrong (which is rare unless source data was poor) it's no big deal - the user gets to verify and fix it! (But for automated solutions could either discard/ignore or flag as error so dev can either support the new format or fix source data.) /* address assumptions: - US addresses only (probably want separate parser for different countries) - No country code expected. - if last token is a number it is probably a postal code -- 5 digit number means more likely - if last token is a hyphenated string it might be a postal code -- if both sides are numeric, and in form #####-#### it is more likely - if city is supplied, state will also be supplied (city names not unique) - zip/postal code may be omitted even if has city & state - state may be two-char code or may be full state name. - commas: -- last comma is usually city/state separator -- second-to-last comma is possibly street/city separator -- other commas are building-specific stuff that I don't care about right now. 
- token count: -- because units, street names, and city names may contain spaces token count highly variable. -- simplest address has at least two tokens: 714 OAK -- common simple address has at least four tokens: 714 S OAK ST -- common full (mailing) address has at least 5-7: --- 714 OAK, RUMTOWN, VA 59201 --- 714 S OAK ST, RUMTOWN, VA 59201 -- complex address may have a dozen or more: --- MAGICICIAN SUPPLY, LLC, UNIT 213A, MAGIC TOWN MALL, 13 MAGIC CIRCLE DRIVE, LAND OF MAGIC, MA 73122-3412 */ var rawtext = $("textarea").val(); var rawlist = rawtext.split("\n"); function ParseAddressEsri(singleLineaddressString) { var address = { street: "", city: "", state: "", postalCode: "" }; // tokenize by space (retain commas in tokens) var tokens = singleLineaddressString.split(/[\s]+/); var tokenCount = tokens.length; var lastToken = tokens.pop(); if ( // if numeric assume postal code (ignore length, for now) !isNaN(lastToken) || // if hyphenated assume long zip code, ignore whether numeric, for now lastToken.split("-").length - 1 === 1) { address.postalCode = lastToken; lastToken = tokens.pop(); } if (lastToken && isNaN(lastToken)) { if (address.postalCode.length && lastToken.length === 2) { // assume state/province code ONLY if had postal code // otherwise it could be a simple address like "714 S OAK ST" // where "ST" for "street" looks like two-letter state code // possibly this could be resolved with registry of known state codes, but meh. (and may collide anyway) address.state = lastToken; lastToken = tokens.pop(); } if (address.state.length === 0) { // check for special case: might have State name instead of State Code. var stateNameParts = [lastToken.endsWith(",") ? 
lastToken.substring(0, lastToken.length - 1) : lastToken]; // check remaining tokens from right-to-left for the first comma while (2 + 2 != 5) { lastToken = tokens.pop(); if (!lastToken) break; else if (lastToken.endsWith(",")) { // found separator, ignore stuff on left side tokens.push(lastToken); // put it back break; } else { stateNameParts.unshift(lastToken); } } address.state = stateNameParts.join(' '); lastToken = tokens.pop(); } } if (lastToken) { // here is where it gets trickier: if (address.state.length) { // if there is a state, then assume there is also a city and street. // PROBLEM: city may be multiple words (spaces) // but we can pretty safely assume next-from-last token is at least PART of the city name // most cities are single-name. It would be very helpful if we knew more context, like // the name of the city user is in. But ignore that for now. // ideally would have zip code service or lookup to give city name for the zip code. var cityNameParts = [lastToken.endsWith(",") ? lastToken.substring(0, lastToken.length - 1) : lastToken]; // assumption / RULE: street and city must have comma delimiter // addresses that do not follow this rule will be wrong only if city has space // but don't care because Esri formats put comma before City var streetNameParts = []; // check remaining tokens from right-to-left for the first comma while (2 + 2 != 5) { lastToken = tokens.pop(); if (!lastToken) break; else if (lastToken.endsWith(",")) { // found end of street address (may include building, etc. - don't care right now) // add token back to end, but remove trailing comma (it did its job) tokens.push(lastToken.endsWith(",") ? lastToken.substring(0, lastToken.length - 1) : lastToken); streetNameParts = tokens; break; } else { cityNameParts.unshift(lastToken); } } address.city = cityNameParts.join(' '); address.street = streetNameParts.join(' '); } else { // if there is NO state, then assume there is NO city also, just street! 
(easy) // reasoning: city names are not very original (Portland, OR and Portland, ME) so if user wants city they need to store state also (but if you are only ever in Portlan, OR, you don't care about city/state) // put last token back in list, then rejoin on space tokens.push(lastToken); address.street = tokens.join(' '); } } // when parsing right-to-left hard to know if street only vs street + city/state // hack fix for now is to shift stuff around. // assumption/requirement: will always have at least street part; you will never just get "city, state" // could possibly tweak this with options or more intelligent parsing&sniffing if (!address.city && address.state) { address.city = address.state; address.state = ''; } if (!address.street) { address.street = address.city; address.city = ''; } return address; } // get list of objects with discrete address properties var addresses = rawlist .filter(function(o) { return o.length > 0 }) .map(ParseAddressEsri); $("#output").text(JSON.stringify(addresses)); console.log(addresses); <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script> <textarea> 27488 Stanford Ave, Bowden, North Dakota 380 New York St, Redlands, CA 92373 13212 E SPRAGUE AVE, FAIR VALLEY, MD 99201 1005 N Gravenstein Highway, Sebastopol CA 95472 A. P. Croll & Son 2299 Lewes-Georgetown Hwy, Georgetown, DE 19947 11522 Shawnee Road, Greenwood, DE 19950 144 Kings Highway, S.W. Dover, DE 19901 Intergrated Const. Services 2 Penns Way Suite 405, New Castle, DE 19720 Humes Realty 33 Bridle Ridge Court, Lewes, DE 19958 Nichols Excavation 2742 Pulaski Hwy, Newark, DE 19711 2284 Bryn Zion Road, Smyrna, DE 19904 VEI Dover Crossroads, LLC 1500 Serpentine Road, Suite 100 Baltimore MD 21 580 North Dupont Highway, Dover, DE 19901 P.O. 
Box 778, Dover, DE 19903 714 S OAK ST 714 S OAK ST, RUM TOWN, VA, 99201 3142 E SPRAGUE AVE, WHISKEY VALLEY, WA 99281 27488 Stanford Ave, Bowden, North Dakota 380 New York St, Redlands, CA 92373 </textarea> <div id="output"> </div>
For U.S. address parsing, I prefer using the usaddress package that is available in pip. python3 -m pip install usaddress Usage sample: #!/usr/bin/env python3 # -*- coding: utf-8 -*- # address_parser.py import sys from usaddress import tag from json import dumps, loads if __name__ == '__main__': tag_mapping = { 'Recipient': 'recipient', 'AddressNumber': 'addressStreet', 'AddressNumberPrefix': 'addressStreet', 'AddressNumberSuffix': 'addressStreet', 'StreetName': 'addressStreet', 'StreetNamePreDirectional': 'addressStreet', 'StreetNamePreModifier': 'addressStreet', 'StreetNamePreType': 'addressStreet', 'StreetNamePostDirectional': 'addressStreet', 'StreetNamePostModifier': 'addressStreet', 'StreetNamePostType': 'addressStreet', 'CornerOf': 'addressStreet', 'IntersectionSeparator': 'addressStreet', 'LandmarkName': 'addressStreet', 'USPSBoxGroupID': 'addressStreet', 'USPSBoxGroupType': 'addressStreet', 'USPSBoxID': 'addressStreet', 'USPSBoxType': 'addressStreet', 'BuildingName': 'addressStreet', 'OccupancyType': 'addressStreet', 'OccupancyIdentifier': 'addressStreet', 'SubaddressIdentifier': 'addressStreet', 'SubaddressType': 'addressStreet', 'PlaceName': 'addressCity', 'StateName': 'addressState', 'ZipCode': 'addressPostalCode', } try: address, _ = tag(' '.join(sys.argv[1:]), tag_mapping=tag_mapping) except: with open('failed_address.txt', 'a') as fp: fp.write(sys.argv[1] + '\n') print(dumps({})) else: print(dumps(dict(address))) Running address_parser.py: python3 address_parser.py 9757 East Arcadia Ave. Saugus MA 01906 {"addressStreet": "9757 East Arcadia Ave.", "addressCity": "Saugus", "addressState": "MA", "addressPostalCode": "01906"}
I'm late to the party, but here is an Excel VBA script I wrote years ago for Australia. It can be easily modified to support other countries. I've made a GitHub repository of the C# code here. I've hosted it on my site and you can download it here: http://jeremythompson.net/Rocks/ParseAddress.xlsm Strategy For any country with a PostCode that's numeric or can be matched with a RegEx, my strategy works very well: First, we detect the first name and surname, which are assumed to be on the top line. It's easy to skip the name and start with the address by unticking the checkbox (called 'Name is top row', as shown below). Next, it's safe to expect that the address, consisting of the street and number, comes before the suburb, and that the St, Pde, Ave, Av, Rd, Cres, Loop, etc. acts as the separator. Detecting the suburb vs. the state and even the country can trick the most sophisticated parsers, as there can be conflicts. To overcome this I use a PostCode lookup, based on the fact that after stripping street and apartment/unit numbers as well as the PO Box, Ph, Fax, Mobile, etc., only the PostCode number will remain. This is easy to match with a RegEx and then look up the suburb(s) and country. Your national post office service will provide a list of post codes with suburbs and states free of charge that you can store in an Excel sheet, DB table, text/JSON/XML file, etc. Finally, since some post codes have multiple suburbs, we check which suburb appears in the address. Example VBA Code DISCLAIMER: I know this code is not perfect, or even written well; however, it's very easy to convert to any programming language and run in any type of application. 
The strategy is the answer depending on your country and rules, take this code as an example:

Option Explicit

Private Const TopRow As Integer = 0
'Number of entries served by Street(); keep in sync with that function.
Private Const StreetCount As Integer = 20

'Parses the multi-line text in the "Address" named range of the active sheet and
'writes the pieces it recognises to the named ranges FirstName, Surname, Street,
'Mobile, Phone, Email, PostCode, Suburb, State and PhExt.
'When the "chkConfirm" checkbox is ticked, each value is confirmed via MsgBox first.
Public Sub ParseAddress()
    Dim strArr() As String
    Dim sigRow() As String
    Dim i As Integer
    Dim j As Integer
    Dim Stat As String
    Dim SpaceInName As Integer
    Dim Temp As String
    Dim PhExt As String

    'Best-effort parser: any statement that fails is skipped.
    On Error Resume Next

    Temp = ActiveSheet.Range("Address")

    'Split info into array
    strArr = Split(Temp, vbLf)

    'Trim the array
    For i = 0 To UBound(strArr)
        strArr(i) = VBA.Trim(strArr(i))
    Next i

    'Remove empty items/rows
    ReDim sigRow(LBound(strArr) To UBound(strArr))
    For i = LBound(strArr) To UBound(strArr)
        If Trim(strArr(i)) <> "" Then
            sigRow(j) = strArr(i)
            j = j + 1
        End If
    Next i
    ReDim Preserve sigRow(LBound(strArr) To j)

    'Find the name (MUST BE ON THE FIRST ROW UNLESS CHECKBOX UNTICKED)
    i = TopRow
    If ActiveSheet.Shapes("chkFirst").ControlFormat.Value = 1 Then

        'Everything before the first space is the first name, the rest is the surname
        SpaceInName = InStr(1, sigRow(i), " ", vbTextCompare) - 1

        If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
            ActiveSheet.Range("FirstName") = VBA.Left(sigRow(i), SpaceInName)
        Else
            If MsgBox("First Name: " & VBA.Mid$(sigRow(i), 1, SpaceInName), vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("FirstName") = VBA.Left(sigRow(i), SpaceInName)
        End If

        If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
            ActiveSheet.Range("Surname") = VBA.Mid(sigRow(i), SpaceInName + 2)
        Else
            If MsgBox("Surname: " & VBA.Mid(sigRow(i), SpaceInName + 2), vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("Surname") = VBA.Mid(sigRow(i), SpaceInName + 2)
        End If
        sigRow(i) = ""
    End If

    'Find the Street by looking for a "St, Pde, Ave, Av, Rd, Cres, loop, etc"
    For i = 1 To UBound(sigRow)
        If Len(sigRow(i)) > 0 Then
            'Searches the WHOLE Street() table (the loop used to stop at index 8)
            j = FindStreetType(sigRow(i))
            If j >= 0 Then
                'Find the position of the street in order to get the suburb
                SpaceInName = InStr(1, VBA.UCase(sigRow(i)), Street(j), vbTextCompare) + Len(Street(j)) - 1

                'If its a po box then add 5 chars
                If VBA.Right(Street(j), 3) = "BOX" Then SpaceInName = SpaceInName + 5

                If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
                    ActiveSheet.Range("Street") = VBA.Mid(sigRow(i), 1, SpaceInName)
                Else
                    If MsgBox("Street Address: " & VBA.Mid(sigRow(i), 1, SpaceInName), vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("Street") = VBA.Mid(sigRow(i), 1, SpaceInName)
                End If

                'Trim the Street, Number leaving the Suburb if its exists on the same line
                '(the former "VBA.Mid(...) + 2" statement added a number to a string and
                ' was only ever skipped by On Error Resume Next, so it has been removed)
                sigRow(i) = Replace(sigRow(i), VBA.Mid(sigRow(i), 1, SpaceInName), "")
                Exit For
            End If
        End If
    Next i

    'Mobile
    For i = 1 To UBound(sigRow)
        If Len(sigRow(i)) > 0 Then
            For j = 0 To 3
                Temp = Mb(j)
                If VBA.Left(VBA.UCase(sigRow(i)), Len(Temp)) = Temp Then
                    If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
                        ActiveSheet.Range("Mobile") = VBA.Mid(sigRow(i), Len(Temp) + 2)
                    Else
                        If MsgBox("Mobile: " & VBA.Mid(sigRow(i), Len(Temp) + 2), vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("Mobile") = VBA.Mid(sigRow(i), Len(Temp) + 2)
                    End If
                    sigRow(i) = ""
                    GoTo PastMobile:
                End If
            Next j
        End If
    Next i
PastMobile:

    'Phone
    For i = 1 To UBound(sigRow)
        If Len(sigRow(i)) > 0 Then
            For j = 0 To 1
                Temp = Ph(j)
                If VBA.Left(VBA.UCase(sigRow(i)), Len(Temp)) = Temp Then
                    'TODO: Detect the intl or national extension here.. or if we can from the postcode.
                    If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
                        ActiveSheet.Range("Phone") = VBA.Mid(sigRow(i), Len(Temp) + 3)
                    Else
                        If MsgBox("Phone: " & VBA.Mid(sigRow(i), Len(Temp) + 3), vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("Phone") = VBA.Mid(sigRow(i), Len(Temp) + 3)
                    End If
                    sigRow(i) = ""
                    GoTo PastPhone:
                End If
            Next j
        End If
    Next i
PastPhone:

    'Email
    For i = 1 To UBound(sigRow)
        If Len(sigRow(i)) > 0 Then
            'replace with regEx search
            'Each InStr result is compared with 0 separately: "InStr(..) And InStr(..)" is a
            'bitwise And of two positions and can be 0 even when both substrings are present.
            'The marker searched for is "@" (it used to be "#").
            If InStr(1, sigRow(i), "@", vbTextCompare) > 0 And InStr(1, VBA.UCase(sigRow(i)), ".CO", vbTextCompare) > 0 Then
                Dim email As String
                email = sigRow(i)
                email = Replace(VBA.UCase(email), "EMAIL:", "")
                email = Replace(VBA.UCase(email), "E-MAIL:", "")
                email = Replace(VBA.UCase(email), "E:", "")
                email = Replace(VBA.UCase(Trim(email)), "E ", "")
                email = VBA.LCase(email)

                If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
                    ActiveSheet.Range("Email") = email
                Else
                    If MsgBox("Email: " & email, vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("Email") = email
                End If
                sigRow(i) = ""
                Exit For
            End If
        End If
    Next i

    'Now the only remaining items will be the postcode, suburb, country
    'there shouldn't be any numbers (eg. from PoBox,Ph,Fax,Mobile) except for the Post Code

    'Join the string and filter out the Post Code
    Temp = Join(sigRow, vbCrLf)
    Temp = Trim(Temp)

    For i = 1 To Len(Temp)

        Dim postCode As String
        postCode = VBA.Mid(Temp, i, 4)

        'In Australia PostCodes are 4 digits
        '"####" demands exactly four digits; IsNumeric also accepted short tails, "12.3" etc.
        If postCode Like "####" Then

            If ActiveSheet.Shapes("chkConfirm").ControlFormat.Value = 0 Then
                ActiveSheet.Range("PostCode") = postCode
            Else
                If MsgBox("Post Code: " & postCode, vbQuestion + vbYesNo, "Confirm Details") = vbYes Then ActiveSheet.Range("PostCode") = postCode
            End If

            'Lookup the Suburb and State based on the PostCode, the PostCode sheet has the lookup
            'Column 1 = post code, column 2 = suburb, column 3 = state (read relative to the range)
            Dim mySuburbArray As Range
            Set mySuburbArray = Sheets("PostCodes").Range("A2:B16670")

            For j = 1 To mySuburbArray.Columns(1).Cells.Count
                If mySuburbArray.Cells(j, 1) = postCode Then
                    'Check if the suburb is listed in the address
                    If InStr(1, UCase(Temp), mySuburbArray.Cells(j, 2), vbTextCompare) > 0 Then

                        'Set the Suburb and State
                        ActiveSheet.Range("Suburb") = mySuburbArray.Cells(j, 2)
                        Stat = mySuburbArray.Cells(j, 3)
                        ActiveSheet.Range("State") = Stat

                        'Knowing the State - for Australia we can get the telephone Ext
                        PhExt = PhExtension(VBA.UCase(Stat))
                        ActiveSheet.Range("PhExt") = PhExt

                        'remove the phone extension from the number
                        Dim prePhone As String
                        prePhone = ActiveSheet.Range("Phone")
                        prePhone = Replace(prePhone, PhExt & " ", "")
                        prePhone = Replace(prePhone, "(" & PhExt & ") ", "")
                        prePhone = Replace(prePhone, "(" & PhExt & ")", "")
                        ActiveSheet.Range("Phone") = prePhone
                        Exit For
                    End If
                End If
            Next j
            Exit For
        End If
    Next i

End Sub

'Returns the Street() index of the street type found in RowText, or -1 when none is found.
'The first table entry that occurs in the text wins, except that a longer entry starting
'at the same position replaces it (" STREET" beats " ST", " AVENUE" beats " AVE").
Private Function FindStreetType(ByVal RowText As String) As Integer
    Dim idx As Integer
    Dim pos As Long
    Dim bestPos As Long
    Dim bestIdx As Integer
    Dim upperText As String

    bestIdx = -1
    upperText = VBA.UCase(RowText)

    For idx = 0 To StreetCount - 1
        pos = InStr(1, upperText, Street(idx), vbTextCompare)
        If pos > 0 Then
            If bestIdx < 0 Then
                bestIdx = idx
                bestPos = pos
            ElseIf pos = bestPos And Len(Street(idx)) > Len(Street(bestIdx)) Then
                bestIdx = idx
            End If
        End If
    Next idx

    FindStreetType = bestIdx
End Function

'Returns the Australian geographic area code for a state/territory abbreviation
'(02 NSW/ACT, 03 VIC/TAS, 07 QLD, 08 SA/WA/NT); empty string for anything else.
Private Function PhExtension(ByVal State As String) As String
    Select Case State
        Case Is = "NSW", "ACT"
            PhExtension = "02"
        Case Is = "VIC", "TAS"
            PhExtension = "03"
        Case Is = "QLD"
            PhExtension = "07"
        Case Is = "SA", "WA", "NT"
            PhExtension = "08"
    End Select
End Function

'Line prefixes that mark a landline phone number.
Private Function Ph(ByVal Num As Integer) As String
    Select Case Num
        Case Is = 0
            Ph = "PH"
        Case Is = 1
            Ph = "PHONE"
        'Case Is = 2
        'Ph = "P"
    End Select
End Function

'Line prefixes that mark a mobile phone number.
Private Function Mb(ByVal Num As Integer) As String
    Select Case Num
        Case Is = 0
            Mb = "MB"
        Case Is = 1
            Mb = "MOB"
        Case Is = 2
            Mb = "CELL"
        Case Is = 3
            Mb = "MOBILE"
        'Case Is = 4
        'Mb = "M"
    End Select
End Function

'Line prefixes that mark a fax number.
Private Function Fax(ByVal Num As Integer) As String
    Select Case Num
        Case Is = 0
            Fax = "FAX"
        Case Is = 1
            Fax = "FACSIMILE"
        'Case Is = 2
        'Fax = "F"
    End Select
End Function

'State/territory abbreviations by index.
Private Function State(ByVal Num As Integer) As String
    Select Case Num
        Case Is = 0
            State = "NSW"
        Case Is = 1
            State = "QLD"
        Case Is = 2
            State = "VIC"
        Case Is = 3
            State = "NT"
        Case Is = 4
            State = "WA"
        Case Is = 5
            State = "SA"
        Case Is = 6
            State = "TAS"
    End Select
End Function

'Street-type markers by index (0 To StreetCount - 1), upper case.
'Entry 18 was a duplicate of entry 6 ("PO BOX") and now holds " CRESCENT".
Private Function Street(ByVal Num As Integer) As String
    Select Case Num
        Case Is = 0
            Street = " ST"
        Case Is = 1
            Street = " RD"
        Case Is = 2
            Street = " AVE"
        Case Is = 3
            Street = " AV"
        Case Is = 4
            Street = " CRES"
        Case Is = 5
            Street = " LOOP"
        Case Is = 6
            Street = "PO BOX"
        Case Is = 7
            Street = " STREET"
        Case Is = 8
            Street = " ROAD"
        Case Is = 9
            Street = " AVENUE"
        Case Is = 10
            Street = " CRESENT"
        Case Is = 11
            Street = " PARADE"
        Case Is = 12
            Street = " PDE"
        Case Is = 13
            Street = " LANE"
        Case Is = 14
            Street = " COURT"
        Case Is = 15
            Street = " BLVD"
        Case Is = 16
            Street = "P.O. BOX"
        Case Is = 17
            Street = "P.O BOX"
        Case Is = 18
            Street = " CRESCENT"
        Case Is = 19
            Street = "POBOX"
    End Select
End Function