001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.validator.routines;
018
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
022
023 /**
024 * <p><b>Domain name</b> validation routines.</p>
025 *
026 * <p>
027 * This validator provides methods for validating Internet domain names
028 * and top-level domains.
029 * </p>
030 *
031 * <p>Domain names are evaluated according
032 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
033 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
 * section 2.1. No accommodation is provided for the specialized needs of
035 * other applications; if the domain name has been URL-encoded, for example,
036 * validation will fail even though the equivalent plaintext version of the
037 * same name would have passed.
038 * </p>
039 *
040 * <p>
041 * Validation is also provided for top-level domains (TLDs) as defined and
042 * maintained by the Internet Assigned Numbers Authority (IANA):
043 * </p>
044 *
045 * <ul>
046 * <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
047 * (<code>.arpa</code>, etc.)</li>
048 * <li>{@link #isValidGenericTld} - validates generic TLDs
049 * (<code>.com, .org</code>, etc.)</li>
050 * <li>{@link #isValidCountryCodeTld} - validates country code TLDs
051 * (<code>.us, .uk, .cn</code>, etc.)</li>
052 * </ul>
053 *
054 * <p>
055 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
056 * methods to ensure that a given domain name matches a specific IP; see
057 * {@link java.net.InetAddress} for that functionality.)
058 * </p>
059 *
060 * @version $Revision: 600231 $ $Date: 2007-12-02 04:39:09 +0100 (So, 02. Dez 2007) $
061 * @since Validator 1.4
062 */
063 public class DomainValidator implements Serializable {
064
065 // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
066 private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
067 private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
068 private static final String DOMAIN_NAME_REGEX =
069 "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$";
070
071 /**
072 * Singleton instance of this validator.
073 */
074 private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator();
075
076 /**
077 * RegexValidator for matching domains.
078 */
079 private final RegexValidator domainRegex =
080 new RegexValidator(DOMAIN_NAME_REGEX);
081
082 /**
083 * Returns the singleton instance of this validator.
084 * @return the singleton instance of this validator
085 */
086 public static DomainValidator getInstance() {
087 return DOMAIN_VALIDATOR;
088 }
089
090 /** Private constructor. */
091 private DomainValidator() {}
092
093 /**
094 * Returns true if the specified <code>String</code> parses
095 * as a valid domain name with a recognized top-level domain.
096 * The parsing is case-sensitive.
097 * @param domain the parameter to check for domain name syntax
098 * @return true if the parameter is a valid domain name
099 */
100 public boolean isValid(String domain) {
101 String[] groups = domainRegex.match(domain);
102 if (groups != null && groups.length > 0) {
103 return isValidTld(groups[0]);
104 } else {
105 return false;
106 }
107 }
108
109 /**
110 * Returns true if the specified <code>String</code> matches any
111 * IANA-defined top-level domain. Leading dots are ignored if present.
112 * The search is case-sensitive.
113 * @param tld the parameter to check for TLD status
114 * @return true if the parameter is a TLD
115 */
116 public boolean isValidTld(String tld) {
117 return isValidInfrastructureTld(tld)
118 || isValidGenericTld(tld)
119 || isValidCountryCodeTld(tld);
120 }
121
122 /**
123 * Returns true if the specified <code>String</code> matches any
124 * IANA-defined infrastructure top-level domain. Leading dots are
125 * ignored if present. The search is case-sensitive.
126 * @param iTld the parameter to check for infrastructure TLD status
127 * @return true if the parameter is an infrastructure TLD
128 */
129 public boolean isValidInfrastructureTld(String iTld) {
130 return INFRASTRUCTURE_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase()));
131 }
132
133 /**
134 * Returns true if the specified <code>String</code> matches any
135 * IANA-defined generic top-level domain. Leading dots are ignored
136 * if present. The search is case-sensitive.
137 * @param gTld the parameter to check for generic TLD status
138 * @return true if the parameter is a generic TLD
139 */
140 public boolean isValidGenericTld(String gTld) {
141 return GENERIC_TLD_LIST.contains(chompLeadingDot(gTld.toLowerCase()));
142 }
143
144 /**
145 * Returns true if the specified <code>String</code> matches any
146 * IANA-defined country code top-level domain. Leading dots are
147 * ignored if present. The search is case-sensitive.
148 * @param ccTld the parameter to check for country code TLD status
149 * @return true if the parameter is a country code TLD
150 */
151 public boolean isValidCountryCodeTld(String ccTld) {
152 return COUNTRY_CODE_TLD_LIST.contains(chompLeadingDot(ccTld.toLowerCase()));
153 }
154
155 private String chompLeadingDot(String str) {
156 if (str.startsWith(".")) {
157 return str.substring(1);
158 } else {
159 return str;
160 }
161 }
162
163 // ---------------------------------------------
164 // ----- TLDs defined by IANA
165 // ----- Authoritative and comprehensive list at:
166 // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
167
168 private static final String[] INFRASTRUCTURE_TLDS = new String[] {
169 "arpa", // internet infrastructure
170 "root" // diagnostic marker for non-truncated root zone
171 };
172
173 private static final String[] GENERIC_TLDS = new String[] {
174 "aero", // air transport industry
175 "asia", // Pan-Asia/Asia Pacific
176 "biz", // businesses
177 "cat", // Catalan linguistic/cultural community
178 "com", // commercial enterprises
179 "coop", // cooperative associations
180 "info", // informational sites
181 "jobs", // Human Resource managers
182 "mobi", // mobile products and services
183 "museum", // museums, surprisingly enough
184 "name", // individuals' sites
185 "net", // internet support infrastructure/business
186 "org", // noncommercial organizations
187 "pro", // credentialed professionals and entities
188 "tel", // contact data for businesses and individuals
189 "travel", // entities in the travel industry
190 "gov", // United States Government
191 "edu", // accredited postsecondary US education entities
192 "mil", // United States Military
193 "int" // organizations established by international treaty
194 };
195
196 private static final String[] COUNTRY_CODE_TLDS = new String[] {
197 "ac", // Ascension Island
198 "ad", // Andorra
199 "ae", // United Arab Emirates
200 "af", // Afghanistan
201 "ag", // Antigua and Barbuda
202 "ai", // Anguilla
203 "al", // Albania
204 "am", // Armenia
205 "an", // Netherlands Antilles
206 "ao", // Angola
207 "aq", // Antarctica
208 "ar", // Argentina
209 "as", // American Samoa
210 "at", // Austria
211 "au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
212 "aw", // Aruba
213 "ax", // ??land
214 "az", // Azerbaijan
215 "ba", // Bosnia and Herzegovina
216 "bb", // Barbados
217 "bd", // Bangladesh
218 "be", // Belgium
219 "bf", // Burkina Faso
220 "bg", // Bulgaria
221 "bh", // Bahrain
222 "bi", // Burundi
223 "bj", // Benin
224 "bm", // Bermuda
225 "bn", // Brunei Darussalam
226 "bo", // Bolivia
227 "br", // Brazil
228 "bs", // Bahamas
229 "bt", // Bhutan
230 "bv", // Bouvet Island
231 "bw", // Botswana
232 "by", // Belarus
233 "bz", // Belize
234 "ca", // Canada
235 "cc", // Cocos (Keeling) Islands
236 "cd", // Democratic Republic of the Congo (formerly Zaire)
237 "cf", // Central African Republic
238 "cg", // Republic of the Congo
239 "ch", // Switzerland
240 "ci", // C??te d'Ivoire
241 "ck", // Cook Islands
242 "cl", // Chile
243 "cm", // Cameroon
244 "cn", // China, mainland
245 "co", // Colombia
246 "cr", // Costa Rica
247 "cu", // Cuba
248 "cv", // Cape Verde
249 "cx", // Christmas Island
250 "cy", // Cyprus
251 "cz", // Czech Republic
252 "de", // Germany
253 "dj", // Djibouti
254 "dk", // Denmark
255 "dm", // Dominica
256 "do", // Dominican Republic
257 "dz", // Algeria
258 "ec", // Ecuador
259 "ee", // Estonia
260 "eg", // Egypt
261 "er", // Eritrea
262 "es", // Spain
263 "et", // Ethiopia
264 "eu", // European Union
265 "fi", // Finland
266 "fj", // Fiji
267 "fk", // Falkland Islands
268 "fm", // Federated States of Micronesia
269 "fo", // Faroe Islands
270 "fr", // France
271 "ga", // Gabon
272 "gb", // Great Britain (United Kingdom)
273 "gd", // Grenada
274 "ge", // Georgia
275 "gf", // French Guiana
276 "gg", // Guernsey
277 "gh", // Ghana
278 "gi", // Gibraltar
279 "gl", // Greenland
280 "gm", // The Gambia
281 "gn", // Guinea
282 "gp", // Guadeloupe
283 "gq", // Equatorial Guinea
284 "gr", // Greece
285 "gs", // South Georgia and the South Sandwich Islands
286 "gt", // Guatemala
287 "gu", // Guam
288 "gw", // Guinea-Bissau
289 "gy", // Guyana
290 "hk", // Hong Kong
291 "hm", // Heard Island and McDonald Islands
292 "hn", // Honduras
293 "hr", // Croatia (Hrvatska)
294 "ht", // Haiti
295 "hu", // Hungary
296 "id", // Indonesia
297 "ie", // Ireland (??ire)
298 "il", // Israel
299 "im", // Isle of Man
300 "in", // India
301 "io", // British Indian Ocean Territory
302 "iq", // Iraq
303 "ir", // Iran
304 "is", // Iceland
305 "it", // Italy
306 "je", // Jersey
307 "jm", // Jamaica
308 "jo", // Jordan
309 "jp", // Japan
310 "ke", // Kenya
311 "kg", // Kyrgyzstan
312 "kh", // Cambodia (Khmer)
313 "ki", // Kiribati
314 "km", // Comoros
315 "kn", // Saint Kitts and Nevis
316 "kp", // North Korea
317 "kr", // South Korea
318 "kw", // Kuwait
319 "ky", // Cayman Islands
320 "kz", // Kazakhstan
321 "la", // Laos (currently being marketed as the official domain for Los Angeles)
322 "lb", // Lebanon
323 "lc", // Saint Lucia
324 "li", // Liechtenstein
325 "lk", // Sri Lanka
326 "lr", // Liberia
327 "ls", // Lesotho
328 "lt", // Lithuania
329 "lu", // Luxembourg
330 "lv", // Latvia
331 "ly", // Libya
332 "ma", // Morocco
333 "mc", // Monaco
334 "md", // Moldova
335 "me", // Montenegro
336 "mg", // Madagascar
337 "mh", // Marshall Islands
338 "mk", // Republic of Macedonia
339 "ml", // Mali
340 "mm", // Myanmar
341 "mn", // Mongolia
342 "mo", // Macau
343 "mp", // Northern Mariana Islands
344 "mq", // Martinique
345 "mr", // Mauritania
346 "ms", // Montserrat
347 "mt", // Malta
348 "mu", // Mauritius
349 "mv", // Maldives
350 "mw", // Malawi
351 "mx", // Mexico
352 "my", // Malaysia
353 "mz", // Mozambique
354 "na", // Namibia
355 "nc", // New Caledonia
356 "ne", // Niger
357 "nf", // Norfolk Island
358 "ng", // Nigeria
359 "ni", // Nicaragua
360 "nl", // Netherlands
361 "no", // Norway
362 "np", // Nepal
363 "nr", // Nauru
364 "nu", // Niue
365 "nz", // New Zealand
366 "om", // Oman
367 "pa", // Panama
368 "pe", // Peru
369 "pf", // French Polynesia With Clipperton Island
370 "pg", // Papua New Guinea
371 "ph", // Philippines
372 "pk", // Pakistan
373 "pl", // Poland
374 "pm", // Saint-Pierre and Miquelon
375 "pn", // Pitcairn Islands
376 "pr", // Puerto Rico
377 "ps", // Palestinian territories (PA-controlled West Bank and Gaza Strip)
378 "pt", // Portugal
379 "pw", // Palau
380 "py", // Paraguay
381 "qa", // Qatar
382 "re", // R??union
383 "ro", // Romania
384 "rs", // Serbia
385 "ru", // Russia
386 "rw", // Rwanda
387 "sa", // Saudi Arabia
388 "sb", // Solomon Islands
389 "sc", // Seychelles
390 "sd", // Sudan
391 "se", // Sweden
392 "sg", // Singapore
393 "sh", // Saint Helena
394 "si", // Slovenia
395 "sj", // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
396 "sk", // Slovakia
397 "sl", // Sierra Leone
398 "sm", // San Marino
399 "sn", // Senegal
400 "so", // Somalia
401 "sr", // Suriname
402 "st", // S??o Tom?? and Pr??ncipe
403 "su", // Soviet Union (deprecated)
404 "sv", // El Salvador
405 "sy", // Syria
406 "sz", // Swaziland
407 "tc", // Turks and Caicos Islands
408 "td", // Chad
409 "tf", // French Southern and Antarctic Lands
410 "tg", // Togo
411 "th", // Thailand
412 "tj", // Tajikistan
413 "tk", // Tokelau
414 "tl", // East Timor (deprecated old code)
415 "tm", // Turkmenistan
416 "tn", // Tunisia
417 "to", // Tonga
418 "tp", // East Timor
419 "tr", // Turkey
420 "tt", // Trinidad and Tobago
421 "tv", // Tuvalu
422 "tw", // Taiwan, Republic of China
423 "tz", // Tanzania
424 "ua", // Ukraine
425 "ug", // Uganda
426 "uk", // United Kingdom
427 "um", // United States Minor Outlying Islands
428 "us", // United States of America
429 "uy", // Uruguay
430 "uz", // Uzbekistan
431 "va", // Vatican City State
432 "vc", // Saint Vincent and the Grenadines
433 "ve", // Venezuela
434 "vg", // British Virgin Islands
435 "vi", // U.S. Virgin Islands
436 "vn", // Vietnam
437 "vu", // Vanuatu
438 "wf", // Wallis and Futuna
439 "ws", // Samoa (formerly Western Samoa)
440 "ye", // Yemen
441 "yt", // Mayotte
442 "yu", // Serbia and Montenegro (originally Yugoslavia)
443 "za", // South Africa
444 "zm", // Zambia
445 "zw", // Zimbabwe
446 };
447
448 private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS);
449 private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS);
450 private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS);
451 }