001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.io.IOException; 021import java.util.ArrayDeque; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Deque; 026import java.util.List; 027import java.util.Objects; 028import java.util.regex.Matcher; 029import java.util.regex.Pattern; 030 031/** 032 * General file name and file path manipulation utilities. 033 * <p> 034 * When dealing with file names you can hit problems when moving from a Windows 035 * based development machine to a Unix based production machine. 036 * This class aims to help avoid those problems. 037 * <p> 038 * <b>NOTE</b>: You may be able to avoid using this class entirely simply by 039 * using JDK {@link java.io.File File} objects and the two argument constructor 040 * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}. 041 * <p> 042 * Most methods on this class are designed to work the same on both Unix and Windows. 043 * Those that don't include 'System', 'Unix' or 'Windows' in their name. 
044 * <p> 045 * Most methods recognize both separators (forward and back), and both 046 * sets of prefixes. See the Javadoc of each method for details. 047 * <p> 048 * This class defines six components within a file name 049 * (example C:\dev\project\file.txt): 050 * <ul> 051 * <li>the prefix - C:\</li> 052 * <li>the path - dev\project\</li> 053 * <li>the full path - C:\dev\project\</li> 054 * <li>the name - file.txt</li> 055 * <li>the base name - file</li> 056 * <li>the extension - txt</li> 057 * </ul> 058 * Note that this class works best if directory file names end with a separator. 059 * If you omit the last separator, it is impossible to determine if the file name 060 * corresponds to a file or a directory. As a result, we have chosen to say 061 * it corresponds to a file. 062 * <p> 063 * This class only supports Unix and Windows style names. 064 * Prefixes are matched as follows: 065 * <pre> 066 * Windows: 067 * a\b\c.txt --> "" --> relative 068 * \a\b\c.txt --> "\" --> current drive absolute 069 * C:a\b\c.txt --> "C:" --> drive relative 070 * C:\a\b\c.txt --> "C:\" --> absolute 071 * \\server\a\b\c.txt --> "\\server\" --> UNC 072 * 073 * Unix: 074 * a/b/c.txt --> "" --> relative 075 * /a/b/c.txt --> "/" --> absolute 076 * ~/a/b/c.txt --> "~/" --> current user 077 * ~ --> "~/" --> current user (slash added) 078 * ~user/a/b/c.txt --> "~user/" --> named user 079 * ~user --> "~user/" --> named user (slash added) 080 * </pre> 081 * Both prefix styles are matched always, irrespective of the machine that you are 082 * currently running on. 083 * <p> 084 * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils. 085 * 086 * @since 1.1 087 */ 088public class FilenameUtils { 089 090 private static final String[] EMPTY_STRING_ARRAY = {}; 091 092 private static final String EMPTY_STRING = ""; 093 094 private static final int NOT_FOUND = -1; 095 096 /** 097 * The extension separator character. 
098 * @since 1.4 099 */ 100 public static final char EXTENSION_SEPARATOR = '.'; 101 102 /** 103 * The extension separator String. 104 * @since 1.4 105 */ 106 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 107 108 /** 109 * The Unix separator character. 110 */ 111 private static final char UNIX_SEPARATOR = '/'; 112 113 /** 114 * The Windows separator character. 115 */ 116 private static final char WINDOWS_SEPARATOR = '\\'; 117 118 /** 119 * The system separator character. 120 */ 121 private static final char SYSTEM_SEPARATOR = File.separatorChar; 122 123 /** 124 * The separator character that is the opposite of the system separator. 125 */ 126 private static final char OTHER_SEPARATOR; 127 static { 128 if (isSystemWindows()) { 129 OTHER_SEPARATOR = UNIX_SEPARATOR; 130 } else { 131 OTHER_SEPARATOR = WINDOWS_SEPARATOR; 132 } 133 } 134 135 /** 136 * Instances should NOT be constructed in standard programming. 137 */ 138 public FilenameUtils() { 139 } 140 141 //----------------------------------------------------------------------- 142 /** 143 * Determines if Windows file system is in use. 144 * 145 * @return true if the system is Windows 146 */ 147 static boolean isSystemWindows() { 148 return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR; 149 } 150 151 //----------------------------------------------------------------------- 152 /** 153 * Checks if the character is a separator. 154 * 155 * @param ch the character to check 156 * @return true if it is a separator character 157 */ 158 private static boolean isSeparator(final char ch) { 159 return ch == UNIX_SEPARATOR || ch == WINDOWS_SEPARATOR; 160 } 161 162 //----------------------------------------------------------------------- 163 /** 164 * Normalizes a path, removing double and single dot path steps. 165 * <p> 166 * This method normalizes a path to a standard format. 167 * The input may contain separators in either Unix or Windows format. 
168 * The output will contain separators in the format of the system. 169 * <p> 170 * A trailing slash will be retained. 171 * A double slash will be merged to a single slash (but UNC names are handled). 172 * A single dot path segment will be removed. 173 * A double dot will cause that path segment and the one before to be removed. 174 * If the double dot has no parent path segment to work with, {@code null} 175 * is returned. 176 * <p> 177 * The output will be the same on both Unix and Windows except 178 * for the separator character. 179 * <pre> 180 * /foo// --> /foo/ 181 * /foo/./ --> /foo/ 182 * /foo/../bar --> /bar 183 * /foo/../bar/ --> /bar/ 184 * /foo/../bar/../baz --> /baz 185 * //foo//./bar --> /foo/bar 186 * /../ --> null 187 * ../foo --> null 188 * foo/bar/.. --> foo/ 189 * foo/../../bar --> null 190 * foo/../bar --> bar 191 * //server/foo/../bar --> //server/bar 192 * //server/../bar --> null 193 * C:\foo\..\bar --> C:\bar 194 * C:\..\bar --> null 195 * ~/foo/../bar/ --> ~/bar/ 196 * ~/../bar --> null 197 * </pre> 198 * (Note the file separator returned will be correct for Windows/Unix) 199 * 200 * @param fileName the fileName to normalize, null returns null 201 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 202 */ 203 public static String normalize(final String fileName) { 204 return doNormalize(fileName, SYSTEM_SEPARATOR, true); 205 } 206 /** 207 * Normalizes a path, removing double and single dot path steps. 208 * <p> 209 * This method normalizes a path to a standard format. 210 * The input may contain separators in either Unix or Windows format. 211 * The output will contain separators in the format specified. 212 * <p> 213 * A trailing slash will be retained. 214 * A double slash will be merged to a single slash (but UNC names are handled). 215 * A single dot path segment will be removed. 216 * A double dot will cause that path segment and the one before to be removed. 
217 * If the double dot has no parent path segment to work with, {@code null} 218 * is returned. 219 * <p> 220 * The output will be the same on both Unix and Windows except 221 * for the separator character. 222 * <pre> 223 * /foo// --> /foo/ 224 * /foo/./ --> /foo/ 225 * /foo/../bar --> /bar 226 * /foo/../bar/ --> /bar/ 227 * /foo/../bar/../baz --> /baz 228 * //foo//./bar --> /foo/bar 229 * /../ --> null 230 * ../foo --> null 231 * foo/bar/.. --> foo/ 232 * foo/../../bar --> null 233 * foo/../bar --> bar 234 * //server/foo/../bar --> //server/bar 235 * //server/../bar --> null 236 * C:\foo\..\bar --> C:\bar 237 * C:\..\bar --> null 238 * ~/foo/../bar/ --> ~/bar/ 239 * ~/../bar --> null 240 * </pre> 241 * The output will be the same on both Unix and Windows including 242 * the separator character. 243 * 244 * @param fileName the fileName to normalize, null returns null 245 * @param unixSeparator {@code true} if a unix separator should 246 * be used or {@code false} if a windows separator should be used. 247 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 248 * @since 2.0 249 */ 250 public static String normalize(final String fileName, final boolean unixSeparator) { 251 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 252 return doNormalize(fileName, separator, true); 253 } 254 255 //----------------------------------------------------------------------- 256 /** 257 * Normalizes a path, removing double and single dot path steps, 258 * and removing any final directory separator. 259 * <p> 260 * This method normalizes a path to a standard format. 261 * The input may contain separators in either Unix or Windows format. 262 * The output will contain separators in the format of the system. 263 * <p> 264 * A trailing slash will be removed. 265 * A double slash will be merged to a single slash (but UNC names are handled). 266 * A single dot path segment will be removed. 
267 * A double dot will cause that path segment and the one before to be removed. 268 * If the double dot has no parent path segment to work with, {@code null} 269 * is returned. 270 * <p> 271 * The output will be the same on both Unix and Windows except 272 * for the separator character. 273 * <pre> 274 * /foo// --> /foo 275 * /foo/./ --> /foo 276 * /foo/../bar --> /bar 277 * /foo/../bar/ --> /bar 278 * /foo/../bar/../baz --> /baz 279 * //foo//./bar --> /foo/bar 280 * /../ --> null 281 * ../foo --> null 282 * foo/bar/.. --> foo 283 * foo/../../bar --> null 284 * foo/../bar --> bar 285 * //server/foo/../bar --> //server/bar 286 * //server/../bar --> null 287 * C:\foo\..\bar --> C:\bar 288 * C:\..\bar --> null 289 * ~/foo/../bar/ --> ~/bar 290 * ~/../bar --> null 291 * </pre> 292 * (Note the file separator returned will be correct for Windows/Unix) 293 * 294 * @param fileName the fileName to normalize, null returns null 295 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 296 */ 297 public static String normalizeNoEndSeparator(final String fileName) { 298 return doNormalize(fileName, SYSTEM_SEPARATOR, false); 299 } 300 301 /** 302 * Normalizes a path, removing double and single dot path steps, 303 * and removing any final directory separator. 304 * <p> 305 * This method normalizes a path to a standard format. 306 * The input may contain separators in either Unix or Windows format. 307 * The output will contain separators in the format specified. 308 * <p> 309 * A trailing slash will be removed. 310 * A double slash will be merged to a single slash (but UNC names are handled). 311 * A single dot path segment will be removed. 312 * A double dot will cause that path segment and the one before to be removed. 313 * If the double dot has no parent path segment to work with, {@code null} 314 * is returned. 315 * <p> 316 * The output will be the same on both Unix and Windows including 317 * the separator character. 
318 * <pre> 319 * /foo// --> /foo 320 * /foo/./ --> /foo 321 * /foo/../bar --> /bar 322 * /foo/../bar/ --> /bar 323 * /foo/../bar/../baz --> /baz 324 * //foo//./bar --> /foo/bar 325 * /../ --> null 326 * ../foo --> null 327 * foo/bar/.. --> foo 328 * foo/../../bar --> null 329 * foo/../bar --> bar 330 * //server/foo/../bar --> //server/bar 331 * //server/../bar --> null 332 * C:\foo\..\bar --> C:\bar 333 * C:\..\bar --> null 334 * ~/foo/../bar/ --> ~/bar 335 * ~/../bar --> null 336 * </pre> 337 * 338 * @param fileName the fileName to normalize, null returns null 339 * @param unixSeparator {@code true} if a unix separator should 340 * be used or {@code false} if a windows separator should be used. 341 * @return the normalized fileName, or null if invalid. Null bytes inside string will be removed 342 * @since 2.0 343 */ 344 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { 345 final char separator = unixSeparator ? UNIX_SEPARATOR : WINDOWS_SEPARATOR; 346 return doNormalize(fileName, separator, false); 347 } 348 349 /** 350 * Internal method to perform the normalization. 351 * 352 * @param fileName the fileName 353 * @param separator The separator character to use 354 * @param keepSeparator true to keep the final separator 355 * @return the normalized fileName. Null bytes inside string will be removed. 
*/
    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
        if (fileName == null) {
            return null;
        }

        // throws IllegalArgumentException when the name contains a null character
        requireNonNullChars(fileName);

        // 'size' tracks the number of meaningful chars in 'array'; it shrinks as segments are removed
        int size = fileName.length();
        if (size == 0) {
            return fileName;
        }
        final int prefix = getPrefixLength(fileName);
        if (prefix < 0) {
            // invalid prefix
            return null;
        }

        final char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
        fileName.getChars(0, fileName.length(), array, 0);

        // fix separators throughout: whichever separator was not requested is replaced by the requested one
        final char otherSeparator = separator == SYSTEM_SEPARATOR ? OTHER_SEPARATOR : SYSTEM_SEPARATOR;
        for (int i = 0; i < array.length; i++) {
            if (array[i] == otherSeparator) {
                array[i] = separator;
            }
        }

        // add extra separator on the end to simplify code below;
        // lastIsDirectory remembers whether the input itself ended with a separator
        boolean lastIsDirectory = true;
        if (array[size - 1] != separator) {
            array[size++] = separator;
            lastIsDirectory = false;
        }

        // adjoining slashes: collapse runs of separators found after the prefix
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == separator) {
                System.arraycopy(array, i, array, i - 1, size - i);
                size--;
                i--;
            }
        }

        // dot slash: drop "./" when the dot is a whole segment
        // (directly after the prefix or directly after a separator)
        for (int i = prefix + 1; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' &&
                    (i == prefix + 1 || array[i - 2] == separator)) {
                if (i == size - 1) {
                    // the path ended in "." so what remains is treated as a directory
                    lastIsDirectory = true;
                }
                System.arraycopy(array, i + 1, array, i - 1, size - i);
                size -=2;
                i--;
            }
        }

        // double dot slash: drop "../" together with the segment before it
        outer:
        for (int i = prefix + 2; i < size; i++) {
            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' &&
                    (i == prefix + 2 || array[i - 3] == separator)) {
                if (i == prefix + 2) {
                    // ".." directly after the prefix: there is no parent segment to remove
                    return null;
                }
                if (i == size - 1) {
                    // the path ended in ".." so what remains is treated as a directory
                    lastIsDirectory = true;
                }
                int j;
                // scan backwards for the separator that starts the parent segment
                for (j = i - 4 ; j >= prefix; j--) {
                    if (array[j] == separator) {
                        // remove b/../ from a/b/../c
                        System.arraycopy(array, i + 1, array, j + 1, size - i);
                        size -= i - j;
                        i = j + 1;
                        continue outer;
                    }
                }
                // remove a/../ from a/../c (the parent segment starts right at the prefix)
                System.arraycopy(array, i + 1, array, prefix, size - i);
                size -= i + 1 - prefix;
                i = prefix + 1;
            }
        }

        if (size <= 0) {  // should never be less than 0
            return EMPTY_STRING;
        }
        if (size <= prefix) {  // should never be less than prefix
            return new String(array, 0, size);
        }
        if (lastIsDirectory && keepSeparator) {
            return new String(array, 0, size);  // keep trailing separator
        }
        return new String(array, 0, size - 1);  // lose trailing separator
    }

    //-----------------------------------------------------------------------
    /**
     * Concatenates a fileName to a base path using normal command line style rules.
     * <p>
     * The effect is equivalent to the resultant directory after changing
     * directory to the first argument, followed by changing directory to
     * the second argument.
     * <p>
     * The first argument is the base path, the second is the path to concatenate.
     * The returned path is always normalized via {@link #normalize(String)},
     * thus {@code ..} is handled.
     * <p>
     * If {@code fullFileNameToAdd} is absolute (has an absolute prefix), then
     * it will be normalized and returned.
     * Otherwise, the paths will be joined, normalized and returned.
     * <p>
     * The output will be the same on both Unix and Windows except
     * for the separator character.
471 * <pre> 472 * /foo/ + bar --> /foo/bar 473 * /foo + bar --> /foo/bar 474 * /foo + /bar --> /bar 475 * /foo + C:/bar --> C:/bar 476 * /foo + C:bar --> C:bar (*) 477 * /foo/a/ + ../bar --> /foo/bar 478 * /foo/ + ../../bar --> null 479 * /foo/ + /bar --> /bar 480 * /foo/.. + /bar --> /bar 481 * /foo + bar/c.txt --> /foo/bar/c.txt 482 * /foo/c.txt + bar --> /foo/c.txt/bar (!) 483 * </pre> 484 * (*) Note that the Windows relative drive prefix is unreliable when 485 * used with this method. 486 * (!) Note that the first parameter must be a path. If it ends with a name, then 487 * the name will be built into the concatenated path. If this might be a problem, 488 * use {@link #getFullPath(String)} on the base path argument. 489 * 490 * @param basePath the base path to attach to, always treated as a path 491 * @param fullFileNameToAdd the fileName (or path) to attach to the base 492 * @return the concatenated path, or null if invalid. Null bytes inside string will be removed 493 */ 494 public static String concat(final String basePath, final String fullFileNameToAdd) { 495 final int prefix = getPrefixLength(fullFileNameToAdd); 496 if (prefix < 0) { 497 return null; 498 } 499 if (prefix > 0) { 500 return normalize(fullFileNameToAdd); 501 } 502 if (basePath == null) { 503 return null; 504 } 505 final int len = basePath.length(); 506 if (len == 0) { 507 return normalize(fullFileNameToAdd); 508 } 509 final char ch = basePath.charAt(len - 1); 510 if (isSeparator(ch)) { 511 return normalize(basePath + fullFileNameToAdd); 512 } 513 return normalize(basePath + '/' + fullFileNameToAdd); 514 } 515 516 /** 517 * Determines whether the {@code parent} directory contains the {@code child} element (a file or directory). 518 * <p> 519 * The files names are expected to be normalized. 
520 * </p> 521 * 522 * Edge cases: 523 * <ul> 524 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 525 * <li>A directory does not contain itself: return false</li> 526 * <li>A null child file is not contained in any parent: return false</li> 527 * </ul> 528 * 529 * @param canonicalParent 530 * the file to consider as the parent. 531 * @param canonicalChild 532 * the file to consider as the child. 533 * @return true is the candidate leaf is under by the specified composite. False otherwise. 534 * @throws IOException Never thrown. 535 * @since 2.2 536 * @see FileUtils#directoryContains(File, File) 537 */ 538 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) 539 throws IOException { 540 Objects.requireNonNull(canonicalParent, "canonicalParent"); 541 542 if (canonicalChild == null) { 543 return false; 544 } 545 546 if (IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 547 return false; 548 } 549 550 return IOCase.SYSTEM.checkStartsWith(canonicalChild, canonicalParent); 551 } 552 553 /** 554 * Converts all separators to the Unix separator of forward slash. 555 * 556 * @param path the path to be changed, null ignored 557 * @return the updated path 558 */ 559 public static String separatorsToUnix(final String path) { 560 if (path == null || path.indexOf(WINDOWS_SEPARATOR) == NOT_FOUND) { 561 return path; 562 } 563 return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR); 564 } 565 566 /** 567 * Converts all separators to the Windows separator of backslash. 568 * 569 * @param path the path to be changed, null ignored 570 * @return the updated path 571 */ 572 public static String separatorsToWindows(final String path) { 573 if (path == null || path.indexOf(UNIX_SEPARATOR) == NOT_FOUND) { 574 return path; 575 } 576 return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR); 577 } 578 579 /** 580 * Converts all separators to the system separator. 
581 * 582 * @param path the path to be changed, null ignored 583 * @return the updated path 584 */ 585 public static String separatorsToSystem(final String path) { 586 if (path == null) { 587 return null; 588 } 589 return isSystemWindows() ? separatorsToWindows(path) : separatorsToUnix(path); 590 } 591 592 /** 593 * Returns the length of the fileName prefix, such as {@code C:/} or {@code ~/}. 594 * <p> 595 * This method will handle a file in either Unix or Windows format. 596 * <p> 597 * The prefix length includes the first slash in the full fileName 598 * if applicable. Thus, it is possible that the length returned is greater 599 * than the length of the input string. 600 * <pre> 601 * Windows: 602 * a\b\c.txt --> 0 --> relative 603 * \a\b\c.txt --> 1 --> current drive absolute 604 * C:a\b\c.txt --> 2 --> drive relative 605 * C:\a\b\c.txt --> 3 --> absolute 606 * \\server\a\b\c.txt --> 9 --> UNC 607 * \\\a\b\c.txt --> -1 --> error 608 * 609 * Unix: 610 * a/b/c.txt --> 0 --> relative 611 * /a/b/c.txt --> 1 --> absolute 612 * ~/a/b/c.txt --> 2 --> current user 613 * ~ --> 2 --> current user (slash added) 614 * ~user/a/b/c.txt --> 6 --> named user 615 * ~user --> 6 --> named user (slash added) 616 * //server/a/b/c.txt --> 9 617 * ///a/b/c.txt --> -1 --> error 618 * C: --> 0 --> valid filename as only null byte and / are reserved characters 619 * </pre> 620 * <p> 621 * The output will be the same irrespective of the machine that the code is running on. 622 * ie. both Unix and Windows prefixes are matched regardless. 623 * 624 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 625 * These must be followed by a server name, so double-slashes are not collapsed 626 * to a single slash at the start of the fileName. 
*
     * @param fileName  the fileName to find the prefix in, null returns -1
     * @return the length of the prefix, -1 if invalid or null
     */
    public static int getPrefixLength(final String fileName) {
        if (fileName == null) {
            return NOT_FOUND;
        }
        final int len = fileName.length();
        if (len == 0) {
            return 0;
        }
        char ch0 = fileName.charAt(0);
        if (ch0 == ':') {
            // a name may not start with a colon
            return NOT_FOUND;
        }
        if (len == 1) {
            if (ch0 == '~') {
                return 2;  // return a length greater than the input
            }
            return isSeparator(ch0) ? 1 : 0;
        }
        if (ch0 == '~') {
            // "~" or "~user": the prefix runs up to and including the first separator of either style
            int posUnix = fileName.indexOf(UNIX_SEPARATOR, 1);
            int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 1);
            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
                return len + 1;  // return a length greater than the input
            }
            // when only one style is present, use its position for both so that min() picks it
            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
            posWin = posWin == NOT_FOUND ? posUnix : posWin;
            return Math.min(posUnix, posWin) + 1;
        }
        final char ch1 = fileName.charAt(1);
        if (ch1 == ':') {
            // candidate drive letter followed by a colon
            ch0 = Character.toUpperCase(ch0);
            if (ch0 >= 'A' && ch0 <= 'Z') {
                // NOTE(review): a bare "X:" counts as a plain name when the current
                // FileSystem reports no drive-letter support - confirm against FileSystem
                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
                    return 0;
                }
                if (len == 2 || !isSeparator(fileName.charAt(2))) {
                    return 2;
                }
                return 3;
            }
            if (ch0 == UNIX_SEPARATOR) {
                // a forward slash followed by a colon: only the leading slash is the prefix
                return 1;
            }
            return NOT_FOUND;

        }
        if (!isSeparator(ch0) || !isSeparator(ch1)) {
            // not a double-separator start: at most a single leading separator
            return isSeparator(ch0) ? 1 : 0;
        }
        // two leading separators: expect a host name terminated by a separator
        int posUnix = fileName.indexOf(UNIX_SEPARATOR, 2);
        int posWin = fileName.indexOf(WINDOWS_SEPARATOR, 2);
        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
            // no separator after the host name, or a third separator directly after the first two
            return NOT_FOUND;
        }
        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
        posWin = posWin == NOT_FOUND ? posUnix : posWin;
        final int pos = Math.min(posUnix, posWin) + 1;
        // the text between the two leading separators and the next separator
        final String hostnamePart = fileName.substring(2, pos - 1);
        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
    }

    /**
     * Returns the index of the last directory separator character.
     * <p>
     * This method will handle a file in either Unix or Windows format.
     * The position of the last forward or backslash is returned.
     * <p>
     * The output will be the same irrespective of the machine that the code is running on.
     *
     * @param fileName  the fileName to find the last path separator in, null returns -1
     * @return the index of the last separator character, or -1 if there
     * is no such character
     */
    public static int indexOfLastSeparator(final String fileName) {
        if (fileName == null) {
            return NOT_FOUND;
        }
        final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR);
        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR);
        // lastIndexOf yields -1 when absent, so max() selects whichever separator occurs last
        return Math.max(lastUnixPos, lastWindowsPos);
    }

    /**
     * Returns the index of the last extension separator character, which is a dot.
     * <p>
     * This method also checks that there is no directory separator after the last dot. To do this it uses
     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
     * </p>
     * <p>
     * The output will be the same irrespective of the machine that the code is running on, with the
     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
     * </p>
     * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt".
     * In this case, the name wouldn't be the name of a file, but the identifier of an
     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
     * an {@link IllegalArgumentException} for names like this.
728 * 729 * @param fileName 730 * the fileName to find the last extension separator in, null returns -1 731 * @return the index of the last extension separator character, or -1 if there is no such character 732 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 733 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 734 */ 735 public static int indexOfExtension(final String fileName) throws IllegalArgumentException { 736 if (fileName == null) { 737 return NOT_FOUND; 738 } 739 if (isSystemWindows()) { 740 // Special handling for NTFS ADS: Don't accept colon in the fileName. 741 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); 742 if (offset != -1) { 743 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); 744 } 745 } 746 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); 747 final int lastSeparator = indexOfLastSeparator(fileName); 748 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 749 } 750 751 //----------------------------------------------------------------------- 752 /** 753 * Gets the prefix from a full fileName, such as {@code C:/} 754 * or {@code ~/}. 755 * <p> 756 * This method will handle a file in either Unix or Windows format. 757 * The prefix includes the first slash in the full fileName where applicable. 
758 * <pre> 759 * Windows: 760 * a\b\c.txt --> "" --> relative 761 * \a\b\c.txt --> "\" --> current drive absolute 762 * C:a\b\c.txt --> "C:" --> drive relative 763 * C:\a\b\c.txt --> "C:\" --> absolute 764 * \\server\a\b\c.txt --> "\\server\" --> UNC 765 * 766 * Unix: 767 * a/b/c.txt --> "" --> relative 768 * /a/b/c.txt --> "/" --> absolute 769 * ~/a/b/c.txt --> "~/" --> current user 770 * ~ --> "~/" --> current user (slash added) 771 * ~user/a/b/c.txt --> "~user/" --> named user 772 * ~user --> "~user/" --> named user (slash added) 773 * </pre> 774 * <p> 775 * The output will be the same irrespective of the machine that the code is running on. 776 * ie. both Unix and Windows prefixes are matched regardless. 777 * 778 * @param fileName the fileName to query, null returns null 779 * @return the prefix of the file, null if invalid. Null bytes inside string will be removed 780 */ 781 public static String getPrefix(final String fileName) { 782 if (fileName == null) { 783 return null; 784 } 785 final int len = getPrefixLength(fileName); 786 if (len < 0) { 787 return null; 788 } 789 if (len > fileName.length()) { 790 requireNonNullChars(fileName + UNIX_SEPARATOR); 791 return fileName + UNIX_SEPARATOR; 792 } 793 final String path = fileName.substring(0, len); 794 requireNonNullChars(path); 795 return path; 796 } 797 798 /** 799 * Gets the path from a full fileName, which excludes the prefix. 800 * <p> 801 * This method will handle a file in either Unix or Windows format. 802 * The method is entirely text based, and returns the text before and 803 * including the last forward or backslash. 804 * <pre> 805 * C:\a\b\c.txt --> a\b\ 806 * ~/a/b/c.txt --> a/b/ 807 * a.txt --> "" 808 * a/b/c --> a/b/ 809 * a/b/c/ --> a/b/c/ 810 * </pre> 811 * <p> 812 * The output will be the same irrespective of the machine that the code is running on. 813 * <p> 814 * This method drops the prefix from the result. 815 * See {@link #getFullPath(String)} for the method that retains the prefix. 
816 * 817 * @param fileName the fileName to query, null returns null 818 * @return the path of the file, an empty string if none exists, null if invalid. 819 * Null bytes inside string will be removed 820 */ 821 public static String getPath(final String fileName) { 822 return doGetPath(fileName, 1); 823 } 824 825 /** 826 * Gets the path from a full fileName, which excludes the prefix, and 827 * also excluding the final directory separator. 828 * <p> 829 * This method will handle a file in either Unix or Windows format. 830 * The method is entirely text based, and returns the text before the 831 * last forward or backslash. 832 * <pre> 833 * C:\a\b\c.txt --> a\b 834 * ~/a/b/c.txt --> a/b 835 * a.txt --> "" 836 * a/b/c --> a/b 837 * a/b/c/ --> a/b/c 838 * </pre> 839 * <p> 840 * The output will be the same irrespective of the machine that the code is running on. 841 * <p> 842 * This method drops the prefix from the result. 843 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 844 * 845 * @param fileName the fileName to query, null returns null 846 * @return the path of the file, an empty string if none exists, null if invalid. 847 * Null bytes inside string will be removed 848 */ 849 public static String getPathNoEndSeparator(final String fileName) { 850 return doGetPath(fileName, 0); 851 } 852 853 /** 854 * Does the work of getting the path. 855 * 856 * @param fileName the fileName 857 * @param separatorAdd 0 to omit the end separator, 1 to return it 858 * @return the path. 
Null bytes inside string will be removed 859 */ 860 private static String doGetPath(final String fileName, final int separatorAdd) { 861 if (fileName == null) { 862 return null; 863 } 864 final int prefix = getPrefixLength(fileName); 865 if (prefix < 0) { 866 return null; 867 } 868 final int index = indexOfLastSeparator(fileName); 869 final int endIndex = index+separatorAdd; 870 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { 871 return EMPTY_STRING; 872 } 873 final String path = fileName.substring(prefix, endIndex); 874 requireNonNullChars(path); 875 return path; 876 } 877 878 /** 879 * Gets the full path from a full fileName, which is the prefix + path. 880 * <p> 881 * This method will handle a file in either Unix or Windows format. 882 * The method is entirely text based, and returns the text before and 883 * including the last forward or backslash. 884 * <pre> 885 * C:\a\b\c.txt --> C:\a\b\ 886 * ~/a/b/c.txt --> ~/a/b/ 887 * a.txt --> "" 888 * a/b/c --> a/b/ 889 * a/b/c/ --> a/b/c/ 890 * C: --> C: 891 * C:\ --> C:\ 892 * ~ --> ~/ 893 * ~/ --> ~/ 894 * ~user --> ~user/ 895 * ~user/ --> ~user/ 896 * </pre> 897 * <p> 898 * The output will be the same irrespective of the machine that the code is running on. 899 * 900 * @param fileName the fileName to query, null returns null 901 * @return the path of the file, an empty string if none exists, null if invalid 902 */ 903 public static String getFullPath(final String fileName) { 904 return doGetFullPath(fileName, true); 905 } 906 907 /** 908 * Gets the full path from a full fileName, which is the prefix + path, 909 * and also excluding the final directory separator. 910 * <p> 911 * This method will handle a file in either Unix or Windows format. 912 * The method is entirely text based, and returns the text before the 913 * last forward or backslash. 
914 * <pre> 915 * C:\a\b\c.txt --> C:\a\b 916 * ~/a/b/c.txt --> ~/a/b 917 * a.txt --> "" 918 * a/b/c --> a/b 919 * a/b/c/ --> a/b/c 920 * C: --> C: 921 * C:\ --> C:\ 922 * ~ --> ~ 923 * ~/ --> ~ 924 * ~user --> ~user 925 * ~user/ --> ~user 926 * </pre> 927 * <p> 928 * The output will be the same irrespective of the machine that the code is running on. 929 * 930 * @param fileName the fileName to query, null returns null 931 * @return the path of the file, an empty string if none exists, null if invalid 932 */ 933 public static String getFullPathNoEndSeparator(final String fileName) { 934 return doGetFullPath(fileName, false); 935 } 936 937 /** 938 * Does the work of getting the path. 939 * 940 * @param fileName the fileName 941 * @param includeSeparator true to include the end separator 942 * @return the path 943 */ 944 private static String doGetFullPath(final String fileName, final boolean includeSeparator) { 945 if (fileName == null) { 946 return null; 947 } 948 final int prefix = getPrefixLength(fileName); 949 if (prefix < 0) { 950 return null; 951 } 952 if (prefix >= fileName.length()) { 953 if (includeSeparator) { 954 return getPrefix(fileName); // add end slash if necessary 955 } 956 return fileName; 957 } 958 final int index = indexOfLastSeparator(fileName); 959 if (index < 0) { 960 return fileName.substring(0, prefix); 961 } 962 int end = index + (includeSeparator ? 1 : 0); 963 if (end == 0) { 964 end++; 965 } 966 return fileName.substring(0, end); 967 } 968 969 /** 970 * Gets the name minus the path from a full fileName. 971 * <p> 972 * This method will handle a file in either Unix or Windows format. 973 * The text after the last forward or backslash is returned. 974 * <pre> 975 * a/b/c.txt --> c.txt 976 * a.txt --> a.txt 977 * a/b/c --> c 978 * a/b/c/ --> "" 979 * </pre> 980 * <p> 981 * The output will be the same irrespective of the machine that the code is running on. 
982 * 983 * @param fileName the fileName to query, null returns null 984 * @return the name of the file without the path, or an empty string if none exists. 985 * Null bytes inside string will be removed 986 */ 987 public static String getName(final String fileName) { 988 if (fileName == null) { 989 return null; 990 } 991 requireNonNullChars(fileName); 992 final int index = indexOfLastSeparator(fileName); 993 return fileName.substring(index + 1); 994 } 995 996 /** 997 * Checks the input for null bytes, a sign of unsanitized data being passed to to file level functions. 998 * 999 * This may be used for poison byte attacks. 1000 * 1001 * @param path the path to check 1002 */ 1003 private static void requireNonNullChars(final String path) { 1004 if (path.indexOf(0) >= 0) { 1005 throw new IllegalArgumentException("Null byte present in file/path name. There are no " 1006 + "known legitimate use cases for such data, but several injection attacks may use it"); 1007 } 1008 } 1009 1010 /** 1011 * Gets the base name, minus the full path and extension, from a full fileName. 1012 * <p> 1013 * This method will handle a file in either Unix or Windows format. 1014 * The text after the last forward or backslash and before the last dot is returned. 1015 * <pre> 1016 * a/b/c.txt --> c 1017 * a.txt --> a 1018 * a/b/c --> c 1019 * a/b/c/ --> "" 1020 * </pre> 1021 * <p> 1022 * The output will be the same irrespective of the machine that the code is running on. 1023 * 1024 * @param fileName the fileName to query, null returns null 1025 * @return the name of the file without the path, or an empty string if none exists. Null bytes inside string 1026 * will be removed 1027 */ 1028 public static String getBaseName(final String fileName) { 1029 return removeExtension(getName(fileName)); 1030 } 1031 1032 /** 1033 * Gets the extension of a fileName. 1034 * <p> 1035 * This method returns the textual part of the fileName after the last dot. 
1036 * There must be no directory separator after the dot. 1037 * <pre> 1038 * foo.txt --> "txt" 1039 * a/b/c.jpg --> "jpg" 1040 * a/b.txt/c --> "" 1041 * a/b/c --> "" 1042 * </pre> 1043 * <p> 1044 * The output will be the same irrespective of the machine that the code is running on, with the 1045 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 1046 * </p> 1047 * <p> 1048 * <b>Note:</b> This method used to have a hidden problem for names like "foo.exe:bar.txt". 1049 * In this case, the name wouldn't be the name of a file, but the identifier of an 1050 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 1051 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 1052 * an {@link IllegalArgumentException} for names like this. 1053 * 1054 * @param fileName the fileName to retrieve the extension of. 1055 * @return the extension of the file or an empty string if none exists or {@code null} 1056 * if the fileName is {@code null}. 1057 * @throws IllegalArgumentException <b>Windows only:</b> The fileName parameter is, in fact, 1058 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 1059 */ 1060 public static String getExtension(final String fileName) throws IllegalArgumentException { 1061 if (fileName == null) { 1062 return null; 1063 } 1064 final int index = indexOfExtension(fileName); 1065 if (index == NOT_FOUND) { 1066 return EMPTY_STRING; 1067 } 1068 return fileName.substring(index + 1); 1069 } 1070 1071 /** 1072 * Special handling for NTFS ADS: Don't accept colon in the fileName. 1073 * 1074 * @param fileName a file name 1075 * @return ADS offsets. 1076 */ 1077 private static int getAdsCriticalOffset(final String fileName) { 1078 // Step 1: Remove leading path segments. 
1079 final int offset1 = fileName.lastIndexOf(SYSTEM_SEPARATOR); 1080 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); 1081 if (offset1 == -1) { 1082 if (offset2 == -1) { 1083 return 0; 1084 } 1085 return offset2 + 1; 1086 } 1087 if (offset2 == -1) { 1088 return offset1 + 1; 1089 } 1090 return Math.max(offset1, offset2) + 1; 1091 } 1092 1093 //----------------------------------------------------------------------- 1094 /** 1095 * Removes the extension from a fileName. 1096 * <p> 1097 * This method returns the textual part of the fileName before the last dot. 1098 * There must be no directory separator after the dot. 1099 * <pre> 1100 * foo.txt --> foo 1101 * a\b\c.jpg --> a\b\c 1102 * a\b\c --> a\b\c 1103 * a.b\c --> a.b\c 1104 * </pre> 1105 * <p> 1106 * The output will be the same irrespective of the machine that the code is running on. 1107 * 1108 * @param fileName the fileName to query, null returns null 1109 * @return the fileName minus the extension 1110 */ 1111 public static String removeExtension(final String fileName) { 1112 if (fileName == null) { 1113 return null; 1114 } 1115 requireNonNullChars(fileName); 1116 1117 final int index = indexOfExtension(fileName); 1118 if (index == NOT_FOUND) { 1119 return fileName; 1120 } 1121 return fileName.substring(0, index); 1122 } 1123 1124 //----------------------------------------------------------------------- 1125 /** 1126 * Checks whether two fileNames are equal exactly. 1127 * <p> 1128 * No processing is performed on the fileNames other than comparison, 1129 * thus this is merely a null-safe case-sensitive equals. 
*
     * @param fileName1 the first fileName to query, may be null
     * @param fileName2 the second fileName to query, may be null
     * @return true if the fileNames are equal, null equals null
     * @see IOCase#SENSITIVE
     * @see #equals(String, String, boolean, IOCase)
     */
    public static boolean equals(final String fileName1, final String fileName2) {
        // false: compare the raw strings, no normalization
        return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
    }

    /**
     * Checks whether two fileNames are equal using the case rules of the system.
     * <p>
     * No processing is performed on the fileNames other than comparison.
     * The check is case-sensitive on Unix and case-insensitive on Windows.
     *
     * @param fileName1 the first fileName to query, may be null
     * @param fileName2 the second fileName to query, may be null
     * @return true if the fileNames are equal, null equals null
     * @see IOCase#SYSTEM
     * @see #equals(String, String, boolean, IOCase)
     */
    public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
        // false: compare the raw strings, no normalization
        return equals(fileName1, fileName2, false, IOCase.SYSTEM);
    }

    //-----------------------------------------------------------------------
    /**
     * Checks whether two fileNames are equal after both have been normalized.
     * <p>
     * Both fileNames are first passed to {@link #normalize(String)}.
     * The check is then performed in a case-sensitive manner.
     *
     * @param fileName1 the first fileName to query, may be null
     * @param fileName2 the second fileName to query, may be null
     * @return true if the fileNames are equal, null equals null;
     * false if either non-null fileName normalizes to null
     * @see IOCase#SENSITIVE
     * @see #equals(String, String, boolean, IOCase)
     */
    public static boolean equalsNormalized(final String fileName1, final String fileName2) {
        // true: normalize both names before comparing
        return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
    }

    /**
     * Checks whether two fileNames are equal after both have been normalized
     * and using the case rules of the system.
     * <p>
     * Both fileNames are first passed to {@link #normalize(String)}.
1176 * The check is then performed case-sensitive on Unix and 1177 * case-insensitive on Windows. 1178 * 1179 * @param fileName1 the first fileName to query, may be null 1180 * @param fileName2 the second fileName to query, may be null 1181 * @return true if the fileNames are equal, null equals null 1182 * @see IOCase#SYSTEM 1183 */ 1184 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { 1185 return equals(fileName1, fileName2, true, IOCase.SYSTEM); 1186 } 1187 1188 /** 1189 * Checks whether two fileNames are equal, optionally normalizing and providing 1190 * control over the case-sensitivity. 1191 * 1192 * @param fileName1 the first fileName to query, may be null 1193 * @param fileName2 the second fileName to query, may be null 1194 * @param normalized whether to normalize the fileNames 1195 * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive 1196 * @return true if the fileNames are equal, null equals null 1197 * @since 1.3 1198 */ 1199 public static boolean equals( 1200 String fileName1, String fileName2, 1201 final boolean normalized, IOCase caseSensitivity) { 1202 1203 if (fileName1 == null || fileName2 == null) { 1204 return fileName1 == null && fileName2 == null; 1205 } 1206 if (normalized) { 1207 fileName1 = normalize(fileName1); 1208 if (fileName1 == null) { 1209 return false; 1210 } 1211 fileName2 = normalize(fileName2); 1212 if (fileName2 == null) { 1213 return false; 1214 } 1215 } 1216 if (caseSensitivity == null) { 1217 caseSensitivity = IOCase.SENSITIVE; 1218 } 1219 return caseSensitivity.checkEquals(fileName1, fileName2); 1220 } 1221 1222 //----------------------------------------------------------------------- 1223 /** 1224 * Checks whether the extension of the fileName is that specified. 1225 * <p> 1226 * This method obtains the extension as the textual part of the fileName 1227 * after the last dot. There must be no directory separator after the dot. 
1228 * The extension check is case-sensitive on all platforms. 1229 * 1230 * @param fileName the fileName to query, null returns false 1231 * @param extension the extension to check for, null or empty checks for no extension 1232 * @return true if the fileName has the specified extension 1233 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1234 */ 1235 public static boolean isExtension(final String fileName, final String extension) { 1236 if (fileName == null) { 1237 return false; 1238 } 1239 requireNonNullChars(fileName); 1240 1241 if (extension == null || extension.isEmpty()) { 1242 return indexOfExtension(fileName) == NOT_FOUND; 1243 } 1244 final String fileExt = getExtension(fileName); 1245 return fileExt.equals(extension); 1246 } 1247 1248 /** 1249 * Checks whether the extension of the fileName is one of those specified. 1250 * <p> 1251 * This method obtains the extension as the textual part of the fileName 1252 * after the last dot. There must be no directory separator after the dot. 1253 * The extension check is case-sensitive on all platforms. 1254 * 1255 * @param fileName the fileName to query, null returns false 1256 * @param extensions the extensions to check for, null checks for no extension 1257 * @return true if the fileName is one of the extensions 1258 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1259 */ 1260 public static boolean isExtension(final String fileName, final String... 
extensions) { 1261 if (fileName == null) { 1262 return false; 1263 } 1264 requireNonNullChars(fileName); 1265 1266 if (extensions == null || extensions.length == 0) { 1267 return indexOfExtension(fileName) == NOT_FOUND; 1268 } 1269 final String fileExt = getExtension(fileName); 1270 for (final String extension : extensions) { 1271 if (fileExt.equals(extension)) { 1272 return true; 1273 } 1274 } 1275 return false; 1276 } 1277 1278 /** 1279 * Checks whether the extension of the fileName is one of those specified. 1280 * <p> 1281 * This method obtains the extension as the textual part of the fileName 1282 * after the last dot. There must be no directory separator after the dot. 1283 * The extension check is case-sensitive on all platforms. 1284 * 1285 * @param fileName the fileName to query, null returns false 1286 * @param extensions the extensions to check for, null checks for no extension 1287 * @return true if the fileName is one of the extensions 1288 * @throws java.lang.IllegalArgumentException if the supplied fileName contains null bytes 1289 */ 1290 public static boolean isExtension(final String fileName, final Collection<String> extensions) { 1291 if (fileName == null) { 1292 return false; 1293 } 1294 requireNonNullChars(fileName); 1295 1296 if (extensions == null || extensions.isEmpty()) { 1297 return indexOfExtension(fileName) == NOT_FOUND; 1298 } 1299 final String fileExt = getExtension(fileName); 1300 for (final String extension : extensions) { 1301 if (fileExt.equals(extension)) { 1302 return true; 1303 } 1304 } 1305 return false; 1306 } 1307 1308 //----------------------------------------------------------------------- 1309 /** 1310 * Checks a fileName to see if it matches the specified wildcard matcher, 1311 * always testing case-sensitive. 1312 * <p> 1313 * The wildcard matcher uses the characters '?' and '*' to represent a 1314 * single or multiple (zero or more) wildcard characters. 1315 * This is the same as often found on Dos/Unix command lines. 
* The check is case-sensitive always.
     * <pre>
     * wildcardMatch("c.txt", "*.txt")      --> true
     * wildcardMatch("c.txt", "*.jpg")      --> false
     * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
     * wildcardMatch("c.txt", "*.???")      --> true
     * wildcardMatch("c.txt", "*.????")     --> false
     * </pre>
     * N.B. the sequence "*?" does not work properly at present in match strings.
     *
     * @param fileName the fileName to match on
     * @param wildcardMatcher the wildcard string to match against
     * @return true if the fileName matches the wildcard string; two nulls match,
     * a single null never matches
     * @see IOCase#SENSITIVE
     * @see #wildcardMatch(String, String, IOCase)
     */
    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
        return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
    }

    /**
     * Checks a fileName to see if it matches the specified wildcard matcher
     * using the case rules of the system.
     * <p>
     * The wildcard matcher uses the characters '?' and '*' to represent a
     * single or multiple (zero or more) wildcard characters.
     * This is the same as often found on Dos/Unix command lines.
     * The check is case-sensitive on Unix and case-insensitive on Windows.
     * <pre>
     * wildcardMatch("c.txt", "*.txt")      --> true
     * wildcardMatch("c.txt", "*.jpg")      --> false
     * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
     * wildcardMatch("c.txt", "*.???")      --> true
     * wildcardMatch("c.txt", "*.????")     --> false
     * </pre>
     * N.B. the sequence "*?" does not work properly at present in match strings.
*
     * @param fileName the fileName to match on
     * @param wildcardMatcher the wildcard string to match against
     * @return true if the fileName matches the wildcard string
     * @see IOCase#SYSTEM
     */
    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
    }

    /**
     * Checks a fileName to see if it matches the specified wildcard matcher
     * allowing control over case-sensitivity.
     * <p>
     * The wildcard matcher uses the characters '?' and '*' to represent a
     * single or multiple (zero or more) wildcard characters.
     * N.B. the sequence "*?" does not work properly at present in match strings.
     * <p>
     * The matcher is split into tokens ('?', '*' and literal text) and the tokens are
     * consumed left to right. Whenever a literal token that follows a '*' occurs more
     * than once in the remaining text, the later occurrence is remembered on a stack
     * so that the match can be retried from there if the first choice fails.
     *
     * @param fileName the fileName to match on
     * @param wildcardMatcher the wildcard string to match against
     * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
     * @return true if the fileName matches the wildcard string; two nulls match,
     * a single null never matches
     * @since 1.3
     */
    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase caseSensitivity) {
        if (fileName == null && wildcardMatcher == null) {
            return true;
        }
        if (fileName == null || wildcardMatcher == null) {
            return false;
        }
        if (caseSensitivity == null) {
            caseSensitivity = IOCase.SENSITIVE;
        }
        final String[] wcs = splitOnTokens(wildcardMatcher);
        // true while the previous token was '*', i.e. the next literal may start anywhere
        boolean anyChars = false;
        // position in fileName matched so far
        int textIdx = 0;
        // index of the token currently being processed
        int wcsIdx = 0;
        // each entry is {token index, text index} of an alternative match position to retry
        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);

        // loop around a backtrack stack, to handle complex * matching
        do {
            if (!backtrack.isEmpty()) {
                // resume at a saved alternative; anyChars is forced on so the literal
                // token is searched for again starting at the saved text position
                final int[] array = backtrack.pop();
                wcsIdx = array[0];
                textIdx = array[1];
                anyChars = true;
            }

            // loop whilst tokens and text left to process
            while (wcsIdx < wcs.length) {

                if (wcs[wcsIdx].equals("?")) {
                    // ? so move to next text char
                    textIdx++;
                    if (textIdx > fileName.length()) {
                        // ran past the end of the text: no character left for this '?'
                        break;
                    }
                    anyChars = false;

                } else if (wcs[wcsIdx].equals("*")) {
                    // set any chars status
                    anyChars = true;
                    if (wcsIdx == wcs.length - 1) {
                        // trailing '*' swallows the rest of the text
                        textIdx = fileName.length();
                    }

                } else {
                    // matching text token
                    if (anyChars) {
                        // any chars then try to locate text token
                        textIdx = caseSensitivity.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
                        if (textIdx == NOT_FOUND) {
                            // token not found
                            break;
                        }
                        // remember a later occurrence of the same token as a fallback
                        final int repeat = caseSensitivity.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
                        if (repeat >= 0) {
                            backtrack.push(new int[] {wcsIdx, repeat});
                        }
                    } else if (!caseSensitivity.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
                        // matching from current position
                        // couldn't match token
                        break;
                    }

                    // matched text token, move text index to end of matched token
                    textIdx += wcs[wcsIdx].length();
                    anyChars = false;
                }

                wcsIdx++;
            }

            // full match
            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
                return true;
            }

            // otherwise retry from a saved alternative, if any remain
        } while (!backtrack.isEmpty());

        return false;
    }

    /**
     * Splits a string into a number of tokens.
     * The text is split by '?' and '*'.
     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1459 * 1460 * @param text the text to split 1461 * @return the array of tokens, never null 1462 */ 1463 static String[] splitOnTokens(final String text) { 1464 // used by wildcardMatch 1465 // package level so a unit test may run on this 1466 1467 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1468 return new String[] { text }; 1469 } 1470 1471 final char[] array = text.toCharArray(); 1472 final ArrayList<String> list = new ArrayList<>(); 1473 final StringBuilder buffer = new StringBuilder(); 1474 char prevChar = 0; 1475 for (final char ch : array) { 1476 if (ch == '?' || ch == '*') { 1477 if (buffer.length() != 0) { 1478 list.add(buffer.toString()); 1479 buffer.setLength(0); 1480 } 1481 if (ch == '?') { 1482 list.add("?"); 1483 } else if (prevChar != '*') {// ch == '*' here; check if previous char was '*' 1484 list.add("*"); 1485 } 1486 } else { 1487 buffer.append(ch); 1488 } 1489 prevChar = ch; 1490 } 1491 if (buffer.length() != 0) { 1492 list.add(buffer.toString()); 1493 } 1494 1495 return list.toArray(EMPTY_STRING_ARRAY); 1496 } 1497 1498 /** 1499 * Checks whether a given string is a valid host name according to 1500 * RFC 3986. 1501 * 1502 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1503 * RFC calls a "reg-name". Percent encoded names don't seem to be 1504 * valid names in UNC paths.</p> 1505 * 1506 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1507 * @param name the hostname to validate 1508 * @return true if the given name is a valid host name 1509 */ 1510 private static boolean isValidHostName(final String name) { 1511 return isIPv6Address(name) || isRFC3986HostName(name); 1512 } 1513 1514 private static final Pattern IPV4_PATTERN = 1515 Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); 1516 private static final int IPV4_MAX_OCTET_VALUE = 255; 1517 1518 /** 1519 * Checks whether a given string represents a valid IPv4 address. 
1520 * 1521 * @param name the name to validate 1522 * @return true if the given name is a valid IPv4 address 1523 */ 1524 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1525 private static boolean isIPv4Address(final String name) { 1526 final Matcher m = IPV4_PATTERN.matcher(name); 1527 if (!m.matches() || m.groupCount() != 4) { 1528 return false; 1529 } 1530 1531 // verify that address subgroups are legal 1532 for (int i = 1; i <= 4; i++) { 1533 final String ipSegment = m.group(i); 1534 final int iIpSegment = Integer.parseInt(ipSegment); 1535 if (iIpSegment > IPV4_MAX_OCTET_VALUE) { 1536 return false; 1537 } 1538 1539 if (ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1540 return false; 1541 } 1542 1543 } 1544 1545 return true; 1546 } 1547 1548 private static final int IPV6_MAX_HEX_GROUPS = 8; 1549 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; 1550 private static final int MAX_UNSIGNED_SHORT = 0xffff; 1551 private static final int BASE_16 = 16; 1552 1553 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1554 /** 1555 * Checks whether a given string represents a valid IPv6 address. 
*
     * <p>Each group must consist of one to four ASCII hexadecimal digits; a leading
     * sign or non-ASCII digits are rejected. The last group may instead be a dotted
     * IPv4 address, which counts as two groups.</p>
     *
     * @param inet6Address the name to validate, must not be null
     * @return true if the given name is a valid IPv6 address
     */
    private static boolean isIPv6Address(final String inet6Address) {
        final boolean containsCompressedZeroes = inet6Address.contains("::");
        // "::" may appear at most once
        if (containsCompressedZeroes && (inet6Address.indexOf("::") != inet6Address.lastIndexOf("::"))) {
            return false;
        }
        // a single leading or trailing ':' is never legal
        if ((inet6Address.startsWith(":") && !inet6Address.startsWith("::"))
                || (inet6Address.endsWith(":") && !inet6Address.endsWith("::"))) {
            return false;
        }
        String[] octets = inet6Address.split(":");
        if (containsCompressedZeroes) {
            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
            if (inet6Address.endsWith("::")) {
                // String.split() drops ending empty segments
                octetList.add("");
            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
                // a leading "::" yields two empty segments; keep only one
                octetList.remove(0);
            }
            octets = octetList.toArray(EMPTY_STRING_ARRAY);
        }
        if (octets.length > IPV6_MAX_HEX_GROUPS) {
            return false;
        }
        int validOctets = 0;
        int emptyOctets = 0; // consecutive empty chunks
        for (int index = 0; index < octets.length; index++) {
            final String octet = octets[index];
            if (octet.isEmpty()) {
                emptyOctets++;
                if (emptyOctets > 1) {
                    return false;
                }
            } else {
                emptyOctets = 0;
                // Is last chunk an IPv4 address?
                if (index == octets.length - 1 && octet.contains(".")) {
                    if (!isIPv4Address(octet)) {
                        return false;
                    }
                    // an embedded IPv4 address occupies two 16-bit groups
                    validOctets += 2;
                    continue;
                }
                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
                    return false;
                }
                // Integer.parseInt accepts a leading '+'/'-' and non-ASCII Unicode digits,
                // none of which may appear in an IPv6 group, so vet the characters first.
                for (int i = 0; i < octet.length(); i++) {
                    final char ch = octet.charAt(i);
                    final boolean asciiHexDigit = (ch >= '0' && ch <= '9')
                            || (ch >= 'a' && ch <= 'f')
                            || (ch >= 'A' && ch <= 'F');
                    if (!asciiHexDigit) {
                        return false;
                    }
                }
                int octetInt = 0;
                try {
                    octetInt = Integer.parseInt(octet, BASE_16);
                } catch (final NumberFormatException e) {
                    return false;
                }
                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
                    return false;
                }
            }
            validOctets++;
        }
        // all eight groups must be present unless "::" stands in for the missing ones
        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
    }

    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");

    /**
     * Checks whether a given string is a valid host name according to
     * RFC 3986 - not accepting IP addresses.
     *
     * <p>The name is split on dots and every label must start with an ASCII letter or
     * digit and continue with letters, digits or hyphens. A single trailing dot is allowed.</p>
     *
     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
     * @param name the hostname to validate
     * @return true if the given name is a valid host name
     */
    private static boolean isRFC3986HostName(final String name) {
        // limit -1 keeps trailing empty strings so a trailing dot can be recognised
        final String[] parts = name.split("\\.", -1);
        for (int i = 0; i < parts.length; i++) {
            if (parts[i].isEmpty()) {
                // trailing dot is legal, otherwise we've hit a .. sequence
                return i == parts.length - 1;
            }
            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
                return false;
            }
        }
        return true;
    }
}