001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io; 018 019import java.io.File; 020import java.util.ArrayDeque; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Deque; 025import java.util.List; 026import java.util.regex.Matcher; 027import java.util.regex.Pattern; 028import java.util.stream.Stream; 029 030/** 031 * General file name and file path manipulation utilities. The methods in this class 032 * operate on strings that represent relative or absolute paths. Nothing in this class 033 * ever accesses the file system, or depends on whether a path points to a file that exists. 034 * <p> 035 * When dealing with file names, you can hit problems when moving from a Windows 036 * based development machine to a Unix based production machine. 037 * This class aims to help avoid those problems. 038 * </p> 039 * <p> 040 * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by 041 * using JDK {@link File File} objects and the two argument constructor 042 * {@link File#File(java.io.File, String) File(File,String)}. 
* </p>
 * <p>
 * Most methods in this class are designed to work the same on both Unix and Windows.
 * Those that don't work the same on both platforms have 'System', 'Unix', or 'Windows' in their name.
 * </p>
 * <p>
 * Most methods recognize both separators (forward and backslashes), and both
 * sets of prefixes. See the Javadoc of each method for details.
 * </p>
 * <p>
 * This class defines six components within a path (sometimes called a file name or a full file name).
 * Given an absolute Windows path such as C:\dev\project\file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
 * <li>the prefix - C:\</li>
 * <li>the path - dev\project\</li>
 * <li>the full path - C:\dev\project\</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * <p>
 * Given an absolute Unix path such as /dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - /dev/project/file.txt</li>
 * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 * <li>the full path - /dev/project/</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * <p>
 * Given a relative Windows path such as dev\project\file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - dev\project\file.txt</li>
 * <li>the prefix - "" (the empty string)</li>
 * <li>the path - dev\project\</li>
 * <li>the full path - dev\project\</li>
 * <li>the name - file.txt</li>
 * <li>the base name - file</li>
 * <li>the extension - txt</li>
 * </ul>
 * <p>
 * Given an absolute Unix path such as /dev/project/file.txt they are:
 * </p>
 * <ul>
 * <li>the full file name, or just file name - /dev/project/file.txt</li>
 * <li>the prefix - /</li>
 * <li>the path - dev/project/</li>
 *
<li>the full path - /dev/project</li> 097 * <li>the name - file.txt</li> 098 * <li>the base name - file</li> 099 * <li>the extension - txt</li> 100 * </ul> 101 * 102 * 103 * <p> 104 * This class works best if directory names end with a separator. 105 * If you omit the last separator, it is impossible to determine if the last component 106 * corresponds to a file or a directory. This class treats final components 107 * that do not end with a separator as files, not directories. 108 * </p> 109 * <p> 110 * This class only supports Unix and Windows style names. 111 * Prefixes are matched as follows: 112 * </p> 113 * <pre> 114 * Windows: 115 * a\b\c.txt --> "" --> relative 116 * \a\b\c.txt --> "\" --> current drive absolute 117 * C:a\b\c.txt --> "C:" --> drive relative 118 * C:\a\b\c.txt --> "C:\" --> absolute 119 * \\server\a\b\c.txt --> "\\server\" --> UNC 120 * 121 * Unix: 122 * a/b/c.txt --> "" --> relative 123 * /a/b/c.txt --> "/" --> absolute 124 * ~/a/b/c.txt --> "~/" --> current user 125 * ~ --> "~/" --> current user (slash added) 126 * ~user/a/b/c.txt --> "~user/" --> named user 127 * ~user --> "~user/" --> named user (slash added) 128 * </pre> 129 * <p> 130 * Both prefix styles are matched, irrespective of the machine that you are 131 * currently running on. 132 * </p> 133 * 134 * @since 1.1 135 */ 136public class FilenameUtils { 137 138 private static final String[] EMPTY_STRING_ARRAY = {}; 139 140 private static final String EMPTY_STRING = ""; 141 142 private static final int NOT_FOUND = -1; 143 144 /** 145 * The extension separator character. 146 * 147 * @since 1.4 148 */ 149 public static final char EXTENSION_SEPARATOR = '.'; 150 151 /** 152 * The extension separator String. 153 * 154 * @since 1.4 155 */ 156 public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR); 157 158 /** 159 * The Unix separator character. 
160 */ 161 private static final char UNIX_NAME_SEPARATOR = '/'; 162 163 /** 164 * The Windows separator character. 165 */ 166 private static final char WINDOWS_NAME_SEPARATOR = '\\'; 167 168 /** 169 * The system separator character. 170 */ 171 private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar; 172 173 /** 174 * The separator character that is the opposite of the system separator. 175 */ 176 private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR); 177 178 private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$"); 179 180 private static final int IPV4_MAX_OCTET_VALUE = 255; 181 182 private static final int IPV6_MAX_HEX_GROUPS = 8; 183 184 private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4; 185 186 private static final int MAX_UNSIGNED_SHORT = 0xffff; 187 188 private static final int BASE_16 = 16; 189 190 private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$"); 191 192 /** 193 * Concatenates a fileName to a base path using normal command line style rules. 194 * <p> 195 * The effect is equivalent to resultant directory after changing 196 * directory to the first argument, followed by changing directory to 197 * the second argument. 198 * </p> 199 * <p> 200 * The first argument is the base path, the second is the path to concatenate. 201 * The returned path is always normalized via {@link #normalize(String)}, 202 * thus {@code ..} is handled. 203 * </p> 204 * <p> 205 * If {@code pathToAdd} is absolute (has an absolute prefix), then 206 * it will be normalized and returned. 207 * Otherwise, the paths will be joined, normalized and returned. 208 * </p> 209 * <p> 210 * The output will be the same on both Unix and Windows except 211 * for the separator character. 
212 * </p> 213 * <pre> 214 * /foo/ + bar --> /foo/bar 215 * /foo + bar --> /foo/bar 216 * /foo + /bar --> /bar 217 * /foo + C:/bar --> C:/bar 218 * /foo + C:bar --> C:bar [1] 219 * /foo/a/ + ../bar --> /foo/bar 220 * /foo/ + ../../bar --> null 221 * /foo/ + /bar --> /bar 222 * /foo/.. + /bar --> /bar 223 * /foo + bar/c.txt --> /foo/bar/c.txt 224 * /foo/c.txt + bar --> /foo/c.txt/bar [2] 225 * </pre> 226 * <p> 227 * [1] Note that the Windows relative drive prefix is unreliable when 228 * used with this method. 229 * </p> 230 * <p> 231 * [2] Note that the first parameter must be a path. If it ends with a name, then 232 * the name will be built into the concatenated path. If this might be a problem, 233 * use {@link #getFullPath(String)} on the base path argument. 234 * </p> 235 * 236 * @param basePath the base path to attach to, always treated as a path. 237 * @param fullFileNameToAdd the file name (or path) to attach to the base. 238 * @return the concatenated path, or null if invalid. 239 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 240 */ 241 public static String concat(final String basePath, final String fullFileNameToAdd) { 242 final int prefix = getPrefixLength(fullFileNameToAdd); 243 if (prefix < 0) { 244 return null; 245 } 246 if (prefix > 0) { 247 return normalize(fullFileNameToAdd); 248 } 249 if (basePath == null) { 250 return null; 251 } 252 final int len = basePath.length(); 253 if (len == 0) { 254 return normalize(fullFileNameToAdd); 255 } 256 final char ch = basePath.charAt(len - 1); 257 if (isSeparator(ch)) { 258 return normalize(basePath + fullFileNameToAdd); 259 } 260 return normalize(basePath + '/' + fullFileNameToAdd); 261 } 262 263 /** 264 * Determines whether the {@code parent} directory contains the {@code child} (a file or directory). 265 * This does not read from the file system, and there is no guarantee or expectation that 266 * these paths actually exist. 
267 * <p> 268 * The files names are expected to be normalized. 269 * </p> 270 * 271 * Edge cases: 272 * <ul> 273 * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li> 274 * <li>A directory does not contain itself: return false</li> 275 * <li>A null child file is not contained in any parent: return false</li> 276 * </ul> 277 * 278 * @param canonicalParent the path string to consider as the parent. 279 * @param canonicalChild the path string to consider as the child. 280 * @return true if the candidate leaf is under the specified composite. False otherwise. 281 * @since 2.2 282 * @see FileUtils#directoryContains(File, File) 283 */ 284 public static boolean directoryContains(final String canonicalParent, final String canonicalChild) { 285 if (isEmpty(canonicalParent) || isEmpty(canonicalChild) || IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) { 286 return false; 287 } 288 final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR); 289 final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator; 290 return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator); 291 } 292 293 /** 294 * Does the work of getting the path. 295 * 296 * @param fileName the file name. 297 * @param includeEndSeparator true to include the end separator. 298 * @return the path. 299 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 
300 */ 301 private static String doGetFullPath(final String fileName, final boolean includeEndSeparator) { 302 if (fileName == null) { 303 return null; 304 } 305 final int prefix = getPrefixLength(fileName); 306 if (prefix < 0) { 307 return null; 308 } 309 if (prefix >= fileName.length()) { 310 if (includeEndSeparator) { 311 return getPrefix(fileName); // add end slash if necessary 312 } 313 return fileName; 314 } 315 final int index = indexOfLastSeparator(fileName); 316 if (index < 0) { 317 return fileName.substring(0, prefix); 318 } 319 int end = index + (includeEndSeparator ? 1 : 0); 320 if (end == 0) { 321 end++; 322 } 323 return fileName.substring(0, end); 324 } 325 326 /** 327 * Does the work of getting the path. 328 * 329 * @param fileName the file name. 330 * @param separatorAdd 0 to omit the end separator, 1 to return it. 331 * @return the path. 332 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 333 */ 334 private static String doGetPath(final String fileName, final int separatorAdd) { 335 if (fileName == null) { 336 return null; 337 } 338 final int prefix = getPrefixLength(fileName); 339 if (prefix < 0) { 340 return null; 341 } 342 final int index = indexOfLastSeparator(fileName); 343 final int endIndex = index + separatorAdd; 344 if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) { 345 return EMPTY_STRING; 346 } 347 return requireNonNullChars(fileName.substring(prefix, endIndex)); 348 } 349 350 /** 351 * Internal method to perform the normalization. 352 * 353 * @param fileName the file name. 354 * @param separator The separator character to use. 355 * @param keepSeparator true to keep the final separator. 356 * @return the normalized fileName. 357 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 
358 */ 359 private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) { 360 if (fileName == null) { 361 return null; 362 } 363 requireNonNullChars(fileName); 364 int size = fileName.length(); 365 if (size == 0) { 366 return fileName; 367 } 368 final int prefix = getPrefixLength(fileName); 369 if (prefix < 0) { 370 return null; 371 } 372 final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy 373 fileName.getChars(0, fileName.length(), array, 0); 374 // fix separators throughout 375 final char otherSeparator = flipSeparator(separator); 376 for (int i = 0; i < array.length; i++) { 377 if (array[i] == otherSeparator) { 378 array[i] = separator; 379 } 380 } 381 // add extra separator on the end to simplify code below 382 boolean lastIsDirectory = true; 383 if (array[size - 1] != separator) { 384 array[size++] = separator; 385 lastIsDirectory = false; 386 } 387 // adjoining slashes 388 // If we get here, prefix can only be 0 or greater, size 1 or greater 389 // If prefix is 0, set loop start to 1 to prevent index errors 390 for (int i = prefix != 0 ? prefix : 1; i < size; i++) { 391 if (array[i] == separator && array[i - 1] == separator) { 392 System.arraycopy(array, i, array, i - 1, size - i); 393 size--; 394 i--; 395 } 396 } 397 // period slash 398 for (int i = prefix + 1; i < size; i++) { 399 if (array[i] == separator && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == separator)) { 400 if (i == size - 1) { 401 lastIsDirectory = true; 402 } 403 System.arraycopy(array, i + 1, array, i - 1, size - i); 404 size -= 2; 405 i--; 406 } 407 } 408 // double period slash 409 outer: for (int i = prefix + 2; i < size; i++) { 410 if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' 
&& (i == prefix + 2 || array[i - 3] == separator)) { 411 if (i == prefix + 2) { 412 return null; 413 } 414 if (i == size - 1) { 415 lastIsDirectory = true; 416 } 417 int j; 418 for (j = i - 4; j >= prefix; j--) { 419 if (array[j] == separator) { 420 // remove b/../ from a/b/../c 421 System.arraycopy(array, i + 1, array, j + 1, size - i); 422 size -= i - j; 423 i = j + 1; 424 continue outer; 425 } 426 } 427 // remove a/../ from a/../c 428 System.arraycopy(array, i + 1, array, prefix, size - i); 429 size -= i + 1 - prefix; 430 i = prefix + 1; 431 } 432 } 433 if (size <= 0) { // should never be less than 0 434 return EMPTY_STRING; 435 } 436 if (size <= prefix || lastIsDirectory && keepSeparator) { 437 return new String(array, 0, size); // keep trailing separator 438 } 439 return new String(array, 0, size - 1); // lose trailing separator 440 } 441 442 /** 443 * Checks whether two file names are exactly equal. 444 * <p> 445 * No processing is performed on the file names other than comparison. 446 * This is merely a null-safe case-sensitive string equality. 447 * </p> 448 * 449 * @param fileName1 the first file name, may be null. 450 * @param fileName2 the second file name, may be null. 451 * @return true if the file names are equal, null equals null. 452 * @see IOCase#SENSITIVE 453 */ 454 public static boolean equals(final String fileName1, final String fileName2) { 455 return equals(fileName1, fileName2, false, IOCase.SENSITIVE); 456 } 457 458 /** 459 * Checks whether two file names are equal, optionally normalizing and providing 460 * control over the case-sensitivity. 461 * 462 * @param fileName1 the first file name, may be null. 463 * @param fileName2 the second file name, may be null. 464 * @param normalize whether to normalize the file names. 465 * @param ioCase what case sensitivity rule to use, null means case-sensitive. 466 * @return true if the file names are equal, null equals null. 
467 * @since 1.3 468 */ 469 public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) { 470 if (fileName1 == null || fileName2 == null) { 471 return fileName1 == null && fileName2 == null; 472 } 473 if (normalize) { 474 fileName1 = normalize(fileName1); 475 if (fileName1 == null) { 476 return false; 477 } 478 fileName2 = normalize(fileName2); 479 if (fileName2 == null) { 480 return false; 481 } 482 } 483 return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2); 484 } 485 486 /** 487 * Checks whether two file names are equal after both have been normalized. 488 * <p> 489 * Both file names are first passed to {@link #normalize(String)}. 490 * The check is then performed in a case-sensitive manner. 491 * </p> 492 * 493 * @param fileName1 the first file name, may be null. 494 * @param fileName2 the second file name, may be null. 495 * @return true if the file names are equal, null equals null. 496 * @see IOCase#SENSITIVE 497 */ 498 public static boolean equalsNormalized(final String fileName1, final String fileName2) { 499 return equals(fileName1, fileName2, true, IOCase.SENSITIVE); 500 } 501 502 /** 503 * Checks whether two file names are equal using the case rules of the system 504 * after both have been normalized. 505 * <p> 506 * Both file names are first passed to {@link #normalize(String)}. 507 * The check is then performed case-sensitively on Unix and 508 * case-insensitively on Windows. 509 * </p> 510 * 511 * @param fileName1 the first file name, may be null. 512 * @param fileName2 the second file name, may be null. 513 * @return true if the file names are equal, null equals null. 514 * @see IOCase#SYSTEM 515 */ 516 public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) { 517 return equals(fileName1, fileName2, true, IOCase.SYSTEM); 518 } 519 520 /** 521 * Checks whether two file names are equal using the case rules of the system. 
522 * <p> 523 * No processing is performed on the file names other than comparison. 524 * The check is case-sensitive on Unix and case-insensitive on Windows. 525 * </p> 526 * 527 * @param fileName1 the first file name, may be null. 528 * @param fileName2 the second file name, may be null. 529 * @return true if the file names are equal, null equals null. 530 * @see IOCase#SYSTEM 531 */ 532 public static boolean equalsOnSystem(final String fileName1, final String fileName2) { 533 return equals(fileName1, fileName2, false, IOCase.SYSTEM); 534 } 535 536 /** 537 * Flips the Windows name separator to Linux and vice-versa. 538 * 539 * @param ch The Windows or Linux name separator. 540 * @return The Windows or Linux name separator. 541 */ 542 static char flipSeparator(final char ch) { 543 if (ch == UNIX_NAME_SEPARATOR) { 544 return WINDOWS_NAME_SEPARATOR; 545 } 546 if (ch == WINDOWS_NAME_SEPARATOR) { 547 return UNIX_NAME_SEPARATOR; 548 } 549 throw new IllegalArgumentException(String.valueOf(ch)); 550 } 551 552 /** 553 * Special handling for NTFS ADS: Don't accept colon in the file name. 554 * 555 * @param fileName a file name. 556 * @return ADS offsets. 557 */ 558 private static int getAdsCriticalOffset(final String fileName) { 559 // Step 1: Remove leading path segments. 560 final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR); 561 final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR); 562 if (offset1 == -1) { 563 if (offset2 == -1) { 564 return 0; 565 } 566 return offset2 + 1; 567 } 568 if (offset2 == -1) { 569 return offset1 + 1; 570 } 571 return Math.max(offset1, offset2) + 1; 572 } 573 574 /** 575 * Gets the base name, minus the full path and extension, from a full file name. 576 * <p> 577 * This method will handle a path in either Unix or Windows format. 578 * The text after the last forward or backslash and before the last period is returned. 
579 * </p> 580 * <pre> 581 * a/b/c.txt --> c 582 * a\b\c.txt --> c 583 * a/b/c.foo.txt --> c.foo 584 * a.txt --> a 585 * a/b/c --> c 586 * a/b/c/ --> "" 587 * </pre> 588 * <p> 589 * The output will be the same irrespective of the machine that the code is running on. 590 * </p> 591 * 592 * @param fileName the file name, null returns null. 593 * @return the name of the file without the path, or an empty string if none exists. 594 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 595 */ 596 public static String getBaseName(final String fileName) { 597 return removeExtension(getName(fileName)); 598 } 599 600 /** 601 * Gets the extension of a file name. 602 * <p> 603 * This method returns the textual part of the file name after the last period. 604 * There must be no directory separator after the period. 605 * </p> 606 * <pre> 607 * foo.txt --> "txt" 608 * a/b/c.jpg --> "jpg" 609 * a/b.txt/c --> "" 610 * a/b/c --> "" 611 * </pre> 612 * <p> 613 * The output will be the same irrespective of the machine that the code is running on, with the 614 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 615 * </p> 616 * <p> 617 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt". 618 * In this case, the name wouldn't be the name of a file, but the identifier of an 619 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 620 * ".txt" here, which would be misleading. Commons IO 2.7 and later throw 621 * an {@link IllegalArgumentException} for names like this. 622 * </p> 623 * 624 * @param fileName the file name to retrieve the extension of. 625 * @return the extension of the file or an empty string if none exists or {@code null} 626 * if the file name is {@code null}. 
627 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact, 628 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 629 */ 630 public static String getExtension(final String fileName) throws IllegalArgumentException { 631 if (fileName == null) { 632 return null; 633 } 634 final int index = indexOfExtension(fileName); 635 if (index == NOT_FOUND) { 636 return EMPTY_STRING; 637 } 638 return fileName.substring(index + 1); 639 } 640 641 /** 642 * Gets the full path (prefix + path) from a full file name. 643 * <p> 644 * This method will handle a file in either Unix or Windows format. 645 * The method is entirely text based, and returns the text before and 646 * including the last forward or backslash. 647 * </p> 648 * <pre> 649 * C:\a\b\c.txt --> C:\a\b\ 650 * ~/a/b/c.txt --> ~/a/b/ 651 * a.txt --> "" 652 * a/b/c --> a/b/ 653 * a/b/c/ --> a/b/c/ 654 * C: --> C: 655 * C:\ --> C:\ 656 * ~ --> ~/ 657 * ~/ --> ~/ 658 * ~user --> ~user/ 659 * ~user/ --> ~user/ 660 * </pre> 661 * <p> 662 * The output will be the same irrespective of the machine that the code is running on. 663 * </p> 664 * 665 * @param fileName the file name, null returns null. 666 * @return the path of the file, an empty string if none exists, null if invalid. 667 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 668 */ 669 public static String getFullPath(final String fileName) { 670 return doGetFullPath(fileName, true); 671 } 672 673 /** 674 * Gets the full path (prefix + path) from a full file name, 675 * excluding the final directory separator. 676 * <p> 677 * This method will handle a file in either Unix or Windows format. 678 * The method is entirely text based, and returns the text before the 679 * last forward or backslash. 
680 * </p> 681 * <pre> 682 * C:\a\b\c.txt --> C:\a\b 683 * ~/a/b/c.txt --> ~/a/b 684 * a.txt --> "" 685 * a/b/c --> a/b 686 * a/b/c/ --> a/b/c 687 * C: --> C: 688 * C:\ --> C:\ 689 * ~ --> ~ 690 * ~/ --> ~ 691 * ~user --> ~user 692 * ~user/ --> ~user 693 * </pre> 694 * <p> 695 * The output will be the same irrespective of the machine that the code is running on. 696 * </p> 697 * 698 * @param fileName the file name, null returns null. 699 * @return the path of the file, an empty string if none exists, null if invalid. 700 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 701 */ 702 public static String getFullPathNoEndSeparator(final String fileName) { 703 return doGetFullPath(fileName, false); 704 } 705 706 /** 707 * Gets the name minus the path from a full file name. 708 * <p> 709 * This method will handle a file in either Unix or Windows format. 710 * The text after the last forward or backslash is returned. 711 * </p> 712 * <pre> 713 * a/b/c.txt --> c.txt 714 * a\b\c.txt --> c.txt 715 * a.txt --> a.txt 716 * a/b/c --> c 717 * a/b/c/ --> "" 718 * </pre> 719 * <p> 720 * The output will be the same irrespective of the machine that the code is running on. 721 * </p> 722 * 723 * @param fileName the file name, null returns null. 724 * @return the name of the file without the path, or an empty string if none exists. 725 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 726 */ 727 public static String getName(final String fileName) { 728 if (fileName == null) { 729 return null; 730 } 731 return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1); 732 } 733 734 /** 735 * Gets the path from a full file name, which excludes the prefix and the name. 736 * <p> 737 * This method will handle a file in either Unix or Windows format. 738 * The method is entirely text based, and returns the text before and 739 * including the last forward or backslash. 
740 * </p> 741 * <pre> 742 * C:\a\b\c.txt --> a\b\ 743 * ~/a/b/c.txt --> a/b/ 744 * a.txt --> "" 745 * a/b/c --> a/b/ 746 * a/b/c/ --> a/b/c/ 747 * </pre> 748 * <p> 749 * The output will be the same irrespective of the machine that the code is running on. 750 * </p> 751 * <p> 752 * This method drops the prefix from the result. 753 * See {@link #getFullPath(String)} for the method that retains the prefix. 754 * </p> 755 * 756 * @param fileName the file name, null returns null. 757 * @return the path of the file, an empty string if none exists, null if invalid. 758 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 759 */ 760 public static String getPath(final String fileName) { 761 return doGetPath(fileName, 1); 762 } 763 764 /** 765 * Gets the path (which excludes the prefix) from a full file name, and 766 * also excluding the final directory separator. 767 * <p> 768 * This method will handle a file in either Unix or Windows format. 769 * The method is entirely text based, and returns the text before the 770 * last forward or backslash. 771 * </p> 772 * <pre> 773 * C:\a\b\c.txt --> a\b 774 * ~/a/b/c.txt --> a/b 775 * a.txt --> "" 776 * a/b/c --> a/b 777 * a/b/c/ --> a/b/c 778 * </pre> 779 * <p> 780 * The output will be the same irrespective of the machine that the code is running on. 781 * </p> 782 * <p> 783 * This method drops the prefix from the result. 784 * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix. 785 * </p> 786 * 787 * @param fileName the file name, null returns null. 788 * @return the path of the file, an empty string if none exists, null if invalid. 789 * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}). 
790 */ 791 public static String getPathNoEndSeparator(final String fileName) { 792 return doGetPath(fileName, 0); 793 } 794 795 /** 796 * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name, 797 * <p> 798 * This method will handle a file in either Unix or Windows format. 799 * The prefix includes the first slash in the full file name where applicable. 800 * </p> 801 * <pre> 802 * Windows: 803 * a\b\c.txt --> "" --> relative 804 * \a\b\c.txt --> "\" --> current drive absolute 805 * C:a\b\c.txt --> "C:" --> drive relative 806 * C:\a\b\c.txt --> "C:\" --> absolute 807 * \\server\a\b\c.txt --> "\\server\" --> UNC 808 * 809 * Unix: 810 * a/b/c.txt --> "" --> relative 811 * /a/b/c.txt --> "/" --> absolute 812 * ~/a/b/c.txt --> "~/" --> current user 813 * ~ --> "~/" --> current user (slash added) 814 * ~user/a/b/c.txt --> "~user/" --> named user 815 * ~user --> "~user/" --> named user (slash added) 816 * </pre> 817 * <p> 818 * The output will be the same irrespective of the machine that the code is running on. 819 * ie. both Unix and Windows prefixes are matched regardless. 820 * </p> 821 * 822 * @param fileName the file name, null returns null. 823 * @return the prefix of the file, null if invalid. 824 * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}). 825 */ 826 public static String getPrefix(final String fileName) { 827 if (fileName == null) { 828 return null; 829 } 830 final int len = getPrefixLength(fileName); 831 if (len < 0) { 832 return null; 833 } 834 if (len > fileName.length()) { 835 requireNonNullChars(fileName); 836 return fileName + UNIX_NAME_SEPARATOR; 837 } 838 return requireNonNullChars(fileName.substring(0, len)); 839 } 840 841 /** 842 * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}. 843 * <p> 844 * This method will handle a file in either Unix or Windows format. 
845 * </p> 846 * <p> 847 * The prefix length includes the first slash in the full file name 848 * if applicable. Thus, it is possible that the length returned is greater 849 * than the length of the input string. 850 * </p> 851 * <pre> 852 * Windows: 853 * a\b\c.txt --> 0 --> relative 854 * \a\b\c.txt --> 1 --> current drive absolute 855 * C:a\b\c.txt --> 2 --> drive relative 856 * C:\a\b\c.txt --> 3 --> absolute 857 * \\server\a\b\c.txt --> 9 --> UNC 858 * \\\a\b\c.txt --> -1 --> error 859 * 860 * Unix: 861 * a/b/c.txt --> 0 --> relative 862 * /a/b/c.txt --> 1 --> absolute 863 * ~/a/b/c.txt --> 2 --> current user 864 * ~ --> 2 --> current user (slash added) 865 * ~user/a/b/c.txt --> 6 --> named user 866 * ~user --> 6 --> named user (slash added) 867 * //server/a/b/c.txt --> 9 868 * ///a/b/c.txt --> -1 --> error 869 * C: --> 0 --> valid file name as only null character and / are reserved characters 870 * </pre> 871 * <p> 872 * The output will be the same irrespective of the machine that the code is running on. 873 * ie. both Unix and Windows prefixes are matched regardless. 874 * </p> 875 * <p> 876 * Note that a leading // (or \\) is used to indicate a UNC name on Windows. 877 * These must be followed by a server name, so double-slashes are not collapsed 878 * to a single slash at the start of the file name. 879 * </p> 880 * 881 * @param fileName the file name to find the prefix in, null returns -1. 882 * @return the length of the prefix, -1 if invalid or null. 883 */ 884 public static int getPrefixLength(final String fileName) { 885 if (fileName == null) { 886 return NOT_FOUND; 887 } 888 final int len = fileName.length(); 889 if (len == 0) { 890 return 0; 891 } 892 char ch0 = fileName.charAt(0); 893 if (ch0 == ':') { 894 return NOT_FOUND; 895 } 896 if (len == 1) { 897 if (ch0 == '~') { 898 return 2; // return a length greater than the input 899 } 900 return isSeparator(ch0) ? 
1 : 0; 901 } 902 if (ch0 == '~') { 903 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1); 904 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1); 905 if (posUnix == NOT_FOUND && posWin == NOT_FOUND) { 906 return len + 1; // return a length greater than the input 907 } 908 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 909 posWin = posWin == NOT_FOUND ? posUnix : posWin; 910 return Math.min(posUnix, posWin) + 1; 911 } 912 final char ch1 = fileName.charAt(1); 913 if (ch1 == ':') { 914 ch0 = Character.toUpperCase(ch0); 915 if (ch0 >= 'A' && ch0 <= 'Z') { 916 if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) { 917 return 0; 918 } 919 if (len == 2 || !isSeparator(fileName.charAt(2))) { 920 return 2; 921 } 922 return 3; 923 } 924 if (ch0 == UNIX_NAME_SEPARATOR) { 925 return 1; 926 } 927 return NOT_FOUND; 928 929 } 930 if (!isSeparator(ch0) || !isSeparator(ch1)) { 931 return isSeparator(ch0) ? 1 : 0; 932 } 933 int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2); 934 int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2); 935 if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) { 936 return NOT_FOUND; 937 } 938 posUnix = posUnix == NOT_FOUND ? posWin : posUnix; 939 posWin = posWin == NOT_FOUND ? posUnix : posWin; 940 final int pos = Math.min(posUnix, posWin) + 1; 941 final String hostnamePart = fileName.substring(2, pos - 1); 942 return isValidHostName(hostnamePart) ? pos : NOT_FOUND; 943 } 944 945 /** 946 * Returns the index of the last extension separator character, which is a period. 947 * <p> 948 * This method also checks that there is no directory separator after the last period. To do this it uses 949 * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format. 950 * </p> 951 * <p> 952 * The output will be the same irrespective of the machine that the code is running on, with the 953 * exception of a possible {@link IllegalArgumentException} on Windows (see below). 
954 * </p> 955 * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt". 956 * In this case, the name wouldn't be the name of a file, but the identifier of an 957 * alternate data stream (bar.txt) on the file foo.exe. The method used to return 958 * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing 959 * an {@link IllegalArgumentException} for names like this. 960 * 961 * @param fileName 962 * the file name to find the last extension separator in, null returns -1. 963 * @return the index of the last extension separator character, or -1 if there is no such character. 964 * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact, 965 * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt". 966 */ 967 public static int indexOfExtension(final String fileName) throws IllegalArgumentException { 968 if (fileName == null) { 969 return NOT_FOUND; 970 } 971 if (isSystemWindows()) { 972 // Special handling for NTFS ADS: Don't accept colon in the file name. 973 final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); 974 if (offset != -1) { 975 throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); 976 } 977 } 978 final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); 979 final int lastSeparator = indexOfLastSeparator(fileName); 980 return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; 981 } 982 983 /** 984 * Returns the index of the last directory separator character. 985 * <p> 986 * This method will handle a file in either Unix or Windows format. 987 * The position of the last forward or backslash is returned. 988 * <p> 989 * The output will be the same irrespective of the machine that the code is running on. 990 * 991 * @param fileName the file name to find the last path separator in, null returns -1. 
992 * @return the index of the last separator character, or -1 if there 993 * is no such character. 994 */ 995 public static int indexOfLastSeparator(final String fileName) { 996 if (fileName == null) { 997 return NOT_FOUND; 998 } 999 final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR); 1000 final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR); 1001 return Math.max(lastUnixPos, lastWindowsPos); 1002 } 1003 1004 private static boolean isEmpty(final String string) { 1005 return string == null || string.isEmpty(); 1006 } 1007 1008 /** 1009 * Checks whether the extension of the file name is one of those specified. 1010 * <p> 1011 * This method obtains the extension as the textual part of the file name 1012 * after the last period. There must be no directory separator after the period. 1013 * The extension check is case-sensitive on all platforms. 1014 * 1015 * @param fileName the file name, null returns false. 1016 * @param extensions the extensions to check for, null checks for no extension. 1017 * @return true if the file name is one of the extensions. 1018 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1019 */ 1020 public static boolean isExtension(final String fileName, final Collection<String> extensions) { 1021 if (fileName == null) { 1022 return false; 1023 } 1024 requireNonNullChars(fileName); 1025 if (extensions == null || extensions.isEmpty()) { 1026 return indexOfExtension(fileName) == NOT_FOUND; 1027 } 1028 return extensions.contains(getExtension(fileName)); 1029 } 1030 1031 /** 1032 * Checks whether the extension of the file name is that specified. 1033 * <p> 1034 * This method obtains the extension as the textual part of the file name 1035 * after the last period. There must be no directory separator after the period. 1036 * The extension check is case-sensitive on all platforms. 1037 * 1038 * @param fileName the file name, null returns false. 
1039 * @param extension the extension to check for, null or empty checks for no extension. 1040 * @return true if the file name has the specified extension. 1041 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1042 */ 1043 public static boolean isExtension(final String fileName, final String extension) { 1044 if (fileName == null) { 1045 return false; 1046 } 1047 requireNonNullChars(fileName); 1048 if (isEmpty(extension)) { 1049 return indexOfExtension(fileName) == NOT_FOUND; 1050 } 1051 return getExtension(fileName).equals(extension); 1052 } 1053 1054 /** 1055 * Checks whether the extension of the file name is one of those specified. 1056 * <p> 1057 * This method obtains the extension as the textual part of the file name 1058 * after the last period. There must be no directory separator after the period. 1059 * The extension check is case-sensitive on all platforms. 1060 * 1061 * @param fileName the file name, null returns false. 1062 * @param extensions the extensions to check for, null checks for no extension. 1063 * @return true if the file name is one of the extensions. 1064 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1065 */ 1066 public static boolean isExtension(final String fileName, final String... extensions) { 1067 if (fileName == null) { 1068 return false; 1069 } 1070 requireNonNullChars(fileName); 1071 1072 if (extensions == null || extensions.length == 0) { 1073 return indexOfExtension(fileName) == NOT_FOUND; 1074 } 1075 final String fileExt = getExtension(fileName); 1076 return Stream.of(extensions).anyMatch(fileExt::equals); 1077 } 1078 1079 /** 1080 * Checks whether a given string represents a valid IPv4 address. 1081 * 1082 * @param name the name to validate. 1083 * @return true if the given name is a valid IPv4 address. 
1084 */ 1085 // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address 1086 private static boolean isIPv4Address(final String name) { 1087 final Matcher m = IPV4_PATTERN.matcher(name); 1088 if (!m.matches() || m.groupCount() != 4) { 1089 return false; 1090 } 1091 // verify that address subgroups are legal 1092 for (int i = 1; i <= 4; i++) { 1093 final String ipSegment = m.group(i); 1094 final int iIpSegment = Integer.parseInt(ipSegment); 1095 if (iIpSegment > IPV4_MAX_OCTET_VALUE || ipSegment.length() > 1 && ipSegment.startsWith("0")) { 1096 return false; 1097 } 1098 } 1099 return true; 1100 } 1101 1102 // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address 1103 /** 1104 * Checks whether a given string represents a valid IPv6 address. 1105 * 1106 * @param inet6Address the name to validate. 1107 * @return true if the given name is a valid IPv6 address. 1108 */ 1109 private static boolean isIPv6Address(final String inet6Address) { 1110 final boolean containsCompressedZeroes = inet6Address.contains("::"); 1111 if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) { 1112 return false; 1113 } 1114 if (inet6Address.startsWith(":") && !inet6Address.startsWith("::") 1115 || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) { 1116 return false; 1117 } 1118 String[] octets = inet6Address.split(":"); 1119 if (containsCompressedZeroes) { 1120 final List<String> octetList = new ArrayList<>(Arrays.asList(octets)); 1121 if (inet6Address.endsWith("::")) { 1122 // String.split() drops ending empty segments 1123 octetList.add(""); 1124 } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) { 1125 octetList.remove(0); 1126 } 1127 octets = octetList.toArray(EMPTY_STRING_ARRAY); 1128 } 1129 if (octets.length > IPV6_MAX_HEX_GROUPS) { 1130 return false; 1131 } 1132 int validOctets = 0; 1133 int emptyOctets = 0; // consecutive empty chunks 1134 for 
(int index = 0; index < octets.length; index++) { 1135 final String octet = octets[index]; 1136 if (octet.isEmpty()) { 1137 emptyOctets++; 1138 if (emptyOctets > 1) { 1139 return false; 1140 } 1141 } else { 1142 emptyOctets = 0; 1143 // Is last chunk an IPv4 address? 1144 if (index == octets.length - 1 && octet.contains(".")) { 1145 if (!isIPv4Address(octet)) { 1146 return false; 1147 } 1148 validOctets += 2; 1149 continue; 1150 } 1151 if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) { 1152 return false; 1153 } 1154 final int octetInt; 1155 try { 1156 octetInt = Integer.parseInt(octet, BASE_16); 1157 } catch (final NumberFormatException e) { 1158 return false; 1159 } 1160 if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) { 1161 return false; 1162 } 1163 } 1164 validOctets++; 1165 } 1166 return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes); 1167 } 1168 1169 /** 1170 * Checks whether a given string is a valid host name according to 1171 * RFC 3986 - not accepting IP addresses. 1172 * 1173 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1174 * @param name the hostname to validate. 1175 * @return true if the given name is a valid host name. 1176 */ 1177 private static boolean isRFC3986HostName(final String name) { 1178 final String[] parts = name.split("\\.", -1); 1179 for (int i = 0; i < parts.length; i++) { 1180 if (parts[i].isEmpty()) { 1181 // trailing period is legal, otherwise we've hit a .. sequence 1182 return i == parts.length - 1; 1183 } 1184 if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) { 1185 return false; 1186 } 1187 } 1188 return true; 1189 } 1190 1191 /** 1192 * Checks if the character is a separator. 1193 * 1194 * @param ch the character to check. 1195 * @return true if it is a separator character. 
1196 */ 1197 private static boolean isSeparator(final char ch) { 1198 return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR; 1199 } 1200 1201 /** 1202 * Determines if Windows file system is in use. 1203 * 1204 * @return true if the system is Windows. 1205 */ 1206 static boolean isSystemWindows() { 1207 return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR; 1208 } 1209 1210 /** 1211 * Checks whether a given string is a valid host name according to 1212 * RFC 3986. 1213 * 1214 * <p>Accepted are IP addresses (v4 and v6) as well as what the 1215 * RFC calls a "reg-name". Percent encoded names don't seem to be 1216 * valid names in UNC paths.</p> 1217 * 1218 * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2" 1219 * @param name the hostname to validate. 1220 * @return true if the given name is a valid host name. 1221 */ 1222 private static boolean isValidHostName(final String name) { 1223 return isIPv6Address(name) || isRFC3986HostName(name); 1224 } 1225 1226 /** 1227 * Normalizes a path, removing double and single period path steps. 1228 * <p> 1229 * This method normalizes a path to a standard format. 1230 * The input may contain separators in either Unix or Windows format. 1231 * The output will contain separators in the format of the system. 1232 * <p> 1233 * A trailing slash will be retained. 1234 * A double slash will be merged to a single slash (but UNC names are handled). 1235 * A single period path segment will be removed. 1236 * A double period will cause that path segment and the one before to be removed. 1237 * If the double period has no parent path segment, {@code null} is returned. 1238 * <p> 1239 * The output will be the same on both Unix and Windows except 1240 * for the separator character. 
1241 * <pre> 1242 * /foo// --> /foo/ 1243 * /foo/./ --> /foo/ 1244 * /foo/../bar --> /bar 1245 * /foo/../bar/ --> /bar/ 1246 * /foo/../bar/../baz --> /baz 1247 * //foo//./bar --> //foo/bar 1248 * /../ --> null 1249 * ../foo --> null 1250 * foo/bar/.. --> foo/ 1251 * foo/../../bar --> null 1252 * foo/../bar --> bar 1253 * //server/foo/../bar --> //server/bar 1254 * //server/../bar --> null 1255 * C:\foo\..\bar --> C:\bar 1256 * C:\..\bar --> null 1257 * ~/foo/../bar/ --> ~/bar/ 1258 * ~/../bar --> null 1259 * </pre> 1260 * (Note the file separator will be correct for Windows/Unix.) 1261 * 1262 * @param fileName the file name to normalize, null returns null. 1263 * @return the normalized fileName, or null if invalid. 1264 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1265 */ 1266 public static String normalize(final String fileName) { 1267 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true); 1268 } 1269 1270 /** 1271 * Normalizes a path, removing double and single period path steps. 1272 * <p> 1273 * This method normalizes a path to a standard format. 1274 * The input may contain separators in either Unix or Windows format. 1275 * The output will contain separators in the format specified. 1276 * <p> 1277 * A trailing slash will be retained. 1278 * A double slash will be merged to a single slash (but UNC names are handled). 1279 * A single period path segment will be removed. 1280 * A double period will cause that path segment and the one before to be removed. 1281 * If the double period has no parent path segment to work with, {@code null} 1282 * is returned. 1283 * <p> 1284 * The output will be the same on both Unix and Windows except 1285 * for the separator character. 
1286 * <pre> 1287 * /foo// --> /foo/ 1288 * /foo/./ --> /foo/ 1289 * /foo/../bar --> /bar 1290 * /foo/../bar/ --> /bar/ 1291 * /foo/../bar/../baz --> /baz 1292 * //foo//./bar --> /foo/bar 1293 * /../ --> null 1294 * ../foo --> null 1295 * foo/bar/.. --> foo/ 1296 * foo/../../bar --> null 1297 * foo/../bar --> bar 1298 * //server/foo/../bar --> //server/bar 1299 * //server/../bar --> null 1300 * C:\foo\..\bar --> C:\bar 1301 * C:\..\bar --> null 1302 * ~/foo/../bar/ --> ~/bar/ 1303 * ~/../bar --> null 1304 * </pre> 1305 * The output will be the same on both Unix and Windows including 1306 * the separator character. 1307 * 1308 * @param fileName the file name to normalize, null returns null. 1309 * @param unixSeparator {@code true} if a Unix separator should 1310 * be used or {@code false} if a Windows separator should be used. 1311 * @return the normalized fileName, or null if invalid. 1312 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1313 * @since 2.0 1314 */ 1315 public static String normalize(final String fileName, final boolean unixSeparator) { 1316 return doNormalize(fileName, toSeparator(unixSeparator), true); 1317 } 1318 1319 /** 1320 * Normalizes a path, removing double and single period path steps, 1321 * and removing any final directory separator. 1322 * <p> 1323 * This method normalizes a path to a standard format. 1324 * The input may contain separators in either Unix or Windows format. 1325 * The output will contain separators in the format of the system. 1326 * <p> 1327 * A trailing slash will be removed. 1328 * A double slash will be merged to a single slash (but UNC names are handled). 1329 * A single period path segment will be removed. 1330 * A double period will cause that path segment and the one before to be removed. 1331 * If the double period has no parent path segment to work with, {@code null} 1332 * is returned. 
1333 * <p> 1334 * The output will be the same on both Unix and Windows except 1335 * for the separator character. 1336 * <pre> 1337 * /foo// --> /foo 1338 * /foo/./ --> /foo 1339 * /foo/../bar --> /bar 1340 * /foo/../bar/ --> /bar 1341 * /foo/../bar/../baz --> /baz 1342 * //foo//./bar --> /foo/bar 1343 * /../ --> null 1344 * ../foo --> null 1345 * foo/bar/.. --> foo 1346 * foo/../../bar --> null 1347 * foo/../bar --> bar 1348 * //server/foo/../bar --> //server/bar 1349 * //server/../bar --> null 1350 * C:\foo\..\bar --> C:\bar 1351 * C:\..\bar --> null 1352 * ~/foo/../bar/ --> ~/bar 1353 * ~/../bar --> null 1354 * </pre> 1355 * (Note the file separator returned will be correct for Windows/Unix) 1356 * 1357 * @param fileName the file name to normalize, null returns null. 1358 * @return the normalized fileName, or null if invalid. 1359 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1360 */ 1361 public static String normalizeNoEndSeparator(final String fileName) { 1362 return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false); 1363 } 1364 1365 /** 1366 * Normalizes a path, removing double and single period path steps, 1367 * and removing any final directory separator. 1368 * <p> 1369 * This method normalizes a path to a standard format. 1370 * The input may contain separators in either Unix or Windows format. 1371 * The output will contain separators in the format specified. 1372 * <p> 1373 * A trailing slash will be removed. 1374 * A double slash will be merged to a single slash (but UNC names are handled). 1375 * A single period path segment will be removed. 1376 * A double period will cause that path segment and the one before to be removed. 1377 * If the double period has no parent path segment to work with, {@code null} 1378 * is returned. 1379 * <p> 1380 * The output will be the same on both Unix and Windows including 1381 * the separator character. 
1382 * <pre> 1383 * /foo// --> /foo 1384 * /foo/./ --> /foo 1385 * /foo/../bar --> /bar 1386 * /foo/../bar/ --> /bar 1387 * /foo/../bar/../baz --> /baz 1388 * //foo//./bar --> /foo/bar 1389 * /../ --> null 1390 * ../foo --> null 1391 * foo/bar/.. --> foo 1392 * foo/../../bar --> null 1393 * foo/../bar --> bar 1394 * //server/foo/../bar --> //server/bar 1395 * //server/../bar --> null 1396 * C:\foo\..\bar --> C:\bar 1397 * C:\..\bar --> null 1398 * ~/foo/../bar/ --> ~/bar 1399 * ~/../bar --> null 1400 * </pre> 1401 * 1402 * @param fileName the file name to normalize, null returns null. 1403 * @param unixSeparator {@code true} if a Unix separator should 1404 * be used or {@code false} if a Windows separator should be used. 1405 * @return the normalized fileName, or null if invalid. 1406 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 1407 * @since 2.0 1408 */ 1409 public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) { 1410 return doNormalize(fileName, toSeparator(unixSeparator), false); 1411 } 1412 1413 /** 1414 * Removes the extension from a fileName. 1415 * <p> 1416 * This method returns the textual part of the file name before the last period. 1417 * There must be no directory separator after the period. 1418 * <pre> 1419 * foo.txt --> foo 1420 * .txt --> "" (empty string) 1421 * a\b\c.jpg --> a\b\c 1422 * /a/b/c.jpg --> /a/b/c 1423 * a\b\c --> a\b\c 1424 * a.b\c --> a.b\c 1425 * </pre> 1426 * <p> 1427 * The output will be the same irrespective of the machine that the code is running on. 1428 * 1429 * @param fileName the file name, null returns null. 1430 * @return the file name minus the extension. 1431 * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}). 
1432 */ 1433 public static String removeExtension(final String fileName) { 1434 if (fileName == null) { 1435 return null; 1436 } 1437 requireNonNullChars(fileName); 1438 final int index = indexOfExtension(fileName); 1439 if (index == NOT_FOUND) { 1440 return fileName; 1441 } 1442 return fileName.substring(0, index); 1443 } 1444 1445 /** 1446 * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions. 1447 * 1448 * This may be used to defend against poison byte attacks. 1449 * 1450 * @param path the path to check. 1451 * @return The input. 1452 * @throws IllegalArgumentException if path contains the null character ({@code U+0000}). 1453 */ 1454 private static String requireNonNullChars(final String path) { 1455 if (path.indexOf(0) >= 0) { 1456 throw new IllegalArgumentException( 1457 "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it"); 1458 } 1459 return path; 1460 } 1461 1462 /** 1463 * Converts all separators to the system separator. 1464 * 1465 * @param path the path to be changed, null ignored. 1466 * @return the updated path. 1467 */ 1468 public static String separatorsToSystem(final String path) { 1469 return FileSystem.getCurrent().normalizeSeparators(path); 1470 } 1471 1472 /** 1473 * Converts all separators to the Unix separator of forward slash. 1474 * 1475 * @param path the path to be changed, null ignored. 1476 * @return the new path. 1477 */ 1478 public static String separatorsToUnix(final String path) { 1479 return FileSystem.LINUX.normalizeSeparators(path); 1480 } 1481 1482 /** 1483 * Converts all separators to the Windows separator of backslash. 1484 * 1485 * @param path the path to be changed, null ignored. 1486 * @return the updated path. 
1487 */ 1488 public static String separatorsToWindows(final String path) { 1489 return FileSystem.WINDOWS.normalizeSeparators(path); 1490 } 1491 1492 /** 1493 * Splits a string into a number of tokens. 1494 * The text is split by '?' and '*'. 1495 * Where multiple '*' occur consecutively they are collapsed into a single '*'. 1496 * 1497 * @param text the text to split. 1498 * @return the array of tokens, never null. 1499 */ 1500 static String[] splitOnTokens(final String text) { 1501 // used by wildcardMatch 1502 // package level so a unit test may run on this 1503 if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) { 1504 return new String[] { text }; 1505 } 1506 final char[] array = text.toCharArray(); 1507 final ArrayList<String> list = new ArrayList<>(); 1508 final StringBuilder buffer = new StringBuilder(); 1509 char prevChar = 0; 1510 for (final char ch : array) { 1511 if (ch == '?' || ch == '*') { 1512 if (buffer.length() != 0) { 1513 list.add(buffer.toString()); 1514 buffer.setLength(0); 1515 } 1516 if (ch == '?') { 1517 list.add("?"); 1518 } else if (prevChar != '*') { // ch == '*' here; check if previous char was '*' 1519 list.add("*"); 1520 } 1521 } else { 1522 buffer.append(ch); 1523 } 1524 prevChar = ch; 1525 } 1526 if (buffer.length() != 0) { 1527 list.add(buffer.toString()); 1528 } 1529 return list.toArray(EMPTY_STRING_ARRAY); 1530 } 1531 1532 /** 1533 * Returns '/' if given true, '\\' otherwise. 1534 * 1535 * @param unixSeparator which separator to return. 1536 * @return '/' if given true, '\\' otherwise. 1537 */ 1538 private static char toSeparator(final boolean unixSeparator) { 1539 return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR; 1540 } 1541 1542 /** 1543 * Checks a fileName to see if it matches the specified wildcard matcher, 1544 * always testing case-sensitive. 1545 * <p> 1546 * The wildcard matcher uses the characters '?' and '*' to represent a 1547 * single or multiple (zero or more) wildcard characters. 
1548 * This is the same as often found on DOS/Unix command lines. 1549 * The check is case-sensitive always. 1550 * <pre> 1551 * wildcardMatch("c.txt", "*.txt") --> true 1552 * wildcardMatch("c.txt", "*.jpg") --> false 1553 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1554 * wildcardMatch("c.txt", "*.???") --> true 1555 * wildcardMatch("c.txt", "*.????") --> false 1556 * </pre> 1557 * The sequence "*?" does not work properly at present in match strings. 1558 * 1559 * @param fileName the file name to match on, may be null. 1560 * @param wildcardMatcher the wildcard string to match against, may be null. 1561 * @return true if the file name matches the wildcard string. 1562 * @see IOCase#SENSITIVE 1563 */ 1564 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) { 1565 return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE); 1566 } 1567 1568 /** 1569 * Checks a fileName to see if it matches the specified wildcard matcher 1570 * allowing control over case-sensitivity. 1571 * <p> 1572 * The wildcard matcher uses the characters '?' and '*' to represent a 1573 * single or multiple (zero or more) wildcard characters. 1574 * The sequence "*?" does not work properly at present in match strings. 1575 * 1576 * @param fileName the file name to match on, may be null. 1577 * @param wildcardMatcher the wildcard string to match against, may be null. 1578 * @param ioCase what case sensitivity rule to use, null means case-sensitive. 1579 * @return true if the file name matches the wildcard string. 
1580 * @since 1.3 1581 */ 1582 public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) { 1583 if (fileName == null && wildcardMatcher == null) { 1584 return true; 1585 } 1586 if (fileName == null || wildcardMatcher == null) { 1587 return false; 1588 } 1589 ioCase = IOCase.value(ioCase, IOCase.SENSITIVE); 1590 final String[] wcs = splitOnTokens(wildcardMatcher); 1591 boolean anyChars = false; 1592 int textIdx = 0; 1593 int wcsIdx = 0; 1594 final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length); 1595 // loop around a backtrack stack, to handle complex * matching 1596 do { 1597 if (!backtrack.isEmpty()) { 1598 final int[] array = backtrack.pop(); 1599 wcsIdx = array[0]; 1600 textIdx = array[1]; 1601 anyChars = true; 1602 } 1603 // loop whilst tokens and text left to process 1604 while (wcsIdx < wcs.length) { 1605 if (wcs[wcsIdx].equals("?")) { 1606 // ? so move to next text char 1607 textIdx++; 1608 if (textIdx > fileName.length()) { 1609 break; 1610 } 1611 anyChars = false; 1612 } else if (wcs[wcsIdx].equals("*")) { 1613 // set any chars status 1614 anyChars = true; 1615 if (wcsIdx == wcs.length - 1) { 1616 textIdx = fileName.length(); 1617 } 1618 } else { 1619 // matching text token 1620 if (anyChars) { 1621 // any chars then try to locate text token 1622 textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]); 1623 if (textIdx == NOT_FOUND) { 1624 // token not found 1625 break; 1626 } 1627 final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]); 1628 if (repeat >= 0) { 1629 backtrack.push(new int[] { wcsIdx, repeat }); 1630 } 1631 } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) { 1632 // matching from current position 1633 // couldn't match token 1634 break; 1635 } 1636 // matched text token, move text index to end of matched token 1637 textIdx += wcs[wcsIdx].length(); 1638 anyChars = false; 1639 } 1640 wcsIdx++; 1641 } 1642 // full match 1643 if (wcsIdx == 
wcs.length && textIdx == fileName.length()) { 1644 return true; 1645 } 1646 } while (!backtrack.isEmpty()); 1647 return false; 1648 } 1649 1650 /** 1651 * Checks a fileName to see if it matches the specified wildcard matcher 1652 * using the case rules of the system. 1653 * <p> 1654 * The wildcard matcher uses the characters '?' and '*' to represent a 1655 * single or multiple (zero or more) wildcard characters. 1656 * This is the same as often found on DOS/Unix command lines. 1657 * The check is case-sensitive on Unix and case-insensitive on Windows. 1658 * <pre> 1659 * wildcardMatch("c.txt", "*.txt") --> true 1660 * wildcardMatch("c.txt", "*.jpg") --> false 1661 * wildcardMatch("a/b/c.txt", "a/b/*") --> true 1662 * wildcardMatch("c.txt", "*.???") --> true 1663 * wildcardMatch("c.txt", "*.????") --> false 1664 * </pre> 1665 * The sequence "*?" does not work properly at present in match strings. 1666 * 1667 * @param fileName the file name to match on. 1668 * @param wildcardMatcher the wildcard string to match against. 1669 * @return true if the file name matches the wildcard string. 1670 * @see IOCase#SYSTEM 1671 */ 1672 public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) { 1673 return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM); 1674 } 1675 1676 /** 1677 * Instances should NOT be constructed in standard programming. 1678 * 1679 * @deprecated TODO Make private in 3.0. 1680 */ 1681 @Deprecated 1682 public FilenameUtils() { 1683 // empty 1684 } 1685}