001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.File;
020import java.util.ArrayDeque;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Deque;
025import java.util.List;
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
028import java.util.stream.Stream;
029
030/**
031 * General file name and file path manipulation utilities. The methods in this class
032 * operate on strings that represent relative or absolute paths. Nothing in this class
033 * ever accesses the file system, or depends on whether a path points to a file that exists.
034 * <p>
035 * When dealing with file names, you can hit problems when moving from a Windows
036 * based development machine to a Unix based production machine.
037 * This class aims to help avoid those problems.
038 * </p>
039 * <p>
040 * <strong>NOTE</strong>: You may be able to avoid using this class entirely simply by
041 * using JDK {@link File File} objects and the two argument constructor
042 * {@link File#File(java.io.File, String) File(File,String)}.
043 * </p>
044 * <p>
045 * Most methods in this class are designed to work the same on both Unix and Windows.
046 * The methods that behave differently have 'System', 'Unix', or 'Windows' in their name.
047 * </p>
048 * <p>
049 * Most methods recognize both separators (forward and backslashes), and both
050 * sets of prefixes. See the Javadoc of each method for details.
051 * </p>
052 * <p>
053 * This class defines seven components within a path (sometimes called a file name or a full file name).
054 * Given an absolute Windows path such as C:\dev\project\file.txt they are:
055 * </p>
056 * <ul>
057 * <li>the full file name, or just file name - C:\dev\project\file.txt</li>
058 * <li>the prefix - C:\</li>
059 * <li>the path - dev\project\</li>
060 * <li>the full path - C:\dev\project\</li>
061 * <li>the name - file.txt</li>
062 * <li>the base name - file</li>
063 * <li>the extension - txt</li>
064 * </ul>
065 * <p>
066 * Given an absolute Unix path such as /dev/project/file.txt they are:
067 * </p>
068 * <ul>
069 * <li>the full file name, or just file name - /dev/project/file.txt</li>
070 * <li>the prefix - /</li>
071 * <li>the path - dev/project/</li>
072 * <li>the full path - /dev/project/</li>
073 * <li>the name - file.txt</li>
074 * <li>the base name - file</li>
075 * <li>the extension - txt</li>
076 * </ul>
077 * <p>
078 * Given a relative Windows path such as dev\project\file.txt they are:
079 * </p>
080 * <ul>
081 * <li>the full file name, or just file name - dev\project\file.txt</li>
082 * <li>the prefix - "" (empty)</li>
083 * <li>the path - dev\project\</li>
084 * <li>the full path - dev\project\</li>
085 * <li>the name - file.txt</li>
086 * <li>the base name - file</li>
087 * <li>the extension - txt</li>
088 * </ul>
089 * <p>
090 * Given a relative Unix path such as dev/project/file.txt they are:
091 * </p>
092 * <ul>
093 * <li>the full file name, or just file name - dev/project/file.txt</li>
094 * <li>the prefix - "" (empty)</li>
095 * <li>the path - dev/project/</li>
096 * <li>the full path - dev/project/</li>
097 * <li>the name - file.txt</li>
098 * <li>the base name - file</li>
099 * <li>the extension - txt</li>
100 * </ul>
101 *
102 *
103 * <p>
104 * This class works best if directory names end with a separator.
105 * If you omit the last separator, it is impossible to determine if the last component
106 * corresponds to a file or a directory. This class treats final components
107 * that do not end with a separator as files, not directories.
108 * </p>
109 * <p>
110 * This class only supports Unix and Windows style names.
111 * Prefixes are matched as follows:
112 * </p>
113 * <pre>
114 * Windows:
115 * a\b\c.txt           --&gt; ""          --&gt; relative
116 * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
117 * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
118 * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
119 * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
120 *
121 * Unix:
122 * a/b/c.txt           --&gt; ""          --&gt; relative
123 * /a/b/c.txt          --&gt; "/"         --&gt; absolute
124 * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
125 * ~                   --&gt; "~/"        --&gt; current user (slash added)
126 * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
127 * ~user               --&gt; "~user/"    --&gt; named user (slash added)
128 * </pre>
129 * <p>
130 * Both prefix styles are matched, irrespective of the machine that you are
131 * currently running on.
132 * </p>
133 *
134 * @since 1.1
135 */
136public class FilenameUtils {
137
138    private static final String[] EMPTY_STRING_ARRAY = {};
139
140    private static final String EMPTY_STRING = "";
141
142    private static final int NOT_FOUND = -1;
143
144    /**
145     * The extension separator character.
146     *
147     * @since 1.4
148     */
149    public static final char EXTENSION_SEPARATOR = '.';
150
151    /**
152     * The extension separator String.
153     *
154     * @since 1.4
155     */
156    public static final String EXTENSION_SEPARATOR_STR = Character.toString(EXTENSION_SEPARATOR);
157
158    /**
159     * The Unix separator character.
160     */
161    private static final char UNIX_NAME_SEPARATOR = '/';
162
163    /**
164     * The Windows separator character.
165     */
166    private static final char WINDOWS_NAME_SEPARATOR = '\\';
167
168    /**
169     * The system separator character.
170     */
171    private static final char SYSTEM_NAME_SEPARATOR = File.separatorChar;
172
173    /**
174     * The separator character that is the opposite of the system separator.
175     */
176    private static final char OTHER_SEPARATOR = flipSeparator(SYSTEM_NAME_SEPARATOR);
177
178    private static final Pattern IPV4_PATTERN = Pattern.compile("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$");
179
180    private static final int IPV4_MAX_OCTET_VALUE = 255;
181
182    private static final int IPV6_MAX_HEX_GROUPS = 8;
183
184    private static final int IPV6_MAX_HEX_DIGITS_PER_GROUP = 4;
185
186    private static final int MAX_UNSIGNED_SHORT = 0xffff;
187
188    private static final int BASE_16 = 16;
189
190    private static final Pattern REG_NAME_PART_PATTERN = Pattern.compile("^[a-zA-Z0-9][a-zA-Z0-9-]*$");
191
192    /**
193     * Concatenates a fileName to a base path using normal command line style rules.
194     * <p>
195     * The effect is equivalent to resultant directory after changing
196     * directory to the first argument, followed by changing directory to
197     * the second argument.
198     * </p>
199     * <p>
200     * The first argument is the base path, the second is the path to concatenate.
201     * The returned path is always normalized via {@link #normalize(String)},
202     * thus {@code ..} is handled.
203     * </p>
204     * <p>
205     * If {@code pathToAdd} is absolute (has an absolute prefix), then
206     * it will be normalized and returned.
207     * Otherwise, the paths will be joined, normalized and returned.
208     * </p>
209     * <p>
210     * The output will be the same on both Unix and Windows except
211     * for the separator character.
212     * </p>
213     * <pre>
214     * /foo/      + bar        --&gt;  /foo/bar
215     * /foo       + bar        --&gt;  /foo/bar
216     * /foo       + /bar       --&gt;  /bar
217     * /foo       + C:/bar     --&gt;  C:/bar
218     * /foo       + C:bar      --&gt;  C:bar [1]
219     * /foo/a/    + ../bar     --&gt;  /foo/bar
220     * /foo/      + ../../bar  --&gt;  null
221     * /foo/      + /bar       --&gt;  /bar
222     * /foo/..    + /bar       --&gt;  /bar
223     * /foo       + bar/c.txt  --&gt;  /foo/bar/c.txt
224     * /foo/c.txt + bar        --&gt;  /foo/c.txt/bar [2]
225     * </pre>
226     * <p>
227     * [1] Note that the Windows relative drive prefix is unreliable when
228     * used with this method.
229     * </p>
230     * <p>
231     * [2] Note that the first parameter must be a path. If it ends with a name, then
232     * the name will be built into the concatenated path. If this might be a problem,
233     * use {@link #getFullPath(String)} on the base path argument.
234     * </p>
235     *
236     * @param basePath  the base path to attach to, always treated as a path.
237     * @param fullFileNameToAdd  the file name (or path) to attach to the base.
238     * @return the concatenated path, or null if invalid.
239     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
240     */
241    public static String concat(final String basePath, final String fullFileNameToAdd) {
242        final int prefix = getPrefixLength(fullFileNameToAdd);
243        if (prefix < 0) {
244            return null;
245        }
246        if (prefix > 0) {
247            return normalize(fullFileNameToAdd);
248        }
249        if (basePath == null) {
250            return null;
251        }
252        final int len = basePath.length();
253        if (len == 0) {
254            return normalize(fullFileNameToAdd);
255        }
256        final char ch = basePath.charAt(len - 1);
257        if (isSeparator(ch)) {
258            return normalize(basePath + fullFileNameToAdd);
259        }
260        return normalize(basePath + '/' + fullFileNameToAdd);
261    }
262
263    /**
264     * Determines whether the {@code parent} directory contains the {@code child} (a file or directory).
265     * This does not read from the file system, and there is no guarantee or expectation that
266     * these paths actually exist.
267     * <p>
268     * The files names are expected to be normalized.
269     * </p>
270     *
271     * Edge cases:
272     * <ul>
273     * <li>A {@code directory} must not be null: if null, throw IllegalArgumentException</li>
274     * <li>A directory does not contain itself: return false</li>
275     * <li>A null child file is not contained in any parent: return false</li>
276     * </ul>
277     *
278     * @param canonicalParent the path string to consider as the parent.
279     * @param canonicalChild the path string to consider as the child.
280     * @return true if the candidate leaf is under the specified composite. False otherwise.
281     * @since 2.2
282     * @see FileUtils#directoryContains(File, File)
283     */
284    public static boolean directoryContains(final String canonicalParent, final String canonicalChild) {
285        if (isEmpty(canonicalParent) || isEmpty(canonicalChild) || IOCase.SYSTEM.checkEquals(canonicalParent, canonicalChild)) {
286            return false;
287        }
288        final char separator = toSeparator(canonicalParent.charAt(0) == UNIX_NAME_SEPARATOR);
289        final String parentWithEndSeparator = canonicalParent.charAt(canonicalParent.length() - 1) == separator ? canonicalParent : canonicalParent + separator;
290        return IOCase.SYSTEM.checkStartsWith(canonicalChild, parentWithEndSeparator);
291    }
292
293    /**
294     * Does the work of getting the path.
295     *
296     * @param fileName  the file name.
297     * @param includeEndSeparator  true to include the end separator.
298     * @return the path.
299     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
300     */
301    private static String doGetFullPath(final String fileName, final boolean includeEndSeparator) {
302        if (fileName == null) {
303            return null;
304        }
305        final int prefix = getPrefixLength(fileName);
306        if (prefix < 0) {
307            return null;
308        }
309        if (prefix >= fileName.length()) {
310            if (includeEndSeparator) {
311                return getPrefix(fileName);  // add end slash if necessary
312            }
313            return fileName;
314        }
315        final int index = indexOfLastSeparator(fileName);
316        if (index < 0) {
317            return fileName.substring(0, prefix);
318        }
319        int end = index + (includeEndSeparator ?  1 : 0);
320        if (end == 0) {
321            end++;
322        }
323        return fileName.substring(0, end);
324    }
325
326    /**
327     * Does the work of getting the path.
328     *
329     * @param fileName  the file name.
330     * @param separatorAdd  0 to omit the end separator, 1 to return it.
331     * @return the path.
332     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
333     */
334    private static String doGetPath(final String fileName, final int separatorAdd) {
335        if (fileName == null) {
336            return null;
337        }
338        final int prefix = getPrefixLength(fileName);
339        if (prefix < 0) {
340            return null;
341        }
342        final int index = indexOfLastSeparator(fileName);
343        final int endIndex = index + separatorAdd;
344        if (prefix >= fileName.length() || index < 0 || prefix >= endIndex) {
345            return EMPTY_STRING;
346        }
347        return requireNonNullChars(fileName.substring(prefix, endIndex));
348    }
349
350    /**
351     * Internal method to perform the normalization.
352     *
353     * @param fileName  the file name.
354     * @param separator The separator character to use.
355     * @param keepSeparator  true to keep the final separator.
356     * @return the normalized fileName.
357     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
358     */
359    private static String doNormalize(final String fileName, final char separator, final boolean keepSeparator) {
360        if (fileName == null) {
361            return null;
362        }
363        requireNonNullChars(fileName);
364        int size = fileName.length();
365        if (size == 0) {
366            return fileName;
367        }
368        final int prefix = getPrefixLength(fileName);
369        if (prefix < 0) {
370            return null;
371        }
372        final char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
373        fileName.getChars(0, fileName.length(), array, 0);
374        // fix separators throughout
375        final char otherSeparator = flipSeparator(separator);
376        for (int i = 0; i < array.length; i++) {
377            if (array[i] == otherSeparator) {
378                array[i] = separator;
379            }
380        }
381        // add extra separator on the end to simplify code below
382        boolean lastIsDirectory = true;
383        if (array[size - 1] != separator) {
384            array[size++] = separator;
385            lastIsDirectory = false;
386        }
387        // adjoining slashes
388        // If we get here, prefix can only be 0 or greater, size 1 or greater
389        // If prefix is 0, set loop start to 1 to prevent index errors
390        for (int i = prefix != 0 ? prefix : 1; i < size; i++) {
391            if (array[i] == separator && array[i - 1] == separator) {
392                System.arraycopy(array, i, array, i - 1, size - i);
393                size--;
394                i--;
395            }
396        }
397        // period slash
398        for (int i = prefix + 1; i < size; i++) {
399            if (array[i] == separator && array[i - 1] == '.' && (i == prefix + 1 || array[i - 2] == separator)) {
400                if (i == size - 1) {
401                    lastIsDirectory = true;
402                }
403                System.arraycopy(array, i + 1, array, i - 1, size - i);
404                size -= 2;
405                i--;
406            }
407        }
408        // double period slash
409        outer: for (int i = prefix + 2; i < size; i++) {
410            if (array[i] == separator && array[i - 1] == '.' && array[i - 2] == '.' && (i == prefix + 2 || array[i - 3] == separator)) {
411                if (i == prefix + 2) {
412                    return null;
413                }
414                if (i == size - 1) {
415                    lastIsDirectory = true;
416                }
417                int j;
418                for (j = i - 4; j >= prefix; j--) {
419                    if (array[j] == separator) {
420                        // remove b/../ from a/b/../c
421                        System.arraycopy(array, i + 1, array, j + 1, size - i);
422                        size -= i - j;
423                        i = j + 1;
424                        continue outer;
425                    }
426                }
427                // remove a/../ from a/../c
428                System.arraycopy(array, i + 1, array, prefix, size - i);
429                size -= i + 1 - prefix;
430                i = prefix + 1;
431            }
432        }
433        if (size <= 0) { // should never be less than 0
434            return EMPTY_STRING;
435        }
436        if (size <= prefix || lastIsDirectory && keepSeparator) {
437            return new String(array, 0, size); // keep trailing separator
438        }
439        return new String(array, 0, size - 1); // lose trailing separator
440    }
441
442    /**
443     * Checks whether two file names are exactly equal.
444     * <p>
445     * No processing is performed on the file names other than comparison.
446     * This is merely a null-safe case-sensitive string equality.
447     * </p>
448     *
449     * @param fileName1  the first file name, may be null.
450     * @param fileName2  the second file name, may be null.
451     * @return true if the file names are equal, null equals null.
452     * @see IOCase#SENSITIVE
453     */
454    public static boolean equals(final String fileName1, final String fileName2) {
455        return equals(fileName1, fileName2, false, IOCase.SENSITIVE);
456    }
457
458    /**
459     * Checks whether two file names are equal, optionally normalizing and providing
460     * control over the case-sensitivity.
461     *
462     * @param fileName1  the first file name, may be null.
463     * @param fileName2  the second file name, may be null.
464     * @param normalize  whether to normalize the file names.
465     * @param ioCase  what case sensitivity rule to use, null means case-sensitive.
466     * @return true if the file names are equal, null equals null.
467     * @since 1.3
468     */
469    public static boolean equals(String fileName1, String fileName2, final boolean normalize, final IOCase ioCase) {
470        if (fileName1 == null || fileName2 == null) {
471            return fileName1 == null && fileName2 == null;
472        }
473        if (normalize) {
474            fileName1 = normalize(fileName1);
475            if (fileName1 == null) {
476                return false;
477            }
478            fileName2 = normalize(fileName2);
479            if (fileName2 == null) {
480                return false;
481            }
482        }
483        return IOCase.value(ioCase, IOCase.SENSITIVE).checkEquals(fileName1, fileName2);
484    }
485
486    /**
487     * Checks whether two file names are equal after both have been normalized.
488     * <p>
489     * Both file names are first passed to {@link #normalize(String)}.
490     * The check is then performed in a case-sensitive manner.
491     * </p>
492     *
493     * @param fileName1  the first file name, may be null.
494     * @param fileName2  the second file name, may be null.
495     * @return true if the file names are equal, null equals null.
496     * @see IOCase#SENSITIVE
497     */
498    public static boolean equalsNormalized(final String fileName1, final String fileName2) {
499        return equals(fileName1, fileName2, true, IOCase.SENSITIVE);
500    }
501
502    /**
503     * Checks whether two file names are equal using the case rules of the system
504     * after both have been normalized.
505     * <p>
506     * Both file names are first passed to {@link #normalize(String)}.
507     * The check is then performed case-sensitively on Unix and
508     * case-insensitively on Windows.
509     * </p>
510     *
511     * @param fileName1  the first file name, may be null.
512     * @param fileName2  the second file name, may be null.
513     * @return true if the file names are equal, null equals null.
514     * @see IOCase#SYSTEM
515     */
516    public static boolean equalsNormalizedOnSystem(final String fileName1, final String fileName2) {
517        return equals(fileName1, fileName2, true, IOCase.SYSTEM);
518    }
519
520    /**
521     * Checks whether two file names are equal using the case rules of the system.
522     * <p>
523     * No processing is performed on the file names other than comparison.
524     * The check is case-sensitive on Unix and case-insensitive on Windows.
525     * </p>
526     *
527     * @param fileName1  the first file name, may be null.
528     * @param fileName2  the second file name, may be null.
529     * @return true if the file names are equal, null equals null.
530     * @see IOCase#SYSTEM
531     */
532    public static boolean equalsOnSystem(final String fileName1, final String fileName2) {
533        return equals(fileName1, fileName2, false, IOCase.SYSTEM);
534    }
535
536    /**
537     * Flips the Windows name separator to Linux and vice-versa.
538     *
539     * @param ch The Windows or Linux name separator.
540     * @return The Windows or Linux name separator.
541     */
542    static char flipSeparator(final char ch) {
543        if (ch == UNIX_NAME_SEPARATOR) {
544            return WINDOWS_NAME_SEPARATOR;
545        }
546        if (ch == WINDOWS_NAME_SEPARATOR) {
547            return UNIX_NAME_SEPARATOR;
548        }
549        throw new IllegalArgumentException(String.valueOf(ch));
550    }
551
552    /**
553     * Special handling for NTFS ADS: Don't accept colon in the file name.
554     *
555     * @param fileName a file name.
556     * @return ADS offsets.
557     */
558    private static int getAdsCriticalOffset(final String fileName) {
559        // Step 1: Remove leading path segments.
560        final int offset1 = fileName.lastIndexOf(SYSTEM_NAME_SEPARATOR);
561        final int offset2 = fileName.lastIndexOf(OTHER_SEPARATOR);
562        if (offset1 == -1) {
563            if (offset2 == -1) {
564                return 0;
565            }
566            return offset2 + 1;
567        }
568        if (offset2 == -1) {
569            return offset1 + 1;
570        }
571        return Math.max(offset1, offset2) + 1;
572    }
573
574    /**
575     * Gets the base name, minus the full path and extension, from a full file name.
576     * <p>
577     * This method will handle a path in either Unix or Windows format.
578     * The text after the last forward or backslash and before the last period is returned.
579     * </p>
580     * <pre>
581     * a/b/c.txt --&gt; c
582     * a\b\c.txt --&gt; c
583     * a/b/c.foo.txt --&gt; c.foo
584     * a.txt     --&gt; a
585     * a/b/c     --&gt; c
586     * a/b/c/    --&gt; ""
587     * </pre>
588     * <p>
589     * The output will be the same irrespective of the machine that the code is running on.
590     * </p>
591     *
592     * @param fileName  the file name, null returns null.
593     * @return the name of the file without the path, or an empty string if none exists.
594     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
595     */
596    public static String getBaseName(final String fileName) {
597        return removeExtension(getName(fileName));
598    }
599
600    /**
601     * Gets the extension of a file name.
602     * <p>
603     * This method returns the textual part of the file name after the last period.
604     * There must be no directory separator after the period.
605     * </p>
606     * <pre>
607     * foo.txt      --&gt; "txt"
608     * a/b/c.jpg    --&gt; "jpg"
609     * a/b.txt/c    --&gt; ""
610     * a/b/c        --&gt; ""
611     * </pre>
612     * <p>
613     * The output will be the same irrespective of the machine that the code is running on, with the
614     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
615     * </p>
616     * <p>
617     * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
618     * In this case, the name wouldn't be the name of a file, but the identifier of an
619     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
620     * ".txt" here, which would be misleading. Commons IO 2.7 and later throw
621     * an {@link IllegalArgumentException} for names like this.
622     * </p>
623     *
624     * @param fileName the file name to retrieve the extension of.
625     * @return the extension of the file or an empty string if none exists or {@code null}
626     * if the file name is {@code null}.
627     * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
628     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
629     */
630    public static String getExtension(final String fileName) throws IllegalArgumentException {
631        if (fileName == null) {
632            return null;
633        }
634        final int index = indexOfExtension(fileName);
635        if (index == NOT_FOUND) {
636            return EMPTY_STRING;
637        }
638        return fileName.substring(index + 1);
639    }
640
641    /**
642     * Gets the full path (prefix + path) from a full file name.
643     * <p>
644     * This method will handle a file in either Unix or Windows format.
645     * The method is entirely text based, and returns the text before and
646     * including the last forward or backslash.
647     * </p>
648     * <pre>
649     * C:\a\b\c.txt --&gt; C:\a\b\
650     * ~/a/b/c.txt  --&gt; ~/a/b/
651     * a.txt        --&gt; ""
652     * a/b/c        --&gt; a/b/
653     * a/b/c/       --&gt; a/b/c/
654     * C:           --&gt; C:
655     * C:\          --&gt; C:\
656     * ~            --&gt; ~/
657     * ~/           --&gt; ~/
658     * ~user        --&gt; ~user/
659     * ~user/       --&gt; ~user/
660     * </pre>
661     * <p>
662     * The output will be the same irrespective of the machine that the code is running on.
663     * </p>
664     *
665     * @param fileName  the file name, null returns null.
666     * @return the path of the file, an empty string if none exists, null if invalid.
667     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
668     */
669    public static String getFullPath(final String fileName) {
670        return doGetFullPath(fileName, true);
671    }
672
673    /**
674     * Gets the full path (prefix + path) from a full file name,
675     * excluding the final directory separator.
676     * <p>
677     * This method will handle a file in either Unix or Windows format.
678     * The method is entirely text based, and returns the text before the
679     * last forward or backslash.
680     * </p>
681     * <pre>
682     * C:\a\b\c.txt --&gt; C:\a\b
683     * ~/a/b/c.txt  --&gt; ~/a/b
684     * a.txt        --&gt; ""
685     * a/b/c        --&gt; a/b
686     * a/b/c/       --&gt; a/b/c
687     * C:           --&gt; C:
688     * C:\          --&gt; C:\
689     * ~            --&gt; ~
690     * ~/           --&gt; ~
691     * ~user        --&gt; ~user
692     * ~user/       --&gt; ~user
693     * </pre>
694     * <p>
695     * The output will be the same irrespective of the machine that the code is running on.
696     * </p>
697     *
698     * @param fileName  the file name, null returns null.
699     * @return the path of the file, an empty string if none exists, null if invalid.
700     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
701     */
702    public static String getFullPathNoEndSeparator(final String fileName) {
703        return doGetFullPath(fileName, false);
704    }
705
706    /**
707     * Gets the name minus the path from a full file name.
708     * <p>
709     * This method will handle a file in either Unix or Windows format.
710     * The text after the last forward or backslash is returned.
711     * </p>
712     * <pre>
713     * a/b/c.txt --&gt; c.txt
714     * a\b\c.txt --&gt; c.txt
715     * a.txt     --&gt; a.txt
716     * a/b/c     --&gt; c
717     * a/b/c/    --&gt; ""
718     * </pre>
719     * <p>
720     * The output will be the same irrespective of the machine that the code is running on.
721     * </p>
722     *
723     * @param fileName  the file name, null returns null.
724     * @return the name of the file without the path, or an empty string if none exists.
725     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
726     */
727    public static String getName(final String fileName) {
728        if (fileName == null) {
729            return null;
730        }
731        return requireNonNullChars(fileName).substring(indexOfLastSeparator(fileName) + 1);
732    }
733
734    /**
735     * Gets the path from a full file name, which excludes the prefix and the name.
736     * <p>
737     * This method will handle a file in either Unix or Windows format.
738     * The method is entirely text based, and returns the text before and
739     * including the last forward or backslash.
740     * </p>
741     * <pre>
742     * C:\a\b\c.txt --&gt; a\b\
743     * ~/a/b/c.txt  --&gt; a/b/
744     * a.txt        --&gt; ""
745     * a/b/c        --&gt; a/b/
746     * a/b/c/       --&gt; a/b/c/
747     * </pre>
748     * <p>
749     * The output will be the same irrespective of the machine that the code is running on.
750     * </p>
751     * <p>
752     * This method drops the prefix from the result.
753     * See {@link #getFullPath(String)} for the method that retains the prefix.
754     * </p>
755     *
756     * @param fileName  the file name, null returns null.
757     * @return the path of the file, an empty string if none exists, null if invalid.
758     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
759     */
760    public static String getPath(final String fileName) {
761        return doGetPath(fileName, 1);
762    }
763
764    /**
765     * Gets the path (which excludes the prefix) from a full file name, and
766     * also excluding the final directory separator.
767     * <p>
768     * This method will handle a file in either Unix or Windows format.
769     * The method is entirely text based, and returns the text before the
770     * last forward or backslash.
771     * </p>
772     * <pre>
773     * C:\a\b\c.txt --&gt; a\b
774     * ~/a/b/c.txt  --&gt; a/b
775     * a.txt        --&gt; ""
776     * a/b/c        --&gt; a/b
777     * a/b/c/       --&gt; a/b/c
778     * </pre>
779     * <p>
780     * The output will be the same irrespective of the machine that the code is running on.
781     * </p>
782     * <p>
783     * This method drops the prefix from the result.
784     * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
785     * </p>
786     *
787     * @param fileName  the file name, null returns null.
788     * @return the path of the file, an empty string if none exists, null if invalid.
789     * @throws IllegalArgumentException if the result path contains the null character ({@code U+0000}).
790     */
791    public static String getPathNoEndSeparator(final String fileName) {
792        return doGetPath(fileName, 0);
793    }
794
795    /**
796     * Gets the prefix such as {@code C:/} or {@code ~/} from a full file name,
797     * <p>
798     * This method will handle a file in either Unix or Windows format.
799     * The prefix includes the first slash in the full file name where applicable.
800     * </p>
801     * <pre>
802     * Windows:
803     * a\b\c.txt           --&gt; ""          --&gt; relative
804     * \a\b\c.txt          --&gt; "\"         --&gt; current drive absolute
805     * C:a\b\c.txt         --&gt; "C:"        --&gt; drive relative
806     * C:\a\b\c.txt        --&gt; "C:\"       --&gt; absolute
807     * \\server\a\b\c.txt  --&gt; "\\server\" --&gt; UNC
808     *
809     * Unix:
810     * a/b/c.txt           --&gt; ""          --&gt; relative
811     * /a/b/c.txt          --&gt; "/"         --&gt; absolute
812     * ~/a/b/c.txt         --&gt; "~/"        --&gt; current user
813     * ~                   --&gt; "~/"        --&gt; current user (slash added)
814     * ~user/a/b/c.txt     --&gt; "~user/"    --&gt; named user
815     * ~user               --&gt; "~user/"    --&gt; named user (slash added)
816     * </pre>
817     * <p>
818     * The output will be the same irrespective of the machine that the code is running on.
819     * ie. both Unix and Windows prefixes are matched regardless.
820     * </p>
821     *
822     * @param fileName  the file name, null returns null.
823     * @return the prefix of the file, null if invalid.
824     * @throws IllegalArgumentException if the result contains the null character ({@code U+0000}).
825     */
826    public static String getPrefix(final String fileName) {
827        if (fileName == null) {
828            return null;
829        }
830        final int len = getPrefixLength(fileName);
831        if (len < 0) {
832            return null;
833        }
834        if (len > fileName.length()) {
835            requireNonNullChars(fileName);
836            return fileName + UNIX_NAME_SEPARATOR;
837        }
838        return requireNonNullChars(fileName.substring(0, len));
839    }
840
841    /**
842     * Returns the length of the file name prefix, such as {@code C:/} or {@code ~/}.
843     * <p>
844     * This method will handle a file in either Unix or Windows format.
845     * </p>
846     * <p>
847     * The prefix length includes the first slash in the full file name
848     * if applicable. Thus, it is possible that the length returned is greater
849     * than the length of the input string.
850     * </p>
851     * <pre>
852     * Windows:
853     * a\b\c.txt           --&gt; 0           --&gt; relative
854     * \a\b\c.txt          --&gt; 1           --&gt; current drive absolute
855     * C:a\b\c.txt         --&gt; 2           --&gt; drive relative
856     * C:\a\b\c.txt        --&gt; 3           --&gt; absolute
857     * \\server\a\b\c.txt  --&gt; 9           --&gt; UNC
858     * \\\a\b\c.txt        --&gt; -1          --&gt; error
859     *
860     * Unix:
861     * a/b/c.txt           --&gt; 0           --&gt; relative
862     * /a/b/c.txt          --&gt; 1           --&gt; absolute
863     * ~/a/b/c.txt         --&gt; 2           --&gt; current user
864     * ~                   --&gt; 2           --&gt; current user (slash added)
865     * ~user/a/b/c.txt     --&gt; 6           --&gt; named user
866     * ~user               --&gt; 6           --&gt; named user (slash added)
867     * //server/a/b/c.txt  --&gt; 9
868     * ///a/b/c.txt        --&gt; -1          --&gt; error
869     * C:                  --&gt; 0           --&gt; valid file name as only null character and / are reserved characters
870     * </pre>
871     * <p>
872     * The output will be the same irrespective of the machine that the code is running on.
873     * ie. both Unix and Windows prefixes are matched regardless.
874     * </p>
875     * <p>
876     * Note that a leading // (or \\) is used to indicate a UNC name on Windows.
877     * These must be followed by a server name, so double-slashes are not collapsed
878     * to a single slash at the start of the file name.
879     * </p>
880     *
881     * @param fileName  the file name to find the prefix in, null returns -1.
882     * @return the length of the prefix, -1 if invalid or null.
883     */
884    public static int getPrefixLength(final String fileName) {
885        if (fileName == null) {
886            return NOT_FOUND;
887        }
888        final int len = fileName.length();
889        if (len == 0) {
890            return 0;
891        }
892        char ch0 = fileName.charAt(0);
893        if (ch0 == ':') {
894            return NOT_FOUND;
895        }
896        if (len == 1) {
897            if (ch0 == '~') {
898                return 2;  // return a length greater than the input
899            }
900            return isSeparator(ch0) ? 1 : 0;
901        }
902        if (ch0 == '~') {
903            int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 1);
904            int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 1);
905            if (posUnix == NOT_FOUND && posWin == NOT_FOUND) {
906                return len + 1;  // return a length greater than the input
907            }
908            posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
909            posWin = posWin == NOT_FOUND ? posUnix : posWin;
910            return Math.min(posUnix, posWin) + 1;
911        }
912        final char ch1 = fileName.charAt(1);
913        if (ch1 == ':') {
914            ch0 = Character.toUpperCase(ch0);
915            if (ch0 >= 'A' && ch0 <= 'Z') {
916                if (len == 2 && !FileSystem.getCurrent().supportsDriveLetter()) {
917                    return 0;
918                }
919                if (len == 2 || !isSeparator(fileName.charAt(2))) {
920                    return 2;
921                }
922                return 3;
923            }
924            if (ch0 == UNIX_NAME_SEPARATOR) {
925                return 1;
926            }
927            return NOT_FOUND;
928
929        }
930        if (!isSeparator(ch0) || !isSeparator(ch1)) {
931            return isSeparator(ch0) ? 1 : 0;
932        }
933        int posUnix = fileName.indexOf(UNIX_NAME_SEPARATOR, 2);
934        int posWin = fileName.indexOf(WINDOWS_NAME_SEPARATOR, 2);
935        if (posUnix == NOT_FOUND && posWin == NOT_FOUND || posUnix == 2 || posWin == 2) {
936            return NOT_FOUND;
937        }
938        posUnix = posUnix == NOT_FOUND ? posWin : posUnix;
939        posWin = posWin == NOT_FOUND ? posUnix : posWin;
940        final int pos = Math.min(posUnix, posWin) + 1;
941        final String hostnamePart = fileName.substring(2, pos - 1);
942        return isValidHostName(hostnamePart) ? pos : NOT_FOUND;
943    }
944
945    /**
946     * Returns the index of the last extension separator character, which is a period.
947     * <p>
948     * This method also checks that there is no directory separator after the last period. To do this it uses
949     * {@link #indexOfLastSeparator(String)} which will handle a file in either Unix or Windows format.
950     * </p>
951     * <p>
952     * The output will be the same irrespective of the machine that the code is running on, with the
953     * exception of a possible {@link IllegalArgumentException} on Windows (see below).
954     * </p>
955     * <strong>Note:</strong> This method used to have a hidden problem for names like "foo.exe:bar.txt".
956     * In this case, the name wouldn't be the name of a file, but the identifier of an
957     * alternate data stream (bar.txt) on the file foo.exe. The method used to return
958     * ".txt" here, which would be misleading. Commons IO 2.7, and later versions, are throwing
959     * an {@link IllegalArgumentException} for names like this.
960     *
961     * @param fileName
962     *            the file name to find the last extension separator in, null returns -1.
963     * @return the index of the last extension separator character, or -1 if there is no such character.
964     * @throws IllegalArgumentException <strong>Windows only:</strong> the file name parameter is, in fact,
965     * the identifier of an Alternate Data Stream, for example "foo.exe:bar.txt".
966     */
967    public static int indexOfExtension(final String fileName) throws IllegalArgumentException {
968        if (fileName == null) {
969            return NOT_FOUND;
970        }
971        if (isSystemWindows()) {
972            // Special handling for NTFS ADS: Don't accept colon in the file name.
973            final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName));
974            if (offset != -1) {
975                throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden.");
976            }
977        }
978        final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
979        final int lastSeparator = indexOfLastSeparator(fileName);
980        return lastSeparator > extensionPos ? NOT_FOUND : extensionPos;
981    }
982
983    /**
984     * Returns the index of the last directory separator character.
985     * <p>
986     * This method will handle a file in either Unix or Windows format.
987     * The position of the last forward or backslash is returned.
988     * <p>
989     * The output will be the same irrespective of the machine that the code is running on.
990     *
991     * @param fileName  the file name to find the last path separator in, null returns -1.
992     * @return the index of the last separator character, or -1 if there
993     * is no such character.
994     */
995    public static int indexOfLastSeparator(final String fileName) {
996        if (fileName == null) {
997            return NOT_FOUND;
998        }
999        final int lastUnixPos = fileName.lastIndexOf(UNIX_NAME_SEPARATOR);
1000        final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_NAME_SEPARATOR);
1001        return Math.max(lastUnixPos, lastWindowsPos);
1002    }
1003
1004    private static boolean isEmpty(final String string) {
1005        return string == null || string.isEmpty();
1006    }
1007
1008    /**
1009     * Checks whether the extension of the file name is one of those specified.
1010     * <p>
1011     * This method obtains the extension as the textual part of the file name
1012     * after the last period. There must be no directory separator after the period.
1013     * The extension check is case-sensitive on all platforms.
1014     *
1015     * @param fileName  the file name, null returns false.
1016     * @param extensions  the extensions to check for, null checks for no extension.
1017     * @return true if the file name is one of the extensions.
1018     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1019     */
1020    public static boolean isExtension(final String fileName, final Collection<String> extensions) {
1021        if (fileName == null) {
1022            return false;
1023        }
1024        requireNonNullChars(fileName);
1025        if (extensions == null || extensions.isEmpty()) {
1026            return indexOfExtension(fileName) == NOT_FOUND;
1027        }
1028        return extensions.contains(getExtension(fileName));
1029    }
1030
1031    /**
1032     * Checks whether the extension of the file name is that specified.
1033     * <p>
1034     * This method obtains the extension as the textual part of the file name
1035     * after the last period. There must be no directory separator after the period.
1036     * The extension check is case-sensitive on all platforms.
1037     *
1038     * @param fileName  the file name, null returns false.
1039     * @param extension  the extension to check for, null or empty checks for no extension.
1040     * @return true if the file name has the specified extension.
1041     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1042     */
1043    public static boolean isExtension(final String fileName, final String extension) {
1044        if (fileName == null) {
1045            return false;
1046        }
1047        requireNonNullChars(fileName);
1048        if (isEmpty(extension)) {
1049            return indexOfExtension(fileName) == NOT_FOUND;
1050        }
1051        return getExtension(fileName).equals(extension);
1052    }
1053
1054    /**
1055     * Checks whether the extension of the file name is one of those specified.
1056     * <p>
1057     * This method obtains the extension as the textual part of the file name
1058     * after the last period. There must be no directory separator after the period.
1059     * The extension check is case-sensitive on all platforms.
1060     *
1061     * @param fileName  the file name, null returns false.
1062     * @param extensions  the extensions to check for, null checks for no extension.
1063     * @return true if the file name is one of the extensions.
1064     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1065     */
1066    public static boolean isExtension(final String fileName, final String... extensions) {
1067        if (fileName == null) {
1068            return false;
1069        }
1070        requireNonNullChars(fileName);
1071
1072        if (extensions == null || extensions.length == 0) {
1073            return indexOfExtension(fileName) == NOT_FOUND;
1074        }
1075        final String fileExt = getExtension(fileName);
1076        return Stream.of(extensions).anyMatch(fileExt::equals);
1077    }
1078
1079    /**
1080     * Checks whether a given string represents a valid IPv4 address.
1081     *
1082     * @param name the name to validate.
1083     * @return true if the given name is a valid IPv4 address.
1084     */
1085    // mostly copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet4Address
1086    private static boolean isIPv4Address(final String name) {
1087        final Matcher m = IPV4_PATTERN.matcher(name);
1088        if (!m.matches() || m.groupCount() != 4) {
1089            return false;
1090        }
1091        // verify that address subgroups are legal
1092        for (int i = 1; i <= 4; i++) {
1093            final String ipSegment = m.group(i);
1094            final int iIpSegment = Integer.parseInt(ipSegment);
1095            if (iIpSegment > IPV4_MAX_OCTET_VALUE || ipSegment.length() > 1 && ipSegment.startsWith("0")) {
1096                return false;
1097            }
1098        }
1099        return true;
1100    }
1101
1102    // copied from org.apache.commons.validator.routines.InetAddressValidator#isValidInet6Address
1103    /**
1104     * Checks whether a given string represents a valid IPv6 address.
1105     *
1106     * @param inet6Address the name to validate.
1107     * @return true if the given name is a valid IPv6 address.
1108     */
1109    private static boolean isIPv6Address(final String inet6Address) {
1110        final boolean containsCompressedZeroes = inet6Address.contains("::");
1111        if (containsCompressedZeroes && inet6Address.indexOf("::") != inet6Address.lastIndexOf("::")) {
1112            return false;
1113        }
1114        if (inet6Address.startsWith(":") && !inet6Address.startsWith("::")
1115                || inet6Address.endsWith(":") && !inet6Address.endsWith("::")) {
1116            return false;
1117        }
1118        String[] octets = inet6Address.split(":");
1119        if (containsCompressedZeroes) {
1120            final List<String> octetList = new ArrayList<>(Arrays.asList(octets));
1121            if (inet6Address.endsWith("::")) {
1122                // String.split() drops ending empty segments
1123                octetList.add("");
1124            } else if (inet6Address.startsWith("::") && !octetList.isEmpty()) {
1125                octetList.remove(0);
1126            }
1127            octets = octetList.toArray(EMPTY_STRING_ARRAY);
1128        }
1129        if (octets.length > IPV6_MAX_HEX_GROUPS) {
1130            return false;
1131        }
1132        int validOctets = 0;
1133        int emptyOctets = 0; // consecutive empty chunks
1134        for (int index = 0; index < octets.length; index++) {
1135            final String octet = octets[index];
1136            if (octet.isEmpty()) {
1137                emptyOctets++;
1138                if (emptyOctets > 1) {
1139                    return false;
1140                }
1141            } else {
1142                emptyOctets = 0;
1143                // Is last chunk an IPv4 address?
1144                if (index == octets.length - 1 && octet.contains(".")) {
1145                    if (!isIPv4Address(octet)) {
1146                        return false;
1147                    }
1148                    validOctets += 2;
1149                    continue;
1150                }
1151                if (octet.length() > IPV6_MAX_HEX_DIGITS_PER_GROUP) {
1152                    return false;
1153                }
1154                final int octetInt;
1155                try {
1156                    octetInt = Integer.parseInt(octet, BASE_16);
1157                } catch (final NumberFormatException e) {
1158                    return false;
1159                }
1160                if (octetInt < 0 || octetInt > MAX_UNSIGNED_SHORT) {
1161                    return false;
1162                }
1163            }
1164            validOctets++;
1165        }
1166        return validOctets <= IPV6_MAX_HEX_GROUPS && (validOctets >= IPV6_MAX_HEX_GROUPS || containsCompressedZeroes);
1167    }
1168
1169    /**
1170     * Checks whether a given string is a valid host name according to
1171     * RFC 3986 - not accepting IP addresses.
1172     *
1173     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1174     * @param name the hostname to validate.
1175     * @return true if the given name is a valid host name.
1176     */
1177    private static boolean isRFC3986HostName(final String name) {
1178        final String[] parts = name.split("\\.", -1);
1179        for (int i = 0; i < parts.length; i++) {
1180            if (parts[i].isEmpty()) {
1181                // trailing period is legal, otherwise we've hit a .. sequence
1182                return i == parts.length - 1;
1183            }
1184            if (!REG_NAME_PART_PATTERN.matcher(parts[i]).matches()) {
1185                return false;
1186            }
1187        }
1188        return true;
1189    }
1190
1191    /**
1192     * Checks if the character is a separator.
1193     *
1194     * @param ch  the character to check.
1195     * @return true if it is a separator character.
1196     */
1197    private static boolean isSeparator(final char ch) {
1198        return ch == UNIX_NAME_SEPARATOR || ch == WINDOWS_NAME_SEPARATOR;
1199    }
1200
1201    /**
1202     * Determines if Windows file system is in use.
1203     *
1204     * @return true if the system is Windows.
1205     */
1206    static boolean isSystemWindows() {
1207        return SYSTEM_NAME_SEPARATOR == WINDOWS_NAME_SEPARATOR;
1208    }
1209
1210    /**
1211     * Checks whether a given string is a valid host name according to
1212     * RFC 3986.
1213     *
1214     * <p>Accepted are IP addresses (v4 and v6) as well as what the
1215     * RFC calls a "reg-name". Percent encoded names don't seem to be
1216     * valid names in UNC paths.</p>
1217     *
1218     * @see "https://tools.ietf.org/html/rfc3986#section-3.2.2"
1219     * @param name the hostname to validate.
1220     * @return true if the given name is a valid host name.
1221     */
1222    private static boolean isValidHostName(final String name) {
1223        return isIPv6Address(name) || isRFC3986HostName(name);
1224    }
1225
1226    /**
1227     * Normalizes a path, removing double and single period path steps.
1228     * <p>
1229     * This method normalizes a path to a standard format.
1230     * The input may contain separators in either Unix or Windows format.
1231     * The output will contain separators in the format of the system.
1232     * <p>
1233     * A trailing slash will be retained.
1234     * A double slash will be merged to a single slash (but UNC names are handled).
1235     * A single period path segment will be removed.
1236     * A double period will cause that path segment and the one before to be removed.
1237     * If the double period has no parent path segment, {@code null} is returned.
1238     * <p>
1239     * The output will be the same on both Unix and Windows except
1240     * for the separator character.
1241     * <pre>
1242     * /foo//               --&gt;   /foo/
1243     * /foo/./              --&gt;   /foo/
1244     * /foo/../bar          --&gt;   /bar
1245     * /foo/../bar/         --&gt;   /bar/
1246     * /foo/../bar/../baz   --&gt;   /baz
1247     * //foo//./bar         --&gt;   //foo/bar
1248     * /../                 --&gt;   null
1249     * ../foo               --&gt;   null
1250     * foo/bar/..           --&gt;   foo/
1251     * foo/../../bar        --&gt;   null
1252     * foo/../bar           --&gt;   bar
1253     * //server/foo/../bar  --&gt;   //server/bar
1254     * //server/../bar      --&gt;   null
1255     * C:\foo\..\bar        --&gt;   C:\bar
1256     * C:\..\bar            --&gt;   null
1257     * ~/foo/../bar/        --&gt;   ~/bar/
1258     * ~/../bar             --&gt;   null
1259     * </pre>
1260     * (Note the file separator will be correct for Windows/Unix.)
1261     *
1262     * @param fileName  the file name to normalize, null returns null.
1263     * @return the normalized fileName, or null if invalid.
1264     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1265     */
1266    public static String normalize(final String fileName) {
1267        return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, true);
1268    }
1269
1270    /**
1271     * Normalizes a path, removing double and single period path steps.
1272     * <p>
1273     * This method normalizes a path to a standard format.
1274     * The input may contain separators in either Unix or Windows format.
1275     * The output will contain separators in the format specified.
1276     * <p>
1277     * A trailing slash will be retained.
1278     * A double slash will be merged to a single slash (but UNC names are handled).
1279     * A single period path segment will be removed.
1280     * A double period will cause that path segment and the one before to be removed.
1281     * If the double period has no parent path segment to work with, {@code null}
1282     * is returned.
1283     * <p>
1284     * The output will be the same on both Unix and Windows except
1285     * for the separator character.
1286     * <pre>
1287     * /foo//               --&gt;   /foo/
1288     * /foo/./              --&gt;   /foo/
1289     * /foo/../bar          --&gt;   /bar
1290     * /foo/../bar/         --&gt;   /bar/
1291     * /foo/../bar/../baz   --&gt;   /baz
1292     * //foo//./bar         --&gt;   /foo/bar
1293     * /../                 --&gt;   null
1294     * ../foo               --&gt;   null
1295     * foo/bar/..           --&gt;   foo/
1296     * foo/../../bar        --&gt;   null
1297     * foo/../bar           --&gt;   bar
1298     * //server/foo/../bar  --&gt;   //server/bar
1299     * //server/../bar      --&gt;   null
1300     * C:\foo\..\bar        --&gt;   C:\bar
1301     * C:\..\bar            --&gt;   null
1302     * ~/foo/../bar/        --&gt;   ~/bar/
1303     * ~/../bar             --&gt;   null
1304     * </pre>
1305     * The output will be the same on both Unix and Windows including
1306     * the separator character.
1307     *
1308     * @param fileName  the file name to normalize, null returns null.
1309     * @param unixSeparator {@code true} if a Unix separator should
1310     * be used or {@code false} if a Windows separator should be used.
1311     * @return the normalized fileName, or null if invalid.
1312     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1313     * @since 2.0
1314     */
1315    public static String normalize(final String fileName, final boolean unixSeparator) {
1316        return doNormalize(fileName, toSeparator(unixSeparator), true);
1317    }
1318
1319    /**
1320     * Normalizes a path, removing double and single period path steps,
1321     * and removing any final directory separator.
1322     * <p>
1323     * This method normalizes a path to a standard format.
1324     * The input may contain separators in either Unix or Windows format.
1325     * The output will contain separators in the format of the system.
1326     * <p>
1327     * A trailing slash will be removed.
1328     * A double slash will be merged to a single slash (but UNC names are handled).
1329     * A single period path segment will be removed.
1330     * A double period will cause that path segment and the one before to be removed.
1331     * If the double period has no parent path segment to work with, {@code null}
1332     * is returned.
1333     * <p>
1334     * The output will be the same on both Unix and Windows except
1335     * for the separator character.
1336     * <pre>
1337     * /foo//               --&gt;   /foo
1338     * /foo/./              --&gt;   /foo
1339     * /foo/../bar          --&gt;   /bar
1340     * /foo/../bar/         --&gt;   /bar
1341     * /foo/../bar/../baz   --&gt;   /baz
1342     * //foo//./bar         --&gt;   /foo/bar
1343     * /../                 --&gt;   null
1344     * ../foo               --&gt;   null
1345     * foo/bar/..           --&gt;   foo
1346     * foo/../../bar        --&gt;   null
1347     * foo/../bar           --&gt;   bar
1348     * //server/foo/../bar  --&gt;   //server/bar
1349     * //server/../bar      --&gt;   null
1350     * C:\foo\..\bar        --&gt;   C:\bar
1351     * C:\..\bar            --&gt;   null
1352     * ~/foo/../bar/        --&gt;   ~/bar
1353     * ~/../bar             --&gt;   null
1354     * </pre>
1355     * (Note the file separator returned will be correct for Windows/Unix)
1356     *
1357     * @param fileName  the file name to normalize, null returns null.
1358     * @return the normalized fileName, or null if invalid.
1359     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1360     */
1361    public static String normalizeNoEndSeparator(final String fileName) {
1362        return doNormalize(fileName, SYSTEM_NAME_SEPARATOR, false);
1363    }
1364
1365    /**
1366     * Normalizes a path, removing double and single period path steps,
1367     * and removing any final directory separator.
1368     * <p>
1369     * This method normalizes a path to a standard format.
1370     * The input may contain separators in either Unix or Windows format.
1371     * The output will contain separators in the format specified.
1372     * <p>
1373     * A trailing slash will be removed.
1374     * A double slash will be merged to a single slash (but UNC names are handled).
1375     * A single period path segment will be removed.
1376     * A double period will cause that path segment and the one before to be removed.
1377     * If the double period has no parent path segment to work with, {@code null}
1378     * is returned.
1379     * <p>
1380     * The output will be the same on both Unix and Windows including
1381     * the separator character.
1382     * <pre>
1383     * /foo//               --&gt;   /foo
1384     * /foo/./              --&gt;   /foo
1385     * /foo/../bar          --&gt;   /bar
1386     * /foo/../bar/         --&gt;   /bar
1387     * /foo/../bar/../baz   --&gt;   /baz
1388     * //foo//./bar         --&gt;   /foo/bar
1389     * /../                 --&gt;   null
1390     * ../foo               --&gt;   null
1391     * foo/bar/..           --&gt;   foo
1392     * foo/../../bar        --&gt;   null
1393     * foo/../bar           --&gt;   bar
1394     * //server/foo/../bar  --&gt;   //server/bar
1395     * //server/../bar      --&gt;   null
1396     * C:\foo\..\bar        --&gt;   C:\bar
1397     * C:\..\bar            --&gt;   null
1398     * ~/foo/../bar/        --&gt;   ~/bar
1399     * ~/../bar             --&gt;   null
1400     * </pre>
1401     *
1402     * @param fileName  the file name to normalize, null returns null.
1403     * @param unixSeparator {@code true} if a Unix separator should
1404     * be used or {@code false} if a Windows separator should be used.
1405     * @return the normalized fileName, or null if invalid.
1406     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1407     * @since 2.0
1408     */
1409    public static String normalizeNoEndSeparator(final String fileName, final boolean unixSeparator) {
1410         return doNormalize(fileName, toSeparator(unixSeparator), false);
1411    }
1412
1413    /**
1414     * Removes the extension from a fileName.
1415     * <p>
1416     * This method returns the textual part of the file name before the last period.
1417     * There must be no directory separator after the period.
1418     * <pre>
1419     * foo.txt    --&gt; foo
1420     * .txt       --&gt; "" (empty string)
1421     * a\b\c.jpg  --&gt; a\b\c
1422     * /a/b/c.jpg --&gt; /a/b/c
1423     * a\b\c      --&gt; a\b\c
1424     * a.b\c      --&gt; a.b\c
1425     * </pre>
1426     * <p>
1427     * The output will be the same irrespective of the machine that the code is running on.
1428     *
1429     * @param fileName  the file name, null returns null.
1430     * @return the file name minus the extension.
1431     * @throws IllegalArgumentException if the file name contains the null character ({@code U+0000}).
1432     */
1433    public static String removeExtension(final String fileName) {
1434        if (fileName == null) {
1435            return null;
1436        }
1437        requireNonNullChars(fileName);
1438        final int index = indexOfExtension(fileName);
1439        if (index == NOT_FOUND) {
1440            return fileName;
1441        }
1442        return fileName.substring(0, index);
1443    }
1444
1445    /**
1446     * Checks the input for null characters ({@code U+0000}), a sign of unsanitized data being passed to file level functions.
1447     *
1448     * This may be used to defend against poison byte attacks.
1449     *
1450     * @param path the path to check.
1451     * @return The input.
1452     * @throws IllegalArgumentException if path contains the null character ({@code U+0000}).
1453     */
1454    private static String requireNonNullChars(final String path) {
1455        if (path.indexOf(0) >= 0) {
1456            throw new IllegalArgumentException(
1457                "Null character present in file/path name. There are no known legitimate use cases for such data, but several injection attacks may use it");
1458        }
1459        return path;
1460    }
1461
1462    /**
1463     * Converts all separators to the system separator.
1464     *
1465     * @param path the path to be changed, null ignored.
1466     * @return the updated path.
1467     */
1468    public static String separatorsToSystem(final String path) {
1469        return FileSystem.getCurrent().normalizeSeparators(path);
1470    }
1471
1472    /**
1473     * Converts all separators to the Unix separator of forward slash.
1474     *
1475     * @param path the path to be changed, null ignored.
1476     * @return the new path.
1477     */
1478    public static String separatorsToUnix(final String path) {
1479        return FileSystem.LINUX.normalizeSeparators(path);
1480    }
1481
1482    /**
1483     * Converts all separators to the Windows separator of backslash.
1484     *
1485     * @param path the path to be changed, null ignored.
1486     * @return the updated path.
1487     */
1488    public static String separatorsToWindows(final String path) {
1489        return FileSystem.WINDOWS.normalizeSeparators(path);
1490    }
1491
1492    /**
1493     * Splits a string into a number of tokens.
1494     * The text is split by '?' and '*'.
1495     * Where multiple '*' occur consecutively they are collapsed into a single '*'.
1496     *
1497     * @param text  the text to split.
1498     * @return the array of tokens, never null.
1499     */
1500    static String[] splitOnTokens(final String text) {
1501        // used by wildcardMatch
1502        // package level so a unit test may run on this
1503        if (text.indexOf('?') == NOT_FOUND && text.indexOf('*') == NOT_FOUND) {
1504            return new String[] { text };
1505        }
1506        final char[] array = text.toCharArray();
1507        final ArrayList<String> list = new ArrayList<>();
1508        final StringBuilder buffer = new StringBuilder();
1509        char prevChar = 0;
1510        for (final char ch : array) {
1511            if (ch == '?' || ch == '*') {
1512                if (buffer.length() != 0) {
1513                    list.add(buffer.toString());
1514                    buffer.setLength(0);
1515                }
1516                if (ch == '?') {
1517                    list.add("?");
1518                } else if (prevChar != '*') { // ch == '*' here; check if previous char was '*'
1519                    list.add("*");
1520                }
1521            } else {
1522                buffer.append(ch);
1523            }
1524            prevChar = ch;
1525        }
1526        if (buffer.length() != 0) {
1527            list.add(buffer.toString());
1528        }
1529        return list.toArray(EMPTY_STRING_ARRAY);
1530    }
1531
1532    /**
1533     * Returns '/' if given true, '\\' otherwise.
1534     *
1535     * @param unixSeparator which separator to return.
1536     * @return '/' if given true, '\\' otherwise.
1537     */
1538    private static char toSeparator(final boolean unixSeparator) {
1539        return unixSeparator ? UNIX_NAME_SEPARATOR : WINDOWS_NAME_SEPARATOR;
1540    }
1541
1542    /**
1543     * Checks a fileName to see if it matches the specified wildcard matcher,
1544     * always testing case-sensitive.
1545     * <p>
1546     * The wildcard matcher uses the characters '?' and '*' to represent a
1547     * single or multiple (zero or more) wildcard characters.
1548     * This is the same as often found on DOS/Unix command lines.
1549     * The check is case-sensitive always.
1550     * <pre>
1551     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1552     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1553     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1554     * wildcardMatch("c.txt", "*.???")      --&gt; true
1555     * wildcardMatch("c.txt", "*.????")     --&gt; false
1556     * </pre>
1557     * The sequence "*?" does not work properly at present in match strings.
1558     *
1559     * @param fileName  the file name to match on, may be null.
1560     * @param wildcardMatcher  the wildcard string to match against, may be null.
1561     * @return true if the file name matches the wildcard string.
1562     * @see IOCase#SENSITIVE
1563     */
1564    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher) {
1565        return wildcardMatch(fileName, wildcardMatcher, IOCase.SENSITIVE);
1566    }
1567
1568    /**
1569     * Checks a fileName to see if it matches the specified wildcard matcher
1570     * allowing control over case-sensitivity.
1571     * <p>
1572     * The wildcard matcher uses the characters '?' and '*' to represent a
1573     * single or multiple (zero or more) wildcard characters.
1574     * The sequence "*?" does not work properly at present in match strings.
1575     *
1576     * @param fileName  the file name to match on, may be null.
1577     * @param wildcardMatcher  the wildcard string to match against, may be null.
1578     * @param ioCase  what case sensitivity rule to use, null means case-sensitive.
1579     * @return true if the file name matches the wildcard string.
1580     * @since 1.3
1581     */
1582    public static boolean wildcardMatch(final String fileName, final String wildcardMatcher, IOCase ioCase) {
1583        if (fileName == null && wildcardMatcher == null) {
1584            return true;
1585        }
1586        if (fileName == null || wildcardMatcher == null) {
1587            return false;
1588        }
1589        ioCase = IOCase.value(ioCase, IOCase.SENSITIVE);
1590        final String[] wcs = splitOnTokens(wildcardMatcher);
1591        boolean anyChars = false;
1592        int textIdx = 0;
1593        int wcsIdx = 0;
1594        final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);
1595        // loop around a backtrack stack, to handle complex * matching
1596        do {
1597            if (!backtrack.isEmpty()) {
1598                final int[] array = backtrack.pop();
1599                wcsIdx = array[0];
1600                textIdx = array[1];
1601                anyChars = true;
1602            }
1603            // loop whilst tokens and text left to process
1604            while (wcsIdx < wcs.length) {
1605                if (wcs[wcsIdx].equals("?")) {
1606                    // ? so move to next text char
1607                    textIdx++;
1608                    if (textIdx > fileName.length()) {
1609                        break;
1610                    }
1611                    anyChars = false;
1612                } else if (wcs[wcsIdx].equals("*")) {
1613                    // set any chars status
1614                    anyChars = true;
1615                    if (wcsIdx == wcs.length - 1) {
1616                        textIdx = fileName.length();
1617                    }
1618                } else {
1619                    // matching text token
1620                    if (anyChars) {
1621                        // any chars then try to locate text token
1622                        textIdx = ioCase.checkIndexOf(fileName, textIdx, wcs[wcsIdx]);
1623                        if (textIdx == NOT_FOUND) {
1624                            // token not found
1625                            break;
1626                        }
1627                        final int repeat = ioCase.checkIndexOf(fileName, textIdx + 1, wcs[wcsIdx]);
1628                        if (repeat >= 0) {
1629                            backtrack.push(new int[] { wcsIdx, repeat });
1630                        }
1631                    } else if (!ioCase.checkRegionMatches(fileName, textIdx, wcs[wcsIdx])) {
1632                        // matching from current position
1633                        // couldn't match token
1634                        break;
1635                    }
1636                    // matched text token, move text index to end of matched token
1637                    textIdx += wcs[wcsIdx].length();
1638                    anyChars = false;
1639                }
1640                wcsIdx++;
1641            }
1642            // full match
1643            if (wcsIdx == wcs.length && textIdx == fileName.length()) {
1644                return true;
1645            }
1646        } while (!backtrack.isEmpty());
1647        return false;
1648    }
1649
1650    /**
1651     * Checks a fileName to see if it matches the specified wildcard matcher
1652     * using the case rules of the system.
1653     * <p>
1654     * The wildcard matcher uses the characters '?' and '*' to represent a
1655     * single or multiple (zero or more) wildcard characters.
1656     * This is the same as often found on DOS/Unix command lines.
1657     * The check is case-sensitive on Unix and case-insensitive on Windows.
1658     * <pre>
1659     * wildcardMatch("c.txt", "*.txt")      --&gt; true
1660     * wildcardMatch("c.txt", "*.jpg")      --&gt; false
1661     * wildcardMatch("a/b/c.txt", "a/b/*")  --&gt; true
1662     * wildcardMatch("c.txt", "*.???")      --&gt; true
1663     * wildcardMatch("c.txt", "*.????")     --&gt; false
1664     * </pre>
1665     * The sequence "*?" does not work properly at present in match strings.
1666     *
1667     * @param fileName  the file name to match on.
1668     * @param wildcardMatcher  the wildcard string to match against.
1669     * @return true if the file name matches the wildcard string.
1670     * @see IOCase#SYSTEM
1671     */
1672    public static boolean wildcardMatchOnSystem(final String fileName, final String wildcardMatcher) {
1673        return wildcardMatch(fileName, wildcardMatcher, IOCase.SYSTEM);
1674    }
1675
    /**
     * Instances should NOT be constructed in standard programming.
     * <p>
     * The constructor does nothing.
     * </p>
     *
     * @deprecated TODO Make private in 3.0.
     */
    @Deprecated
    public FilenameUtils() {
        // empty
    }
1685}