001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Path;
029import java.nio.file.StandardOpenOption;
030import java.util.ArrayList;
031import java.util.Arrays;
032import java.util.Collections;
033import java.util.Iterator;
034import java.util.List;
035
036import org.apache.commons.io.Charsets;
037import org.apache.commons.io.FileSystem;
038import org.apache.commons.io.StandardLineSeparator;
039import org.apache.commons.io.build.AbstractStreamBuilder;
040import org.apache.commons.io.function.IOIterable;
041import org.apache.commons.io.function.IOIterator;
042
043/**
044 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
045 * <p>
046 * To build an instance, use {@link Builder}.
047 * </p>
048 * <p>
049 * For example:
050 * </p>
051 * <pre>
052 * <code>
053 * try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
054 *   .setPath(path)
055 *   .setBufferSize(4096)
056 *   .setCharset(StandardCharsets.UTF_8)
057 *   .get()) {
058 *      reader.forEach(line -&gt; System.out.println(line));
059 * }
060 * </code>
061 * </pre>
062 *
063 * @see Builder
064 * @since 2.2
065 */
066public class ReversedLinesFileReader implements Closeable, IOIterable<String> {
067
068    // @formatter:off
    /**
     * Builds a new {@link ReversedLinesFileReader}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * ReversedLinesFileReader reader = ReversedLinesFileReader.builder()
     *   .setPath(path)
     *   .setBufferSize(4096)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {

        /**
         * Constructs a new builder of {@link ReversedLinesFileReader}.
         * <p>
         * The buffer size and the default buffer size are both set to {@code DEFAULT_BLOCK_SIZE}, and the open options to
         * {@link StandardOpenOption#READ}.
         * </p>
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
            setBufferSize(DEFAULT_BLOCK_SIZE);
            setOpenOptions(StandardOpenOption.READ);
        }

        /**
         * Builds a new {@link ReversedLinesFileReader}.
         * <p>
         * You must set an aspect that can be opened as a {@link SeekableByteChannel}: the reader obtains its data by calling
         * {@code getChannel(SeekableByteChannel.class)} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@code getChannel(SeekableByteChannel.class)} gets the target aspect.</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>{@link #getCharset()}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to a {@link Path}.
         * @throws IOException                   if an I/O error occurs opening the channel or reading the last block of the file; this includes
         *                                       {@link UnsupportedEncodingException} when the charset is not supported by this reader.
         * @see #getPath()
         * @see #getBufferSize()
         * @see #getCharset()
         * @see #getUnchecked()
         */
        @Override
        public ReversedLinesFileReader get() throws IOException {
            return new ReversedLinesFileReader(this);
        }

    }
127
128    private final class FilePart {
        /** One-based index of this block within the file; {@code 0} is used for the single part of an empty file. */
        private final long partNumber;

        /** The bytes read for this block, followed by the leftover bytes handed over from the previously processed part. */
        private final byte[] data;

        /** Leading bytes of this block not consumed by {@code readLine()}; handed to the next part by {@code rollOver()}. May be {@code null}. */
        private byte[] leftOver;

        /** Index in {@code data} of the last byte not yet returned as part of a line; {@code -1} once this part is used up. */
        private int currentLastBytePos;
136
137        /**
138         * Constructs a new instance.
139         *
140         * @param partNumber             the part number.
141         * @param length                 its length.
142         * @param leftOverOfLastFilePart remainder.
143         * @throws IOException if there is a problem reading the file.
144         */
145        private FilePart(final long partNumber, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
146            this.partNumber = partNumber;
147            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
148            this.data = new byte[dataLength];
149            final long off = (partNumber - 1) * blockSize;
150
151            // read data
152            if (partNumber > 0 /* file not empty */) {
153                channel.position(off);
154                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
155                if (countRead != length) {
156                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
157                }
158            }
159            // copy left over part into data arr
160            if (leftOverOfLastFilePart != null) {
161                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
162            }
163            this.currentLastBytePos = data.length - 1;
164            this.leftOver = null;
165        }
166
167        /**
168         * Constructs the buffer containing any leftover bytes.
169         */
170        private void createLeftOver() {
171            final int lineLengthBytes = currentLastBytePos + 1;
172            if (lineLengthBytes > 0) {
173                // create left over for next block
174                leftOver = Arrays.copyOf(data, lineLengthBytes);
175            } else {
176                leftOver = null;
177            }
178            currentLastBytePos = -1;
179        }
180
181        /**
182         * Finds the new-line sequence and return its length.
183         *
184         * @param data buffer to scan.
185         * @param i    start offset in buffer.
186         * @return length of newline sequence or 0 if none found.
187         */
188        private int getNewLineMatchByteCount(final byte[] data, final int i) {
189            for (final byte[] newLineSequence : newLineSequences) {
190                boolean match = true;
191                for (int j = newLineSequence.length - 1; j >= 0; j--) {
192                    final int k = i + j - (newLineSequence.length - 1);
193                    match &= k >= 0 && data[k] == newLineSequence[j];
194                }
195                if (match) {
196                    return newLineSequence.length;
197                }
198            }
199            return 0;
200        }
201
        /**
         * Reads the next line of this part, scanning {@code data} backwards from {@code currentLastBytePos}.
         * <p>
         * A returned line does not contain its new-line sequence. For every part except part number 1 (the block at the start of the file), the bytes
         * in front of the first new-line found in this block are not returned here but stored in {@code leftOver}, so that the next part can complete
         * the line.
         * </p>
         *
         * @return the line, or {@code null} if this part has no further line to return.
         */
        private String readLine() { //NOPMD Bug in PMD

            String line = null;
            int newLineMatchByteCount;

            // Part number 1 is the block at the very start of the file, i.e. the last one to be processed.
            final boolean isLastFilePart = partNumber == 1;

            int i = currentLastBytePos;

            // The block is used up, but a leftover is pending in the first block of the file: it is the first line of the file.
            if (i == -1 && isLastFilePart && leftOver != null) {
                line = new String(leftOver, charset);
                leftOver = null;
                return line;
            }

            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // check for newline
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    // The line is everything after the new-line up to the last unconsumed byte.
                    final int lineStart = i + 1;
                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);

                    line = new String(lineData, charset);

                    // Continue in front of the new-line sequence on the next call.
                    currentLastBytePos = i - newLineMatchByteCount;

                    // The new-line sits at index 0 of the first block of the file: record an empty leftover
                    // so that a later call returns the empty first line.
                    if (isLastFilePart && currentLastBytePos == -1 && i == 0) {
                        leftOver = new byte[0];
                    }
                    break; // found line
                }

                // move cursor
                i -= byteDecrement;

                // end of file part handling
                if (i < 0) {
                    if (isLastFilePart) {
                        // Start of the file reached without a new-line: the remaining bytes are the first line.
                        final int lineLengthBytes = currentLastBytePos + 1;
                        if (lineLengthBytes > 0) {
                            final byte[] lineData = Arrays.copyOf(data, lineLengthBytes);
                            line = new String(lineData, charset);
                        }
                        currentLastBytePos = -1;
                    } else {
                        createLeftOver();
                    }
                    break; // end of file part
                }
            }

            // there will be no line break anymore, this is the first line of the file
            if (line == null && isLastFilePart && leftOver != null) {
                line = new String(leftOver, charset);
                leftOver = null;
            }

            return line;
        }
278
279        /**
280         * Handles block rollover
281         *
282         * @return the new FilePart or null.
283         * @throws IOException if there was a problem reading the file.
284         */
285        private FilePart rollOver() throws IOException {
286
287            if (currentLastBytePos > -1) {
288                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
289                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
290            }
291
292            if (partNumber > 1) {
293                return new FilePart(partNumber - 1, blockSize, leftOver);
294            }
295            // NO 1 was the last FilePart, we're finished
296            if (leftOver != null) {
297                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
298                        + new String(leftOver, charset));
299            }
300            return null;
301        }
302    }
303
    /** The empty string; used to recognize the empty line caused by a trailing newline and as the result when no lines are available. */
    private static final String EMPTY_STRING = "";

    /** The default block size: the block size reported by the current {@link FileSystem}. */
    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
307
    /**
     * Constructs a new {@link Builder}.
     * <p>
     * The builder's constructor presets the buffer size to {@code DEFAULT_BLOCK_SIZE} and the open options to {@link StandardOpenOption#READ}.
     * </p>
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }
317
    /** Number of bytes read per block, taken from the builder's buffer size. */
    private final int blockSize;
    /** Charset used to decode lines and to encode the new-line sequences. */
    private final Charset charset;
    /** Channel the blocks are read from; closed by {@link #close()}. */
    private final SeekableByteChannel channel;
    /** Size of the channel in bytes, as reported when this reader was constructed. */
    private final long totalByteLength;
    /** Number of blocks needed to cover {@code totalByteLength}; {@code 0} for an empty file. */
    private final long totalBlockCount;
    /** The encoded new-line sequences in matching order: CRLF, LF, CR. */
    private final byte[][] newLineSequences;
    /** Length in bytes of the encoded CRLF sequence; that many leading bytes of a block are deferred to the next part so a new-line is not split. */
    private final int avoidNewlineSplitBufferSize;
    /** Step in bytes used when scanning a block backwards (1 or 2, depending on the charset). */
    private final int byteDecrement;
    /** The part currently being read; {@link #readLine()} replaces it with the result of {@code rollOver()}. */
    private FilePart currentFilePart;
    /** Whether the empty line caused by a new-line at the very end of the file has already been skipped. */
    private boolean trailingNewlineOfFileSkipped;
328
329    private ReversedLinesFileReader(final Builder builder) throws IOException {
330        this.blockSize = builder.getBufferSize();
331        this.charset = Charsets.toCharset(builder.getCharset());
332        // check & prepare encoding
333        final CharsetEncoder charsetEncoder = charset.newEncoder();
334        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
335        if (maxBytesPerChar == 1f || charset == StandardCharsets.UTF_8) {
336            // all one byte encodings are partNumber problem
337            byteDecrement = 1;
338        } else if (charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
339                // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
340                charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
341                charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
342                charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
343                charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
344            byteDecrement = 1;
345        } else if (charset == StandardCharsets.UTF_16BE || charset == StandardCharsets.UTF_16LE) {
346            // UTF-16 new line sequences are not allowed as second tuple of four byte
347            // sequences,
348            // however byte order has to be specified
349            byteDecrement = 2;
350        } else if (charset == StandardCharsets.UTF_16) {
351            throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
352        } else {
353            throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
354        }
355        // NOTE: The new line sequences are matched in the order given, so it is
356        // important that \r\n is BEFORE \n
357        this.newLineSequences = new byte[][] { StandardLineSeparator.CRLF.getBytes(charset), StandardLineSeparator.LF.getBytes(charset),
358                StandardLineSeparator.CR.getBytes(charset) };
359        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
360        // Open file
361        this.channel = builder.getChannel(SeekableByteChannel.class);
362        this.totalByteLength = channel.size();
363        int lastBlockLength = (int) (totalByteLength % blockSize);
364        if (lastBlockLength > 0) {
365            this.totalBlockCount = totalByteLength / blockSize + 1;
366        } else {
367            this.totalBlockCount = totalByteLength / blockSize;
368            if (totalByteLength > 0) {
369                lastBlockLength = blockSize;
370            }
371        }
372        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
373    }
374
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size of the current {@link FileSystem}) and the virtual machine's
     * {@linkplain Charset#defaultCharset() default charset}.
     *
     * @param file the file to be read.
     * @throws IOException if an I/O error occurs.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
    }
386
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size of the current {@link FileSystem}) and the
     * specified encoding.
     *
     * @param file    the file to be read; converted with {@link File#toPath()}.
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.5
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
        this(file.toPath(), charset);
    }
401
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file      the file to be read; converted with {@link File#toPath()}.
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset   the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.3
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
        this(file.toPath(), blockSize, charset);
    }
418
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding name.
     *
     * @param file        the file to be read; converted with {@link File#toPath()}.
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the name of the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs.
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
        this(file.toPath(), blockSize, charsetName);
    }
435
    /**
     * Constructs a ReversedLinesFileReader with the default block size (the block size of the current {@link FileSystem}) and the
     * specified encoding.
     *
     * @param file    the file to be read.
     * @param charset the charset to use, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
        this(file, DEFAULT_BLOCK_SIZE, charset);
    }
450
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding.
     * <p>
     * This constructor configures a {@link Builder} with the given path, buffer size and charset and builds the reader from it.
     * </p>
     *
     * @param file      the file to be read.
     * @param blockSize size of the internal buffer (for ideal performance this
     *                  should match with the block size of the underlying file
     *                  system).
     * @param charset   the encoding of the file, null uses the default Charset.
     * @throws IOException if an I/O error occurs.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
        this(builder().setPath(file).setBufferSize(blockSize).setCharset(charset));
    }
467
    /**
     * Constructs a ReversedLinesFileReader with the given block size and encoding name.
     * <p>
     * The name is resolved to a {@link Charset} with {@code Charsets.toCharset(String)} before delegating.
     * </p>
     *
     * @param file        the file to be read.
     * @param blockSize   size of the internal buffer (for ideal performance this
     *                    should match with the block size of the underlying file
     *                    system).
     * @param charsetName the name of the encoding of the file, null uses the default Charset.
     * @throws IOException                                  if an I/O error occurs.
     * @throws java.nio.charset.UnsupportedCharsetException if the encoding is not supported.
     * @since 2.7
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
        this(file, blockSize, Charsets.toCharset(charsetName));
    }
485
    /**
     * Closes underlying resources, that is, the {@link SeekableByteChannel} this reader reads from.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        channel.close();
    }
495
496    @Override
497    public IOIterator<String> iterator() {
498        return new IOIterator<String>() {
499
500            private String next;
501
502            @Override
503            public boolean hasNext() throws IOException {
504                if (next == null) {
505                    next = readLine();
506                }
507                return next != null;
508            }
509
510            @Override
511            public String next() throws IOException {
512                if (next == null) {
513                    next = readLine();
514                }
515                final String tmp = next;
516                next = null;
517                return tmp;
518            }
519
520            @Override
521            public Iterator<String> unwrap() {
522                return null;
523            }
524
525        };
526    }
527
528    /**
529     * Returns the lines of the file from bottom to top.
530     *
531     * @return the next line or null if the start of the file is reached.
532     * @throws IOException if an I/O error occurs.
533     */
534    public String readLine() throws IOException {
535        String line = currentFilePart.readLine();
536        while (line == null) {
537            currentFilePart = currentFilePart.rollOver();
538            if (currentFilePart == null) {
539                // partNumber more FileParts: we're done, leave line set to null
540                break;
541            }
542            line = currentFilePart.readLine();
543        }
544        // aligned behavior with BufferedReader that doesn't return a last, empty line
545        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
546            trailingNewlineOfFileSkipped = true;
547            line = readLine();
548        }
549        return line;
550    }
551
552    /**
553     * Returns {@code lineCount} lines of the file from bottom to top.
554     * <p>
555     * If there are less than {@code lineCount} lines in the file, then that's what
556     * you get.
557     * </p>
558     * <p>
559     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
560     * </p>
561     *
562     * @param lineCount How many lines to read.
563     * @return A new list.
564     * @throws IOException if an I/O error occurs.
565     * @since 2.8.0
566     */
567    public List<String> readLines(final int lineCount) throws IOException {
568        if (lineCount < 0) {
569            throw new IllegalArgumentException("lineCount < 0");
570        }
571        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
572        for (int i = 0; i < lineCount; i++) {
573            final String line = readLine();
574            if (line == null) {
575                return arrayList;
576            }
577            arrayList.add(line);
578        }
579        return arrayList;
580    }
581
582    /**
583     * Returns the last {@code lineCount} lines of the file.
584     * <p>
585     * If there are less than {@code lineCount} lines in the file, then that's what
586     * you get.
587     * </p>
588     *
589     * @param lineCount How many lines to read.
590     * @return A String.
591     * @throws IOException if an I/O error occurs.
592     * @since 2.8.0
593     */
594    public String toString(final int lineCount) throws IOException {
595        final List<String> lines = readLines(lineCount);
596        Collections.reverse(lines);
597        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
598    }
599
    /**
     * Returns {@code null}; this implementation provides no underlying {@link Iterable}.
     *
     * @return always {@code null}.
     */
    @Override
    public Iterable<String> unwrap() {
        return null;
    }
604
605}