001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.nio.file.StandardOpenOption; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.Collections; 034import java.util.List; 035 036import org.apache.commons.io.Charsets; 037import org.apache.commons.io.FileSystem; 038import org.apache.commons.io.StandardLineSeparator; 039import org.apache.commons.io.build.AbstractOrigin; 040import org.apache.commons.io.build.AbstractStreamBuilder; 041 042/** 043 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files. 044 * <p> 045 * To build an instance, see {@link Builder}. 046 * </p> 047 * 048 * @since 2.2 049 */ 050public class ReversedLinesFileReader implements Closeable { 051 052 /** 053 * Builds a new {@link ReversedLinesFileReader} instance. 054 * <p> 055 * For example: 056 * </p> 057 * <pre>{@code 058 * ReversedLinesFileReader r = ReversedLinesFileReader.builder() 059 * .setPath(path) 060 * .setBufferSize(4096) 061 * .setCharset(StandardCharsets.UTF_8) 062 * .get();} 063 * </pre> 064 * 065 * @since 2.12.0 066 */ 067 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> { 068 069 public Builder() { 070 setBufferSizeDefault(DEFAULT_BLOCK_SIZE); 071 setBufferSize(DEFAULT_BLOCK_SIZE); 072 } 073 074 /** 075 * Constructs a new instance. 076 * <p> 077 * This builder use the aspects Path, Charset, buffer size. 078 * </p> 079 * <p> 080 * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an 081 * {@link UnsupportedOperationException}. 082 * </p> 083 * 084 * @return a new instance. 085 * @throws UnsupportedOperationException if the origin cannot provide a Path. 086 * @see AbstractOrigin#getPath() 087 */ 088 @Override 089 public ReversedLinesFileReader get() throws IOException { 090 return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset()); 091 } 092 093 } 094 095 private class FilePart { 096 private final long no; 097 098 private final byte[] data; 099 100 private byte[] leftOver; 101 102 private int currentLastBytePos; 103 104 /** 105 * Constructs a new instance. 106 * 107 * @param no the part number 108 * @param length its length 109 * @param leftOverOfLastFilePart remainder 110 * @throws IOException if there is a problem reading the file 111 */ 112 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 113 this.no = no; 114 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 115 this.data = new byte[dataLength]; 116 final long off = (no - 1) * blockSize; 117 118 // read data 119 if (no > 0 /* file not empty */) { 120 channel.position(off); 121 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 122 if (countRead != length) { 123 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 124 } 125 } 126 // copy left over part into data arr 127 if (leftOverOfLastFilePart != null) { 128 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 129 } 130 this.currentLastBytePos = data.length - 1; 131 this.leftOver = null; 132 } 133 134 /** 135 * Constructs the buffer containing any leftover bytes. 136 */ 137 private void createLeftOver() { 138 final int lineLengthBytes = currentLastBytePos + 1; 139 if (lineLengthBytes > 0) { 140 // create left over for next block 141 leftOver = Arrays.copyOf(data, lineLengthBytes); 142 } else { 143 leftOver = null; 144 } 145 currentLastBytePos = -1; 146 } 147 148 /** 149 * Finds the new-line sequence and return its length. 150 * 151 * @param data buffer to scan 152 * @param i start offset in buffer 153 * @return length of newline sequence or 0 if none found 154 */ 155 private int getNewLineMatchByteCount(final byte[] data, final int i) { 156 for (final byte[] newLineSequence : newLineSequences) { 157 boolean match = true; 158 for (int j = newLineSequence.length - 1; j >= 0; j--) { 159 final int k = i + j - (newLineSequence.length - 1); 160 match &= k >= 0 && data[k] == newLineSequence[j]; 161 } 162 if (match) { 163 return newLineSequence.length; 164 } 165 } 166 return 0; 167 } 168 169 /** 170 * Reads a line. 171 * 172 * @return the line or null 173 */ 174 private String readLine() { //NOPMD Bug in PMD 175 176 String line = null; 177 int newLineMatchByteCount; 178 179 final boolean isLastFilePart = no == 1; 180 181 int i = currentLastBytePos; 182 while (i > -1) { 183 184 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 185 // avoidNewlineSplitBuffer: for all except the last file part we 186 // take a few bytes to the next file part to avoid splitting of newlines 187 createLeftOver(); 188 break; // skip last few bytes and leave it to the next file part 189 } 190 191 // --- check for newline --- 192 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 193 final int lineStart = i + 1; 194 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 195 196 if (lineLengthBytes < 0) { 197 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); 198 } 199 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes); 200 201 line = new String(lineData, charset); 202 203 currentLastBytePos = i - newLineMatchByteCount; 204 break; // found line 205 } 206 207 // --- move cursor --- 208 i -= byteDecrement; 209 210 // --- end of file part handling --- 211 if (i < 0) { 212 createLeftOver(); 213 break; // end of file part 214 } 215 } 216 217 // --- last file part handling --- 218 if (isLastFilePart && leftOver != null) { 219 // there will be no line break anymore, this is the first line of the file 220 line = new String(leftOver, charset); 221 leftOver = null; 222 } 223 224 return line; 225 } 226 227 /** 228 * Handles block rollover 229 * 230 * @return the new FilePart or null 231 * @throws IOException if there was a problem reading the file 232 */ 233 private FilePart rollOver() throws IOException { 234 235 if (currentLastBytePos > -1) { 236 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 237 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 238 } 239 240 if (no > 1) { 241 return new FilePart(no - 1, blockSize, leftOver); 242 } 243 // NO 1 was the last FilePart, we're finished 244 if (leftOver != null) { 245 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 246 + new String(leftOver, charset)); 247 } 248 return null; 249 } 250 } 251 252 private static final String EMPTY_STRING = ""; 253 254 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); 255 256 /** 257 * Constructs a new {@link Builder}. 258 * 259 * @return a new {@link Builder}. 260 * @since 2.12.0 261 */ 262 public static Builder builder() { 263 return new Builder(); 264 } 265 266 private final int blockSize; 267 private final Charset charset; 268 private final SeekableByteChannel channel; 269 private final long totalByteLength; 270 private final long totalBlockCount; 271 private final byte[][] newLineSequences; 272 private final int avoidNewlineSplitBufferSize; 273 private final int byteDecrement; 274 private FilePart currentFilePart; 275 private boolean trailingNewlineOfFileSkipped; 276 277 /** 278 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 279 * platform's default encoding. 280 * 281 * @param file the file to be read 282 * @throws IOException if an I/O error occurs. 283 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 284 */ 285 @Deprecated 286 public ReversedLinesFileReader(final File file) throws IOException { 287 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 288 } 289 290 /** 291 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 292 * specified encoding. 293 * 294 * @param file the file to be read 295 * @param charset the charset to use, null uses the default Charset. 296 * @throws IOException if an I/O error occurs. 297 * @since 2.5 298 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 299 */ 300 @Deprecated 301 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 302 this(file.toPath(), charset); 303 } 304 305 /** 306 * Constructs a ReversedLinesFileReader with the given block size and encoding. 307 * 308 * @param file the file to be read 309 * @param blockSize size of the internal buffer (for ideal performance this 310 * should match with the block size of the underlying file 311 * system). 312 * @param charset the encoding of the file, null uses the default Charset. 313 * @throws IOException if an I/O error occurs. 314 * @since 2.3 315 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 316 */ 317 @Deprecated 318 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { 319 this(file.toPath(), blockSize, charset); 320 } 321 322 /** 323 * Constructs a ReversedLinesFileReader with the given block size and encoding. 324 * 325 * @param file the file to be read 326 * @param blockSize size of the internal buffer (for ideal performance this 327 * should match with the block size of the underlying file 328 * system). 329 * @param charsetName the encoding of the file, null uses the default Charset. 330 * @throws IOException if an I/O error occurs 331 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 332 * {@link UnsupportedEncodingException} 333 * in version 2.2 if the 334 * encoding is not 335 * supported. 336 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 337 */ 338 @Deprecated 339 public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException { 340 this(file.toPath(), blockSize, charsetName); 341 } 342 343 /** 344 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 345 * specified encoding. 346 * 347 * @param file the file to be read 348 * @param charset the charset to use, null uses the default Charset. 349 * @throws IOException if an I/O error occurs. 350 * @since 2.7 351 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 352 */ 353 @Deprecated 354 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 355 this(file, DEFAULT_BLOCK_SIZE, charset); 356 } 357 358 /** 359 * Constructs a ReversedLinesFileReader with the given block size and encoding. 360 * 361 * @param file the file to be read 362 * @param blockSize size of the internal buffer (for ideal performance this 363 * should match with the block size of the underlying file 364 * system). 365 * @param charset the encoding of the file, null uses the default Charset. 366 * @throws IOException if an I/O error occurs. 367 * @since 2.7 368 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 369 */ 370 @Deprecated 371 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException { 372 this.blockSize = blockSize; 373 this.charset = Charsets.toCharset(charset); 374 375 // --- check & prepare encoding --- 376 final CharsetEncoder charsetEncoder = this.charset.newEncoder(); 377 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 378 if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) { 379 // all one byte encodings are no problem 380 byteDecrement = 1; 381 } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 382 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 383 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 384 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 385 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 386 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 387 byteDecrement = 1; 388 } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) { 389 // UTF-16 new line sequences are not allowed as second tuple of four byte 390 // sequences, 391 // however byte order has to be specified 392 byteDecrement = 2; 393 } else if (this.charset == StandardCharsets.UTF_16) { 394 throw new UnsupportedEncodingException( 395 "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)"); 396 } else { 397 throw new UnsupportedEncodingException( 398 "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)"); 399 } 400 401 // NOTE: The new line sequences are matched in the order given, so it is 402 // important that \r\n is BEFORE \n 403 this.newLineSequences = new byte[][] { 404 StandardLineSeparator.CRLF.getBytes(this.charset), 405 StandardLineSeparator.LF.getBytes(this.charset), 406 StandardLineSeparator.CR.getBytes(this.charset) 407 }; 408 409 this.avoidNewlineSplitBufferSize = newLineSequences[0].length; 410 411 // Open file 412 this.channel = Files.newByteChannel(file, StandardOpenOption.READ); 413 this.totalByteLength = channel.size(); 414 int lastBlockLength = (int) (this.totalByteLength % blockSize); 415 if (lastBlockLength > 0) { 416 this.totalBlockCount = this.totalByteLength / blockSize + 1; 417 } else { 418 this.totalBlockCount = this.totalByteLength / blockSize; 419 if (this.totalByteLength > 0) { 420 lastBlockLength = blockSize; 421 } 422 } 423 this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 424 425 } 426 427 /** 428 * Constructs a ReversedLinesFileReader with the given block size and encoding. 429 * 430 * @param file the file to be read 431 * @param blockSize size of the internal buffer (for ideal performance this 432 * should match with the block size of the underlying file 433 * system). 434 * @param charsetName the encoding of the file, null uses the default Charset. 435 * @throws IOException if an I/O error occurs 436 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 437 * {@link UnsupportedEncodingException} 438 * in version 2.2 if the 439 * encoding is not 440 * supported. 441 * @since 2.7 442 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 443 */ 444 @Deprecated 445 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 446 this(file, blockSize, Charsets.toCharset(charsetName)); 447 } 448 449 /** 450 * Closes underlying resources. 451 * 452 * @throws IOException if an I/O error occurs. 453 */ 454 @Override 455 public void close() throws IOException { 456 channel.close(); 457 } 458 459 /** 460 * Returns the lines of the file from bottom to top. 461 * 462 * @return the next line or null if the start of the file is reached 463 * @throws IOException if an I/O error occurs. 464 */ 465 public String readLine() throws IOException { 466 467 String line = currentFilePart.readLine(); 468 while (line == null) { 469 currentFilePart = currentFilePart.rollOver(); 470 if (currentFilePart == null) { 471 // no more FileParts: we're done, leave line set to null 472 break; 473 } 474 line = currentFilePart.readLine(); 475 } 476 477 // aligned behavior with BufferedReader that doesn't return a last, empty line 478 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 479 trailingNewlineOfFileSkipped = true; 480 line = readLine(); 481 } 482 483 return line; 484 } 485 486 /** 487 * Returns {@code lineCount} lines of the file from bottom to top. 488 * <p> 489 * If there are less than {@code lineCount} lines in the file, then that's what 490 * you get. 491 * </p> 492 * <p> 493 * Note: You can easily flip the result with {@link Collections#reverse(List)}. 494 * </p> 495 * 496 * @param lineCount How many lines to read. 497 * @return A new list 498 * @throws IOException if an I/O error occurs. 499 * @since 2.8.0 500 */ 501 public List<String> readLines(final int lineCount) throws IOException { 502 if (lineCount < 0) { 503 throw new IllegalArgumentException("lineCount < 0"); 504 } 505 final ArrayList<String> arrayList = new ArrayList<>(lineCount); 506 for (int i = 0; i < lineCount; i++) { 507 final String line = readLine(); 508 if (line == null) { 509 return arrayList; 510 } 511 arrayList.add(line); 512 } 513 return arrayList; 514 } 515 516 /** 517 * Returns the last {@code lineCount} lines of the file. 518 * <p> 519 * If there are less than {@code lineCount} lines in the file, then that's what 520 * you get. 521 * </p> 522 * 523 * @param lineCount How many lines to read. 524 * @return A String. 525 * @throws IOException if an I/O error occurs. 526 * @since 2.8.0 527 */ 528 public String toString(final int lineCount) throws IOException { 529 final List<String> lines = readLines(lineCount); 530 Collections.reverse(lines); 531 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); 532 } 533 534}