001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.IOException;
020import java.nio.charset.Charset;
021import java.nio.charset.StandardCharsets;
022import java.util.ArrayList;
023import java.util.HashMap;
024import java.util.List;
025import java.util.Locale;
026import java.util.Map;
027import java.util.Objects;
028
029import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder;
030import org.apache.commons.io.IOUtils;
031
032/**
033 * High level API for processing file uploads.
034 * <p>
035 * This class handles multiple files per single HTML widget, sent using {@code multipart/mixed} encoding type, as specified by
036 * <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>. Use {@link #parseRequest(RequestContext)} to acquire a list of {@link FileItem}s associated with
037 * a given HTML widget.
038 * </p>
039 * <p>
040 * How the data for individual parts is stored is determined by the factory used to create them; a given part may be in memory, on disk, or somewhere else.
041 * </p>
042 *
043 * @param <R> The request context type.
044 * @param <I> The FileItem type.
045 * @param <F> the FileItemFactory type.
046 */
047public abstract class AbstractFileUpload<R, I extends FileItem<I>, F extends FileItemFactory<I>> {
048
049    /**
050     * Boundary parameter key.
051     */
052    private static final String BOUNDARY_KEY = "boundary";
053
054    /**
055     * Name parameter key.
056     */
057    private static final String NAME_KEY = "name";
058
059    /**
060     * File name parameter key.
061     */
062    private static final String FILENAME_KEY = "filename";
063
064    /**
065     * HTTP content type header name.
066     */
067    public static final String CONTENT_TYPE = "Content-type";
068
069    /**
070     * HTTP content disposition header name.
071     */
072    public static final String CONTENT_DISPOSITION = "Content-disposition";
073
074    /**
075     * HTTP content length header name.
076     */
077    public static final String CONTENT_LENGTH = "Content-length";
078
079    /**
080     * Content-disposition value for form data.
081     */
082    public static final String FORM_DATA = "form-data";
083
084    /**
085     * Content-disposition value for file attachment.
086     */
087    public static final String ATTACHMENT = "attachment";
088
089    /**
090     * Part of HTTP content type header.
091     */
092    public static final String MULTIPART = "multipart/";
093
094    /**
095     * HTTP content type header for multipart forms.
096     */
097    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
098
099    /**
100     * HTTP content type header for multiple uploads.
101     */
102    public static final String MULTIPART_MIXED = "multipart/mixed";
103
104    /**
105     * Utility method that determines whether the request contains multipart content.
106     * <p>
107     * <strong>NOTE:</strong> This method will be moved to the {@code ServletFileUpload} class after the FileUpload 1.1 release. Unfortunately, since this
108     * method is static, it is not possible to provide its replacement until this method is removed.
109     * </p>
110     *
111     * @param ctx The request context to be evaluated. Must be non-null.
112     * @return {@code true} if the request is multipart; {@code false} otherwise.
113     */
114    public static final boolean isMultipartContent(final RequestContext ctx) {
115        final var contentType = ctx.getContentType();
116        if (contentType == null) {
117            return false;
118        }
119        return contentType.toLowerCase(Locale.ROOT).startsWith(MULTIPART);
120    }
121
122    /**
123     * The maximum size permitted for the complete request, as opposed to {@link #fileSizeMax}. A value of -1 indicates no maximum.
124     */
125    private long sizeMax = -1;
126
127    /**
128     * The maximum size permitted for a single uploaded file, as opposed to {@link #sizeMax}. A value of -1 indicates no maximum.
129     */
130    private long fileSizeMax = -1;
131
132    /**
133     * The maximum permitted number of files that may be uploaded in a single request. A value of -1 indicates no maximum.
134     */
135    private long fileCountMax = -1;
136
137    /**
138     * The content encoding to use when reading part headers.
139     */
140    private Charset headerCharset;
141
142    /**
143     * The progress listener.
144     */
145    private ProgressListener progressListener = ProgressListener.NOP;
146
147    /**
148     * The factory to use to create new form items.
149     */
150    private F fileItemFactory;
151
152    /**
153     * Constructs a new instance for subclasses.
154     */
155    public AbstractFileUpload() {
156        // empty
157    }
158
159    /**
160     * Gets the boundary from the {@code Content-type} header.
161     *
162     * @param contentType The value of the content type header from which to extract the boundary value.
163     * @return The boundary, as a byte array.
164     */
165    public byte[] getBoundary(final String contentType) {
166        final var parser = new ParameterParser();
167        parser.setLowerCaseNames(true);
168        // Parameter parser can handle null input
169        final var params = parser.parse(contentType, new char[] { ';', ',' });
170        final var boundaryStr = params.get(BOUNDARY_KEY);
171        return boundaryStr != null ? boundaryStr.getBytes(StandardCharsets.ISO_8859_1) : null;
172    }
173
174    /**
175     * Gets the field name from the {@code Content-disposition} header.
176     *
     * @param headers The {@link FileItemHeaders} of the current part.
178     * @return The field name for the current {@code encapsulation}.
179     */
180    public String getFieldName(final FileItemHeaders headers) {
181        return getFieldName(headers.getHeader(CONTENT_DISPOSITION));
182    }
183
184    /**
185     * Gets the field name, which is given by the content-disposition header.
186     *
     * @param contentDisposition The content-disposition header value.
188     * @return The field name.
189     */
190    private String getFieldName(final String contentDisposition) {
191        String fieldName = null;
192        if (contentDisposition != null && contentDisposition.toLowerCase(Locale.ROOT).startsWith(FORM_DATA)) {
193            final var parser = new ParameterParser();
194            parser.setLowerCaseNames(true);
195            // Parameter parser can handle null input
196            final var params = parser.parse(contentDisposition, ';');
197            fieldName = params.get(NAME_KEY);
198            if (fieldName != null) {
199                fieldName = fieldName.trim();
200            }
201        }
202        return fieldName;
203    }
204
205    /**
206     * Gets the maximum number of files allowed in a single request.
207     *
208     * @return The maximum number of files allowed in a single request.
209     */
210    public long getFileCountMax() {
211        return fileCountMax;
212    }
213
214    /**
215     * Gets the factory class used when creating file items.
216     *
217     * @return The factory class for new file items.
218     */
219    public F getFileItemFactory() {
220        return fileItemFactory;
221    }
222
223    /**
224     * Gets the file name from the {@code Content-disposition} header.
225     *
226     * @param headers The HTTP headers object.
227     * @return The file name for the current {@code encapsulation}.
228     */
229    public String getFileName(final FileItemHeaders headers) {
230        return getFileName(headers.getHeader(CONTENT_DISPOSITION));
231    }
232
233    /**
     * Gets the file name from the given content-disposition header value.
     *
     * @param contentDisposition The content-disposition header value.
237     * @return The file name
238     */
239    private String getFileName(final String contentDisposition) {
240        String fileName = null;
241        if (contentDisposition != null) {
242            final var cdl = contentDisposition.toLowerCase(Locale.ROOT);
243            if (cdl.startsWith(FORM_DATA) || cdl.startsWith(ATTACHMENT)) {
244                final var parser = new ParameterParser();
245                parser.setLowerCaseNames(true);
246                // Parameter parser can handle null input
247                final var params = parser.parse(contentDisposition, ';');
248                if (params.containsKey(FILENAME_KEY)) {
249                    fileName = params.get(FILENAME_KEY);
250                    if (fileName != null) {
251                        fileName = fileName.trim();
252                    } else {
253                        // Even if there is no value, the parameter is present,
254                        // so we return an empty file name rather than no file
255                        // name.
256                        fileName = "";
257                    }
258                }
259            }
260        }
261        return fileName;
262    }
263
264    /**
265     * Gets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
266     *
267     * @see #setFileSizeMax(long)
268     * @return Maximum size of a single uploaded file.
269     */
270    public long getFileSizeMax() {
271        return fileSizeMax;
272    }
273
274    /**
275     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the request encoding is used. If
276     * that is also not specified, or {@code null}, the platform default encoding is used.
277     *
278     * @return The encoding used to read part headers.
279     */
280    public Charset getHeaderCharset() {
281        return headerCharset;
282    }
283
284    /**
285     * Gets a file item iterator.
286     *
287     * @param request The servlet request to be parsed.
288     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
289     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
290     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
291     *                             uploaded content.
292     */
293    public abstract FileItemInputIterator getItemIterator(R request) throws FileUploadException, IOException;
294
295    /**
296     * Gets an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
297     *
298     * @param requestContext The context for the request to be parsed.
299     * @return An iterator to instances of {@code FileItemInput} parsed from the request, in the order that they were transmitted.
300     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
301     * @throws IOException         An I/O error occurred. This may be a network error while communicating with the client or a problem while storing the
302     *                             uploaded content.
303     */
304    public FileItemInputIterator getItemIterator(final RequestContext requestContext) throws FileUploadException, IOException {
305        return new FileItemInputIteratorImpl(this, requestContext);
306    }
307
308    /**
309     * Parses the {@code header-part} and returns as key/value pairs.
310     * <p>
311     * If there are multiple headers of the same names, the name will map to a comma-separated list containing the values.
312     * </p>
313     *
314     * @param headerPart The {@code header-part} of the current {@code encapsulation}.
     * @return A {@link FileItemHeaders} instance containing the parsed headers.
316     */
317    public FileItemHeaders getParsedHeaders(final String headerPart) {
318        final var len = headerPart.length();
319        final var headers = newFileItemHeaders();
320        var start = 0;
321        for (;;) {
322            var end = parseEndOfLine(headerPart, start);
323            if (start == end) {
324                break;
325            }
326            final var header = new StringBuilder(headerPart.substring(start, end));
327            start = end + 2;
328            while (start < len) {
329                var nonWs = start;
330                while (nonWs < len) {
331                    final var c = headerPart.charAt(nonWs);
332                    if (c != ' ' && c != '\t') {
333                        break;
334                    }
335                    ++nonWs;
336                }
337                if (nonWs == start) {
338                    break;
339                }
340                // Continuation line found
341                end = parseEndOfLine(headerPart, nonWs);
342                header.append(' ').append(headerPart, nonWs, end);
343                start = end + 2;
344            }
345            parseHeaderLine(headers, header.toString());
346        }
347        return headers;
348    }
349
350    /**
351     * Gets the progress listener.
352     *
     * @return The progress listener; never {@code null}, because an unset or {@code null} listener is represented by {@link ProgressListener#NOP}.
354     */
355    public ProgressListener getProgressListener() {
356        return progressListener;
357    }
358
359    /**
360     * Gets the maximum allowed size of a complete request, as opposed to {@link #getFileSizeMax()}.
361     *
     * @return The maximum allowed size, in bytes. The default value of -1 indicates that there is no limit.
363     * @see #setSizeMax(long)
364     */
365    public long getSizeMax() {
366        return sizeMax;
367    }
368
369    /**
370     * Creates a new instance of {@link FileItemHeaders}.
371     *
372     * @return The new instance.
373     */
374    protected FileItemHeaders newFileItemHeaders() {
375        return AbstractFileItemBuilder.newFileItemHeaders();
376    }
377
378    /**
379     * Skips bytes until the end of the current line.
380     *
381     * @param headerPart The headers, which are being parsed.
     * @param end        Index at which the search for the end of line starts.
383     * @return Index of the \r\n sequence, which indicates end of line.
384     */
385    private int parseEndOfLine(final String headerPart, final int end) {
386        var index = end;
387        for (;;) {
388            final var offset = headerPart.indexOf('\r', index);
389            if (offset == -1 || offset + 1 >= headerPart.length()) {
390                throw new IllegalStateException("Expected headers to be terminated by an empty line.");
391            }
392            if (headerPart.charAt(offset + 1) == '\n') {
393                return offset;
394            }
395            index = offset + 1;
396        }
397    }
398
399    /**
400     * Parses the next header line.
401     *
     * @param headers The headers object to which the parsed header is added.
     * @param header  The header line to parse.
404     */
405    private void parseHeaderLine(final FileItemHeaders headers, final String header) {
406        final var colonOffset = header.indexOf(':');
407        if (colonOffset == -1) {
408            // This header line is malformed, skip it.
409            return;
410        }
411        final var headerName = header.substring(0, colonOffset).trim();
412        final var headerValue = header.substring(colonOffset + 1).trim();
413        headers.addHeader(headerName, headerValue);
414    }
415
416    /**
417     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
418     *
419     * @param request The servlet request to be parsed.
420     * @return A map of {@code FileItem} instances parsed from the request.
421     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
422     */
423    public abstract Map<String, List<I>> parseParameterMap(R request) throws FileUploadException;
424
425    /**
426     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
427     *
428     * @param ctx The context for the request to be parsed.
429     * @return A map of {@code FileItem} instances parsed from the request.
430     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
431     */
432    public Map<String, List<I>> parseParameterMap(final RequestContext ctx) throws FileUploadException {
433        final var items = parseRequest(ctx);
434        final Map<String, List<I>> itemsMap = new HashMap<>(items.size());
435
436        for (final I fileItem : items) {
437            final var fieldName = fileItem.getFieldName();
438            final var mappedItems = itemsMap.computeIfAbsent(fieldName, k -> new ArrayList<>());
439            mappedItems.add(fileItem);
440        }
441
442        return itemsMap;
443    }
444
445    /**
446     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
447     *
448     * @param request The servlet request to be parsed.
449     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
450     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
451     */
452    public abstract List<I> parseRequest(R request) throws FileUploadException;
453
454    /**
455     * Parses an <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a> compliant {@code multipart/form-data} stream.
456     *
457     * @param requestContext The context for the request to be parsed.
458     * @return A list of {@code FileItem} instances parsed from the request, in the order that they were transmitted.
459     * @throws FileUploadException if there are problems reading/parsing the request or storing files.
460     */
461    public List<I> parseRequest(final RequestContext requestContext) throws FileUploadException {
462        final List<I> itemList = new ArrayList<>();
463        var successful = false;
464        try {
465            final var fileItemFactory = Objects.requireNonNull(getFileItemFactory(), "No FileItemFactory has been set.");
466            final var buffer = new byte[IOUtils.DEFAULT_BUFFER_SIZE];
467            getItemIterator(requestContext).forEachRemaining(fileItemInput -> {
468                if (itemList.size() == fileCountMax) {
469                    // The next item will exceed the limit.
470                    throw new FileUploadFileCountLimitException(ATTACHMENT, getFileCountMax(), itemList.size());
471                }
472                // Don't use getName() here to prevent an InvalidFileNameException.
473                // @formatter:off
474                final var fileItem = fileItemFactory.fileItemBuilder()
475                    .setFieldName(fileItemInput.getFieldName())
476                    .setContentType(fileItemInput.getContentType())
477                    .setFormField(fileItemInput.isFormField())
478                    .setFileName(fileItemInput.getName())
479                    .setFileItemHeaders(fileItemInput.getHeaders())
480                    .get();
481                // @formatter:on
482                itemList.add(fileItem);
483                try (var inputStream = fileItemInput.getInputStream();
484                        var outputStream = fileItem.getOutputStream()) {
485                    IOUtils.copyLarge(inputStream, outputStream, buffer);
486                } catch (final FileUploadException e) {
487                    throw e;
488                } catch (final IOException e) {
489                    throw new FileUploadException(String.format("Processing of %s request failed. %s", MULTIPART_FORM_DATA, e.getMessage()), e);
490                }
491            });
492            successful = true;
493            return itemList;
494        } catch (final FileUploadException e) {
495            throw e;
496        } catch (final IOException e) {
497            throw new FileUploadException(e.getMessage(), e);
498        } finally {
499            if (!successful) {
500                for (final I fileItem : itemList) {
501                    try {
502                        fileItem.delete();
503                    } catch (final Exception ignored) {
504                        // ignored TODO perhaps add to tracker delete failure list somehow?
505                    }
506                }
507            }
508        }
509    }
510
511    /**
512     * Sets the maximum number of files allowed per request.
513     *
514     * @param fileCountMax The new limit. {@code -1} means no limit.
515     */
516    public void setFileCountMax(final long fileCountMax) {
517        this.fileCountMax = fileCountMax;
518    }
519
520    /**
521     * Sets the factory class to use when creating file items.
522     *
523     * @param factory The factory class for new file items.
524     */
525    public void setFileItemFactory(final F factory) {
526        this.fileItemFactory = factory;
527    }
528
529    /**
530     * Sets the maximum allowed size of a single uploaded file, as opposed to {@link #getSizeMax()}.
531     *
532     * @see #getFileSizeMax()
533     * @param fileSizeMax Maximum size of a single uploaded file.
534     */
535    public void setFileSizeMax(final long fileSizeMax) {
536        this.fileSizeMax = fileSizeMax;
537    }
538
539    /**
540     * Specifies the character encoding to be used when reading the headers of individual part. When not specified, or {@code null}, the request encoding is
541     * used. If that is also not specified, or {@code null}, the platform default encoding is used.
542     *
543     * @param headerCharset The encoding used to read part headers.
544     */
545    public void setHeaderCharset(final Charset headerCharset) {
546        this.headerCharset = headerCharset;
547    }
548
549    /**
550     * Sets the progress listener.
551     *
     * @param progressListener The progress listener, if any. A {@code null} value is replaced by {@link ProgressListener#NOP}.
553     */
554    public void setProgressListener(final ProgressListener progressListener) {
555        this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
556    }
557
558    /**
559     * Sets the maximum allowed size of a complete request, as opposed to {@link #setFileSizeMax(long)}.
560     *
     * @param sizeMax The maximum allowed size, in bytes. The default value of -1 indicates that there is no limit.
562     * @see #getSizeMax()
563     */
564    public void setSizeMax(final long sizeMax) {
565        this.sizeMax = sizeMax;
566    }
567
568}