001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io;
018
019import java.io.Serializable;
020import java.nio.charset.StandardCharsets;
021import java.util.Locale;
022import java.util.Objects;
023
024/**
025 * Byte Order Mark (BOM) representation. See {@link org.apache.commons.io.input.BOMInputStream}.
026 * <p>
 * We define the following BOM constants:
028 * </p>
029 * <ul>
030 * <li>{@link #UTF_16BE}</li>
031 * <li>{@link #UTF_16LE}</li>
032 * <li>{@link #UTF_32BE}</li>
033 * <li>{@link #UTF_32LE}</li>
034 * <li>{@link #UTF_8}</li>
035 * </ul>
036 * <h2>Deprecating Serialization</h2>
037 * <p>
038 * <em>Serialization is deprecated and will be removed in 3.0.</em>
039 * </p>
040 *
041 * @see org.apache.commons.io.input.BOMInputStream
042 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
043 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
044 *      (Non-Normative)</a>
045 * @since 2.0
046 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * UTF-8 BOM.
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xEF 0xBB 0xBF
     * </pre>
     */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8.name(), 0xEF, 0xBB, 0xBF);

    /**
     * UTF-16BE BOM (Big-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xFE 0xFF
     * </pre>
     */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE.name(), 0xFE, 0xFF);

    /**
     * UTF-16LE BOM (Little-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xFF 0xFE
     * </pre>
     */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE.name(), 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0x00 0x00 0xFE 0xFF
     * </pre>
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xFF 0xFE 0x00 0x00
     * </pre>
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     *
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /** The name of the charset this BOM identifies; never null or empty. */
    private final String charsetName;

    /** The BOM's byte values, stored as ints; never null or empty. */
    private final int[] bytes;

    /**
     * Constructs a new instance.
     * <p>
     * The given array is copied, so later changes to the caller's array do not affect this instance.
     * </p>
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws NullPointerException if the charsetName or the bytes are null
     * @throws IllegalArgumentException if the charsetName is zero length
     * @throws IllegalArgumentException if the bytes are zero length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        Objects.requireNonNull(charsetName, "charsetName");
        Objects.requireNonNull(bytes, "bytes");
        if (charsetName.isEmpty()) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: varargs callers may pass (and later mutate) their own array.
        this.bytes = bytes.clone();
    }

    /**
     * Indicates if this instance's bytes equals another.
     * <p>
     * Only the bytes are compared; the charset name does not take part in the comparison.
     * </p>
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        final ByteOrderMark bom = (ByteOrderMark) obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Gets the byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not less than {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Gets a copy of the BOM's bytes.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        final byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            // Narrowing cast: values such as 0xEF become the corresponding (negative) signed byte.
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Computes the hash code for this BOM.
     * <p>
     * The value is derived from the bytes only, matching {@link #equals(Object)}.
     * </p>
     *
     * @return the hash code for this BOM.
     * @see Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Seed with the fixed ByteOrderMark class rather than getClass(): equals() accepts any
        // instanceof ByteOrderMark, so subclass instances with the same bytes must hash the same.
        int hashCode = ByteOrderMark.class.hashCode();
        for (final int b : bytes) {
            hashCode += b;
        }
        return hashCode;
    }

    /**
     * Gets the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * Converts this instance to a String representation of the BOM.
     * <p>
     * The format is {@code SimpleClassName[charsetName: 0xNN,0xNN,...]} where each byte is shown as upper-case
     * hexadecimal without zero padding.
     * </p>
     *
     * @return a String representation of this BOM
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName()).append('[').append(charsetName).append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            // Mask to the low 8 bits so negative ints print as a single byte value.
            builder.append("0x").append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}