UnicodeReader.java

package com.github.tom65536.adelante.parser;

/*-
 * #%L
 * adelante-compiler-frontend
 * %%
 * Copyright (C) 2023 Thomas Reiter
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.SequenceInputStream;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

/**
 * A {@link Reader} that picks its character set from the byte order mark
 * (BOM) found at the start of the wrapped input stream.
 *
 * <p>BOMs for UTF-8, UTF-16BE and UTF-16LE are recognized. If the stream
 * does not start with one of them, UTF-8 is assumed. The decoding reader
 * is created lazily on the first call to {@link #read(char[], int, int)}
 * or {@link #getCharset()}, because detecting the BOM requires reading
 * from the stream.</p>
 *
 * <p>Optionally, a fixed text can be appended after the content of the
 * wrapped stream.</p>
 */
public class UnicodeReader extends Reader {
    /**
     * The input stream processing the BOM.
     */
    private final transient BOMInputStream in;

    /**
     * The underlying reader, created lazily by {@link #ensureDelegate()}.
     *
     * <p>Declared {@code volatile} because it is checked outside of the
     * lock before being initialized inside of it.</p>
     */
    private transient volatile Reader delegateReader;

    /**
     * Character set determined by the BOM.
     *
     * <p>Always written before {@link #delegateReader} is published, so
     * it is visible to every thread that sees a non-null delegate.</p>
     */
    private transient Charset charset;

    /**
     * Text to be appended, or {@code null} if nothing is appended.
     */
    private final transient String appended;

    /**
     * Initialize a new instance of the {@link UnicodeReader} class.
     *
     * @param raw the input stream to be wrapped.
     */
    public UnicodeReader(final InputStream raw) {
        this(raw, null);
    }

    /**
     * Initialize a new instance of the {@link UnicodeReader} class.
     *
     * <p>The raw stream also serves as the lock object of this reader.</p>
     *
     * @param raw      the underlying input stream.
     * @param appendix some text to be appended after the stream content,
     *                 or {@code null} to append nothing.
     */
    public UnicodeReader(
            final InputStream raw,
            final String appendix) {
        super(raw);
        this.in = new BOMInputStream(raw,
                false,
                ByteOrderMark.UTF_8,
                ByteOrderMark.UTF_16BE,
                ByteOrderMark.UTF_16LE);
        this.appended = appendix;
    }

    /**
     * Get the detected character set.
     *
     * <p>Calling this method may read from the underlying stream in
     * order to detect the BOM.</p>
     *
     * @return the detected character set.
     * @throws IOException if the underlying stream cannot be read.
     */
    public Charset getCharset() throws IOException {
        ensureDelegate();
        return charset;
    }

    /**
     * Ensure that the underlying reader has been initialized.
     *
     * <p>On first use the BOM is inspected, the character set is derived
     * from it (UTF-8 if there is no BOM), and the decoding reader is
     * created. If a text to append was given, it is encoded with the
     * detected character set and concatenated to the stream.</p>
     *
     * @return the underlying reader
     * @throws IOException if the underlying stream cannot be read or the
     *                     character set named by the BOM is not available.
     */
    private Reader ensureDelegate() throws IOException {
        // Work on a local copy so the volatile field is read only once
        // on the fast path.
        Reader delegate = delegateReader;
        if (delegate == null) {
            synchronized (lock) {
                delegate = delegateReader;
                if (delegate == null) {
                    try {
                        charset = in.hasBOM()
                                ? Charset.forName(
                                        in.getBOM().getCharsetName())
                                : StandardCharsets.UTF_8;
                    } catch (IllegalCharsetNameException
                            | UnsupportedCharsetException ex) {
                        // Charset.forName may throw either exception.
                        throw new IOException(ex);
                    }
                    InputStream source = in;
                    if (appended != null) {
                        source = new SequenceInputStream(
                                in,
                                new ByteArrayInputStream(
                                        appended.getBytes(charset)));
                    }
                    delegate = new InputStreamReader(source, charset);
                    // Publish last: charset must be set before other
                    // threads can observe the delegate.
                    delegateReader = delegate;
                }
            }
        }
        return delegate;
    }

    /**
     * {@inheritDoc}
     *
     * <p>If nothing has been read yet, the wrapped input stream is closed
     * directly so that it is not leaked.</p>
     */
    @Override
    public void close() throws IOException {
        synchronized (lock) {
            final Reader delegate = delegateReader;
            if (delegate != null) {
                delegate.close();
            } else {
                // No delegate was ever created; close the stream itself.
                in.close();
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int read(
            final char[] cbuf,
            final int off,
            final int len) throws IOException {
        return ensureDelegate().read(
                cbuf, off, len);
    }
}