1 package com.github.tom65536.adelante.parser;
2
3 /*-
4 * #%L
5 * adelante-compiler-frontend
6 * %%
7 * Copyright (C) 2023 Thomas Reiter
8 * %%
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU Affero General Public License as published by
11 * the Free Software Foundation, either version 3 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * #L%
22 */
23
24 import java.io.ByteArrayInputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.Reader;
29 import java.io.SequenceInputStream;
30 import java.nio.charset.Charset;
31 import java.nio.charset.IllegalCharsetNameException;
32 import java.nio.charset.StandardCharsets;
33
34 import org.apache.commons.io.input.BOMInputStream;
35 import org.apache.commons.io.ByteOrderMark;
36
37 /**
38 * Implementation of an {@link InputStreamReader} respecting the BOM.
39 */
40 public class UnicodeReader extends Reader {
41 /**
42 * Thw input stream pocessinf the BOM.
43 */
44 private final transient BOMInputStream in;
45
46 /**
47 * The underlying reader.
48 */
49 private transient Reader delegateReader;
50
51 /**
52 * Characterset determined by the BOM.
53 */
54 private transient Charset charset;
55
56 /**
57 * Text to be apoended.
58 */
59 private transient String appended;
60
61 /**
62 * Initialize a new instance of the {@link UnicodeReader} class.
63 *
64 * @param raw the input stream to be wrapped.
65 */
66 public UnicodeReader(final InputStream raw) {
67 this(raw, null);
68 }
69
70 /**
71 * Initialize a new instance of the {@link UnicodeReader} class.
72 *
73 * @param raw the underlying input stream.
74 * @param appendix some text to be appended
75 */
76 public UnicodeReader(
77 final InputStream raw,
78 final String appendix) {
79 super(raw);
80 this.in = new BOMInputStream(raw,
81 false,
82 ByteOrderMark.UTF_8,
83 ByteOrderMark.UTF_16BE,
84 ByteOrderMark.UTF_16LE);
85 this.appended = appendix;
86 }
87
88 /**
89 * Get the detected character set.
90 *
91 * @return the detected character set.
92 * @throws IOException if the underlying stream cannot be read.
93 */
94 public Charset getCharset() throws IOException {
95 ensureDelegate();
96 return charset;
97 }
98
99 /**
100 * Ensure that the underlying reader has been initialized.
101 *
102 * @return the underlying reader
103 * @throws IOException if the underlying stream cannot be read.
104 */
105 private Reader ensureDelegate() throws IOException {
106 if (delegateReader == null) {
107 synchronized (lock) {
108 if (delegateReader == null) {
109 try {
110 charset = (in.hasBOM())
111 ? Charset.forName(in.getBOM().getCharsetName())
112 : StandardCharsets.UTF_8;
113 var inApp = (appended != null)
114 ? (new SequenceInputStream(
115 in,
116 new ByteArrayInputStream(
117 appended.getBytes(charset))))
118 : in;
119 delegateReader = new InputStreamReader(inApp, charset);
120 } catch (IllegalCharsetNameException ex) {
121 throw new IOException(ex);
122 }
123 }
124 }
125 }
126 return delegateReader;
127 }
128
129 /**
130 * {@inheritDoc}
131 */
132 public void close() throws IOException {
133 synchronized (lock) {
134 if (delegateReader != null) {
135 delegateReader.close();
136 }
137 }
138 }
139
140 /**
141 * {@inheritDoc}
142 */
143 public int read(
144 final char[] cbuf,
145 final int off,
146 final int len) throws IOException {
147 return ensureDelegate().read(
148 cbuf, off, len);
149 }
150 }