View Javadoc

1   package com.github.tom65536.adelante.parser;
2   
3   /*-
4    * #%L
5    * adelante-compiler-frontend
6    * %%
7    * Copyright (C) 2023 Thomas Reiter
8    * %%
9    * This program is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU Affero General Public License as published by
11   * the Free Software Foundation, either version 3 of the License, or
12   * (at your option) any later version.
13   * 
14   * This program is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Public License for more details.
18   * 
19   * You should have received a copy of the GNU Affero General Public License
20   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21   * #L%
22   */
23  
24  import java.io.ByteArrayInputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.InputStreamReader;
28  import java.io.Reader;
29  import java.io.SequenceInputStream;
30  import java.nio.charset.Charset;
31  import java.nio.charset.IllegalCharsetNameException;
32  import java.nio.charset.StandardCharsets;
33  
34  import org.apache.commons.io.input.BOMInputStream;
35  import org.apache.commons.io.ByteOrderMark;
36  
37  /**
38   * Implementation of an {@link InputStreamReader} respecting the BOM.
39   */
40  public class UnicodeReader extends Reader {
41      /**
42       * Thw input stream pocessinf the BOM.
43       */
44      private final transient BOMInputStream in;
45  
46      /**
47       * The underlying reader.
48       */
49      private transient Reader delegateReader;
50  
51      /**
52       * Characterset determined by the BOM.
53       */
54      private transient Charset charset;
55  
56      /**
57       * Text to be apoended.
58       */
59      private transient String appended;
60  
61      /**
62       * Initialize a new instance of the {@link UnicodeReader} class.
63       *
64       * @param raw the input stream to be wrapped.
65       */
66      public UnicodeReader(final InputStream raw) {
67          this(raw, null);
68      }
69  
70      /**
71       * Initialize a new instance of the {@link UnicodeReader} class.
72       *
73       * @param raw      the underlying input stream.
74       * @param appendix some text to be appended
75       */
76      public UnicodeReader(
77              final InputStream raw,
78              final String appendix) {
79          super(raw);
80          this.in = new BOMInputStream(raw,
81                  false,
82                  ByteOrderMark.UTF_8,
83                  ByteOrderMark.UTF_16BE,
84                  ByteOrderMark.UTF_16LE);
85          this.appended = appendix;
86      }
87  
88      /**
89       * Get the detected character set.
90       *
91       * @return the detected character set.
92       * @throws IOException if the underlying stream cannot be read.
93       */
94      public Charset getCharset() throws IOException {
95          ensureDelegate();
96          return charset;
97      }
98  
99      /**
100      * Ensure that the underlying reader has been initialized.
101      *
102      * @return the underlying reader
103      * @throws IOException if the underlying stream cannot be read.
104      */
105     private Reader ensureDelegate() throws IOException {
106         if (delegateReader == null) {
107             synchronized (lock) {
108                 if (delegateReader == null) {
109                     try {
110                         charset = (in.hasBOM())
111                                 ? Charset.forName(in.getBOM().getCharsetName())
112                                 : StandardCharsets.UTF_8;
113                         var inApp = (appended != null)
114                             ? (new SequenceInputStream(
115                                 in,
116                                 new ByteArrayInputStream(
117                                         appended.getBytes(charset))))
118                             : in;
119                         delegateReader = new InputStreamReader(inApp, charset);
120                     } catch (IllegalCharsetNameException ex) {
121                         throw new IOException(ex);
122                     }
123                 }
124             }
125         }
126         return delegateReader;
127     }
128 
129     /**
130      * {@inheritDoc}
131      */
132     public void close() throws IOException {
133         synchronized (lock) {
134             if (delegateReader != null) {
135                 delegateReader.close();
136             }
137         }
138     }
139 
140     /**
141      * {@inheritDoc}
142      */
143     public int read(
144             final char[] cbuf,
145             final int off,
146             final int len) throws IOException {
147         return ensureDelegate().read(
148                 cbuf, off, len);
149     }
150 }