diff options
author | Hilko Bengen <bengen@debian.org> | 2014-06-07 12:02:12 +0200 |
---|---|---|
committer | Hilko Bengen <bengen@debian.org> | 2014-06-07 12:02:12 +0200 |
commit | d5ed89b946297270ec28abf44bef2371a06f1f4f (patch) | |
tree | ce2d945e4dde69af90bd9905a70d8d27f4936776 /src/main/java/org/elasticsearch/common/io/UTF8StreamWriter.java | |
download | elasticsearch-d5ed89b946297270ec28abf44bef2371a06f1f4f.tar.gz |
Imported Upstream version 1.0.3 (tag: upstream/1.0.3)
Diffstat (limited to 'src/main/java/org/elasticsearch/common/io/UTF8StreamWriter.java')
-rw-r--r-- | src/main/java/org/elasticsearch/common/io/UTF8StreamWriter.java | 333 |
1 files changed, 333 insertions, 0 deletions
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;

/**
 * A reusable {@link Writer} that encodes characters as UTF-8 into an internal
 * byte buffer and hands the buffer to an {@link OutputStream} whenever it
 * fills up, on {@link #flush()} and on {@link #close()}.
 *
 * <p>Code points are encoded with the original 1 to 6 byte UTF-8 scheme, so any
 * non-negative 31-bit value passed to {@link #write(int)} is accepted.
 * UTF-16 surrogate pairs passed through the {@code char}-based methods are
 * combined into a single code point before encoding.
 */
public final class UTF8StreamWriter extends Writer {

    /**
     * Capacity of the byte buffer created by the no-argument constructor.
     */
    private static final int DEFAULT_CAPACITY = 2048;

    /**
     * Holds the current output stream or <code>null</code> if closed.
     */
    private OutputStream _outputStream;

    /**
     * Holds the bytes' buffer.
     */
    private final byte[] _bytes;

    /**
     * Holds the bytes buffer index (number of pending bytes in the buffer).
     */
    private int _index;

    /**
     * Holds the last high surrogate seen by {@link #write(char)} that is
     * still waiting for its low surrogate, or <code>0</code> if none.
     */
    private char _highSurrogate;

    /**
     * Creates a UTF-8 writer having a byte buffer of moderate capacity (2048).
     */
    public UTF8StreamWriter() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Creates a UTF-8 writer having a byte buffer of specified capacity.
     *
     * @param capacity the capacity of the byte buffer; must be positive.
     * @throws IllegalArgumentException if <code>capacity</code> is zero or
     *         negative (an empty buffer could not hold even a single byte).
     */
    public UTF8StreamWriter(int capacity) {
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be positive: " + capacity);
        }
        _bytes = new byte[capacity];
    }

    /**
     * Sets the output stream to use for writing until this writer is closed.
     * For example:[code]
     * Writer writer = new UTF8StreamWriter().setOutput(out);
     * [/code] serves the same purpose as [code]
     * Writer writer = new java.io.OutputStreamWriter(out, "UTF-8");
     * [/code]
     *
     * @param out the output stream.
     * @return this UTF-8 writer.
     * @throws IllegalStateException if this writer is being reused and
     *         it has not been {@link #close closed} or {@link #reset reset}.
     */
    public UTF8StreamWriter setOutput(OutputStream out) {
        if (_outputStream != null) {
            throw new IllegalStateException("Writer not closed or reset");
        }
        _outputStream = out;
        return this;
    }

    /**
     * Writes a single character. This method supports 16-bits
     * character surrogates: a high surrogate is remembered and nothing is
     * emitted until the matching low surrogate arrives, at which point the
     * combined code point is encoded.
     *
     * @param c <code>char</code> the character to be written (possibly
     *          a surrogate).
     * @throws CharConversionException if <code>c</code> is a low surrogate
     *         and no high surrogate is pending.
     * @throws IOException if an I/O error occurs.
     */
    public void write(char c) throws IOException {
        if ((c < 0xd800) || (c > 0xdfff)) {
            write((int) c);
        } else if (c < 0xdc00) { // High surrogate.
            _highSurrogate = c;
        } else { // Low surrogate.
            if (_highSurrogate == 0) {
                throw new CharConversionException("Unpaired low surrogate U+"
                        + Integer.toHexString(c));
            }
            int code = ((_highSurrogate - 0xd800) << 10) + (c - 0xdc00) + 0x10000;
            // Consume the pending high surrogate so that it cannot be paired
            // a second time with some later, unrelated low surrogate.
            _highSurrogate = 0;
            write(code);
        }
    }

    /**
     * Writes a character given its 31-bits Unicode.
     *
     * @param code the 31 bits Unicode of the character to be written.
     * @throws CharConversionException if <code>code</code> is negative.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(int code) throws IOException {
        if ((code & 0xffffff80) == 0) { // 7 bits: single byte.
            put(code);
        } else { // Writes more than one byte.
            writeMultiByte(code);
        }
    }

    /**
     * Encodes a code point that needs 2 to 6 UTF-8 bytes.
     *
     * <p>Each mask below has every bit set that does NOT fit in the payload
     * of the corresponding sequence length (11, 16, 21, 26 and 31 bits).
     */
    private void writeMultiByte(int c) throws IOException {
        final int trailing; // number of 10xxxxxx continuation bytes
        final int lead;     // marker bits of the first byte
        if ((c & 0xfffff800) == 0) { // 2 bytes.
            trailing = 1;
            lead = 0xc0;
        } else if ((c & 0xffff0000) == 0) { // 3 bytes.
            trailing = 2;
            lead = 0xe0;
        } else if ((c & 0xffe00000) == 0) { // 4 bytes.
            trailing = 3;
            lead = 0xf0;
        } else if ((c & 0xfc000000) == 0) { // 5 bytes.
            trailing = 4;
            lead = 0xf8;
        } else if ((c & 0x80000000) == 0) { // 6 bytes.
            trailing = 5;
            lead = 0xfc;
        } else {
            throw new CharConversionException("Illegal character U+"
                    + Integer.toHexString(c));
        }
        put(lead | (c >> (6 * trailing)));
        for (int shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
            put(0x80 | ((c >> shift) & 0x3f));
        }
    }

    /**
     * Appends one byte (the low 8 bits of <code>b</code>) to the buffer and
     * flushes the buffer to the output stream as soon as it becomes full.
     */
    private void put(int b) throws IOException {
        _bytes[_index] = (byte) b;
        if (++_index >= _bytes.length) {
            flushBuffer();
        }
    }

    /**
     * Writes a portion of an array of characters.
     *
     * @param cbuf the array of characters.
     * @param off  the offset from which to start writing characters.
     * @param len  the number of characters to write.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(char cbuf[], int off, int len) throws IOException {
        final int end = off + len;
        for (int i = off; i < end; i++) {
            final char c = cbuf[i];
            if (c < 0x80) { // ASCII fast path.
                put(c);
            } else {
                write(c);
            }
        }
    }

    /**
     * Writes a portion of a string.
     *
     * @param str a String.
     * @param off the offset from which to start writing characters.
     * @param len the number of characters to write.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        final int end = off + len;
        for (int i = off; i < end; i++) {
            final char c = str.charAt(i);
            if (c < 0x80) { // ASCII fast path.
                put(c);
            } else {
                write(c);
            }
        }
    }

    /**
     * Writes the specified character sequence.
     *
     * @param csq the character sequence.
     * @throws IOException if an I/O error occurs
     */
    public void write(CharSequence csq) throws IOException {
        final int length = csq.length();
        for (int i = 0; i < length; i++) {
            final char c = csq.charAt(i);
            if (c < 0x80) { // ASCII fast path.
                put(c);
            } else {
                write(c);
            }
        }
    }

    /**
     * Flushes the stream: writes the buffered bytes to the output stream and
     * then flushes the output stream itself.
     *
     * @throws IOException if an I/O error occurs or this writer has no
     *         output stream (closed, reset or never set).
     */
    @Override
    public void flush() throws IOException {
        flushBuffer();
        _outputStream.flush();
    }

    /**
     * Closes and {@link #reset resets} this writer for reuse. Does nothing
     * if no output stream is currently set.
     *
     * <p>The output stream is closed and this writer is reset even when
     * writing the remaining buffered bytes fails, so that the writer can
     * always be given a new output stream afterwards.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        if (_outputStream == null) {
            return;
        }
        try {
            flushBuffer();
        } finally {
            try {
                _outputStream.close();
            } finally {
                reset();
            }
        }
    }

    /**
     * Flushes the internal bytes buffer.
     *
     * @throws IOException if an I/O error occurs
     */
    private void flushBuffer() throws IOException {
        if (_outputStream == null) {
            throw new IOException("Stream closed");
        }
        _outputStream.write(_bytes, 0, _index);
        _index = 0;
    }

    /**
     * Detaches this writer from its output stream without flushing or closing
     * it. Any bytes still in the buffer and any pending high surrogate are
     * discarded.
     */
    public void reset() {
        _highSurrogate = 0;
        _index = 0;
        _outputStream = null;
    }

    /**
     * @deprecated Replaced by {@link #setOutput(OutputStream)}
     */
    @Deprecated
    public UTF8StreamWriter setOutputStream(OutputStream out) {
        return this.setOutput(out);
    }
}
\ No newline at end of file |