diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2021-02-25 10:36:43 -0500 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2021-03-05 16:34:52 -0500 |
commit | c4089574242a027db1655f0ee15eac9953fe9993 (patch) | |
tree | c7576090a317f5cb915c4a6407393ca458d4ec4b /src/ext_depends/D-YAML/source/dyaml | |
parent | external & build dependences in src tree (diff) |
cleaning
Diffstat (limited to 'src/ext_depends/D-YAML/source/dyaml')
-rw-r--r-- | src/ext_depends/D-YAML/source/dyaml/tinyendian.d | 213 |
1 files changed, 0 insertions, 213 deletions
diff --git a/src/ext_depends/D-YAML/source/dyaml/tinyendian.d b/src/ext_depends/D-YAML/source/dyaml/tinyendian.d deleted file mode 100644 index 731b048..0000000 --- a/src/ext_depends/D-YAML/source/dyaml/tinyendian.d +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright Ferdinand Majerech 2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/// A minimal library providing functionality for changing the endianness of data. -module tinyendian; - -import std.system : Endian, endian; - -/// Unicode UTF encodings. -enum UTFEncoding : ubyte -{ - UTF_8, - UTF_16, - UTF_32 -} -/// -@safe unittest -{ - const ints = [314, -101]; - int[2] intsSwapBuffer = ints; - swapByteOrder(intsSwapBuffer[]); - swapByteOrder(intsSwapBuffer[]); - assert(ints == intsSwapBuffer, "Lost information when swapping byte order"); - - const floats = [3.14f, 10.1f]; - float[2] floatsSwapBuffer = floats; - swapByteOrder(floatsSwapBuffer[]); - swapByteOrder(floatsSwapBuffer[]); - assert(floats == floatsSwapBuffer, "Lost information when swapping byte order"); -} - -/** Swap byte order of items in an array in place. - * - * Params: - * - * T = Item type. Must be either 2 or 4 bytes long. - * array = Buffer with values to fix byte order of. - */ -void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow -if (T.sizeof == 2 || T.sizeof == 4) -{ - // Swap the byte order of all read characters. - foreach (ref item; array) - { - static if (T.sizeof == 2) - { - import std.algorithm.mutation : swap; - swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1)); - } - else static if (T.sizeof == 4) - { - import core.bitop : bswap; - const swapped = bswap(*cast(uint*)&item); - item = *cast(const(T)*)&swapped; - } - else static assert(false, "Unsupported T: " ~ T.stringof); - } -} - -/// See fixUTFByteOrder. -struct FixUTFByteOrderResult -{ - ubyte[] array; - UTFEncoding encoding; - Endian endian; - uint bytesStripped = 0; -} - -/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place. - * - * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM - * at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The - * BOM, if any, will be removed from the buffer. - * - * If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes - * for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by - * 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped. - * - * Note that this function does $(B not) check if the array is a valid UTF string. It - * only works with the BOM and 1,2 or 4-byte items. - * - * Params: - * - * array = The array with UTF-data. - * - * Returns: - * - * A struct with the following members: - * - * $(D ubyte[] array) A slice of the input array containing data in correct - * byte order, without BOM and in case of UTF-16/UTF-32, - * without stripped bytes, if any. - * $(D UTFEncoding encoding) Encoding of the result (UTF-8, UTF-16 or UTF-32) - * $(D std.system.Endian endian) Endianness of the original array. - * $(D uint bytesStripped) Number of bytes stripped from a UTF-16/UTF-32 array, if - * any. This is non-zero only if array.length was not - * divisible by 2 or 4 for UTF-16 and UTF-32, respectively. - * - * Complexity: (BIGOH array.length) - */ -auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow -{ - // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian. - enum BOM: ubyte - { - UTF_8 = 0, - UTF_16_LE = 1, - UTF_16_BE = 2, - UTF_32_LE = 3, - UTF_32_BE = 4, - None = ubyte.max - } - - // These 2 are from std.stream - static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF], - [0xFF, 0xFE], - [0xFE, 0xFF], - [0xFF, 0xFE, 0x00, 0x00], - [0x00, 0x00, 0xFE, 0xFF] ]; - static immutable Endian[5] bomEndian = [ endian, - Endian.littleEndian, - Endian.bigEndian, - Endian.littleEndian, - Endian.bigEndian ]; - - // Documented in function ddoc. - - FixUTFByteOrderResult result; - - // Detect BOM, if any, in the bytes we've read. -1 means no BOM. - // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we - // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM. - import std.algorithm.searching : startsWith; - BOM bomId = BOM.None; - foreach (i, bom; byteOrderMarks) - if (array.startsWith(bom)) - bomId = cast(BOM)i; - - result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init; - - // Start of UTF data (after BOM, if any) - size_t start = 0; - // If we've read more than just the BOM, put the rest into the array. - with(BOM) final switch(bomId) - { - case None: result.encoding = UTFEncoding.UTF_8; break; - case UTF_8: - start = 3; - result.encoding = UTFEncoding.UTF_8; - break; - case UTF_16_LE, UTF_16_BE: - result.bytesStripped = array.length % 2; - start = 2; - result.encoding = UTFEncoding.UTF_16; - break; - case UTF_32_LE, UTF_32_BE: - result.bytesStripped = array.length % 4; - start = 4; - result.encoding = UTFEncoding.UTF_32; - break; - } - - // If there's a BOM, we need to move data back to ensure it starts at array[0] - if (start != 0) - { - array = array[start .. $ - result.bytesStripped]; - } - - // We enforce above that array.length is divisible by 2/4 for UTF-16/32 - if (endian != result.endian) - { - if (result.encoding == UTFEncoding.UTF_16) - swapByteOrder(cast(wchar[])array); - else if (result.encoding == UTFEncoding.UTF_32) - swapByteOrder(cast(dchar[])array); - } - - result.array = array; - return result; -} -/// -@safe unittest -{ - { - ubyte[] s = [0xEF, 0xBB, 0xBF, 'a']; - FixUTFByteOrderResult r = fixUTFByteOrder(s); - assert(r.encoding == UTFEncoding.UTF_8); - assert(r.array.length == 1); - assert(r.array == ['a']); - assert(r.endian == Endian.littleEndian); - } - - { - ubyte[] s = ['a']; - FixUTFByteOrderResult r = fixUTFByteOrder(s); - assert(r.encoding == UTFEncoding.UTF_8); - assert(r.array.length == 1); - assert(r.array == ['a']); - assert(r.endian == Endian.bigEndian); - } - - { - // strip 'a' b/c not complete unit - ubyte[] s = [0xFE, 0xFF, 'a']; - FixUTFByteOrderResult r = fixUTFByteOrder(s); - assert(r.encoding == UTFEncoding.UTF_16); - assert(r.array.length == 0); - assert(r.endian == Endian.bigEndian); - } - -} |