diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2021-02-19 17:10:51 -0500 | 
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2021-02-24 16:46:47 -0500 | 
| commit | 02ca32ae0a5bc290918d2b2a3288e385b9cc6b11 (patch) | |
| tree | 06379785e8a0165a7deb981c2eba362894820634 /src/ext_depends/tinyendian/source | |
| parent | build from static source-tree pre fetch depends (diff) | |
external & build dependences in src tree
- external & build dependences boost licensed
  - ext_depends (external depends)
    - D-YAML
      - tinyendian
    - d2sqlite3
    - imageformats
  - build_depends
    - dub2nix
Diffstat (limited to 'src/ext_depends/tinyendian/source')
| -rw-r--r-- | src/ext_depends/tinyendian/source/tinyendian.d | 213 | 
1 files changed, 213 insertions, 0 deletions
// Source: src/ext_depends/tinyendian/source/tinyendian.d
// (new file, mode 100644, blob 731b048; 213 lines added by the commit above)

//          Copyright Ferdinand Majerech 2014.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

/// A minimal library providing functionality for changing the endianness of data.
module tinyendian;

import std.system : Endian, endian;

/// Unicode UTF encodings.
enum UTFEncoding : ubyte
{
    UTF_8,
    UTF_16,
    UTF_32
}

/** Swap byte order of items in an array in place.
 *
 * Every item is byte-reversed individually; the order of the items within the
 * array is not changed. Applying the function twice restores the original data.
 *
 * Params:
 *
 * T     = Item type. Must be either 2 or 4 bytes long.
 * array = Buffer with values to fix byte order of.
 */
void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow
if (T.sizeof == 2 || T.sizeof == 4)
{
    // Reverse the bytes of every item in the buffer.
    foreach (ref item; array)
    {
        static if (T.sizeof == 2)
        {
            // Two bytes: exchanging them is a full byte-order reversal.
            import std.algorithm.mutation : swap;
            swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1));
        }
        else static if (T.sizeof == 4)
        {
            // Four bytes: reinterpret the item as a uint so bswap can be used,
            // then reinterpret the result back as T (T may be e.g. float).
            import core.bitop : bswap;
            const swapped = bswap(*cast(uint*)&item);
            item = *cast(const(T)*)&swapped;
        }
        else static assert(false, "Unsupported T: " ~ T.stringof);
    }
}
///
@safe unittest
{
    const ints = [314, -101];
    int[2] intsSwapBuffer = ints;
    swapByteOrder(intsSwapBuffer[]);
    swapByteOrder(intsSwapBuffer[]);
    assert(ints == intsSwapBuffer, "Lost information when swapping byte order");

    const floats = [3.14f, 10.1f];
    float[2] floatsSwapBuffer = floats;
    swapByteOrder(floatsSwapBuffer[]);
    swapByteOrder(floatsSwapBuffer[]);
    assert(floats == floatsSwapBuffer, "Lost information when swapping byte order");
}

// A single swap must actually reverse the bytes of each item.
@safe unittest
{
    ushort[1] two = [0x1234];
    swapByteOrder(two[]);
    assert(two[0] == 0x3412);

    uint[1] four = [0x11223344];
    swapByteOrder(four[]);
    assert(four[0] == 0x44332211);

    // An empty buffer is a no-op.
    uint[] empty;
    swapByteOrder(empty);
    assert(empty.length == 0);
}

/// See fixUTFByteOrder.
struct FixUTFByteOrderResult
{
    /// Slice of the input without BOM and without stripped trailing bytes.
    ubyte[] array;
    /// Encoding detected from the BOM (UTF-8 if there was none).
    UTFEncoding encoding;
    /// Endianness reported for the input; see fixUTFByteOrder for special cases.
    Endian endian;
    /// Number of trailing bytes dropped because they did not form a whole code unit.
    uint bytesStripped = 0;
}

/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place.
 *
 * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM
 * at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The
 * BOM, if any, is excluded from the returned slice.
 *
 * If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes
 * for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by
 * 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped.
 *
 * Note that this function does $(B not) check if the array is a valid UTF string. It
 * only works with the BOM and 1,2 or 4-byte items.
 *
 * Note also that the byte sequence FF FE 00 00 is both the UTF-32LE BOM and a
 * UTF-16LE BOM followed by a NUL code unit; it is always treated as UTF-32LE.
 *
 * Params:
 *
 * array = The array with UTF-data.
 *
 * Returns:
 *
 * A struct with the following members:
 *
 * $(D ubyte[] array)            A slice of the input array containing data in correct
 *                               byte order, without BOM and in case of UTF-16/UTF-32,
 *                               without stripped bytes, if any.
 * $(D UTFEncoding encoding)     Encoding of the result (UTF-8, UTF-16 or UTF-32)
 * $(D std.system.Endian endian) Endianness of the original array. With a UTF-8 BOM
 *                               this is the system endianness; with no BOM at all
 *                               it is $(D Endian.init). Byte order is irrelevant
 *                               for UTF-8, so neither value causes any swapping.
 * $(D uint bytesStripped)       Number of bytes stripped from a UTF-16/UTF-32 array, if
 *                               any. This is non-zero only if array.length was not
 *                               divisible by 2 or 4 for UTF-16 and UTF-32, respectively.
 *
 * Complexity: $(BIGOH array.length)
 */
auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow
{
    // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian.
    enum BOM: ubyte
    {
        UTF_8     = 0,
        UTF_16_LE = 1,
        UTF_16_BE = 2,
        UTF_32_LE = 3,
        UTF_32_BE = 4,
        None      = ubyte.max
    }

    // These 2 are from std.stream
    static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF],
                                                   [0xFF, 0xFE],
                                                   [0xFE, 0xFF],
                                                   [0xFF, 0xFE, 0x00, 0x00],
                                                   [0x00, 0x00, 0xFE, 0xFF] ];
    static immutable Endian[5] bomEndian = [ endian,
                                             Endian.littleEndian,
                                             Endian.bigEndian,
                                             Endian.littleEndian,
                                             Endian.bigEndian ];

    // Documented in function ddoc.
    FixUTFByteOrderResult result;

    // Detect BOM, if any, at the start of the array. BOM.None means no BOM.
    // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we
    // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM.
    import std.algorithm.searching : startsWith;
    BOM bomId = BOM.None;
    foreach (i, bom; byteOrderMarks)
        if (array.startsWith(bom))
            bomId = cast(BOM)i;

    result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init;

    // Start of UTF data (after BOM, if any)
    size_t start = 0;
    // Determine encoding, BOM length and the number of dangling trailing bytes.
    // The BOM length equals the code unit size for UTF-16/32, so taking the
    // remainder of the full length (BOM included) gives the right count.
    with(BOM) final switch(bomId)
    {
        case None: result.encoding = UTFEncoding.UTF_8; break;
        case UTF_8:
            start = 3;
            result.encoding = UTFEncoding.UTF_8;
            break;
        case UTF_16_LE, UTF_16_BE:
            result.bytesStripped = array.length % 2;
            start = 2;
            result.encoding = UTFEncoding.UTF_16;
            break;
        case UTF_32_LE, UTF_32_BE:
            result.bytesStripped = array.length % 4;
            start = 4;
            result.encoding = UTFEncoding.UTF_32;
            break;
    }

    // If there's a BOM, narrow the slice so it starts right after the BOM and
    // ends before any stripped bytes. No data is copied or moved.
    if (start != 0)
    {
        array = array[start .. $ - result.bytesStripped];
    }

    // We enforce above that array.length is divisible by 2/4 for UTF-16/32
    if (endian != result.endian)
    {
        if (result.encoding == UTFEncoding.UTF_16)
            swapByteOrder(cast(wchar[])array);
        else if (result.encoding == UTFEncoding.UTF_32)
            swapByteOrder(cast(dchar[])array);
    }

    result.array = array;
    return result;
}
///
@safe unittest
{
    {
        ubyte[] s = [0xEF, 0xBB, 0xBF, 'a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_8);
        assert(r.array.length == 1);
        assert(r.array == ['a']);
        // A UTF-8 BOM reports the host byte order, whatever that is.
        assert(r.endian == endian);
    }

    {
        ubyte[] s = ['a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_8);
        assert(r.array.length == 1);
        assert(r.array == ['a']);
        assert(r.endian == Endian.bigEndian);
    }

    {
        // strip 'a' b/c not complete unit
        ubyte[] s = [0xFE, 0xFF, 'a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_16);
        assert(r.array.length == 0);
        assert(r.bytesStripped == 1);
        assert(r.endian == Endian.bigEndian);
    }

}

// Byte order conversion and UTF-32 detection, independent of host endianness.
@safe unittest
{
    {
        // UTF-16BE "a": readable as a native wchar after conversion.
        ubyte[] s = [0xFE, 0xFF, 0x00, 'a'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_16);
        assert(r.endian == Endian.bigEndian);
        assert(r.bytesStripped == 0);
        assert(cast(wchar[])r.array == "a"w);
    }

    {
        // UTF-16LE "a".
        ubyte[] s = [0xFF, 0xFE, 'a', 0x00];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_16);
        assert(r.endian == Endian.littleEndian);
        assert(cast(wchar[])r.array == "a"w);
    }

    {
        // UTF-32LE BOM must win over the UTF-16LE BOM it starts with;
        // the dangling trailing byte is stripped.
        ubyte[] s = [0xFF, 0xFE, 0x00, 0x00, 'a', 0x00, 0x00, 0x00, 'b'];
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_32);
        assert(r.endian == Endian.littleEndian);
        assert(r.bytesStripped == 1);
        assert(cast(dchar[])r.array == "a"d);
    }

    {
        // Empty input: no BOM, treated as empty UTF-8.
        ubyte[] s;
        FixUTFByteOrderResult r = fixUTFByteOrder(s);
        assert(r.encoding == UTFEncoding.UTF_8);
        assert(r.array.length == 0);
        assert(r.bytesStripped == 0);
    }
}
