diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2021-02-19 17:10:51 -0500 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2021-02-24 16:46:47 -0500 |
commit | 02ca32ae0a5bc290918d2b2a3288e385b9cc6b11 (patch) | |
tree | 06379785e8a0165a7deb981c2eba362894820634 /src/ext_depends/D-YAML/source/dyaml/reader.d | |
parent | build from static source-tree pre fetch depends (diff) |
external & build dependencies in src tree
- external & build dependencies boost licensed
- ext_depends (external depends)
- D-YAML
- tinyendian
- d2sqlite3
- imageformats
- build_depends
- dub2nix
Diffstat (limited to 'src/ext_depends/D-YAML/source/dyaml/reader.d')
-rw-r--r-- | src/ext_depends/D-YAML/source/dyaml/reader.d | 906 |
1 files changed, 906 insertions, 0 deletions
diff --git a/src/ext_depends/D-YAML/source/dyaml/reader.d b/src/ext_depends/D-YAML/source/dyaml/reader.d new file mode 100644 index 0000000..9fe42fc --- /dev/null +++ b/src/ext_depends/D-YAML/source/dyaml/reader.d @@ -0,0 +1,906 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.reader; + + +import core.stdc.stdlib; +import core.stdc.string; +import core.thread; + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.range; +import std.string; +import std.system; +import std.typecons; +import std.utf; + +import tinyendian; + +import dyaml.encoding; +import dyaml.exception; + +alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); + +package: + + +///Exception thrown at Reader errors. +class ReaderException : YAMLException +{ + this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super("Reader error: " ~ msg, file, line); + } +} + +/// Provides an API to read characters from a UTF-8 buffer and build slices into that +/// buffer to avoid allocations (see SliceBuilder). +final class Reader +{ + private: + // Buffer of currently loaded characters. + char[] buffer_; + + // Current position within buffer. Only data after this position can be read. + size_t bufferOffset_; + + // Index of the current character in the buffer. + size_t charIndex_; + // Number of characters (code points) in buffer_. + size_t characterCount_; + + // File name + string name_; + // Current line in file. + uint line_; + // Current column in file. + uint column_; + + // Original Unicode encoding of the data. + Encoding encoding_; + + version(unittest) + { + // Endianness of the input before it was converted (for testing) + Endian endian_; + } + + // The number of consecutive ASCII characters starting at bufferOffset_. 
+ // + // Used to minimize UTF-8 decoding. + size_t upcomingASCII_; + + // Index to buffer_ where the last decoded character starts. + size_t lastDecodedBufferOffset_; + // Offset, relative to charIndex_, of the last decoded character, + // in code points, not chars. + size_t lastDecodedCharOffset_; + + public: + /// Construct a Reader. + /// + /// Params: buffer = Buffer with YAML data. This may be e.g. the entire + /// contents of a file or a string. $(B will) be modified by + /// the Reader and other parts of D:YAML (D:YAML tries to + /// reuse the buffer to minimize memory allocations) + /// name = File name if the buffer is the contents of a file or + /// `"<unknown>"` if the buffer is the contents of a string. + /// + /// Throws: ReaderException on a UTF decoding error or if there are + /// nonprintable Unicode characters illegal in YAML. + this(ubyte[] buffer, string name = "<unknown>") @safe pure + { + name_ = name; + auto endianResult = fixUTFByteOrder(buffer); + if(endianResult.bytesStripped > 0) + { + throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned " ~ + "to 2 or 4 bytes, respectively"); + } + + version(unittest) { endian_ = endianResult.endian; } + encoding_ = endianResult.encoding; + + auto utf8Result = toUTF8(endianResult.array, endianResult.encoding); + const msg = utf8Result.errorMessage; + if(msg !is null) + { + throw new ReaderException("Error when converting to UTF-8: " ~ msg); + } + + buffer_ = utf8Result.utf8; + + characterCount_ = utf8Result.characterCount; + // Check that all characters in buffer are printable. + enforce(isPrintableValidUTF8(buffer_), + new ReaderException("Special unicode characters are not allowed")); + + this.sliceBuilder = SliceBuilder(this); + checkASCII(); + } + + /// Get character at specified index relative to current position. + /// + /// Params: index = Index of the character to get relative to current position + /// in the buffer. 
Can point outside of the buffer; In that + /// case, '\0' will be returned. + /// + /// Returns: Character at specified position or '\0' if outside of the buffer. + /// + // XXX removed; search for 'risky' to find why. + // Throws: ReaderException if trying to read past the end of the buffer. + dchar peek(const size_t index) @safe pure + { + if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; } + if(characterCount_ <= charIndex_ + index) + { + // XXX This is risky; revert this if bugs are introduced. We rely on + // the assumption that Reader only uses peek() to detect end of buffer. + // The test suite passes. + // Revert this case here and in other peek() versions if this causes + // errors. + // throw new ReaderException("Trying to read past the end of the buffer"); + return '\0'; + } + + // Optimized path for Scanner code that peeks chars in linear order to + // determine the length of some sequence. + if(index == lastDecodedCharOffset_) + { + ++lastDecodedCharOffset_; + const char b = buffer_[lastDecodedBufferOffset_]; + // ASCII + if(b < 0x80) + { + ++lastDecodedBufferOffset_; + return b; + } + return decode(buffer_, lastDecodedBufferOffset_); + } + + // 'Slow' path where we decode everything up to the requested character. + const asciiToTake = min(upcomingASCII_, index); + lastDecodedCharOffset_ = asciiToTake; + lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; + dchar d; + while(lastDecodedCharOffset_ <= index) + { + d = decodeNext(); + } + + return d; + } + + /// Optimized version of peek() for the case where peek index is 0. + dchar peek() @safe pure + { + if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; } + if(characterCount_ <= charIndex_) { return '\0'; } + + lastDecodedCharOffset_ = 0; + lastDecodedBufferOffset_ = bufferOffset_; + return decodeNext(); + } + + /// Get byte at specified index relative to current position. + /// + /// Params: index = Index of the byte to get relative to current position + /// in the buffer. 
Can point outside of the buffer; In that + /// case, '\0' will be returned. + /// + /// Returns: Byte at specified position or '\0' if outside of the buffer. + char peekByte(const size_t index) @safe pure nothrow @nogc + { + return characterCount_ > (charIndex_ + index) ? buffer_[bufferOffset_ + index] : '\0'; + } + + /// Optimized version of peekByte() for the case where peek byte index is 0. + char peekByte() @safe pure nothrow @nogc + { + return characterCount_ > charIndex_ ? buffer_[bufferOffset_] : '\0'; + } + + + /// Get specified number of characters starting at current position. + /// + /// Note: This gets only a "view" into the internal buffer, which will be + /// invalidated after other Reader calls. Use SliceBuilder to build slices + /// for permanent use. + /// + /// Params: length = Number of characters (code points, not bytes) to get. May + /// reach past the end of the buffer; in that case the returned + /// slice will be shorter. + /// + /// Returns: Characters starting at current position or an empty slice if out of bounds. + char[] prefix(const size_t length) @safe pure + { + return slice(length); + } + + /// Get specified number of bytes, not code points, starting at current position. + /// + /// Note: This gets only a "view" into the internal buffer, which will be + /// invalidated after other Reader calls. Use SliceBuilder to build slices + /// for permanent use. + /// + /// Params: length = Number bytes (not code points) to get. May NOT reach past + /// the end of the buffer; should be used with peek() to avoid + /// this. + /// + /// Returns: Bytes starting at current position. + char[] prefixBytes(const size_t length) @safe pure nothrow @nogc + in(length == 0 || bufferOffset_ + length <= buffer_.length, "prefixBytes out of bounds") + { + return buffer_[bufferOffset_ .. bufferOffset_ + length]; + } + + /// Get a slice view of the internal buffer, starting at the current position. 
+ /// + /// Note: This gets only a "view" into the internal buffer, + /// which get invalidated after other Reader calls. + /// + /// Params: end = End of the slice relative to current position. May reach past + /// the end of the buffer; in that case the returned slice will + /// be shorter. + /// + /// Returns: Slice into the internal buffer or an empty slice if out of bounds. + char[] slice(const size_t end) @safe pure + { + // Fast path in case the caller has already peek()ed all the way to end. + if(end == lastDecodedCharOffset_) + { + return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; + } + + const asciiToTake = min(upcomingASCII_, end, buffer_.length); + lastDecodedCharOffset_ = asciiToTake; + lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; + + // 'Slow' path - decode everything up to end. + while(lastDecodedCharOffset_ < end && + lastDecodedBufferOffset_ < buffer_.length) + { + decodeNext(); + } + + return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; + } + + /// Get the next character, moving buffer position beyond it. + /// + /// Returns: Next character. + /// + /// Throws: ReaderException if trying to read past the end of the buffer + /// or if invalid data is read. + dchar get() @safe pure + { + const result = peek(); + forward(); + return result; + } + + /// Get specified number of characters, moving buffer position beyond them. + /// + /// Params: length = Number or characters (code points, not bytes) to get. + /// + /// Returns: Characters starting at current position. + char[] get(const size_t length) @safe pure + { + auto result = slice(length); + forward(length); + return result; + } + + /// Move current position forward. + /// + /// Params: length = Number of characters to move position forward. 
+ void forward(size_t length) @safe pure + { + while(length > 0) + { + auto asciiToTake = min(upcomingASCII_, length); + charIndex_ += asciiToTake; + length -= asciiToTake; + upcomingASCII_ -= asciiToTake; + + for(; asciiToTake > 0; --asciiToTake) + { + const c = buffer_[bufferOffset_++]; + // c is ASCII, do we only need to check for ASCII line breaks. + if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + continue; + } + ++column_; + } + + // If we have used up all upcoming ASCII chars, the next char is + // non-ASCII even after this returns, so upcomingASCII_ doesn't need to + // be updated - it's zero. + if(length == 0) { break; } + + assert(upcomingASCII_ == 0, + "Running unicode handling code but we haven't run out of ASCII chars"); + assert(bufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + assert(buffer_[bufferOffset_] >= 0x80, + "ASCII must be handled by preceding code"); + + ++charIndex_; + const c = decode(buffer_, bufferOffset_); + + // New line. (can compare with '\n' without decoding since it's ASCII) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + } + else if(c != '\uFEFF') { ++column_; } + --length; + checkASCII(); + } + + lastDecodedBufferOffset_ = bufferOffset_; + lastDecodedCharOffset_ = 0; + } + + /// Move current position forward by one character. 
+ void forward() @safe pure + { + ++charIndex_; + lastDecodedBufferOffset_ = bufferOffset_; + lastDecodedCharOffset_ = 0; + + // ASCII + if(upcomingASCII_ > 0) + { + --upcomingASCII_; + const c = buffer_[bufferOffset_++]; + + if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + return; + } + ++column_; + return; + } + + // UTF-8 + assert(bufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + assert(buffer_[bufferOffset_] >= 0x80, + "ASCII must be handled by preceding code"); + + const c = decode(buffer_, bufferOffset_); + + // New line. (can compare with '\n' without decoding since it's ASCII) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + } + else if(c != '\uFEFF') { ++column_; } + + checkASCII(); + } + + /// Used to build slices of read data in Reader; to avoid allocations. + SliceBuilder sliceBuilder; + + /// Get a string describing current buffer position, used for error messages. + Mark mark() const pure nothrow @nogc @safe { return Mark(name_, line_, column_); } + + /// Get file name. + string name() const @safe pure nothrow @nogc { return name_; } + + /// Get current line number. + uint line() const @safe pure nothrow @nogc { return line_; } + + /// Get current column number. + uint column() const @safe pure nothrow @nogc { return column_; } + + /// Get index of the current character in the buffer. + size_t charIndex() const @safe pure nothrow @nogc { return charIndex_; } + + /// Get encoding of the input buffer. + Encoding encoding() const @safe pure nothrow @nogc { return encoding_; } + +private: + // Update upcomingASCII_ (should be called forward()ing over a UTF-8 sequence) + void checkASCII() @safe pure nothrow @nogc + { + upcomingASCII_ = countASCII(buffer_[bufferOffset_ .. $]); + } + + // Decode the next character relative to + // lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them. 
+ // + // Does not advance the buffer position. Used in peek() and slice(). + dchar decodeNext() @safe pure + { + assert(lastDecodedBufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + const char b = buffer_[lastDecodedBufferOffset_]; + ++lastDecodedCharOffset_; + // ASCII + if(b < 0x80) + { + ++lastDecodedBufferOffset_; + return b; + } + + return decode(buffer_, lastDecodedBufferOffset_); + } +} + +/// Used to build slices of already read data in Reader buffer, avoiding allocations. +/// +/// Usually these slices point to unchanged Reader data, but sometimes the data is +/// changed due to how YAML interprets certain characters/strings. +/// +/// See begin() documentation. +struct SliceBuilder +{ +private: + // No copying by the user. + @disable this(this); + @disable void opAssign(ref SliceBuilder); + + // Reader this builder works in. + Reader reader_; + + // Start of the slice om reader_.buffer_ (size_t.max while no slice being build) + size_t start_ = size_t.max; + // End of the slice om reader_.buffer_ (size_t.max while no slice being build) + size_t end_ = size_t.max; + + // Stack of slice ends to revert to (see Transaction) + // + // Very few levels as we don't want arbitrarily nested transactions. + size_t[4] endStack_; + // The number of elements currently in endStack_. + size_t endStackUsed_; + + @safe const pure nothrow @nogc invariant() + { + if(!inProgress) { return; } + assert(end_ <= reader_.bufferOffset_, "Slice ends after buffer position"); + assert(start_ <= end_, "Slice start after slice end"); + } + + // Is a slice currently being built? + bool inProgress() @safe const pure nothrow @nogc + in(start_ == size_t.max ? end_ == size_t.max : end_ != size_t.max, "start_/end_ are not consistent") + { + return start_ != size_t.max; + } + +public: + /// Begin building a slice. + /// + /// Only one slice can be built at any given time; before beginning a new slice, + /// finish the previous one (if any). 
+ /// + /// The slice starts at the current position in the Reader buffer. It can only be + /// extended up to the current position in the buffer; Reader methods get() and + /// forward() move the position. E.g. it is valid to extend a slice by write()-ing + /// a string just returned by get() - but not one returned by prefix() unless the + /// position has changed since the prefix() call. + void begin() @safe pure nothrow @nogc + in(!inProgress, "Beginning a slice while another slice is being built") + in(endStackUsed_ == 0, "Slice stack not empty at slice begin") + { + + start_ = reader_.bufferOffset_; + end_ = reader_.bufferOffset_; + } + + /// Finish building a slice and return it. + /// + /// Any Transactions on the slice must be committed or destroyed before the slice + /// is finished. + /// + /// Returns a string; once a slice is finished it is definitive that its contents + /// will not be changed. + char[] finish() @safe pure nothrow @nogc + in(inProgress, "finish called without begin") + in(endStackUsed_ == 0, "Finishing a slice with running transactions.") + { + + auto result = reader_.buffer_[start_ .. end_]; + start_ = end_ = size_t.max; + return result; + } + + /// Write a string to the slice being built. + /// + /// Data can only be written up to the current position in the Reader buffer. + /// + /// If str is a string returned by a Reader method, and str starts right after the + /// end of the slice being built, the slice is extended (trivial operation). + /// + /// See_Also: begin + void write(scope char[] str) @safe pure nothrow @nogc + { + assert(inProgress, "write called without begin"); + assert(end_ <= reader_.bufferOffset_, + "AT START: Slice ends after buffer position"); + + // Nothing? Already done. + if (str.length == 0) { return; } + // If str starts at the end of the slice (is a string returned by a Reader + // method), just extend the slice to contain str. 
+ if(&str[0] == &reader_.buffer_[end_]) + { + end_ += str.length; + } + // Even if str does not start at the end of the slice, it still may be returned + // by a Reader method and point to buffer. So we need to memmove. + else + { + copy(str, reader_.buffer_[end_..end_ + str.length * char.sizeof]); + end_ += str.length; + } + } + + /// Write a character to the slice being built. + /// + /// Data can only be written up to the current position in the Reader buffer. + /// + /// See_Also: begin + void write(dchar c) @safe pure + in(inProgress, "write called without begin") + { + if(c < 0x80) + { + reader_.buffer_[end_++] = cast(char)c; + return; + } + + // We need to encode a non-ASCII dchar into UTF-8 + char[4] encodeBuf; + const bytes = encode(encodeBuf, c); + reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; + end_ += bytes; + } + + /// Insert a character to a specified position in the slice. + /// + /// Enlarges the slice by 1 char. Note that the slice can only extend up to the + /// current position in the Reader buffer. + /// + /// Params: + /// + /// c = The character to insert. + /// position = Position to insert the character at in code units, not code points. + /// Must be less than slice length(); a previously returned length() + /// can be used. + void insert(const dchar c, const size_t position) @safe pure + in(inProgress, "insert called without begin") + in(start_ + position <= end_, "Trying to insert after the end of the slice") + { + + const point = start_ + position; + const movedLength = end_ - point; + + // Encode c into UTF-8 + char[4] encodeBuf; + if(c < 0x80) { encodeBuf[0] = cast(char)c; } + const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); + + if(movedLength > 0) + { + copy(reader_.buffer_[point..point + movedLength * char.sizeof], + reader_.buffer_[point + bytes..point + bytes + movedLength * char.sizeof]); + } + reader_.buffer_[point .. point + bytes] = encodeBuf[0 .. 
bytes]; + end_ += bytes; + } + + /// Get the current length of the slice. + size_t length() @safe const pure nothrow @nogc + { + return end_ - start_; + } + + /// A slice building transaction. + /// + /// Can be used to save and revert back to slice state. + struct Transaction + { + private: + // The slice builder affected by the transaction. + SliceBuilder* builder_; + // Index of the return point of the transaction in StringBuilder.endStack_. + size_t stackLevel_; + // True after commit() has been called. + bool committed_; + + public: + /// Begins a transaction on a SliceBuilder object. + /// + /// The transaction must end $(B after) any transactions created within the + /// transaction but $(B before) the slice is finish()-ed. A transaction can be + /// ended either by commit()-ing or reverting through the destructor. + /// + /// Saves the current state of a slice. + this(SliceBuilder* builder) @safe pure nothrow @nogc + { + builder_ = builder; + stackLevel_ = builder_.endStackUsed_; + builder_.push(); + } + + /// Commit changes to the slice. + /// + /// Ends the transaction - can only be called once, and removes the possibility + /// to revert slice state. + /// + /// Does nothing for a default-initialized transaction (the transaction has not + /// been started yet). + void commit() @safe pure nothrow @nogc + in(!committed_, "Can't commit a transaction more than once") + { + + if(builder_ is null) { return; } + assert(builder_.endStackUsed_ == stackLevel_ + 1, + "Parent transactions don't fully contain child transactions"); + builder_.apply(); + committed_ = true; + } + + /// Destroy the transaction and revert it if it hasn't been committed yet. + void end() @safe pure nothrow @nogc + in(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions") + { + builder_.pop(); + builder_ = null; + } + + } + +private: + // Push the current end of the slice so we can revert to it if needed. 
+ // + // Used by Transaction. + void push() @safe pure nothrow @nogc + in(inProgress, "push called without begin") + in(endStackUsed_ < endStack_.length, "Slice stack overflow") + { + endStack_[endStackUsed_++] = end_; + } + + // Pop the current end of endStack_ and set the end of the slice to the popped + // value, reverting changes since the old end was pushed. + // + // Used by Transaction. + void pop() @safe pure nothrow @nogc + in(inProgress, "pop called without begin") + in(endStackUsed_ > 0, "Trying to pop an empty slice stack") + { + end_ = endStack_[--endStackUsed_]; + } + + // Pop the current end of endStack_, but keep the current end of the slice, applying + // changes made since pushing the old end. + // + // Used by Transaction. + void apply() @safe pure nothrow @nogc + in(inProgress, "apply called without begin") + in(endStackUsed_ > 0, "Trying to apply an empty slice stack") + { + --endStackUsed_; + } +} + + +private: + +// Convert a UTF-8/16/32 buffer to UTF-8, in-place if possible. +// +// Params: +// +// input = Buffer with UTF-8/16/32 data to decode. May be overwritten by the +// conversion, in which case the result will be a slice of this buffer. +// encoding = Encoding of input. +// +// Returns: +// +// A struct with the following members: +// +// $(D string errorMessage) In case of an error, the error message is stored here. If +// there was no error, errorMessage is NULL. Always check +// this first. +// $(D char[] utf8) input converted to UTF-8. May be a slice of input. +// $(D size_t characterCount) Number of characters (code points) in input. +auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow +{ + // Documented in function ddoc. + struct Result + { + string errorMessage; + char[] utf8; + size_t characterCount; + } + + Result result; + + // Encode input_ into UTF-8 if it's encoded as UTF-16 or UTF-32. + // + // Params: + // + // buffer = The input buffer to encode. 
+ // result = A Result struct to put encoded result and any error messages to. + // + // On error, result.errorMessage will be set. + static void encode(C)(C[] input, ref Result result) @safe pure + { + // We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or + // less bytes. + static if(is(C == dchar)) + { + char[4] encodeBuf; + auto utf8 = cast(char[])input; + auto length = 0; + foreach(dchar c; input) + { + ++result.characterCount; + // ASCII + if(c < 0x80) + { + utf8[length++] = cast(char)c; + continue; + } + + std.utf.encode(encodeBuf, c); + const bytes = codeLength!char(c); + utf8[length .. length + bytes] = encodeBuf[0 .. bytes]; + length += bytes; + } + result.utf8 = utf8[0 .. length]; + } + // Unfortunately we can't do UTF-16 in place so we just use std.conv.to + else + { + result.characterCount = std.utf.count(input); + result.utf8 = input.to!(char[]); + } + } + + try final switch(encoding) + { + case UTFEncoding.UTF_8: + result.utf8 = cast(char[])input; + result.utf8.validate(); + result.characterCount = std.utf.count(result.utf8); + break; + case UTFEncoding.UTF_16: + assert(input.length % 2 == 0, "UTF-16 buffer size must be even"); + encode(cast(wchar[])input, result); + break; + case UTFEncoding.UTF_32: + assert(input.length % 4 == 0, "UTF-32 buffer size must be a multiple of 4"); + encode(cast(dchar[])input, result); + break; + } + catch(ConvException e) { result.errorMessage = e.msg; } + catch(UTFException e) { result.errorMessage = e.msg; } + catch(Exception e) + { + assert(false, "Unexpected exception in encode(): " ~ e.msg); + } + + return result; +} + +/// Determine if all characters (code points, not bytes) in a string are printable. 
+bool isPrintableValidUTF8(const char[] chars) @safe pure +{ + import std.uni : isControl, isWhite; + foreach (dchar chr; chars) + { + if (!chr.isValidDchar || (chr.isControl && !chr.isWhite)) + { + return false; + } + } + return true; +} + +/// Counts the number of ASCII characters in buffer until the first UTF-8 sequence. +/// +/// Used to determine how many characters we can process without decoding. +size_t countASCII(const(char)[] buffer) @safe pure nothrow @nogc +{ + return buffer.byCodeUnit.until!(x => x > 0x7F).walkLength; +} +// Unittests. + +void testEndian(R)() +{ + void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected) + { + auto reader = new R(data); + assert(reader.encoding == encoding_expected); + assert(reader.endian_ == endian_expected); + } + ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00]; + ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A]; + endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian); + endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian); +} + +void testPeekPrefixForward(R)() +{ + import std.encoding; + ubyte[] data = bomTable[BOM.utf8].sequence ~ cast(ubyte[])"data"; + auto reader = new R(data); + assert(reader.peek() == 'd'); + assert(reader.peek(1) == 'a'); + assert(reader.peek(2) == 't'); + assert(reader.peek(3) == 'a'); + assert(reader.peek(4) == '\0'); + assert(reader.prefix(4) == "data"); + // assert(reader.prefix(6) == "data\0"); + reader.forward(2); + assert(reader.peek(1) == 'a'); + // assert(collectException(reader.peek(3))); +} + +void testUTF(R)() +{ + import std.encoding; + dchar[] data = cast(dchar[])"data"; + void utf_test(T)(T[] data, BOM bom) + { + ubyte[] bytes = bomTable[bom].sequence ~ + (cast(ubyte[])data)[0 .. 
data.length * T.sizeof]; + auto reader = new R(bytes); + assert(reader.peek() == 'd'); + assert(reader.peek(1) == 'a'); + assert(reader.peek(2) == 't'); + assert(reader.peek(3) == 'a'); + } + utf_test!char(to!(char[])(data), BOM.utf8); + utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.utf16be : BOM.utf16le); + utf_test(data, endian == Endian.bigEndian ? BOM.utf32be : BOM.utf32le); +} + +void test1Byte(R)() +{ + ubyte[] data = [97]; + + auto reader = new R(data); + assert(reader.peek() == 'a'); + assert(reader.peek(1) == '\0'); + // assert(collectException(reader.peek(2))); +} + +@system unittest +{ + testEndian!Reader(); + testPeekPrefixForward!Reader(); + testUTF!Reader(); + test1Byte!Reader(); +} +//Issue 257 - https://github.com/dlang-community/D-YAML/issues/257 +@safe unittest +{ + import dyaml.loader : Loader; + auto yaml = "hello "; + auto root = Loader.fromString(yaml).load(); + + assert(root.isValid); +} |