diff options
Diffstat (limited to 'src/ext_depends/D-YAML/source/dyaml/reader.d')
-rw-r--r-- | src/ext_depends/D-YAML/source/dyaml/reader.d | 327 |
1 files changed, 36 insertions, 291 deletions
diff --git a/src/ext_depends/D-YAML/source/dyaml/reader.d b/src/ext_depends/D-YAML/source/dyaml/reader.d index ae44c80..824c1d1 100644 --- a/src/ext_depends/D-YAML/source/dyaml/reader.d +++ b/src/ext_depends/D-YAML/source/dyaml/reader.d @@ -31,19 +31,8 @@ alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); package: -///Exception thrown at Reader errors. -class ReaderException : YAMLException -{ - this(string msg, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super("Reader error: " ~ msg, file, line); - } -} - -/// Provides an API to read characters from a UTF-8 buffer and build slices into that -/// buffer to avoid allocations (see SliceBuilder). -final class Reader +/// Provides an API to read characters from a UTF-8 buffer. +struct Reader { private: // Buffer of currently loaded characters. @@ -102,8 +91,9 @@ final class Reader auto endianResult = fixUTFByteOrder(buffer); if(endianResult.bytesStripped > 0) { + // TODO: add line and column throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned " ~ - "to 2 or 4 bytes, respectively"); + "to 2 or 4 bytes, respectively", Mark(name, 0, 0)); } version(unittest) { endian_ = endianResult.endian; } @@ -113,17 +103,18 @@ final class Reader const msg = utf8Result.errorMessage; if(msg !is null) { - throw new ReaderException("Error when converting to UTF-8: " ~ msg); + // TODO: add line and column + throw new ReaderException("Error when converting to UTF-8: " ~ msg, Mark(name, 0, 0)); } buffer_ = utf8Result.utf8; characterCount_ = utf8Result.characterCount; // Check that all characters in buffer are printable. + // TODO: add line and column enforce(isPrintableValidUTF8(buffer_), - new ReaderException("Special unicode characters are not allowed")); + new ReaderException("Special unicode characters are not allowed", Mark(name, 0, 0))); - this.sliceBuilder = SliceBuilder(this); checkASCII(); } @@ -212,8 +203,7 @@ final class Reader /// Get specified number of characters starting at current position. /// /// Note: This gets only a "view" into the internal buffer, which will be - /// invalidated after other Reader calls. Use SliceBuilder to build slices - /// for permanent use. + /// invalidated after other Reader calls. /// /// Params: length = Number of characters (code points, not bytes) to get. May /// reach past the end of the buffer; in that case the returned @@ -228,8 +218,7 @@ final class Reader /// Get specified number of bytes, not code points, starting at current position. /// /// Note: This gets only a "view" into the internal buffer, which will be - /// invalidated after other Reader calls. Use SliceBuilder to build slices - /// for permanent use. + /// invalidated after other Reader calls. /// /// Params: length = Number bytes (not code points) to get. May NOT reach past /// the end of the buffer; should be used with peek() to avoid @@ -396,17 +385,34 @@ final class Reader checkASCII(); } - /// Used to build slices of read data in Reader; to avoid allocations. - SliceBuilder sliceBuilder; - - /// Get a string describing current buffer position, used for error messages. + /// Get filename, line and column of current position. Mark mark() const pure nothrow @nogc @safe { return Mark(name_, line_, column_); } - /// Get file name. - string name() const @safe pure nothrow @nogc { return name_; } + /// Get filename, line and column of current position + some number of chars + Mark mark(size_t advance) const pure @safe + { + auto lineTemp = cast()line_; + auto columnTemp = cast()column_; + auto bufferOffsetTemp = cast()bufferOffset_; + for (size_t pos = 0; pos < advance; pos++) + { + if (bufferOffsetTemp >= buffer_.length) + { + break; + } + const c = decode(buffer_, bufferOffsetTemp); + if (c.isBreak || (c == '\r' && buffer_[bufferOffsetTemp] == '\n')) + { + lineTemp++; + columnTemp = 0; + } + columnTemp++; + } + return Mark(name_, lineTemp, columnTemp); + } - /// Set file name. - void name(string name) pure @safe nothrow @nogc { name_ = name; } + /// Get file name. + ref inout(string) name() inout @safe return pure nothrow @nogc { return name_; } /// Get current line number. uint line() const @safe pure nothrow @nogc { return line_; } @@ -448,267 +454,6 @@ private: } } -/// Used to build slices of already read data in Reader buffer, avoiding allocations. -/// -/// Usually these slices point to unchanged Reader data, but sometimes the data is -/// changed due to how YAML interprets certain characters/strings. -/// -/// See begin() documentation. -struct SliceBuilder -{ -private: - // No copying by the user. - @disable this(this); - @disable void opAssign(ref SliceBuilder); - - // Reader this builder works in. - Reader reader_; - - // Start of the slice om reader_.buffer_ (size_t.max while no slice being build) - size_t start_ = size_t.max; - // End of the slice om reader_.buffer_ (size_t.max while no slice being build) - size_t end_ = size_t.max; - - // Stack of slice ends to revert to (see Transaction) - // - // Very few levels as we don't want arbitrarily nested transactions. - size_t[4] endStack_; - // The number of elements currently in endStack_. - size_t endStackUsed_; - - @safe const pure nothrow @nogc invariant() - { - if(!inProgress) { return; } - assert(end_ <= reader_.bufferOffset_, "Slice ends after buffer position"); - assert(start_ <= end_, "Slice start after slice end"); - } - - // Is a slice currently being built? - bool inProgress() @safe const pure nothrow @nogc - in(start_ == size_t.max ? end_ == size_t.max : end_ != size_t.max, "start_/end_ are not consistent") - { - return start_ != size_t.max; - } - -public: - /// Begin building a slice. - /// - /// Only one slice can be built at any given time; before beginning a new slice, - /// finish the previous one (if any). - /// - /// The slice starts at the current position in the Reader buffer. It can only be - /// extended up to the current position in the buffer; Reader methods get() and - /// forward() move the position. E.g. it is valid to extend a slice by write()-ing - /// a string just returned by get() - but not one returned by prefix() unless the - /// position has changed since the prefix() call. - void begin() @safe pure nothrow @nogc - in(!inProgress, "Beginning a slice while another slice is being built") - in(endStackUsed_ == 0, "Slice stack not empty at slice begin") - { - - start_ = reader_.bufferOffset_; - end_ = reader_.bufferOffset_; - } - - /// Finish building a slice and return it. - /// - /// Any Transactions on the slice must be committed or destroyed before the slice - /// is finished. - /// - /// Returns a string; once a slice is finished it is definitive that its contents - /// will not be changed. - char[] finish() @safe pure nothrow @nogc - in(inProgress, "finish called without begin") - in(endStackUsed_ == 0, "Finishing a slice with running transactions.") - { - - auto result = reader_.buffer_[start_ .. end_]; - start_ = end_ = size_t.max; - return result; - } - - /// Write a string to the slice being built. - /// - /// Data can only be written up to the current position in the Reader buffer. - /// - /// If str is a string returned by a Reader method, and str starts right after the - /// end of the slice being built, the slice is extended (trivial operation). - /// - /// See_Also: begin - void write(scope char[] str) @safe pure nothrow @nogc - { - assert(inProgress, "write called without begin"); - assert(end_ <= reader_.bufferOffset_, - "AT START: Slice ends after buffer position"); - - // Nothing? Already done. - if (str.length == 0) { return; } - // If str starts at the end of the slice (is a string returned by a Reader - // method), just extend the slice to contain str. - if(&str[0] == &reader_.buffer_[end_]) - { - end_ += str.length; - } - // Even if str does not start at the end of the slice, it still may be returned - // by a Reader method and point to buffer. So we need to memmove. - else - { - copy(str, reader_.buffer_[end_..end_ + str.length * char.sizeof]); - end_ += str.length; - } - } - - /// Write a character to the slice being built. - /// - /// Data can only be written up to the current position in the Reader buffer. - /// - /// See_Also: begin - void write(dchar c) @safe pure - in(inProgress, "write called without begin") - { - if(c < 0x80) - { - reader_.buffer_[end_++] = cast(char)c; - return; - } - - // We need to encode a non-ASCII dchar into UTF-8 - char[4] encodeBuf; - const bytes = encode(encodeBuf, c); - reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; - end_ += bytes; - } - - /// Insert a character to a specified position in the slice. - /// - /// Enlarges the slice by 1 char. Note that the slice can only extend up to the - /// current position in the Reader buffer. - /// - /// Params: - /// - /// c = The character to insert. - /// position = Position to insert the character at in code units, not code points. - /// Must be less than slice length(); a previously returned length() - /// can be used. - void insert(const dchar c, const size_t position) @safe pure - in(inProgress, "insert called without begin") - in(start_ + position <= end_, "Trying to insert after the end of the slice") - { - - const point = start_ + position; - const movedLength = end_ - point; - - // Encode c into UTF-8 - char[4] encodeBuf; - if(c < 0x80) { encodeBuf[0] = cast(char)c; } - const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); - - if(movedLength > 0) - { - copy(reader_.buffer_[point..point + movedLength * char.sizeof], - reader_.buffer_[point + bytes..point + bytes + movedLength * char.sizeof]); - } - reader_.buffer_[point .. point + bytes] = encodeBuf[0 .. bytes]; - end_ += bytes; - } - - /// Get the current length of the slice. - size_t length() @safe const pure nothrow @nogc - { - return end_ - start_; - } - - /// A slice building transaction. - /// - /// Can be used to save and revert back to slice state. - struct Transaction - { - private: - // The slice builder affected by the transaction. - SliceBuilder* builder_; - // Index of the return point of the transaction in StringBuilder.endStack_. - size_t stackLevel_; - // True after commit() has been called. - bool committed_; - - public: - /// Begins a transaction on a SliceBuilder object. - /// - /// The transaction must end $(B after) any transactions created within the - /// transaction but $(B before) the slice is finish()-ed. A transaction can be - /// ended either by commit()-ing or reverting through the destructor. - /// - /// Saves the current state of a slice. - this(SliceBuilder* builder) @safe pure nothrow @nogc - { - builder_ = builder; - stackLevel_ = builder_.endStackUsed_; - builder_.push(); - } - - /// Commit changes to the slice. - /// - /// Ends the transaction - can only be called once, and removes the possibility - /// to revert slice state. - /// - /// Does nothing for a default-initialized transaction (the transaction has not - /// been started yet). - void commit() @safe pure nothrow @nogc - in(!committed_, "Can't commit a transaction more than once") - { - - if(builder_ is null) { return; } - assert(builder_.endStackUsed_ == stackLevel_ + 1, - "Parent transactions don't fully contain child transactions"); - builder_.apply(); - committed_ = true; - } - - /// Destroy the transaction and revert it if it hasn't been committed yet. - void end() @safe pure nothrow @nogc - in(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions") - { - builder_.pop(); - builder_ = null; - } - - } - -private: - // Push the current end of the slice so we can revert to it if needed. - // - // Used by Transaction. - void push() @safe pure nothrow @nogc - in(inProgress, "push called without begin") - in(endStackUsed_ < endStack_.length, "Slice stack overflow") - { - endStack_[endStackUsed_++] = end_; - } - - // Pop the current end of endStack_ and set the end of the slice to the popped - // value, reverting changes since the old end was pushed. - // - // Used by Transaction. - void pop() @safe pure nothrow @nogc - in(inProgress, "pop called without begin") - in(endStackUsed_ > 0, "Trying to pop an empty slice stack") - { - end_ = endStack_[--endStackUsed_]; - } - - // Pop the current end of endStack_, but keep the current end of the slice, applying - // changes made since pushing the old end. - // - // Used by Transaction. - void apply() @safe pure nothrow @nogc - in(inProgress, "apply called without begin") - in(endStackUsed_ > 0, "Trying to apply an empty slice stack") - { - --endStackUsed_; - } -} - - private: // Convert a UTF-8/16/32 buffer to UTF-8, in-place if possible. @@ -728,7 +473,7 @@ private: // this first. // $(D char[] utf8) input converted to UTF-8. May be a slice of input. // $(D size_t characterCount) Number of characters (code points) in input. -auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow +public auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow { // Documented in function ddoc. struct Result |