rust-dnsbox/lib/dnsbox-base/src/ser/text/quoted.rs

use std::fmt;

/// Textual encoding of a single input byte, as yielded by `EncodeIterator`.
///
/// Only the first `used` bytes of `storage` are meaningful; the `Deref`
/// implementation exposes exactly that prefix as a `str`.
pub struct EncodedByte {
	// backing buffer; the longest encoding is a 4-byte decimal escape
	storage: [u8; 4], // max: `\000`
	// number of leading bytes of `storage` that belong to the encoding
	used: u8,
}

impl ::std::ops::Deref for EncodedByte {
	type Target = str;

	/// Returns the encoded text: the first `used` bytes of `storage`, viewed
	/// as a `str`.
	fn deref(&self) -> &Self::Target {
		let len = usize::from(self.used);
		let encoded = &self.storage[..len];
		// NOTE(review): the helper presumably skips UTF-8 validation (going
		// by its name). The values built by `EncodeIterator::next` in this
		// file only ever store ASCII bytes in the used prefix.
		crate::unsafe_ops::from_utf8_unchecked(encoded)
	}
}

/// Iterator that encodes a byte slice for the text representation, yielding
/// one `EncodedByte` per input byte.
pub struct EncodeIterator<'a> {
	// when set, ASCII whitespace (see `is_ascii_whitespace`) is emitted as a
	// decimal escape too
	encode_whitespace: bool,
	// input bytes that have not been encoded yet
	data: &'a [u8],
}

impl<'a> EncodeIterator<'a> {
	/// Creates an encoder over `value` that does not escape whitespace
	/// specially: a space is emitted as is (bytes below 32 are still
	/// escaped by the iterator).
	pub fn new_quoted(value: &'a [u8]) -> Self {
		Self::with_whitespace_mode(value, false)
	}

	/// Creates an encoder over `value` that additionally escapes ASCII
	/// whitespace, including the space character.
	pub fn new_encode_whitespace(value: &'a [u8]) -> Self {
		Self::with_whitespace_mode(value, true)
	}

	// shared constructor behind the two public entry points
	fn with_whitespace_mode(data: &'a [u8], encode_whitespace: bool) -> Self {
		EncodeIterator {
			data,
			encode_whitespace,
		}
	}
}

impl<'a> Iterator for EncodeIterator<'a> {
	type Item = EncodedByte;

	/// Encodes the next input byte, or returns `None` once the input is
	/// exhausted.
	///
	/// * bytes below 32 or from 127 upwards (and ASCII whitespace when
	///   `encode_whitespace` is set) become a three digit decimal escape
	///   `\ddd`,
	/// * `"` and `\` are prefixed with a backslash,
	/// * every other byte is emitted unchanged.
	fn next(&mut self) -> Option<Self::Item> {
		let (&byte, rest) = self.data.split_first()?;
		self.data = rest;

		let printable = byte >= 32 && byte < 127;
		let forced_whitespace_escape = self.encode_whitespace && is_ascii_whitespace(byte);

		let (storage, used) = if !printable || forced_whitespace_escape {
			// `\ddd`: hundreds, tens and ones as ASCII digits
			let hundreds = byte / 100;
			let tens = (byte / 10) % 10;
			let ones = byte % 10;
			([b'\\', b'0' + hundreds, b'0' + tens, b'0' + ones], 4)
		} else if byte == b'"' || byte == b'\\' {
			// `\c`
			([b'\\', byte, 0, 0], 2)
		} else {
			([byte, 0, 0, 0], 1)
		};

		Some(EncodedByte { storage, used })
	}
}

/// Error produced by `UnquoteIterator` when the input cannot be decoded.
#[derive(Debug)]
pub struct UnquoteError {
	// copy of the input string the iterator was working on
	data: String,
	// byte offset into `data` the iterator had reached when it failed
	position: usize,
	// static description of what went wrong
	msg: &'static str,
}

impl fmt::Display for UnquoteError {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		write!(
			f,
			"unquote error at position {} in {:?}: {}",
			self.position, self.data, self.msg
		)
	}
}

// Marks `UnquoteError` as a `failure::Fail` error; the empty body means no
// trait method is overridden here.
impl failure::Fail for UnquoteError {}

/// Iterator decoding one (optionally double-quoted) field from the front of
/// a string slice, yielding the decoded bytes one at a time.
pub struct UnquoteIterator<'a, 'b: 'a> {
	// set once the input was found to start with `"`
	quoted: bool,
	// the caller's string slice; replaced by the unparsed remainder when the
	// end of the field is reached
	data: &'a mut &'b str,
	// byte offset into `*data` of the next byte to decode
	pos: usize,
}

/// when walked to end without hitting errors between, the terminating
/// `"` and following whitespace will be removed from `*data`.
impl<'a, 'b: 'a> UnquoteIterator<'a, 'b> {
	pub fn new(data: &'a mut &'b str) -> Self {
		UnquoteIterator {
			quoted: false,
			data: data,
			pos: 0,
		}
	}

	fn err<T>(&mut self, msg: &'static str) -> Option<Result<T, UnquoteError>> {
		Some(Err(UnquoteError {
			data: (*self.data).into(),
			position: self.pos,
			msg: msg,
		}))
	}
}

/// Returns `true` if `c` is one of the ASCII whitespace bytes recognised by
/// this module: horizontal tab, line feed, form feed, carriage return or
/// space. (Vertical tab, `0x0b`, is not included.)
pub(crate) fn is_ascii_whitespace(c: u8) -> bool {
	match c {
		// \t, \n, \f (form feed), \r (carriage return), ' '
		b'\t' | b'\n' | 0x0c | b'\r' | b' ' => true,
		_ => false,
	}
}

impl<'a, 'b: 'a> Iterator for UnquoteIterator<'a, 'b> {
	type Item = Result<u8, UnquoteError>;

	/// Decodes and returns the next byte of the field.
	///
	/// A field is quoted if the input starts with `"`; it then runs up to
	/// the next unescaped `"`, which must be followed by whitespace or the
	/// end of the input. An unquoted field runs up to the first whitespace
	/// byte or the end of the input. In both forms `\ddd` (exactly three
	/// decimal digits, value at most 255) decodes to that byte value and
	/// `\c` for any non-digit `c` decodes to `c` itself.
	///
	/// Returns `None` at the end of the field; `*data` is then replaced by
	/// the input following the field with leading whitespace (as defined by
	/// `is_ascii_whitespace`) removed.
	///
	/// Returns `Some(Err(..))` for: empty input, a quoted field without
	/// terminating quote, a quote in the middle of a field, a truncated or
	/// malformed escape, a decimal escape above 255, and escaped whitespace
	/// in an unquoted field.
	///
	/// `pos` and `quoted` are not reset when the field ends, so the
	/// iterator is not meant to be polled again after `None`.
	fn next(&mut self) -> Option<Self::Item> {
		// Copy the inner `&'b str` out so the slices taken below do not
		// keep `self` borrowed.
		let text: &'b str = *self.data;
		let raw = text.as_bytes();

		if raw.is_empty() {
			return self.err("empty input");
		}

		// check for starting quote (only before anything was consumed):
		if self.pos == 0 && raw[0] == b'"' {
			self.quoted = true;
			self.pos += 1;
		}

		let pos = self.pos;
		let current = match raw.get(pos) {
			Some(&byte) => byte,
			None => {
				if self.quoted {
					return self.err("unexpected end of string");
				}
				*self.data = "";
				return None;
			}
		};

		match current {
			b'"' => {
				if !self.quoted {
					return self.err("quote in the middle of unquoted string");
				}
				// either followed by end-of-string or a whitespace
				if let Some(&follow) = raw.get(pos + 1) {
					if !is_ascii_whitespace(follow) {
						return self.err("quote in the middle of quoted string");
					}
				}
				// eat terminating quote; `"` is ASCII, so pos+1 is a good
				// utf-8 boundary
				*self.data = skip_leading_ascii_whitespace(&text[pos + 1..]);
				None
			}
			byte if !self.quoted && is_ascii_whitespace(byte) => {
				// an ASCII byte always sits on a utf-8 boundary
				*self.data = skip_leading_ascii_whitespace(&text[pos..]);
				None
			}
			b'\\' => {
				let first = match raw.get(pos + 1) {
					Some(&byte) => byte,
					None => return self.err("unexpected end of string after backslash"),
				};
				if !first.is_ascii_digit() {
					// `\c`: the escaped byte stands for itself
					if !self.quoted && is_ascii_whitespace(first) {
						return self.err("(escaped) whitespace not allowed in unquoted field");
					}
					self.pos += 2;
					return Some(Ok(first));
				}
				// otherwise require 3 decimal digits
				if pos + 3 >= raw.len() {
					return self.err("unexpected end of string after backslash with decimal");
				}
				let second = raw[pos + 2];
				let third = raw[pos + 3];
				// `first` was already checked for digit above
				if !second.is_ascii_digit() || !third.is_ascii_digit() {
					return self.err("expecting 3 digits after backslash with decimal");
				}
				let value = u32::from(first - b'0') * 100
					+ u32::from(second - b'0') * 10
					+ u32::from(third - b'0');
				if value > 255 {
					return self.err("invalid decimal escape");
				}
				self.pos += 4;
				// cannot truncate: value <= 255 was checked above
				Some(Ok(value as u8))
			}
			byte => {
				self.pos += 1;
				Some(Ok(byte))
			}
		}
	}
}

/// Strips the leading bytes of `s` for which `is_ascii_whitespace` is true.
///
/// `str::trim_start` is deliberately not used here: it also strips
/// non-ASCII whitespace (and vertical tab), which the iterator above treats
/// as ordinary field data; stripping it would silently drop the first bytes
/// of the following field.
fn skip_leading_ascii_whitespace(s: &str) -> &str {
	s.trim_start_matches(|c: char| c.is_ascii() && is_ascii_whitespace(c as u8))
}

#[cfg(test)]
mod tests {
	use crate::ser::text::{next_quoted_field, quote};

	// decoded form of the `"hello \ ä` sample (with `ä` as UTF-8 bytes)
	const HELLO: &[u8] = b"\"hello \\ \xc3\xa4";
	// decoded form of the `foobarä` sample
	const FOOBAR: &[u8] = b"foobar\xc3\xa4";

	// `quote(data)` must produce exactly `quoted`
	fn check_quote(data: &[u8], quoted: &str) {
		let actual = quote(data);
		assert_eq!(actual, quoted);
	}

	// `input` must decode to `data` and be consumed completely
	fn check_unquote(mut input: &str, data: &[u8]) {
		let field = next_quoted_field(&mut input).unwrap();
		assert_eq!(field, data);
		assert!(input.is_empty());
	}

	#[test]
	fn test_escapes() {
		check_quote(HELLO, r#""\"hello \\ \195\164""#);
	}

	#[test]
	fn test_parser() {
		// quoted input, with and without surrounding whitespace
		let quoted_inputs = [
			r#""\"hello \\ \195\164""#,
			r#"  "\"hello \\ \195\164"     "#,
			r#""\"hello \\ ä""#,
			r#"  "\"hello \\ ä"   "#,
		];
		for input in quoted_inputs.iter() {
			check_unquote(input, HELLO);
		}

		// unquoted input
		let unquoted_inputs = [
			r#"foobarä"#,
			r#"foobar\195\164"#,
			r#" foobarä "#,
		];
		for input in unquoted_inputs.iter() {
			check_unquote(input, FOOBAR);
		}

		// random (unnecessary) escapes:
		check_unquote(r#" "\x\%\@\." "#, b"x%@.");
	}
}