rust-dnsbox/lib/dnsbox-base/src/ser/text/quoted.rs

230 lines
5.6 KiB
Rust

use std::fmt;
/// Textual encoding of a single input byte, stored inline.
///
/// This is the item type of `EncodeIterator`; it dereferences to the encoded
/// text as a `str`.
pub struct EncodedByte {
    /// Backing buffer; the longest encoding is the four byte escape `\000`.
    storage: [u8; 4],
    /// Number of leading bytes in `storage` that make up the encoding.
    used: u8,
}

impl ::std::ops::Deref for EncodedByte {
    type Target = str;

    /// Borrows the used prefix of the buffer as a string slice.
    fn deref(&self) -> &Self::Target {
        let len = usize::from(self.used);
        let encoded = &self.storage[..len];
        // NOTE(review): presumably this project helper skips UTF-8
        // validation, so `storage` must only ever hold ASCII - confirm.
        ::unsafe_ops::from_utf8_unchecked(encoded)
    }
}
/// Iterator that turns a byte slice into its escaped text form, one input
/// byte per step.
pub struct EncodeIterator<'a> {
    /// When set, ASCII whitespace bytes are escaped in addition to the
    /// bytes that are always escaped.
    encode_whitespace: bool,
    /// Input bytes that have not been encoded yet.
    data: &'a [u8],
}

impl<'a> EncodeIterator<'a> {
    /// Shared constructor behind the two public entry points.
    fn with_policy(value: &'a [u8], encode_whitespace: bool) -> Self {
        EncodeIterator {
            encode_whitespace,
            data: value,
        }
    }

    /// Creates an encoder over `value` with whitespace escaping turned off.
    pub fn new_quoted(value: &'a [u8]) -> Self {
        Self::with_policy(value, false)
    }

    /// Creates an encoder over `value` that also escapes ASCII whitespace.
    pub fn new_encode_whitespace(value: &'a [u8]) -> Self {
        Self::with_policy(value, true)
    }
}
impl<'a> Iterator for EncodeIterator<'a> {
    type Item = EncodedByte;

    /// Encodes the next input byte, or returns `None` once the input is
    /// exhausted.
    ///
    /// * Bytes outside printable ASCII (`< 32` or `>= 127`), and ASCII
    ///   whitespace when `encode_whitespace` is set, become a three digit
    ///   decimal escape `\ddd`.
    /// * `"` and `\` are prefixed with a backslash.
    /// * Every other byte is emitted unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        let (b, rest) = match self.data.split_first() {
            Some((&first, rest)) => (first, rest),
            None => return None,
        };
        self.data = rest;

        // DEL (127) is a control character, not a printable one, so it is
        // escaped together with the other control bytes and the non-ASCII
        // range; comparing with `> 127` would emit it raw.
        let needs_decimal =
            b < 32 || b >= 127 || (self.encode_whitespace && is_ascii_whitespace(b));

        let (storage, used) = if needs_decimal {
            // `\ddd`: always three decimal digits, zero padded
            let digits = [b / 100, (b / 10) % 10, b % 10];
            (
                [b'\\', b'0' + digits[0], b'0' + digits[1], b'0' + digits[2]],
                4,
            )
        } else if b == b'"' || b == b'\\' {
            // `\c`: the character itself, protected by a backslash
            ([b'\\', b, 0, 0], 2)
        } else {
            // printable ASCII is passed through unchanged
            ([b, 0, 0, 0], 1)
        };

        Some(EncodedByte { storage, used })
    }
}
/// Error produced when unquoting a text field fails.
#[derive(Debug)]
pub struct UnquoteError {
    /// Copy of the input that was being parsed when the error was raised.
    data: String,
    /// Byte offset into `data` that the parser had reached.
    position: usize,
    /// Fixed description of what went wrong.
    msg: &'static str,
}

impl fmt::Display for UnquoteError {
    /// Writes the offset, the debug-quoted input and the description.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let UnquoteError {
            ref data,
            position,
            msg,
        } = *self;
        f.write_fmt(format_args!(
            "unquote error at position {} in {:?}: {}",
            position, data, msg
        ))
    }
}
// Opts `UnquoteError` into the `failure` crate's `Fail` trait; the empty body
// means none of the trait's methods are overridden here.
impl ::failure::Fail for UnquoteError {}
pub struct UnquoteIterator<'a, 'b: 'a> {
quoted: bool,
data: &'a mut &'b str,
pos: usize,
}
/// when walked to end without hitting errors between, the terminating
/// `"` and following whitespace will be removed from `*data`.
impl<'a, 'b: 'a> UnquoteIterator<'a, 'b> {
pub fn new(data: &'a mut &'b str) -> Self {
UnquoteIterator {
quoted: false,
data: data,
pos: 0,
}
}
fn err<T>(&mut self, msg: &'static str) -> Option<Result<T, UnquoteError>> {
Some(Err(UnquoteError{
data: (*self.data).into(),
position: self.pos,
msg: msg,
}))
}
}
/// Returns `true` if `c` is one of the ASCII whitespace bytes: horizontal
/// tab, line feed, form feed, carriage return or space.
///
/// Vertical tab (`0x0b`) is not part of this set.
pub(crate) fn is_ascii_whitespace(c: u8) -> bool {
    // 0x09 horizontal tab (\t), 0x0a line feed (\n), 0x0c form feed (\f),
    // 0x0d carriage return (\r), 0x20 space
    const WHITESPACE: &[u8] = &[0x09, 0x0a, 0x0c, 0x0d, 0x20];
    WHITESPACE.contains(&c)
}
impl<'a, 'b: 'a> Iterator for UnquoteIterator<'a, 'b> {
    type Item = Result<u8, UnquoteError>;

    /// Decodes and returns the next byte of the field.
    ///
    /// Returns `None` when the field ends: at the closing `"` of a quoted
    /// field, or at whitespace / end of input for an unquoted one. At that
    /// point `*data` is replaced by the remaining input with leading
    /// whitespace trimmed. Malformed input yields `Some(Err(..))`.
    fn next(&mut self) -> Option<Self::Item> {
        let text: &'b str = *self.data;
        let bytes = text.as_bytes();
        if bytes.is_empty() {
            return self.err("empty input");
        }

        // a leading quote switches to quoted mode and is skipped
        if self.pos == 0 && bytes[0] == b'"' {
            self.quoted = true;
            self.pos = 1;
        }

        let at = self.pos;
        let current = match bytes.get(at) {
            Some(&byte) => byte,
            // a quoted field must be closed before the input ends
            None if self.quoted => return self.err("unexpected end of string"),
            None => {
                *self.data = "";
                return None;
            }
        };

        let is_digit = |c: u8| c >= b'0' && c <= b'9';

        match current {
            b'"' => {
                if !self.quoted {
                    return self.err("quote in the middle of unquoted string");
                }
                // the closing quote must be followed by end-of-string or whitespace
                match bytes.get(at + 1) {
                    Some(&follow) if !is_ascii_whitespace(follow) => {
                        return self.err("quote in the middle of quoted string");
                    }
                    _ => {}
                }
                // drop the terminating quote; the byte after an ASCII quote
                // is always a valid UTF-8 boundary
                *self.data = text[at + 1..].trim_left();
                None
            }
            blank if !self.quoted && is_ascii_whitespace(blank) => {
                // an ASCII whitespace byte is always a valid UTF-8 boundary
                *self.data = text[at..].trim_left();
                None
            }
            b'\\' => {
                let first = match bytes.get(at + 1) {
                    Some(&byte) => byte,
                    None => return self.err("unexpected end of string after backslash"),
                };
                if !is_digit(first) {
                    // `\c`: the escaped byte is taken literally
                    if !self.quoted && is_ascii_whitespace(first) {
                        return self.err("(escaped) whitespace not allowed in unquoted field");
                    }
                    self.pos = at + 2;
                    return Some(Ok(first));
                }
                // `\ddd`: exactly three decimal digits are required
                if at + 3 >= bytes.len() {
                    return self.err("unexpected end of string after backslash with decimal");
                }
                let second = bytes[at + 2];
                let third = bytes[at + 3];
                if !(is_digit(second) && is_digit(third)) {
                    return self.err("expecting 3 digits after backslash with decimal");
                }
                let value = u32::from(first - b'0') * 100
                    + u32::from(second - b'0') * 10
                    + u32::from(third - b'0');
                if value > 255 {
                    return self.err("invalid decimal escape");
                }
                self.pos = at + 4;
                Some(Ok(value as u8))
            }
            plain => {
                self.pos = at + 1;
                Some(Ok(plain))
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use ser::text::{next_quoted_field, quote};

    /// Quotes `data` and compares the result with `quoted`.
    fn check_quote(data: &[u8], quoted: &str) {
        let actual = quote(data);
        assert_eq!(actual, quoted);
    }

    /// Parses the first field of `input`, expecting it to decode to `data`
    /// and to leave no input behind.
    fn check_unquote(mut input: &str, data: &[u8]) {
        let parsed = next_quoted_field(&mut input).unwrap();
        assert_eq!(parsed, data);
        assert!(input.is_empty());
    }

    #[test]
    fn test_escapes() {
        let raw: &[u8] = b"\"hello \\ \xc3\xa4";
        check_quote(raw, r#""\"hello \\ \195\164""#);
    }

    #[test]
    fn test_parser() {
        let hello: &[u8] = b"\"hello \\ \xc3\xa4";
        let foobar: &[u8] = b"foobar\xc3\xa4";
        let cases: [(&str, &[u8]); 8] = [
            (r#""\"hello \\ \195\164""#, hello),
            (r#" "\"hello \\ \195\164" "#, hello),
            (r#""\"hello \\ ä""#, hello),
            (r#" "\"hello \\ ä" "#, hello),
            // unquoted input
            (r#"foobarä"#, foobar),
            (r#"foobar\195\164"#, foobar),
            (r#" foobarä "#, foobar),
            // random (unnecessary) escapes:
            (r#" "\x\%\@\." "#, &b"x%@."[..]),
        ];
        for &(input, expected) in cases.iter() {
            check_unquote(input, expected);
        }
    }
}