wip
This commit is contained in:
commit
a66c2e05f9
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
344
Cargo.lock
generated
Normal file
344
Cargo.lock
generated
Normal file
@ -0,0 +1,344 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "brownstone"
|
||||
version = "3.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5839ee4f953e811bfdcf223f509cb2c6a3e1447959b0bff459405575bc17f22"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "5.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"hashbrown 0.14.0",
|
||||
"lock_api",
|
||||
"once_cell",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derivative"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gc"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3edaac0f5832202ebc99520cb77c932248010c4645d20be1dc62d6579f5b3752"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "indent_write"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3"
|
||||
|
||||
[[package]]
|
||||
name = "internment"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "161079c3ad892faa215fcfcf3fd7a6a3c9288df2b06a2c2bad7fbfad4f01d69d"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"dashmap",
|
||||
"hashbrown 0.12.3",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "joinery"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"derivative",
|
||||
"gc",
|
||||
"internment",
|
||||
"nom",
|
||||
"nom-supreme",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom-supreme"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bd3ae6c901f1959588759ff51c95d24b491ecb9ff91aa9c2ef4acc5b1dcab27"
|
||||
dependencies = [
|
||||
"brownstone",
|
||||
"indent_write",
|
||||
"joinery",
|
||||
"memchr",
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "nix"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
gc = "0.4.1"
|
||||
internment = { version="0.7.1", default-features=false, features=["arc"] }
|
||||
nom = "7.1.3"
|
||||
nom-supreme = "0.8.0"
|
||||
derivative = "2.2.0"
|
39
src/bin/nix.rs
Normal file
39
src/bin/nix.rs
Normal file
@ -0,0 +1,39 @@
|
||||
extern crate nix;
|
||||
|
||||
fn main() {
|
||||
let source = std::sync::Arc::new(
|
||||
nix::parser::source::Source {
|
||||
filename: "default.nix".into(),
|
||||
content: r#"let requiredVersion = import ./lib/minver.nix; in
|
||||
|
||||
if ! builtins ? nixVersion || builtins.compareVersions requiredVersion builtins.nixVersion == 1 then
|
||||
|
||||
abort ''
|
||||
|
||||
This version of Nixpkgs requires Nix >= ${requiredVersion}, please upgrade:
|
||||
|
||||
- If you are running NixOS, `nixos-rebuild' can be used to upgrade your system.
|
||||
|
||||
- Alternatively, with Nix > 2.0 `nix upgrade-nix' can be used to imperatively
|
||||
upgrade Nix. You may use `nix-env --version' to check which version you have.
|
||||
|
||||
- If you installed Nix using the install script (https://nixos.org/nix/install),
|
||||
it is safe to upgrade by running it again:
|
||||
|
||||
curl -L https://nixos.org/nix/install | sh
|
||||
|
||||
For more information, please see the NixOS release notes at
|
||||
https://nixos.org/nixos/manual or locally at
|
||||
${toString ./nixos/doc/manual/release-notes}.
|
||||
|
||||
If you need further help, see https://nixos.org/nixos/support.html
|
||||
''
|
||||
|
||||
else
|
||||
|
||||
import ./pkgs/top-level/impure.nix
|
||||
"#.into(),
|
||||
},
|
||||
);
|
||||
println!("{:?}", nix::parser::token::TokenList::parse_file(source.span()).unwrap().1);
|
||||
}
|
1
src/lib.rs
Normal file
1
src/lib.rs
Normal file
@ -0,0 +1 @@
|
||||
pub mod parser;
|
41
src/parser/common.rs
Normal file
41
src/parser/common.rs
Normal file
@ -0,0 +1,41 @@
|
||||
use super::source::Span;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Number {
|
||||
Integer(i64),
|
||||
Float(f64),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SpannedData<T> {
|
||||
pub data: T,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl<T> std::fmt::Debug for SpannedData<T>
|
||||
where
|
||||
T: std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.data.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
type InnerIdentifier = internment::ArcIntern<String>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Identifier(InnerIdentifier);
|
||||
|
||||
impl Identifier {
|
||||
pub fn from_ref(identifier: &str) -> Self {
|
||||
Self(InnerIdentifier::from_ref(identifier))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Identifier {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("`")?;
|
||||
f.write_str(self.0.as_str())?;
|
||||
f.write_str("`")
|
||||
}
|
||||
}
|
110
src/parser/expression.rs
Normal file
110
src/parser/expression.rs
Normal file
@ -0,0 +1,110 @@
|
||||
use super::Identifier;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Number {
|
||||
Integer(i64),
|
||||
Float(f64),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Literal {
|
||||
Number(Number),
|
||||
String(String),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum AttributeName {
|
||||
// plain identifier
|
||||
Literal(Identifier),
|
||||
// quoted strings or ${...} expressions
|
||||
Interpolated(Arc<Expression>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum PathElement {
|
||||
Fixed(String),
|
||||
// ${...}
|
||||
Expression(Arc<Expression>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
// `inherit NAME1 NAME2;`
|
||||
// `inherit (SET) NAME1 NAME2;`
|
||||
pub struct Inherit {
|
||||
pub from: Option<Arc<Expression>>,
|
||||
// quoted identifiers are ok, but not dynamic ones
|
||||
pub names: Vec<Identifier>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum LetAssignment {
|
||||
Assign {
|
||||
// quoted identifier is ok, but not dynamic one
|
||||
name: Identifier,
|
||||
value: Arc<Expression>,
|
||||
},
|
||||
Inherit(Inherit),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LambdaSetParam {
|
||||
pub names: Vec<(Identifier, Option<Arc<Expression>>)>,
|
||||
pub open: bool, // `...`
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Expression {
|
||||
Identifier(Identifier),
|
||||
Literal(Arc<Literal>),
|
||||
InterpolateString(Vec<Arc<Expression>>),
|
||||
Path {
|
||||
// base must include `/`, otherwise wouldn't be recognized as path
|
||||
base: String,
|
||||
interpolate: Vec<PathElement>,
|
||||
},
|
||||
// `let (NAME = VALUE;)* in EVALUATE`
|
||||
Let {
|
||||
assignments: Vec<(Identifier, Arc<Expression>)>,
|
||||
evaluate: Arc<Expression>,
|
||||
},
|
||||
// `with SET; EVALUATE`
|
||||
// attributes from with don't "shadow" attributes in scope from let/lambda/rec.
|
||||
// but they do shadow attributes from "more distant" with statements.
|
||||
With {
|
||||
set: Arc<Expression>,
|
||||
evaluate: Arc<Expression>,
|
||||
},
|
||||
// `[ ... ]`
|
||||
List {
|
||||
elements: Vec<Arc<Expression>>,
|
||||
},
|
||||
// `{ ... }`
|
||||
AttributeSet {
|
||||
elements: Vec<(AttributeName, Arc<Expression>)>,
|
||||
inherits: Vec<Inherit>,
|
||||
},
|
||||
// `rec ...`
|
||||
RecursiveSet(Arc<Expression>),
|
||||
// `NAME: BODY
|
||||
// `NAME@{...}: BODY
|
||||
// `{...}: BODY
|
||||
Lambda {
|
||||
// quoting not allowed
|
||||
name: Option<Identifier>,
|
||||
set_params: LambdaSetParam,
|
||||
body: Arc<Expression>,
|
||||
},
|
||||
// `if COND then TRUE_BRANCH else FALSE_BRANCH`
|
||||
Conditional {
|
||||
cond: Arc<Expression>,
|
||||
true_branch: Arc<Expression>,
|
||||
false_branch: Arc<Expression>,
|
||||
},
|
||||
// `assert ASSERTION; BODY`
|
||||
Assert {
|
||||
assertion: Arc<Expression>,
|
||||
body: Arc<Expression>,
|
||||
}
|
||||
}
|
8
src/parser/mod.rs
Normal file
8
src/parser/mod.rs
Normal file
@ -0,0 +1,8 @@
|
||||
mod common;
|
||||
pub mod expression;
|
||||
pub mod source;
|
||||
pub mod token;
|
||||
|
||||
pub use self::{
|
||||
common::{Number, SpannedData, Identifier},
|
||||
};
|
314
src/parser/source.rs
Normal file
314
src/parser/source.rs
Normal file
@ -0,0 +1,314 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Source {
|
||||
pub filename: String,
|
||||
pub content: String,
|
||||
}
|
||||
|
||||
impl Source {
|
||||
pub fn span<'a>(self: &'a Arc<Self>) -> SpanRef<'a> {
|
||||
// Ensure offset/length can be stored in u32 for `Span`
|
||||
assert!(self.content.len() <= u32::MAX as usize);
|
||||
SpanRef {
|
||||
source: self,
|
||||
data: self.content.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Span {
|
||||
source: Arc<Source>,
|
||||
start: u32,
|
||||
length: u32,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.source.content[self.start as usize..][..self.length as usize]
|
||||
}
|
||||
|
||||
pub fn as_ref(&self) -> SpanRef<'_> {
|
||||
SpanRef {
|
||||
source: &self.source,
|
||||
data: self.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::Slice<std::ops::Range<usize>> for Span {
|
||||
fn slice(&self, range: std::ops::Range<usize>) -> Self {
|
||||
assert!(range.start <= range.end);
|
||||
assert!(range.end <= self.length as usize);
|
||||
Self {
|
||||
source: self.source.clone(),
|
||||
start: self.start + range.start as u32,
|
||||
length: (range.end - range.start) as u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::Slice<std::ops::RangeTo<usize>> for Span {
|
||||
fn slice(&self, range: std::ops::RangeTo<usize>) -> Self {
|
||||
assert!(range.end <= self.length as usize);
|
||||
Self {
|
||||
source: self.source.clone(),
|
||||
start: self.start,
|
||||
length: range.end as u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::Slice<std::ops::RangeFrom<usize>> for Span {
|
||||
fn slice(&self, range: std::ops::RangeFrom<usize>) -> Self {
|
||||
assert!(range.start <= self.length as usize);
|
||||
Self {
|
||||
source: self.source.clone(),
|
||||
start: self.start + range.start as u32,
|
||||
length: self.length - range.start as u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::Slice<std::ops::RangeFull> for Span {
|
||||
fn slice(&self, _range: std::ops::RangeFull) -> Self {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Span {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}:{}: {:?}",
|
||||
self.source.filename,
|
||||
self.start,
|
||||
self.as_str(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SpanRef<'_>> for Span {
|
||||
fn from(span_ref: SpanRef<'_>) -> Self {
|
||||
use nom::Offset;
|
||||
let start = span_ref.source.content.as_str().offset(span_ref.data) as u32;
|
||||
Self {
|
||||
source: span_ref.source.clone(),
|
||||
start,
|
||||
length: span_ref.data.len() as u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct SpanRef<'a> {
|
||||
source: &'a Arc<Source>,
|
||||
data: &'a str,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SpanRef<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
use nom::Offset;
|
||||
write!(
|
||||
f,
|
||||
"{}:{}: {:?}",
|
||||
self.source.filename,
|
||||
self.source.content.as_str().offset(self.data),
|
||||
self.data,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> SpanRef<'a> {
|
||||
pub fn as_str(&self) -> &'a str {
|
||||
self.data
|
||||
}
|
||||
|
||||
fn locate(&self, data: &str) -> &'a str {
|
||||
use nom::Offset;
|
||||
&self.data[self.data.offset(data)..][..data.len()]
|
||||
}
|
||||
|
||||
fn lift_error<E>(&self, e: nom::error::Error<&str>) -> E
|
||||
where
|
||||
E: nom::error::ParseError<Self>
|
||||
{
|
||||
let input = Self { data: self.locate(e.input), ..*self };
|
||||
E::from_error_kind(input, e.code)
|
||||
}
|
||||
|
||||
fn lift_result<T, E>(&self, r: Result<T, nom::Err<nom::error::Error<&str>>>) -> Result<T, nom::Err<E>>
|
||||
where
|
||||
E: nom::error::ParseError<Self>
|
||||
{
|
||||
match r {
|
||||
Ok(v) => Ok(v),
|
||||
Err(e) => Err(e.map(|e| self.lift_error(e))),
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<'a> nom::AsBytes for SpanRef<'a> {
|
||||
fn as_bytes(&self) -> &'a [u8] {
|
||||
self.data.as_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b> nom::Compare<&'b str> for SpanRef<'a> {
|
||||
#[inline(always)]
|
||||
fn compare(&self, t: &'b str) -> nom::CompareResult {
|
||||
self.data.compare(t)
|
||||
}
|
||||
|
||||
fn compare_no_case(&self, t: &'b str) -> nom::CompareResult {
|
||||
self.data.compare_no_case(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::ExtendInto for SpanRef<'_> {
|
||||
type Item = char;
|
||||
type Extender = String;
|
||||
|
||||
fn new_builder(&self) -> Self::Extender {
|
||||
String::new()
|
||||
}
|
||||
|
||||
fn extend_into(&self, acc: &mut Self::Extender) {
|
||||
acc.push_str(self.data);
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::FindSubstring<&str> for SpanRef<'_> {
|
||||
fn find_substring(&self, substr: &str) -> Option<usize> {
|
||||
self.data.find_substring(substr)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> nom::FindToken<T> for SpanRef<'_>
|
||||
where
|
||||
for<'a> &'a str: nom::FindToken<T>,
|
||||
{
|
||||
fn find_token(&self, token: T) -> bool {
|
||||
self.data.find_token(token)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> nom::InputIter for SpanRef<'a> {
|
||||
type Item = char;
|
||||
type Iter = std::str::CharIndices<'a>;
|
||||
type IterElem = std::str::Chars<'a>;
|
||||
|
||||
fn iter_indices(&self) -> Self::Iter {
|
||||
self.data.iter_indices()
|
||||
}
|
||||
|
||||
fn iter_elements(&self) -> Self::IterElem {
|
||||
self.data.iter_elements()
|
||||
}
|
||||
|
||||
fn position<P>(&self, predicate: P) -> Option<usize>
|
||||
where
|
||||
P: Fn(Self::Item) -> bool
|
||||
{
|
||||
self.data.position(predicate)
|
||||
}
|
||||
|
||||
fn slice_index(&self, count: usize) -> Result<usize, nom::Needed> {
|
||||
self.as_str().slice_index(count)
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::InputLength for SpanRef<'_> {
|
||||
fn input_len(&self) -> usize {
|
||||
self.data.len() as usize
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> nom::InputTake for SpanRef<'a> {
|
||||
fn take(&self, count: usize) -> Self {
|
||||
let data = self.data.take(count);
|
||||
Self { data, ..*self }
|
||||
}
|
||||
|
||||
fn take_split(&self, count: usize) -> (Self, Self) {
|
||||
let (prefix, suffix) = self.data.take_split(count);
|
||||
(Self { data: prefix, ..*self }, Self { data: suffix, ..*self })
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::InputTakeAtPosition for SpanRef<'_> {
|
||||
type Item = char;
|
||||
|
||||
fn split_at_position<P, E: nom::error::ParseError<Self>>(
|
||||
&self,
|
||||
predicate: P
|
||||
) -> nom::IResult<Self, Self, E>
|
||||
where
|
||||
P: Fn(Self::Item) -> bool
|
||||
{
|
||||
let (rem, data) = self.lift_result(self.data.split_at_position(predicate))?;
|
||||
Ok((Self { data: rem, ..*self }, Self { data, ..*self } ))
|
||||
}
|
||||
|
||||
fn split_at_position1<P, E: nom::error::ParseError<Self>>(
|
||||
&self,
|
||||
predicate: P,
|
||||
e: nom::error::ErrorKind
|
||||
) -> nom::IResult<Self, Self, E>
|
||||
where
|
||||
P: Fn(Self::Item) -> bool
|
||||
{
|
||||
let (rem, data) = self.lift_result(self.data.split_at_position1(predicate, e))?;
|
||||
Ok((Self { data: rem, ..*self }, Self { data, ..*self } ))
|
||||
}
|
||||
|
||||
fn split_at_position_complete<P, E: nom::error::ParseError<Self>>(
|
||||
&self,
|
||||
predicate: P
|
||||
) -> nom::IResult<Self, Self, E>
|
||||
where
|
||||
P: Fn(Self::Item) -> bool
|
||||
{
|
||||
let (rem, data) = self.lift_result(self.data.split_at_position_complete(predicate))?;
|
||||
Ok((Self { data: rem, ..*self }, Self { data, ..*self } ))
|
||||
}
|
||||
|
||||
fn split_at_position1_complete<P, E: nom::error::ParseError<Self>>(
|
||||
&self,
|
||||
predicate: P,
|
||||
e: nom::error::ErrorKind
|
||||
) -> nom::IResult<Self, Self, E>
|
||||
where
|
||||
P: Fn(Self::Item) -> bool
|
||||
{
|
||||
let (rem, data) = self.lift_result(self.data.split_at_position1_complete(predicate, e))?;
|
||||
Ok((Self { data: rem, ..*self }, Self { data, ..*self } ))
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> nom::ParseTo<R> for SpanRef<'_>
|
||||
where
|
||||
for<'a> &'a str: nom::ParseTo<R>,
|
||||
{
|
||||
fn parse_to(&self) -> Option<R> {
|
||||
self.data.parse_to()
|
||||
}
|
||||
}
|
||||
|
||||
impl nom::Offset for SpanRef<'_> {
|
||||
fn offset(&self, second: &Self) -> usize {
|
||||
self.as_str().offset(second.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> nom::Slice<R> for SpanRef<'_>
|
||||
where
|
||||
for<'a> &'a str: nom::Slice<R>
|
||||
{
|
||||
fn slice(&self, range: R) -> Self {
|
||||
let data = self.data.slice(range);
|
||||
Self { data, ..*self }
|
||||
}
|
||||
}
|
169
src/parser/token/brackets.rs
Normal file
169
src/parser/token/brackets.rs
Normal file
@ -0,0 +1,169 @@
|
||||
use super::{
|
||||
Token,
|
||||
SpannedData,
|
||||
SpanRef,
|
||||
PResult,
|
||||
TokenList,
|
||||
IResultExt,
|
||||
SpanExt,
|
||||
};
|
||||
|
||||
fn parse_bracketed<'a, O, C, F, T>(
|
||||
open_tag: &'static str,
|
||||
close_tag: &'static str,
|
||||
open: O,
|
||||
close: C,
|
||||
constructor: F,
|
||||
span: SpanRef<'a>,
|
||||
) -> PResult<'a, SpannedData<T>>
|
||||
where
|
||||
F: FnOnce(SpannedData<O>, TokenList, SpannedData<C>) -> T,
|
||||
{
|
||||
use nom::{Offset, Slice};
|
||||
|
||||
let (rem_span, open_span) = nom::bytes::complete::tag(open_tag)(span)?;
|
||||
let (rem_span, inner) = TokenList::parse_expression(rem_span).unrecoverable()?;
|
||||
let (rem_span, close_span) = nom::bytes::complete::tag(close_tag)(rem_span).unrecoverable()?;
|
||||
let result = constructor(open_span.data(open), inner, close_span.data(close));
|
||||
let index = span.offset(&rem_span);
|
||||
let bracket_span = span.slice(..index);
|
||||
Ok((rem_span, bracket_span.data(result)))
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CurlyOpen;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CurlyClose;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CurlyBrackets {
|
||||
pub open: SpannedData<CurlyOpen>,
|
||||
pub inner: TokenList,
|
||||
pub close: SpannedData<CurlyClose>,
|
||||
}
|
||||
|
||||
impl CurlyBrackets {
|
||||
pub(super) fn parse(span: SpanRef) -> PResult<SpannedData<Token>> {
|
||||
parse_bracketed(
|
||||
"{", "}", CurlyOpen, CurlyClose,
|
||||
|open, inner, close| {
|
||||
Self { open, inner, close }.into()
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for CurlyBrackets {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.inner.0.is_empty() {
|
||||
f.write_str("{ }")
|
||||
} else {
|
||||
write!(f, "{{ {:?} }}", self.inner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SquareOpen;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SquareClose;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SquareBrackets {
|
||||
pub open: SpannedData<SquareOpen>,
|
||||
pub inner: TokenList,
|
||||
pub close: SpannedData<SquareClose>,
|
||||
}
|
||||
|
||||
impl SquareBrackets {
|
||||
pub(super) fn parse(span: SpanRef) -> PResult<SpannedData<Token>> {
|
||||
parse_bracketed(
|
||||
"[", "]", SquareOpen, SquareClose,
|
||||
|open, inner, close| {
|
||||
Self { open, inner, close }.into()
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SquareBrackets {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.inner.0.is_empty() {
|
||||
f.write_str("[ ]")
|
||||
} else {
|
||||
write!(f, "[ {:?} ]", self.inner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RoundOpen;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RoundClose;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RoundBrackets {
|
||||
pub open: SpannedData<RoundOpen>,
|
||||
pub inner: TokenList,
|
||||
pub close: SpannedData<RoundClose>,
|
||||
}
|
||||
|
||||
impl RoundBrackets {
|
||||
pub(super) fn parse(span: SpanRef) -> PResult<SpannedData<Token>> {
|
||||
parse_bracketed(
|
||||
"(", ")", RoundOpen, RoundClose,
|
||||
|open, inner, close| {
|
||||
Self { open, inner, close }.into()
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for RoundBrackets {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.inner.0.is_empty() {
|
||||
f.write_str("( )")
|
||||
} else {
|
||||
write!(f, "( {:?} )", self.inner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InterpolateOpen;
|
||||
|
||||
/// Any `${...}` expresions, whether in strings or outside
|
||||
#[derive(Clone)]
|
||||
pub struct Interpolate {
|
||||
pub open: SpannedData<InterpolateOpen>,
|
||||
pub inner: TokenList,
|
||||
pub close: SpannedData<CurlyClose>,
|
||||
}
|
||||
|
||||
impl Interpolate {
|
||||
pub(super) fn parse(span: SpanRef) -> PResult<SpannedData<Interpolate>> {
|
||||
parse_bracketed(
|
||||
"${", "}", InterpolateOpen, CurlyClose,
|
||||
|open, inner, close| {
|
||||
Self { open, inner, close }
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Interpolate {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if self.inner.0.is_empty() {
|
||||
f.write_str("${ }")
|
||||
} else {
|
||||
write!(f, "${{ {:?} }}", self.inner)
|
||||
}
|
||||
}
|
||||
}
|
157
src/parser/token/mod.rs
Normal file
157
src/parser/token/mod.rs
Normal file
@ -0,0 +1,157 @@
|
||||
mod brackets;
|
||||
mod number;
|
||||
mod op_kw_ident;
|
||||
mod path;
|
||||
mod strings;
|
||||
mod tokenlist;
|
||||
|
||||
pub use self::{
|
||||
brackets::{
|
||||
CurlyOpen,
|
||||
CurlyClose,
|
||||
CurlyBrackets,
|
||||
SquareOpen,
|
||||
SquareClose,
|
||||
SquareBrackets,
|
||||
RoundOpen,
|
||||
RoundClose,
|
||||
RoundBrackets,
|
||||
InterpolateOpen,
|
||||
Interpolate,
|
||||
},
|
||||
op_kw_ident::SimpleToken,
|
||||
path::Path,
|
||||
tokenlist::TokenList,
|
||||
strings::{
|
||||
Literal,
|
||||
StringPart,
|
||||
},
|
||||
};
|
||||
|
||||
use super::{
|
||||
source::{Span, SpanRef},
|
||||
Number,
|
||||
SpannedData,
|
||||
Identifier,
|
||||
};
|
||||
|
||||
/// Helper to pair a parsed value with the span it was parsed from.
trait SpanExt {
    /// Wrap `data` together with this span into a [`SpannedData`].
    fn data<T>(self, data: T) -> SpannedData<T>;
}
|
||||
|
||||
impl SpanExt for SpanRef<'_> {
|
||||
fn data<T>(self, data: T) -> SpannedData<T> {
|
||||
SpannedData { data: data, span: self.into() }
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper parser that records the input span consumed by the inner
/// parser `F` and returns its output `O` as [`SpannedData<O>`].
struct Spanned<F, O> {
    f: F,
    // marker: `O` only appears in the `Parser` impl, not in the struct body
    o: std::marker::PhantomData<O>,
}
|
||||
|
||||
impl<'a, F, O, E> nom::Parser<SpanRef<'a>, SpannedData<O>, E> for Spanned<F, O>
where
    F: nom::Parser<SpanRef<'a>, O, E>,
{
    fn parse(&mut self, input: SpanRef<'a>) -> nom::IResult<SpanRef<'a>, SpannedData<O>, E> {
        use nom::{Offset, Slice};

        match self.f.parse(input) {
            Ok((remaining, result)) => {
                // the consumed span is the difference between the original
                // input and what the inner parser left over
                let index = input.offset(&remaining);
                let consumed = input.slice(..index);
                Ok((remaining, consumed.data(result)))
            }
            Err(e) => Err(e),
        }
    }
}
|
||||
|
||||
/// Extension for nom parsers over [`SpanRef`]: wrap results in
/// [`SpannedData`] via [`Spanned`].
trait ParserExt<'a, O, E> {
    /// Wrap this parser so it also reports the span it consumed.
    fn spanned(self) -> Spanned<Self, O>
    where
        Self: Sized,
    {
        Spanned { f: self, o: std::marker::PhantomData }
    }
}
|
||||
|
||||
// blanket impl: every nom parser over `SpanRef` gets `.spanned()` for free
impl<'a, O, E, T> ParserExt<'_, O, E> for T
where
    T: nom::Parser<SpanRef<'a>, O, E>
{
}
|
||||
|
||||
/// Extension to turn recoverable nom errors into unrecoverable failures.
trait IResultExt {
    /// Convert `Err(Error(_))` into `Err(Failure(_))`; `Ok` and other
    /// error variants pass through unchanged.
    fn unrecoverable(self) -> Self;
}
|
||||
|
||||
impl<T, E> IResultExt for Result<T, nom::Err<E>> {
|
||||
fn unrecoverable(self) -> Self {
|
||||
match self {
|
||||
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(e)),
|
||||
v => v,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single lexed token of the input.
#[derive(Clone, derivative::Derivative)]
#[derivative(Debug)]
pub enum Token {
    #[derivative(Debug="transparent")]
    SimpleToken(SimpleToken),
    #[derivative(Debug="transparent")]
    Number(Number),
    #[derivative(Debug="transparent")]
    Identifier(Identifier),
    #[derivative(Debug="transparent")]
    Path(Path),
    /// `"..."` (might have been ''..'' or URI in source)
    String(Vec<StringPart>),
    /// `${...}`
    #[derivative(Debug="transparent")]
    Interpolate(Interpolate),
    /// `{ ... }`
    #[derivative(Debug="transparent")]
    CurlyBrackets(CurlyBrackets),
    /// `[ ... ]`
    #[derivative(Debug="transparent")]
    SquareBrackets(SquareBrackets),
    /// `( ... )`
    #[derivative(Debug="transparent")]
    RoundBrackets(RoundBrackets),
}
|
||||
|
||||
/// Generate `From<$id> for Token` and `From<SpannedData<$id>> for
/// SpannedData<Token>` impls; the `Token` variant name must equal the
/// payload type name.
macro_rules! to_token {
    ($($id:ident,)*) => { $(
        impl From<$id> for Token {
            fn from(t: $id) -> Self {
                Self::$id(t)
            }
        }

        impl From<SpannedData<$id>> for SpannedData<Token> {
            fn from(t: SpannedData<$id>) -> Self {
                SpannedData {
                    data: Token::$id(t.data),
                    span: t.span,
                }
            }
        }
    )* };
}
|
||||
|
||||
// NOTE(review): the `String` variant is absent here — presumably because its
// payload is `Vec<StringPart>`, not a type named `String` (the macro requires
// matching names); confirm that is the intent.
to_token!{
    SimpleToken,
    Number,
    Identifier,
    Path,
    Interpolate,
    CurlyBrackets,
    SquareBrackets,
    RoundBrackets,
}
|
||||
|
||||
// TODO: switch to a richer error type for better diagnostics, e.g.:
// pub type PResult<T> = nom::IResult<Span, T, nom_supreme::error::ErrorTree<Span>>;
/// Result type used by all token-level parsers.
pub type PResult<'a, T> = nom::IResult<SpanRef<'a>, T>;
|
48
src/parser/token/number.rs
Normal file
48
src/parser/token/number.rs
Normal file
@ -0,0 +1,48 @@
|
||||
use nom::Parser;
|
||||
use super::{
|
||||
SpanRef,
|
||||
PResult,
|
||||
Token,
|
||||
Number,
|
||||
IResultExt,
|
||||
SpannedData,
|
||||
SpanExt,
|
||||
};
|
||||
|
||||
// should come after path
|
||||
/// Recognize the extent of a number literal; the caller wraps this in
/// `recognize` to obtain the consumed span, so this returns only `()`.
fn parse_number_span(span: SpanRef) -> PResult<()> {
    // if not a path, everything that starts with an optional '-', optional '.'
    // followed by digits is a number.

    let (span, _) = nom::sequence::tuple((
        nom::combinator::opt(nom::bytes::complete::tag("-")),
        nom::combinator::opt(nom::bytes::complete::tag(".")),
        nom::character::complete::digit1,
    ))(span)?;

    // if we fail now, fail hard (upstream nix parses something crazy here).
    // take up all alpha characters too, should be separated by something.
    let (span, _) = nom::multi::many0_count(nom::branch::alt((
        nom::character::complete::alphanumeric1.map(|_| ()),
        nom::bytes::complete::tag(".").map(|_| ()),
        nom::bytes::complete::tag("e").map(|_| ()),
    )))(span).unrecoverable()?;

    Ok((span, ()))
}
|
||||
|
||||
impl Number {
    // should come after path
    /// Parse an integer or float literal into a [`Token::Number`].
    pub(super) fn parse(span: SpanRef) -> PResult<SpannedData<Token>> {
        let (rem_span, num_span) = nom::combinator::recognize(parse_number_span)(span)?;
        let num_s = num_span.as_str();
        // try integer first, then float; target types are picked by
        // inference from the `Number::Integer` / `Number::Float` payloads
        let num = if let Ok(num) = num_s.parse() {
            Number::Integer(num)
        } else if let Ok(num) = num_s.parse() {
            Number::Float(num)
        } else {
            // matched the number shape but neither parse accepted it: hard failure
            return nom::combinator::fail(span).unrecoverable();
        };
        Ok((rem_span, num_span.data(Token::Number(num))))
    }
}
|
161
src/parser/token/op_kw_ident.rs
Normal file
161
src/parser/token/op_kw_ident.rs
Normal file
@ -0,0 +1,161 @@
|
||||
use nom::Parser;
|
||||
use super::{
|
||||
SpanRef,
|
||||
PResult,
|
||||
SpannedData,
|
||||
Token,
|
||||
SpanExt,
|
||||
Identifier,
|
||||
};
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
/// Keywords / operators we tokenize as "standalone"
/// (payload-free; the matched span is carried by [`SpannedData`]).
pub enum SimpleToken {
    /// `=`
    Assign,
    /// `:`
    Colon,
    /// `;`
    SemiColon,
    /// `@` - lambda parameter alias
    At,
    /// `.` - attribute selection
    Dot,
    /// `?` - has attribute
    QuestionMark,
    /// `//` - attribute set update
    DoubleSlash,
    /// `++` - list concatenation
    DoublePlus,

    /// `*`
    Multiplication,
    /// `/`
    Division,
    /// `+`
    Plus,
    /// `-`
    Minus,

    /// `<`
    LessThan,
    /// `<=`
    LessThanOrEqual,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterThanOrEqual,
    /// `==`
    Equal,
    /// `!=`
    NotEqual,

    /// `!`
    LogicNot,
    /// `&&`
    LogicAnd,
    /// `||`
    LogicOr,
    /// `->` (`a -> b` == `!a || b`)
    LogicImplication,

    /// `or` - attribute selection fallback
    KwOr,
    /// `let`
    KwLet,
    /// `with`
    KwWith,
    /// `rec`
    KwRec,
    /// `inherit`
    KwInherit,
    /// `if`
    KwIf,
    /// `then`
    KwThen,
    /// `else`
    KwElse,
    /// `assert`
    KwAssert,
}
|
||||
|
||||
// this also finds (some) path prefixes - path alternative should come before
|
||||
// this also finds (some) path prefixes - path alternative should come before
/// Parse an identifier (`[A-Za-z_][A-Za-z0-9_-]*`), turning reserved
/// words into their keyword tokens.
fn ident_or_keyword(span: SpanRef) -> PResult<SpannedData<Token>> {
    let (rem_span, ident_span) = nom::combinator::recognize(
        nom::sequence::pair(
            // first char: letter or underscore
            nom::branch::alt((
                nom::character::complete::alpha1.map(|_| ()),
                nom::bytes::complete::tag("_").map(|_| ()),
            )),
            // rest: letters, digits, underscore, dash
            nom::multi::many0_count(nom::branch::alt((
                nom::character::complete::alphanumeric1.map(|_| ()),
                nom::bytes::complete::tag("_").map(|_| ()),
                nom::bytes::complete::tag("-").map(|_| ()),
            )))
        )
    )(span)?;
    let t = match ident_span.as_str() {
        "or" => SimpleToken::KwOr,
        "let" => SimpleToken::KwLet,
        "with" => SimpleToken::KwWith,
        "rec" => SimpleToken::KwRec,
        "inherit" => SimpleToken::KwInherit,
        "if" => SimpleToken::KwIf,
        "then" => SimpleToken::KwThen,
        "else" => SimpleToken::KwElse,
        "assert" => SimpleToken::KwAssert,
        // not a keyword: plain identifier
        ident => return Ok((
            rem_span,
            ident_span.data(Token::Identifier(Identifier::from_ref(ident))),
        )),
    };
    Ok((rem_span, ident_span.data(Token::SimpleToken(t))))
}
|
||||
|
||||
fn simple_tagged(tag: &'static str, t: SimpleToken) -> impl Fn(SpanRef) -> PResult<SpannedData<Token>> {
|
||||
move |span| {
|
||||
let (rem_span, token_span) = nom::bytes::complete::tag(tag)(span)?;
|
||||
Ok((rem_span, token_span.data(Token::SimpleToken(t))))
|
||||
}
|
||||
}
|
||||
|
||||
fn simple_op(span: SpanRef) -> PResult<SpannedData<Token>> {
|
||||
nom::branch::alt((
|
||||
nom::branch::alt((
|
||||
simple_tagged(":", SimpleToken::Colon),
|
||||
simple_tagged(";", SimpleToken::SemiColon),
|
||||
simple_tagged("@", SimpleToken::At),
|
||||
simple_tagged(".", SimpleToken::Dot),
|
||||
simple_tagged("?", SimpleToken::QuestionMark),
|
||||
simple_tagged("//", SimpleToken::DoubleSlash),
|
||||
simple_tagged("++", SimpleToken::DoublePlus),
|
||||
|
||||
simple_tagged("*", SimpleToken::Multiplication),
|
||||
simple_tagged("/", SimpleToken::Division),
|
||||
simple_tagged("+", SimpleToken::Plus),
|
||||
simple_tagged("-", SimpleToken::Minus),
|
||||
)),
|
||||
nom::branch::alt((
|
||||
simple_tagged("<=", SimpleToken::LessThanOrEqual),
|
||||
simple_tagged("<", SimpleToken::LessThan),
|
||||
simple_tagged(">=", SimpleToken::GreaterThanOrEqual),
|
||||
simple_tagged(">", SimpleToken::GreaterThan),
|
||||
simple_tagged("==", SimpleToken::Equal),
|
||||
simple_tagged("!=", SimpleToken::NotEqual),
|
||||
|
||||
simple_tagged("=", SimpleToken::Assign),
|
||||
|
||||
simple_tagged("!", SimpleToken::LogicNot),
|
||||
simple_tagged("&&", SimpleToken::LogicAnd),
|
||||
simple_tagged("||", SimpleToken::LogicOr),
|
||||
simple_tagged("->", SimpleToken::LogicImplication),
|
||||
)),
|
||||
))(span)
|
||||
}
|
||||
|
||||
/// Parse an operator, keyword or identifier token (operators first, then
/// identifiers/keywords).
pub(super) fn op_ident_or_keyword(span: SpanRef) -> PResult<SpannedData<Token>> {
    nom::branch::alt((
        simple_op,
        ident_or_keyword,
    ))(span)
}
|
175
src/parser/token/path.rs
Normal file
175
src/parser/token/path.rs
Normal file
@ -0,0 +1,175 @@
|
||||
use nom::Parser;
|
||||
|
||||
use super::{
|
||||
SpannedData,
|
||||
StringPart,
|
||||
SpanRef,
|
||||
PResult,
|
||||
Literal,
|
||||
SpanExt,
|
||||
Interpolate,
|
||||
IResultExt,
|
||||
Token,
|
||||
};
|
||||
|
||||
/// A path token such as `./foo/${bar}/baz`.
#[derive(Clone, Debug)]
pub struct Path {
    /// Leading literal portion of the path (everything up to the first
    /// interpolation, or the whole path when there is none).
    pub base: SpannedData<String>,
    /// Literal and `${...}` parts following the first interpolation.
    pub additional: Vec<StringPart>,
}
|
||||
|
||||
/// Match a single `/` path separator, rejecting `//` (the update
/// operator) and `/*` (a comment start).
fn path_separator(span: SpanRef) -> PResult<SpanRef> {
    nom::sequence::preceded(
        nom::sequence::pair(
            nom::combinator::not(nom::bytes::complete::tag("//")),
            nom::combinator::not(nom::bytes::complete::tag("/*")),
        ),
        nom::bytes::complete::tag("/"),
    )(span)
}
|
||||
|
||||
/// Incrementally assembles a [`Path`] during parsing, merging adjacent
/// literal input into single parts.
struct PathBuilder<'a> {
    /// Input span the path parse started at.
    span: SpanRef<'a>,
    /// Byte offset (into `span`) where the currently open literal run began.
    cur_lit_start: Option<usize>,
    /// Leading literal part; set once the first literal run ends.
    base: Option<SpannedData<String>>,
    /// Parts after the first interpolation.
    additional: Vec<StringPart>,
}
|
||||
|
||||
impl<'a> PathBuilder<'a> {
    /// Start building at `span`; a literal run opens immediately at offset 0.
    fn new(span: SpanRef<'a>) -> Self {
        Self {
            span,
            cur_lit_start: Some(0),
            base: None,
            additional: Vec::new(),
        }
    }

    /// Note that literal input continues at `lit_span`; only opens a new
    /// run when none is open (adjacent literal pieces are merged).
    fn add_lit(&mut self, lit_span: SpanRef) {
        use nom::Offset;

        if self.cur_lit_start.is_none() {
            self.cur_lit_start = Some(self.span.offset(&lit_span));
        }
    }

    /// Close the currently open literal run (ending right before
    /// `next_span`); the first run becomes `base`, later runs go into
    /// `additional`.
    fn _end_lit(&mut self, next_span: SpanRef) {
        use nom::{Offset, Slice};

        if let Some(start) = self.cur_lit_start.take() {
            let end = self.span.offset(&next_span);
            let lit_span = self.span.slice(start..end);
            let lit = lit_span.data(lit_span.as_str().into());
            if self.additional.is_empty() {
                // first literal run: becomes the path base
                assert!(self.base.is_none());
                self.base = Some(lit);
            } else {
                self.additional.push(StringPart::Literal(Literal::from(lit_span)));
            }
        }
    }

    /// Append an interpolation part, closing any open literal run first.
    /// The base must already exist (a path never starts with `${...}`).
    fn add_interp(&mut self, span: SpanRef, interp: Interpolate) {
        self._end_lit(span);

        assert!(self.base.is_some());
        self.additional.push(StringPart::Interpolate(span.data(interp)));
    }

    /// Finish the path; `rem_span` is the unconsumed input right after it.
    fn build(mut self, rem_span: SpanRef<'_>) -> SpannedData<Path> {
        use nom::{Offset, Slice};

        self._end_lit(rem_span);
        let path = Path {
            base: self.base.take().expect("base can't be empty here"),
            additional: self.additional,
        };

        // the path's span is everything consumed up to `rem_span`
        let end = self.span.offset(&rem_span);
        let path_span = self.span.slice(..end);

        path_span.data(path)
    }
}
|
||||
|
||||
impl Path {
    /// Parse a path token (e.g. `a/b`, `./x`, `~/y`, `a/${b}/c`).
    ///
    /// Succeeds only if a valid [`path_separator`] follows the (possibly
    /// empty) first segment; a single trailing slash means "not a path",
    /// more separators with a trailing slash are a hard failure.
    pub(super) fn parse(span: SpanRef) -> PResult<SpannedData<Token>> {
        // first segment before a '/' - possibly empty
        let mut first_segment = nom::combinator::opt(
            nom::branch::alt((
                // `~` only allowed as first (full) segment
                nom::bytes::complete::tag("~").map(|_| ()),
                nom::sequence::pair(
                    nom::branch::alt((
                        nom::character::complete::alphanumeric1.map(|_| ()),
                        nom::bytes::complete::tag("-").map(|_| ()),
                        nom::bytes::complete::tag("_").map(|_| ()),
                        nom::bytes::complete::tag(".").map(|_| ()),
                    )),
                    nom::multi::many0_count(nom::branch::alt((
                        nom::character::complete::alphanumeric1.map(|_| ()),
                        nom::bytes::complete::tag("-").map(|_| ()),
                        nom::bytes::complete::tag("_").map(|_| ()),
                        nom::bytes::complete::tag(".").map(|_| ()),
                    ))),
                ).map(|_| ()),
            ))
        );

        // segments after the first / contain combinations of literal parts and ${...} expressions
        let mut later_segment_literal = nom::combinator::recognize(
            nom::multi::many1_count(nom::branch::alt((
                nom::character::complete::alphanumeric1.map(|_| ()),
                nom::bytes::complete::tag("-").map(|_| ()),
                nom::bytes::complete::tag("_").map(|_| ()),
                nom::bytes::complete::tag(".").map(|_| ()),
            ))),
        );

        let (mut rem_span, _) = first_segment(span)?;
        path_separator(rem_span)?; // shortcut if it can't be a path

        let mut found_separators = 0;
        let mut path = PathBuilder::new(span);

        while let Ok((next_span, sep_span)) = path_separator(rem_span) {
            found_separators += 1;
            path.add_lit(sep_span);
            rem_span = next_span;
            // number of parts (literals/interpolations) in the current segment
            let mut parts = 0;
            loop {
                if let Ok((next_span, (interp_span, interp))) = nom::combinator::consumed(Interpolate::parse)(rem_span) {
                    path.add_interp(interp_span, interp.data);
                    rem_span = next_span;
                    parts += 1;
                    continue;
                }
                match later_segment_literal(rem_span) as PResult<SpanRef<'_>> {
                    Ok((next_span, lit_span)) => {
                        path.add_lit(lit_span);
                        rem_span = next_span;
                        parts += 1;
                    },
                    Err(_e) => {
                        if parts == 0 {
                            // trailing slash
                            if found_separators == 1 {
                                // only one slash, and it is trailing -> not a path.
                                return nom::combinator::fail(rem_span);
                            } else {
                                // invalid path - trailing slash not allowed
                                // TODO: proper error message
                                return nom::combinator::fail(rem_span).unrecoverable();
                            }
                        }
                        break
                    }
                }
            }
        }

        assert!(found_separators >= 1); // we check for initial separator above

        Ok((rem_span, path.build(rem_span).into()))
    }
}
|
397
src/parser/token/strings.rs
Normal file
397
src/parser/token/strings.rs
Normal file
@ -0,0 +1,397 @@
|
||||
use nom::Parser;
|
||||
|
||||
use super::{
|
||||
Span,
|
||||
SpanRef,
|
||||
SpanExt,
|
||||
PResult,
|
||||
Token,
|
||||
SpannedData,
|
||||
Interpolate, IResultExt,
|
||||
};
|
||||
|
||||
/// A literal piece of string/path content, stored as a span of the source.
#[derive(Clone)]
pub struct Literal {
    span: Span,
}
|
||||
|
||||
impl Literal {
    /// The literal text as it appears in the source.
    pub fn as_str(&self) -> &str {
        self.span.as_str()
    }
}
|
||||
|
||||
impl std::ops::Deref for Literal {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.span.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SpanRef<'_>> for Literal {
    /// Capture the referenced span as an owned [`Span`].
    fn from(span_ref: SpanRef) -> Self {
        Self { span: span_ref.into() }
    }
}
|
||||
|
||||
impl std::fmt::Debug for Literal {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // delegate to str's Debug (quoted/escaped rendering)
        self.as_str().fmt(f)
    }
}
|
||||
|
||||
/// One piece of a parsed (possibly interpolated) string or path.
#[derive(Clone, derivative::Derivative)]
#[derivative(Debug)]
pub enum StringPart {
    /// Literal text taken verbatim from the source.
    #[derivative(Debug="transparent")]
    Literal(Literal),
    /// A single character produced by an escape sequence (e.g. `\n`).
    #[derivative(Debug="transparent")]
    Escaped(char),
    /// An embedded `${...}` expression.
    #[derivative(Debug="transparent")]
    Interpolate(SpannedData<Interpolate>),
}
|
||||
|
||||
/// Incrementally collects [`StringPart`]s while parsing a string,
/// merging adjacent literal runs into a single part.
pub(super) struct StringBuilder<'a> {
    /// Input span the string parse started at.
    span: SpanRef<'a>,
    /// Finished parts.
    parts: Vec<StringPart>,
    /// Byte range (into `span`) of the currently open literal run.
    cur_lit: Option<std::ops::Range<usize>>,
}
|
||||
|
||||
impl<'a> StringBuilder<'a> {
    /// Start collecting parts for a string whose parse began at `span`.
    fn new(span: SpanRef<'a>) -> Self {
        Self {
            span,
            parts: Vec::new(),
            cur_lit: None,
        }
    }

    /// Record literal input at `span`; a run directly adjacent to the
    /// open one extends it, otherwise the open run is flushed first.
    fn add_lit(&mut self, span: SpanRef<'a>) {
        use nom::{Offset, Slice};

        let start = self.span.offset(&span);
        let mut next = start..start + span.as_str().len();
        if let Some(cur) = self.cur_lit.take() {
            if cur.end == next.start {
                // directly adjacent: merge into one literal
                next.start = cur.start;
            } else {
                // gap (e.g. skipped escape characters): flush previous run
                self.parts.push(StringPart::Literal(Literal::from(self.span.slice(cur))));
            }
        }
        self.cur_lit = Some(next);
    }

    /// Flush the currently open literal run, if any.
    fn _end_lit(&mut self) {
        use nom::Slice;

        if let Some(cur) = self.cur_lit.take() {
            self.parts.push(StringPart::Literal(Literal::from(self.span.slice(cur))));
        }
    }

    /// Append an escaped character as its own part.
    fn add_escaped(&mut self, ch: char) {
        self._end_lit();
        self.parts.push(StringPart::Escaped(ch))
    }

    /// Append an interpolation part.
    fn add_interp(&mut self, interp: SpannedData<Interpolate>) {
        self._end_lit();
        self.parts.push(StringPart::Interpolate(interp));
    }

    /// Finish collecting: returns the span covering the whole string
    /// (up to `rem_span`) and the accumulated parts.
    fn finish(mut self, rem_span: SpanRef) -> (SpanRef<'a>, Vec<StringPart>) {
        use nom::{Offset, Slice};
        self._end_lit();
        let length = self.span.offset(&rem_span);
        let span = self.span.slice(..length);
        (span, self.parts)
    }
}
|
||||
|
||||
/// Parse a double-quoted `"..."` string with backslash escapes and
/// `${...}` interpolation.
fn parse_dq(span: SpanRef) -> PResult<SpannedData<Token>> {
    let (mut rem_span, _open_span) = nom::bytes::complete::tag("\"")(span)?;
    let mut sb = StringBuilder::new(span);

    loop {
        // closing quote ends the string
        if let Ok((rem_span, _close_span)) = nom::bytes::complete::tag("\"")(rem_span) as PResult<_> {
            let (span, parts) = sb.finish(rem_span);
            return Ok((rem_span, span.data(Token::String(parts))));
        }
        match Interpolate::parse(rem_span) {
            Ok((r, interp)) => {
                rem_span = r;
                sb.add_interp(interp);
                continue;
            },
            Err(nom::Err::Failure(f)) => return Err(nom::Err::Failure(f)),
            Err(_) => (), // wasn't a ${ ... }, fall through
        }
        // backslash escapes the next character
        if let Ok((r, _)) = nom::bytes::complete::tag("\\")(rem_span) as PResult<_> {
            let (r, (escaped_span, escaped)) = nom::combinator::consumed(
                nom::character::complete::anychar
            )(r).unrecoverable()?;
            rem_span = r;
            match escaped {
                'n' => sb.add_escaped('\n'),
                'r' => sb.add_escaped('\r'),
                't' => sb.add_escaped('\t'),
                '"'|'\\'|'$' => {
                    // must be escaped
                    sb.add_lit(escaped_span);
                },
                _ => {
                    // useless escape
                    sb.add_lit(escaped_span);
                }
            }
            continue;
        }
        // literal text up to the next quote, backslash or `$`.
        // NOTE(review): a bare `$` not starting `${` makes `is_not` fail here
        // and the whole string parse becomes a hard failure - confirm intended.
        let (r, lit_span) = nom::bytes::complete::is_not("\"\\$")(rem_span).unrecoverable()?;
        rem_span = r;
        sb.add_lit(lit_span);
    }
}
|
||||
|
||||
/// Split `lit` into per-line literal parts with the shared indent prefix
/// (`prefix_len` leading spaces) stripped from every line start.
///
/// `at_start` tells whether this literal begins at the start of a line.
fn lit_remove_shared_ident(at_start: bool, prefix_len: usize, lit: &Literal) -> impl Iterator<Item=StringPart> + '_ {
    use nom::Slice;

    let mut offset = 0;
    let lit_str = lit.as_str();
    std::iter::from_fn(move || {
        // if offset > 0 we set it there after we found a `\n` - i.e. always start of a line
        // if offset = 0 it depends on at_start:
        let at_line_start = offset != 0 || at_start;
        let remaining = &lit_str[offset..];
        if remaining.is_empty() { return None; }
        let result: Literal;
        if let Some(line_len) = remaining.find('\n') {
            // this chunk ends just after the newline
            let abs_end = offset+line_len+1;
            if at_line_start {
                let line_offset = prefix_len.min(line_len); // might be an empty line without full prefix
                result = Literal { span: lit.span.slice(offset+line_offset..abs_end) };
            } else {
                // not at line start, nothing to remove
                result = Literal { span: lit.span.slice(offset..abs_end) };
            }
            offset = abs_end;
        } else if at_line_start {
            // not an "empty line" (apart from spaces), i.e. prefix must be here completely
            assert!(remaining.len() >= prefix_len);
            result = Literal { span: lit.span.slice(offset+prefix_len..) };
            offset = lit_str.len(); // end iterator
        } else {
            // not at line start, nothing to remove
            result = Literal { span: lit.span.slice(offset..) };
            offset = lit_str.len(); // end iterator
        }
        Some(StringPart::Literal(result))
    })
}
|
||||
|
||||
fn remove_shared_ident(parts: &mut Vec<StringPart>) {
|
||||
use nom::Slice;
|
||||
|
||||
// remove trailing spaces after the last newline
|
||||
if let Some(StringPart::Literal(last_lit)) = parts.last_mut() {
|
||||
if let Some(last_non_space) = last_lit.rfind(|c| c != ' ') {
|
||||
if last_lit.as_bytes()[last_non_space] == b'\n' {
|
||||
*last_lit = Literal { span: last_lit.span.slice(..last_non_space+1) };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut at_start = true;
|
||||
let mut at_line_start = true;
|
||||
let mut current_max_prefix = None;
|
||||
for part in parts.iter() {
|
||||
if at_line_start && !at_start {
|
||||
// the parser would not have splitted a literal ending in `\n` and
|
||||
// the next one starting with ` `
|
||||
// i.e. there shouldn't be a literal coming right now.
|
||||
// -> empty prefix, nothing to remove
|
||||
return;
|
||||
}
|
||||
at_start = false;
|
||||
if let StringPart::Literal(lit) = part {
|
||||
let lit_str = lit.as_str();
|
||||
let mut lines = lit_str.split('\n');
|
||||
if !at_line_start {
|
||||
// if we weren't at the start of a line skip the
|
||||
// first part before a '\n'.
|
||||
// if there is no '\n' no other parts will follow,
|
||||
// and at_line_start stays false.
|
||||
let _ = lines.next();
|
||||
}
|
||||
for line in lines {
|
||||
// we are now at the start of a line
|
||||
// (either we were at a start before, or the first part was skipped)
|
||||
|
||||
// if there is nothing else than ' ' - ignore line for prefix calculation.
|
||||
if let Some(prefix_len) = line.find(|c| c != ' ') {
|
||||
if prefix_len == 0 {
|
||||
// empty prefix, nothing to remove
|
||||
return;
|
||||
}
|
||||
if let Some(cur_prefix_len) = current_max_prefix {
|
||||
current_max_prefix = Some(prefix_len.min(cur_prefix_len));
|
||||
}
|
||||
}
|
||||
|
||||
// the next iteration will always be at the start of a line,
|
||||
// but if this is the last iteration, at_line_start is true afterwards
|
||||
// only if this part is empty:
|
||||
at_line_start = line.is_empty();
|
||||
}
|
||||
} else if at_line_start {
|
||||
// empty prefix, nothing to remove
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let prefix_len = match current_max_prefix {
|
||||
None => return, // no literal parts -> no prefixes
|
||||
Some(v) => v,
|
||||
};
|
||||
assert!(prefix_len > 0);
|
||||
|
||||
let mut index = 0;
|
||||
let mut at_start = true;
|
||||
while index < parts.len() {
|
||||
if let StringPart::Literal(lit) = parts[index].clone() {
|
||||
let mut clipped_parts = lit_remove_shared_ident(at_start, prefix_len, &lit);
|
||||
if let Some(part) = clipped_parts.next() {
|
||||
parts[index] = part;
|
||||
index += 1;
|
||||
for part in clipped_parts {
|
||||
parts.insert(index, part);
|
||||
index += 1;
|
||||
}
|
||||
} else {
|
||||
parts.remove(index);
|
||||
}
|
||||
} else {
|
||||
index += 1;
|
||||
}
|
||||
at_start = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse an indented `'' ... ''` string, handling the `''`-family escapes
/// (`'''`, `''$`, `''\x`, `$$`) and stripping shared indentation at the end.
fn parse_two_sq(span: SpanRef) -> PResult<SpannedData<Token>> {
    use nom::Slice;

    let (mut rem_span, _open_span) = nom::bytes::complete::tag("''")(span)?;
    let mut sb = StringBuilder::new(span);

    // skip first line if it only contains " " (or is empty)
    let (r, _) = nom::combinator::opt(
        nom::sequence::pair(
            nom::combinator::opt(nom::bytes::complete::is_a(" ")),
            nom::bytes::complete::tag("\n"),
        ),
    )(rem_span)?;
    rem_span = r;

    loop {
        if let Ok((r, escaped_two_sq)) = nom::bytes::complete::tag("'''")(rem_span) as PResult<SpanRef> {
            // '' is escaped by a single '
            rem_span = r;
            sb.add_lit(escaped_two_sq.slice(1..));
            continue;
        }
        if let Ok((r, escaped_dollar)) = nom::bytes::complete::tag("''$")(rem_span) as PResult<SpanRef> {
            // $ is escaped by ''
            rem_span = r;
            sb.add_lit(escaped_dollar.slice(2..));
            continue;
        }
        if let Ok((r, _escape)) = nom::bytes::complete::tag("''\\")(rem_span) as PResult<SpanRef> {
            // ''\ is the generic escape for the following character
            let (r, (escaped_span, escaped)) = nom::combinator::consumed(
                nom::character::complete::anychar
            )(r).unrecoverable()?;
            rem_span = r;
            match escaped {
                'n' => sb.add_escaped('\n'),
                'r' => sb.add_escaped('\r'),
                't' => sb.add_escaped('\t'),
                ' ' => sb.add_escaped(' '), // not part of the indent, add as escaped part
                _ => {
                    // useless escape - \ doesn't need an escape, $ should be ''$, ...
                    sb.add_lit(escaped_span);
                }
            }
            continue;
        }
        if let Ok((r, two_dollar)) = nom::bytes::complete::tag("$$")(rem_span) as PResult<_> {
            // $$ is passed through as literal string, $${..} not parsed as interpolation
            rem_span = r;
            sb.add_lit(two_dollar);
            continue;
        }

        // unescaped '' closes the string (checked after all ''-escapes above)
        if let Ok((rem_span, _close_span)) = nom::bytes::complete::tag("''")(rem_span) as PResult<_> {
            let (span, mut parts) = sb.finish(rem_span);
            remove_shared_ident(&mut parts);
            return Ok((rem_span, span.data(Token::String(parts))));
        }
        if let Ok((r, lit_sq)) = nom::bytes::complete::tag("'")(rem_span) as PResult<SpanRef> {
            // ' - not followed by another '
            rem_span = r;
            sb.add_lit(lit_sq);
            continue;
        }
        match Interpolate::parse(rem_span) {
            Ok((r, interp)) => {
                rem_span = r;
                sb.add_interp(interp);
                continue;
            },
            Err(nom::Err::Failure(f)) => return Err(nom::Err::Failure(f)),
            Err(_) => (), // wasn't a ${ ... }, fall through
        }

        // plain literal text up to the next ' or $
        let (r, lit_span) = nom::bytes::complete::is_not("'$")(rem_span).unrecoverable()?;
        rem_span = r;
        sb.add_lit(lit_span);
    }
}
|
||||
|
||||
fn parse_uri(span: SpanRef) -> PResult<SpannedData<Token>> {
|
||||
// nix doc says: "URIs as defined in appendix B of RFC 2396";
|
||||
// but the appendix only gives a regex to **split** valid URIs,
|
||||
// not one to properly find them in the first place.
|
||||
// it also would match relative URIs and so on - we should only accept absolute URIs.
|
||||
|
||||
// regex to split from appendix b: ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
|
||||
// nix upstream uses: [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
|
||||
|
||||
let (rem_span, uri_span) = nom::combinator::recognize(nom::sequence::tuple((
|
||||
// scheme
|
||||
nom::character::complete::alpha1,
|
||||
nom::multi::many0_count(nom::branch::alt((
|
||||
nom::character::complete::alphanumeric1.map(|_| ()),
|
||||
nom::character::complete::one_of("+-.").map(|_| ()),
|
||||
))),
|
||||
// ":"
|
||||
nom::bytes::complete::tag(":"),
|
||||
// [-a-zA-Z0-9%/?:@&=+$,_.!~*']+
|
||||
nom::multi::many0_count(nom::branch::alt((
|
||||
nom::character::complete::alphanumeric1.map(|_| ()),
|
||||
nom::character::complete::one_of("-%/?:@&=+$,_.!~*'").map(|_| ()),
|
||||
))),
|
||||
)))(span)?;
|
||||
|
||||
let uri_lit = Literal::from(uri_span);
|
||||
let uri = Token::String(vec![StringPart::Literal(uri_lit)]);
|
||||
|
||||
Ok((rem_span, uri_span.data(uri)))
|
||||
}
|
||||
|
||||
/// Parse any string-like token: `"..."`, indented `'' ... ''`, or a bare URI.
pub(super) fn parse_string(span: SpanRef) -> PResult<SpannedData<Token>> {
    nom::branch::alt((
        parse_dq,
        parse_two_sq,
        parse_uri,
    ))(span)
}
|
68
src/parser/token/tokenlist.rs
Normal file
68
src/parser/token/tokenlist.rs
Normal file
@ -0,0 +1,68 @@
|
||||
use nom::Parser;
|
||||
use super::{
|
||||
SpannedData,
|
||||
Token,
|
||||
SpanRef,
|
||||
PResult,
|
||||
};
|
||||
|
||||
/// Parse a single token; alternatives are ordered (strings/URIs first,
/// paths before operators and numbers - see comments in the sub-parsers).
fn parse_token(span: SpanRef) -> PResult<SpannedData<Token>> {
    // NOTE(review): op_ident_or_keyword is tried before Number, so inputs
    // like `-1` or `.5` lex as operator + number - confirm this is intended.
    nom::branch::alt((
        super::strings::parse_string,
        super::Path::parse,
        super::op_kw_ident::op_ident_or_keyword,
        super::Number::parse,
        super::CurlyBrackets::parse,
        super::SquareBrackets::parse,
        super::RoundBrackets::parse,
        Parser::into(super::Interpolate::parse),
    ))(span)
}
|
||||
|
||||
fn skip_ws(span: SpanRef) -> PResult<()> {
|
||||
nom::multi::many0_count(nom::branch::alt((
|
||||
nom::character::complete::multispace1.map(|_| ()),
|
||||
// `# ...` comments
|
||||
nom::sequence::pair(
|
||||
nom::bytes::complete::tag("#"),
|
||||
nom::bytes::complete::is_not("\n\r"),
|
||||
).map(|_| ()),
|
||||
// /* ... */ comments
|
||||
nom::sequence::tuple((
|
||||
nom::bytes::complete::tag("/*"),
|
||||
nom::bytes::complete::take_until("*/"),
|
||||
nom::bytes::complete::tag("*/"),
|
||||
)).map(|_| ()),
|
||||
))).map(|_| ()).parse(span)
|
||||
}
|
||||
|
||||
/// A flat list of spanned tokens.
#[derive(Clone)]
pub struct TokenList(pub Vec<SpannedData<Token>>);
|
||||
|
||||
impl TokenList {
    /// Parse a whitespace/comment-separated sequence of tokens, consuming
    /// leading whitespace first; matches an empty list too.
    pub fn parse_expression(span: SpanRef) -> PResult<Self> {
        nom::sequence::preceded(
            skip_ws,
            nom::multi::many0(
                nom::sequence::terminated(parse_token, skip_ws)
            ),
        ).map(Self).parse(span)
    }

    /// Like [`Self::parse_expression`], but requires the whole input to be consumed.
    pub fn parse_file(span: SpanRef) -> PResult<Self> {
        nom::combinator::all_consuming(Self::parse_expression)(span)
    }
}
|
||||
|
||||
impl std::fmt::Debug for TokenList {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(head) = self.0.first() {
|
||||
head.fmt(f)?;
|
||||
for elem in &self.0[1..] {
|
||||
f.write_str(" ")?;
|
||||
elem.fmt(f)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user