diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 654296c..0df6b41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: dtolnay/rust-toolchain@1.70.0 + - uses: dtolnay/rust-toolchain@1.73.0 with: components: rustfmt - name: Check formatting diff --git a/Cargo.lock b/Cargo.lock index 19c25c1..cf0b4af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -218,6 +218,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "either" version = "1.9.0" @@ -272,6 +285,12 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + [[package]] name = "hermit-abi" version = "0.3.1" @@ -371,6 +390,16 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.20" @@ -390,9 +419,11 @@ dependencies = [ "anyhow", "argh", "criterion", + "dashmap", "filetime", "jemallocator", "libc", + "rayon", "rustc-hash", "tempfile", "windows-sys 0.48.0", @@ -419,6 +450,19 @@ version = "11.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "smallvec", + "windows-targets 0.48.0", +] + [[package]] name = "plotters" version = "0.3.5" @@ -467,9 +511,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -477,9 +521,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -580,6 +624,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.164" @@ -611,6 +661,12 @@ dependencies = [ "serde", ] +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index b91b120..1d8bda2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,13 +11,15 @@ readme = "README.md" repository 
= "https://github.com/evmar/n2" # https://github.com/evmar/n2/issues/74 # Note: if we bump this, may need to bump .github/workflows/ci.yml version too. -rust-version = "1.70.0" +rust-version = "1.73.0" description = "a ninja compatible build system" [dependencies] anyhow = "1.0" argh = "0.1.10" +dashmap = "5.5.3" libc = "0.2" +rayon = "1.8.1" rustc-hash = "1.1.0" [target.'cfg(windows)'.dependencies.windows-sys] diff --git a/src/concurrent_linked_list.rs b/src/concurrent_linked_list.rs new file mode 100644 index 0000000..0c15408 --- /dev/null +++ b/src/concurrent_linked_list.rs @@ -0,0 +1,133 @@ +use std::{ + borrow::Borrow, + fmt::Debug, + marker::PhantomData, + ptr::null_mut, + sync::atomic::{AtomicPtr, Ordering}, +}; + +/// ConcurrentLinkedList is a linked list that can only be prepended to or +/// iterated over. prepend() accepts an &self instead of an &mut self, and can +/// be called from multiple threads at the same time. +pub struct ConcurrentLinkedList { + head: AtomicPtr>, +} + +struct ConcurrentLinkedListNode { + val: T, + next: *mut ConcurrentLinkedListNode, +} + +impl ConcurrentLinkedList { + pub fn new() -> Self { + ConcurrentLinkedList { + head: AtomicPtr::new(null_mut()), + } + } + + pub fn prepend(&self, val: T) { + let new_head = Box::into_raw(Box::new(ConcurrentLinkedListNode { + val, + next: null_mut(), + })); + loop { + let old_head = self.head.load(Ordering::SeqCst); + unsafe { + (*new_head).next = old_head; + if self + .head + .compare_exchange_weak(old_head, new_head, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + break; + } + } + } + } + + pub fn iter(&self) -> impl Iterator { + ConcurrentLinkedListIterator { + cur: self.head.load(Ordering::Relaxed), + lifetime: PhantomData, + } + } +} + +impl Default for ConcurrentLinkedList { + fn default() -> Self { + Self { + head: Default::default(), + } + } +} + +impl Clone for ConcurrentLinkedList { + fn clone(&self) -> Self { + let mut iter = self.iter(); + match iter.next() { + None => Self { 
+ head: AtomicPtr::new(null_mut()), + }, + Some(x) => { + let new_head = Box::into_raw(Box::new(ConcurrentLinkedListNode { + val: x.clone(), + next: null_mut(), + })); + let mut new_tail = new_head; + for x in iter { + unsafe { + (*new_tail).next = Box::into_raw(Box::new(ConcurrentLinkedListNode { + val: x.clone(), + next: null_mut(), + })); + new_tail = (*new_tail).next; + } + } + Self { + head: AtomicPtr::new(new_head), + } + } + } + } +} + +impl Debug for ConcurrentLinkedList { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Slow, but hopefully Debug is only used for actual debugging + f.write_fmt(format_args!("{:?}", self.iter().collect::>())) + } +} + +impl Drop for ConcurrentLinkedList { + fn drop(&mut self) { + let mut cur = self.head.swap(null_mut(), Ordering::Relaxed); + while !cur.is_null() { + unsafe { + // Re-box it so that box will call Drop and deallocate the memory + let boxed = Box::from_raw(cur); + cur = boxed.next; + } + } + } +} + +struct ConcurrentLinkedListIterator<'a, T> { + cur: *const ConcurrentLinkedListNode, + lifetime: PhantomData<&'a ()>, +} + +impl<'a, T: 'a> Iterator for ConcurrentLinkedListIterator<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + if self.cur.is_null() { + None + } else { + unsafe { + let result = Some((*self.cur).val.borrow()); + self.cur = (*self.cur).next; + result + } + } + } +} diff --git a/src/db.rs b/src/db.rs index d6481ad..7157f4b 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,9 +1,9 @@ //! The n2 database stores information about previous builds for determining //! which files are up to date. 
+use crate::graph; use crate::{ - densemap, densemap::DenseMap, graph::BuildId, graph::FileId, graph::Graph, graph::Hashes, - hash::BuildHash, + densemap, densemap::DenseMap, graph::BuildId, graph::Graph, graph::Hashes, hash::BuildHash, }; use anyhow::{anyhow, bail}; use std::collections::HashMap; @@ -12,6 +12,7 @@ use std::io::BufReader; use std::io::Read; use std::io::Write; use std::path::Path; +use std::sync::Arc; const VERSION: u32 = 1; @@ -34,9 +35,9 @@ impl From for Id { #[derive(Default)] pub struct IdMap { /// Maps db::Id to FileId. - fileids: DenseMap, + fileids: DenseMap>, /// Maps FileId to db::Id. - db_ids: HashMap, + db_ids: HashMap<*const graph::File, Id>, } /// RecordWriter buffers writes into a Vec. @@ -110,13 +111,15 @@ impl Writer { w.finish(&mut self.w) } - fn ensure_id(&mut self, graph: &Graph, fileid: FileId) -> std::io::Result { - let id = match self.ids.db_ids.get(&fileid) { + fn ensure_id(&mut self, file: Arc) -> std::io::Result { + let id = match self.ids.db_ids.get(&(file.as_ref() as *const graph::File)) { Some(&id) => id, None => { - let id = self.ids.fileids.push(fileid); - self.ids.db_ids.insert(fileid, id); - self.write_path(&graph.file(fileid).name)?; + let id = self.ids.fileids.push(file.clone()); + self.ids + .db_ids + .insert(file.as_ref() as *const graph::File, id); + self.write_path(&file.name)?; id } }; @@ -134,15 +137,15 @@ impl Writer { let outs = build.outs(); let mark = (outs.len() as u16) | 0b1000_0000_0000_0000; w.write_u16(mark); - for &out in outs { - let id = self.ensure_id(graph, out)?; + for out in outs { + let id = self.ensure_id(out.clone())?; w.write_id(id); } let deps = build.discovered_ins(); w.write_u16(deps.len() as u16); - for &dep in deps { - let id = self.ensure_id(graph, dep)?; + for dep in deps { + let id = self.ensure_id(dep.clone())?; w.write_id(id); } @@ -190,9 +193,11 @@ impl<'a> Reader<'a> { fn read_path(&mut self, len: usize) -> std::io::Result<()> { let name = self.read_str(len)?; // No 
canonicalization needed, paths were written canonicalized. - let fileid = self.graph.files.id_from_canonical(name); - let dbid = self.ids.fileids.push(fileid); - self.ids.db_ids.insert(fileid, dbid); + let file = self.graph.files.id_from_canonical(name); + let dbid = self.ids.fileids.push(file.clone()); + self.ids + .db_ids + .insert(file.as_ref() as *const graph::File, dbid); Ok(()) } @@ -217,7 +222,7 @@ impl<'a> Reader<'a> { // keep reading to parse through it. continue; } - match self.graph.file(self.ids.fileids[fileid]).input { + match *self.ids.fileids[fileid].input.lock().unwrap() { None => { obsolete = true; } @@ -238,10 +243,10 @@ impl<'a> Reader<'a> { } let len = self.read_u16()?; - let mut deps = Vec::new(); + let mut deps = Vec::with_capacity(len as usize); for _ in 0..len { let id = self.read_id()?; - deps.push(self.ids.fileids[id]); + deps.push(self.ids.fileids[id].clone()); } let hash = BuildHash(self.read_u64()?); diff --git a/src/densemap.rs b/src/densemap.rs index 2d9878e..b95b044 100644 --- a/src/densemap.rs +++ b/src/densemap.rs @@ -37,6 +37,13 @@ impl std::ops::IndexMut for DenseMap { } impl DenseMap { + pub fn from_vec(v: Vec) -> Self { + Self { + vec: v, + key_type: PhantomData, + } + } + pub fn lookup(&self, k: K) -> Option<&V> { self.vec.get(k.index()) } diff --git a/src/depfile.rs b/src/depfile.rs index b241cb6..ef46a31 100644 --- a/src/depfile.rs +++ b/src/depfile.rs @@ -85,13 +85,15 @@ pub fn parse<'a>(scanner: &mut Scanner<'a>) -> ParseResult) -> Result>, String> { buf.push(0); - let mut scanner = Scanner::new(buf); - parse(&mut scanner).map_err(|err| scanner.format_parse_error(Path::new("test"), err)) + let mut scanner = Scanner::new(buf, 0); + parse(&mut scanner).map_err(|err| format_parse_error(0, buf, Path::new("test"), err)) } fn must_parse(buf: &mut Vec) -> SmallMap<&str, Vec<&str>> { diff --git a/src/eval.rs b/src/eval.rs index d737bfe..be2a8e4 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -1,17 +1,24 @@ //! 
Represents parsed Ninja strings with embedded variable references, e.g. //! `c++ $in -o $out`, and mechanisms for expanding those into plain strings. -use rustc_hash::FxHashMap; - +use crate::load::Scope; +use crate::load::ScopePosition; +use crate::parse::EvalParser; use crate::smallmap::SmallMap; use std::borrow::Borrow; -use std::borrow::Cow; /// An environment providing a mapping of variable name to variable value. /// This represents one "frame" of evaluation context, a given EvalString may /// need multiple environments in order to be fully expanded. pub trait Env { - fn get_var(&self, var: &str) -> Option>>; + fn evaluate_var( + &self, + result: &mut String, + var: &str, + envs: &[&dyn Env], + scope: &Scope, + position: ScopePosition, + ); } /// One token within an EvalString, either literal text or a variable reference. @@ -26,136 +33,78 @@ pub enum EvalPart> { /// expanded evals, like top-level bindings, and EvalString, which is /// used for delayed evals like in `rule` blocks. 
#[derive(Debug, PartialEq)] -pub struct EvalString>(Vec>); +pub struct EvalString>(T); impl> EvalString { - pub fn new(parts: Vec>) -> Self { - EvalString(parts) + pub fn new(inner: T) -> Self { + EvalString(inner) } - fn evaluate_inner(&self, result: &mut String, envs: &[&dyn Env]) { - for part in &self.0 { + pub fn evaluate_inner( + &self, + result: &mut String, + envs: &[&dyn Env], + scope: &Scope, + position: ScopePosition, + ) { + for part in self.parse() { match part { EvalPart::Literal(s) => result.push_str(s.as_ref()), EvalPart::VarRef(v) => { - for (i, env) in envs.iter().enumerate() { - if let Some(v) = env.get_var(v.as_ref()) { - v.evaluate_inner(result, &envs[i + 1..]); - break; - } + if let Some(env) = envs.first() { + env.evaluate_var(result, v.as_ref(), &envs[1..], scope, position); + } else { + scope.evaluate(result, v.as_ref(), position); } } } } } - fn calc_evaluated_length(&self, envs: &[&dyn Env]) -> usize { - self.0 - .iter() - .map(|part| match part { - EvalPart::Literal(s) => s.as_ref().len(), - EvalPart::VarRef(v) => { - for (i, env) in envs.iter().enumerate() { - if let Some(v) = env.get_var(v.as_ref()) { - return v.calc_evaluated_length(&envs[i + 1..]); - } - } - 0 - } - }) - .sum() - } - /// evalulate turns the EvalString into a regular String, looking up the /// values of variable references in the provided Envs. It will look up /// its variables in the earliest Env that has them, and then those lookups /// will be recursively expanded starting from the env after the one that /// had the first successful lookup. 
- pub fn evaluate(&self, envs: &[&dyn Env]) -> String { + pub fn evaluate(&self, envs: &[&dyn Env], scope: &Scope, position: ScopePosition) -> String { let mut result = String::new(); - result.reserve(self.calc_evaluated_length(envs)); - self.evaluate_inner(&mut result, envs); + self.evaluate_inner(&mut result, envs, scope, position); result } -} -impl EvalString<&str> { - pub fn into_owned(self) -> EvalString { - EvalString( - self.0 - .into_iter() - .map(|part| match part { - EvalPart::Literal(s) => EvalPart::Literal(s.to_owned()), - EvalPart::VarRef(s) => EvalPart::VarRef(s.to_owned()), - }) - .collect(), - ) + pub fn maybe_literal(&self) -> Option<&T> { + if self.0.as_ref().contains('$') { + None + } else { + Some(&self.0) + } } -} -impl EvalString { - pub fn as_cow(&self) -> EvalString> { - EvalString( - self.0 - .iter() - .map(|part| match part { - EvalPart::Literal(s) => EvalPart::Literal(Cow::Borrowed(s.as_ref())), - EvalPart::VarRef(s) => EvalPart::VarRef(Cow::Borrowed(s.as_ref())), - }) - .collect(), - ) + pub fn parse(&self) -> impl Iterator> { + EvalParser::new(self.0.as_ref().as_bytes()) } } impl EvalString<&str> { - pub fn as_cow(&self) -> EvalString> { - EvalString( - self.0 - .iter() - .map(|part| match part { - EvalPart::Literal(s) => EvalPart::Literal(Cow::Borrowed(*s)), - EvalPart::VarRef(s) => EvalPart::VarRef(Cow::Borrowed(*s)), - }) - .collect(), - ) - } -} - -/// A single scope's worth of variable definitions. 
-#[derive(Debug, Default)] -pub struct Vars<'text>(FxHashMap<&'text str, String>); - -impl<'text> Vars<'text> { - pub fn insert(&mut self, key: &'text str, val: String) { - self.0.insert(key, val); - } - pub fn get(&self, key: &str) -> Option<&String> { - self.0.get(key) - } -} -impl<'a> Env for Vars<'a> { - fn get_var(&self, var: &str) -> Option>> { - Some(EvalString::new(vec![EvalPart::Literal( - std::borrow::Cow::Borrowed(self.get(var)?), - )])) - } -} - -impl + PartialEq> Env for SmallMap> { - fn get_var(&self, var: &str) -> Option>> { - Some(self.get(var)?.as_cow()) - } -} - -impl + PartialEq> Env for SmallMap> { - fn get_var(&self, var: &str) -> Option>> { - Some(self.get(var)?.as_cow()) + pub fn into_owned(self) -> EvalString { + EvalString(self.0.to_owned()) } } -impl Env for SmallMap<&str, String> { - fn get_var(&self, var: &str) -> Option>> { - Some(EvalString::new(vec![EvalPart::Literal( - std::borrow::Cow::Borrowed(self.get(var)?), - )])) +impl + PartialEq, V: AsRef> Env for SmallMap> { + fn evaluate_var( + &self, + result: &mut String, + var: &str, + envs: &[&dyn Env], + scope: &Scope, + position: ScopePosition, + ) { + if let Some(v) = self.get(var) { + v.evaluate_inner(result, envs, scope, position); + } else if let Some(env) = envs.first() { + env.evaluate_var(result, var, &envs[1..], scope, position); + } else { + scope.evaluate(result, var, position); + } } } diff --git a/src/file_pool.rs b/src/file_pool.rs new file mode 100644 index 0000000..499812c --- /dev/null +++ b/src/file_pool.rs @@ -0,0 +1,127 @@ +use anyhow::bail; +use core::slice; +use std::{path::Path, sync::Mutex}; + +#[cfg(unix)] +mod mmap { + use super::*; + use libc::{ + c_void, mmap, munmap, sysconf, MAP_ANONYMOUS, MAP_FAILED, MAP_FIXED, MAP_PRIVATE, + PROT_READ, PROT_WRITE, _SC_PAGESIZE, + }; + use std::{ + os::fd::{AsFd, AsRawFd}, + ptr::null_mut, + }; + /// FilePool is a datastructure that is intended to hold onto byte buffers and give out immutable + /// references to them. 
But it can also accept new byte buffers while old ones are still lent out. + /// This requires interior mutability / unsafe code. Appending to a Vec while references to other + /// elements are held is generally unsafe, because the Vec can reallocate all the prior elements + /// to a new memory location. But if the elements themselves are pointers to stable memory, the + /// contents of those pointers can be referenced safely. This also requires guarding the outer + /// Vec with a Mutex so that two threads don't append to it at the same time. + pub struct FilePool { + files: Mutex>, + } + impl FilePool { + pub fn new() -> FilePool { + FilePool { + files: Mutex::new(Vec::new()), + } + } + + pub fn read_file(&self, path: &Path) -> anyhow::Result<&[u8]> { + let page_size = unsafe { sysconf(_SC_PAGESIZE) } as usize; + let file = std::fs::File::open(path)?; + let fd = file.as_fd().as_raw_fd(); + let file_size = file.metadata()?.len() as usize; + let mapping_size = (file_size + page_size).next_multiple_of(page_size); + unsafe { + // size + 1 to add a null terminator. + let addr = mmap(null_mut(), mapping_size, PROT_READ, MAP_PRIVATE, fd, 0); + if addr == MAP_FAILED { + bail!("mmap failed"); + } + + let addr2 = mmap( + addr.add(mapping_size).sub(page_size), + page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + -1, + 0, + ); + if addr2 == MAP_FAILED { + bail!("mmap failed"); + } + *(addr.add(mapping_size).sub(page_size) as *mut u8) = 0; + // The manpages say the extra bytes past the end of the file are + // zero-filled, but just to make sure: + assert!(*(addr.add(file_size) as *mut u8) == 0); + + let files = &mut self.files.lock().unwrap(); + files.push((addr, mapping_size)); + + Ok(slice::from_raw_parts(addr as *mut u8, file_size + 1)) + } + } + } + + // SAFETY: Sync isn't implemented automatically because we have a *mut pointer, + // but that pointer isn't used at all aside from the drop implementation, so + // we won't have data races. 
+ unsafe impl Sync for FilePool {} + unsafe impl Send for FilePool {} + + impl Drop for FilePool { + fn drop(&mut self) { + let files = self.files.lock().unwrap(); + for &(addr, len) in files.iter() { + unsafe { + munmap(addr, len); + } + } + } + } +} + +#[cfg(not(unix))] +mod read { + use crate::scanner::read_file_with_nul; + + use super::*; + + /// FilePool is a datastructure that is intended to hold onto byte buffers and give out immutable + /// references to them. But it can also accept new byte buffers while old ones are still lent out. + /// This requires interior mutability / unsafe code. Appending to a Vec while references to other + /// elements are held is generally unsafe, because the Vec can reallocate all the prior elements + /// to a new memory location. But if the elements themselves are unchanging Vecs, the + /// contents of those Vecs can be referenced safely. This also requires guarding the outer + /// Vec with a Mutex so that two threads don't append to it at the same time. + pub struct FilePool { + files: Mutex>>, + } + + impl FilePool { + pub fn new() -> FilePool { + FilePool { + files: Mutex::new(Vec::new()), + } + } + + pub fn read_file(&self, path: &Path) -> anyhow::Result<&[u8]> { + let bytes = read_file_with_nul(path)?; + let addr = bytes.as_ptr(); + let len = bytes.len(); + self.files.lock().unwrap().push(bytes); + + unsafe { Ok(slice::from_raw_parts(addr as *mut u8, len)) } + } + } +} + +#[cfg(unix)] +pub use mmap::FilePool; + +#[cfg(not(unix))] +pub use read::FilePool; diff --git a/src/graph.rs b/src/graph.rs index d6b1a38..b4b289b 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,28 +1,23 @@ //! The build graph, a graph between files and commands. 
-use rustc_hash::FxHashMap; +use anyhow::bail; +use rustc_hash::{FxHashMap, FxHasher}; use crate::{ + concurrent_linked_list::ConcurrentLinkedList, densemap::{self, DenseMap}, + eval::EvalString, hash::BuildHash, + load::{Scope, ScopePosition}, + smallmap::SmallMap, }; -use std::collections::{hash_map::Entry, HashMap}; -use std::path::{Path, PathBuf}; use std::time::SystemTime; - -/// Id for File nodes in the Graph. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct FileId(u32); -impl densemap::Index for FileId { - fn index(&self) -> usize { - self.0 as usize - } -} -impl From for FileId { - fn from(u: usize) -> FileId { - FileId(u as u32) - } -} +use std::{collections::HashMap, sync::Arc}; +use std::{ + hash::BuildHasherDefault, + path::{Path, PathBuf}, + sync::Mutex, +}; /// Id for Build nodes in the Graph. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] @@ -39,26 +34,26 @@ impl From for BuildId { } /// A single file referenced as part of a build. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct File { /// Canonical path to the file. - pub name: String, + pub name: Arc, /// The Build that generates this file, if any. - pub input: Option, + pub input: Mutex>, /// The Builds that depend on this file as an input. - pub dependents: Vec, + pub dependents: ConcurrentLinkedList, } impl File { pub fn path(&self) -> &Path { - Path::new(&self.name) + Path::new(self.name.as_ref()) } } /// A textual location within a build.ninja file, used in error messages. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FileLoc { - pub filename: std::rc::Rc, + pub filename: Arc, pub line: usize, } impl std::fmt::Display for FileLoc { @@ -74,12 +69,13 @@ pub struct RspFile { } /// Input files to a Build. +#[derive(Debug)] pub struct BuildIns { /// Internally we stuff explicit/implicit/order-only ins all into one Vec. 
/// This is mostly to simplify some of the iteration and is a little more /// memory efficient than three separate Vecs, but it is kept internal to /// Build and only exposed via methods on Build. - pub ids: Vec, + pub ids: Vec>, pub explicit: usize, pub implicit: usize, pub order_only: usize, @@ -88,10 +84,12 @@ pub struct BuildIns { } /// Output files from a Build. +#[derive(Debug)] pub struct BuildOuts { /// Similar to ins, we keep both explicit and implicit outs in one Vec. - pub ids: Vec, + pub ids: Vec>, pub explicit: usize, + pub implicit: usize, } impl BuildOuts { @@ -100,105 +98,175 @@ impl BuildOuts { /// this function removes duplicates from the output list. pub fn remove_duplicates(&mut self) { let mut ids = Vec::new(); - for (i, &id) in self.ids.iter().enumerate() { - if self.ids[0..i].iter().any(|&prev| prev == id) { + for (i, id) in self.ids.iter().enumerate() { + if self.ids[0..i] + .iter() + .any(|prev| std::ptr::eq(prev.as_ref(), id.as_ref())) + { // Skip over duplicate. 
if i < self.explicit { self.explicit -= 1; } continue; } - ids.push(id); + ids.push(id.clone()); } self.ids = ids; } + + pub fn num_outs(&self) -> usize { + self.explicit + self.implicit + } } #[cfg(test)] mod tests { - fn fileids(ids: Vec) -> Vec { - ids.into_iter().map(FileId::from).collect() + use super::*; + + fn assert_file_arc_vecs_equal(a: Vec>, b: Vec>) { + for (x, y) in a.into_iter().zip(b.into_iter()) { + if !Arc::ptr_eq(&x, &y) { + panic!("File vecs not equal"); + } + } } - use super::*; #[test] fn remove_dups_explicit() { + let file1 = Arc::new(File::default()); + let file2 = Arc::new(File::default()); let mut outs = BuildOuts { - ids: fileids(vec![1, 1, 2]), + ids: vec![file1.clone(), file1.clone(), file2.clone()], explicit: 2, + implicit: 0, }; outs.remove_duplicates(); - assert_eq!(outs.ids, fileids(vec![1, 2])); + assert_file_arc_vecs_equal(outs.ids, vec![file1, file2]); assert_eq!(outs.explicit, 1); } #[test] fn remove_dups_implicit() { + let file1 = Arc::new(File::default()); + let file2 = Arc::new(File::default()); let mut outs = BuildOuts { - ids: fileids(vec![1, 2, 1]), + ids: vec![file1.clone(), file2.clone(), file1.clone()], explicit: 2, + implicit: 0, }; outs.remove_duplicates(); - assert_eq!(outs.ids, fileids(vec![1, 2])); + assert_file_arc_vecs_equal(outs.ids, vec![file1, file2]); assert_eq!(outs.explicit, 2); } } +/// A variable lookup environment for magic $in/$out variables. 
+struct BuildImplicitVars<'a> { + explicit_ins: &'a [Arc], + explicit_outs: &'a [Arc], +} +impl<'text> crate::eval::Env for BuildImplicitVars<'text> { + fn evaluate_var( + &self, + result: &mut String, + var: &str, + envs: &[&dyn crate::eval::Env], + scope: &Scope, + position: ScopePosition, + ) { + let mut common = |files: &[Arc], sep: &'static str| { + for (i, file) in files.iter().enumerate() { + if i > 0 { + result.push_str(sep); + } + result.push_str(&file.name); + } + }; + match var { + "in" => common(self.explicit_ins, " "), + "in_newline" => common(self.explicit_ins, "\n"), + "out" => common(self.explicit_outs, " "), + "out_newline" => common(self.explicit_outs, "\n"), + _ => { + if let Some(env) = envs.first() { + env.evaluate_var(result, var, &envs[1..], scope, position); + } else { + scope.evaluate(result, var, position); + } + } + } + } +} + /// A single build action, generating File outputs from File inputs with a command. +#[derive(Debug)] pub struct Build { - /// Source location this Build was declared. - pub location: FileLoc, + pub id: BuildId, - /// User-provided description of the build step. - pub desc: Option, + /// The scope that this build is part of. Used when evaluating the build's + /// bindings. + pub scope: Option>, - /// Command line to run. Absent for phony builds. - pub cmdline: Option, + /// The position of this build in the scope. Used when evaluating the + /// build's bindings. + pub scope_position: ScopePosition, - /// Path to generated `.d` file, if any. - pub depfile: Option, + /// The unevalated output/input files. These strings really have a lifetime + /// of 'text, but we use 'static so that we don't need to add 'text to the + /// build itself. We unsafely cast 'text strings to 'static. The strings + /// are evaluated and this vec is cleared before the lifetime of 'text is + /// over. + pub unevaluated_outs_and_ins: Vec>, - /// If true, extract "/showIncludes" lines from output. 
- pub parse_showincludes: bool, + pub rule: String, - // Struct that contains the path to the rsp file and its contents, if any. - pub rspfile: Option, + // The unevaluated variable bindings. They're stored unevalated so that + // we don't have to evaluate all bindings on all builds. + pub bindings: SmallMap>, - /// Pool to execute this build in, if any. - pub pool: Option, + /// Source location this Build was declared. + pub location: FileLoc, + /// Input files. pub ins: BuildIns, /// Additional inputs discovered from a previous build. - discovered_ins: Vec, + discovered_ins: Vec>, /// Output files. pub outs: BuildOuts, } impl Build { - pub fn new(loc: FileLoc, ins: BuildIns, outs: BuildOuts) -> Self { + pub fn new( + rule: String, + bindings: SmallMap>, + location: FileLoc, + ins: BuildIns, + outs: BuildOuts, + unevaluated_outs_and_ins: Vec>, + ) -> Self { Build { - location: loc, - desc: None, - cmdline: None, - depfile: None, - parse_showincludes: false, - rspfile: None, - pool: None, - ins, + id: BuildId::from(0), + rule, + scope: None, + scope_position: ScopePosition(0), + bindings, + location, + ins: ins, discovered_ins: Vec::new(), - outs, + outs: outs, + unevaluated_outs_and_ins, } } /// Input paths that appear in `$in`. - pub fn explicit_ins(&self) -> &[FileId] { + pub fn explicit_ins(&self) -> &[Arc] { &self.ins.ids[0..self.ins.explicit] } /// Input paths that, if changed, invalidate the output. /// Note this omits discovered_ins, which also invalidate the output. - pub fn dirtying_ins(&self) -> &[FileId] { + pub fn dirtying_ins(&self) -> &[Arc] { &self.ins.ids[0..(self.ins.explicit + self.ins.implicit)] } @@ -206,7 +274,7 @@ impl Build { /// Distinct from dirtying_ins in that it includes order-only dependencies. /// Note that we don't order on discovered_ins, because they're not allowed to /// affect build order. 
- pub fn ordering_ins(&self) -> &[FileId] { + pub fn ordering_ins(&self) -> &[Arc] { &self.ins.ids[0..(self.ins.order_only + self.ins.explicit + self.ins.implicit)] } @@ -214,13 +282,25 @@ impl Build { /// Validation inputs will be built whenever this Build is built, but this Build will not /// wait for them to complete before running. The validation inputs can fail to build, which /// will cause the overall build to fail. - pub fn validation_ins(&self) -> &[FileId] { + pub fn validation_ins(&self) -> &[Arc] { &self.ins.ids[(self.ins.order_only + self.ins.explicit + self.ins.implicit)..] } + fn vecs_of_arcs_eq(a: &Vec>, b: &Vec>) -> bool { + if a.len() != b.len() { + return false; + } + for (x, y) in a.iter().zip(b.iter()) { + if !Arc::ptr_eq(x, y) { + return false; + } + } + return true; + } + /// Potentially update discovered_ins with a new set of deps, returning true if they changed. - pub fn update_discovered(&mut self, deps: Vec) -> bool { - if deps == self.discovered_ins { + pub fn update_discovered(&mut self, deps: Vec>) -> bool { + if Self::vecs_of_arcs_eq(&deps, &self.discovered_ins) { false } else { self.set_discovered_ins(deps); @@ -228,57 +308,117 @@ impl Build { } } - pub fn set_discovered_ins(&mut self, deps: Vec) { + pub fn set_discovered_ins(&mut self, deps: Vec>) { self.discovered_ins = deps; } /// Input paths that were discovered after building, for use in the next build. - pub fn discovered_ins(&self) -> &[FileId] { + pub fn discovered_ins(&self) -> &[Arc] { &self.discovered_ins } /// Output paths that appear in `$out`. - pub fn explicit_outs(&self) -> &[FileId] { + pub fn explicit_outs(&self) -> &[Arc] { &self.outs.ids[0..self.outs.explicit] } /// Output paths that are updated when the build runs. 
- pub fn outs(&self) -> &[FileId] { + pub fn outs(&self) -> &[Arc] { &self.outs.ids } + + fn get_binding(&self, key: &str) -> Option { + let implicit_vars = BuildImplicitVars { + explicit_ins: &self.ins.ids[..self.ins.explicit], + explicit_outs: &self.outs.ids[..self.outs.explicit], + }; + let scope = self.scope.as_ref().unwrap(); + let rule = scope.get_rule(&self.rule, self.scope_position).unwrap(); + Some(match rule.vars.get(key) { + Some(val) => val.evaluate( + &[&implicit_vars, &self.bindings], + scope, + self.scope_position, + ), + None => self + .bindings + .get(key)? + .evaluate(&[], scope, self.scope_position), + }) + } + + pub fn get_rspfile(&self) -> anyhow::Result> { + let rspfile_path = self.get_binding("rspfile"); + let rspfile_content = self.get_binding("rspfile_content"); + let rspfile = match (rspfile_path, rspfile_content) { + (None, None) => None, + (Some(path), Some(content)) => Some(RspFile { + path: std::path::PathBuf::from(path), + content, + }), + _ => bail!("rspfile and rspfile_content need to be both specified"), + }; + Ok(rspfile) + } + + pub fn get_parse_showincludes(&self) -> anyhow::Result { + Ok(match self.get_binding("deps").as_deref() { + None => false, + Some("gcc") => false, + Some("msvc") => true, + Some(other) => bail!("invalid deps attribute {:?}", other), + }) + } + + pub fn get_cmdline(&self) -> Option { + self.get_binding("command") + } + + pub fn get_description(&self) -> Option { + self.get_binding("description") + } + + pub fn get_depfile(&self) -> Option { + self.get_binding("depfile") + } + + pub fn get_pool(&self) -> Option { + self.get_binding("pool") + } } -/// The build graph: owns Files/Builds and maps FileIds/BuildIds to them. +/// The build graph: owns Files/Builds and maps BuildIds to them. #[derive(Default)] pub struct Graph { - pub builds: DenseMap, + pub builds: DenseMap>, pub files: GraphFiles, } -/// Files identified by FileId, as well as mapping string filenames to them. 
+/// Files identified by their string names. /// Split from Graph for lifetime reasons. #[derive(Default)] pub struct GraphFiles { - pub by_id: DenseMap, - by_name: FxHashMap, + by_name: dashmap::DashMap, Arc, BuildHasherDefault>, } impl Graph { - /// Look up a file by its FileId. - pub fn file(&self, id: FileId) -> &File { - &self.files.by_id[id] + pub fn new(builds: Vec>, files: GraphFiles) -> anyhow::Result { + let result = Graph { + builds: DenseMap::from_vec(builds), + files, + }; + Ok(result) } - /// Add a new Build, generating a BuildId for it. - pub fn add_build(&mut self, mut build: Build) -> anyhow::Result<()> { - let new_id = self.builds.next_id(); - for &id in &build.ins.ids { - self.files.by_id[id].dependents.push(new_id); - } + pub fn initialize_build(build: &mut Build) -> anyhow::Result<()> { + let new_id = build.id; let mut fixup_dups = false; - for &id in &build.outs.ids { - let f = &mut self.files.by_id[id]; - match f.input { + for input in &build.ins.ids { + input.dependents.prepend(new_id); + } + for f in &build.outs.ids { + let mut input = f.input.lock().unwrap(); + match *input { Some(prev) if prev == new_id => { fixup_dups = true; println!( @@ -286,29 +426,28 @@ impl Graph { build.location, f.name, ); } - Some(prev) => { + Some(_) => { + let location = build.location.clone(); anyhow::bail!( - "{}: {:?} is already an output at {}", - build.location, + "{}: {:?} is already an output of another build", + location, f.name, - self.builds[prev].location ); } - None => f.input = Some(new_id), + None => *input = Some(new_id), } } if fixup_dups { build.outs.remove_duplicates(); } - self.builds.push(build); Ok(()) } } impl GraphFiles { /// Look up a file by its name. Name must have been canonicalized already. 
- pub fn lookup(&self, file: &str) -> Option { - self.by_name.get(file).copied() + pub fn lookup(&self, file: String) -> Option> { + self.by_name.get(&Arc::new(file)).map(|x| x.clone()) } /// Look up a file by its name, adding it if not already present. @@ -318,24 +457,26 @@ impl GraphFiles { /// of this function that accepts string references that is more optimized /// for the case where the entry already exists. But so far, all of our /// usages of this function have an owned string easily accessible anyways. - pub fn id_from_canonical(&mut self, file: String) -> FileId { - // TODO: so many string copies :< + pub fn id_from_canonical(&self, file: String) -> Arc { + let file = Arc::new(file); match self.by_name.entry(file) { - Entry::Occupied(o) => *o.get(), - Entry::Vacant(v) => { - let id = self.by_id.push(File { - name: v.key().clone(), - input: None, - dependents: Vec::new(), - }); - v.insert(id); - id + dashmap::mapref::entry::Entry::Occupied(o) => o.get().clone(), + dashmap::mapref::entry::Entry::Vacant(v) => { + let mut f = File::default(); + f.name = v.key().clone(); + let f = Arc::new(f); + v.insert(f.clone()); + f } } } - pub fn all_ids(&self) -> impl Iterator { - (0..self.by_id.next_id().0).map(|id| FileId(id)) + pub fn all_files(&self) -> impl Iterator> + '_ { + self.by_name.iter().map(|x| x.clone()) + } + + pub fn num_files(&self) -> usize { + self.by_name.len() } } @@ -366,20 +507,24 @@ pub fn stat(path: &Path) -> std::io::Result { /// Gathered state of on-disk files. /// Due to discovered deps this map may grow after graph initialization. 
-pub struct FileState(DenseMap>); +pub struct FileState(FxHashMap<*const File, Option>); impl FileState { pub fn new(graph: &Graph) -> Self { - FileState(DenseMap::new_sized(graph.files.by_id.next_id(), None)) + let hm = HashMap::with_capacity_and_hasher( + graph.files.num_files(), + BuildHasherDefault::::default(), + ); + FileState(hm) } - pub fn get(&self, id: FileId) -> Option { - self.0.lookup(id).copied().unwrap_or(None) + pub fn get(&self, id: &File) -> Option { + self.0.get(&(id as *const File)).copied().flatten() } - pub fn stat(&mut self, id: FileId, path: &Path) -> anyhow::Result { + pub fn stat(&mut self, id: &File, path: &Path) -> anyhow::Result { let mtime = stat(path).map_err(|err| anyhow::anyhow!("stat {:?}: {}", path, err))?; - self.0.set_grow(id, Some(mtime), None); + self.0.insert(id as *const File, Some(mtime)); Ok(mtime) } } diff --git a/src/hash.rs b/src/hash.rs index 23ea8d5..5306ab3 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -4,11 +4,12 @@ //! See "Manifests instead of mtime order" in //! https://neugierig.org/software/blog/2022/03/n2.html -use crate::graph::{Build, FileId, FileState, GraphFiles, MTime, RspFile}; +use crate::graph::{self, Build, FileState, MTime, RspFile}; use std::{ collections::hash_map::DefaultHasher, fmt::Write, hash::{Hash, Hasher}, + sync::Arc, time::SystemTime, }; @@ -21,23 +22,13 @@ pub struct BuildHash(pub u64); /// implement it a second time for "-d explain" debug purposes. trait Manifest { /// Write a list of files+mtimes. desc is used only for "-d explain" output. 
- fn write_files( - &mut self, - desc: &str, - files: &GraphFiles, - file_state: &FileState, - ids: &[FileId], - ); + fn write_files(&mut self, desc: &str, file_state: &FileState, ids: &[Arc]); fn write_rsp(&mut self, rspfile: &RspFile); fn write_cmdline(&mut self, cmdline: &str); } -fn get_fileid_status<'a>( - files: &'a GraphFiles, - file_state: &FileState, - id: FileId, -) -> (&'a str, SystemTime) { - let name = &files.by_id[id].name; +fn get_fileid_status<'a>(file_state: &FileState, id: &'a graph::File) -> (&'a str, SystemTime) { + let name = &id.name; let mtime = file_state .get(id) .unwrap_or_else(|| panic!("no state for {:?}", name)); @@ -69,15 +60,9 @@ impl TerseHash { } impl Manifest for TerseHash { - fn write_files<'a>( - &mut self, - _desc: &str, - files: &GraphFiles, - file_state: &FileState, - ids: &[FileId], - ) { - for &id in ids { - let (name, mtime) = get_fileid_status(files, file_state, id); + fn write_files<'a>(&mut self, _desc: &str, file_state: &FileState, ids: &[Arc]) { + for id in ids { + let (name, mtime) = get_fileid_status(file_state, &id); self.write_string(name); mtime.hash(&mut self.0); } @@ -96,27 +81,27 @@ impl Manifest for TerseHash { fn build_manifest( manifest: &mut M, - files: &GraphFiles, file_state: &FileState, build: &Build, -) { - manifest.write_files("in", files, file_state, build.dirtying_ins()); - manifest.write_files("discovered", files, file_state, build.discovered_ins()); - manifest.write_cmdline(build.cmdline.as_deref().unwrap_or("")); - if let Some(rspfile) = &build.rspfile { +) -> anyhow::Result<()> { + manifest.write_files("in", file_state, build.dirtying_ins()); + manifest.write_files("discovered", file_state, build.discovered_ins()); + manifest.write_cmdline(build.get_cmdline().as_deref().unwrap_or("")); + if let Some(rspfile) = &build.get_rspfile()? 
{ manifest.write_rsp(rspfile); } - manifest.write_files("out", files, file_state, build.outs()); + manifest.write_files("out", file_state, build.outs()); + Ok(()) } // Hashes the inputs of a build to compute a signature. // Prerequisite: all referenced files have already been stat()ed and are present. // (It doesn't make sense to hash a build with missing files, because it's out // of date regardless of the state of the other files.) -pub fn hash_build(files: &GraphFiles, file_state: &FileState, build: &Build) -> BuildHash { +pub fn hash_build(file_state: &FileState, build: &Build) -> anyhow::Result { let mut hasher = TerseHash::default(); - build_manifest(&mut hasher, files, file_state, build); - hasher.finish() + build_manifest(&mut hasher, file_state, build)?; + Ok(hasher.finish()) } /// A BuildHasher that records human-readable text for "-d explain" debugging. @@ -126,16 +111,10 @@ struct ExplainHash { } impl Manifest for ExplainHash { - fn write_files<'a>( - &mut self, - desc: &str, - files: &GraphFiles, - file_state: &FileState, - ids: &[FileId], - ) { + fn write_files<'a>(&mut self, desc: &str, file_state: &FileState, ids: &[Arc]) { writeln!(&mut self.text, "{desc}:").unwrap(); - for &id in ids { - let (name, mtime) = get_fileid_status(files, file_state, id); + for id in ids { + let (name, mtime) = get_fileid_status(file_state, &id); let millis = mtime .duration_since(SystemTime::UNIX_EPOCH) .unwrap() @@ -159,8 +138,8 @@ impl Manifest for ExplainHash { /// Logs human-readable state of all the inputs used for hashing a given build. /// Used for "-d explain" debugging output. 
-pub fn explain_hash_build(files: &GraphFiles, file_state: &FileState, build: &Build) -> String { +pub fn explain_hash_build(file_state: &FileState, build: &Build) -> anyhow::Result { let mut explainer = ExplainHash::default(); - build_manifest(&mut explainer, files, file_state, build); - explainer.text + build_manifest(&mut explainer, file_state, build)?; + Ok(explainer.text) } diff --git a/src/lib.rs b/src/lib.rs index c1e8cb0..769a924 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,10 @@ pub mod canon; +mod concurrent_linked_list; mod db; mod densemap; mod depfile; mod eval; +mod file_pool; mod graph; mod hash; pub mod load; diff --git a/src/load.rs b/src/load.rs index edcf49d..5806195 100644 --- a/src/load.rs +++ b/src/load.rs @@ -1,236 +1,403 @@ //! Graph loading: runs .ninja parsing and constructs the build graph from it. use crate::{ - canon::{canon_path, canon_path_fast}, - eval::{EvalPart, EvalString}, - graph::{FileId, RspFile}, - parse::Statement, - scanner, + canon::canon_path, + db, + file_pool::FilePool, + graph::{self, BuildId, Graph, GraphFiles}, + parse::{self, Clump, ClumpOrInclude, Rule, VariableAssignment}, + scanner::{format_parse_error, ParseResult}, smallmap::SmallMap, - {db, eval, graph, parse, trace}, + trace, }; use anyhow::{anyhow, bail}; -use std::collections::HashMap; -use std::path::PathBuf; -use std::{borrow::Cow, path::Path}; - -/// A variable lookup environment for magic $in/$out variables. 
-struct BuildImplicitVars<'a> { - graph: &'a graph::Graph, - build: &'a graph::Build, -} -impl<'a> BuildImplicitVars<'a> { - fn file_list(&self, ids: &[FileId], sep: char) -> String { - let mut out = String::new(); - for &id in ids { - if !out.is_empty() { - out.push(sep); - } - out.push_str(&self.graph.file(id).name); - } - out +use rayon::prelude::*; +use rustc_hash::FxHashMap; +use std::path::{Path, PathBuf}; +use std::{collections::hash_map::Entry, sync::Arc, thread::available_parallelism}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Default, PartialOrd, Ord)] +pub struct ScopePosition(pub usize); + +impl ScopePosition { + pub fn add(&self, other: ScopePosition) -> ScopePosition { + ScopePosition(self.0 + other.0) } -} -impl<'a> eval::Env for BuildImplicitVars<'a> { - fn get_var(&self, var: &str) -> Option>> { - let string_to_evalstring = - |s: String| Some(EvalString::new(vec![EvalPart::Literal(Cow::Owned(s))])); - match var { - "in" => string_to_evalstring(self.file_list(self.build.explicit_ins(), ' ')), - "in_newline" => string_to_evalstring(self.file_list(self.build.explicit_ins(), '\n')), - "out" => string_to_evalstring(self.file_list(self.build.explicit_outs(), ' ')), - "out_newline" => string_to_evalstring(self.file_list(self.build.explicit_outs(), '\n')), - _ => None, - } + pub fn add_usize(&self, other: usize) -> ScopePosition { + ScopePosition(self.0 + other) } } -/// Internal state used while loading. 
-#[derive(Default)] -pub struct Loader { - graph: graph::Graph, - default: Vec, - /// rule name -> list of (key, val) - rules: HashMap>>, - pools: SmallMap, - builddir: Option, -} +#[derive(Debug)] +pub struct ParentScopeReference(pub Arc, pub ScopePosition); -impl Loader { - pub fn new() -> Self { - let mut loader = Loader::default(); +#[derive(Debug)] +pub struct Scope { + parent: Option, + rules: FxHashMap, + variables: FxHashMap>, + next_free_position: ScopePosition, +} - loader.rules.insert("phony".to_owned(), SmallMap::default()); +impl Scope { + pub fn new(parent: Option) -> Self { + Self { + parent, + rules: FxHashMap::default(), + variables: FxHashMap::default(), + next_free_position: ScopePosition(0), + } + } - loader + pub fn get_and_inc_scope_position(&mut self) -> ScopePosition { + let result = self.next_free_position; + self.next_free_position.0 += 1; + result } - /// Convert a path string to a FileId. For performance reasons - /// this requires an owned 'path' param. - fn path(&mut self, mut path: String) -> FileId { - // Perf: this is called while parsing build.ninja files. We go to - // some effort to avoid allocating in the common case of a path that - // refers to a file that is already known. 
- let len = canon_path_fast(&mut path); - path.truncate(len); - self.graph.files.id_from_canonical(path) + pub fn get_last_scope_position(&self) -> ScopePosition { + self.next_free_position } - fn evaluate_path(&mut self, path: EvalString<&str>, envs: &[&dyn eval::Env]) -> FileId { - self.path(path.evaluate(envs)) + pub fn get_rule(&self, name: &str, position: ScopePosition) -> Option<&Rule> { + match self.rules.get(name) { + Some(rule) if rule.scope_position.0 < position.0 => Some(rule), + Some(_) | None => self + .parent + .as_ref() + .map(|p| p.0.get_rule(name, p.1)) + .flatten(), + } } - fn evaluate_paths( - &mut self, - paths: Vec>, - envs: &[&dyn eval::Env], - ) -> Vec { - paths - .into_iter() - .map(|path| self.evaluate_path(path, envs)) - .collect() + pub fn evaluate(&self, result: &mut String, varname: &str, position: ScopePosition) { + if let Some(variables) = self.variables.get(varname) { + let i = variables.binary_search_by_key(&position, |x| x.scope_position); + let i = match i { + Ok(i) => std::cmp::max(i, 1) - 1, + Err(i) => std::cmp::min(i, variables.len() - 1), + }; + if variables[i].scope_position.0 < position.0 { + variables[i].evaluate(result, &self); + return; + } + // We couldn't find a variable assignment before the input + // position, so check the parent scope if there is one. 
+ } + if let Some(parent) = &self.parent { + parent.0.evaluate(result, varname, parent.1); + } } +} - fn add_build( - &mut self, - filename: std::rc::Rc, - env: &eval::Vars, - b: parse::Build, - ) -> anyhow::Result<()> { - let ins = graph::BuildIns { - ids: self.evaluate_paths(b.ins, &[&b.vars, env]), - explicit: b.explicit_ins, - implicit: b.implicit_ins, - order_only: b.order_only_ins, - // validation is implied by the other counts - }; - let outs = graph::BuildOuts { - ids: self.evaluate_paths(b.outs, &[&b.vars, env]), - explicit: b.explicit_outs, - }; - let mut build = graph::Build::new( - graph::FileLoc { - filename, - line: b.line, +fn evaluate_build_files<'text>( + files: &GraphFiles, + scope: Arc, + b: &mut graph::Build, + base_position: ScopePosition, +) -> anyhow::Result<()> { + b.scope_position.0 += base_position.0; + let num_outs = b.outs.num_outs(); + b.outs.ids = b.unevaluated_outs_and_ins[..num_outs] + .iter() + .map(|x| { + files.id_from_canonical(canon_path(x.evaluate( + &[&b.bindings], + &scope, + b.scope_position, + ))) + }) + .collect(); + b.ins.ids = b.unevaluated_outs_and_ins[num_outs..] + .iter() + .map(|x| { + files.id_from_canonical(canon_path(x.evaluate( + &[&b.bindings], + &scope, + b.scope_position, + ))) + }) + .collect(); + // The unevaluated values actually have a lifetime of 'text, not 'static, + // so clear them so they don't accidentally get used later. 
+ b.unevaluated_outs_and_ins.clear(); + b.unevaluated_outs_and_ins.shrink_to_fit(); + b.scope = Some(scope); + + Ok(()) +} + +#[derive(Default)] +struct SubninjaResults<'text> { + clumps: Vec>, + builddir: Option, +} + +fn subninja<'thread, 'text>( + num_threads: usize, + files: &'thread GraphFiles, + file_pool: &'text FilePool, + path: String, + parent_scope: Option, +) -> anyhow::Result> +where + 'text: 'thread, +{ + let path = PathBuf::from(path); + let top_level_scope = parent_scope.is_none(); + let mut scope = Scope::new(parent_scope); + if top_level_scope { + scope.rules.insert( + "phony".to_owned(), + Rule { + vars: SmallMap::default(), + scope_position: ScopePosition(0), }, - ins, - outs, ); + } + let filename = Arc::new(path); + let mut parse_results = trace::scope("parse", || { + parse( + &filename, + num_threads, + file_pool, + file_pool.read_file(&filename)?, + &mut scope, + // to account for the phony rule + if top_level_scope { + ScopePosition(1) + } else { + ScopePosition(0) + }, + ) + })?; - let rule = match self.rules.get(b.rule) { - Some(r) => r, - None => bail!("unknown rule {:?}", b.rule), - }; + let scope = Arc::new(scope); - let implicit_vars = BuildImplicitVars { - graph: &self.graph, - build: &build, - }; + for clump in &mut parse_results { + let base_position = clump.base_position; + for default in clump.defaults.iter_mut() { + let scope = scope.clone(); + default.evaluated = default + .files + .iter() + .map(|x| { + let path = canon_path(x.evaluate( + &[], + &scope, + default.scope_position.add(base_position), + )); + files.id_from_canonical(path) + }) + .collect(); + } + } - // temp variable in order to not move all of b into the closure - let build_vars = &b.vars; - let lookup = |key: &str| -> Option { - // Look up `key = ...` binding in build and rule block. 
- Some(match rule.get(key) { - Some(val) => val.evaluate(&[&implicit_vars, build_vars, env]), - None => build_vars.get(key)?.evaluate(&[env]), + trace::scope("evaluate builds' files", || -> anyhow::Result<()> { + parse_results + .par_iter_mut() + .flat_map(|x| { + let num_builds = x.builds.len(); + x.builds + .par_iter_mut() + .zip(rayon::iter::repeatn(x.base_position, num_builds)) }) - }; + .try_for_each(|(mut build, base_position)| -> anyhow::Result<()> { + evaluate_build_files(files, scope.clone(), &mut build, base_position) + }) + })?; - let cmdline = lookup("command"); - let desc = lookup("description"); - let depfile = lookup("depfile"); - let parse_showincludes = match lookup("deps").as_deref() { - None => false, - Some("gcc") => false, - Some("msvc") => true, - Some(other) => bail!("invalid deps attribute {:?}", other), - }; - let pool = lookup("pool"); - - let rspfile_path = lookup("rspfile"); - let rspfile_content = lookup("rspfile_content"); - let rspfile = match (rspfile_path, rspfile_content) { - (None, None) => None, - (Some(path), Some(content)) => Some(RspFile { - path: std::path::PathBuf::from(path), - content, - }), - _ => bail!("rspfile and rspfile_content need to be both specified"), - }; + // The unevaluated values of scoped variables have a lifetime of 'static + // for simplicity in the code, but in actuality their lifetime is 'text. + // We need to evaluate all the variables before the lifetime of 'text ends. 
+ scope + .variables + .par_iter() + .flat_map(|x| x.1.par_iter()) + .for_each(|x| { + x.pre_evaluate(&scope); + }); - build.cmdline = cmdline; - build.desc = desc; - build.depfile = depfile; - build.parse_showincludes = parse_showincludes; - build.rspfile = rspfile; - build.pool = pool; + let mut subninja_results = parse_results + .par_iter() + .flat_map(|x| { + x.subninjas + .par_iter() + .zip(rayon::iter::repeatn(x.base_position, x.subninjas.len())) + }) + .map(|(sn, base_position)| -> anyhow::Result> { + let position = sn.scope_position.add(base_position); + let file = canon_path(sn.file.evaluate(&[], &scope, position)); + Ok(subninja( + num_threads, + files, + file_pool, + file, + Some(ParentScopeReference(scope.clone(), position)), + )? + .clumps) + }) + .collect::>>>>()?; - self.graph.add_build(build) + for subninja_result in &mut subninja_results { + parse_results.append(subninja_result); } - fn read_file(&mut self, id: FileId) -> anyhow::Result<()> { - let path = self.graph.file(id).path().to_path_buf(); - let bytes = match trace::scope("read file", || scanner::read_file_with_nul(&path)) { - Ok(b) => b, - Err(e) => bail!("read {}: {}", path.display(), e), - }; - self.parse(path, &bytes) - } + // Only the builddir in the outermost scope is respected + let build_dir = if top_level_scope { + let mut build_dir = String::new(); + scope.evaluate(&mut build_dir, "builddir", ScopePosition(usize::MAX)); + if !build_dir.is_empty() { + Some(build_dir) + } else { + None + } + } else { + None + }; - fn evaluate_and_read_file( - &mut self, - file: EvalString<&str>, - envs: &[&dyn eval::Env], - ) -> anyhow::Result<()> { - let evaluated = self.evaluate_path(file, envs); - self.read_file(evaluated) + Ok(SubninjaResults { + clumps: parse_results, + builddir: build_dir, + }) +} + +fn include<'thread, 'text>( + filename: &Arc, + num_threads: usize, + file_pool: &'text FilePool, + path: String, + scope: &mut Scope, + clump_base_position: ScopePosition, +) -> anyhow::Result>> 
+where + 'text: 'thread, +{ + let path = PathBuf::from(path); + parse( + filename, + num_threads, + file_pool, + file_pool.read_file(&path)?, + scope, + clump_base_position, + ) +} + +fn parse<'thread, 'text>( + filename: &Arc, + num_threads: usize, + file_pool: &'text FilePool, + bytes: &'text [u8], + scope: &mut Scope, + mut clump_base_position: ScopePosition, +) -> anyhow::Result>> +where + 'text: 'thread, +{ + let chunks = parse::split_manifest_into_chunks(bytes, num_threads); + + let statements: ParseResult>> = chunks + .par_iter() + .enumerate() + .map(|(i, chunk)| { + let mut parser = parse::Parser::new(chunk, filename.clone(), i); + parser.read_clumps() + }) + .collect(); + + let Ok(statements) = statements else { + let err = statements.unwrap_err(); + let ofs = chunks[..err.chunk_index].iter().map(|x| x.len()).sum(); + bail!(format_parse_error( + ofs, + chunks[err.chunk_index], + filename, + err + )); + }; + + let mut num_rules = 0; + let mut num_variables = 0; + let mut num_clumps = 0; + for clumps in &statements { + num_clumps += clumps.len(); + for clump_or_include in clumps { + if let ClumpOrInclude::Clump(clump) = clump_or_include { + num_rules += clump.rules.len(); + num_variables += clump.assignments.len(); + } + } } - pub fn parse(&mut self, path: PathBuf, bytes: &[u8]) -> anyhow::Result<()> { - let filename = std::rc::Rc::new(path); + scope.rules.reserve(num_rules); + scope.variables.reserve(num_variables); - let mut parser = parse::Parser::new(&bytes); + let mut results = Vec::with_capacity(num_clumps); - loop { - let stmt = match parser - .read() - .map_err(|err| anyhow!(parser.format_parse_error(&filename, err)))? 
- { - None => break, - Some(s) => s, - }; - match stmt { - Statement::Include(id) => trace::scope("include", || { - self.evaluate_and_read_file(id, &[&parser.vars]) - })?, - // TODO: implement scoping for subninja - Statement::Subninja(id) => trace::scope("subninja", || { - self.evaluate_and_read_file(id, &[&parser.vars]) - })?, - Statement::Default(defaults) => { - let evaluated = self.evaluate_paths(defaults, &[&parser.vars]); - self.default.extend(evaluated); - } - Statement::Rule(rule) => { - let mut vars: SmallMap> = SmallMap::default(); - for (name, val) in rule.vars.into_iter() { - // TODO: We should not need to call .into_owned() here - // if we keep the contents of all included files in - // memory. - vars.insert(name.to_owned(), val.into_owned()); - } - self.rules.insert(rule.name.to_owned(), vars); - } - Statement::Build(build) => self.add_build(filename.clone(), &parser.vars, build)?, - Statement::Pool(pool) => { - self.pools.insert(pool.name.to_string(), pool.depth); - } - }; + for stmt in statements.into_iter().flatten() { + match stmt { + ClumpOrInclude::Clump(mut clump) => { + // Variable assignments must be added to the scope now, because + // they may be referenced by a later include. Also add rules + // while we're at it, to avoid some copies later on. 
+ let rules = std::mem::take(&mut clump.rules); + let assignments = std::mem::take(&mut clump.assignments); + let scope_rules = &mut scope.rules; + let scope_variables = &mut scope.variables; + rayon::join( + || { + for (name, mut variable_assignment) in assignments.into_iter() { + variable_assignment.scope_position.0 += clump_base_position.0; + match scope_variables.entry(name) { + Entry::Occupied(mut e) => e.get_mut().push(variable_assignment), + Entry::Vacant(e) => { + e.insert(vec![variable_assignment]); + } + } + } + }, + || -> anyhow::Result<()> { + for (name, mut rule) in rules.into_iter() { + rule.scope_position.0 += clump_base_position.0; + match scope_rules.entry(name) { + Entry::Occupied(e) => bail!("duplicate rule '{}'", e.key()), + Entry::Vacant(e) => { + e.insert(rule); + } + } + } + Ok(()) + }, + ) + .1?; + clump.base_position = clump_base_position; + clump_base_position.0 += clump.used_scope_positions; + results.push(clump); + } + ClumpOrInclude::Include(i) => { + trace::scope("include", || -> anyhow::Result<()> { + let evaluated = canon_path(i.evaluate(&[], &scope, clump_base_position)); + let mut new_results = include( + filename, + num_threads, + file_pool, + evaluated, + scope, + clump_base_position, + )?; + clump_base_position = new_results + .last() + .map(|c| c.base_position.add_usize(c.used_scope_positions)) + .unwrap_or(clump_base_position); + results.append(&mut new_results); + Ok(()) + })?; + } } - self.builddir = parser.vars.get("builddir").cloned(); - Ok(()) } + + Ok(results) } /// State loaded by read(). @@ -238,48 +405,87 @@ pub struct State { pub graph: graph::Graph, pub db: db::Writer, pub hashes: graph::Hashes, - pub default: Vec, + pub default: Vec>, pub pools: SmallMap, } /// Load build.ninja/.n2_db and return the loaded build graph and state. 
pub fn read(build_filename: &str) -> anyhow::Result { - let mut loader = Loader::new(); - trace::scope("loader.read_file", || { - let id = loader - .graph - .files - .id_from_canonical(canon_path(build_filename)); - loader.read_file(id) - })?; + let build_filename = canon_path(build_filename); + let file_pool = FilePool::new(); + let files = GraphFiles::default(); + let num_threads = available_parallelism()?.get(); + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build()?; + let (defaults, builddir, pools, builds) = + trace::scope("loader.read_file", || -> anyhow::Result<_> { + pool.scope(|_| { + let mut results = subninja(num_threads, &files, &file_pool, build_filename, None)?; + + let mut pools = SmallMap::default(); + let mut defaults = Vec::new(); + let mut num_builds = 0; + trace::scope("add pools and defaults", || -> anyhow::Result<()> { + for clump in &mut results.clumps { + for pool in &clump.pools { + if !pools.insert_if_absent(pool.name.to_owned(), pool.depth) { + bail!("duplicate pool {}", pool.name); + } + } + for default in &mut clump.defaults { + defaults.append(&mut default.evaluated); + } + num_builds += clump.builds.len(); + } + Ok(()) + })?; + let mut builds = trace::scope("allocate and concat builds", || { + let mut builds = Vec::with_capacity(num_builds); + for clump in &mut results.clumps { + builds.append(&mut clump.builds); + } + builds + }); + let builddir = results.builddir.take(); + drop(results); + // Turns out munmap is rather slow, unmapping the android ninja + // files takes ~150ms. Do this in parallel with initialize_build. 
+ rayon::spawn(move || { + drop(file_pool); + }); + trace::scope("initialize builds", move || { + builds + .par_iter_mut() + .enumerate() + .try_for_each(|(id, build)| { + build.id = BuildId::from(id); + graph::Graph::initialize_build(build) + })?; + Ok((defaults, builddir, pools, builds)) + }) + }) + })?; + + let mut graph = Graph::new(builds, files)?; let mut hashes = graph::Hashes::default(); let db = trace::scope("db::open", || { let mut db_path = PathBuf::from(".n2_db"); - if let Some(builddir) = &loader.builddir { + if let Some(builddir) = &builddir { db_path = Path::new(&builddir).join(db_path); if let Some(parent) = db_path.parent() { std::fs::create_dir_all(parent)?; } }; - db::open(&db_path, &mut loader.graph, &mut hashes) + db::open(&db_path, &mut graph, &mut hashes) }) .map_err(|err| anyhow!("load .n2_db: {}", err))?; + Ok(State { - graph: loader.graph, + graph, db, hashes, - default: loader.default, - pools: loader.pools, + default: defaults, + pools, }) } - -/// Parse a single file's content. -#[cfg(test)] -pub fn parse(name: &str, mut content: Vec) -> anyhow::Result { - content.push(0); - let mut loader = Loader::new(); - trace::scope("loader.read_file", || { - loader.parse(PathBuf::from(name), &content) - })?; - Ok(loader.graph) -} diff --git a/src/parse.rs b/src/parse.rs index 04a8397..91b54ba 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -6,68 +6,223 @@ //! text, marked with the lifetime `'text`. 
use crate::{ - eval::{EvalPart, EvalString, Vars}, - scanner::{ParseError, ParseResult, Scanner}, + eval::{EvalPart, EvalString}, + graph::{self, Build, BuildIns, BuildOuts, FileLoc}, + load::{Scope, ScopePosition}, + scanner::{ParseResult, Scanner}, smallmap::SmallMap, }; -use std::path::Path; +use std::{ + cell::UnsafeCell, + path::PathBuf, + sync::{atomic::AtomicBool, Arc, Mutex}, +}; -/// A list of variable bindings, as expressed with syntax like: -/// key = $val -pub type VarList<'text> = SmallMap<&'text str, EvalString<&'text str>>; +#[derive(Debug)] +pub struct Rule { + pub vars: SmallMap>, + pub scope_position: ScopePosition, +} -pub struct Rule<'text> { +// #[derive(Debug, PartialEq)] +// pub struct Build<'text> { +// pub rule: &'text str, +// pub line: usize, +// pub outs: Vec>, +// pub explicit_outs: usize, +// pub ins: Vec>, +// pub explicit_ins: usize, +// pub implicit_ins: usize, +// pub order_only_ins: usize, +// pub validation_ins: usize, +// pub vars: VarList<'text>, +// pub scope_position: ScopePosition, +// } + +#[derive(Debug, PartialEq)] +pub struct Pool<'text> { pub name: &'text str, - pub vars: VarList<'text>, + pub depth: usize, +} + +#[derive(Debug)] +pub struct VariableAssignment { + pub unevaluated: EvalString<&'static str>, + pub scope_position: ScopePosition, + pub evaluated: UnsafeCell, + pub is_evaluated: AtomicBool, + pub lock: Mutex<()>, } -pub struct Build<'text> { - pub rule: &'text str, - pub line: usize, - pub outs: Vec>, - pub explicit_outs: usize, - pub ins: Vec>, - pub explicit_ins: usize, - pub implicit_ins: usize, - pub order_only_ins: usize, - pub validation_ins: usize, - pub vars: VarList<'text>, +// SAFETY: Sync is not automatically implemented because of the UnsafeCell, +// but our usage of the UnsafeCell is guarded behind a Mutex. 
+unsafe impl Sync for VariableAssignment {} + +impl VariableAssignment { + fn new(unevaluated: EvalString<&'static str>) -> Self { + Self { + unevaluated, + scope_position: ScopePosition(0), + evaluated: UnsafeCell::new(String::new()), + is_evaluated: AtomicBool::new(false), + lock: Mutex::new(()), + } + } + + pub fn evaluate(&self, result: &mut String, scope: &Scope) { + self.pre_evaluate(scope); + unsafe { + result.push_str(&(*self.evaluated.get())); + } + } + + // This is the same as evaluate, but doesn't give any results back. + // Used to evaluate all the variables before 'text is over, as unevaluated + // should really have a 'text lifetime. + pub fn pre_evaluate(&self, scope: &Scope) { + if self.is_evaluated.load(std::sync::atomic::Ordering::Relaxed) { + return; + } + let guard = self.lock.lock().unwrap(); + if self.is_evaluated.load(std::sync::atomic::Ordering::Relaxed) { + return; + } + + unsafe { + self.unevaluated.evaluate_inner( + &mut *self.evaluated.get(), + &[], + scope, + self.scope_position, + ); + } + self.is_evaluated + .store(true, std::sync::atomic::Ordering::Relaxed); + drop(guard); + } } #[derive(Debug)] -pub struct Pool<'text> { - pub name: &'text str, - pub depth: usize, +pub struct DefaultStmt<'text> { + pub files: Vec>, + pub evaluated: Vec>, + pub scope_position: ScopePosition, +} + +#[derive(Debug, PartialEq)] +pub struct IncludeOrSubninja<'text> { + pub file: EvalString<&'text str>, + pub scope_position: ScopePosition, } +#[derive(Debug)] pub enum Statement<'text> { - Rule(Rule<'text>), - Build(Build<'text>), - Default(Vec>), - Include(EvalString<&'text str>), - Subninja(EvalString<&'text str>), + Rule((String, Rule)), + Build(Box), + Default(DefaultStmt<'text>), + Include(IncludeOrSubninja<'text>), + Subninja(IncludeOrSubninja<'text>), Pool(Pool<'text>), + VariableAssignment((String, VariableAssignment)), +} + +// Grouping the parse results into clumps allows us to do fewer vector +// concatenations when merging results of different
chunks or different files +// together. +#[derive(Default, Debug)] +pub struct Clump<'text> { + pub assignments: Vec<(String, VariableAssignment)>, + pub rules: Vec<(String, Rule)>, + pub pools: Vec>, + pub defaults: Vec>, + pub builds: Vec>, + pub subninjas: Vec>, + pub used_scope_positions: usize, + pub base_position: ScopePosition, +} + +impl<'text> Clump<'text> { + pub fn is_empty(&self) -> bool { + self.assignments.is_empty() + && self.rules.is_empty() + && self.pools.is_empty() + && self.defaults.is_empty() + && self.builds.is_empty() + && self.subninjas.is_empty() + } +} + +#[derive(Debug)] +pub enum ClumpOrInclude<'text> { + Clump(Clump<'text>), + Include(EvalString<&'text str>), } pub struct Parser<'text> { + filename: Arc, scanner: Scanner<'text>, - pub vars: Vars<'text>, - /// Reading EvalStrings is very hot when parsing, so we always read into - /// this buffer and then clone it afterwards. - eval_buf: Vec>, + buf_len: usize, } impl<'text> Parser<'text> { - pub fn new(buf: &'text [u8]) -> Parser<'text> { + pub fn new(buf: &'text [u8], filename: Arc, chunk_index: usize) -> Parser<'text> { Parser { - scanner: Scanner::new(buf), - vars: Vars::default(), - eval_buf: Vec::with_capacity(16), + filename, + scanner: Scanner::new(buf, chunk_index), + buf_len: buf.len(), } } - pub fn format_parse_error(&self, filename: &Path, err: ParseError) -> String { - self.scanner.format_parse_error(filename, err) + pub fn read_clumps(&mut self) -> ParseResult>> { + let mut result = Vec::new(); + let mut clump = Clump::default(); + let mut position = ScopePosition(0); + while let Some(stmt) = self.read()? 
{ + match stmt { + Statement::Rule(mut r) => { + r.1.scope_position = position; + position.0 += 1; + clump.rules.push(r); + } + Statement::Build(mut b) => { + b.scope_position = position; + position.0 += 1; + clump.builds.push(b); + } + Statement::Default(mut d) => { + d.scope_position = position; + position.0 += 1; + clump.defaults.push(d); + } + Statement::Include(i) => { + if !clump.is_empty() { + clump.used_scope_positions = position.0; + result.push(ClumpOrInclude::Clump(clump)); + clump = Clump::default(); + position = ScopePosition(0); + } + result.push(ClumpOrInclude::Include(i.file)); + } + Statement::Subninja(mut s) => { + s.scope_position = position; + position.0 += 1; + clump.subninjas.push(s); + } + Statement::Pool(p) => { + clump.pools.push(p); + } + Statement::VariableAssignment(mut v) => { + v.1.scope_position = position; + position.0 += 1; + clump.assignments.push(v); + } + } + } + if !clump.is_empty() { + clump.used_scope_positions = position.0; + result.push(ClumpOrInclude::Clump(clump)); + } + Ok(result) } pub fn read(&mut self) -> ParseResult>> { @@ -78,26 +233,57 @@ impl<'text> Parser<'text> { '#' => self.skip_comment()?, ' ' | '\t' => return self.scanner.parse_error("unexpected whitespace"), _ => { + if self.scanner.ofs >= self.buf_len { + // The parsing code expects there to be a null byte at the end of the file, + // to allow the parsing to be more performant and exclude most checks for + // EOF. However, when parsing an individual "chunk" of the manifest, there + // won't be a null byte at the end, the scanner will do an out-of-bounds + // read past the end of the chunk and into the next chunk. When we split + // the file into chunks, we made sure to end all the chunks just before + // identifiers at the start of a new line, so that we can easily detect + // that here. 
+ assert!(self.scanner.ofs == self.buf_len); + return Ok(None); + } let ident = self.read_ident()?; self.skip_spaces(); match ident { "rule" => return Ok(Some(Statement::Rule(self.read_rule()?))), - "build" => return Ok(Some(Statement::Build(self.read_build()?))), + "build" => return Ok(Some(Statement::Build(Box::new(self.read_build()?)))), "default" => return Ok(Some(Statement::Default(self.read_default()?))), "include" => { - return Ok(Some(Statement::Include(self.read_eval(false)?))); + let result = IncludeOrSubninja { + file: self.read_eval(false)?, + scope_position: ScopePosition(0), + }; + return Ok(Some(Statement::Include(result))); } "subninja" => { - return Ok(Some(Statement::Subninja(self.read_eval(false)?))); + let result = IncludeOrSubninja { + file: self.read_eval(false)?, + scope_position: ScopePosition(0), + }; + return Ok(Some(Statement::Subninja(result))); } "pool" => return Ok(Some(Statement::Pool(self.read_pool()?))), ident => { - // TODO: The evaluation of global variables should - // be moved out of the parser, so that we can run - // multiple parsers in parallel and then evaluate - // all the variables in series at the end. - let val = self.read_vardef()?.evaluate(&[&self.vars]); - self.vars.insert(ident, val); + let x = self.read_vardef()?; + // SAFETY: We need to make sure we call evaluate + // or pre_evaluate on all VariableAssignments before + // the lifetime of 'text is over. After evaluating, + // the VariableAssignments will cache their owned + // Strings. 
+ let x = unsafe { + std::mem::transmute::< + EvalString<&'text str>, + EvalString<&'static str>, + >(x) + }; + let result = VariableAssignment::new(x); + return Ok(Some(Statement::VariableAssignment(( + ident.to_owned(), + result, + )))); } } } @@ -114,7 +300,7 @@ impl<'text> Parser<'text> { if self.scanner.peek_newline() { self.scanner.skip('\r'); self.scanner.expect('\n')?; - return Ok(EvalString::new(Vec::new())); + return Ok(EvalString::new("")); } let result = self.read_eval(false); self.scanner.skip('\r'); @@ -126,8 +312,8 @@ impl<'text> Parser<'text> { fn read_scoped_vars( &mut self, variable_name_validator: fn(var: &str) -> bool, - ) -> ParseResult> { - let mut vars = VarList::default(); + ) -> ParseResult>> { + let mut vars = SmallMap::default(); while self.scanner.peek() == ' ' { self.scanner.skip_spaces(); let name = self.read_ident()?; @@ -136,13 +322,13 @@ impl<'text> Parser<'text> { .parse_error(format!("unexpected variable {:?}", name))?; } self.skip_spaces(); - let val = self.read_vardef()?; - vars.insert(name, val); + let val = self.read_vardef()?.into_owned(); + vars.insert(name.to_owned(), val); } Ok(vars) } - fn read_rule(&mut self) -> ParseResult> { + fn read_rule(&mut self) -> ParseResult<(String, Rule)> { let name = self.read_ident()?; self.scanner.skip('\r'); self.scanner.expect('\n')?; @@ -162,7 +348,13 @@ impl<'text> Parser<'text> { | "msvc_deps_prefix" ) })?; - Ok(Rule { name, vars }) + Ok(( + name.to_owned(), + Rule { + vars, + scope_position: ScopePosition(0), + }, + )) } fn read_pool(&mut self) -> ParseResult> { @@ -172,10 +364,17 @@ impl<'text> Parser<'text> { let vars = self.read_scoped_vars(|var| matches!(var, "depth"))?; let mut depth = 0; if let Some((_, val)) = vars.into_iter().next() { - let val = val.evaluate(&[]); - depth = match val.parse::() { - Ok(d) => d, - Err(err) => return self.scanner.parse_error(format!("pool depth: {}", err)), + match val.maybe_literal() { + Some(x) => match x.parse::() { + Ok(d) => depth = d, + 
Err(err) => return self.scanner.parse_error(format!("pool depth: {}", err)), + }, + None => { + return self.scanner.parse_error(format!( + "pool depth must be a literal string, got: {:?}", + val + )) + } } } Ok(Pool { name, depth }) @@ -186,34 +385,31 @@ impl<'text> Parser<'text> { v: &mut Vec>, ) -> ParseResult<()> { self.skip_spaces(); - while self.scanner.peek() != ':' - && self.scanner.peek() != '|' - && !self.scanner.peek_newline() - { + while !matches!(self.scanner.peek(), ':' | '|') && !self.scanner.peek_newline() { v.push(self.read_eval(true)?); self.skip_spaces(); } Ok(()) } - fn read_build(&mut self) -> ParseResult> { + fn read_build(&mut self) -> ParseResult { let line = self.scanner.line; - let mut outs = Vec::new(); - self.read_unevaluated_paths_to(&mut outs)?; - let explicit_outs = outs.len(); + let mut outs_and_ins = Vec::new(); + self.read_unevaluated_paths_to(&mut outs_and_ins)?; + let explicit_outs = outs_and_ins.len(); if self.scanner.peek() == '|' { self.scanner.next(); - self.read_unevaluated_paths_to(&mut outs)?; + self.read_unevaluated_paths_to(&mut outs_and_ins)?; } + let implicit_outs = outs_and_ins.len() - explicit_outs; self.scanner.expect(':')?; self.skip_spaces(); let rule = self.read_ident()?; - let mut ins = Vec::new(); - self.read_unevaluated_paths_to(&mut ins)?; - let explicit_ins = ins.len(); + self.read_unevaluated_paths_to(&mut outs_and_ins)?; + let explicit_ins = outs_and_ins.len() - implicit_outs - explicit_outs; if self.scanner.peek() == '|' { self.scanner.next(); @@ -221,10 +417,10 @@ impl<'text> Parser<'text> { if peek == '|' || peek == '@' { self.scanner.back(); } else { - self.read_unevaluated_paths_to(&mut ins)?; + self.read_unevaluated_paths_to(&mut outs_and_ins)?; } } - let implicit_ins = ins.len() - explicit_ins; + let implicit_ins = outs_and_ins.len() - explicit_ins - implicit_outs - explicit_outs; if self.scanner.peek() == '|' { self.scanner.next(); @@ -232,44 +428,66 @@ impl<'text> Parser<'text> { 
self.scanner.back(); } else { self.scanner.expect('|')?; - self.read_unevaluated_paths_to(&mut ins)?; + self.read_unevaluated_paths_to(&mut outs_and_ins)?; } } - let order_only_ins = ins.len() - implicit_ins - explicit_ins; + let order_only_ins = + outs_and_ins.len() - implicit_ins - explicit_ins - implicit_outs - explicit_outs; if self.scanner.peek() == '|' { self.scanner.next(); self.scanner.expect('@')?; - self.read_unevaluated_paths_to(&mut ins)?; + self.read_unevaluated_paths_to(&mut outs_and_ins)?; } - let validation_ins = ins.len() - order_only_ins - implicit_ins - explicit_ins; self.scanner.skip('\r'); self.scanner.expect('\n')?; let vars = self.read_scoped_vars(|_| true)?; - Ok(Build { - rule, - line, - outs, - explicit_outs, - ins, - explicit_ins, - implicit_ins, - order_only_ins, - validation_ins, + + // SAFETY: We will evaluate the ins/outs into owned strings before 'text + // is over, and we don't want to attach the 'text lifetime to Build. So + // instead, unsafely cast the lifetime to 'static. 
+ let outs_and_ins = unsafe { + std::mem::transmute::>, Vec>>( + outs_and_ins, + ) + }; + + Ok(Build::new( + rule.to_owned(), vars, - }) + FileLoc { + filename: self.filename.clone(), + line, + }, + BuildIns { + ids: Vec::new(), + explicit: explicit_ins, + implicit: implicit_ins, + order_only: order_only_ins, + }, + BuildOuts { + ids: Vec::new(), + explicit: explicit_outs, + implicit: implicit_outs, + }, + outs_and_ins, + )) } - fn read_default(&mut self) -> ParseResult>> { - let mut defaults = Vec::new(); - self.read_unevaluated_paths_to(&mut defaults)?; - if defaults.is_empty() { + fn read_default(&mut self) -> ParseResult> { + let mut files = Vec::new(); + self.read_unevaluated_paths_to(&mut files)?; + if files.is_empty() { return self.scanner.parse_error("expected path"); } self.scanner.skip('\r'); self.scanner.expect('\n')?; - Ok(defaults) + Ok(DefaultStmt { + files, + evaluated: Vec::new(), + scope_position: ScopePosition(0), + }) } fn skip_comment(&mut self) -> ParseResult<()> { @@ -304,8 +522,9 @@ impl<'text> Parser<'text> { /// stop_at_path_separators is set, without consuming the character that /// caused it to stop. fn read_eval(&mut self, stop_at_path_separators: bool) -> ParseResult> { - self.eval_buf.clear(); + let start = self.scanner.ofs; let mut ofs = self.scanner.ofs; + let mut found_content = false; // This match block is copied twice, with the only difference being the check for // spaces, colons, and pipes in the stop_at_path_separators version. 
We could remove the // duplication by adding a match branch like `' ' | ':' | '|' if stop_at_path_separators =>` @@ -325,13 +544,8 @@ impl<'text> Parser<'text> { break self.scanner.ofs; } '$' => { - let end = self.scanner.ofs - 1; - if end > ofs { - self.eval_buf - .push(EvalPart::Literal(self.scanner.slice(ofs, end))); - } - let escape = self.read_escape()?; - self.eval_buf.push(escape); + self.read_escape()?; + found_content = true; ofs = self.scanner.ofs; } _ => {} @@ -350,13 +564,8 @@ impl<'text> Parser<'text> { break self.scanner.ofs; } '$' => { - let end = self.scanner.ofs - 1; - if end > ofs { - self.eval_buf - .push(EvalPart::Literal(self.scanner.slice(ofs, end))); - } - let escape = self.read_escape()?; - self.eval_buf.push(escape); + self.read_escape()?; + found_content = true; ofs = self.scanner.ofs; } _ => {} @@ -364,13 +573,12 @@ impl<'text> Parser<'text> { } }; if end > ofs { - self.eval_buf - .push(EvalPart::Literal(self.scanner.slice(ofs, end))); + found_content = true; } - if self.eval_buf.is_empty() { + if !found_content { return self.scanner.parse_error(format!("Expected a string")); } - Ok(EvalString::new(self.eval_buf.clone())) + Ok(EvalString::new(self.scanner.slice(start, end))) } /// Read a variable name as found after a '$' in an eval. @@ -378,7 +586,7 @@ impl<'text> Parser<'text> { /// period allowed(!), I guess because we expect things like /// foo = $bar.d /// to parse as a reference to $bar. - fn read_simple_varname(&mut self) -> ParseResult<&'text str> { + fn read_simple_varname(&mut self) -> ParseResult<()> { let start = self.scanner.ofs; while matches!(self.scanner.read(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-') {} self.scanner.back(); @@ -386,36 +594,27 @@ impl<'text> Parser<'text> { if end == start { return self.scanner.parse_error("failed to scan variable name"); } - Ok(self.scanner.slice(start, end)) + Ok(()) } /// Read and interpret the text following a '$' escape character. 
- fn read_escape(&mut self) -> ParseResult> { + fn read_escape(&mut self) -> ParseResult<()> { Ok(match self.scanner.read() { '\n' | '\r' => { self.scanner.skip_spaces(); - EvalPart::Literal(self.scanner.slice(0, 0)) - } - ' ' | '$' | ':' => { - EvalPart::Literal(self.scanner.slice(self.scanner.ofs - 1, self.scanner.ofs)) } - '{' => { - let start = self.scanner.ofs; - loop { - match self.scanner.read() { - '\0' => return self.scanner.parse_error("unexpected EOF"), - '}' => break, - _ => {} - } + ' ' | '$' | ':' => (), + '{' => loop { + match self.scanner.read() { + '\0' => return self.scanner.parse_error("unexpected EOF"), + '}' => break, + _ => {} } - let end = self.scanner.ofs - 1; - EvalPart::VarRef(self.scanner.slice(start, end)) - } + }, _ => { // '$' followed by some other text. self.scanner.back(); - let var = self.read_simple_varname()?; - EvalPart::VarRef(var) + self.read_simple_varname()?; } }) } @@ -440,6 +639,133 @@ impl<'text> Parser<'text> { } } +pub fn split_manifest_into_chunks(buf: &[u8], num_threads: usize) -> Vec<&[u8]> { + let min_chunk_size = 1024 * 1024; + let chunk_count = num_threads * 2; + let chunk_size = std::cmp::max(min_chunk_size, buf.len() / chunk_count + 1); + let mut result = Vec::with_capacity(chunk_count); + let mut start = 0; + while start < buf.len() { + let next = std::cmp::min(start + chunk_size, buf.len()); + let next = find_start_of_next_manifest_chunk(buf, next); + result.push(&buf[start..next]); + start = next; + } + result +} + +fn find_start_of_next_manifest_chunk(buf: &[u8], prospective_start: usize) -> usize { + let mut idx = prospective_start; + loop { + // TODO: Replace the search with something that uses SIMD instructions like the memchr crate + let Some(nl_index) = &buf[idx..].iter().position(|&b| b == b'\n') else { + return buf.len(); + }; + idx += nl_index + 1; + + // This newline was escaped, try again. 
It's possible that this check is too conservative, + // for example, you could have: + // - a comment that ends with a "$": "# $\n" + // - an escaped-dollar: "X=$$\n" + if idx >= 2 && buf[idx - 2] == b'$' + || idx >= 3 && buf[idx - 2] == b'\r' && buf[idx - 3] == b'$' + { + continue; + } + + // We want chunk boundaries to be at an easy/predictable place for the scanner to stop + // at. So only stop at an identifier after a newline. + if idx == buf.len() + || matches!( + buf[idx], + b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' | b'.' + ) + { + return idx; + } + } +} + +// An iterator over the EvalParts in the given string. Note that the +// string must be a valid EvalString, or undefined behavior will occur. +pub struct EvalParser<'a> { + buf: &'a [u8], + offset: usize, +} + +impl<'a> EvalParser<'a> { + pub fn new(buf: &'a [u8]) -> Self { + Self { buf, offset: 0 } + } + + fn peek(&self) -> u8 { + unsafe { *self.buf.get_unchecked(self.offset) } + } + + fn read(&mut self) -> u8 { + let c = self.peek(); + self.offset += 1; + c + } + + fn slice(&self, start: usize, end: usize) -> &'a str { + unsafe { std::str::from_utf8_unchecked(self.buf.get_unchecked(start..end)) } + } +} + +impl<'a> Iterator for EvalParser<'a> { + type Item = EvalPart<&'a str>; + + fn next(&mut self) -> Option { + let mut start = self.offset; + while self.offset < self.buf.len() { + match self.peek() { + b'$' => { + if self.offset > start { + return Some(EvalPart::Literal(self.slice(start, self.offset))); + } + self.offset += 1; + match self.peek() { + b'\n' | b'\r' => { + self.offset += 1; + while self.offset < self.buf.len() && self.peek() == b' ' { + self.offset += 1; + } + start = self.offset; + } + b' ' | b'$' | b':' => { + start = self.offset; + self.offset += 1; + } + b'{' => { + self.offset += 1; + start = self.offset; + while self.read() != b'}' {} + let end = self.offset - 1; + return Some(EvalPart::VarRef(self.slice(start, end))); + } + _ => { + // '$' followed by some other text. 
+ start = self.offset; + while self.offset < self.buf.len() + && matches!(self.peek(), b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-') + { + self.offset += 1; + } + return Some(EvalPart::VarRef(self.slice(start, self.offset))); + } + } + } + _ => self.offset += 1, + } + } + if self.offset > start { + return Some(EvalPart::Literal(self.slice(start, self.offset))); + } + None + } +} + #[cfg(test)] mod tests { use super::*; @@ -462,17 +788,24 @@ mod tests { fn parse_defaults() { test_for_line_endings(&["var = 3", "default a b$var c", ""], |test_case| { let mut buf = test_case_buffer(test_case); - let mut parser = Parser::new(&mut buf); + let mut parser = Parser::new(&mut buf, Arc::new(PathBuf::from("build.ninja")), 0); + match parser.read().unwrap().unwrap() { + Statement::VariableAssignment(_) => {} + stmt => panic!("expected variable assignment, got {:?}", stmt), + }; let default = match parser.read().unwrap().unwrap() { - Statement::Default(d) => d, - _ => panic!("expected default"), + Statement::Default(d) => d.files, + stmt => panic!("expected default, got {:?}", stmt), }; assert_eq!( - default, + default + .iter() + .map(|x| x.parse().collect::>()) + .collect::>(), vec![ - EvalString::new(vec![EvalPart::Literal("a")]), - EvalString::new(vec![EvalPart::Literal("b"), EvalPart::VarRef("var")]), - EvalString::new(vec![EvalPart::Literal("c")]), + vec![EvalPart::Literal("a")], + vec![EvalPart::Literal("b"), EvalPart::VarRef("var")], + vec![EvalPart::Literal("c")], ] ); }); @@ -481,34 +814,40 @@ mod tests { #[test] fn parse_dot_in_eval() { let mut buf = test_case_buffer("x = $y.z\n"); - let mut parser = Parser::new(&mut buf); - parser.read().unwrap(); - let x = parser.vars.get("x").unwrap(); - assert_eq!(x, ".z"); + let mut parser = Parser::new(&mut buf, Arc::new(PathBuf::from("build.ninja")), 0); + let Ok(Some(Statement::VariableAssignment((name, x)))) = parser.read() else { + panic!("Fail"); + }; + assert_eq!(name, "x"); + assert_eq!( + 
x.unevaluated.parse().collect::>(), + vec![EvalPart::VarRef("y"), EvalPart::Literal(".z")] + ); } #[test] fn parse_dot_in_rule() { let mut buf = test_case_buffer("rule x.y\n command = x\n"); - let mut parser = Parser::new(&mut buf); - let stmt = parser.read().unwrap().unwrap(); - assert!(matches!( - stmt, - Statement::Rule(Rule { - name: "x.y", - vars: _ - }) - )); + let mut parser = Parser::new(&mut buf, Arc::new(PathBuf::from("build.ninja")), 0); + let Ok(Some(Statement::Rule((name, stmt)))) = parser.read() else { + panic!("Fail"); + }; + assert_eq!(name, "x.y"); + assert_eq!(stmt.vars.len(), 1); + assert_eq!( + stmt.vars.get("command"), + Some(&EvalString::new("x".to_owned())) + ); } #[test] fn parse_trailing_newline() { let mut buf = test_case_buffer("build$\n foo$\n : $\n touch $\n\n"); - let mut parser = Parser::new(&mut buf); + let mut parser = Parser::new(&mut buf, Arc::new(PathBuf::from("build.ninja")), 0); let stmt = parser.read().unwrap().unwrap(); - assert!(matches!( - stmt, - Statement::Build(Build { rule: "touch", .. }) - )); + let Statement::Build(stmt) = stmt else { + panic!("Wasn't a build"); + }; + assert_eq!(stmt.rule, "touch"); } } diff --git a/src/progress.rs b/src/progress.rs index 6ba1418..6533f95 100644 --- a/src/progress.rs +++ b/src/progress.rs @@ -14,12 +14,11 @@ use std::time::Duration; use std::time::Instant; /// Compute the message to display on the console for a given build. -pub fn build_message(build: &Build) -> &str { +pub fn build_message(build: &Build) -> String { build - .desc - .as_ref() + .get_description() .filter(|desc| !desc.is_empty()) - .unwrap_or_else(|| build.cmdline.as_ref().unwrap()) + .unwrap_or_else(|| build.get_cmdline().unwrap()) } /// Trait for build progress notifications. 
@@ -80,11 +79,11 @@ impl Progress for DumbConsoleProgress { } fn task_started(&mut self, id: BuildId, build: &Build) { - self.log(if self.verbose { - build.cmdline.as_ref().unwrap() + if self.verbose { + self.log(build.get_cmdline().as_ref().unwrap()); } else { - build_message(build) - }); + self.log(&build_message(build)); + } self.last_started = Some(id); } @@ -98,11 +97,13 @@ impl Progress for DumbConsoleProgress { if result.output.is_empty() || self.last_started == Some(id) { // Output is empty, or we just printed the command, don't print it again. } else { - self.log(build_message(build)) + self.log(&build_message(build)) } } - Termination::Interrupted => self.log(&format!("interrupted: {}", build_message(build))), - Termination::Failure => self.log(&format!("failed: {}", build_message(build))), + Termination::Interrupted => { + self.log(&format!("interrupted: {}", &build_message(build))) + } + Termination::Failure => self.log(&format!("failed: {}", &build_message(build))), }; if !result.output.is_empty() { std::io::stdout().write_all(&result.output).unwrap(); @@ -228,7 +229,7 @@ impl FancyState { fn task_started(&mut self, id: BuildId, build: &Build) { if self.verbose { - self.log(build.cmdline.as_ref().unwrap()); + self.log(build.get_cmdline().as_ref().unwrap()); } let message = build_message(build); self.tasks.push_back(Task { @@ -254,7 +255,7 @@ impl FancyState { if result.output.is_empty() { // Common case: don't show anything. } else { - self.log(build_message(build)) + self.log(&build_message(build)) } } Termination::Interrupted => self.log(&format!("interrupted: {}", build_message(build))), diff --git a/src/run.rs b/src/run.rs index e84ec26..4b95926 100644 --- a/src/run.rs +++ b/src/run.rs @@ -34,8 +34,8 @@ fn build( let mut tasks_finished = 0; // Attempt to rebuild build.ninja. 
- let build_file_target = work.lookup(&build_filename); - if let Some(target) = build_file_target { + let mut build_file_target = work.lookup(&build_filename); + if let Some(target) = build_file_target.clone() { work.want_file(target)?; match trace::scope("work.run", || work.run())? { None => return Ok(None), @@ -57,6 +57,7 @@ fn build( progress, state.pools, ); + build_file_target = work.lookup(&build_filename); } } } @@ -66,9 +67,11 @@ fn build( let target = work .lookup(name) .ok_or_else(|| anyhow::anyhow!("unknown path requested: {:?}", name))?; - if Some(target) == build_file_target { - // Already built above. - continue; + if let Some(build_file_target) = build_file_target.as_ref() { + if std::ptr::eq(build_file_target.as_ref(), target.as_ref()) { + // Already built above. + continue; + } } work.want_file(target)?; } @@ -81,6 +84,16 @@ fn build( } let tasks = trace::scope("work.run", || work.run())?; + + // Important! Deallocating all the builds and files stored in the work + // object actually takes a considerable amount of time (>1 second on an + // AOSP build), so instead, leak the memory. This means that none of the + // Drop implementations will be called for work or anything inside of it, + // so we need to be sure we don't put anything important in the Drop + // implementations. std::mem::forget used to be an unsafe api, and should + // be treated as such. + std::mem::forget(work); + // Include any tasks from initial build in final count of steps. 
Ok(tasks.map(|n| n + tasks_finished)) } diff --git a/src/scanner.rs b/src/scanner.rs index 091791d..717b388 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -6,6 +6,7 @@ use std::{io::Read, path::Path}; pub struct ParseError { msg: String, ofs: usize, + pub chunk_index: usize, } pub type ParseResult = Result; @@ -13,17 +14,16 @@ pub struct Scanner<'a> { buf: &'a [u8], pub ofs: usize, pub line: usize, + pub chunk_index: usize, } impl<'a> Scanner<'a> { - pub fn new(buf: &'a [u8]) -> Self { - if !buf.ends_with(b"\0") { - panic!("Scanner requires nul-terminated buf"); - } + pub fn new(buf: &'a [u8], chunk_index: usize) -> Self { Scanner { buf, ofs: 0, line: 1, + chunk_index, } } @@ -34,25 +34,28 @@ impl<'a> Scanner<'a> { unsafe { *self.buf.get_unchecked(self.ofs) as char } } pub fn peek_newline(&self) -> bool { - if self.peek() == '\n' { + let peek = self.peek(); + if peek == '\n' { return true; } if self.ofs >= self.buf.len() - 1 { return false; } let peek2 = unsafe { *self.buf.get_unchecked(self.ofs + 1) as char }; - self.peek() == '\r' && peek2 == '\n' + peek == '\r' && peek2 == '\n' } pub fn next(&mut self) { if self.peek() == '\n' { self.line += 1; } + #[cfg(debug_assertions)] if self.ofs == self.buf.len() { panic!("scanned past end") } self.ofs += 1; } pub fn back(&mut self) { + #[cfg(debug_assertions)] if self.ofs == 0 { panic!("back at start") } @@ -91,46 +94,46 @@ impl<'a> Scanner<'a> { Err(ParseError { msg: msg.into(), ofs: self.ofs, + chunk_index: self.chunk_index, }) } +} - pub fn format_parse_error(&self, filename: &Path, err: ParseError) -> String { - let mut ofs = 0; - let lines = self.buf.split(|&c| c == b'\n'); - for (line_number, line) in lines.enumerate() { - if ofs + line.len() >= err.ofs { - let mut msg = "parse error: ".to_string(); - msg.push_str(&err.msg); - msg.push('\n'); - - let prefix = format!("{}:{}: ", filename.display(), line_number + 1); - msg.push_str(&prefix); +pub fn format_parse_error(mut ofs: usize, buf: &[u8], filename: &Path, 
err: ParseError) -> String { + let lines = buf.split(|&c| c == b'\n'); + for (line_number, line) in lines.enumerate() { + if ofs + line.len() >= err.ofs { + let mut msg = "parse error: ".to_string(); + msg.push_str(&err.msg); + msg.push('\n'); - let mut context = unsafe { std::str::from_utf8_unchecked(line) }; - let mut col = err.ofs - ofs; - if col > 40 { - // Trim beginning of line to fit it on screen. - msg.push_str("..."); - context = &context[col - 20..]; - col = 3 + 20; - } - if context.len() > 40 { - context = &context[0..40]; - msg.push_str(context); - msg.push_str("..."); - } else { - msg.push_str(context); - } - msg.push('\n'); + let prefix = format!("{}:{}: ", filename.display(), line_number + 1); + msg.push_str(&prefix); - msg.push_str(&" ".repeat(prefix.len() + col)); - msg.push_str("^\n"); - return msg; + let mut context = unsafe { std::str::from_utf8_unchecked(line) }; + let mut col = err.ofs - ofs; + if col > 40 { + // Trim beginning of line to fit it on screen. + msg.push_str("..."); + context = &context[col - 20..]; + col = 3 + 20; + } + if context.len() > 40 { + context = &context[0..40]; + msg.push_str(context); + msg.push_str("..."); + } else { + msg.push_str(context); } - ofs += line.len() + 1; + msg.push('\n'); + + msg.push_str(&" ".repeat(prefix.len() + col)); + msg.push_str("^\n"); + return msg; } - panic!("invalid offset when formatting error") + ofs += line.len() + 1; } + panic!("invalid offset when formatting error") } /// Scanner wants its input buffer to end in a trailing nul. diff --git a/src/smallmap.rs b/src/smallmap.rs index da8298c..88ec041 100644 --- a/src/smallmap.rs +++ b/src/smallmap.rs @@ -8,6 +8,15 @@ use std::{borrow::Borrow, fmt::Debug}; /// number of entries in the map is small. 
pub struct SmallMap(Vec<(K, V)>); +impl SmallMap { + pub fn with_capacity(cap: usize) -> Self { + Self(Vec::with_capacity(cap)) + } + pub fn len(&self) -> usize { + self.0.len() + } +} + impl Default for SmallMap { fn default() -> Self { SmallMap(Vec::default()) @@ -25,6 +34,17 @@ impl SmallMap { self.0.push((k, v)); } + // returns true if value was inserted, false if the key was already present. + pub fn insert_if_absent(&mut self, k: K, v: V) -> bool { + for (ik, _) in self.0.iter_mut() { + if *ik == k { + return false; + } + } + self.0.push((k, v)); + true + } + pub fn get(&self, q: &Q) -> Option<&V> where K: Borrow, diff --git a/src/task.rs b/src/task.rs index dfb818b..1091f79 100644 --- a/src/task.rs +++ b/src/task.rs @@ -12,7 +12,7 @@ use crate::{ depfile, graph::{Build, BuildId, RspFile}, process, - scanner::{self, Scanner}, + scanner::{self, format_parse_error, Scanner}, }; use anyhow::{anyhow, bail}; use std::path::{Path, PathBuf}; @@ -46,9 +46,9 @@ fn read_depfile(path: &Path) -> anyhow::Result> { Err(e) => bail!("read {}: {}", path.display(), e), }; - let mut scanner = Scanner::new(&bytes); + let mut scanner = Scanner::new(&bytes, 0); let parsed_deps = depfile::parse(&mut scanner) - .map_err(|err| anyhow!(scanner.format_parse_error(path, err)))?; + .map_err(|err| anyhow!(format_parse_error(0, &bytes, path, err)))?; // TODO verify deps refers to correct output let deps: Vec = parsed_deps .values() @@ -210,11 +210,11 @@ impl Runner { self.running > 0 } - pub fn start(&mut self, id: BuildId, build: &Build) { - let cmdline = build.cmdline.clone().unwrap(); - let depfile = build.depfile.clone().map(PathBuf::from); - let rspfile = build.rspfile.clone(); - let parse_showincludes = build.parse_showincludes; + pub fn start(&mut self, id: BuildId, build: &Build) -> anyhow::Result<()> { + let cmdline = build.get_cmdline().clone().unwrap(); + let depfile = build.get_depfile().clone().map(PathBuf::from); + let rspfile = build.get_rspfile()?; + let parse_showincludes 
= build.get_parse_showincludes()?; let tid = self.tids.claim(); let tx = self.tx.clone(); @@ -246,6 +246,7 @@ impl Runner { let _ = tx.send(Message::Done(task)); }); self.running += 1; + Ok(()) } /// Wait for a build to complete. May block for a long time. diff --git a/src/work.rs b/src/work.rs index 6366c72..dab3273 100644 --- a/src/work.rs +++ b/src/work.rs @@ -6,6 +6,7 @@ use crate::{ }; use std::collections::HashSet; use std::collections::VecDeque; +use std::sync::Arc; /// Build steps go through this sequence of states. /// See "Build states" in the design notes. @@ -71,6 +72,7 @@ impl StateCounts { /// Each running build is running "in" a pool; there's a default unbounded /// pool for builds that don't specify one. /// See "Tracking build state" in the design notes. +#[derive(Debug)] struct PoolState { /// A queue of builds that are ready to be executed in this pool. queued: VecDeque, @@ -139,7 +141,7 @@ impl BuildStates { let prev = std::mem::replace(&mut self.states[id], state); // We skip user-facing counters for phony builds. - let skip_ui_count = build.cmdline.is_none(); + let skip_ui_count = build.get_cmdline().is_none(); // println!("{:?} {:?}=>{:?} {:?}", id, prev, state, self.counts); if prev == BuildState::Unknown { @@ -197,10 +199,10 @@ impl BuildStates { /// Visits a BuildId that is an input to the desired output. /// Will recursively visit its own inputs. - fn want_build( + fn want_build<'a>( &mut self, graph: &Graph, - stack: &mut Vec, + stack: &mut Vec>, id: BuildId, ) -> anyhow::Result<()> { if self.get(id) != BuildState::Unknown { @@ -212,16 +214,16 @@ impl BuildStates { // Any Build that doesn't depend on an output of another Build is ready. 
let mut ready = true; - for &id in build.ordering_ins() { - self.want_file(graph, stack, id)?; - ready = ready && graph.file(id).input.is_none(); + for file in build.ordering_ins() { + self.want_file(graph, stack, file.clone())?; + ready = ready && file.input.lock().unwrap().is_none(); } - for &id in build.validation_ins() { + for file in build.validation_ins() { // This build doesn't technically depend on the validation inputs, so // allocate a new stack. Validation inputs could in theory depend on this build's // outputs. let mut stack = Vec::new(); - self.want_file(graph, &mut stack, id)?; + self.want_file(graph, &mut stack, file.clone())?; } if ready { @@ -235,21 +237,26 @@ impl BuildStates { pub fn want_file( &mut self, graph: &Graph, - stack: &mut Vec, - id: FileId, + stack: &mut Vec>, + file: Arc, ) -> anyhow::Result<()> { // Check for a dependency cycle. - if let Some(cycle) = stack.iter().position(|&sid| sid == id) { + if let Some(cycle) = stack + .iter() + .position(|f| std::ptr::eq(f.as_ref(), file.as_ref())) + { let mut err = "dependency cycle: ".to_string(); - for &id in stack[cycle..].iter() { - err.push_str(&format!("{} -> ", graph.file(id).name)); + for file in stack[cycle..].iter() { + err.push_str(&format!("{} -> ", file.name)); } - err.push_str(&graph.file(id).name); + err.push_str(&file.name); anyhow::bail!(err); } - if let Some(bid) = graph.file(id).input { - stack.push(id); + let input_guard = file.input.lock().unwrap(); + if let Some(bid) = *input_guard { + drop(input_guard); + stack.push(file.clone()); self.want_build(graph, stack, bid)?; stack.pop(); } @@ -264,7 +271,8 @@ impl BuildStates { /// Look up a PoolState by name. 
fn get_pool(&mut self, build: &Build) -> Option<&mut PoolState> { - let name = build.pool.as_deref().unwrap_or(""); + let owned_name = build.get_pool(); + let name = owned_name.as_deref().unwrap_or(""); for (key, pool) in self.pools.iter_mut() { if key == name { return Some(pool); @@ -283,7 +291,7 @@ impl BuildStates { build.location, // Unnamed pool lookups always succeed, this error is about // named pools. - build.pool.as_ref().unwrap() + build.get_pool().as_ref().unwrap() ) })?; pool.queued.push_back(id); @@ -345,23 +353,25 @@ impl<'a> Work<'a> { } } - pub fn lookup(&mut self, name: &str) -> Option { - self.graph.files.lookup(&canon_path(name)) + pub fn lookup(&mut self, name: &str) -> Option> { + self.graph.files.lookup(canon_path(name)) } - pub fn want_file(&mut self, id: FileId) -> anyhow::Result<()> { + pub fn want_file(&mut self, file: Arc) -> anyhow::Result<()> { let mut stack = Vec::new(); - self.build_states.want_file(&self.graph, &mut stack, id) + self.build_states.want_file(&self.graph, &mut stack, file) } - pub fn want_every_file(&mut self, exclude: Option) -> anyhow::Result<()> { - for id in self.graph.files.all_ids() { - if let Some(exclude) = exclude { - if id == exclude { + pub fn want_every_file(&mut self, exclude: Option>) -> anyhow::Result<()> { + for id in self.graph.files.all_files() { + if let Some(exclude) = exclude.as_ref() { + if std::ptr::eq(id.as_ref(), exclude.as_ref()) { continue; } } - self.want_file(id)?; + let mut stack = Vec::new(); + self.build_states + .want_file(&self.graph, &mut stack, id.clone())?; } Ok(()) } @@ -371,9 +381,8 @@ impl<'a> Work<'a> { fn recheck_ready(&self, id: BuildId) -> bool { let build = &self.graph.builds[id]; // println!("recheck {:?} {} ({}...)", id, build.location, self.graph.file(build.outs()[0]).name); - for &id in build.ordering_ins() { - let file = self.graph.file(id); - match file.input { + for file in build.ordering_ins() { + match *file.input.lock().unwrap() { None => { // Only generated inputs 
contribute to readiness. continue; @@ -397,19 +406,18 @@ impl<'a> Work<'a> { &mut self, id: BuildId, discovered: bool, - ) -> anyhow::Result> { + ) -> anyhow::Result>> { let build = &self.graph.builds[id]; - let ids = if discovered { + let files = if discovered { build.discovered_ins() } else { build.dirtying_ins() }; - for &id in ids { - let mtime = match self.file_state.get(id) { + for file in files { + let mtime = match self.file_state.get(file.as_ref()) { Some(mtime) => mtime, None => { - let file = self.graph.file(id); - if file.input.is_some() { + if file.input.lock().unwrap().is_some() { // This dep is generated by some other build step, but the // build graph didn't cause that other build step to be // visited first. This is an error in the build file. @@ -428,11 +436,11 @@ impl<'a> Work<'a> { file.name ); } - self.file_state.stat(id, file.path())? + self.file_state.stat(file.as_ref(), file.path())? } }; if mtime == MTime::Missing { - return Ok(Some(id)); + return Ok(Some(file.clone())); } } Ok(None) @@ -442,18 +450,27 @@ impl<'a> Work<'a> { /// Postcondition: all outputs have been stat()ed. fn record_finished(&mut self, id: BuildId, result: task::TaskResult) -> anyhow::Result<()> { // Clean up the deps discovered from the task. - let mut deps = Vec::new(); + let mut deps: Vec> = Vec::new(); if let Some(names) = result.discovered_deps { for name in names { let fileid = self.graph.files.id_from_canonical(canon_path(name)); // Filter duplicates from the file list. - if deps.contains(&fileid) { + if deps + .iter() + .find(|x| std::ptr::eq(x.as_ref(), fileid.as_ref())) + .is_some() + { continue; } // Filter out any deps that were already dirtying in the build file. // Note that it's allowed to have a duplicate against an order-only // dep; see `discover_existing_dep` test. 
- if self.graph.builds[id].dirtying_ins().contains(&fileid) { + if self.graph.builds[id] + .dirtying_ins() + .iter() + .find(|x| std::ptr::eq(x.as_ref(), fileid.as_ref())) + .is_some() + { continue; } deps.push(fileid); @@ -467,7 +484,7 @@ impl<'a> Work<'a> { anyhow::bail!( "{}: depfile references nonexistent {}", self.graph.builds[id].location, - self.graph.file(missing).name + missing.name ); } } @@ -475,7 +492,7 @@ impl<'a> Work<'a> { let input_was_missing = self.graph.builds[id] .dirtying_ins() .iter() - .any(|&id| self.file_state.get(id).unwrap() == MTime::Missing); + .any(|file| self.file_state.get(file.as_ref()).unwrap() == MTime::Missing); // Update any cached state of the output files to reflect their new state. let output_was_missing = self.stat_all_outputs(id)?.is_some(); @@ -487,7 +504,7 @@ impl<'a> Work<'a> { } let build = &self.graph.builds[id]; - let hash = hash::hash_build(&self.graph.files, &self.file_state, build); + let hash = hash::hash_build(&self.file_state, build)?; self.db.write_build(&self.graph, id, hash)?; Ok(()) @@ -499,12 +516,12 @@ impl<'a> Work<'a> { self.build_states.set(id, build, BuildState::Done); let mut dependents = HashSet::new(); - for &id in build.outs() { - for &id in &self.graph.file(id).dependents { - if self.build_states.get(id) != BuildState::Want { + for file in build.outs() { + for &file in file.dependents.iter() { + if self.build_states.get(file) != BuildState::Want { continue; } - dependents.insert(id); + dependents.insert(file); } } for id in dependents { @@ -519,14 +536,13 @@ impl<'a> Work<'a> { /// Stat all the outputs of a build. /// Called before it's run (for determining whether it's up to date) and /// after (to see if it touched any outputs). 
- fn stat_all_outputs(&mut self, id: BuildId) -> anyhow::Result> { + fn stat_all_outputs(&mut self, id: BuildId) -> anyhow::Result>> { let build = &self.graph.builds[id]; let mut missing = None; - for &id in build.outs() { - let file = self.graph.file(id); - let mtime = self.file_state.stat(id, file.path())?; + for file in build.outs() { + let mtime = self.file_state.stat(file.as_ref(), file.path())?; if mtime == MTime::Missing && missing.is_none() { - missing = Some(id); + missing = Some(file.clone()); } } Ok(missing) @@ -538,13 +554,12 @@ impl<'a> Work<'a> { /// Returns a build error if any required input files are missing. /// Otherwise returns the missing id if any expected but not required files, /// e.g. outputs, are missing, implying that the build needs to be executed. - fn check_build_files_missing(&mut self, id: BuildId) -> anyhow::Result> { + fn check_build_files_missing(&mut self, id: BuildId) -> anyhow::Result>> { // Ensure we have state for all input files. if let Some(missing) = self.ensure_input_files(id, false)? { - let file = self.graph.file(missing); - if file.input.is_none() { + if missing.input.lock().unwrap().is_none() { let build = &self.graph.builds[id]; - anyhow::bail!("{}: input {} missing", build.location, file.name); + anyhow::bail!("{}: input {} missing", build.location, missing.name); } return Ok(Some(missing)); } @@ -589,7 +604,7 @@ impl<'a> Work<'a> { /// Prereq: any dependent input is already generated. fn check_build_dirty(&mut self, id: BuildId) -> anyhow::Result { let build = &self.graph.builds[id]; - let phony = build.cmdline.is_none(); + let phony = build.get_cmdline().is_none(); let file_missing = if phony { self.check_build_files_missing_phony(id)?; return Ok(false); // Phony builds never need to run anything. 
@@ -604,8 +619,7 @@ impl<'a> Work<'a> { if self.options.explain { self.progress.log(&format!( "explain: {}: input {} missing", - build.location, - self.graph.file(missing).name + build.location, missing.name )); } return Ok(true); @@ -630,16 +644,13 @@ impl<'a> Work<'a> { Some(prev_hash) => prev_hash, }; - let hash = hash::hash_build(&self.graph.files, &self.file_state, build); + let hash = hash::hash_build(&self.file_state, build)?; if prev_hash != hash { if self.options.explain { self.progress .log(&format!("explain: {}: manifest changed", build.location)); - self.progress.log(&hash::explain_hash_build( - &self.graph.files, - &self.file_state, - build, - )); + self.progress + .log(&hash::explain_hash_build(&self.file_state, build)?); } return Ok(true); } @@ -650,10 +661,10 @@ impl<'a> Work<'a> { /// Create the parent directories of a given list of fileids. /// Used to create directories used for outputs. /// TODO: do this within the thread executing the subtask? - fn create_parent_dirs(&self, ids: &[FileId]) -> anyhow::Result<()> { + fn create_parent_dirs(&self, ids: &[Arc]) -> anyhow::Result<()> { let mut dirs: Vec<&std::path::Path> = Vec::new(); - for &out in ids { - if let Some(parent) = self.graph.file(out).path().parent() { + for out in ids { + if let Some(parent) = out.path().parent() { if dirs.iter().any(|&p| p == parent) { continue; } @@ -694,7 +705,7 @@ impl<'a> Work<'a> { let build = &self.graph.builds[id]; self.build_states.set(id, build, BuildState::Running); self.create_parent_dirs(build.outs())?; - runner.start(id, build); + runner.start(id, build)?; self.progress.task_started(id, build); made_progress = true; } @@ -738,7 +749,7 @@ impl<'a> Work<'a> { let build = &self.graph.builds[task.buildid]; trace::if_enabled(|t| { let desc = progress::build_message(build); - t.write_complete(desc, task.tid + 1, task.span.0, task.span.1); + t.write_complete(&desc, task.tid + 1, task.span.0, task.span.1); }); self.progress @@ -775,26 +786,3 @@ impl<'a> Work<'a> 
{ Ok(success.then_some(tasks_done)) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn build_cycle() -> Result<(), anyhow::Error> { - let file = " -build a: phony b -build b: phony c -build c: phony a -"; - let mut graph = crate::load::parse("build.ninja", file.as_bytes().to_vec())?; - let a_id = graph.files.id_from_canonical("a".to_owned()); - let mut states = BuildStates::new(graph.builds.next_id(), SmallMap::default()); - let mut stack = Vec::new(); - match states.want_file(&graph, &mut stack, a_id) { - Ok(_) => panic!("expected build cycle error"), - Err(err) => assert_eq!(err.to_string(), "dependency cycle: a -> b -> c -> a"), - } - Ok(()) - } -} diff --git a/tests/e2e/basic.rs b/tests/e2e/basic.rs index fca23b8..edaaacf 100644 --- a/tests/e2e/basic.rs +++ b/tests/e2e/basic.rs @@ -183,6 +183,24 @@ rule echo Ok(()) } +#[cfg(unix)] +#[test] +fn dollar_in_filename() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +# need a special touch rule that escapes the $ for the shell +rule touch + command = touch '$out' +build out$$foo: touch +", + )?; + space.run_expect(&mut n2_command(vec!["out$foo"]))?; + assert!(space.read("out$foo").is_ok()); + Ok(()) +} + #[test] fn explain() -> anyhow::Result<()> { let space = TestSpace::new()?; @@ -451,3 +469,21 @@ build foo: write_file assert_eq!(space.read("foo")?, b"Hello, world!\n"); Ok(()) } + +#[test] +fn cycle() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +build a: phony b +build b: phony c +build c: phony a +", + )?; + space.write("in", "")?; + let out = space.run(&mut n2_command(vec!["a"]))?; + assert_output_contains(&out, "dependency cycle: a -> b -> c -> a"); + + Ok(()) +} diff --git a/tests/e2e/include_and_subninja.rs b/tests/e2e/include_and_subninja.rs new file mode 100644 index 0000000..425cfac --- /dev/null +++ b/tests/e2e/include_and_subninja.rs @@ -0,0 +1,88 @@ +use crate::e2e::{n2_command, TestSpace}; + 
+#[cfg(unix)] +#[test] +fn include_creates_new_variable_with_dependency() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $contents > $out + +a = foo +include included.ninja +build out: write_file + contents = $b + +", + )?; + space.write( + "included.ninja", + " +b = $a bar +", + )?; + space.run_expect(&mut n2_command(vec!["out"]))?; + assert_eq!(space.read("out").unwrap(), b"foo bar\n"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn include_creates_edits_existing_variable() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $contents > $out + +a = foo +include included.ninja +build out: write_file + contents = $a + +", + )?; + space.write( + "included.ninja", + " +a = $a bar +", + )?; + space.run_expect(&mut n2_command(vec!["out"]))?; + assert_eq!(space.read("out").unwrap(), b"foo bar\n"); + Ok(()) +} + +#[cfg(unix)] +#[test] +fn subninja_doesnt_affect_variables_in_parent_scope() -> anyhow::Result<()> { + let space = TestSpace::new()?; + space.write( + "build.ninja", + " +rule write_file + command = echo $contents > $out + +a = foo +subninja subninja.ninja +build out: write_file + contents = $a + +", + )?; + space.write( + "subninja.ninja", + " +a = bar +build out2: write_file + contents = $a +", + )?; + space.run_expect(&mut n2_command(vec!["out", "out2"]))?; + assert_eq!(space.read("out").unwrap(), b"foo\n"); + assert_eq!(space.read("out2").unwrap(), b"bar\n"); + Ok(()) +} diff --git a/tests/e2e/mod.rs b/tests/e2e/mod.rs index a1e7e7f..956076f 100644 --- a/tests/e2e/mod.rs +++ b/tests/e2e/mod.rs @@ -3,6 +3,7 @@ mod basic; mod directories; mod discovered; +mod include_and_subninja; mod missing; mod regen; mod validations;