From e7327a4d8666dd030e2b410daf97222878d35065 Mon Sep 17 00:00:00 2001 From: Edgar Lee Date: Mon, 31 Mar 2025 20:29:46 -0400 Subject: [PATCH 1/6] Make modules graph, densemap and depfile public for external tools --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 44411ce..761ef00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,9 @@ pub mod canon; mod db; -mod densemap; -mod depfile; +pub mod densemap; +pub mod depfile; mod eval; -mod graph; +pub mod graph; mod hash; pub mod load; pub mod parse; From d34a26fc68760642e8a03afbaf924ee68b3a328b Mon Sep 17 00:00:00 2001 From: Edgar Lee Date: Mon, 31 Mar 2025 20:38:46 -0400 Subject: [PATCH 2/6] Refactor parse_showincludes as method on Build to expose value of deps on Build --- src/graph.rs | 17 +++++++++++++---- src/load.rs | 10 +++++----- src/task.rs | 2 +- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index a5b23ae..149f1d1 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -155,12 +155,12 @@ pub struct Build { /// Command line to run. Absent for phony builds. pub cmdline: Option, + /// Controls how dependency information is processed after compilation. + pub deps: Option, + /// Path to generated `.d` file, if any. pub depfile: Option, - /// If true, extract "/showIncludes" lines from output. - pub parse_showincludes: bool, - // Struct that contains the path to the rsp file and its contents, if any. pub rspfile: Option, @@ -186,8 +186,8 @@ impl Build { location: loc, desc: None, cmdline: None, + deps: None, depfile: None, - parse_showincludes: false, rspfile: None, pool: None, ins, @@ -243,6 +243,15 @@ impl Build { pub fn outs(&self) -> &[FileId] { &self.outs.ids } + + /// If true, extract "/showIncludes" lines from output. + pub fn parse_showincludes(&self) -> bool { + match self.deps.as_deref() { + Some("msvc") => true, + _ => false, + } + } + } /// The build graph: owns Files/Builds and maps FileIds/BuildIds to them. diff --git a/src/load.rs b/src/load.rs index 4624c51..c67292d 100644 --- a/src/load.rs +++ b/src/load.rs @@ -140,10 +140,10 @@ impl Loader { let cmdline = lookup("command"); let desc = lookup("description"); let depfile = lookup("depfile"); - let parse_showincludes = match lookup("deps").as_deref() { - None => false, - Some("gcc") => false, - Some("msvc") => true, + let deps = match lookup("deps").as_deref() { + None => None, + Some("gcc") => Some("gcc".to_string()), + Some("msvc") => Some("msvc".to_string()), Some(other) => bail!("invalid deps attribute {:?}", other), }; let pool = lookup("pool"); @@ -164,7 +164,7 @@ impl Loader { build.cmdline = cmdline; build.desc = desc; build.depfile = depfile; - build.parse_showincludes = parse_showincludes; + build.deps = deps; build.rspfile = rspfile; build.pool = pool; build.hide_success = hide_success; diff --git a/src/task.rs b/src/task.rs index bf4e816..c67eecd 100644 --- a/src/task.rs +++ b/src/task.rs @@ -214,7 +214,7 @@ impl Runner { let cmdline = build.cmdline.clone().unwrap(); let depfile = build.depfile.clone().map(PathBuf::from); let rspfile = build.rspfile.clone(); - let parse_showincludes = build.parse_showincludes; + let parse_showincludes = build.parse_showincludes(); let hide_progress = build.hide_progress; let tid = self.tids.claim(); From 2d1d1377c5fa15fccfe35e2342442a196b6a9fda Mon Sep 17 00:00:00 2001 From: Edgar Lee Date: Mon, 31 Mar 2025 21:01:01 -0400 Subject: [PATCH 3/6] Refactor graph::Build ins & outs to BuildDependencies and derive Clone --- src/graph.rs | 121 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 46 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index 149f1d1..ed8252f 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -6,9 +6,12 @@ use crate::{ densemap::{self, DenseMap}, hash::BuildHash, }; -use std::collections::{hash_map::Entry, HashMap}; use std::path::{Path, PathBuf}; use std::time::SystemTime; +use std::{ + collections::{hash_map::Entry, HashMap}, + ops::{Deref, DerefMut}, +}; /// Id for File nodes in the Graph. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] @@ -74,6 +77,7 @@ pub struct RspFile { } /// Input files to a Build. +#[derive(Clone)] pub struct BuildIns { /// Internally we stuff explicit/implicit/order-only ins all into one Vec. /// This is mostly to simplify some of the iteration and is a little more @@ -88,6 +92,7 @@ pub struct BuildIns { } /// Output files from a Build. +#[derive(Clone)] pub struct BuildOuts { /// Similar to ins, we keep both explicit and implicit outs in one Vec. pub ids: Vec, @@ -144,29 +149,9 @@ mod tests { } } -/// A single build action, generating File outputs from File inputs with a command. -pub struct Build { - /// Source location this Build was declared. - pub location: FileLoc, - - /// User-provided description of the build step. - pub desc: Option, - - /// Command line to run. Absent for phony builds. - pub cmdline: Option, - - /// Controls how dependency information is processed after compilation. - pub deps: Option, - - /// Path to generated `.d` file, if any. - pub depfile: Option, - - // Struct that contains the path to the rsp file and its contents, if any. - pub rspfile: Option, - - /// Pool to execute this build in, if any. - pub pool: Option, - +#[derive(Clone)] +pub struct BuildDependencies { + /// Input files. pub ins: BuildIns, /// Additional inputs discovered from a previous build. @@ -174,30 +159,9 @@ pub struct Build { /// Output files. pub outs: BuildOuts, - - /// True if output of command should be hidden on successful completion. - pub hide_success: bool, - /// True if last line of output should not be shown in status. - pub hide_progress: bool, } -impl Build { - pub fn new(loc: FileLoc, ins: BuildIns, outs: BuildOuts) -> Self { - Build { - location: loc, - desc: None, - cmdline: None, - deps: None, - depfile: None, - rspfile: None, - pool: None, - ins, - discovered_ins: Vec::new(), - outs, - hide_success: false, - hide_progress: false, - } - } +impl BuildDependencies { /// Input paths that appear in `$in`. pub fn explicit_ins(&self) -> &[FileId] { &self.ins.ids[0..self.ins.explicit] @@ -243,6 +207,58 @@ impl Build { pub fn outs(&self) -> &[FileId] { &self.outs.ids } +} + +/// A single build action, generating File outputs from File inputs with a command. +pub struct Build { + /// Source location this Build was declared. + pub location: FileLoc, + + /// Inputs and outputs for this build. + pub dependencies: BuildDependencies, + + /// User-provided description of the build step. + pub desc: Option, + + /// Command line to run. Absent for phony builds. + pub cmdline: Option, + + /// Controls how dependency information is processed after compilation. + pub deps: Option, + + /// Path to generated `.d` file, if any. + pub depfile: Option, + + // Struct that contains the path to the rsp file and its contents, if any. + pub rspfile: Option, + + /// Pool to execute this build in, if any. + pub pool: Option, + + /// True if output of command should be hidden on successful completion. + pub hide_success: bool, + /// True if last line of output should not be shown in status. + pub hide_progress: bool, +} +impl Build { + pub fn new(loc: FileLoc, ins: BuildIns, outs: BuildOuts) -> Self { + Build { + location: loc, + dependencies: BuildDependencies { + ins, + discovered_ins: Vec::new(), + outs, + }, + desc: None, + cmdline: None, + deps: None, + depfile: None, + rspfile: None, + pool: None, + hide_success: false, + hide_progress: false, + } + } /// If true, extract "/showIncludes" lines from output. pub fn parse_showincludes(&self) -> bool { @@ -251,7 +267,20 @@ impl Build { _ => false, } } +} +impl Deref for Build { + type Target = BuildDependencies; + + fn deref(&self) -> &Self::Target { + &self.dependencies + } +} + +impl DerefMut for Build { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.dependencies + } } /// The build graph: owns Files/Builds and maps FileIds/BuildIds to them. From 328d70366f92573a82ded44f9d1535f753b532fc Mon Sep 17 00:00:00 2001 From: Edgar Lee Date: Mon, 31 Mar 2025 21:10:23 -0400 Subject: [PATCH 4/6] Derive Clone on graph::File --- src/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph.rs b/src/graph.rs index ed8252f..650f219 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -42,7 +42,7 @@ impl From for BuildId { } /// A single file referenced as part of a build. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct File { /// Canonical path to the file. pub name: String, From 341b5112f69bd77a4345a0855fafd14a73d2689a Mon Sep 17 00:00:00 2001 From: Edgar Lee Date: Mon, 31 Mar 2025 21:12:35 -0400 Subject: [PATCH 5/6] Move all_ids implementation into DenseMap --- src/densemap.rs | 4 ++++ src/graph.rs | 4 ---- src/work.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/densemap.rs b/src/densemap.rs index 2d9878e..a099a0a 100644 --- a/src/densemap.rs +++ b/src/densemap.rs @@ -50,6 +50,10 @@ impl DenseMap { self.vec.push(val); id } + + pub fn all_ids(&self) -> impl Iterator { + (0..self.vec.len()).map(|id| K::from(id)) + } } impl DenseMap { diff --git a/src/graph.rs b/src/graph.rs index 650f219..5f7a1c2 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -368,10 +368,6 @@ impl GraphFiles { } } } - - pub fn all_ids(&self) -> impl Iterator { - (0..self.by_id.next_id().0).map(|id| FileId(id)) - } } /// MTime info gathered for a file. This also models "file is absent". diff --git a/src/work.rs b/src/work.rs index 9b618a6..4e8903e 100644 --- a/src/work.rs +++ b/src/work.rs @@ -383,7 +383,7 @@ impl<'a> Work<'a> { } pub fn want_every_file(&mut self, exclude: Option) -> anyhow::Result<()> { - for id in self.graph.files.all_ids() { + for id in self.graph.files.by_id.all_ids() { if let Some(exclude) = exclude { if id == exclude { continue; From badb10943ada5097c54e693e53e0f4727e00681f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomek=20Ma=C5=84ko?= Date: Tue, 29 Apr 2025 23:38:05 +0200 Subject: [PATCH 6/6] Read build files using memory mapping --- Cargo.lock | 10 ++ Cargo.toml | 5 + benches/loader.rs | 24 ++++ benches/parse.rs | 2 +- rust-toolchain.toml | 3 + src/file_loader.rs | 97 +++++++++++++ src/file_loader/ref_cell_hash_map.rs | 31 ++++ src/lib.rs | 1 + src/load.rs | 206 +++++++++++++++++---------- tests/e2e/loader_memmap.rs | 51 +++++++ tests/e2e/mod.rs | 5 + 11 files changed, 360 insertions(+), 75 deletions(-) create mode 100644 benches/loader.rs create mode 100644 rust-toolchain.toml create mode 100644 src/file_loader.rs create mode 100644 src/file_loader/ref_cell_hash_map.rs create mode 100644 tests/e2e/loader_memmap.rs diff --git a/Cargo.lock b/Cargo.lock index 8be9191..def6775 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -157,6 +157,15 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + [[package]] name = "n2" version = "0.1.0" @@ -166,6 +175,7 @@ dependencies = [ "jemallocator", "lexopt", "libc", + "memmap2", "rustc-hash", "tempfile", "windows-sys 0.48.0", diff --git a/Cargo.toml b/Cargo.toml index d0ade3c..1fb22e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ anyhow = "1.0" lexopt = "0.3.0" libc = "0.2" rustc-hash = "1.1.0" +memmap2 = "0.9.5" [target.'cfg(windows)'.dependencies.windows-sys] version = "0.48" @@ -50,6 +51,10 @@ harness = false name = "canon" harness = false +[[bench]] +name = "loader" +harness = false + [features] default = ["jemalloc"] jemalloc = ["jemallocator"] diff --git a/benches/loader.rs b/benches/loader.rs new file mode 100644 index 0000000..0c390e7 --- /dev/null +++ b/benches/loader.rs @@ -0,0 +1,24 @@ +use divan::Bencher; + +mod loader { + use super::*; + use n2::load; + + #[divan::bench(sample_size = 3, sample_count = 3)] + fn file_via_loader(bencher: Bencher) { + bencher.bench_local(|| { + load::testing::read_internal("benches/build.ninja").unwrap(); + }); + } + + #[divan::bench(sample_size = 3, sample_count = 3)] + fn file_via_loader_slow(bencher: Bencher) { + bencher.bench_local(|| { + load::testing::read_internal_slow("benches/build.ninja").unwrap(); + }); + } +} + +fn main() { + divan::main(); +} diff --git a/benches/parse.rs b/benches/parse.rs index 2886685..fa6aca3 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -32,7 +32,7 @@ mod parser { } // This can take a while to run (~100ms per sample), so reduce total count. - #[divan::bench(sample_size = 3, max_time = 1)] + #[divan::bench(sample_size = 3, sample_count = 3)] fn file(bencher: Bencher) { let input = match n2::scanner::read_file_with_nul("benches/build.ninja".as_ref()) { Ok(input) => input, diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..4512b27 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly-2025-04-26" +profile = "complete" diff --git a/src/file_loader.rs b/src/file_loader.rs new file mode 100644 index 0000000..ded714c --- /dev/null +++ b/src/file_loader.rs @@ -0,0 +1,97 @@ +use std::{ + cell::RefCell, fs::File, path::PathBuf, rc::Rc, sync::Arc +}; +use libc::{ + c_void, mmap, munmap, sysconf, MAP_ANONYMOUS, MAP_FAILED, MAP_FIXED, MAP_PRIVATE, + PROT_READ, PROT_WRITE, _SC_PAGESIZE, +}; + +use anyhow::{anyhow, bail, Result}; +use ref_cell_hash_map::RefCellHashMapFileMap; +// use libc::{sysconf, _SC_PAGESIZE}; +use memmap2::{Mmap, MmapOptions}; + +use crate::graph::{FileId, Graph}; + +pub mod ref_cell_hash_map; + +pub trait FileMap { + fn new() -> Self where Self : Sized; + fn read_file(&self, file_id: FileId, file_path: &PathBuf) -> Result; +} + +pub struct FileLoader { + graph: Rc>, + files: FileMapT, +} + +impl FileLoader { + pub fn new(graph: Rc>) -> Self { + FileLoader { graph, files: RefCellHashMapFileMap::new() } + } + + pub fn read_file(&self, file_id: FileId) -> Result { + let file_path = self.graph.borrow().file(file_id).path().to_path_buf(); + + self.files.read_file(file_id, &file_path) + .map_err(|err| anyhow!("read {}: {}", file_path.display(), err)) + } +} + +#[derive(Clone)] +pub struct FileHandle { + pub size: usize, + pub path: PathBuf, + + mmap: Option>, +} + +impl FileHandle { + fn from_path(path: &PathBuf) -> Result { + let file = Arc::new(File::options().read(true).write(true).open(path)?); + let metadata = file.metadata()?; + let size = metadata.len() as usize; + + let mmap = if size > 0 { + let mmap = unsafe { + let page_size = sysconf(_SC_PAGESIZE) as usize; + let mapping_size = (size + page_size).next_multiple_of(page_size); + let mut mmap = MmapOptions::new().len(mapping_size).map_copy(&file)?; + + // TODO: upstream it to the memmap2 crate? Also what about other platforms? + let addr2 = libc::mmap( + mmap.as_ptr_range().end.sub(page_size) as *mut c_void, + page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + -1, + 0, + ); + if addr2 == MAP_FAILED { + bail!("mmap failed"); + } + + // Ensure we have a 0 byte at the end + mmap[size] = 0; + + mmap.make_read_only()? + }; + + Some(Arc::new(mmap)) + } else { + None + }; + + Ok(FileHandle { mmap, path: path.to_path_buf(), size }) + } +} + +impl AsRef<[u8]> for FileHandle { + #[inline] + fn as_ref(&self) -> &[u8] { + match self.mmap { + Some(ref mmap) => mmap[0 ..= self.size].as_ref(), + None => &[0], + } + } +} diff --git a/src/file_loader/ref_cell_hash_map.rs b/src/file_loader/ref_cell_hash_map.rs new file mode 100644 index 0000000..5f72930 --- /dev/null +++ b/src/file_loader/ref_cell_hash_map.rs @@ -0,0 +1,31 @@ +use std::{cell::RefCell, path::PathBuf}; +use std::collections::HashMap; + +use anyhow::Result; + +use crate::graph::{FileId}; + +use super::{FileHandle, FileMap}; + +#[derive(Default)] +pub struct RefCellHashMapFileMap(RefCell>); + +impl FileMap for RefCellHashMapFileMap { + fn new() -> Self where Self : Sized { + RefCellHashMapFileMap(RefCell::new(HashMap::with_capacity(16))) + } + + fn read_file(&self, file_id: FileId, file_path: &PathBuf) -> Result { + let file = { self.0.borrow().get(&file_id).map(Clone::clone) }; + match file { + Some(handle) => Ok(handle), + None => { + let handle = FileHandle::from_path(file_path)?; + let _ = self.0.borrow_mut().insert(file_id, handle.clone()); + + Ok(handle) + } + } + } +} + diff --git a/src/lib.rs b/src/lib.rs index 761ef00..8308ec1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ mod task; mod terminal; mod trace; mod work; +mod file_loader; #[cfg(feature = "jemalloc")] #[cfg(not(any(miri, windows, target_arch = "wasm32")))] diff --git a/src/load.rs b/src/load.rs index c67292d..5fbfbd4 100644 --- a/src/load.rs +++ b/src/load.rs @@ -1,17 +1,10 @@ //! Graph loading: runs .ninja parsing and constructs the build graph from it. use crate::{ - canon::{canonicalize_path, to_owned_canon_path}, - db, - eval::{self, EvalPart, EvalString}, - graph::{self, FileId, RspFile}, - parse::{self, Statement}, - scanner, - smallmap::SmallMap, - trace, + canon::{canonicalize_path, to_owned_canon_path}, db, eval::{self, EvalPart, EvalString}, file_loader::FileLoader, graph::{self, FileId, RspFile}, parse::{self, Statement}, scanner, smallmap::SmallMap, trace }; use anyhow::{anyhow, bail}; -use std::collections::HashMap; +use std::{cell::{Ref, RefCell}, collections::HashMap, rc::Rc}; use std::path::PathBuf; use std::{borrow::Cow, path::Path}; @@ -47,14 +40,29 @@ impl<'a> eval::Env for BuildImplicitVars<'a> { } /// Internal state used while loading. -#[derive(Default)] pub struct Loader { - pub graph: graph::Graph, + _graph: Rc>, default: Vec, /// rule name -> list of (key, val) rules: HashMap>>, pools: SmallMap, builddir: Option, + file_loader: FileLoader, +} + +impl Default for Loader { + fn default() -> Self { + let graph: Rc> = Default::default(); + + Self { + _graph: graph.clone(), + default: Default::default(), + rules: Default::default(), + pools: Default::default(), + builddir: Default::default(), + file_loader: FileLoader::new(graph.clone()), + } + } } impl Loader { @@ -66,13 +74,17 @@ impl Loader { loader } + pub fn graph<'a>(&'a self) -> Ref<'a, graph::Graph> { + self._graph.borrow() + } + /// Convert a path string to a FileId. fn path(&mut self, mut path: String) -> FileId { // Perf: this is called while parsing build.ninja files. We go to // some effort to avoid allocating in the common case of a path that // refers to a file that is already known. canonicalize_path(&mut path); - self.graph.files.id_from_canonical(path) + self._graph.borrow_mut().files.id_from_canonical(path) } fn evaluate_path(&mut self, path: EvalString<&str>, envs: &[&dyn eval::Env]) -> FileId { @@ -121,65 +133,73 @@ impl Loader { None => bail!("unknown rule {:?}", b.rule), }; - let implicit_vars = BuildImplicitVars { - graph: &self.graph, - build: &build, - }; + { + let implicit_vars = BuildImplicitVars { + graph: &self.graph(), + build: &build, + }; - // temp variable in order to not move all of b into the closure - let build_vars = &b.vars; - let lookup = |key: &str| -> Option { - // Look up `key = ...` binding in build and rule block. - // See "Variable scope" in the design notes. - Some(match build_vars.get(key) { - Some(val) => val.evaluate(&[env]), - None => rule.get(key)?.evaluate(&[&implicit_vars, build_vars, env]), - }) - }; + // temp variable in order to not move all of b into the closure + let build_vars = &b.vars; + + let lookup = |key: &str| -> Option { + // Look up `key = ...` binding in build and rule block. + // See "Variable scope" in the design notes. + Some(match build_vars.get(key) { + Some(val) => val.evaluate(&[env]), + None => rule.get(key)?.evaluate(&[&implicit_vars, build_vars, env]), + }) + }; - let cmdline = lookup("command"); - let desc = lookup("description"); - let depfile = lookup("depfile"); - let deps = match lookup("deps").as_deref() { - None => None, - Some("gcc") => Some("gcc".to_string()), - Some("msvc") => Some("msvc".to_string()), - Some(other) => bail!("invalid deps attribute {:?}", other), - }; - let pool = lookup("pool"); - - let rspfile_path = lookup("rspfile"); - let rspfile_content = lookup("rspfile_content"); - let rspfile = match (rspfile_path, rspfile_content) { - (None, None) => None, - (Some(path), Some(content)) => Some(RspFile { - path: std::path::PathBuf::from(path), - content, - }), - _ => bail!("rspfile and rspfile_content need to be both specified"), + let cmdline = lookup("command"); + let desc = lookup("description"); + let depfile = lookup("depfile"); + let deps = match lookup("deps").as_deref() { + None => None, + Some("gcc") => Some("gcc".to_string()), + Some("msvc") => Some("msvc".to_string()), + Some(other) => bail!("invalid deps attribute {:?}", other), + }; + let pool = lookup("pool"); + + let rspfile_path = lookup("rspfile"); + let rspfile_content = lookup("rspfile_content"); + let rspfile = match (rspfile_path, rspfile_content) { + (None, None) => None, + (Some(path), Some(content)) => Some(RspFile { + path: std::path::PathBuf::from(path), + content, + }), + _ => bail!("rspfile and rspfile_content need to be both specified"), + }; + let hide_success = lookup("hide_success").is_some(); + let hide_progress = lookup("hide_progress").is_some(); + + build.cmdline = cmdline; + build.desc = desc; + build.depfile = depfile; + build.deps = deps; + build.rspfile = rspfile; + build.pool = pool; + build.hide_success = hide_success; + build.hide_progress = hide_progress; }; - let hide_success = lookup("hide_success").is_some(); - let hide_progress = lookup("hide_progress").is_some(); - - build.cmdline = cmdline; - build.desc = desc; - build.depfile = depfile; - build.deps = deps; - build.rspfile = rspfile; - build.pool = pool; - build.hide_success = hide_success; - build.hide_progress = hide_progress; - - self.graph.add_build(build) + + self._graph.borrow_mut().add_build(build) } fn read_file(&mut self, id: FileId) -> anyhow::Result<()> { - let path = self.graph.file(id).path().to_path_buf(); - let bytes = match trace::scope("read file", || scanner::read_file_with_nul(&path)) { - Ok(b) => b, - Err(e) => bail!("read {}: {}", path.display(), e), - }; - self.parse(path, &bytes) + let file = trace::scope("read file", || self.file_loader.read_file(id))?; + + self.parse(file.path.clone(), file.as_ref()) + } + + // For benchmark comparison against memmapings + pub(in crate::load) fn read_file_slow(&mut self, id: FileId) -> anyhow::Result<()> { + let path = self._graph.borrow().file(id).path().to_path_buf(); + let file = trace::scope("read file", || scanner::read_file_with_nul(&path))?; + + self.parse(path, file.as_ref()) } fn evaluate_and_read_file( @@ -250,10 +270,13 @@ pub struct State { pub fn read(build_filename: &str) -> anyhow::Result { let mut loader = Loader::new(); trace::scope("loader.read_file", || { - let id = loader - .graph - .files - .id_from_canonical(to_owned_canon_path(build_filename)); + let id = { + loader + ._graph + .borrow_mut() + .files + .id_from_canonical(to_owned_canon_path(build_filename)) + }; loader.read_file(id) })?; let mut hashes = graph::Hashes::default(); @@ -265,18 +288,51 @@ pub fn read(build_filename: &str) -> anyhow::Result { std::fs::create_dir_all(parent)?; } }; - db::open(&db_path, &mut loader.graph, &mut hashes) + db::open(&db_path, &mut loader._graph.borrow_mut(), &mut hashes) }) .map_err(|err| anyhow!("load .n2_db: {}", err))?; + + let graph = loader._graph; + let default = loader.default; + let pools = loader.pools; + Ok(State { - graph: loader.graph, + graph: graph.take(), db, hashes, - default: loader.default, - pools: loader.pools, + default, + pools, }) } +pub mod testing { + use super::*; + + pub fn read_internal(build_filename: &str) -> anyhow::Result<()> { + let mut loader = Loader::new(); + let id = { + loader + ._graph + .borrow_mut() + .files + .id_from_canonical(to_owned_canon_path(build_filename)) + }; + loader.read_file(id) + } + + pub fn read_internal_slow(build_filename: &str) -> anyhow::Result<()> { + let mut loader = Loader::new(); + let id = { + loader + ._graph + .borrow_mut() + .files + .id_from_canonical(to_owned_canon_path(build_filename)) + }; + loader.read_file_slow(id) + } +} + /// Parse a single file's content. #[cfg(test)] pub fn parse(name: &str, mut content: Vec) -> anyhow::Result { @@ -285,5 +341,7 @@ pub fn parse(name: &str, mut content: Vec) -> anyhow::Result { trace::scope("loader.read_file", || { loader.parse(PathBuf::from(name), &content) })?; - Ok(loader.graph) + let graph = loader._graph; + + Ok(graph.take()) } diff --git a/tests/e2e/loader_memmap.rs b/tests/e2e/loader_memmap.rs new file mode 100644 index 0000000..c08bcdf --- /dev/null +++ b/tests/e2e/loader_memmap.rs @@ -0,0 +1,51 @@ +//! Tests for behavior around missing files. +use libc::{sysconf, _SC_PAGESIZE}; +use std::{ffi::{OsStr, OsString}, os::unix::ffi::OsStrExt}; + +use super::*; + +#[test] +fn test_null_termination_with_page_sized_file() -> anyhow::Result<()> { + let page_size = unsafe { sysconf(_SC_PAGESIZE) as usize }; + let line_max_length = 128; + let lines = page_size / line_max_length; + let mut content = OsString::with_capacity(page_size); + + // Create a page-sized file + content.push( + &[ + TOUCH_RULE, + "build out: touch | phony_file", + "build phony_file: phony", + "" + ].join("\n") + ); + let rounding_line_length = line_max_length - content.len(); + let mut fill = Vec::with_capacity(line_max_length); + fill.extend(std::iter::repeat('#' as u8).take(rounding_line_length)); + fill[rounding_line_length - 1] = '\n' as u8; + let rounding_line = OsString::from(unsafe { OsStr::from_encoded_bytes_unchecked(fill.as_slice()) }); + content.push(&rounding_line); + + for _ in 1..lines { + let mut fill = Vec::with_capacity(line_max_length); + fill.extend(std::iter::repeat('#' as u8).take(line_max_length)); + fill[line_max_length - 1] = '\n' as u8; + let rounding_line = OsString::from(unsafe { OsStr::from_encoded_bytes_unchecked(fill.as_slice()) }); + content.push(&rounding_line); + } + + let space = TestSpace::new()?; + space.write_raw( + "build.ninja", + content.as_bytes(), + )?; + + space.write("phony_file", "")?; + + // Should not fail with a SIGBUS + let out = space.run_expect(&mut n2_command(vec!["out"]))?; + assert_output_contains(&out, "ran 1 task"); + + Ok(()) +} diff --git a/tests/e2e/mod.rs b/tests/e2e/mod.rs index 7457577..f1c2780 100644 --- a/tests/e2e/mod.rs +++ b/tests/e2e/mod.rs @@ -7,6 +7,7 @@ mod discovered; mod missing; mod regen; mod validations; +mod loader_memmap; use anyhow::anyhow; @@ -68,6 +69,10 @@ impl TestSpace { std::fs::write(self.dir.path().join(path), content) } + pub fn write_raw(&self, path: &str, content: &[u8]) -> std::io::Result<()> { + std::fs::write(self.dir.path().join(path), content) + } + /// Read a file from the working space. pub fn read(&self, path: &str) -> anyhow::Result> { let path = self.dir.path().join(path);