From 94a23a6839f3d4632c5abed1cb8846e4d295383e Mon Sep 17 00:00:00 2001
From: Alyssa Ross
Date: Tue, 17 Aug 2021 19:22:05 +0000
Subject: Proof of concept

---
 Cargo.lock                |  86 ++++++++++++++++++++++++
 Cargo.toml                |   4 ++++
 src/diff_thread/client.rs |  41 +++++++++++
 src/diff_thread/mod.rs    |  33 +++++++++
 src/diff_thread/server.rs |  50 ++++++++++++++
 src/filter.rs             | 158 ++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs               | 138 +++++++++++++++++++++++++++++++++++++-
 tests/no_repo.rs          |  20 ------
 tests/test.rs             | 171 +++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 676 insertions(+), 25 deletions(-)
 create mode 100644 src/diff_thread/client.rs
 create mode 100644 src/diff_thread/mod.rs
 create mode 100644 src/diff_thread/server.rs
 create mode 100644 src/filter.rs
 delete mode 100644 tests/no_repo.rs
 create mode 100644 tests/test.rs

diff --git a/Cargo.lock b/Cargo.lock
index 8ae1ccb..15b2621 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -33,11 +33,19 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "fuchsia-cprng"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
+
 [[package]]
 name = "git-girf"
 version = "0.1.0"
 dependencies = [
  "git2",
+ "libc",
+ "tempdir",
 ]
 
 [[package]]
@@ -130,6 +138,62 @@ version = "0.3.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
 
+[[package]]
+name = "rand"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
+dependencies = [
+ "fuchsia-cprng",
+ "libc",
+ "rand_core 0.3.1",
+ "rdrand",
+ "winapi",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
+dependencies = [
+ "rand_core 0.4.2",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
+
+[[package]]
+name = "rdrand"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
+dependencies = [
+ "rand_core 0.3.1",
+]
+
+[[package]]
+name = "remove_dir_all"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "tempdir"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8"
+dependencies = [
+ "rand",
+ "remove_dir_all",
+]
+
 [[package]]
 name = "tinyvec"
 version = "1.3.1"
@@ -180,3 +244,25 @@ name = "vcpkg"
 version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/Cargo.toml b/Cargo.toml
index 9e9e1c4..a2537ff 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,3 +12,7 @@ license = "GPL-2.0-or-later"
 
 [dependencies]
 git2 = { version = "0.13.20", default-features = false }
+libc = "0.2.99"
+
+[dev-dependencies]
+tempdir = "0.3.7"
diff --git a/src/diff_thread/client.rs b/src/diff_thread/client.rs
new file mode 100644
index 0000000..dcc9cfe
--- /dev/null
+++ b/src/diff_thread/client.rs
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross
+
+use super::{server, FileDiff, MakeDiff};
+
+use std::sync::mpsc::{sync_channel, SyncSender};
+use std::thread::spawn;
+
+use git2::Repository;
+
+/// Handle to a background thread that computes diffs on request.
+pub struct DiffThread<D> {
+    sender: SyncSender<(D, SyncSender<Result<FileDiff, git2::Error>>)>,
+}
+
+impl<D: MakeDiff> DiffThread<D> {
+    /// Spawns the diff thread, which takes ownership of `repo`.
+    pub fn new(repo: Repository) -> Self {
+        // Rendezvous channel: a request is handed straight to the diff thread.
+        let (sender, receiver) = sync_channel(0);
+
+        spawn(move || server::main(&repo, receiver));
+
+        Self { sender }
+    }
+
+    /// Asks the diff thread to run `diff_maker` and returns an iterator over the
+    /// resulting file diffs.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the diff thread is no longer running.
+    pub fn diff_files(&self, diff_maker: D) -> impl Iterator<Item = Result<FileDiff, git2::Error>> {
+        // Small buffer: the diff thread blocks once it gets this far ahead.
+        let (sender, receiver) = sync_channel(2);
+        self.sender
+            .send((diff_maker, sender))
+            .expect("sending to diff thread");
+        receiver.into_iter()
+    }
+}
diff --git a/src/diff_thread/mod.rs b/src/diff_thread/mod.rs
new file mode 100644
index 0000000..9416509
--- /dev/null
+++ b/src/diff_thread/mod.rs
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross
+
+//! libgit2's diffing API gives us a callback whenever a file diff is ready, but
+//! that's not very convenient for Rust code, because it can't be used as an
+//! iterator. So we run the diff in a thread, and send the diff information we need
+//! to the main thread over a channel. When the main thread is processing diff
+//! information too slowly, we block the diff thread until the main thread catches
+//! up.
+
+mod client;
+mod server;
+
+pub use client::DiffThread;
+
+use std::path::PathBuf;
+
+use git2::{Delta, Diff, Repository};
+
+/// A request that the diff thread can turn into a [`Diff`] using its own
+/// [`Repository`] handle.
+pub trait MakeDiff: Send + 'static {
+    /// Builds the diff against `repo`, the diff thread's repository.
+    fn make_diff(self, repo: &Repository) -> Result<Diff, git2::Error>;
+}
+
+/// The per-file diff information sent back to the main thread.
+///
+/// `new_path` isn't included because it's not currently used in git-girf.
+pub struct FileDiff {
+    pub status: Delta,
+    pub old_path: Option<PathBuf>,
+}
diff --git a/src/diff_thread/server.rs b/src/diff_thread/server.rs
new file mode 100644
index 0000000..d6d43ab
--- /dev/null
+++ b/src/diff_thread/server.rs
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross
+
+use super::{FileDiff, MakeDiff};
+
+use std::sync::mpsc::{Receiver, SyncSender};
+
+use git2::Repository;
+
+/// Body of the diff thread: serves diff requests until every request sender
+/// has been dropped.
+///
+/// Each request carries a [`MakeDiff`] and a channel on which the resulting
+/// [`FileDiff`]s (or the error that stopped the diff) are sent back.
+pub fn main(
+    repository: &Repository,
+    receiver: Receiver<(impl MakeDiff, SyncSender<Result<FileDiff, git2::Error>>)>,
+) {
+    for (diff_maker, sender) in receiver {
+        let diff = match diff_maker.make_diff(repository) {
+            Ok(diff) => diff,
+            Err(e) => {
+                // Report the failure, but keep serving later requests: returning
+                // here would make every subsequent `diff_files` call panic.
+                let _ = sender.send(Err(e));
+                continue;
+            }
+        };
+
+        if let Err(e) = diff.foreach(
+            &mut |delta, _| {
+                // A failed send means the receiver has been dropped (the caller
+                // short-circuited), so returning false stops the diff early
+                // instead of computing results nobody will read.
+                sender
+                    .send(Ok(FileDiff {
+                        status: delta.status(),
+                        old_path: delta.old_file().path().map(Into::into),
+                    }))
+                    .is_ok()
+            },
+            None,
+            None,
+            None,
+        ) {
+            // Best effort: if we aborted above, nobody is listening any more.
+            let _ = sender.send(Err(e));
+        }
+    }
+}
diff --git a/src/filter.rs b/src/filter.rs
new file mode 100644
index 0000000..3352b16
--- /dev/null
+++ b/src/filter.rs
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross
+
+use std::ffi::{CStr, OsString};
+use std::fmt::{Display, Formatter};
+use std::io::{self, ErrorKind, Write};
+use std::os::raw::{c_char, c_int};
+use std::path::Path;
+use std::process::{Child, ChildStdout, Command, Stdio};
+use std::thread::{spawn, JoinHandle};
+
+use git2::{Repository, Tree};
+use libc::{SIGPIPE, WEXITSTATUS, WIFEXITED, WIFSIGNALED, WTERMSIG};
+
+extern "C" {
+    // glibc extension; returns NULL for an invalid signal number.
+    fn sigdescr_np(sig: c_int) -> *const c_char;
+}
+
+/// Waits for process `pid` to terminate and returns its raw wait status.
+///
+/// # Safety
+///
+/// `pid` must be a child of this process that nothing else waits for;
+/// otherwise the PID could have been reused for an unrelated process.
+unsafe fn wait(pid: u32) -> io::Result<c_int> {
+    let mut wstatus: c_int = 0;
+    if libc::waitpid(pid as i32, &mut wstatus, 0) == -1 {
+        return Err(io::Error::last_os_error());
+    }
+    Ok(wstatus)
+}
+
+/// Spawns the filter command `argv` with piped stdin and stdout.
+///
+/// Panics if `argv` is empty or the command can't be spawned.
+fn filter_output(argv: &[OsString]) -> Child {
+    let (program, args) = argv.split_first().expect("empty filter command");
+    Command::new(program)
+        .args(args)
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()
+        .expect("spawn")
+}
+
+/// Returns the content of the blob at `path` in `tree`.
+fn tree_path_content(repo: &Repository, tree: &Tree, path: &Path) -> Result<Vec<u8>, git2::Error> {
+    let blob = tree.get_path(path)?.to_object(repo)?.peel_to_blob()?;
+    Ok(blob.content().to_vec())
+}
+
+/// The ways running a filter command can fail.
+#[derive(Debug)]
+pub enum FilterError {
+    /// Writing the blob to the filter's stdin failed.
+    Write(io::Error),
+    /// The filter exited with this non-zero status.
+    Exit(c_int),
+    /// The filter was killed by this signal.
+    Signal(c_int),
+}
+
+impl Display for FilterError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        use FilterError::*;
+        match self {
+            Write(e) => write!(f, "writing to filter stdin: {}", e),
+            Exit(code) => write!(f, "filter command failed with code {}", code),
+            Signal(signo) => {
+                // SAFETY: sigdescr_np() may be called with any signal number.
+                let ptr = unsafe { sigdescr_np(*signo) };
+                if ptr.is_null() {
+                    // CStr::from_ptr(NULL) would be undefined behavior, so
+                    // fall back to the signal number.
+                    return write!(f, "filter command killed by signal {}", signo);
+                }
+                // SAFETY: a non-null result points to a static NUL-terminated
+                // description string.
+                let sigdescr = unsafe { CStr::from_ptr(ptr) }.to_string_lossy();
+                write!(f, "filter command killed by signal: {}", sigdescr)
+            }
+        }
+    }
+}
+
+impl std::error::Error for FilterError {}
+
+/// A running filter command that is being fed a blob on its stdin.
+pub struct Filter {
+    pid: u32,
+    writer: JoinHandle<io::Result<()>>,
+}
+
+impl Filter {
+    /// Starts the filter command `args` on the content of `path` in `tree`,
+    /// returning the filter along with the pipe its output can be read from.
+    ///
+    /// Panics if the blob can't be read or the command can't be spawned.
+    pub fn new(
+        repo: &Repository,
+        tree: &Tree,
+        path: &Path,
+        args: &[OsString],
+    ) -> (Self, ChildStdout) {
+        let blob = tree_path_content(repo, tree, path).unwrap();
+
+        let child = filter_output(args);
+        let pid = child.id();
+
+        // Destructure here to make sure there are no
+        // remaining references to the Child structs after
+        // this. This makes sure nothing else will wait for
+        // our filter processes, and we don't have to worry
+        // about PID reuse.
+        let Child { stdin, stdout, .. } = child;
+
+        let mut stdin = stdin.unwrap();
+        let stdout = stdout.unwrap();
+
+        // stdin is moved into the writer thread, so the pipe is closed as soon
+        // as the write finishes or fails.
+        let writer = spawn(move || stdin.write_all(&blob));
+
+        (Self { pid, writer }, stdout)
+    }
+
+    /// Waits for the writer thread and the filter process to finish.
+    ///
+    /// A broken pipe while writing, or a filter killed by SIGPIPE, is not
+    /// treated as a failure.
+    pub fn wait(self) -> Result<(), FilterError> {
+        if let Err(e) = self.writer.join().unwrap() {
+            if e.kind() != ErrorKind::BrokenPipe {
+                return Err(FilterError::Write(e));
+            }
+        }
+
+        // SAFETY: self.pid is our own child, and the Child it came from was
+        // taken apart in new(), so nothing else can wait for it.
+        let wstatus = unsafe { wait(self.pid).unwrap() };
+        if WIFEXITED(wstatus) {
+            let status = WEXITSTATUS(wstatus);
+            if status != 0 {
+                return Err(FilterError::Exit(status));
+            }
+        } else if WIFSIGNALED(wstatus) {
+            let signal = WTERMSIG(wstatus);
+            if signal != SIGPIPE {
+                return Err(FilterError::Signal(signal));
+            }
+        } else {
+            unreachable!()
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 062a448..d5d763d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,144 @@
 // SPDX-License-Identifier: EUPL-1.2
 // SPDX-FileCopyrightText: 2021 Alyssa Ross
 
-use std::process::exit;
+mod diff_thread;
+mod filter;
 
-use git2::Repository;
+use diff_thread::{DiffThread, FileDiff, MakeDiff};
+use filter::Filter;
 
-fn main() {
-    let repo = match Repository::open_from_env() {
+use std::env::args_os;
+use std::ffi::OsString;
+use std::io::{stderr, BufReader, Read, Write};
+use std::os::unix::prelude::*;
+use std::process::{exit, ChildStdout};
+use std::thread::spawn;
+
+use git2::{Delta, Diff, Oid, Repository, Sort, Tree};
+
+/// Iterates over the bytes of a filter's output, panicking on read errors.
+fn child_out_bytes(child: &mut ChildStdout) -> impl Iterator<Item = u8> + '_ {
+    // Buffered, so that we don't issue a read(2) for every single byte.
+    BufReader::new(child).bytes().map(Result::unwrap)
+}
+
+/// Returns whether both filters produce byte-identical output.
+fn output_eq(lhs: &mut ChildStdout, rhs: &mut ChildStdout) -> bool {
+    child_out_bytes(lhs).eq(child_out_bytes(rhs))
+}
+
+/// Returns whether `diff` is a modification whose filtered content is the
+/// same in the `old` and `new` trees.
+///
+/// Any other kind of change (addition, deletion, ...) counts as different.
+fn filtered_file_same(
+    repo: &Repository,
+    old: &Tree,
+    new: &Tree,
+    diff: FileDiff,
+    argv: &[OsString],
+) -> bool {
+    if diff.status != Delta::Modified {
+        return false;
+    }
+
+    let path = diff.old_path.unwrap();
+
+    let (filter_old, mut stdout_old) = Filter::new(repo, old, &path, argv);
+    let (filter_new, mut stdout_new) = Filter::new(repo, new, &path, argv);
+
+    // Compare on a separate thread while this one waits for the filters.
+    let thread = spawn(move || output_eq(&mut stdout_old, &mut stdout_new));
+
+    filter_old.wait().expect("TODO: HANDLE FILTER ERROR");
+    filter_new.wait().expect("TODO: HANDLE FILTER ERROR");
+
+    thread.join().unwrap()
+}
+
+/// Opens the repository located via the environment, or prints a fatal error
+/// and exits with status 128.
+fn open_repo() -> Repository {
+    match Repository::open_from_env() {
         Ok(repo) => repo,
         Err(e) => {
             eprintln!("fatal: {}", e.message());
             exit(128);
         }
-    };
+    }
+}
+
+/// A request to diff two trees, identified by object id so that it can be
+/// handed to the diff thread.
+struct DiffMaker {
+    old_tree_oid: Oid,
+    new_tree_oid: Oid,
+}
+
+impl MakeDiff for DiffMaker {
+    fn make_diff(self, repo: &Repository) -> Result<Diff, git2::Error> {
+        let old_tree = repo.find_tree(self.old_tree_oid)?;
+        let new_tree = repo.find_tree(self.new_tree_oid)?;
+        repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), Default::default())
+    }
+}
+
+fn main() {
+    let args: Vec<_> = args_os().collect();
+    if args.len() < 2 {
+        eprintln!("fatal: no command given");
+        exit(128);
+    }
+
+    // No options are supported yet, so the first argument must be the filter
+    // command itself.
+    if let Some(arg) = args.get(1).filter(|a| a.as_bytes().starts_with(b"-")) {
+        eprint!("fatal: unrecognized argument: ");
+        let _ = stderr().write_all(arg.as_bytes());
+        eprintln!();
+        exit(128);
+    }
+
+    let repo = open_repo();
+
+    // The diff thread gets its own handle on the same repository.
+    let diff_thread =
+        DiffThread::new(Repository::open(repo.path()).expect("opening diff thread repo"));
+
+    let mut walk = repo.revwalk().expect("revwalk");
+    walk.set_sorting(Sort::TOPOLOGICAL).expect("set_sorting");
+    walk.push_head().expect("push_head");
+
+    // It might be good to parallelize this loop at some point, but
+    // we'd want to preserve order.
+    for commit_oid in walk {
+        let commit_oid = commit_oid.expect("revwalk");
+        let commit = repo.find_commit(commit_oid).expect("find_commit");
+
+        // Root and merge commits are skipped.
+        if commit.parent_count() != 1 {
+            continue;
+        }
+
+        // TODO: typechange
+        // TODO: filemode
+        // TODO: submodules
+        //
+        // TODO: fix all these unwraps
+
+        let parent_tree = commit.parents().next().unwrap().tree().unwrap();
+        let commit_tree = commit.tree().unwrap();
+
+        if diff_thread
+            .diff_files(DiffMaker {
+                old_tree_oid: parent_tree.id(),
+                new_tree_oid: commit_tree.id(),
+            })
+            .all(|diff| {
+                filtered_file_same(&repo, &parent_tree, &commit_tree, diff.unwrap(), &args[1..])
+            })
+        {
+            println!("{}", commit.id());
+        }
+    }
 }
diff --git a/tests/no_repo.rs b/tests/no_repo.rs
deleted file mode 100644
index 247426c..0000000
--- a/tests/no_repo.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: EUPL-1.2
-// SPDX-FileCopyrightText: 2021 Alyssa Ross
-
-use std::process::Command;
-
-#[test]
-fn no_repo() {
-    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
-        .current_dir("/var/empty")
-        .env_clear()
-        .output()
-        .expect("spawn");
-
-    assert_eq!(output.status.code(), Some(128));
-    assert_eq!(
-        output.stderr,
-        b"fatal: could not find repository from '.'\n"
-    );
-    assert_eq!(output.stdout, b"");
-}
diff --git a/tests/test.rs b/tests/test.rs
new file mode 100644
index 0000000..9edacff
--- /dev/null
+++ b/tests/test.rs
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross
+
+use std::{
+    env::var_os,
+    io::{stderr, Write},
+    process::Command,
+};
+
+use git2::{Oid, Signature, Time};
+use tempdir::TempDir;
+
+/// A scratch repository plus the fixed identity used for its commits.
+struct Repo<'a> {
+    inner: git2::Repository,
+    signature: Signature<'a>,
+}
+
+impl Repo<'_> {
+    fn new(inner: git2::Repository) -> Self {
+        Self {
+            inner,
+            signature: Signature::new("git-girf test suite", "git-girf@test", &Time::new(0, 0))
+                .unwrap(),
+        }
+    }
+
+    /// Commits a tree containing exactly `files` (path, content pairs) on top
+    /// of HEAD, and returns the id of the new commit.
+    fn commit(&self, files: &[(impl AsRef<[u8]>, impl AsRef<[u8]>)]) -> Oid {
+        let mut treebuilder = self.inner.treebuilder(None).unwrap();
+
+        for (path, data) in files {
+            let blob = self.inner.blob(data.as_ref()).unwrap();
+            treebuilder.insert(path.as_ref(), blob, 0o100644).unwrap();
+        }
+
+        let tree_oid = treebuilder.write().unwrap();
+        let tree = self.inner.find_tree(tree_oid).unwrap();
+        // If HEAD can't be resolved, the commit gets no parents.
+        let parent_oids = self
+            .inner
+            .head()
+            .into_iter()
+            .map(|head| head.target().unwrap());
+        let parents: Vec<_> = parent_oids
+            .map(|oid| self.inner.find_commit(oid).unwrap())
+            .collect();
+        let borrowed_parents: Vec<_> = parents.iter().collect();
+        self.inner
+            .commit(
+                Some("HEAD"),
+                &self.signature,
+                &self.signature,
+                "A commit",
+                &tree,
+                &borrowed_parents,
+            )
+            .unwrap()
+    }
+}
+
+#[test]
+fn happy() {
+    let dir = TempDir::new("git-girf-tests").unwrap();
+
+    let path = var_os("PATH").unwrap_or_else(|| "/usr/bin:/bin".into());
+
+    let repo = Repo::new(git2::Repository::init(&dir).expect("opening repo"));
+
+    // We're going to use `tail -n 1` as our filter command, meaning
+    // commits shouldn't be printed if they change the last line of
+    // any file.
+
+    let mut expected = Vec::new();
+
+    repo.commit(&[("a", "a\na\n")]);
+
+    // This creates a file, so shouldn't be printed.
+    repo.commit(&[("a", "a\na\n"), ("b", "b\nb\n")]);
+
+    // This changes the first lines only, so should be printed.
+    expected.push(repo.commit(&[("a", "A\na\n"), ("b", "B\nb\n")]));
+
+    // This changes the last line, so shouldn't be printed.
+    repo.commit(&[("a", "A\nA\n"), ("b", "b\nB\n")]);
+
+    // This has lots of files, but it should be obvious that it
+    // doesn't have to be printed after the first file. We include it
+    // to test that short circuiting doesn't panic the diff thread.
+    repo.commit(&(b'a'..b'z').map(|c| ([c], [c, b'\n'])).collect::<Vec<_>>());
+
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .args(&["tail", "-n", "1"])
+        .current_dir(&dir)
+        .env_clear()
+        .env("PATH", path)
+        .output()
+        .expect("spawn");
+
+    let _ = stderr().write_all(&output.stderr);
+
+    let mut expected_bytes = Vec::with_capacity(41 * expected.len());
+    for oid in expected {
+        expected_bytes.extend_from_slice(oid.to_string().as_bytes());
+        expected_bytes.push(b'\n');
+    }
+
+    assert!(output.stderr.is_empty());
+    assert_eq!(
+        String::from_utf8_lossy(&output.stdout),
+        String::from_utf8_lossy(&expected_bytes)
+    );
+    assert!(output.status.success());
+}
+
+#[test]
+fn no_repo() {
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .arg("cat")
+        .current_dir("/var/empty")
+        .env_clear()
+        .output()
+        .expect("spawn");
+
+    assert_eq!(output.status.code(), Some(128));
+    assert_eq!(
+        output.stderr,
+        b"fatal: could not find repository from '.'\n"
+    );
+    assert!(output.stdout.is_empty());
+}
+
+#[test]
+fn no_args() {
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .current_dir("/var/empty")
+        .env_clear()
+        .output()
+        .expect("spawn");
+
+    assert_eq!(output.status.code(), Some(128));
+    assert_eq!(output.stderr, b"fatal: no command given\n");
+    assert!(output.stdout.is_empty());
+}
+
+#[test]
+fn flag() {
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .arg("-Z")
+        .current_dir("/var/empty")
+        .env_clear()
+        .output()
+        .expect("spawn");
+
+    assert_eq!(output.status.code(), Some(128));
+    assert_eq!(output.stderr, b"fatal: unrecognized argument: -Z\n");
+    assert!(output.stdout.is_empty());
+}
+
+#[test]
+#[ignore = "not yet implemented"]
+fn filter_sigpipe() {
+    todo!();
+}
+
+#[test]
+#[ignore = "not yet implemented"]
+fn sigpipe() {
+    todo!();
+}
-- 
cgit 1.4.1