summary refs log tree commit diff
diff options
authorAlyssa Ross <>2021-08-17 19:22:05 +0000
committerAlyssa Ross <>2021-08-17 19:22:05 +0000
commit94a23a6839f3d4632c5abed1cb8846e4d295383e (patch)
parent256ac9c8eb8775acd1f812ee4345e13926181f0e (diff)
Proof of concept
9 files changed, 601 insertions, 25 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8ae1ccb..15b2621 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -34,10 +34,18 @@ dependencies = [
+name = "fuchsia-cprng"
+version = "0.1.1"
+source = "registry+"
+checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
 name = "git-girf"
 version = "0.1.0"
 dependencies = [
+ "libc",
+ "tempdir",
@@ -131,6 +139,62 @@ source = "registry+"
 checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
+name = "rand"
+version = "0.4.6"
+source = "registry+"
+checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
+dependencies = [
+ "fuchsia-cprng",
+ "libc",
+ "rand_core 0.3.1",
+ "rdrand",
+ "winapi",
+name = "rand_core"
+version = "0.3.1"
+source = "registry+"
+checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
+dependencies = [
+ "rand_core 0.4.2",
+name = "rand_core"
+version = "0.4.2"
+source = "registry+"
+checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
+name = "rdrand"
+version = "0.4.0"
+source = "registry+"
+checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
+dependencies = [
+ "rand_core 0.3.1",
+name = "remove_dir_all"
+version = "0.5.3"
+source = "registry+"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+dependencies = [
+ "winapi",
+name = "tempdir"
+version = "0.3.7"
+source = "registry+"
+checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8"
+dependencies = [
+ "rand",
+ "remove_dir_all",
 name = "tinyvec"
 version = "1.3.1"
 source = "registry+"
@@ -180,3 +244,25 @@ name = "vcpkg"
 version = "0.2.15"
 source = "registry+"
 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+name = "winapi"
+version = "0.3.9"
+source = "registry+"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/Cargo.toml b/Cargo.toml
index 9e9e1c4..a2537ff 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,3 +12,7 @@ license = "GPL-2.0-or-later"
 git2 = { version = "0.13.20", default-features = false }
+libc = "0.2.99"
+tempdir = "0.3.7"
diff --git a/src/diff_thread/client.rs b/src/diff_thread/client.rs
new file mode 100644
index 0000000..dcc9cfe
--- /dev/null
+++ b/src/diff_thread/client.rs
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross <>
+use super::{server, FileDiff, MakeDiff};
+use std::sync::mpsc::{sync_channel, SyncSender};
+use std::thread::spawn;
+use git2::Repository;
+pub struct DiffThread<D: MakeDiff> {
+    sender: SyncSender<(D, SyncSender<Result<FileDiff, git2::Error>>)>,
+impl<D: MakeDiff> DiffThread<D> {
+    /// Starts a background thread running `server::main` against `repo` and returns a handle
+    /// for submitting diff requests to it.
+    ///
+    /// The request channel is created with capacity 0.
+    pub fn new(repo: Repository) -> Self {
+        let (request_tx, request_rx) = sync_channel(0);
+        // `repo` is moved into the thread, which lends it to the server loop.
+        spawn(move || {
+            server::main(&repo, request_rx);
+        });
+        Self { sender: request_tx }
+    }
+    /// Submits `diff_maker` to the diff thread and returns an iterator over the per-file
+    /// results sent back for it.
+    ///
+    /// Results travel over a fresh channel with capacity 2; the iterator ends once the sending
+    /// half of that channel has been dropped.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "sending to diff thread" if the request cannot be sent.
+    pub fn diff_files(&self, diff_maker: D) -> impl Iterator<Item = Result<FileDiff, git2::Error>> {
+        let (reply_tx, reply_rx) = sync_channel(2);
+        let request = (diff_maker, reply_tx);
+        self.sender.send(request).expect("sending to diff thread");
+        reply_rx.into_iter()
+    }
diff --git a/src/diff_thread/mod.rs b/src/diff_thread/mod.rs
new file mode 100644
index 0000000..9416509
--- /dev/null
+++ b/src/diff_thread/mod.rs
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross <>
+//! libgit2's diffing API gives us a callback whenever a file diff is ready, but
+//! that's not very convenient for Rust code, because it can't be used as an
+//! iterator.  So we run the diff in a thread, and send the diff information we need
+//! to the main thread over a channel.  When the main thread is processing diff
+//! information too slowly, we block the diff thread until the main thread catches
+//! up.
+mod server;
+mod client;
+pub use client::DiffThread;
+use std::path::PathBuf;
+use git2::{Repository, Diff, Delta};
+pub trait MakeDiff: Send + 'static {
+    fn make_diff(self, _: &Repository) -> Result<Diff, git2::Error>;
+/// `new_path` isn't included because it's not currently used in git-girf.
+pub struct FileDiff {
+    pub status: Delta,
+    pub old_path: Option<PathBuf>,
diff --git a/src/diff_thread/server.rs b/src/diff_thread/server.rs
new file mode 100644
index 0000000..d6d43ab
--- /dev/null
+++ b/src/diff_thread/server.rs
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross <>
+use super::{FileDiff, MakeDiff};
+use std::sync::mpsc::{Receiver, SyncSender};
+use git2::Repository;
+pub fn main(
+    repository: &Repository,
+    receiver: Receiver<(impl MakeDiff, SyncSender<Result<FileDiff, git2::Error>>)>,
+) {
+    for (diff_maker, sender) in receiver {
+        let diff = match diff_maker.make_diff(&repository) {
+            Ok(diff) => diff,
+            Err(e) => {
+                let _ = sender.send(Err(e));
+                return;
+            }
+        };
+        if let Err(e) = diff.foreach(
+            &mut |delta, _| {
+                let _ = sender.send(Ok(FileDiff {
+                    status: delta.status(),
+                    old_path: delta.old_file().path().map(Into::into),
+                }));
+                true
+            },
+            None,
+            None,
+            None,
+        ) {
+            let _ = sender.send(Err(e));
+        }
+    }
diff --git a/src/filter.rs b/src/filter.rs
new file mode 100644
index 0000000..3352b16
--- /dev/null
+++ b/src/filter.rs
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross <>
+use std::ffi::{CStr, OsString};
+use std::fmt::{Display, Formatter};
+use std::io::{self, ErrorKind, Write};
+use std::os::raw::{c_char, c_int};
+use std::path::Path;
+use std::process::{Child, ChildStdout, Command, Stdio};
+use std::thread::{spawn, JoinHandle};
+use git2::{Repository, Tree};
+extern "C" {
+    fn sigdescr_np(sig: c_int) -> *const c_char;
+unsafe fn wait(pid: u32) -> io::Result<c_int> {
+    let mut wstatus: c_int = 0;
+    if libc::waitpid(pid as i32, &mut wstatus, 0) == -1 {
+        return Err(io::Error::last_os_error());
+    }
+    Ok(wstatus)
+fn filter_output(argv: &[OsString]) -> Child {
+    Command::new(argv.get(0).unwrap())
+        .args(&argv[1..])
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()
+        .expect("spawn")
+fn tree_path_content(repo: &Repository, tree: &Tree, path: &Path) -> Result<Vec<u8>, git2::Error> {
+    let blob = tree.get_path(path)?.to_object(&repo)?.peel_to_blob()?;
+    Ok(blob.content().to_vec())
+pub enum FilterError {
+    Write(io::Error),
+    Exit(c_int),
+    Signal(c_int),
+impl Display for FilterError {
+    /// Writes a one-line, human-readable description of the failure.
+    ///
+    /// For `Signal`, the description comes from `sigdescr_np`; when that has no description
+    /// for the signal number, the raw number is printed instead.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Write(e) => write!(f, "writing to filter stdin: {}", e),
+            Self::Exit(code) => write!(f, "filter command failed with code {}", code),
+            Self::Signal(signo) => {
+                // SAFETY: `sigdescr_np` only takes an integer by value and has no other
+                // preconditions.
+                let descr = unsafe { sigdescr_np(*signo) };
+                if descr.is_null() {
+                    // NULL is returned for signal numbers without a description; passing it
+                    // to `CStr::from_ptr` would be undefined behaviour.
+                    return write!(f, "filter command killed by signal: {}", signo);
+                }
+                // SAFETY: `descr` is non-null, and a non-null return from `sigdescr_np`
+                // points to a NUL-terminated string that is never freed.
+                let sigdescr = unsafe { CStr::from_ptr(descr) }.to_string_lossy();
+                write!(f, "filter command killed by signal: {}", sigdescr)
+            }
+        }
+    }
+impl std::error::Error for FilterError {}
+pub struct Filter {
+    pid: u32,
+    writer: JoinHandle<io::Result<()>>,
+impl Filter {
+    /// Spawns the filter command `args` and feeds it the content of `path` as found in `tree`.
+    ///
+    /// Returns the `Filter` handle, which must later be reaped with [`Filter::wait`], together
+    /// with the filter's stdout for the caller to read.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `path` cannot be read as a blob from `tree`, or if the command cannot be
+    /// spawned.
+    pub fn new(
+        repo: &Repository,
+        tree: &Tree,
+        path: &Path,
+        args: &[OsString],
+    ) -> (Self, ChildStdout) {
+        let blob = tree_path_content(repo, tree, path).unwrap();
+        let child = filter_output(args);
+        let pid = child.id();
+        // Destructure here to make sure there are no
+        // remaining references to the Child structs after
+        // this.  This makes sure nothing else will wait for
+        // our filter processes, and we don't have to worry
+        // about PID reuse.
+        let Child { stdin, stdout, .. } = child;
+        let mut stdin = stdin.expect("filter stdin was configured as piped");
+        let stdout = stdout.expect("filter stdout was configured as piped");
+        // The input is written from its own thread; `stdin` is dropped (closing the pipe)
+        // when the closure returns.
+        let writer = spawn(move || stdin.write_all(&blob));
+        (Self { pid, writer }, stdout)
+    }
+    pub fn wait(self) -> Result<(), FilterError> {
+        if let Err(e) = self.writer.join().unwrap() {
+            if e.kind() != ErrorKind::BrokenPipe {
+                return Err(FilterError::Write(e));
+            }
+        }
+        let wstatus = unsafe { wait( };
+        if WIFEXITED(wstatus) {
+            let status = WEXITSTATUS(wstatus);
+            if status != 0 {
+                return Err(FilterError::Exit(status));
+            }
+        } else if WIFSIGNALED(wstatus) {
+            let signal = WTERMSIG(wstatus);
+            if signal != SIGPIPE {
+                return Err(FilterError::Signal(signal));
+            }
+        } else {
+            unreachable!()
+        }
+        Ok(())
+    }
diff --git a/src/main.rs b/src/main.rs
index 062a448..d5d763d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,137 @@
 // SPDX-License-Identifier: EUPL-1.2
 // SPDX-FileCopyrightText: 2021 Alyssa Ross <>
-use std::process::exit;
+mod diff_thread;
+mod filter;
-use git2::Repository;
+use diff_thread::{DiffThread, FileDiff, MakeDiff};
+use filter::Filter;
-fn main() {
-    let repo = match Repository::open_from_env() {
+use std::env::args_os;
+use std::ffi::OsString;
+use std::io::{stderr, Read, Write};
+use std::os::unix::prelude::*;
+use std::process::{exit, ChildStdout};
+use std::thread::spawn;
+use git2::{Delta, Diff, Oid, Repository, Sort, Tree};
+fn child_out_bytes(child: &mut ChildStdout) -> impl Iterator<Item = u8> + '_ {
+    child.by_ref().bytes().map(Result::unwrap)
+fn output_eq(lhs: &mut ChildStdout, rhs: &mut ChildStdout) -> bool {
+    child_out_bytes(lhs).eq(child_out_bytes(rhs))
+fn filtered_file_same(
+    repo: &Repository,
+    old: &Tree,
+    new: &Tree,
+    diff: FileDiff,
+    argv: &[OsString],
+) -> bool {
+    if diff.status != Delta::Modified {
+        return false;
+    }
+    let path = diff.old_path.unwrap();
+    let (filter_old, mut stdout_old) = Filter::new(&repo, &old, &path, argv);
+    let (filter_new, mut stdout_new) = Filter::new(&repo, &new, &path, argv);
+    let thread = spawn(move || output_eq(&mut stdout_old, &mut stdout_new));
+    filter_old.wait().expect("TODO: HANDLE FILTER ERROR");
+    filter_new.wait().expect("TODO: HANDLE FILTER ERROR");
+    thread.join().unwrap()
+fn open_repo() -> Repository {
+    match Repository::open_from_env() {
         Ok(repo) => repo,
         Err(e) => {
             eprintln!("fatal: {}", e.message());
-    };
+    }
+struct DiffMaker {
+    old_tree_oid: Oid,
+    new_tree_oid: Oid,
+impl MakeDiff for DiffMaker {
+    /// Looks up both trees by ID in `repo` and diffs the old one against the new one, passing
+    /// no explicit diff options.
+    fn make_diff(self, repo: &Repository) -> Result<Diff, git2::Error> {
+        let Self {
+            old_tree_oid,
+            new_tree_oid,
+        } = self;
+        let old = repo.find_tree(old_tree_oid)?;
+        let new = repo.find_tree(new_tree_oid)?;
+        repo.diff_tree_to_tree(Some(&old), Some(&new), None)
+    }
+fn main() {
+    let args: Vec<_> = args_os().collect();
+    if args.len() < 2 {
+        eprintln!("fatal: no command given");
+        exit(128);
+    }
+    if let Some(arg) = args
+        .iter()
+        .skip(1)
+        .take_while(|a| a.as_bytes().starts_with(b"-"))
+        .next()
+    {
+        eprint!("fatal: unrecognized argument: ");
+        let _ = stderr().write_all(arg.as_bytes());
+        eprintln!();
+        exit(128);
+    }
+    let repo = open_repo();
+    let diff_thread =
+        DiffThread::new(Repository::open(repo.path()).expect("opening diff thread repo"));
+    let mut walk = repo.revwalk().expect("revwalk");
+    walk.set_sorting(Sort::TOPOLOGICAL).expect("set_sorting");
+    walk.push_head().expect("push_head");
+    // It might be good to parallelize this loop at some point, but
+    // we'd want to preserve order.
+    for commit_oid in walk {
+        let commit_oid = commit_oid.expect("revwalk");
+        let commit = repo.find_commit(commit_oid).expect("find_commit");
+        // eprint!("checking {:.7} ", commit_oid);
+        // let _ = stderr().write_all(commit.message_bytes().split(|b| *b == b'\n').next().unwrap());
+        // eprintln!();
+        if commit.parent_count() != 1 {
+            continue;
+        }
+        // TODO: typechange
+        // TODO: filemode
+        // TODO: submodules
+        //
+        // TODO: fix all these unwraps
+        let parent_tree = commit.parents().next().unwrap().tree().unwrap();
+        let commit_tree = commit.tree().unwrap();
+        if diff_thread
+            .diff_files(DiffMaker {
+                old_tree_oid:,
+                new_tree_oid:,
+            })
+            .all(|diff| {
+                filtered_file_same(&repo, &parent_tree, &commit_tree, diff.unwrap(), &args[1..])
+            })
+        {
+            println!("{}",;
+        }
+    }
diff --git a/tests/ b/tests/
deleted file mode 100644
index 247426c..0000000
--- a/tests/
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: EUPL-1.2
-// SPDX-FileCopyrightText: 2021 Alyssa Ross <>
-use std::process::Command;
-fn no_repo() {
-    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
-        .current_dir("/var/empty")
-        .env_clear()
-        .output()
-        .expect("spawn");
-    assert_eq!(output.status.code(), Some(128));
-    assert_eq!(
-        output.stderr,
-        b"fatal: could not find repository from '.'\n"
-    );
-    assert_eq!(output.stdout, b"");
diff --git a/tests/ b/tests/
new file mode 100644
index 0000000..9edacff
--- /dev/null
+++ b/tests/
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: EUPL-1.2
+// SPDX-FileCopyrightText: 2021 Alyssa Ross <>
+use std::{
+    env::var_os,
+    fs::create_dir,
+    io::{stderr, Write},
+    process::Command,
+use git2::{Commit, IntoCString, Oid, Reference, Signature, Time, Tree};
+use tempdir::TempDir;
+struct Repo<'a> {
+    inner: git2::Repository,
+    signature: Signature<'a>,
+impl Repo<'_> {
+    /// Wraps `inner` together with a fixed signature (time 0, offset 0) that is used as both
+    /// author and committer of every test commit.
+    fn new(inner: git2::Repository) -> Self {
+        let epoch = Time::new(0, 0);
+        let signature = Signature::new("git-girf test suite", "git-girf@test", &epoch).unwrap();
+        Self { inner, signature }
+    }
+    fn commit(&self, files: &[(impl AsRef<[u8]>, impl AsRef<[u8]>)]) -> Oid {
+        let mut treebuilder = self.inner.treebuilder(None).unwrap();
+        for (path, data) in files {
+            let blob = self.inner.blob(data.as_ref()).unwrap();
+            treebuilder.insert(path.as_ref(), blob, 0o100644).unwrap();
+        }
+        let tree_oid = treebuilder.write().unwrap();
+        let tree = self.inner.find_tree(tree_oid).unwrap();
+        let parent_oids = self
+            .inner
+            .head()
+            .into_iter()
+            .map(|head|;
+        let parents: Vec<_> = parent_oids
+            .map(|oid| self.inner.find_commit(oid).unwrap())
+            .collect();
+        let borrowed_parents: Vec<_> = parents.iter().collect();
+        dbg!(self.inner
+            .commit(
+                Some("HEAD"),
+                &self.signature,
+                &self.signature,
+                "A commit",
+                &tree,
+                &borrowed_parents,
+            )
+            .unwrap())
+    }
+fn happy() {
+    let dir = TempDir::new("git-girf-tests").unwrap();
+    let path = var_os("PATH").unwrap_or_else(|| "/usr/bin:/bin".into());
+    let repo = Repo::new(git2::Repository::init(&dir).expect("opening repo"));
+    // We're going to use `tail -n 1` as our filter command, meaning
+    // commits shouldn't be printed if they change the last line of
+    // any file.
+    let mut expected = Vec::new();
+    repo.commit(&[("a", "a\na\n")]);
+    // This creates a file, so shouldn't be printed.
+    repo.commit(&[("a", "a\na\n"), ("b", "b\nb\n")]);
+    // This changes the first lines only, so should be printed.
+    expected.push(repo.commit(&[("a", "A\na\n"), ("b", "B\nb\n")]));
+    // This changes the last line, so shouldn't be printed.
+    repo.commit(&[("a", "A\nA\n"), ("b", "b\nB\n")]);
+    // This has lots of files, but it should be obvious that it
+    // doesn't have to be printed after the first file.  We include it
+    // to test that short circuiting doesn't panic the diff thread.
+    repo.commit(&(b'a'..b'z').map(|c| ([c], [c, b'\n'])).collect::<Vec<_>>());
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .args(&["tail", "-n", "1"])
+        .current_dir(&dir)
+        .env_clear()
+        .env("PATH", path)
+        .output()
+        .expect("spawn");
+    let _ = stderr().write_all(&output.stderr);
+    let len = expected.len();
+    let mut expected_bytes = Vec::with_capacity(41 * expected.len());
+    for oid in expected {
+        expected_bytes.extend_from_slice(oid.to_string().as_bytes());
+        expected_bytes.push(b'\n');
+    }
+    assert!(output.stderr.is_empty());
+    assert_eq!(
+        String::from_utf8_lossy(&output.stdout),
+        String::from_utf8_lossy(&expected_bytes)
+    );
+    assert!(output.status.success());
+fn no_repo() {
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .arg("cat")
+        .current_dir("/var/empty")
+        .env_clear()
+        .output()
+        .expect("spawn");
+    assert_eq!(output.status.code(), Some(128));
+    assert_eq!(
+        output.stderr,
+        b"fatal: could not find repository from '.'\n"
+    );
+    assert!(output.stdout.is_empty());
+fn no_args() {
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .current_dir("/var/empty")
+        .env_clear()
+        .output()
+        .expect("spawn");
+    assert_eq!(output.status.code(), Some(128));
+    assert_eq!(output.stderr, b"fatal: no command given\n");
+    assert!(output.stdout.is_empty());
+fn flag() {
+    let output = Command::new(env!("CARGO_BIN_EXE_git-girf"))
+        .arg("-Z")
+        .current_dir("/var/empty")
+        .env_clear()
+        .output()
+        .expect("spawn");
+    assert_eq!(output.status.code(), Some(128));
+    assert_eq!(output.stderr, b"fatal: unrecognized argument: -Z\n");
+    assert!(output.stdout.is_empty());
+fn filter_sigpipe() {
+    todo!();
+fn sigpipe() {
+    todo!();