diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/diff_thread/client.rs | 31 | ||||
-rw-r--r-- | src/diff_thread/mod.rs | 28 | ||||
-rw-r--r-- | src/diff_thread/server.rs | 39 | ||||
-rw-r--r-- | src/filter.rs | 120 | ||||
-rw-r--r-- | src/main.rs | 131 |
5 files changed, 344 insertions, 5 deletions
diff --git a/src/diff_thread/client.rs b/src/diff_thread/client.rs new file mode 100644 index 0000000..dcc9cfe --- /dev/null +++ b/src/diff_thread/client.rs @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> + +use super::{server, FileDiff, MakeDiff}; + +use std::sync::mpsc::{sync_channel, SyncSender}; +use std::thread::spawn; + +use git2::Repository; + +pub struct DiffThread<D: MakeDiff> { + sender: SyncSender<(D, SyncSender<Result<FileDiff, git2::Error>>)>, +} + +impl<D: MakeDiff> DiffThread<D> { + pub fn new(repo: Repository) -> Self { + let (sender, receiver) = sync_channel(0); + + spawn(move || server::main(&repo, receiver)); + + Self { sender } + } + + pub fn diff_files(&self, diff_maker: D) -> impl Iterator<Item = Result<FileDiff, git2::Error>> { + let (sender, receiver) = sync_channel(2); + self.sender + .send((diff_maker, sender)) + .expect("sending to diff thread"); + receiver.into_iter() + } +} diff --git a/src/diff_thread/mod.rs b/src/diff_thread/mod.rs new file mode 100644 index 0000000..9416509 --- /dev/null +++ b/src/diff_thread/mod.rs @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> + +//! libgit2's diffing API gives us a callback whenever a file diff is ready, but +//! that's not very convenient for Rust code, because it can't be used as an +//! iterator. So we run the diff in a thread, and send the diff information we need +//! to the main thread over a channel. When the main thread is processing diff +//! information too slowly, we block the diff thread until the main thread catches +//! up. + +mod server; +mod client; + +pub use client::DiffThread; + +use std::path::PathBuf; + +use git2::{Repository, Diff, Delta}; + +pub trait MakeDiff: Send + 'static { + fn make_diff(self, _: &Repository) -> Result<Diff, git2::Error>; +} + +/// `new_path` isn't included because it's not currently used in git-girf. +pub struct FileDiff { + pub status: Delta, + pub old_path: Option<PathBuf>, +} diff --git a/src/diff_thread/server.rs b/src/diff_thread/server.rs new file mode 100644 index 0000000..d6d43ab --- /dev/null +++ b/src/diff_thread/server.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> + +use super::{FileDiff, MakeDiff}; + +use std::sync::mpsc::{Receiver, SyncSender}; + +use git2::Repository; + +pub fn main( + repository: &Repository, + receiver: Receiver<(impl MakeDiff, SyncSender<Result<FileDiff, git2::Error>>)>, +) { + for (diff_maker, sender) in receiver { + let diff = match diff_maker.make_diff(&repository) { + Ok(diff) => diff, + Err(e) => { + let _ = sender.send(Err(e)); + return; + } + }; + + if let Err(e) = diff.foreach( + &mut |delta, _| { + let _ = sender.send(Ok(FileDiff { + status: delta.status(), + old_path: delta.old_file().path().map(Into::into), + })); + + true + }, + None, + None, + None, + ) { + let _ = sender.send(Err(e)); + } + } +} diff --git a/src/filter.rs b/src/filter.rs new file mode 100644 index 0000000..3352b16 --- /dev/null +++ b/src/filter.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> + +use std::ffi::{CStr, OsString}; +use std::fmt::{Display, Formatter}; +use std::io::{self, ErrorKind, Write}; +use std::os::raw::{c_char, c_int}; +use std::path::Path; +use std::process::{Child, ChildStdout, Command, Stdio}; +use std::thread::{spawn, JoinHandle}; + +use git2::{Repository, Tree}; +use libc::{SIGPIPE, WEXITSTATUS, WIFEXITED, WIFSIGNALED, WTERMSIG}; + +extern "C" { + fn sigdescr_np(sig: c_int) -> *const c_char; +} + +unsafe fn wait(pid: u32) -> io::Result<c_int> { + let mut wstatus: c_int = 0; + if libc::waitpid(pid as i32, &mut wstatus, 0) == -1 { + return Err(io::Error::last_os_error()); + } + Ok(wstatus) +} + +fn filter_output(argv: &[OsString]) -> Child { + Command::new(argv.get(0).unwrap()) + .args(&argv[1..]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .expect("spawn") +} + +fn tree_path_content(repo: &Repository, tree: &Tree, path: &Path) -> Result<Vec<u8>, git2::Error> { + let blob = tree.get_path(path)?.to_object(&repo)?.peel_to_blob()?; + Ok(blob.content().to_vec()) +} + +#[derive(Debug)] +pub enum FilterError { + Write(io::Error), + Exit(c_int), + Signal(c_int), +} + +impl Display for FilterError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + use FilterError::*; + match self { + Write(e) => write!(f, "writing to filter stdin: {}", e), + Exit(code) => write!(f, "filter command failed with code {}", code), + Signal(signo) => { + let sigdescr = unsafe { CStr::from_ptr(sigdescr_np(*signo)) }.to_string_lossy(); + write!(f, "filter command killed by signal: {}", sigdescr) + } + } + } +} + +impl std::error::Error for FilterError {} + +pub struct Filter { + pid: u32, + writer: JoinHandle<io::Result<()>>, +} + +impl Filter { + pub fn new( + repo: &Repository, + tree: &Tree, + path: &Path, + args: &[OsString], + ) -> (Self, ChildStdout) { + let blob = tree_path_content(repo, tree, path).unwrap(); + + let child = filter_output(args); + let pid = child.id(); + + // Destructure here to make sure there are no + // remaining references to the Child structs after + // this. This makes sure nothing else will wait for + // our filter processes, and we don't have to worry + // about PID reuse. + let Child { stdin, stdout, .. } = child; + + let mut stdin = stdin.unwrap(); + let stdout = stdout.unwrap(); + + let writer = spawn(move || stdin.write_all(&blob)); + + (Self { pid, writer }, stdout) + } + + pub fn wait(self) -> Result<(), FilterError> { + if let Err(e) = self.writer.join().unwrap() { + if e.kind() != ErrorKind::BrokenPipe { + return Err(FilterError::Write(e)); + } + } + + let wstatus = unsafe { wait(self.pid).unwrap() }; + if WIFEXITED(wstatus) { + let status = WEXITSTATUS(wstatus); + if status != 0 { + return Err(FilterError::Exit(status)); + } + } else if WIFSIGNALED(wstatus) { + let signal = WTERMSIG(wstatus); + if signal != SIGPIPE { + return Err(FilterError::Signal(signal)); + } + } else { + unreachable!() + } + + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 062a448..d5d763d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,137 @@ // SPDX-License-Identifier: EUPL-1.2 // SPDX-FileCopyrightText: 2021 Alyssa Ross <hi@alyssa.is> -use std::process::exit; +mod diff_thread; +mod filter; -use git2::Repository; +use diff_thread::{DiffThread, FileDiff, MakeDiff}; +use filter::Filter; -fn main() { - let repo = match Repository::open_from_env() { +use std::env::args_os; +use std::ffi::OsString; +use std::io::{stderr, Read, Write}; +use std::os::unix::prelude::*; +use std::process::{exit, ChildStdout}; +use std::thread::spawn; + +use git2::{Delta, Diff, Oid, Repository, Sort, Tree}; + +fn child_out_bytes(child: &mut ChildStdout) -> impl Iterator<Item = u8> + '_ { + child.by_ref().bytes().map(Result::unwrap) +} + +fn output_eq(lhs: &mut ChildStdout, rhs: &mut ChildStdout) -> bool { + child_out_bytes(lhs).eq(child_out_bytes(rhs)) +} + +fn filtered_file_same( + repo: &Repository, + old: &Tree, + new: &Tree, + diff: FileDiff, + argv: &[OsString], +) -> bool { + if diff.status != Delta::Modified { + return false; + } + + let path = diff.old_path.unwrap(); + + let (filter_old, mut stdout_old) = Filter::new(&repo, &old, &path, argv); + let (filter_new, mut stdout_new) = Filter::new(&repo, &new, &path, argv); + + let thread = spawn(move || output_eq(&mut stdout_old, &mut stdout_new)); + + filter_old.wait().expect("TODO: HANDLE FILTER ERROR"); + filter_new.wait().expect("TODO: HANDLE FILTER ERROR"); + + thread.join().unwrap() +} + +fn open_repo() -> Repository { + match Repository::open_from_env() { Ok(repo) => repo, Err(e) => { eprintln!("fatal: {}", e.message()); exit(128); } - }; + } +} + +struct DiffMaker { + old_tree_oid: Oid, + new_tree_oid: Oid, +} + +impl MakeDiff for DiffMaker { + fn make_diff(self, repo: &Repository) -> Result<Diff, git2::Error> { + let old_tree = repo.find_tree(self.old_tree_oid)?; + let new_tree = repo.find_tree(self.new_tree_oid)?; + repo.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), Default::default()) + } +} + +fn main() { + let args: Vec<_> = args_os().collect(); + if args.len() < 2 { + eprintln!("fatal: no command given"); + exit(128); + } + + if let Some(arg) = args + .iter() + .skip(1) + .take_while(|a| a.as_bytes().starts_with(b"-")) + .next() + { + eprint!("fatal: unrecognized argument: "); + let _ = stderr().write_all(arg.as_bytes()); + eprintln!(); + exit(128); + } + + let repo = open_repo(); + + let diff_thread = + DiffThread::new(Repository::open(repo.path()).expect("opening diff thread repo")); + + let mut walk = repo.revwalk().expect("revwalk"); + walk.set_sorting(Sort::TOPOLOGICAL).expect("set_sorting"); + walk.push_head().expect("push_head"); + + // It might be good to parallelize this loop at some point, but + // we'd want to preserve order. + for commit_oid in walk { + let commit_oid = commit_oid.expect("revwalk"); + let commit = repo.find_commit(commit_oid).expect("find_commit"); + + // eprint!("checking {:.7} ", commit_oid); + // let _ = stderr().write_all(commit.message_bytes().split(|b| *b == b'\n').next().unwrap()); + // eprintln!(); + + if commit.parent_count() != 1 { + continue; + } + + // TODO: typechange + // TODO: filemode + // TODO: submodules + // + // TODO: fix all these unwraps + + let parent_tree = commit.parents().next().unwrap().tree().unwrap(); + let commit_tree = commit.tree().unwrap(); + + if diff_thread + .diff_files(DiffMaker { + old_tree_oid: parent_tree.id(), + new_tree_oid: commit_tree.id(), + }) + .all(|diff| { + filtered_file_same(&repo, &parent_tree, &commit_tree, diff.unwrap(), &args[1..]) + }) + { + println!("{}", commit.id()); + } + } } |