#[macro_use] extern crate pest_derive; use log::*; mod gparser; #[macro_use] mod messages; use messages::{explains, explains_all}; mod linkcheck; mod utils; mod recode; use recode::{wrong_line_endings2crlf, wrong_line_endings2lf}; mod filemagic; use filemagic::LineEnding; use linkcheck::LinkCheck; use std::fmt; use std::fmt::Display; use std::str; use utils::*; use serde::{Deserialize, Serialize}; use std::os::unix::fs::MetadataExt; use std::borrow::Cow; use scoped_threadpool::Pool; use tempfile::Builder; use once_cell::sync::Lazy; // 1.3.1 use colored::*; use regex::Regex; use std::fs::File; use std::fs::Metadata; use std::io::prelude::*; use std::io::BufReader; use std::os::unix::fs::FileTypeExt; use std::{fs, process}; // use std::os::unix::fs::PermissionsExt; use std::ffi::OsStr; use std::io::{self, Read}; use std::path::Path; use std::path::PathBuf; use std::sync::atomic::{AtomicBool, Ordering}; use rustc_hash::{FxHashMap, FxHashSet}; use std::fmt::Arguments; use std::sync::mpsc::{channel, Sender}; use clap::{Command, CommandFactory, Parser, ValueHint}; use clap_complete::{generate, Generator, Shell}; #[cfg(unix)] use walkdir::{DirEntry, WalkDir}; fn format_message(message: &Arguments, no_color: bool) -> Cow<'static, str> { let msg_str = format!("{}", message); if msg_str.starts_with(' ') { return msg_str.into(); } let (left, right) = msg_str.split_once(' ').unwrap(); if no_color { msg_str.into() } else { let colored_msg = match &left.chars().next().unwrap() { 'E' | 'F' => format!("{} {}", left.bright_red().bold(), right), 'W' => format!("{} {}", left.bright_red(), right), 'I' => format!("{} {}", left.bright_yellow().bold(), right), _ => msg_str, }; colored_msg.into() } } #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct TdsException { pub pkg: String, pub tpkg: String, } #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct PathExceptions { pub tds_path_exceptions: Vec, 
}

/// Determines the configuration file to use.
///
/// Precedence: an explicitly given `--config-file` (fatal error if it does
/// not exist), then `$HOME/.ctan/pkgcheck.yml`, then
/// `$HOME/.config/ctan/pkgcheck.yml`. Returns `None` if none exists.
fn get_config_file_name() -> Option<String> {
    if let Some(config_file) = &ARGS.config_file {
        if Path::new(&config_file).exists() {
            return Some(config_file.to_string());
        } else {
            f0008!(config_file);
            std::process::exit(1);
        }
    }
    let home_dir = match home::home_dir() {
        Some(path) => path.display().to_string(),
        None => panic!("Impossible to get your home dir!"),
    };
    let config_files = [".ctan/pkgcheck.yml", ".config/ctan/pkgcheck.yml"];
    for f in config_files {
        let config_file_abs_path = format!("{}/{}", home_dir, f);
        if Path::new(&config_file_abs_path).exists() {
            return Some(config_file_abs_path);
        }
    }
    None
}

/// Builds the package-name replacement map (package name -> TDS directory
/// name), starting from built-in defaults and then merging entries from the
/// YAML config file, which override the defaults.
fn read_yaml_config() -> FxHashMap<String, String> {
    let mut pkg_replacements: FxHashMap<String, String> = FxHashMap::default();
    // Built-in defaults for packages whose TDS directory differs from the
    // package name.
    for (p, q) in [
        ("armtex", "armenian"),
        ("babel-base", "babel"),
        ("latex-amsmath", "latex"),
        ("latex-amsmath-dev", "latex-dev"),
        ("latex-base", "latex"),
        ("latex-base-dev", "latex-dev"),
        ("latex-cyrillic", "cyrillic"),
        ("latex-firstaid", "latex/firstaid"),
        ("latex-firstaid-dev", "latex-dev/firstaid"),
        ("latex-graphics", "latex"),
        ("latex-graphics-dev", "latex-dev"),
        ("latex-lab", "latex"),
        ("latex-lab-dev", "latex-dev"),
        ("latex-tools", "latex"),
        ("latex-tools-dev", "latex-dev"),
        ("vntex-nonfree", "vntex"),
    ] {
        pkg_replacements.insert(p.to_string(), q.to_string());
    }
    match get_config_file_name() {
        Some(config_filename) => {
            i0008!(config_filename);
            let data = match fs::read_to_string(&config_filename) {
                Ok(str) => str,
                Err(e) => {
                    f0009!(&config_filename, e);
                    std::process::exit(1);
                }
            };
            // Restored stripped turbofish: the YAML is deserialized into
            // `PathExceptions` (its `tds_path_exceptions` field is used below).
            let path_exceptions = serde_yaml::from_str::<PathExceptions>(&data);
            match path_exceptions {
                Ok(pb) => {
                    for play in &pb.tds_path_exceptions {
                        // check if package name is already in pkg_replacements hash
                        let old_val = pkg_replacements.get(&play.pkg);
                        if let Some(ov) = old_val {
                            if ARGS.verbose {
                                if ov == &play.tpkg {
                                    w0009!(play.pkg, play.tpkg);
                                } else {
                                    i0009!(play.pkg, ov, play.tpkg);
                                }
                            }
                        }
                        pkg_replacements.insert(play.pkg.clone(), play.tpkg.clone());
                    }
                    pb
                }
                Err(e) => {
                    f0010!(e);
                    std::process::exit(1);
                }
            };
pkg_replacements } None => pkg_replacements, } } fn setup_logger(no_color: bool) -> Result<(), fern::InitError> { fern::Dispatch::new() .format(move |out, message, _record| { let msg_txt = format_message(message, no_color); out.finish(format_args!("{}", msg_txt)) }) .level(log::LevelFilter::Info) .chain(std::io::stdout()) .apply()?; Ok(()) } fn err(path: &Path, err: &io::Error) { e0027!(path.display(), err); } type HashSender = Sender<(u64, PathBuf, Vec)>; //type DupeSender = Sender<(u64, Vec)>; // SizesHashMap contains // - file sizes // - and a vector of file names having that size type SizesHashMap = FxHashMap>; type GeneratedHashMap = FxHashMap; type FileNamesHashMap = FxHashMap; const BLOCKSIZE: usize = 4096; fn hash_file_inner(path: &Path) -> io::Result> { let mut buf = [0u8; BLOCKSIZE]; let mut fp = File::open(path)?; let mut digest = blake3::Hasher::new(); loop { match fp.read(&mut buf)? { 0 => break, n => { digest.update(&buf[..n]); } } } Ok(digest.finalize().as_bytes().to_vec()) } fn hash_file(fsize: u64, path: PathBuf, tx: &HashSender) { match hash_file_inner(&path) { Ok(hash) => tx.send((fsize, path, hash)).unwrap(), Err(e) => err(&path, &e), } } // returns false if an error occurred fn fix_inconsistent_le(fname: &str) -> bool { i0004!(fname); match wrong_line_endings2lf(fname) { Ok(_) => { i0007!(fname, "LF"); true } Err(e) => { e0027!(fname, e); false } } } // returns false if an error occurred fn make_crlf(fname: &str) -> bool { i0004!(fname); match wrong_line_endings2crlf(fname) { Ok(_) => { i0007!(fname, "CRLF"); true } Err(e) => { e0027!(fname, e); false } } } fn check_readme(dir_entry: &str, is_readme: &ReadmeKind, ft: &filemagic::Mimetype) -> bool { let msg_name = if let ReadmeKind::Symlink(s) = is_readme { format!("{} (symlinked from {})", dir_entry, &s) } else { dir_entry.to_string() }; match ft { filemagic::Mimetype::Pdf | filemagic::Mimetype::Binary | filemagic::Mimetype::Archive | filemagic::Mimetype::Zip => { e0003!(msg_name); return 
false; } filemagic::Mimetype::Bom(b) => { e0029!(msg_name, b.as_ref()); return false; } filemagic::Mimetype::Text(_le) => match File::open(dir_entry) { Ok(f) => { if !check_readme_inner(&msg_name, &f) { return false; } } Err(e) => { e0027!(msg_name, e); return false; } }, _ => (), } true } fn check_readme_inner(fname: &str, f: &std::fs::File) -> bool { let reader = BufReader::new(f); let lines = reader.split(b'\n').map(|l| l.unwrap()); let mut result = true; for (lineno, line) in lines.enumerate() { if let Err(e) = String::from_utf8(line.clone()) { e0021!(fname, lineno + 1, e); result = false; } } result } fn is_readme(entry: &str) -> bool { matches!(entry, "README" | "README.txt" | "README.md") } fn get_devno(meta: &Metadata) -> u64 { meta.dev() } fn _get_devno(entry: &DirEntry) -> u64 { let meta = fs::metadata(entry.path().to_str().unwrap()); match meta { Ok(m) => m.dev(), _ => 0, } } #[derive(Parser, Debug, PartialEq)] #[clap(author, version, about, long_about = None)] struct Args { #[clap(short = 'I', long = "ignore-dupes", help = "Ignore dupes")] ignore_dupes: bool, #[clap(long = "ignore-same-named", help = "Ignore same-named files")] ignore_same_named: bool, #[clap(short = 'v', long = "verbose", help = "Verbose operation?")] verbose: bool, #[clap(short = 'L', long = "correct-le", help = "Correct line endings")] correct_le: bool, #[clap(short = 'C', long = "correct-perms", help = "Correct permissions")] correct_perms: bool, #[clap(long = "no-colors", help = "Don't display messages in color")] no_colors: bool, #[clap(long = "urlcheck", help = "Check URLs found in README files")] urlcheck: bool, #[clap(short = 'T', long = "tds-zip", help = "tds zip archive", group = "tds", value_hint = ValueHint::FilePath)] tds_zip: Option, #[clap( short = 'e', long = "explain", help = "Explain error or warning message", group = "only_one" )] explain: Option, #[clap( long = "explain-all", help = "Explains all error or warning messages", group = "only_one" )] explain_all: bool, 
#[clap(long = "generate-completion", group = "only_one", value_enum)] generator: Option, #[clap( long = "show-temp-endings", help = "Show file endings for temporary files", group = "only_one" )] show_tmp_endings: bool, #[clap(short = 'd', long = "package-dir", help = "Package directory", value_hint = ValueHint::DirPath)] pkg_dir: Option, #[clap(long = "config-file", help = "Specify config file to use", value_hint = ValueHint::FilePath)] config_file: Option, } // In the pas we took care to avoid visiting a single inode twice, which takes care of (false positive) hardlinks. // Now we want to know if there is a hardlink in the package directory #[cfg(unix)] fn check_inode(set: &mut FxHashMap<(u64, u64), Vec>, filename: &str, meta: &Metadata) { set.entry((get_devno(meta), meta.ino())).or_insert_with(Vec::new).push(filename.to_string()); } #[cfg(not(unix))] fn check_inode(_: &mut FxHashSet, _: &Metadata) -> bool { true } static ARGS: Lazy = Lazy::new(Args::parse); static ERROR_OCCURRED: AtomicBool = AtomicBool::new(false); #[derive(Debug, Clone, PartialEq, Eq)] pub enum DPath { Both(PathBuf), Tds(PathBuf), } impl fmt::Display for DPath { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DPath::Both(p) => write!(f, "{}", p.display()), DPath::Tds(p) => write!(f, "{}", p.display()), } } } #[derive(Default)] pub struct DupPath { len: usize, plen: usize, dupes: Vec, } impl DupPath { pub fn new() -> DupPath { DupPath { len: 0, plen: 0, dupes: Vec::new(), } } pub fn push(&mut self, pb: PathBuf) { let pname = pb.to_string_lossy(); self.len += 1; if pname.ends_with(".tfm") { self.dupes.push(DPath::Tds(pb.clone())); } else { self.plen += 1; self.dupes.push(DPath::Both(pb.clone())); } } } type DupHashes = FxHashMap<(u64, Vec), DupPath>; fn print_completions(gen: G, cmd: &mut Command) { generate(gen, cmd, cmd.get_name().to_string(), &mut io::stdout()); } fn main() { let _ = setup_logger(ARGS.no_colors); // read yaml config file if one is given explicitly or 
implicitly let pkg_replace : FxHashMap = read_yaml_config(); match &ARGS.explain { None => (), Some(e) => { explains(e); process::exit(0); } } if let Some(generator) = ARGS.generator { let mut cmd = Args::command(); eprintln!("Generating completion file for {:?}...", generator); print_completions(generator, &mut cmd); process::exit(0) } if ARGS.explain_all { explains_all(); process::exit(0); } if ARGS.show_tmp_endings { show_tmp_endings(); process::exit(0); } let pkg_dir = match &ARGS.pkg_dir { None => { f0001!(); process::exit(1); } Some(d) => { // make sure the given directory ends with a '/' (slash) let ds: String = if d.ends_with('/') { d.to_string() } else { let d_s = d.to_string(); d_s + "/" }; if !exists_dir(&ds) { f0002!(&ds); process::exit(1); }; ds } }; let tds_zip = &ARGS.tds_zip; let pkg_name = match tds_zip { None => None, Some(tz) => { let pn = check_tds_archive_name(tds_zip); if !exists_file(tz) { f0003!(&tz); process::exit(1); } let mut fmagic = filemagic::Filetype::new(); match fmagic.analyze(tz) { Ok(filemagic::Mimetype::Zip) => (), _ => { f0004!(&tz); process::exit(1) } }; pn } }; if let Some(hashes) = check_package(&pkg_dir, tds_zip) { if tds_zip.is_some() { if let Some(pn) = pkg_name { if let Some(s) = ARGS.tds_zip.as_ref() { check_tds_archive(&pn, s, &hashes, &pkg_replace); } } } } if ERROR_OCCURRED.load(Ordering::Relaxed) { process::exit(1); } else { process::exit(0); } } fn print_duplicates(hashes: &DupHashes) { let mut total_dupes = 0; let mut total_files = 0; let mut total_size = 0; let mut header_printed = false; for (k, paths) in hashes.iter() { let (sz, _hash) = k; if paths.plen <= 1 { total_files += 1; total_size += sz; continue; } else if !header_printed { w0002!(); header_printed = true; } total_files += paths.plen; total_size += sz * (paths.plen - 1) as u64; total_dupes += (paths.plen - 1) as u64; info!("Size: {}", sz); for p in &paths.dupes { if let DPath::Both(p) = p { let ps = p.as_path().to_str().unwrap(); info!(" >>> {}", ps); 
} } //eprintln!(); } if ARGS.verbose && total_dupes > 0 { info!("Duplicate statistics"); info!(" Found {} duplicate files", total_files); info!(" Size of duplicate files: {}", total_size); } } //#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)] pub enum FType { Regular, Directory, Symlink, BlockDevice, CharDevice, Fifo, Socket, Error(String), } fn get_filetype(entry: &DirEntry) -> FType { match entry.metadata() { Ok(mt) => { let ft = mt.file_type(); if ft.is_symlink() { return FType::Symlink; } if ft.is_dir() { return FType::Directory; } if ft.is_block_device() { return FType::BlockDevice; } if ft.is_char_device() { return FType::CharDevice; } if ft.is_fifo() { return FType::Fifo; } if ft.is_socket() { return FType::Socket; } FType::Regular } Err(e) => FType::Error(format!("{}", e)), } } // // read file into buffer[u8] // then convert into string // fn check_generated_files(entry: &str, generated: &mut GeneratedHashMap) { // unwrap() is ok here as we only call this function for files, // specifically .ins or .dtx files let entry_fname = filename(entry).unwrap().to_string(); // the name of the .ins resp. .dtx without extension let entry_base = &entry_fname[0..entry_fname.len() - 4]; let fhdl = File::open(entry); match fhdl { Ok(mut f) => { let mut buf = Vec::new(); match f.read_to_end(&mut buf) { Ok(_bytes_read) => { if let Some(found) = gparser::parse_generate(&String::from_utf8_lossy(&buf.clone())) { for fname in found { // If the filename in the 'file{} statement contains `\jobname` // we replace jobname with the .dtx resp. 
.ins filename (without extension) // before we investigate further let fname1 = fname.replace("\\jobname", entry_base); // If the filename in the generate statement contains a path component // we ignore it so that a generated file will be reported even if it is // in a different place in the package directory which sometimes // happens in uploaded packages let fname_opt = utils::filename(&fname1); if fname_opt.is_none() { continue; } let filename = fname_opt.unwrap(); // As we request a README in the top level directory of // a package we ignore if a README was generated by an // .ins or .dtx file // CAVEAT: If this happens in a subdirectory it could be an error!!!! if is_readme(filename) { continue; } // Ignore generated pdf, html, and css files if fname.ends_with(".pdf") || fname.ends_with(".html") || fname.ends_with(".css") { continue; } generated .entry(filename.to_string()) .or_insert_with(|| entry.to_string()); } }; } Err(e) => error!("Error reading file {}: {:?}", entry, e), } } Err(e) => error!("Error opening file {}: {:?}", entry, e), } } fn x_bit_set(p: u32) -> bool { let p1 = p & 0o7777; p1 & 0o111 != 0 } // checks that archive name is in the format x.tds.zip and returns: // None if not // Option if x.tds.zip // Caveat: we currently don't know if x is really the package name. fn check_tds_archive_name(tds_zip: &Option) -> Option { match tds_zip { None => None, Some(tz) => { // 8 for ".tds.zip", 1 means a package name is at least a character if tz.len() < 8 + 1 || !tz.ends_with(".tds.zip") { f0005!(tz); process::exit(1); } let mut pname = String::from(utils::basename(tz)); let plen = pname.len(); pname.truncate(plen - 8); Some(pname) } } } // fn unzip_tds_archive(tds_zip: &str, tmp_dir: &str) { // match run_cmd("unzip", &["-q", "-d", tmp_dir, tds_zip]) { // CmdReturn { status: true, .. 
} => (), // CmdReturn { // status: false, // output: out, // } => { // if let Some(o) = out { // e0033!(&tds_zip, o); // } else { // e0033!(&tds_zip, ") { i0003!(tds_zip); let dir_entry = Path::new(tds_zip); let p = get_perms(dir_entry); if !owner_has(p, 4) || !others_have(p, 4) || x_bit_set(p) { e0024!(tds_zip, perms_to_string(p)); if ARGS.correct_perms { i0005!(&tds_zip); set_perms(tds_zip, 0o664); } }; let ut = Utils::new(utils::CheckType::Tds); let real_pkg_name = if let Some(real_name) = pkg_replace.get(pkg_name) { real_name } else { pkg_name }; let tmp_dir = match Builder::new().prefix("pkgcheck").tempdir() { Ok(tdir) => tdir, Err(e) => { f0007!(e); process::exit(1); } }; let tmp_dir_offset = tmp_dir.path().to_str().unwrap().len() + 1; let tmp_dir_str = tmp_dir.path().to_str().unwrap(); // unzip the TDS zip archive into a temporary directory match ut.unzip(tds_zip, tmp_dir_str) { Ok(_) => {} Err(e) => { error!( "Could not unpack `{}` into directory `{}`", tds_zip, tmp_dir_str ); error!("Error from unzip: {}", e); e0033!(&tds_zip, e); process::exit(1) } } // in order to compare the package files with the content of the // tds zip archive we need to checksum the files in the tds zip // archive. let mut sizes: SizesHashMap = FxHashMap::default(); let mut pool = Pool::new(num_cpus::get() as u32 + 1); { // Processing a single file entry, with the "sizes" hashmap collecting // same-size files. Entries are either Found::One or Found::Multiple, // so that we can submit the first file's path as a hashing job when the // first duplicate is found. Hashing each file is submitted as a job to // the pool. 
let mut process = |fsize, dir_entry: &DirEntry| { let path = dir_entry.path().to_path_buf(); let sizeref = &mut sizes; sizeref.entry(fsize).or_insert_with(Vec::new).push(path); }; let mut map_files_found = false; let mut map_dvips_found = false; // those top level directories are the directories found in the // texmf-dist/ directory of a texlive installation let tds_toplevel_dirs: FxHashSet = [ "asymptote", "bibtex", "chktex", "context", "doc", "dvipdfmx", "dvips", "fonts", "hbf2gf", "makeindex", "metafont", "metapost", "mft", "omega", "pbibtex", "psutils", "scripts", "source", "tex", "tex4ht", "texconfig", "texdoc", "texdoctk", "ttf2pk", "web2c", "xdvi", "xindy", ] .iter() .map(|&s| s.to_string()) .collect(); // set to True if the TDS zip archive contains a top level directory doc/ let mut doc_found = false; // we track the number of toplevel directories which must at least be 2 let mut number_of_toplevel_dirs = 0; for dir_entry in WalkDir::new(tmp_dir.path().to_str().unwrap()).follow_links(false) { match dir_entry { Ok(dir_entry) => { let dir_entry_str = match dir_entry.path().to_str() { Some(d) => d, None => { e0031!(dir_entry.path().to_string_lossy()); continue; } }; // this is the file_name without the directory part // unwrap() is ok here as we covered potential UTF-8 related errors // above in the definition of dir_entry_str let file_name = dir_entry.file_name().to_str().unwrap().to_string(); let meta = match dir_entry.metadata() { Ok(meta) => meta, Err(e) => { e0027!(dir_entry.path().display(), e); continue; } }; let ft = get_filetype(&dir_entry); if let FType::Error(e) = ft { e0023!(e); continue; } let dir_entry_display = if dir_entry.depth() == 0 { &dir_entry_str[tmp_dir_offset - 1..] } else { &dir_entry_str[tmp_dir_offset..] }; ut.check_for_temporary_file(dir_entry_display); // In the top level directory of a TDS zip archive // ... no files are allowed // ... 
only specific directories are allowed if dir_entry.depth() == 1 { if ft == FType::Regular { e0034!(dir_entry_display); continue; } if !tds_toplevel_dirs.contains(&file_name) { e0020!(&file_name); } else { number_of_toplevel_dirs += 1; if &file_name == "doc" { doc_found = true; } } continue; } if ft == FType::Directory { ut.check_for_empty_directory(dir_entry_str, dir_entry_display); ut.check_for_hidden_directory(&file_name, dir_entry_display); ut.is_unwanted_directory(&file_name, dir_entry_str); continue; } // The LaTeX team provides the file `.tex` as a file with an empty name // in order to make `\input\relax` work (explained by David Carlisle) // Therefore, we don't call check_for_hidden_file() in this case match (pkg_name, dir_entry_display) { ("latex-tools", "tex/latex/tools/.tex") => (), ("latex-tools-dev", "tex/latex-dev/tools/.tex") => (), (_, _) => ut.check_for_hidden_file(&file_name, dir_entry_display), }; let fsize = meta.len(); process(fsize, &dir_entry); ut.check_filesize(fsize, dir_entry_display); // if we encounter a .dtx or .ins file we check // that it is in a subdirectory of either source/ or doc/ if (dir_entry_str.ends_with(".dtx") || dir_entry_str.ends_with(".ins")) && !(dir_entry_display.starts_with("source/") || dir_entry_display.starts_with("doc/")) { e0036!(dir_entry_display); continue; } // if the path doesn't contain a man page... 
if !dir_entry_str.contains("/man/") { let pkg_name_s = format!("/{}/", real_pkg_name); // ...then we want to have the package name in the path if !dir_entry_str.contains(&pkg_name_s) { e0028!(real_pkg_name, dir_entry_display); } } if dir_entry_str.ends_with(".map") { map_files_found = true; let re: Regex = Regex::new(r"fonts[/]map[/]dvips[/]").unwrap(); if re.is_match(dir_entry_str) { map_dvips_found = true; } } } Err(e) => { error!("{}", e); } } } if !doc_found { e0039!(); } if number_of_toplevel_dirs < 2 { e0040!(); } if map_files_found && !map_dvips_found { e0041!(); } }; let mut tds_hashes: FxHashMap<(u64, Vec), Vec> = FxHashMap::default(); pool.scoped(|scope| { let (tx, rx) = channel(); let hashref = &mut tds_hashes; scope.execute(move || { for (size, path, hash) in rx.iter() { hashref .entry((size, hash)) .or_insert_with(Vec::new) .push(path); } }); for size in sizes.keys() { for p in &sizes[size] { let txc = tx.clone(); scope.execute(move || hash_file(*size, p.to_path_buf(), &txc)); } } }); // now check if each package file is in the tds archive for (k, paths) in hashes.iter() { if !tds_hashes.contains_key(k) { let p = &paths.dupes[0]; e0026!(p); } } } fn get_extension_from_filename(filename: &str) -> Option<&str> { Path::new(filename).extension().and_then(OsStr::to_str) } fn found_unwanted_filetype(fname: &str, ft: &FType) -> bool { match ft { FType::Socket => { e0013!(fname); true } FType::Fifo => { e0014!(fname); true } FType::BlockDevice => { e0015!(fname); true } FType::CharDevice => { e0016!(fname); true } FType::Error(e) => { e0023!(e); true } _ => false, } } fn fix_perms(entry: &str) { if !ARGS.correct_perms { return; } i0005!(entry); let rc = run_cmd("chmod", &["-v", "ug=rwX,o=rX", entry]); if rc.status { if let Some(op) = rc.output { info!("{}", op); } } } fn check_and_correct_perms(dir_entry: &str, p: u32) { if owner_has(p, 5) || !others_have(p, 4) { e0002!(dir_entry, perms_to_string(p)); if ARGS.correct_perms { i0005!(&dir_entry); 
set_perms(dir_entry, 0o664); } }; } // Sets permissions for a file or directory // Sample invocation: set_perms("somfile", 0o644); fn set_perms(entry: &str, p: u32) -> bool { let ps = &format!("{:o}", p); let rc = run_cmd("chmod", &["-v", ps, entry]); if rc.status { if let Some(op) = rc.output { // we need to remove the `\n` in the `chmod` output // as `info!` also adds a `\n` info!("{}", op.trim_end()); } true } else { false } } #[derive(Debug, Clone, PartialEq)] enum FileKind { File, Directory, Symlink(String), } impl Display for FileKind { fn fmt(&self, f: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> { match *self { FileKind::File => f.write_str("file"), FileKind::Directory => f.write_str("directory"), FileKind::Symlink(_) => f.write_str("symlink"), } } } #[derive(Debug, Clone, PartialEq)] enum ReadmeKind { No, Yes, Symlink(String), } fn check_package(root: &str, tds_zip: &Option) -> Option { let mut lcnames: FxHashMap> = FxHashMap::default(); let mut doublenames: FxHashMap> = FxHashMap::default(); let mut inodes = FxHashMap::default(); let ut = Utils::new(utils::CheckType::Package); i0002!(root); // This hash contains all package file names. // // PathBuf: the full path starting at the directory specified at the command line // Metadata: the meta data of the file // String: the file name without any directory part // ReadmeKind: is it a certain README, file or symlink? // A special case of a README file is a file with has a different name but // was pointed to by a symlink. 
Example: README --> README.rst let mut file_names: FileNamesHashMap = FxHashMap::default(); let mut readme_found = false; let root_absolute = PathBuf::from(root) .canonicalize() .unwrap() .to_string_lossy() .to_string(); for dir_entry in WalkDir::new(root).follow_links(false) { match dir_entry { Ok(dir_entry) => { let dir_entry_str = match dir_entry.path().to_str() { Some(d) => d, None => { e0031!(dir_entry.path().to_string_lossy()); continue; } }; let meta = match dir_entry.metadata() { Ok(meta) => meta, Err(e) => { e0023!(e); continue; } }; check_inode(&mut inodes, dir_entry_str, &meta); // this is the file_name without the directory part // unwrap() is ok here as we covered potential UTF-8 related errors // above in the definition of dir_entry_str let file_name = dir_entry.file_name().to_str().unwrap().to_string(); // we check for weird stuff like socket files aso. let ft = get_filetype(&dir_entry); if found_unwanted_filetype(dir_entry_str, &ft) { continue; } ut.filename_has_bad_chars(&dir_entry, dir_entry_str); // 1. dealing with symlinks if ft == FType::Symlink { match get_symlink(&dir_entry) { Ok(None) => { e0010!(&dir_entry_str); continue; } Err(e) => { e0027!(&dir_entry_str, e); continue; } Ok(Some(p)) => { let pd: String = p.canonicalize().unwrap().to_string_lossy().to_string(); if !pd.starts_with(&root_absolute) { e0030!(&dir_entry_str, p.display()); continue; } let lc_dir_entry_str = dir_entry_str.to_lowercase(); if let Some(_dir_name) = filename(dir_entry_str) { let lcnref = &mut lcnames; lcnref .entry(PathBuf::from(lc_dir_entry_str)) .or_insert_with(Vec::new) .push(( PathBuf::from(&dir_entry_str), //FileKind::Symlink(&dir_entry_str.into()), FileKind::Symlink(pd.clone()), )); } if is_readme(&file_name) { readme_found = true; file_names.insert( p, ( meta, file_name, ReadmeKind::Symlink(dir_entry_str.to_string()), ), ); } continue; } } } let p = get_perms(dir_entry.path()); // 2. 
dealing with directories if ft == FType::Directory { let lc_dir_entry_str = dir_entry_str.to_lowercase(); if let Some(_dir_name) = filename(dir_entry_str) { let lcnref = &mut lcnames; lcnref .entry(PathBuf::from(lc_dir_entry_str)) .or_insert_with(Vec::new) .push((PathBuf::from(dir_entry_str), FileKind::Directory)); } if !owner_has(p, 5) || !others_have(p, 5) { e0011!(&dir_entry_str, perms_to_string(p)); if ARGS.correct_perms { i0005!(&dir_entry_str); set_perms(dir_entry_str, 0o775); } } ut.check_for_empty_directory(dir_entry_str, dir_entry_str); ut.check_for_hidden_directory(&file_name, dir_entry_str); ut.is_unwanted_directory(&file_name, dir_entry_str); continue; } // 3. dealing with regular files ut.check_for_hidden_file(&file_name, dir_entry_str); ut.check_for_temporary_file(dir_entry_str); // if is_temporary_file(&dir_entry_str) { // e0008!(&dir_entry_str); // } if let Some(file_name) = filename(dir_entry_str) { let doubleref = &mut doublenames; doubleref .entry(PathBuf::from(file_name)) .or_insert_with(Vec::new) .push(PathBuf::from(&dir_entry_str)); } if is_readme(&file_name) { // We want to deal with README files only if they are // in the root directory of the package. 
let f = format!( "{}{}{}", root, // we have to pay attention if `root` ends already with '/' if root.ends_with('/') { "" } else { "/" }, &file_name ); if dir_entry_str == f { readme_found = true; file_names.insert( dir_entry.path().to_path_buf(), (meta, file_name.clone(), ReadmeKind::Yes), ); } else { file_names.entry(dir_entry.path().to_path_buf()).or_insert(( meta, file_name.clone(), ReadmeKind::No, )); } } else { file_names.entry(dir_entry.path().to_path_buf()).or_insert(( meta, file_name.clone(), ReadmeKind::No, )); } let lc_dir_entry_str = dir_entry_str.to_lowercase(); let lcnref = &mut lcnames; lcnref .entry(PathBuf::from(lc_dir_entry_str)) .or_insert_with(Vec::new) .push((PathBuf::from(&dir_entry_str), FileKind::File)); } Err(e) => { error!("{}", e); } } } if !readme_found { e0009!(); } let lc = LinkCheck::new(4, false); let mut detective = filemagic::Filetype::new(); let mut sizes: SizesHashMap = FxHashMap::default(); let mut generated: GeneratedHashMap = FxHashMap::default(); // Processing a single file entry, with the "sizes" hashmap collecting // same-size files. Entries are either Found::One or Found::Multiple, // so that we can submit the first file's path as a hashing job when the // first duplicate is found. Hashing each file is submitted as a job to // the pool. 
let mut process = |fsize, path: &PathBuf| { let sizeref = &mut sizes; let path = path.clone(); sizeref.entry(fsize).or_insert_with(Vec::new).push(path); }; for (path, (meta, _file_name, is_readme)) in file_names.iter() { let dir_entry_str = match path.to_str() { Some(d) => d, None => { e0031!(&path.to_string_lossy()); continue; } }; let fsize = meta.len(); ut.check_filesize(fsize, dir_entry_str); let p = get_perms(path); if !owner_has(p, 4) { e0002!(&dir_entry_str, perms_to_string(p)); fix_perms(dir_entry_str); continue; } let ftr = detective.analyze(dir_entry_str); //println!(">>> {:?}", ftr); // we ignore errors from filetype recognition if ftr.is_err() { continue; } let ft = ftr.unwrap(); // DEBUG !readme_symlinked.contains(&dir_entry_str) if ReadmeKind::No != *is_readme { if !check_readme(dir_entry_str, is_readme, &ft) { continue; } if ARGS.urlcheck { lc.check_urls(dir_entry_str); } } match ft { filemagic::Mimetype::Text(_) => { check_and_correct_perms(dir_entry_str, p); let fext = get_extension_from_filename(dir_entry_str); if fext == Some("ins") || fext == Some("dtx") { check_generated_files(dir_entry_str, &mut generated); } match fext { // deal with Windows files Some("bat") | Some("cmd") | Some("nsh") | Some("reg") => match ft { filemagic::Mimetype::Text(LineEnding::Crlf) => (), filemagic::Mimetype::Text(LineEnding::Cr) => { e0037!(&dir_entry_str); if ARGS.correct_le { make_crlf(dir_entry_str); } } filemagic::Mimetype::Text(LineEnding::Mixed(0, 0, 0)) => (), filemagic::Mimetype::Text(LineEnding::Mixed(cr, lf, crlf)) => { e0038!(&dir_entry_str, cr, lf, crlf); if ARGS.correct_le { fix_inconsistent_le(dir_entry_str); } } filemagic::Mimetype::Text(LineEnding::Lf) => { w0008!(&dir_entry_str); } fmm => error!("Should not occur: {} has {:?}", dir_entry_str, fmm), }, Some(_) | None => { match ft { filemagic::Mimetype::Text(LineEnding::Crlf) => { e0012!(&dir_entry_str); if ARGS.correct_le { fix_inconsistent_le(dir_entry_str); } } 
filemagic::Mimetype::Text(LineEnding::Cr) => { e0037!(&dir_entry_str); if ARGS.correct_le { fix_inconsistent_le(dir_entry_str); } } filemagic::Mimetype::Text(LineEnding::Mixed(0, 0, 0)) => (), filemagic::Mimetype::Text(LineEnding::Mixed(cr, lf, crlf)) => { //println!(">>>{}: {:?} {},{},{}", &dir_entry_str, ft, x, y, z); e0038!(&dir_entry_str, cr, lf, crlf); if ARGS.correct_le { fix_inconsistent_le(dir_entry_str); } } filemagic::Mimetype::Text(LineEnding::Lf) => (), fmm => error!("Should not occur: {} has {:?}", dir_entry_str, fmm), } } } } filemagic::Mimetype::Bom(b) => { //println!("{}: {} with BOM detected", dir_entry_str, b.as_ref()); w0004!(&dir_entry_str, b.as_ref()); check_and_correct_perms(dir_entry_str, p); } filemagic::Mimetype::Binary | filemagic::Mimetype::Script(_) => { if !owner_has(p, 4) || !others_have(p, 4) { e0002!(&dir_entry_str, perms_to_string(p)); }; fix_perms(dir_entry_str); } filemagic::Mimetype::Pdf => { check_and_correct_perms(dir_entry_str, p); let ret = is_pdf_ok(dir_entry_str); if !ret.status { e0017!(&dir_entry_str); if let Some(output) = ret.output { info!("{}", &output); }; } } filemagic::Mimetype::Archive | filemagic::Mimetype::Zip => { if dir_entry_str.ends_with(".tds.zip") { e0035!(&dir_entry_str); } else { w0001!(&dir_entry_str); } check_and_correct_perms(dir_entry_str, p); } filemagic::Mimetype::Data => check_and_correct_perms(dir_entry_str, p), filemagic::Mimetype::Zerofile => check_and_correct_perms(dir_entry_str, p), _ => continue, } if others_match(p, 0) { e0002!(&dir_entry_str, perms_to_string(p)); if ARGS.correct_perms { i0005!(&dir_entry_str); set_perms(dir_entry_str, 0o664); } } if !(ARGS.ignore_dupes && tds_zip.is_none()) { process(fsize, path); } } print_casefolding(&lcnames); print_generated(&doublenames, &generated); print_hardlinks(&inodes); if !ARGS.ignore_same_named { print_doublenames(&doublenames); } if ARGS.ignore_dupes && tds_zip.is_none() { return None; } // Set up thread pool for the task to hash a file. 
Number of CPUs + 1 has been // found to be a good pool size, likely since the walker thread should be // doing mostly IO. let mut pool = Pool::new(num_cpus::get() as u32 + 1); let mut hashes: FxHashMap<(u64, Vec), DupPath> = FxHashMap::default(); pool.scoped(|scope| { let (tx, rx) = channel(); let hashref = &mut hashes; scope.execute(move || { for (size, path, hash) in rx.iter() { hashref .entry((size, hash)) .or_insert_with(DupPath::new) .push(path); } }); for size in sizes.keys() { let paths = &sizes[size]; if paths.len() == 1 && tds_zip.is_none() { continue; }; for p in &sizes[size] { let txc = tx.clone(); scope.execute(move || hash_file(*size, p.to_path_buf(), &txc)); } } }); if !ARGS.ignore_dupes { print_duplicates(&hashes); } Some(hashes) } fn print_hardlinks(hashes: &FxHashMap<(u64, u64), Vec>) { for ((_devid,inode), eles) in hashes.iter() { if eles.len() > 1 { w0010!(inode); for hfile in eles.iter() { info!(" >>> {}", &hfile); } } } } fn print_casefolding(hashes: &FxHashMap>) { for (k, eles) in hashes.iter() { //println!("pcf: {:?}, {:?}", k, &eles); if eles.len() == 1 { continue; } e0025!(k.display()); for (p, ty) in eles { info!(" >>> {} ({})", p.display(), ty); } } } fn print_generated(doublenames: &FxHashMap>, generated: &GeneratedHashMap) { // `k` is generated by `gen` for (k, gen) in generated.iter() { let path = PathBuf::from(k); if doublenames.contains_key(&path) { if k.ends_with(".ins") || k.ends_with(".pdf") { //println!("key {}, gen {}", k, gen); continue; } let v = &doublenames[&path]; for fname in v { e0019!(fname.to_str().unwrap(), gen.as_str()); } } } } fn print_doublenames(hashes: &FxHashMap>) { for (k, paths) in hashes.iter() { if paths.len() == 1 { continue; } let ks = k.to_str().unwrap(); if ks == "README" || ks == "README.txt" || ks == "README.md" || ks == "Makefile" || ks == "Makefile.am" || ks == "Makefile.in" || ks == "makefile" { continue; } w0003!(k.to_str().unwrap()); // println!(":: {}", k.display()); for p in paths { info!(" >>> 
{}", p.display()); } } } fn show_tmp_endings() { i0006!(); for (t, c) in temp_file_endings() { info!("{:23} {}", t, c); } }