2018-10-31 19:37:45 +00:00
|
|
|
/// # Bonus 1: link linter
|
|
|
|
///
|
|
|
|
/// *Authors:* Claudio Maggioni, Joey Bevilacqua
|
|
|
|
///
|
|
|
|
/// Group 1
|
|
|
|
extern crate reqwest;
|
|
|
|
extern crate regex;
|
|
|
|
#[macro_use]
|
|
|
|
extern crate lazy_static;
|
2018-11-05 20:03:04 +00:00
|
|
|
extern crate walkdir;
|
2018-10-31 19:37:45 +00:00
|
|
|
|
|
|
|
use std::fs::File;
|
|
|
|
use std::io;
|
2018-11-05 20:03:04 +00:00
|
|
|
use std::env;
|
|
|
|
use std::process;
|
2018-10-31 19:37:45 +00:00
|
|
|
use std::io::prelude::*;
|
|
|
|
use regex::Regex;
|
2018-11-05 20:03:04 +00:00
|
|
|
use walkdir::WalkDir;
|
2018-10-31 19:37:45 +00:00
|
|
|
|
|
|
|
/// Reads the entire file at `path` into a freshly allocated `String`.
///
/// # Errors
///
/// Propagates the `io::Error` produced when the file cannot be opened or
/// when reading its contents into the string fails.
fn read_file(path: &str) -> io::Result<String> {
    File::open(path).and_then(|mut handle| {
        let mut text = String::new();
        // The byte count returned by `read_to_string` is not needed; only the
        // filled buffer is handed back to the caller.
        handle.read_to_string(&mut text).map(|_| text)
    })
}
|
|
|
|
|
|
|
|
fn find_urls(html: &str) -> Vec<String> {
|
|
|
|
let mut results: Vec<String> = Vec::new();
|
|
|
|
|
|
|
|
lazy_static! {
|
|
|
|
static ref RE: Regex = Regex::new("(?:(?:href)|(?:src))=\"([^\"]*)\"").unwrap();
|
|
|
|
}
|
|
|
|
|
|
|
|
for cap in RE.captures_iter(html) {
|
|
|
|
results.push(cap[1].to_string());
|
|
|
|
}
|
|
|
|
|
|
|
|
results
|
|
|
|
}
|
|
|
|
|
2018-11-08 20:18:09 +00:00
|
|
|
fn is_external_url_working(url: &str) -> bool {
|
2018-10-31 19:37:45 +00:00
|
|
|
let res: Result<reqwest::Response, reqwest::Error> = reqwest::get(url);
|
|
|
|
match res {
|
|
|
|
Ok(r) => r.status().is_success(),
|
|
|
|
Err(_) => false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Exercises `is_external_url_working` against URLs expected to work and URLs
/// expected to be reported as broken. Performs real HTTP requests.
#[test]
fn test_is_url_working() {
    // (url, expected result), checked in this order.
    let expectations = [
        ("https://www.google.com", true),
        ("https://xkcd.com", true),
        ("https://xkcd.com/404", false),
        ("notaurl", false),
    ];

    for &(url, should_work) in expectations.iter() {
        assert_eq!(
            is_external_url_working(url),
            should_work,
            "unexpected result for {}",
            url
        );
    }
}
|
|
|
|
|
|
|
|
fn main() {
|
2018-11-05 20:03:04 +00:00
|
|
|
let args: Vec<String> = env::args().collect();
|
|
|
|
|
|
|
|
if args.len() != 2 {
|
|
|
|
println!("Please provide directory with HTML files as 1st argument.");
|
|
|
|
process::exit(127);
|
|
|
|
} else {
|
|
|
|
let dir = &args[1];
|
|
|
|
for entry in WalkDir::new(dir) {
|
|
|
|
match entry {
|
|
|
|
Ok(en) => {
|
|
|
|
let md: std::fs::Metadata = en.metadata().unwrap();
|
|
|
|
|
|
|
|
if md.is_dir() {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
lazy_static! {
|
|
|
|
static ref RE: Regex = Regex::new(".html$").unwrap();
|
|
|
|
}
|
2018-11-08 20:18:09 +00:00
|
|
|
|
|
|
|
let file: &str = en.file_name().to_str().unwrap();
|
|
|
|
if RE.is_match(file) {
|
|
|
|
let path: &str = en.path().to_str().unwrap();
|
|
|
|
let contents: String = read_file(path)
|
|
|
|
.expect("cannot read");
|
|
|
|
|
|
|
|
let urls: Vec<String> = find_urls(contents.as_str());
|
|
|
|
for url in urls {
|
|
|
|
if !is_external_url_working(&url) {
|
|
|
|
println!("{}: '{}' is a broken link.", path, url);
|
|
|
|
}
|
|
|
|
}
|
2018-11-05 20:03:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
println!("{}", e);
|
|
|
|
process::exit(2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-10-31 19:37:45 +00:00
|
|
|
}
|