From 672e2fff59a447a31e1dcebb3c47d5e7675c1ae5 Mon Sep 17 00:00:00 2001
From: Timofey Gelazoniya
Date: Sun, 29 Jun 2025 11:02:36 +0300
Subject: [PATCH] feat: add stale file deletion, remove unnecessary feature

---
 .gitignore        |  1 +
 src/cli.rs        | 19 ---------------
 src/config.rs     | 38 ++++++++++++-----------------
 src/downloader.rs | 61 +++++++++++++++++++++++++++++++++++++++++------
 src/lib.rs        | 61 +++++++++++++++++++----------------------------
 5 files changed, 94 insertions(+), 86 deletions(-)

diff --git a/.gitignore b/.gitignore
index da3fe0a..73b7ade 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /target
+/rules
 template.json
\ No newline at end of file
diff --git a/src/cli.rs b/src/cli.rs
index 7f0af57..7bc605b 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -1,4 +1,3 @@
-use anyhow::{Result, bail};
 use clap::Parser;
 use std::path::PathBuf;

@@ -16,22 +15,4 @@ pub struct Args {
     /// Number of concurrent download operations.
     #[arg(long, default_value_t = 10)]
     pub concurrency: usize,
-
-    /// The new domain to use for rewriting rule URLs.
-    #[arg(long)]
-    pub domain: Option<String>,
-
-    /// The path on the domain for the rewritten rule URLs.
-    #[arg(long, name = "rule-path")]
-    pub rule_path: Option<String>,
-}
-
-impl Args {
-    /// Validates that domain and rule_path are either both present or both absent.
-    pub fn validate_domain_and_rule_path(&self) -> Result<()> {
-        if self.domain.is_some() != self.rule_path.is_some() {
-            bail!("--domain and --rule-path must be used together.");
-        }
-        Ok(())
-    }
 }
diff --git a/src/config.rs b/src/config.rs
index 611aac7..97d284d 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,9 +1,11 @@
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fs;
-use std::path::Path;
+use std::path::{Path, PathBuf};
+
+use crate::downloader; // Import the downloader module to use its helpers

 /// Represents a single rule, capturing the URL, tag, and any other fields.
 #[derive(Serialize, Deserialize, Debug, Clone)]
@@ -41,14 +43,6 @@ impl Config {
         Ok(config)
     }

-    /// Saves the current config state back to a file, prettified.
-    pub fn save(&self, path: &Path) -> Result<()> {
-        let new_content = serde_json::to_string_pretty(self)
-            .context("Failed to serialize modified config to JSON.")?;
-        fs::write(path, new_content)?;
-        Ok(())
-    }
-
     /// Extracts all URLs from the rule_set.
     pub fn extract_urls(&self) -> Vec<String> {
         self.route
@@ -58,18 +52,16 @@ impl Config {
             .collect()
     }

-    /// Rewrites the URLs in the config based on the provided domain and path.
-    pub fn rewrite_urls(&mut self, domain: &str, rule_path: &str) -> Result<()> {
-        let clean_domain = domain.trim_matches('/');
-        let clean_rule_path = format!("/{}/", rule_path.trim_matches('/'));
-
-        for rule in &mut self.route.rule_set {
-            if let Some(filename) = rule.url.split('/').last().filter(|s| !s.is_empty()) {
-                let new_url = format!("https://{}{}{}", clean_domain, clean_rule_path, filename);
-                println!("  '{}' -> {}", rule.tag, new_url);
-                rule.url = new_url;
-            }
-        }
-        Ok(())
+    /// Generates a set of the absolute paths of all files that are expected
+    /// to exist in the output directory based on the config.
+    pub fn get_expected_files(&self, output_dir: &Path) -> Result<HashSet<PathBuf>> {
+        self.route
+            .rule_set
+            .iter()
+            .map(|rule| {
+                let filename = downloader::url_to_filename(&rule.url)?;
+                Ok(output_dir.join(filename))
+            })
+            .collect()
     }
 }
diff --git a/src/downloader.rs b/src/downloader.rs
index 3f80a44..8291b6f 100644
--- a/src/downloader.rs
+++ b/src/downloader.rs
@@ -1,5 +1,6 @@
 use anyhow::{Context, Result};
 use futures::{StreamExt, stream};
+use std::collections::HashSet;
 use std::fs;
 use std::path::{Path, PathBuf};

@@ -10,12 +11,63 @@ pub struct DownloadReport {
     pub failed: usize,
 }

+/// Extracts a filename from a URL string.
+pub fn url_to_filename(url: &str) -> Result<String> {
+    url.split('/')
+        .last()
+        .filter(|s| !s.is_empty())
+        .map(String::from)
+        .with_context(|| format!("Could not determine filename for URL '{}'", url))
+}
+
+/// Scans the output directory and removes any files not in the expected set.
+pub fn cleanup_stale_files(output_dir: &Path, expected_files: &HashSet<PathBuf>) -> Result<()> {
+    println!("\n▶️ Cleaning up stale rule files...");
+    let mut cleaned_count = 0;
+
+    for entry in fs::read_dir(output_dir)
+        .with_context(|| format!("Failed to read output directory '{}'", output_dir.display()))?
+    {
+        let entry = entry?;
+        let path = entry.path();
+
+        // Ignore directories and files that are not rule files (e.g., .tmp files)
+        if !path.is_file() {
+            continue;
+        }
+
+        // If the file is not in our set of expected files, remove it.
+        if !expected_files.contains(&path) {
+            match fs::remove_file(&path) {
+                Ok(_) => {
+                    println!("[CLEAN] Removed stale file {}", path.display());
+                    cleaned_count += 1;
+                }
+                Err(e) => {
+                    eprintln!(
+                        "[WARN] Failed to remove stale file {}: {}",
+                        path.display(),
+                        e
+                    );
+                }
+            }
+        }
+    }
+    if cleaned_count == 0 {
+        println!("[CLEAN] No stale files found to clean up.");
+    } else {
+        println!("[CLEAN] Cleaned up {} stale files.", cleaned_count);
+    }
+    Ok(())
+}
+
 /// Downloads a list of URLs concurrently, with a specified limit.
 pub async fn download_all_rules(
     urls: &[String],
     output_dir: &Path,
     concurrency: usize,
 ) -> Result<DownloadReport> {
+    println!("\n▶️ Starting download process...");
     let client = reqwest::Client::new();
     let mut successful = 0;
     let mut failed = 0;
@@ -55,15 +107,10 @@ pub async fn download_all_rules(
 /// Downloads a single file from a URL to a destination directory.
 /// Uses a temporary file to ensure atomic writes.
 async fn download_rule(client: &reqwest::Client, url: &str, output_dir: &Path) -> Result<()> {
-    let file_name = url
-        .split('/')
-        .last()
-        .filter(|s| !s.is_empty())
-        .with_context(|| format!("Could not determine filename for URL '{}'", url))?;
-
+    let file_name = url_to_filename(url)?;
     println!("[DOWNLOAD] from {}", url);

-    let final_path = output_dir.join(file_name);
+    let final_path = output_dir.join(&file_name);
     let tmp_path = output_dir.join(format!("{}.tmp", file_name));

     // Perform the download
diff --git a/src/lib.rs b/src/lib.rs
index 04430d6..32d3183 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,24 +15,11 @@ pub async fn run(args: Args) -> Result<()> {
     println!("  Output directory: {}", args.output.display());
     println!("  Concurrency level: {}", args.concurrency);

-    // Validate that --domain and --rule-path are used together.
-    args.validate_domain_and_rule_path()?;
-
     // Load and parse the configuration file into strongly-typed structs.
-    let mut config = Config::load(&args.input)
+    let config = Config::load(&args.input)
         .with_context(|| format!("Failed to load config from {}", args.input.display()))?;

-    let urls_to_download = config.extract_urls();
-    if urls_to_download.is_empty() {
-        println!("✔️ No rule sets with URLs found in the configuration. Nothing to do.");
-        return Ok(());
-    }
-    println!(
-        "✔️ Found {} rule sets to download.",
-        urls_to_download.len()
-    );
-
-    // Ensure the output directory exists.
+    // Ensure the output directory exists before any operations.
     fs::create_dir_all(&args.output).with_context(|| {
         format!(
             "Failed to create output directory '{}'",
@@ -40,6 +27,25 @@ pub async fn run(args: Args) -> Result<()> {
         )
     })?;

+    // Determine the set of files that should exist based on the config.
+    let expected_files = config
+        .get_expected_files(&args.output)
+        .context("Failed to determine expected files from config")?;
+
+    // Clean up any files in the output directory that are not in our expected set.
+    downloader::cleanup_stale_files(&args.output, &expected_files)?;
+
+    // Proceed to download files defined in the config.
+    let urls_to_download = config.extract_urls();
+    if urls_to_download.is_empty() {
+        println!("\n✔️ No rule sets with URLs found. Process complete.");
+        return Ok(());
+    }
+    println!(
+        "\n✔️ Found {} rule sets to download/update.",
+        urls_to_download.len()
+    );
+
     // Download all files concurrently.
     let download_report =
         downloader::download_all_rules(&urls_to_download, &args.output, args.concurrency).await?;
@@ -50,33 +56,14 @@ pub async fn run(args: Args) -> Result<()> {
         download_report.successful, download_report.failed
     );

-    // If any downloads failed, abort before modifying the config file.
+    // If any downloads failed, abort with an error message.
     if download_report.failed > 0 {
         bail!(
-            "Aborting due to {} download failures. The configuration file was NOT modified.",
+            "Aborting due to {} download failures.",
             download_report.failed
         );
     }

-    // Rewrite and save the config file only if all downloads were successful
-    // and the domain/rule_path arguments were provided.
-    if let (Some(domain), Some(rule_path)) = (args.domain, args.rule_path) {
-        println!("\n▶️ All downloads successful. Rewriting configuration file...");
-
-        config.rewrite_urls(&domain, &rule_path)?;
-        config.save(&args.input).with_context(|| {
-            format!("Failed to write updated config to {}", args.input.display())
-        })?;
-
-        println!(
-            "✔️ Configuration file {} updated successfully.",
-            args.input.display()
-        );
-    } else {
-        println!(
-            "\n✔️ Downloads complete. No domain/rule-path specified; config file not modified."
-        );
-    }
-
+    println!("\n✔️ Ruleset synchronization complete.");
     Ok(())
 }
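
A minimal, self-contained sketch of the stale-file cleanup flow introduced above. The directory name, example URL, and helper names here are illustrative placeholders, not the crate's actual API:

// Std-only sketch: build the set of expected files, then delete everything else.
use std::collections::HashSet;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Derive a filename from a URL, mirroring the patch's url_to_filename helper.
fn filename_from_url(url: &str) -> Option<String> {
    url.split('/').last().filter(|s| !s.is_empty()).map(String::from)
}

/// Remove every regular file in `dir` that is not listed in `expected`.
fn remove_unexpected_files(dir: &Path, expected: &HashSet<PathBuf>) -> io::Result<usize> {
    let mut removed = 0;
    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        if path.is_file() && !expected.contains(&path) {
            fs::remove_file(&path)?;
            removed += 1;
        }
    }
    Ok(removed)
}

fn main() -> io::Result<()> {
    // Placeholder output directory and URL list standing in for the parsed config.
    let output_dir = Path::new("rules");
    fs::create_dir_all(output_dir)?;

    let urls = ["https://example.com/rules/geoip-example.srs"];
    let expected: HashSet<PathBuf> = urls
        .iter()
        .filter_map(|url| filename_from_url(url))
        .map(|name| output_dir.join(name))
        .collect();

    let removed = remove_unexpected_files(output_dir, &expected)?;
    println!("removed {} stale file(s)", removed);
    Ok(())
}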