feat: add stale file deletion, remove unnecessary feature

2025-06-29 11:02:36 +03:00
parent ea3a53ec83
commit 672e2fff59
5 changed files with 94 additions and 86 deletions

.gitignore vendored
View File

@@ -1,2 +1,3 @@
 /target
+/rules
 template.json

View File

@@ -1,4 +1,3 @@
-use anyhow::{Result, bail};
 use clap::Parser;
 use std::path::PathBuf;
 
@@ -16,22 +15,4 @@ pub struct Args {
     /// Number of concurrent download operations.
     #[arg(long, default_value_t = 10)]
     pub concurrency: usize,
-
-    /// The new domain to use for rewriting rule URLs.
-    #[arg(long)]
-    pub domain: Option<String>,
-
-    /// The path on the domain for the rewritten rule URLs.
-    #[arg(long, name = "rule-path")]
-    pub rule_path: Option<String>,
-}
-
-impl Args {
-    /// Validates that domain and rule_path are either both present or both absent.
-    pub fn validate_domain_and_rule_path(&self) -> Result<()> {
-        if self.domain.is_some() != self.rule_path.is_some() {
-            bail!("--domain and --rule-path must be used together.");
-        }
-        Ok(())
-    }
 }
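
Aside: a minimal standalone sketch of the clap surface this change leaves behind. The Demo struct below is hypothetical (it is not this crate's Args, which also carries the input/output paths used elsewhere); it only illustrates that #[arg(long, default_value_t = 10)] keeps --concurrency working with a default, while the deleted --domain/--rule-path flags now fail to parse as unknown arguments.

    use clap::Parser;

    /// Hypothetical stand-in for the trimmed-down Args struct.
    #[derive(Parser)]
    struct Demo {
        /// Number of concurrent download operations.
        #[arg(long, default_value_t = 10)]
        concurrency: usize,
    }

    fn main() {
        // Omitting --concurrency yields the default of 10.
        assert_eq!(Demo::parse_from(["demo"]).concurrency, 10);

        // Flags removed in this commit, such as --domain, are now parse errors.
        assert!(Demo::try_parse_from(["demo", "--domain", "x"]).is_err());
    }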

View File

@@ -1,9 +1,11 @@
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fs;
-use std::path::Path;
+use std::path::{Path, PathBuf};
+
+use crate::downloader; // Import the downloader module to use its helpers
 
 /// Represents a single rule, capturing the URL, tag, and any other fields.
 #[derive(Serialize, Deserialize, Debug, Clone)]
@@ -41,14 +43,6 @@ impl Config {
         Ok(config)
     }
 
-    /// Saves the current config state back to a file, prettified.
-    pub fn save(&self, path: &Path) -> Result<()> {
-        let new_content = serde_json::to_string_pretty(self)
-            .context("Failed to serialize modified config to JSON.")?;
-        fs::write(path, new_content)?;
-        Ok(())
-    }
-
     /// Extracts all URLs from the rule_set.
     pub fn extract_urls(&self) -> Vec<String> {
         self.route
@@ -58,18 +52,16 @@ impl Config {
             .collect()
     }
 
-    /// Rewrites the URLs in the config based on the provided domain and path.
-    pub fn rewrite_urls(&mut self, domain: &str, rule_path: &str) -> Result<()> {
-        let clean_domain = domain.trim_matches('/');
-        let clean_rule_path = format!("/{}/", rule_path.trim_matches('/'));
-
-        for rule in &mut self.route.rule_set {
-            if let Some(filename) = rule.url.split('/').last().filter(|s| !s.is_empty()) {
-                let new_url = format!("https://{}{}{}", clean_domain, clean_rule_path, filename);
-                println!(" '{}' -> {}", rule.tag, new_url);
-                rule.url = new_url;
-            }
-        }
-        Ok(())
+    /// Generates a set of the absolute paths of all files that are expected
+    /// to exist in the output directory based on the config.
+    pub fn get_expected_files(&self, output_dir: &Path) -> Result<HashSet<PathBuf>> {
+        self.route
+            .rule_set
+            .iter()
+            .map(|rule| {
+                let filename = downloader::url_to_filename(&rule.url)?;
+                Ok(output_dir.join(filename))
+            })
+            .collect()
     }
 }
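
Aside: roughly what the new get_expected_files computes, as a self-contained sketch. The URLs and the rules directory below are invented for illustration, and url_to_filename is inlined as a stand-in for the helper that downloader.rs now exports.

    use std::collections::HashSet;
    use std::path::{Path, PathBuf};

    // Stand-in for downloader::url_to_filename: last non-empty URL segment.
    fn url_to_filename(url: &str) -> Option<String> {
        url.split('/').last().filter(|s| !s.is_empty()).map(String::from)
    }

    fn main() {
        let urls = [
            "https://example.com/rules/geoip-cn.srs",
            "https://example.com/rules/geosite-ads.srs",
        ];
        let output_dir = Path::new("rules");

        // Mirror of get_expected_files: join each derived filename onto output_dir.
        let expected: HashSet<PathBuf> = urls
            .iter()
            .filter_map(|u| url_to_filename(u))
            .map(|name| output_dir.join(name))
            .collect();

        assert!(expected.contains(&output_dir.join("geoip-cn.srs")));
    }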

View File

@@ -1,5 +1,6 @@
 use anyhow::{Context, Result};
 use futures::{StreamExt, stream};
+use std::collections::HashSet;
 use std::fs;
 use std::path::{Path, PathBuf};
 
@@ -10,12 +11,63 @@ pub struct DownloadReport {
     pub failed: usize,
 }
 
+/// Extracts a filename from a URL string.
+pub fn url_to_filename(url: &str) -> Result<String> {
+    url.split('/')
+        .last()
+        .filter(|s| !s.is_empty())
+        .map(String::from)
+        .with_context(|| format!("Could not determine filename for URL '{}'", url))
+}
+
+/// Scans the output directory and removes any files not in the expected set.
+pub fn cleanup_stale_files(output_dir: &Path, expected_files: &HashSet<PathBuf>) -> Result<()> {
+    println!("\n▶️ Cleaning up stale rule files...");
+    let mut cleaned_count = 0;
+
+    for entry in fs::read_dir(output_dir)
+        .with_context(|| format!("Failed to read output directory '{}'", output_dir.display()))?
+    {
+        let entry = entry?;
+        let path = entry.path();
+
+        // Ignore directories and files that are not rule files (e.g., .tmp files)
+        if !path.is_file() {
+            continue;
+        }
+
+        // If the file is not in our set of expected files, remove it.
+        if !expected_files.contains(&path) {
+            match fs::remove_file(&path) {
+                Ok(_) => {
+                    println!("[CLEAN] Removed stale file {}", path.display());
+                    cleaned_count += 1;
+                }
+                Err(e) => {
+                    eprintln!(
+                        "[WARN] Failed to remove stale file {}: {}",
+                        path.display(),
+                        e
+                    );
+                }
+            }
+        }
+    }
+
+    if cleaned_count == 0 {
+        println!("[CLEAN] No stale files found to clean up.");
+    } else {
+        println!("[CLEAN] Cleaned up {} stale files.", cleaned_count);
+    }
+
+    Ok(())
+}
+
 /// Downloads a list of URLs concurrently, with a specified limit.
 pub async fn download_all_rules(
     urls: &[String],
     output_dir: &Path,
     concurrency: usize,
 ) -> Result<DownloadReport> {
+    println!("\n▶️ Starting download process...");
     let client = reqwest::Client::new();
     let mut successful = 0;
     let mut failed = 0;
 
@@ -55,15 +107,10 @@ pub async fn download_all_rules(
 /// Downloads a single file from a URL to a destination directory.
 /// Uses a temporary file to ensure atomic writes.
 async fn download_rule(client: &reqwest::Client, url: &str, output_dir: &Path) -> Result<PathBuf> {
-    let file_name = url
-        .split('/')
-        .last()
-        .filter(|s| !s.is_empty())
-        .with_context(|| format!("Could not determine filename for URL '{}'", url))?;
+    let file_name = url_to_filename(url)?;
 
     println!("[DOWNLOAD] from {}", url);
-    let final_path = output_dir.join(file_name);
+    let final_path = output_dir.join(&file_name);
     let tmp_path = output_dir.join(format!("{}.tmp", file_name));
 
     // Perform the download
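
Aside: cleanup_stale_files compares the exact paths yielded by fs::read_dir against the set built by get_expected_files, so both must be derived from the same output_dir value (both are, since run() passes args.output to each). A runnable sketch of that membership test, using an invented scratch directory:

    use std::collections::HashSet;
    use std::fs;
    use std::path::PathBuf;

    fn main() -> std::io::Result<()> {
        // Illustrative scratch directory; any writable path works.
        let dir = std::env::temp_dir().join("cleanup-demo");
        fs::create_dir_all(&dir)?;
        fs::write(dir.join("keep.srs"), b"")?;
        fs::write(dir.join("stale.srs"), b"")?;

        // Expected set built from the same `dir` prefix, as in get_expected_files.
        let expected: HashSet<PathBuf> = [dir.join("keep.srs")].into_iter().collect();

        // Same membership test cleanup_stale_files performs.
        for entry in fs::read_dir(&dir)? {
            let path = entry?.path();
            if path.is_file() && !expected.contains(&path) {
                fs::remove_file(&path)?;
            }
        }

        assert!(dir.join("keep.srs").exists());
        assert!(!dir.join("stale.srs").exists());
        Ok(())
    }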

View File

@@ -15,24 +15,11 @@ pub async fn run(args: Args) -> Result<()> {
     println!(" Output directory: {}", args.output.display());
     println!(" Concurrency level: {}", args.concurrency);
 
-    // Validate that --domain and --rule-path are used together.
-    args.validate_domain_and_rule_path()?;
-
     // Load and parse the configuration file into strongly-typed structs.
-    let mut config = Config::load(&args.input)
+    let config = Config::load(&args.input)
         .with_context(|| format!("Failed to load config from {}", args.input.display()))?;
 
-    let urls_to_download = config.extract_urls();
-    if urls_to_download.is_empty() {
-        println!("✔️ No rule sets with URLs found in the configuration. Nothing to do.");
-        return Ok(());
-    }
-
-    println!(
-        "✔️ Found {} rule sets to download.",
-        urls_to_download.len()
-    );
-
-    // Ensure the output directory exists.
+    // Ensure the output directory exists before any operations.
     fs::create_dir_all(&args.output).with_context(|| {
         format!(
             "Failed to create output directory '{}'",
@@ -40,6 +27,25 @@ pub async fn run(args: Args) -> Result<()> {
         )
     })?;
 
+    // Determine the set of files that should exist based on the config.
+    let expected_files = config
+        .get_expected_files(&args.output)
+        .context("Failed to determine expected files from config")?;
+
+    // Clean up any files in the output directory that are not in our expected set.
+    downloader::cleanup_stale_files(&args.output, &expected_files)?;
+
+    // Proceed to download files defined in the config.
+    let urls_to_download = config.extract_urls();
+    if urls_to_download.is_empty() {
+        println!("\n✔️ No rule sets with URLs found. Process complete.");
+        return Ok(());
+    }
+
+    println!(
+        "\n✔️ Found {} rule sets to download/update.",
+        urls_to_download.len()
+    );
+
     // Download all files concurrently.
     let download_report =
         downloader::download_all_rules(&urls_to_download, &args.output, args.concurrency).await?;
@@ -50,33 +56,14 @@ pub async fn run(args: Args) -> Result<()> {
         download_report.successful, download_report.failed
     );
 
-    // If any downloads failed, abort before modifying the config file.
+    // If any downloads failed, abort with an error message.
     if download_report.failed > 0 {
         bail!(
-            "Aborting due to {} download failures. The configuration file was NOT modified.",
+            "Aborting due to {} download failures.",
             download_report.failed
         );
     }
 
-    // Rewrite and save the config file only if all downloads were successful
-    // and the domain/rule_path arguments were provided.
-    if let (Some(domain), Some(rule_path)) = (args.domain, args.rule_path) {
-        println!("\n▶️ All downloads successful. Rewriting configuration file...");
-        config.rewrite_urls(&domain, &rule_path)?;
-
-        config.save(&args.input).with_context(|| {
-            format!("Failed to write updated config to {}", args.input.display())
-        })?;
-
-        println!(
-            "✔️ Configuration file {} updated successfully.",
-            args.input.display()
-        );
-    } else {
-        println!(
-            "\n✔️ Downloads complete. No domain/rule-path specified; config file not modified."
-        );
-    }
+    println!("\n✔️ Ruleset synchronization complete.");
 
     Ok(())
 }
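
Aside: the net ordering in run() is now load config, create the output directory, prune stale files, then download. Because download_rule still writes through a .tmp file for atomicity, a failed run leaves previously downloaded expected files intact; only files absent from the config are deleted up front, and the config file itself is never rewritten. A condensed flow sketch with stub functions (not the real module APIs):

    use std::collections::HashSet;
    use std::path::PathBuf;

    // Stubs standing in for the real Config/downloader APIs.
    fn get_expected_files() -> HashSet<PathBuf> { HashSet::new() }
    fn cleanup_stale_files(_expected: &HashSet<PathBuf>) {}
    fn download_all_rules() -> Result<(), String> { Ok(()) }

    fn main() -> Result<(), String> {
        let expected = get_expected_files(); // 1. target set derived from config
        cleanup_stale_files(&expected);      // 2. prune stale files first
        download_all_rules()?;               // 3. any failure aborts; config untouched
        println!("Ruleset synchronization complete.");
        Ok(())
    }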