From 672e2fff59a447a31e1dcebb3c47d5e7675c1ae5 Mon Sep 17 00:00:00 2001
From: Timofey Gelazoniya
Date: Sun, 29 Jun 2025 11:02:36 +0300
Subject: [PATCH] feat: add stale file deletion, remove unnecessary feature

---
 .gitignore        |  1 +
 src/cli.rs        | 19 ---------------
 src/config.rs     | 38 ++++++++++++-----------------
 src/downloader.rs | 61 +++++++++++++++++++++++++++++++++++++++++------
 src/lib.rs        | 61 +++++++++++++++++++----------------------------
 5 files changed, 94 insertions(+), 86 deletions(-)

diff --git a/.gitignore b/.gitignore
index da3fe0a..73b7ade 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /target
+/rules
 template.json
\ No newline at end of file
diff --git a/src/cli.rs b/src/cli.rs
index 7f0af57..7bc605b 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -1,4 +1,3 @@
-use anyhow::{Result, bail};
 use clap::Parser;
 use std::path::PathBuf;

@@ -16,22 +15,4 @@ pub struct Args {
     /// Number of concurrent download operations.
     #[arg(long, default_value_t = 10)]
     pub concurrency: usize,
-
-    /// The new domain to use for rewriting rule URLs.
-    #[arg(long)]
-    pub domain: Option<String>,
-
-    /// The path on the domain for the rewritten rule URLs.
-    #[arg(long, name = "rule-path")]
-    pub rule_path: Option<String>,
-}
-
-impl Args {
-    /// Validates that domain and rule_path are either both present or both absent.
-    pub fn validate_domain_and_rule_path(&self) -> Result<()> {
-        if self.domain.is_some() != self.rule_path.is_some() {
-            bail!("--domain and --rule-path must be used together.");
-        }
-        Ok(())
-    }
 }
diff --git a/src/config.rs b/src/config.rs
index 611aac7..97d284d 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,9 +1,11 @@
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fs;
-use std::path::Path;
+use std::path::{Path, PathBuf};
+
+use crate::downloader; // Import the downloader module to use its helpers

 /// Represents a single rule, capturing the URL, tag, and any other fields.
 #[derive(Serialize, Deserialize, Debug, Clone)]
@@ -41,14 +43,6 @@ impl Config {
         Ok(config)
     }

-    /// Saves the current config state back to a file, prettified.
-    pub fn save(&self, path: &Path) -> Result<()> {
-        let new_content = serde_json::to_string_pretty(self)
-            .context("Failed to serialize modified config to JSON.")?;
-        fs::write(path, new_content)?;
-        Ok(())
-    }
-
     /// Extracts all URLs from the rule_set.
     pub fn extract_urls(&self) -> Vec<String> {
         self.route
@@ -58,18 +52,16 @@ impl Config {
             .collect()
     }

-    /// Rewrites the URLs in the config based on the provided domain and path.
-    pub fn rewrite_urls(&mut self, domain: &str, rule_path: &str) -> Result<()> {
-        let clean_domain = domain.trim_matches('/');
-        let clean_rule_path = format!("/{}/", rule_path.trim_matches('/'));
-
-        for rule in &mut self.route.rule_set {
-            if let Some(filename) = rule.url.split('/').last().filter(|s| !s.is_empty()) {
-                let new_url = format!("https://{}{}{}", clean_domain, clean_rule_path, filename);
-                println!("  '{}' -> {}", rule.tag, new_url);
-                rule.url = new_url;
-            }
-        }
-        Ok(())
+    /// Generates a set of the absolute paths of all files that are expected
+    /// to exist in the output directory based on the config.
+    pub fn get_expected_files(&self, output_dir: &Path) -> Result<HashSet<PathBuf>> {
+        self.route
+            .rule_set
+            .iter()
+            .map(|rule| {
+                let filename = downloader::url_to_filename(&rule.url)?;
+                Ok(output_dir.join(filename))
+            })
+            .collect()
     }
 }
diff --git a/src/downloader.rs b/src/downloader.rs
index 3f80a44..8291b6f 100644
--- a/src/downloader.rs
+++ b/src/downloader.rs
@@ -1,5 +1,6 @@
 use anyhow::{Context, Result};
 use futures::{StreamExt, stream};
+use std::collections::HashSet;
 use std::fs;
 use std::path::{Path, PathBuf};

@@ -10,12 +11,63 @@ pub struct DownloadReport {
     pub failed: usize,
 }

+/// Extracts a filename from a URL string.
+pub fn url_to_filename(url: &str) -> Result<String> {
+    url.split('/')
+        .last()
+        .filter(|s| !s.is_empty())
+        .map(String::from)
+        .with_context(|| format!("Could not determine filename for URL '{}'", url))
+}
+
+/// Scans the output directory and removes any files not in the expected set.
+pub fn cleanup_stale_files(output_dir: &Path, expected_files: &HashSet<PathBuf>) -> Result<()> {
+    println!("\n▶️ Cleaning up stale rule files...");
+    let mut cleaned_count = 0;
+
+    for entry in fs::read_dir(output_dir)
+        .with_context(|| format!("Failed to read output directory '{}'", output_dir.display()))?
+    {
+        let entry = entry?;
+        let path = entry.path();
+
+        // Ignore directories and files that are not rule files (e.g., .tmp files)
+        if !path.is_file() {
+            continue;
+        }
+
+        // If the file is not in our set of expected files, remove it.
+        if !expected_files.contains(&path) {
+            match fs::remove_file(&path) {
+                Ok(_) => {
+                    println!("[CLEAN] Removed stale file {}", path.display());
+                    cleaned_count += 1;
+                }
+                Err(e) => {
+                    eprintln!(
+                        "[WARN] Failed to remove stale file {}: {}",
+                        path.display(),
+                        e
+                    );
+                }
+            }
+        }
+    }
+    if cleaned_count == 0 {
+        println!("[CLEAN] No stale files found to clean up.");
+    } else {
+        println!("[CLEAN] Cleaned up {} stale files.", cleaned_count);
+    }
+    Ok(())
+}
+
 /// Downloads a list of URLs concurrently, with a specified limit.
 pub async fn download_all_rules(
     urls: &[String],
     output_dir: &Path,
     concurrency: usize,
 ) -> Result<DownloadReport> {
+    println!("\n▶️ Starting download process...");
     let client = reqwest::Client::new();
     let mut successful = 0;
     let mut failed = 0;
@@ -55,15 +107,10 @@ pub async fn download_all_rules(
 /// Downloads a single file from a URL to a destination directory.
 /// Uses a temporary file to ensure atomic writes.
 async fn download_rule(client: &reqwest::Client, url: &str, output_dir: &Path) -> Result<()> {
-    let file_name = url
-        .split('/')
-        .last()
-        .filter(|s| !s.is_empty())
-        .with_context(|| format!("Could not determine filename for URL '{}'", url))?;
-
+    let file_name = url_to_filename(url)?;
     println!("[DOWNLOAD] from {}", url);

-    let final_path = output_dir.join(file_name);
+    let final_path = output_dir.join(&file_name);
     let tmp_path = output_dir.join(format!("{}.tmp", file_name));

     // Perform the download
diff --git a/src/lib.rs b/src/lib.rs
index 04430d6..32d3183 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,24 +15,11 @@ pub async fn run(args: Args) -> Result<()> {
     println!("  Output directory: {}", args.output.display());
     println!("  Concurrency level: {}", args.concurrency);

-    // Validate that --domain and --rule-path are used together.
-    args.validate_domain_and_rule_path()?;
-
     // Load and parse the configuration file into strongly-typed structs.
-    let mut config = Config::load(&args.input)
+    let config = Config::load(&args.input)
         .with_context(|| format!("Failed to load config from {}", args.input.display()))?;

-    let urls_to_download = config.extract_urls();
-    if urls_to_download.is_empty() {
-        println!("✔️ No rule sets with URLs found in the configuration. Nothing to do.");
-        return Ok(());
-    }
-    println!(
-        "✔️ Found {} rule sets to download.",
-        urls_to_download.len()
-    );
-
-    // Ensure the output directory exists.
+    // Ensure the output directory exists before any operations.
     fs::create_dir_all(&args.output).with_context(|| {
         format!(
             "Failed to create output directory '{}'",
@@ -40,6 +27,25 @@ pub async fn run(args: Args) -> Result<()> {
         )
     })?;

+    // Determine the set of files that should exist based on the config.
+    let expected_files = config
+        .get_expected_files(&args.output)
+        .context("Failed to determine expected files from config")?;
+
+    // Clean up any files in the output directory that are not in our expected set.
+    downloader::cleanup_stale_files(&args.output, &expected_files)?;
+
+    // Proceed to download files defined in the config.
+    let urls_to_download = config.extract_urls();
+    if urls_to_download.is_empty() {
+        println!("\n✔️ No rule sets with URLs found. Process complete.");
+        return Ok(());
+    }
+    println!(
+        "\n✔️ Found {} rule sets to download/update.",
+        urls_to_download.len()
+    );
+
     // Download all files concurrently.
     let download_report =
         downloader::download_all_rules(&urls_to_download, &args.output, args.concurrency).await?;
@@ -50,33 +56,14 @@ pub async fn run(args: Args) -> Result<()> {
         download_report.successful, download_report.failed
     );

-    // If any downloads failed, abort before modifying the config file.
+    // If any downloads failed, abort with an error message.
     if download_report.failed > 0 {
         bail!(
-            "Aborting due to {} download failures. The configuration file was NOT modified.",
+            "Aborting due to {} download failures.",
             download_report.failed
         );
     }

-    // Rewrite and save the config file only if all downloads were successful
-    // and the domain/rule_path arguments were provided.
-    if let (Some(domain), Some(rule_path)) = (args.domain, args.rule_path) {
-        println!("\n▶️ All downloads successful. Rewriting configuration file...");
-
-        config.rewrite_urls(&domain, &rule_path)?;
-        config.save(&args.input).with_context(|| {
-            format!("Failed to write updated config to {}", args.input.display())
-        })?;
-
-        println!(
-            "✔️ Configuration file {} updated successfully.",
-            args.input.display()
-        );
-    } else {
-        println!(
-            "\n✔️ Downloads complete. No domain/rule-path specified; config file not modified."
-        );
-    }
-
+    println!("\n✔️ Ruleset synchronization complete.");
     Ok(())
 }
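
A minimal, self-contained sketch of the stale-file cleanup flow introduced above. The directory name, example URL, and helper names here are illustrative placeholders, not the crate's actual API:

// Std-only sketch: build the set of expected files, then delete everything else.
use std::collections::HashSet;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Derive a filename from a URL, mirroring the patch's url_to_filename helper.
fn filename_from_url(url: &str) -> Option<String> {
    url.split('/').last().filter(|s| !s.is_empty()).map(String::from)
}

/// Remove every regular file in `dir` that is not listed in `expected`.
fn remove_unexpected_files(dir: &Path, expected: &HashSet<PathBuf>) -> io::Result<usize> {
    let mut removed = 0;
    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        if path.is_file() && !expected.contains(&path) {
            fs::remove_file(&path)?;
            removed += 1;
        }
    }
    Ok(removed)
}

fn main() -> io::Result<()> {
    // Placeholder output directory and URL list standing in for the parsed config.
    let output_dir = Path::new("rules");
    fs::create_dir_all(output_dir)?;

    let urls = ["https://example.com/rules/geoip-example.srs"];
    let expected: HashSet<PathBuf> = urls
        .iter()
        .filter_map(|url| filename_from_url(url))
        .map(|name| output_dir.join(name))
        .collect();

    let removed = remove_unexpected_files(output_dir, &expected)?;
    println!("removed {} stale file(s)", removed);
    Ok(())
}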