feat: add stale file deletion, remove unnecessary feature

2025-06-29 11:02:36 +03:00
parent ea3a53ec83
commit 672e2fff59
5 changed files with 94 additions and 86 deletions

.gitignore vendored
View File

@@ -1,2 +1,3 @@
 /target
+/rules
 template.json

View File

@@ -1,4 +1,3 @@
-use anyhow::{Result, bail};
 use clap::Parser;
 use std::path::PathBuf;
 
@@ -16,22 +15,4 @@ pub struct Args {
     /// Number of concurrent download operations.
     #[arg(long, default_value_t = 10)]
     pub concurrency: usize,
-
-    /// The new domain to use for rewriting rule URLs.
-    #[arg(long)]
-    pub domain: Option<String>,
-
-    /// The path on the domain for the rewritten rule URLs.
-    #[arg(long, name = "rule-path")]
-    pub rule_path: Option<String>,
-}
-
-impl Args {
-    /// Validates that domain and rule_path are either both present or both absent.
-    pub fn validate_domain_and_rule_path(&self) -> Result<()> {
-        if self.domain.is_some() != self.rule_path.is_some() {
-            bail!("--domain and --rule-path must be used together.");
-        }
-        Ok(())
-    }
 }
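
Aside: a minimal standalone sketch of the clap surface this change leaves behind. The Demo struct below is hypothetical (it is not this crate's Args, which also carries the input/output paths used elsewhere); it only illustrates that #[arg(long, default_value_t = 10)] keeps --concurrency working with a default, while the deleted --domain/--rule-path flags now fail to parse as unknown arguments.

    use clap::Parser;

    /// Hypothetical stand-in for the trimmed-down Args struct.
    #[derive(Parser)]
    struct Demo {
        /// Number of concurrent download operations.
        #[arg(long, default_value_t = 10)]
        concurrency: usize,
    }

    fn main() {
        // Omitting --concurrency yields the default of 10.
        assert_eq!(Demo::parse_from(["demo"]).concurrency, 10);

        // Flags removed in this commit, such as --domain, are now parse errors.
        assert!(Demo::try_parse_from(["demo", "--domain", "x"]).is_err());
    }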

View File

@@ -1,9 +1,11 @@
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashSet};
 use std::fs;
-use std::path::Path;
+use std::path::{Path, PathBuf};
+
+use crate::downloader; // Import the downloader module to use its helpers
 
 /// Represents a single rule, capturing the URL, tag, and any other fields.
 #[derive(Serialize, Deserialize, Debug, Clone)]
@@ -41,14 +43,6 @@ impl Config {
         Ok(config)
     }
 
-    /// Saves the current config state back to a file, prettified.
-    pub fn save(&self, path: &Path) -> Result<()> {
-        let new_content = serde_json::to_string_pretty(self)
-            .context("Failed to serialize modified config to JSON.")?;
-        fs::write(path, new_content)?;
-        Ok(())
-    }
-
     /// Extracts all URLs from the rule_set.
     pub fn extract_urls(&self) -> Vec<String> {
         self.route
@@ -58,18 +52,16 @@ impl Config {
             .collect()
     }
 
-    /// Rewrites the URLs in the config based on the provided domain and path.
-    pub fn rewrite_urls(&mut self, domain: &str, rule_path: &str) -> Result<()> {
-        let clean_domain = domain.trim_matches('/');
-        let clean_rule_path = format!("/{}/", rule_path.trim_matches('/'));
-
-        for rule in &mut self.route.rule_set {
-            if let Some(filename) = rule.url.split('/').last().filter(|s| !s.is_empty()) {
-                let new_url = format!("https://{}{}{}", clean_domain, clean_rule_path, filename);
-                println!(" '{}' -> {}", rule.tag, new_url);
-                rule.url = new_url;
-            }
-        }
-        Ok(())
+    /// Generates a set of the absolute paths of all files that are expected
+    /// to exist in the output directory based on the config.
+    pub fn get_expected_files(&self, output_dir: &Path) -> Result<HashSet<PathBuf>> {
+        self.route
+            .rule_set
+            .iter()
+            .map(|rule| {
+                let filename = downloader::url_to_filename(&rule.url)?;
+                Ok(output_dir.join(filename))
+            })
+            .collect()
     }
 }
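
Aside: roughly what the new get_expected_files computes, as a self-contained sketch. The URLs and the rules directory below are invented for illustration, and url_to_filename is inlined as a stand-in for the helper that downloader.rs now exports.

    use std::collections::HashSet;
    use std::path::{Path, PathBuf};

    // Stand-in for downloader::url_to_filename: last non-empty URL segment.
    fn url_to_filename(url: &str) -> Option<String> {
        url.split('/').last().filter(|s| !s.is_empty()).map(String::from)
    }

    fn main() {
        let urls = [
            "https://example.com/rules/geoip-cn.srs",
            "https://example.com/rules/geosite-ads.srs",
        ];
        let output_dir = Path::new("rules");

        // Mirror of get_expected_files: join each derived filename onto output_dir.
        let expected: HashSet<PathBuf> = urls
            .iter()
            .filter_map(|u| url_to_filename(u))
            .map(|name| output_dir.join(name))
            .collect();

        assert!(expected.contains(&output_dir.join("geoip-cn.srs")));
    }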

View File

@@ -1,5 +1,6 @@
 use anyhow::{Context, Result};
 use futures::{StreamExt, stream};
+use std::collections::HashSet;
 use std::fs;
 use std::path::{Path, PathBuf};
 
@@ -10,12 +11,63 @@ pub struct DownloadReport {
     pub failed: usize,
 }
 
+/// Extracts a filename from a URL string.
+pub fn url_to_filename(url: &str) -> Result<String> {
+    url.split('/')
+        .last()
+        .filter(|s| !s.is_empty())
+        .map(String::from)
+        .with_context(|| format!("Could not determine filename for URL '{}'", url))
+}
+
+/// Scans the output directory and removes any files not in the expected set.
+pub fn cleanup_stale_files(output_dir: &Path, expected_files: &HashSet<PathBuf>) -> Result<()> {
+    println!("\n▶️ Cleaning up stale rule files...");
+    let mut cleaned_count = 0;
+
+    for entry in fs::read_dir(output_dir)
+        .with_context(|| format!("Failed to read output directory '{}'", output_dir.display()))?
+    {
+        let entry = entry?;
+        let path = entry.path();
+
+        // Ignore directories and files that are not rule files (e.g., .tmp files)
+        if !path.is_file() {
+            continue;
+        }
+
+        // If the file is not in our set of expected files, remove it.
+        if !expected_files.contains(&path) {
+            match fs::remove_file(&path) {
+                Ok(_) => {
+                    println!("[CLEAN] Removed stale file {}", path.display());
+                    cleaned_count += 1;
+                }
+                Err(e) => {
+                    eprintln!(
+                        "[WARN] Failed to remove stale file {}: {}",
+                        path.display(),
+                        e
+                    );
+                }
+            }
+        }
+    }
+
+    if cleaned_count == 0 {
+        println!("[CLEAN] No stale files found to clean up.");
+    } else {
+        println!("[CLEAN] Cleaned up {} stale files.", cleaned_count);
+    }
+
+    Ok(())
+}
+
 /// Downloads a list of URLs concurrently, with a specified limit.
 pub async fn download_all_rules(
     urls: &[String],
     output_dir: &Path,
     concurrency: usize,
 ) -> Result<DownloadReport> {
+    println!("\n▶️ Starting download process...");
     let client = reqwest::Client::new();
     let mut successful = 0;
     let mut failed = 0;
 
@@ -55,15 +107,10 @@ pub async fn download_all_rules(
 /// Downloads a single file from a URL to a destination directory.
 /// Uses a temporary file to ensure atomic writes.
 async fn download_rule(client: &reqwest::Client, url: &str, output_dir: &Path) -> Result<PathBuf> {
-    let file_name = url
-        .split('/')
-        .last()
-        .filter(|s| !s.is_empty())
-        .with_context(|| format!("Could not determine filename for URL '{}'", url))?;
+    let file_name = url_to_filename(url)?;
 
     println!("[DOWNLOAD] from {}", url);
-    let final_path = output_dir.join(file_name);
+    let final_path = output_dir.join(&file_name);
     let tmp_path = output_dir.join(format!("{}.tmp", file_name));
 
     // Perform the download
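
Aside: cleanup_stale_files compares the exact paths yielded by fs::read_dir against the set built by get_expected_files, so both must be derived from the same output_dir value (both are, since run() passes args.output to each). A runnable sketch of that membership test, using an invented scratch directory:

    use std::collections::HashSet;
    use std::fs;
    use std::path::PathBuf;

    fn main() -> std::io::Result<()> {
        // Illustrative scratch directory; any writable path works.
        let dir = std::env::temp_dir().join("cleanup-demo");
        fs::create_dir_all(&dir)?;
        fs::write(dir.join("keep.srs"), b"")?;
        fs::write(dir.join("stale.srs"), b"")?;

        // Expected set built from the same `dir` prefix, as in get_expected_files.
        let expected: HashSet<PathBuf> = [dir.join("keep.srs")].into_iter().collect();

        // Same membership test cleanup_stale_files performs.
        for entry in fs::read_dir(&dir)? {
            let path = entry?.path();
            if path.is_file() && !expected.contains(&path) {
                fs::remove_file(&path)?;
            }
        }

        assert!(dir.join("keep.srs").exists());
        assert!(!dir.join("stale.srs").exists());
        Ok(())
    }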

View File

@@ -15,24 +15,11 @@ pub async fn run(args: Args) -> Result<()> {
     println!(" Output directory: {}", args.output.display());
     println!(" Concurrency level: {}", args.concurrency);
 
-    // Validate that --domain and --rule-path are used together.
-    args.validate_domain_and_rule_path()?;
-
     // Load and parse the configuration file into strongly-typed structs.
-    let mut config = Config::load(&args.input)
+    let config = Config::load(&args.input)
         .with_context(|| format!("Failed to load config from {}", args.input.display()))?;
 
-    let urls_to_download = config.extract_urls();
-    if urls_to_download.is_empty() {
-        println!("✔️ No rule sets with URLs found in the configuration. Nothing to do.");
-        return Ok(());
-    }
-
-    println!(
-        "✔️ Found {} rule sets to download.",
-        urls_to_download.len()
-    );
-
-    // Ensure the output directory exists.
+    // Ensure the output directory exists before any operations.
     fs::create_dir_all(&args.output).with_context(|| {
         format!(
             "Failed to create output directory '{}'",
@@ -40,6 +27,25 @@ pub async fn run(args: Args) -> Result<()> {
         )
     })?;
 
+    // Determine the set of files that should exist based on the config.
+    let expected_files = config
+        .get_expected_files(&args.output)
+        .context("Failed to determine expected files from config")?;
+
+    // Clean up any files in the output directory that are not in our expected set.
+    downloader::cleanup_stale_files(&args.output, &expected_files)?;
+
+    // Proceed to download files defined in the config.
+    let urls_to_download = config.extract_urls();
+    if urls_to_download.is_empty() {
+        println!("\n✔️ No rule sets with URLs found. Process complete.");
+        return Ok(());
+    }
+
+    println!(
+        "\n✔️ Found {} rule sets to download/update.",
+        urls_to_download.len()
+    );
+
     // Download all files concurrently.
     let download_report =
         downloader::download_all_rules(&urls_to_download, &args.output, args.concurrency).await?;
@@ -50,33 +56,14 @@ pub async fn run(args: Args) -> Result<()> {
         download_report.successful, download_report.failed
     );
 
-    // If any downloads failed, abort before modifying the config file.
+    // If any downloads failed, abort with an error message.
     if download_report.failed > 0 {
         bail!(
-            "Aborting due to {} download failures. The configuration file was NOT modified.",
+            "Aborting due to {} download failures.",
             download_report.failed
         );
     }
 
-    // Rewrite and save the config file only if all downloads were successful
-    // and the domain/rule_path arguments were provided.
-    if let (Some(domain), Some(rule_path)) = (args.domain, args.rule_path) {
-        println!("\n▶️ All downloads successful. Rewriting configuration file...");
-        config.rewrite_urls(&domain, &rule_path)?;
-
-        config.save(&args.input).with_context(|| {
-            format!("Failed to write updated config to {}", args.input.display())
-        })?;
-
-        println!(
-            "✔️ Configuration file {} updated successfully.",
-            args.input.display()
-        );
-    } else {
-        println!(
-            "\n✔️ Downloads complete. No domain/rule-path specified; config file not modified."
-        );
-    }
+    println!("\n✔️ Ruleset synchronization complete.");
 
     Ok(())
 }
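
Aside: the net ordering in run() is now load config, create the output directory, prune stale files, then download. Because download_rule still writes through a .tmp file for atomicity, a failed run leaves previously downloaded expected files intact; only files absent from the config are deleted up front, and the config file itself is never rewritten. A condensed flow sketch with stub functions (not the real module APIs):

    use std::collections::HashSet;
    use std::path::PathBuf;

    // Stubs standing in for the real Config/downloader APIs.
    fn get_expected_files() -> HashSet<PathBuf> { HashSet::new() }
    fn cleanup_stale_files(_expected: &HashSet<PathBuf>) {}
    fn download_all_rules() -> Result<(), String> { Ok(()) }

    fn main() -> Result<(), String> {
        let expected = get_expected_files(); // 1. target set derived from config
        cleanup_stale_files(&expected);      // 2. prune stale files first
        download_all_rules()?;               // 3. any failure aborts; config untouched
        println!("Ruleset synchronization complete.");
        Ok(())
    }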