From 6dccf35a306a4d2a8a31e3ce6750734ba2c3fe3f Mon Sep 17 00:00:00 2001
From: Timofey Gelazoniya
Date: Sun, 29 Jun 2025 12:48:51 +0300
Subject: [PATCH] fix: replace rule urls

---
 .gitignore    |   3 +-
 src/cli.rs    |  26 +++++++---
 src/config.rs |  67 ------------------
 src/lib.rs    | 137 ++++++++++++++++++++++++++++++++++-----------------
 src/main.rs   |   1 -
 5 files changed, 112 insertions(+), 122 deletions(-)
 delete mode 100644 src/config.rs

diff --git a/.gitignore b/.gitignore
index 73b7ade..79b0d0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 /target
 /rules
-template.json
\ No newline at end of file
+template.json
+default.json
\ No newline at end of file
diff --git a/src/cli.rs b/src/cli.rs
index 308957a..017ae4e 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -4,19 +4,31 @@ use std::path::PathBuf;
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 pub struct Args {
-    /// Path to the input JSON configuration file.
-    #[arg(short, long)]
-    pub input: PathBuf,
+    /// Path to the source JSON config with original URLs.
+    #[arg(long, required = true)]
+    pub input_config: PathBuf,
 
-    /// Path to the output directory for downloaded rule files.
-    #[arg(short, long)]
-    pub output: PathBuf,
+    /// Path to the directory for downloaded rule files.
+    #[arg(long, required = true)]
+    pub rules_dir: PathBuf,
+
+    /// Path where the rewritten, client-facing JSON config will be saved.
+    #[arg(long, required = true)]
+    pub output_config: PathBuf,
 
     /// Number of concurrent download operations.
     #[arg(long, default_value_t = 10)]
     pub concurrency: usize,
 
-    /// Increase logging verbosity. Can be used multiple times (e.g., -v, -vv).
+    /// The domain to use for rewriting rule URLs (e.g., "mydomain.com").
+    #[arg(long, required = true)]
+    pub domain: String,
+
+    /// The path on the domain for the rewritten rule URLs (e.g., "rules").
+    #[arg(long, name = "rule-path", required = true)]
+    pub rule_path: String,
+
+    /// Increase logging verbosity.
     #[arg(short, long, action = clap::ArgAction::Count)]
     pub verbose: u8,
 }
diff --git a/src/config.rs b/src/config.rs
deleted file mode 100644
index 97d284d..0000000
--- a/src/config.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-use anyhow::{Context, Result};
-use serde::{Deserialize, Serialize};
-use serde_json::Value;
-use std::collections::{BTreeMap, HashSet};
-use std::fs;
-use std::path::{Path, PathBuf};
-
-use crate::downloader; // Import the downloader module to use its helpers
-
-/// Represents a single rule, capturing the URL, tag, and any other fields.
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Rule {
-    pub url: String,
-    pub tag: String,
-    // Capture any other fields in the rule object
-    #[serde(flatten)]
-    pub other_fields: BTreeMap<String, Value>,
-}
-
-/// Represents the "route" object in the config.
-#[derive(Serialize, Deserialize, Debug)]
-struct Route {
-    #[serde(default, rename = "rule_set")]
-    rule_set: Vec<Rule>,
-}
-
-/// Represents the top-level structure of the configuration file.
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Config {
-    route: Route,
-    // Capture any other top-level fields
-    #[serde(flatten)]
-    other_fields: BTreeMap<String, Value>,
-}
-
-impl Config {
-    /// Loads and parses the config file from a given path.
-    pub fn load(path: &Path) -> Result<Self> {
-        let content = fs::read_to_string(path)
-            .with_context(|| format!("Failed to read config file from '{}'", path.display()))?;
-        let config: Config = serde_json::from_str(&content)
-            .context("Failed to parse JSON. Please check the config file structure.")?;
-        Ok(config)
-    }
-
-    /// Extracts all URLs from the rule_set.
-    pub fn extract_urls(&self) -> Vec<String> {
-        self.route
-            .rule_set
-            .iter()
-            .map(|rule| rule.url.clone())
-            .collect()
-    }
-
-    /// Generates a set of the absolute paths of all files that are expected
-    /// to exist in the output directory based on the config.
-    pub fn get_expected_files(&self, output_dir: &Path) -> Result<HashSet<PathBuf>> {
-        self.route
-            .rule_set
-            .iter()
-            .map(|rule| {
-                let filename = downloader::url_to_filename(&rule.url)?;
-                Ok(output_dir.join(filename))
-            })
-            .collect()
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 909ace7..3f38978 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,72 +1,117 @@
 use anyhow::{Context, Result, bail};
+use serde_json::Value;
+use std::collections::{HashMap, HashSet};
 use std::fs;
 use tracing::{info, instrument};
 
 pub mod cli;
-pub mod config;
 pub mod downloader;
 
 use cli::Args;
-use config::Config;
 
 /// The main application logic.
 #[instrument(skip_all, fields(
-    config_file = %args.input.display(),
-    output_dir = %args.output.display()
+    input_config = %args.input_config.display(),
+    rules_dir = %args.rules_dir.display(),
+    output_config = %args.output_config.display()
 ))]
 pub async fn run(args: Args) -> Result<()> {
-    info!("Starting ruleset processor");
+    info!("Starting ruleset synchronization");
 
-    // Load and parse the configuration file.
-    let config = Config::load(&args.input)
-        .with_context(|| format!("Failed to load config from {}", args.input.display()))?;
-
-    // Ensure the output directory exists.
-    fs::create_dir_all(&args.output).with_context(|| {
+    // 1. Read the input config file into a generic, order-preserving Value.
+    // We only use this to discover the URLs, not to modify.
+    let config_content = fs::read_to_string(&args.input_config).with_context(|| {
         format!(
-            "Failed to create output directory '{}'",
-            args.output.display()
+            "Failed to load input config from {}",
+            args.input_config.display()
         )
     })?;
+    let config_value: Value = serde_json::from_str(&config_content)
+        .context("Failed to parse JSON. Please check the config file structure.")?;
 
-    // Determine the set of files that should exist.
-    let expected_files = config
-        .get_expected_files(&args.output)
-        .context("Failed to determine expected files from config")?;
+    // 2. Build the list of URLs to download, the list of expected files,
+    // AND a map of (original_url -> new_url) for text replacement later.
+    let mut urls_to_download = Vec::new();
+    let mut expected_files = HashSet::new();
+    let mut url_replacement_map = HashMap::new();
 
-    // Clean up any stale files.
-    downloader::cleanup_stale_files(&args.output, &expected_files)?;
+    if let Some(rule_sets) = config_value
+        .pointer("/route/rule_set")
+        .and_then(|v| v.as_array())
+    {
+        let clean_domain = args.domain.trim_matches('/');
+        let clean_rule_path = format!("/{}/", args.rule_path.trim_matches('/'));
 
-    // Proceed to download files defined in the config.
-    let urls_to_download = config.extract_urls();
+        for rule in rule_sets {
+            if let Some(url) = rule.get("url").and_then(|u| u.as_str()) {
+                urls_to_download.push(url.to_string());
+                if let Ok(filename) = downloader::url_to_filename(url) {
+                    expected_files.insert(args.rules_dir.join(&filename));
+                    // Create the new URL and store the mapping.
+                    let new_url =
+                        format!("https://{}{}{}", clean_domain, clean_rule_path, filename);
+                    url_replacement_map.insert(url.to_string(), new_url);
+                }
+            }
+        }
+    }
+
+    // 3. Ensure the rules directory exists and clean up stale files.
+ fs::create_dir_all(&args.rules_dir).with_context(|| { + format!( + "Failed to create rules directory '{}'", + args.rules_dir.display() + ) + })?; + downloader::cleanup_stale_files(&args.rules_dir, &expected_files)?; + + // 4. Proceed to download new/updated files. if urls_to_download.is_empty() { - info!("No rule sets with URLs found in config. Process complete."); - return Ok(()); - } - - info!( - count = urls_to_download.len(), - "Found rule sets to download/update." - ); - - // Download all files concurrently. - let download_report = - downloader::download_all_rules(&urls_to_download, &args.output, args.concurrency).await?; - - info!( - successful = download_report.successful, - failed = download_report.failed, - "Download process finished." - ); - - // If any downloads failed, abort with an error message. - if download_report.failed > 0 { - bail!( - "Aborting due to {} download failures.", - download_report.failed + info!("No rule sets with URLs found in input config."); + } else { + info!( + count = urls_to_download.len(), + "Found rule sets to download/update." ); + let report = + downloader::download_all_rules(&urls_to_download, &args.rules_dir, args.concurrency) + .await?; + info!( + successful = report.successful, + failed = report.failed, + "Download process finished." + ); + if report.failed > 0 { + bail!( + "Aborting due to {} download failures. Output config was NOT generated.", + report.failed + ); + } } - info!("Ruleset synchronization completed successfully."); + // 5. Generate the new config file using simple text replacement. + info!("Generating output configuration file..."); + + // We use the original `config_content` string read at the start. + let mut output_content = config_content; + + // Perform a direct search-and-replace for each URL. + for (original_url, new_url) in &url_replacement_map { + output_content = output_content.replace(original_url, new_url); + } + + // Write the modified string content to the output file. + fs::write(&args.output_config, output_content).with_context(|| { + format!( + "Failed to write output config to {}", + args.output_config.display() + ) + })?; + info!( + path = %args.output_config.display(), + "Output configuration file saved successfully." + ); + + info!("Synchronization process completed successfully."); Ok(()) } diff --git a/src/main.rs b/src/main.rs index 829999e..66d034d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,6 @@ async fn main() -> Result<()> { let args = Args::parse(); // 2. Set up the tracing subscriber. - // This configures a logger that prints to the console. // The verbosity is controlled by the `-v` flag. let log_level = match args.verbose { 0 => "info", // Default level
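A minimal invocation sketch of the CLI surface this patch introduces (shell). The binary name and the concrete paths, domain, and rule path below are placeholder values, not part of the commit; `--concurrency` keeps its default of 10 and `-v` stays optional:

    rule-sync \
        --input-config ./template.json \
        --rules-dir ./rules \
        --output-config ./default.json \
        --domain example.com \
        --rule-path rules \
        -v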