fix: replace rule urls

.gitignore (vendored) | 1 change
@@ -1,3 +1,4 @@
 /target
 /rules
 template.json
+default.json

src/cli.rs | 26 changes
@@ -4,19 +4,31 @@ use std::path::PathBuf;
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 pub struct Args {
-    /// Path to the input JSON configuration file.
-    #[arg(short, long)]
-    pub input: PathBuf,
+    /// Path to the source JSON config with original URLs.
+    #[arg(long, required = true)]
+    pub input_config: PathBuf,

-    /// Path to the output directory for downloaded rule files.
-    #[arg(short, long)]
-    pub output: PathBuf,
+    /// Path to the directory for downloaded rule files.
+    #[arg(long, required = true)]
+    pub rules_dir: PathBuf,
+
+    /// Path where the rewritten, client-facing JSON config will be saved.
+    #[arg(long, required = true)]
+    pub output_config: PathBuf,

     /// Number of concurrent download operations.
     #[arg(long, default_value_t = 10)]
     pub concurrency: usize,

-    /// Increase logging verbosity. Can be used multiple times (e.g., -v, -vv).
+    /// The domain to use for rewriting rule URLs (e.g., "mydomain.com").
+    #[arg(long, required = true)]
+    pub domain: String,
+
+    /// The path on the domain for the rewritten rule URLs (e.g., "rules").
+    #[arg(long, name = "rule-path", required = true)]
+    pub rule_path: String,
+
+    /// Increase logging verbosity.
     #[arg(short, long, action = clap::ArgAction::Count)]
     pub verbose: u8,
 }
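
Note (not part of the commit): all five path/string options are now required long flags, and clap derives their kebab-case names (--input-config, --rules-dir, --output-config, --domain, --rule-path) from the field names. A minimal sketch of how they parse, using clap's `parse_from`; the binary name and example values are hypothetical, though template.json, default.json, and rules/ mirror the entries in .gitignore above:

    use clap::Parser;
    use crate::cli::Args; // module path assumed; adjust to the crate layout

    fn demo_args() {
        let args = Args::parse_from([
            "ruleset-sync", // argv[0]; placeholder binary name
            "--input-config", "template.json",
            "--rules-dir", "rules",
            "--output-config", "default.json",
            "--domain", "mydomain.com",
            "--rule-path", "rules",
        ]);
        assert_eq!(args.domain, "mydomain.com");
        assert_eq!(args.concurrency, 10); // falls back to default_value_t
    }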

src/config.rs (deleted) | 67 changes
@@ -1,67 +0,0 @@
-use anyhow::{Context, Result};
-use serde::{Deserialize, Serialize};
-use serde_json::Value;
-use std::collections::{BTreeMap, HashSet};
-use std::fs;
-use std::path::{Path, PathBuf};
-
-use crate::downloader; // Import the downloader module to use its helpers
-
-/// Represents a single rule, capturing the URL, tag, and any other fields.
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct Rule {
-    pub url: String,
-    pub tag: String,
-    // Capture any other fields in the rule object
-    #[serde(flatten)]
-    pub other_fields: BTreeMap<String, Value>,
-}
-
-/// Represents the "route" object in the config.
-#[derive(Serialize, Deserialize, Debug)]
-struct Route {
-    #[serde(default, rename = "rule_set")]
-    rule_set: Vec<Rule>,
-}
-
-/// Represents the top-level structure of the configuration file.
-#[derive(Serialize, Deserialize, Debug)]
-pub struct Config {
-    route: Route,
-    // Capture any other top-level fields
-    #[serde(flatten)]
-    other_fields: BTreeMap<String, Value>,
-}
-
-impl Config {
-    /// Loads and parses the config file from a given path.
-    pub fn load(path: &Path) -> Result<Self> {
-        let content = fs::read_to_string(path)
-            .with_context(|| format!("Failed to read config file from '{}'", path.display()))?;
-        let config: Config = serde_json::from_str(&content)
-            .context("Failed to parse JSON. Please check the config file structure.")?;
-        Ok(config)
-    }
-
-    /// Extracts all URLs from the rule_set.
-    pub fn extract_urls(&self) -> Vec<String> {
-        self.route
-            .rule_set
-            .iter()
-            .map(|rule| rule.url.clone())
-            .collect()
-    }
-
-    /// Generates a set of the absolute paths of all files that are expected
-    /// to exist in the output directory based on the config.
-    pub fn get_expected_files(&self, output_dir: &Path) -> Result<HashSet<PathBuf>> {
-        self.route
-            .rule_set
-            .iter()
-            .map(|rule| {
-                let filename = downloader::url_to_filename(&rule.url)?;
-                Ok(output_dir.join(filename))
-            })
-            .collect()
-    }
-}
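
Note (not part of the commit): the deleted module round-tripped the config through typed structs, flattening unknown fields into a BTreeMap. A likely reason the commit switches to plain text replacement (an inference; the diff itself only says the parsed Value is used "to discover the URLs, not to modify") is that BTreeMap serializes its keys in sorted order, so a load-then-save cycle would reorder the user's config. A minimal repro of that reordering, not code from this repo:

    use serde::{Deserialize, Serialize};
    use serde_json::Value;
    use std::collections::BTreeMap;

    #[derive(Serialize, Deserialize)]
    struct Rule {
        url: String,
        #[serde(flatten)]
        other: BTreeMap<String, Value>, // captures unknown keys, but sorts them
    }

    fn demo_reorder() -> serde_json::Result<()> {
        let rule: Rule = serde_json::from_str(r#"{"url":"u","tag":"t","format":"binary"}"#)?;
        // Prints {"url":"u","format":"binary","tag":"t"}: the flattened keys
        // come back sorted, not in the order the user wrote them.
        println!("{}", serde_json::to_string(&rule)?);
        Ok(())
    }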

src/lib.rs | 119 changes
@@ -1,72 +1,117 @@
 use anyhow::{Context, Result, bail};
+use serde_json::Value;
+use std::collections::{HashMap, HashSet};
 use std::fs;
 use tracing::{info, instrument};

 pub mod cli;
-pub mod config;
 pub mod downloader;

 use cli::Args;
-use config::Config;

 /// The main application logic.
 #[instrument(skip_all, fields(
-    config_file = %args.input.display(),
-    output_dir = %args.output.display()
+    input_config = %args.input_config.display(),
+    rules_dir = %args.rules_dir.display(),
+    output_config = %args.output_config.display()
 ))]
 pub async fn run(args: Args) -> Result<()> {
-    info!("Starting ruleset processor");
+    info!("Starting ruleset synchronization");

-    // Load and parse the configuration file.
-    let config = Config::load(&args.input)
-        .with_context(|| format!("Failed to load config from {}", args.input.display()))?;
-
-    // Ensure the output directory exists.
-    fs::create_dir_all(&args.output).with_context(|| {
+    // 1. Read the input config file into a generic, order-preserving Value.
+    // We only use this to discover the URLs, not to modify.
+    let config_content = fs::read_to_string(&args.input_config).with_context(|| {
         format!(
-            "Failed to create output directory '{}'",
-            args.output.display()
+            "Failed to load input config from {}",
+            args.input_config.display()
         )
     })?;
+    let config_value: Value = serde_json::from_str(&config_content)
+        .context("Failed to parse JSON. Please check the config file structure.")?;

-    // Determine the set of files that should exist.
-    let expected_files = config
-        .get_expected_files(&args.output)
-        .context("Failed to determine expected files from config")?;
+    // 2. Build the list of URLs to download, the list of expected files,
+    // AND a map of (original_url -> new_url) for text replacement later.
+    let mut urls_to_download = Vec::new();
+    let mut expected_files = HashSet::new();
+    let mut url_replacement_map = HashMap::new();

-    // Clean up any stale files.
-    downloader::cleanup_stale_files(&args.output, &expected_files)?;
+    if let Some(rule_sets) = config_value
+        .pointer("/route/rule_set")
+        .and_then(|v| v.as_array())
+    {
+        let clean_domain = args.domain.trim_matches('/');
+        let clean_rule_path = format!("/{}/", args.rule_path.trim_matches('/'));

-    // Proceed to download files defined in the config.
-    let urls_to_download = config.extract_urls();
-    if urls_to_download.is_empty() {
-        info!("No rule sets with URLs found in config. Process complete.");
-        return Ok(());
+        for rule in rule_sets {
+            if let Some(url) = rule.get("url").and_then(|u| u.as_str()) {
+                urls_to_download.push(url.to_string());
+                if let Ok(filename) = downloader::url_to_filename(url) {
+                    expected_files.insert(args.rules_dir.join(&filename));
+                    // Create the new URL and store the mapping.
+                    let new_url =
+                        format!("https://{}{}{}", clean_domain, clean_rule_path, filename);
+                    url_replacement_map.insert(url.to_string(), new_url);
+                }
+            }
+        }
     }

-    info!(
-        count = urls_to_download.len(),
-        "Found rule sets to download/update."
-    );
+    // 3. Ensure the rules directory exists and clean up stale files.
+    fs::create_dir_all(&args.rules_dir).with_context(|| {
+        format!(
+            "Failed to create rules directory '{}'",
+            args.rules_dir.display()
+        )
+    })?;
+    downloader::cleanup_stale_files(&args.rules_dir, &expected_files)?;

-    // Download all files concurrently.
-    let download_report =
-        downloader::download_all_rules(&urls_to_download, &args.output, args.concurrency).await?;
+    // 4. Proceed to download new/updated files.
+    if urls_to_download.is_empty() {
+        info!("No rule sets with URLs found in input config.");
+    } else {
+        info!(
+            count = urls_to_download.len(),
+            "Found rule sets to download/update."
+        );
+        let report =
+            downloader::download_all_rules(&urls_to_download, &args.rules_dir, args.concurrency)
+                .await?;

-    info!(
-        successful = download_report.successful,
-        failed = download_report.failed,
-        "Download process finished."
-    );
+        info!(
+            successful = report.successful,
+            failed = report.failed,
+            "Download process finished."
+        );

-    // If any downloads failed, abort with an error message.
-    if download_report.failed > 0 {
-        bail!(
-            "Aborting due to {} download failures.",
-            download_report.failed
-        );
-    }
+        if report.failed > 0 {
+            bail!(
+                "Aborting due to {} download failures. Output config was NOT generated.",
+                report.failed
+            );
+        }
+    }

-    info!("Ruleset synchronization completed successfully.");
+    // 5. Generate the new config file using simple text replacement.
+    info!("Generating output configuration file...");
+
+    // We use the original `config_content` string read at the start.
+    let mut output_content = config_content;
+
+    // Perform a direct search-and-replace for each URL.
+    for (original_url, new_url) in &url_replacement_map {
+        output_content = output_content.replace(original_url, new_url);
+    }
+
+    // Write the modified string content to the output file.
+    fs::write(&args.output_config, output_content).with_context(|| {
+        format!(
+            "Failed to write output config to {}",
+            args.output_config.display()
+        )
+    })?;
+    info!(
+        path = %args.output_config.display(),
+        "Output configuration file saved successfully."
+    );
+
+    info!("Synchronization process completed successfully.");
     Ok(())
 }
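
Note (not part of the commit): a sketch of the rewrite `run` now performs, assuming `url_to_filename` (defined in src/downloader.rs, which this diff does not touch) keeps the URL's final path segment; the sample URL and domain are hypothetical:

    use serde_json::Value;

    fn rewrite_demo() -> anyhow::Result<()> {
        let input = r#"{"route":{"rule_set":[
            {"tag":"geoip-cn","url":"https://example.com/sb/geoip-cn.srs"}]}}"#;
        let config: Value = serde_json::from_str(input)?;
        if let Some(rules) = config.pointer("/route/rule_set").and_then(|v| v.as_array()) {
            for rule in rules {
                if let Some(url) = rule.get("url").and_then(|u| u.as_str()) {
                    // Assumed filename derivation: the last path segment.
                    let filename = url.rsplit('/').next().unwrap_or_default();
                    // With --domain mydomain.com and --rule-path rules:
                    let new_url = format!("https://mydomain.com/rules/{filename}");
                    // https://example.com/sb/geoip-cn.srs -> https://mydomain.com/rules/geoip-cn.srs
                    println!("{url} -> {new_url}");
                }
            }
        }
        Ok(())
    }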

src/main.rs | 1 change
@@ -10,7 +10,6 @@ async fn main() -> Result<()> {
     let args = Args::parse();

     // 2. Set up the tracing subscriber.
-    // This configures a logger that prints to the console.
     // The verbosity is controlled by the `-v` flag.
     let log_level = match args.verbose {
         0 => "info", // Default level