diff options
author | Leonard Kugis <leonard@kug.is> | 2025-04-23 16:51:55 +0000 |
---|---|---|
committer | Leonard Kugis <leonard@kug.is> | 2025-04-23 16:51:55 +0000 |
commit | fd647f540792cba368f7d4193801004fa62435b2 (patch) | |
tree | c5b08f2928bbbde111f25d30a654d43a776445f0 | |
parent | eff36e6f573300d4a5a655ef61139731ac16691f (diff) | |
download | yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.tar.gz yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.tar.bz2 yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.zip |
Added Rust files as stub
-rw-r--r-- | .gitignore | 25 | ||||
-rw-r--r-- | Cargo.toml | 11 | ||||
-rw-r--r-- | src/main.rs | 190 |
3 files changed, 223 insertions, 3 deletions
@@ -1,6 +1,6 @@ yara -# Created by https://www.toptal.com/developers/gitignore/api/python,windows,linux,macos,visualstudiocode -# Edit at https://www.toptal.com/developers/gitignore?templates=python,windows,linux,macos,visualstudiocode +# Created by https://www.toptal.com/developers/gitignore/api/rust,linux,macos,python,windows,visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=rust,linux,macos,python,windows,visualstudiocode ### Linux ### *~ @@ -222,6 +222,21 @@ poetry.toml # LSP config files pyrightconfig.json +### Rust ### +# Generated by Cargo +# will have compiled files and executables +debug/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + ### VisualStudioCode ### .vscode/* !.vscode/settings.json @@ -267,4 +282,8 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk -# End of https://www.toptal.com/developers/gitignore/api/python,windows,linux,macos,visualstudiocode
\ No newline at end of file +# End of https://www.toptal.com/developers/gitignore/api/rust,linux,macos,python,windows,visualstudiocode + +# Added by cargo + +/target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..66b5e5a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "yara-compiler" +version = "0.1.0" +edition = "2024" + +[dependencies] +regex = "1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +log = "0.4" +byteorder = "1.4"
\ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e23d224 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,190 @@ +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::collections::HashMap; + +use byteorder::{WriteBytesExt, BigEndian}; +use regex::Regex; +use serde::Deserialize; +use log::{debug, warn}; + +#[derive(Debug)] +struct OperatorTree { + left: Box<OperatorNode>, + right: Box<OperatorNode>, + operator: u8, +} + +#[derive(Debug)] +struct OperatorOf { + n: String, + pattern: String, +} + +#[derive(Debug)] +enum OperatorNode { + Tree(OperatorTree), + Of(OperatorOf), + Identifier(String), +} + +#[derive(Deserialize, Debug)] +struct RuleString { + id: String, + #[serde(rename = "type")] + string_type: u8, + text: String, + modifiers: HashMap<String, bool>, +} + +#[derive(Deserialize, Debug)] +struct RuleEntry { + identifier: String, + condition: String, + strings: Vec<RuleString>, +} + +pub struct YaraDatabase { + entries: Vec<RuleEntry>, + pub data: Vec<u8>, +} + +impl YaraDatabase { + const STRING_TYPE_STRING: u8 = 0; + const STRING_TYPE_HEX: u8 = 1; + const STRING_TYPE_REGEX: u8 = 2; + + const WILDCARD_NIBBLE_LOW: u8 = 0; + const WILDCARD_NIBBLE_HIGH: u8 = 1; + const WILDCARD_NIBBLE_BOTH: u8 = 2; + + const CONDITION_OPERATOR_OR: u8 = 0; + const CONDITION_OPERATOR_AND: u8 = 1; + const CONDITION_OPERATOR_OF: u8 = 2; + const CONDITION_OPERATOR_SINGLE: u8 = 3; + const CONDITION_OPERATOR_TRUE: u8 = 4; + + pub fn new() -> Self { + YaraDatabase { + entries: Vec::new(), + data: Vec::new(), + } + } + + pub fn add_file<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Box<dyn std::error::Error>> { + let mut f = File::open(path)?; + let mut content = String::new(); + f.read_to_string(&mut content)?; + let json: serde_json::Value = serde_json::from_str(&content)?; + if let Some(rules) = json.get("rules") { + self.entries.extend(serde_json::from_value::<Vec<RuleEntry>>(rules.clone())?); + } + Ok(()) + } + + fn build_tree(condition: &str) -> OperatorNode { + let pattern_or = Regex::new(r"(.*)\s+or\s+(.*)").unwrap(); + let pattern_and = Regex::new(r"(.*)\s+and\s+(.*)").unwrap(); + let pattern_of = Regex::new(r"((\d+)|(all)|(any))\s+of\s+([\w\_\(\)\$\*\,]+)").unwrap(); + + debug!("Parsing condition = {}", condition); + + if let Some(caps) = pattern_or.captures(condition) { + return OperatorNode::Tree(OperatorTree { + left: Box::new(Self::build_tree(&caps[1])), + right: Box::new(Self::build_tree(&caps[2])), + operator: Self::CONDITION_OPERATOR_OR, + }); + } + + if let Some(caps) = pattern_and.captures(condition) { + return OperatorNode::Tree(OperatorTree { + left: Box::new(Self::build_tree(&caps[1])), + right: Box::new(Self::build_tree(&caps[2])), + operator: Self::CONDITION_OPERATOR_AND, + }); + } + + if let Some(caps) = pattern_of.captures(condition) { + let n = caps[1].to_string(); + let pattern = caps[5].to_string(); + debug!("Leaf: OperatorOf, n = {}, pattern = {}", n, pattern); + return OperatorNode::Of(OperatorOf { n, pattern }); + } + + debug!("Leaf: remainder = {}", condition); + OperatorNode::Identifier(condition.trim().to_string()) + } + + fn compile_tree(node: &OperatorNode, strings: &[String]) -> Vec<u8> { + let mut data = Vec::new(); + + match node { + OperatorNode::Tree(tree) => { + data.write_u8(tree.operator).unwrap(); + data.extend(Self::compile_tree(&tree.left, strings)); + data.extend(Self::compile_tree(&tree.right, strings)); + } + OperatorNode::Of(op_of) => { + debug!("Compiling OperatorOf, n = {}, pattern = {}", op_of.n, op_of.pattern); + data.write_u8(Self::CONDITION_OPERATOR_OF).unwrap(); + + let mut pattern = String::new(); + let mut para = 0; + for c in op_of.pattern.chars() { + match c { + '$' => pattern.push_str(r"\$"), + '*' => pattern.push_str(".*"), + ',' => pattern.push_str(")|("), + '(' => { pattern.push('('); para += 1; } + ')' => { + if para == 0 { + warn!("Unmatched parenthesis in pattern {}", op_of.pattern); + } else { + pattern.push(')'); + para -= 1; + } + } + ' ' => {} + other => pattern.push(other), + } + } + let re = Regex::new(&format!("^({})$", pattern)).unwrap(); + let of_elements: Vec<u8> = strings.iter().enumerate() + .filter(|(_, s)| re.is_match(s)) + .map(|(i, _)| i as u8) + .collect(); + + let n = match op_of.n.as_str() { + "all" => 0, + "any" => 1, + _ => op_of.n.parse::<u8>().unwrap_or(0), + }; + + data.write_u8(n).unwrap(); + data.write_u8(of_elements.len() as u8).unwrap(); + for e in of_elements { + data.write_u8(e).unwrap(); + } + } + OperatorNode::Identifier(id) => { + debug!("Compiling single identifier {}", id); + let mut found = false; + for (i, s) in strings.iter().enumerate() { + if s == id { + data.write_u8(Self::CONDITION_OPERATOR_SINGLE).unwrap(); + data.write_u8(i as u8).unwrap(); + found = true; + break; + } + } + if !found { + warn!("Single identifier {} not found, defaulting to true", id); + data.write_u8(Self::CONDITION_OPERATOR_TRUE).unwrap(); + } + } + } + + data + } +} |