diff options
author | Leonard Kugis <leonard@kug.is> | 2025-04-23 16:51:55 +0000 |
---|---|---|
committer | Leonard Kugis <leonard@kug.is> | 2025-04-23 16:51:55 +0000 |
commit | fd647f540792cba368f7d4193801004fa62435b2 (patch) | |
tree | c5b08f2928bbbde111f25d30a654d43a776445f0 /src/main.rs | |
parent | eff36e6f573300d4a5a655ef61139731ac16691f (diff) | |
download | yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.tar.gz yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.tar.bz2 yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.zip |
Added Rust files as stub
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e23d224 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,190 @@ +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::collections::HashMap; + +use byteorder::{WriteBytesExt, BigEndian}; +use regex::Regex; +use serde::Deserialize; +use log::{debug, warn}; + +#[derive(Debug)] +struct OperatorTree { + left: Box<OperatorNode>, + right: Box<OperatorNode>, + operator: u8, +} + +#[derive(Debug)] +struct OperatorOf { + n: String, + pattern: String, +} + +#[derive(Debug)] +enum OperatorNode { + Tree(OperatorTree), + Of(OperatorOf), + Identifier(String), +} + +#[derive(Deserialize, Debug)] +struct RuleString { + id: String, + #[serde(rename = "type")] + string_type: u8, + text: String, + modifiers: HashMap<String, bool>, +} + +#[derive(Deserialize, Debug)] +struct RuleEntry { + identifier: String, + condition: String, + strings: Vec<RuleString>, +} + +pub struct YaraDatabase { + entries: Vec<RuleEntry>, + pub data: Vec<u8>, +} + +impl YaraDatabase { + const STRING_TYPE_STRING: u8 = 0; + const STRING_TYPE_HEX: u8 = 1; + const STRING_TYPE_REGEX: u8 = 2; + + const WILDCARD_NIBBLE_LOW: u8 = 0; + const WILDCARD_NIBBLE_HIGH: u8 = 1; + const WILDCARD_NIBBLE_BOTH: u8 = 2; + + const CONDITION_OPERATOR_OR: u8 = 0; + const CONDITION_OPERATOR_AND: u8 = 1; + const CONDITION_OPERATOR_OF: u8 = 2; + const CONDITION_OPERATOR_SINGLE: u8 = 3; + const CONDITION_OPERATOR_TRUE: u8 = 4; + + pub fn new() -> Self { + YaraDatabase { + entries: Vec::new(), + data: Vec::new(), + } + } + + pub fn add_file<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Box<dyn std::error::Error>> { + let mut f = File::open(path)?; + let mut content = String::new(); + f.read_to_string(&mut content)?; + let json: serde_json::Value = serde_json::from_str(&content)?; + if let Some(rules) = json.get("rules") { + self.entries.extend(serde_json::from_value::<Vec<RuleEntry>>(rules.clone())?); + } + Ok(()) + } + + fn build_tree(condition: &str) -> OperatorNode { + let pattern_or = Regex::new(r"(.*)\s+or\s+(.*)").unwrap(); + let pattern_and = Regex::new(r"(.*)\s+and\s+(.*)").unwrap(); + let pattern_of = Regex::new(r"((\d+)|(all)|(any))\s+of\s+([\w\_\(\)\$\*\,]+)").unwrap(); + + debug!("Parsing condition = {}", condition); + + if let Some(caps) = pattern_or.captures(condition) { + return OperatorNode::Tree(OperatorTree { + left: Box::new(Self::build_tree(&caps[1])), + right: Box::new(Self::build_tree(&caps[2])), + operator: Self::CONDITION_OPERATOR_OR, + }); + } + + if let Some(caps) = pattern_and.captures(condition) { + return OperatorNode::Tree(OperatorTree { + left: Box::new(Self::build_tree(&caps[1])), + right: Box::new(Self::build_tree(&caps[2])), + operator: Self::CONDITION_OPERATOR_AND, + }); + } + + if let Some(caps) = pattern_of.captures(condition) { + let n = caps[1].to_string(); + let pattern = caps[5].to_string(); + debug!("Leaf: OperatorOf, n = {}, pattern = {}", n, pattern); + return OperatorNode::Of(OperatorOf { n, pattern }); + } + + debug!("Leaf: remainder = {}", condition); + OperatorNode::Identifier(condition.trim().to_string()) + } + + fn compile_tree(node: &OperatorNode, strings: &[String]) -> Vec<u8> { + let mut data = Vec::new(); + + match node { + OperatorNode::Tree(tree) => { + data.write_u8(tree.operator).unwrap(); + data.extend(Self::compile_tree(&tree.left, strings)); + data.extend(Self::compile_tree(&tree.right, strings)); + } + OperatorNode::Of(op_of) => { + debug!("Compiling OperatorOf, n = {}, pattern = {}", op_of.n, op_of.pattern); + data.write_u8(Self::CONDITION_OPERATOR_OF).unwrap(); + + let mut pattern = String::new(); + let mut para = 0; + for c in op_of.pattern.chars() { + match c { + '$' => pattern.push_str(r"\$"), + '*' => pattern.push_str(".*"), + ',' => pattern.push_str(")|("), + '(' => { pattern.push('('); para += 1; } + ')' => { + if para == 0 { + warn!("Unmatched parenthesis in pattern {}", op_of.pattern); + } else { + pattern.push(')'); + para -= 1; + } + } + ' ' => {} + other => pattern.push(other), + } + } + let re = Regex::new(&format!("^({})$", pattern)).unwrap(); + let of_elements: Vec<u8> = strings.iter().enumerate() + .filter(|(_, s)| re.is_match(s)) + .map(|(i, _)| i as u8) + .collect(); + + let n = match op_of.n.as_str() { + "all" => 0, + "any" => 1, + _ => op_of.n.parse::<u8>().unwrap_or(0), + }; + + data.write_u8(n).unwrap(); + data.write_u8(of_elements.len() as u8).unwrap(); + for e in of_elements { + data.write_u8(e).unwrap(); + } + } + OperatorNode::Identifier(id) => { + debug!("Compiling single identifier {}", id); + let mut found = false; + for (i, s) in strings.iter().enumerate() { + if s == id { + data.write_u8(Self::CONDITION_OPERATOR_SINGLE).unwrap(); + data.write_u8(i as u8).unwrap(); + found = true; + break; + } + } + if !found { + warn!("Single identifier {} not found, defaulting to true", id); + data.write_u8(Self::CONDITION_OPERATOR_TRUE).unwrap(); + } + } + } + + data + } +} |