use std::fs::File; use std::io::Read; use std::path::Path; use std::collections::HashMap; use byteorder::{WriteBytesExt, BigEndian}; use regex::Regex; use serde::Deserialize; use log::{debug, warn}; #[derive(Debug)] struct OperatorTree { left: Box, right: Box, operator: u8, } #[derive(Debug)] struct OperatorOf { n: String, pattern: String, } #[derive(Debug)] enum OperatorNode { Tree(OperatorTree), Of(OperatorOf), Identifier(String), } #[derive(Deserialize, Debug)] struct RuleString { id: String, #[serde(rename = "type")] string_type: u8, text: String, modifiers: HashMap, } #[derive(Deserialize, Debug)] struct RuleEntry { identifier: String, condition: String, strings: Vec, } pub struct YaraDatabase { entries: Vec, pub data: Vec, } impl YaraDatabase { const STRING_TYPE_STRING: u8 = 0; const STRING_TYPE_HEX: u8 = 1; const STRING_TYPE_REGEX: u8 = 2; const WILDCARD_NIBBLE_LOW: u8 = 0; const WILDCARD_NIBBLE_HIGH: u8 = 1; const WILDCARD_NIBBLE_BOTH: u8 = 2; const CONDITION_OPERATOR_OR: u8 = 0; const CONDITION_OPERATOR_AND: u8 = 1; const CONDITION_OPERATOR_OF: u8 = 2; const CONDITION_OPERATOR_SINGLE: u8 = 3; const CONDITION_OPERATOR_TRUE: u8 = 4; pub fn new() -> Self { YaraDatabase { entries: Vec::new(), data: Vec::new(), } } pub fn add_file>(&mut self, path: P) -> Result<(), Box> { let mut f = File::open(path)?; let mut content = String::new(); f.read_to_string(&mut content)?; let json: serde_json::Value = serde_json::from_str(&content)?; if let Some(rules) = json.get("rules") { self.entries.extend(serde_json::from_value::>(rules.clone())?); } Ok(()) } fn build_tree(condition: &str) -> OperatorNode { let pattern_or = Regex::new(r"(.*)\s+or\s+(.*)").unwrap(); let pattern_and = Regex::new(r"(.*)\s+and\s+(.*)").unwrap(); let pattern_of = Regex::new(r"((\d+)|(all)|(any))\s+of\s+([\w\_\(\)\$\*\,]+)").unwrap(); debug!("Parsing condition = {}", condition); if let Some(caps) = pattern_or.captures(condition) { return OperatorNode::Tree(OperatorTree { left: Box::new(Self::build_tree(&caps[1])), right: Box::new(Self::build_tree(&caps[2])), operator: Self::CONDITION_OPERATOR_OR, }); } if let Some(caps) = pattern_and.captures(condition) { return OperatorNode::Tree(OperatorTree { left: Box::new(Self::build_tree(&caps[1])), right: Box::new(Self::build_tree(&caps[2])), operator: Self::CONDITION_OPERATOR_AND, }); } if let Some(caps) = pattern_of.captures(condition) { let n = caps[1].to_string(); let pattern = caps[5].to_string(); debug!("Leaf: OperatorOf, n = {}, pattern = {}", n, pattern); return OperatorNode::Of(OperatorOf { n, pattern }); } debug!("Leaf: remainder = {}", condition); OperatorNode::Identifier(condition.trim().to_string()) } fn compile_tree(node: &OperatorNode, strings: &[String]) -> Vec { let mut data = Vec::new(); match node { OperatorNode::Tree(tree) => { data.write_u8(tree.operator).unwrap(); data.extend(Self::compile_tree(&tree.left, strings)); data.extend(Self::compile_tree(&tree.right, strings)); } OperatorNode::Of(op_of) => { debug!("Compiling OperatorOf, n = {}, pattern = {}", op_of.n, op_of.pattern); data.write_u8(Self::CONDITION_OPERATOR_OF).unwrap(); let mut pattern = String::new(); let mut para = 0; for c in op_of.pattern.chars() { match c { '$' => pattern.push_str(r"\$"), '*' => pattern.push_str(".*"), ',' => pattern.push_str(")|("), '(' => { pattern.push('('); para += 1; } ')' => { if para == 0 { warn!("Unmatched parenthesis in pattern {}", op_of.pattern); } else { pattern.push(')'); para -= 1; } } ' ' => {} other => pattern.push(other), } } let re = Regex::new(&format!("^({})$", pattern)).unwrap(); let of_elements: Vec = strings.iter().enumerate() .filter(|(_, s)| re.is_match(s)) .map(|(i, _)| i as u8) .collect(); let n = match op_of.n.as_str() { "all" => 0, "any" => 1, _ => op_of.n.parse::().unwrap_or(0), }; data.write_u8(n).unwrap(); data.write_u8(of_elements.len() as u8).unwrap(); for e in of_elements { data.write_u8(e).unwrap(); } } OperatorNode::Identifier(id) => { debug!("Compiling single identifier {}", id); let mut found = false; for (i, s) in strings.iter().enumerate() { if s == id { data.write_u8(Self::CONDITION_OPERATOR_SINGLE).unwrap(); data.write_u8(i as u8).unwrap(); found = true; break; } } if !found { warn!("Single identifier {} not found, defaulting to true", id); data.write_u8(Self::CONDITION_OPERATOR_TRUE).unwrap(); } } } data } }