aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Leonard Kugis <leonard@kug.is> 2025-04-23 16:51:55 +0000
committer Leonard Kugis <leonard@kug.is> 2025-04-23 16:51:55 +0000
commit fd647f540792cba368f7d4193801004fa62435b2 (patch)
tree c5b08f2928bbbde111f25d30a654d43a776445f0
parent eff36e6f573300d4a5a655ef61139731ac16691f (diff)
download yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.tar.gz
yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.tar.bz2
yara-compiler-fd647f540792cba368f7d4193801004fa62435b2.zip
Added Rust files as stub
-rw-r--r-- .gitignore 25
-rw-r--r-- Cargo.toml 11
-rw-r--r-- src/main.rs 190
3 files changed, 223 insertions, 3 deletions
diff --git a/.gitignore b/.gitignore
index 97f2a16..9a7d9b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
yara
-# Created by https://www.toptal.com/developers/gitignore/api/python,windows,linux,macos,visualstudiocode
-# Edit at https://www.toptal.com/developers/gitignore?templates=python,windows,linux,macos,visualstudiocode
+# Created by https://www.toptal.com/developers/gitignore/api/rust,linux,macos,python,windows,visualstudiocode
+# Edit at https://www.toptal.com/developers/gitignore?templates=rust,linux,macos,python,windows,visualstudiocode
### Linux ###
*~
@@ -222,6 +222,21 @@ poetry.toml
# LSP config files
pyrightconfig.json
+### Rust ###
+# Generated by Cargo
+# will have compiled files and executables
+debug/
+
+# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
+# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
+Cargo.lock
+
+# These are backup files generated by rustfmt
+**/*.rs.bk
+
+# MSVC Windows builds of rustc generate these, which store debugging information
+*.pdb
+
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
@@ -267,4 +282,8 @@ $RECYCLE.BIN/
# Windows shortcuts
*.lnk
-# End of https://www.toptal.com/developers/gitignore/api/python,windows,linux,macos,visualstudiocode
\ No newline at end of file
+# End of https://www.toptal.com/developers/gitignore/api/rust,linux,macos,python,windows,visualstudiocode
+
+# Added by cargo
+
+/target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..66b5e5a
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "yara-compiler"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+regex = "1"
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+log = "0.4"
+byteorder = "1.4"
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e23d224
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,190 @@
+use std::fs::File;
+use std::io::Read;
+use std::path::Path;
+use std::collections::HashMap;
+
+use byteorder::{WriteBytesExt, BigEndian};
+use regex::Regex;
+use serde::Deserialize;
+use log::{debug, warn};
+
+/// Binary node of a parsed condition: two sub-expressions joined by one operator.
+#[derive(Debug)]
+struct OperatorTree {
+ /// Sub-expression found to the left of the operator keyword.
+ left: Box<OperatorNode>,
+ /// Sub-expression found to the right of the operator keyword.
+ right: Box<OperatorNode>,
+ /// Operator code; `build_tree` stores `CONDITION_OPERATOR_OR` or
+ /// `CONDITION_OPERATOR_AND` here, and `compile_tree` writes it out as one byte.
+ operator: u8,
+}
+
+/// Leaf node for an `<n> of <set>` expression, kept as the raw text captured by `build_tree`.
+#[derive(Debug)]
+struct OperatorOf {
+ /// Quantifier text as written in the condition: a decimal number, `all` or `any`.
+ n: String,
+ /// String-set text following `of`; `compile_tree` turns it into a regular
+ /// expression that is matched against the rule's string identifiers.
+ pattern: String,
+}
+
+/// Node of the expression tree that `build_tree` produces for a rule condition.
+#[derive(Debug)]
+enum OperatorNode {
+ /// Two sub-expressions combined by a binary operator.
+ Tree(OperatorTree),
+ /// An `<n> of <set>` expression.
+ Of(OperatorOf),
+ /// Any remaining text, trimmed; `compile_tree` looks it up among the string identifiers.
+ Identifier(String),
+}
+
+/// One string definition of a rule, deserialized from the JSON rule file.
+#[derive(Deserialize, Debug)]
+struct RuleString {
+ /// Identifier of the string.
+ id: String,
+ /// Numeric type code, read from the JSON key `type`.
+ // NOTE(review): presumably one of the `YaraDatabase::STRING_TYPE_*` codes — confirm.
+ #[serde(rename = "type")]
+ string_type: u8,
+ /// Text of the string as stored in the JSON file.
+ text: String,
+ /// Modifier flags keyed by modifier name.
+ modifiers: HashMap<String, bool>,
+}
+
+/// One rule as stored in the `rules` array of a JSON rule file.
+#[derive(Deserialize, Debug)]
+struct RuleEntry {
+ /// Name of the rule.
+ identifier: String,
+ /// Condition expression as text.
+ // NOTE(review): presumably the input of `YaraDatabase::build_tree` — no caller is visible yet.
+ condition: String,
+ /// String definitions belonging to this rule.
+ strings: Vec<RuleString>,
+}
+
+/// Collection of rules loaded from JSON rule files, plus a public byte buffer.
+///
+/// `Default` yields the same empty database as `YaraDatabase::new()`.
+#[derive(Debug, Default)]
+pub struct YaraDatabase {
+    /// Rules accumulated by `add_file`, in load order.
+    entries: Vec<RuleEntry>,
+    /// Byte buffer exposed to callers; nothing in the visible code writes to it yet.
+    // NOTE(review): presumably meant to receive the compiled database — confirm.
+    pub data: Vec<u8>,
+}
+
+/// Loading of JSON rule files and compilation of rule conditions into bytes.
+impl YaraDatabase {
+    // String type codes.
+    // NOTE(review): presumably the values of `RuleString::string_type` — confirm.
+    const STRING_TYPE_STRING: u8 = 0;
+    const STRING_TYPE_HEX: u8 = 1;
+    const STRING_TYPE_REGEX: u8 = 2;
+
+    // Wildcard nibble codes; not referenced by the code in this impl yet.
+    const WILDCARD_NIBBLE_LOW: u8 = 0;
+    const WILDCARD_NIBBLE_HIGH: u8 = 1;
+    const WILDCARD_NIBBLE_BOTH: u8 = 2;
+
+    // Operator bytes emitted by `compile_tree`.
+    const CONDITION_OPERATOR_OR: u8 = 0;
+    const CONDITION_OPERATOR_AND: u8 = 1;
+    const CONDITION_OPERATOR_OF: u8 = 2;
+    const CONDITION_OPERATOR_SINGLE: u8 = 3;
+    const CONDITION_OPERATOR_TRUE: u8 = 4;
+
+    /// Creates an empty database: no rules and an empty data buffer.
+    pub fn new() -> Self {
+        YaraDatabase {
+            entries: Vec::new(),
+            data: Vec::new(),
+        }
+    }
+
+    /// Reads the JSON file at `path` and appends the rules in its top-level `rules` array.
+    ///
+    /// A file without a `rules` key is accepted; a warning is logged and nothing is added.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the file cannot be opened or read, when it is not valid
+    /// JSON, or when `rules` does not deserialize into a list of rule entries.
+    pub fn add_file<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Box<dyn std::error::Error>> {
+        let path = path.as_ref();
+        let mut f = File::open(path)?;
+        let mut content = String::new();
+        f.read_to_string(&mut content)?;
+        let json: serde_json::Value = serde_json::from_str(&content)?;
+        match json.get("rules") {
+            // `&Value` is itself a deserializer, so the subtree does not need to be cloned.
+            Some(rules) => self.entries.extend(Vec::<RuleEntry>::deserialize(rules)?),
+            None => warn!("No \"rules\" key found in {}", path.display()),
+        }
+        Ok(())
+    }
+
+    /// Parses a condition string into an expression tree.
+    ///
+    /// The text is split at ` or ` first, then at ` and `; both splits are greedy and
+    /// therefore happen at the last occurrence of the keyword. A remaining
+    /// `<n> of <set>` expression becomes an `Of` leaf, anything else a trimmed
+    /// `Identifier` leaf.
+    ///
+    /// Parentheses that group sub-expressions are not understood: ` or ` / ` and `
+    /// inside a parenthesised group still split the text.
+    fn build_tree(condition: &str) -> OperatorNode {
+        use std::sync::LazyLock;
+
+        // Compiled once instead of on every recursive call.
+        static PATTERN_OR: LazyLock<Regex> = LazyLock::new(|| {
+            Regex::new(r"(.*)\s+or\s+(.*)").expect("hard-coded `or` pattern is valid")
+        });
+        static PATTERN_AND: LazyLock<Regex> = LazyLock::new(|| {
+            Regex::new(r"(.*)\s+and\s+(.*)").expect("hard-coded `and` pattern is valid")
+        });
+        // The set is either one parenthesised list, which may contain spaces as in
+        // `($a, $b*)`, or a run of identifier/set characters such as `them` or `$a*`.
+        static PATTERN_OF: LazyLock<Regex> = LazyLock::new(|| {
+            Regex::new(r"((\d+)|(all)|(any))\s+of\s+(\([^()]*\)|[\w\(\)\$\*,]+)")
+                .expect("hard-coded `of` pattern is valid")
+        });
+
+        debug!("Parsing condition = {}", condition);
+
+        if let Some(caps) = PATTERN_OR.captures(condition) {
+            return OperatorNode::Tree(OperatorTree {
+                left: Box::new(Self::build_tree(&caps[1])),
+                right: Box::new(Self::build_tree(&caps[2])),
+                operator: Self::CONDITION_OPERATOR_OR,
+            });
+        }
+
+        if let Some(caps) = PATTERN_AND.captures(condition) {
+            return OperatorNode::Tree(OperatorTree {
+                left: Box::new(Self::build_tree(&caps[1])),
+                right: Box::new(Self::build_tree(&caps[2])),
+                operator: Self::CONDITION_OPERATOR_AND,
+            });
+        }
+
+        if let Some(caps) = PATTERN_OF.captures(condition) {
+            let n = caps[1].to_string();
+            let pattern = caps[5].to_string();
+            debug!("Leaf: OperatorOf, n = {}, pattern = {}", n, pattern);
+            return OperatorNode::Of(OperatorOf { n, pattern });
+        }
+
+        debug!("Leaf: remainder = {}", condition);
+        OperatorNode::Identifier(condition.trim().to_string())
+    }
+
+    /// Serializes an expression tree into bytes, in prefix order.
+    ///
+    /// `strings` holds the rule's string identifiers; their positions are the indices
+    /// written to the output.
+    ///
+    /// * `Tree`: operator byte, compiled left subtree, compiled right subtree.
+    /// * `Of`: `CONDITION_OPERATOR_OF`, the count byte (`all` = 0, `any` = 1, otherwise
+    ///   the number), the number of selected strings, then one index byte per string.
+    /// * `Identifier`: `CONDITION_OPERATOR_SINGLE` followed by the index of the string,
+    ///   or `CONDITION_OPERATOR_TRUE` alone when the identifier cannot be referenced.
+    ///
+    /// Indices and counts are one byte wide. Values that do not fit are reported with a
+    /// warning and skipped or clamped instead of being silently truncated.
+    fn compile_tree(node: &OperatorNode, strings: &[String]) -> Vec<u8> {
+        let mut data = Vec::new();
+        Self::compile_into(node, strings, &mut data);
+        data
+    }
+
+    /// Appends the encoding of `node` to `data`; recursive worker of `compile_tree`.
+    fn compile_into(node: &OperatorNode, strings: &[String], data: &mut Vec<u8>) {
+        match node {
+            OperatorNode::Tree(tree) => {
+                Self::put(data, tree.operator);
+                Self::compile_into(&tree.left, strings, data);
+                Self::compile_into(&tree.right, strings, data);
+            }
+            OperatorNode::Of(op_of) => {
+                debug!("Compiling OperatorOf, n = {}, pattern = {}", op_of.n, op_of.pattern);
+                Self::put(data, Self::CONDITION_OPERATOR_OF);
+
+                let mut of_elements: Vec<u8> = Vec::new();
+                if let Some(re) = Self::of_pattern_regex(&op_of.pattern) {
+                    for (i, s) in strings.iter().enumerate() {
+                        if !re.is_match(s) {
+                            continue;
+                        }
+                        match u8::try_from(i) {
+                            Ok(index) => of_elements.push(index),
+                            Err(_) => warn!(
+                                "String {} at index {} cannot be referenced by a one-byte index, skipping",
+                                s, i
+                            ),
+                        }
+                    }
+                }
+
+                let n = match op_of.n.as_str() {
+                    "all" => 0,
+                    "any" => 1,
+                    other => other.parse::<u8>().unwrap_or_else(|_| {
+                        warn!("Count {} is not a valid one-byte number, defaulting to 0", other);
+                        0
+                    }),
+                };
+
+                // 256 distinct one-byte indices are possible, one more than a byte can count.
+                let count = match u8::try_from(of_elements.len()) {
+                    Ok(count) => count,
+                    Err(_) => {
+                        warn!("Too many strings selected by pattern {}, truncating", op_of.pattern);
+                        of_elements.truncate(usize::from(u8::MAX));
+                        u8::MAX
+                    }
+                };
+
+                Self::put(data, n);
+                Self::put(data, count);
+                for e in of_elements {
+                    Self::put(data, e);
+                }
+            }
+            OperatorNode::Identifier(id) => {
+                debug!("Compiling single identifier {}", id);
+                match strings.iter().position(|s| s == id).map(u8::try_from) {
+                    Some(Ok(index)) => {
+                        Self::put(data, Self::CONDITION_OPERATOR_SINGLE);
+                        Self::put(data, index);
+                    }
+                    Some(Err(_)) => {
+                        warn!(
+                            "Index of single identifier {} does not fit into one byte, defaulting to true",
+                            id
+                        );
+                        Self::put(data, Self::CONDITION_OPERATOR_TRUE);
+                    }
+                    None => {
+                        warn!("Single identifier {} not found, defaulting to true", id);
+                        Self::put(data, Self::CONDITION_OPERATOR_TRUE);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Translates the string-set text of an `of` expression into an anchored regex.
+    ///
+    /// `$` is matched literally, `*` becomes `.*`, `,` separates alternatives and
+    /// whitespace is ignored. The keyword `them` is treated like `($*)`. A stray `)`
+    /// is dropped and a missing `)` is added, each with a warning. Returns `None`,
+    /// after logging a warning, if the result is still not a valid regex.
+    fn of_pattern_regex(of_pattern: &str) -> Option<Regex> {
+        let of_pattern = match of_pattern.trim() {
+            "them" => "($*)",
+            other => other,
+        };
+
+        let mut pattern = String::with_capacity(of_pattern.len() + 8);
+        let mut open = 0usize;
+        for c in of_pattern.chars() {
+            match c {
+                '$' => pattern.push_str(r"\$"),
+                '*' => pattern.push_str(".*"),
+                ',' => pattern.push_str(")|("),
+                '(' => {
+                    pattern.push('(');
+                    open += 1;
+                }
+                ')' if open == 0 => warn!("Unmatched parenthesis in pattern {}", of_pattern),
+                ')' => {
+                    pattern.push(')');
+                    open -= 1;
+                }
+                c if c.is_whitespace() => {}
+                other => pattern.push(other),
+            }
+        }
+        if open > 0 {
+            warn!("Unclosed parenthesis in pattern {}", of_pattern);
+            for _ in 0..open {
+                pattern.push(')');
+            }
+        }
+
+        match Regex::new(&format!("^({})$", pattern)) {
+            Ok(re) => Some(re),
+            Err(err) => {
+                warn!("Pattern {} is not usable, selecting no strings: {}", of_pattern, err);
+                None
+            }
+        }
+    }
+
+    /// Appends a single byte to the output buffer.
+    fn put(data: &mut Vec<u8>, byte: u8) {
+        data.write_u8(byte).expect("writing to a Vec<u8> cannot fail");
+    }
+}