aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Leonard Kugis <leonard@kug.is> 2025-04-23 16:44:45 +0000
committer Leonard Kugis <leonard@kug.is> 2025-04-23 16:44:45 +0000
commit a23d53f2ae50aec982004581546ea5e51e8a6da3 (patch)
tree 720b0d43fe518093c03ce7a48dd7479b4b17202c
parent 1919efbe0c565e3fc4a74d5a075df0456effaa06 (diff)
downloadyara-compiler-a23d53f2ae50aec982004581546ea5e51e8a6da3.tar.gz
yara-compiler-a23d53f2ae50aec982004581546ea5e51e8a6da3.tar.bz2
yara-compiler-a23d53f2ae50aec982004581546ea5e51e8a6da3.zip
yara: Refactored entire YAC structure
-rw-r--r--yara.py555
1 files changed, 428 insertions, 127 deletions
diff --git a/yara.py b/yara.py
index 9252c68..1ff6a36 100644
--- a/yara.py
+++ b/yara.py
@@ -4,9 +4,29 @@ import os
import struct
import re
import logging
+from hashlib import sha256
logger = logging.getLogger(__name__)
# Width codes 0b00..0b11, assigned in ascending order of index width.
(
    MODIFIER_INDEX_SIZE_8_BIT,
    MODIFIER_INDEX_SIZE_16_BIT,
    MODIFIER_INDEX_SIZE_32_BIT,
    MODIFIER_INDEX_SIZE_64_BIT,
) = range(4)

# Width code -> format character ("B", "H", "I", "Q").
MAP_MODIFIER_FORMAT = dict(zip(
    (
        MODIFIER_INDEX_SIZE_8_BIT,
        MODIFIER_INDEX_SIZE_16_BIT,
        MODIFIER_INDEX_SIZE_32_BIT,
        MODIFIER_INDEX_SIZE_64_BIT,
    ),
    "BHIQ",
))
+
class YaraCompileConfig(object):
    """Holder for four compile options.

    Each constructor argument is stored unchanged on an attribute of the
    same name; no validation or conversion is performed.
    """

    def __init__(self, store_identifier_entry, store_identifier_signature, store_index_map_entries, store_index_map_signatures):
        (
            self.store_identifier_entry,
            self.store_identifier_signature,
            self.store_index_map_entries,
            self.store_index_map_signatures,
        ) = (
            store_identifier_entry,
            store_identifier_signature,
            store_index_map_entries,
            store_index_map_signatures,
        )
+
class OperatorTree(object):
def __init__(self):
self.left = None
@@ -21,17 +41,141 @@ class OperatorOf(object):
self.n = n
self.pattern = pattern
-class YaraDatabase(object):
class YaraIndex(object):
    """Encoder for an unsigned index plus the width code that describes it.

    The base class encodes 8-bit values. Subclasses override ``_MODIFIER``
    (the width code, 0b00..0b11) and ``_FORMAT_INDEX`` (the struct format)
    to select a wider encoding.
    """

    _MODIFIER = 0b00
    __FORMAT_MODIFIER = "<B"
    _FORMAT_INDEX = "<B"

    @staticmethod
    def from_size(size):
        """Return the narrowest index encoder able to represent ``size``.

        Args:
            size: non-negative integer that must fit into the index.

        Returns:
            A ``YaraIndex8``, ``YaraIndex16``, ``YaraIndex32`` or
            ``YaraIndex64`` instance.

        Raises:
            ValueError: if ``size`` is negative or does not fit in 64 bits.
        """
        if size < 0:
            raise ValueError("index size must not be negative: {}".format(size))
        candidates = ((8, YaraIndex8), (16, YaraIndex16), (32, YaraIndex32), (64, YaraIndex64))
        for bits, index_type in candidates:
            if size < (1 << bits):
                return index_type()
        # Failing here is clearer than handing back an 8-bit encoder that
        # would only blow up later inside struct.pack.
        raise ValueError("index size does not fit in 64 bits: {}".format(size))

    def compile_index(self, index):
        """Return ``index`` packed with this encoder's format."""
        return struct.pack(self._FORMAT_INDEX, index)

    def compile_modifier(self):
        """Return the width code packed as a single byte."""
        return struct.pack(self.__FORMAT_MODIFIER, self._MODIFIER)


# The subclass formats carry an explicit "<" so the output is little-endian
# on every host, like the other struct formats in this file. Without a
# prefix, struct uses the byte order of the machine running the compiler.
class YaraIndex8(YaraIndex):
    """8-bit index (width code 0b00)."""
    _MODIFIER = 0b00
    _FORMAT_INDEX = "<B"


class YaraIndex16(YaraIndex):
    """16-bit index (width code 0b01)."""
    _MODIFIER = 0b01
    _FORMAT_INDEX = "<H"


class YaraIndex32(YaraIndex):
    """32-bit index (width code 0b10)."""
    _MODIFIER = 0b10
    _FORMAT_INDEX = "<I"


class YaraIndex64(YaraIndex):
    """64-bit index (width code 0b11)."""
    _MODIFIER = 0b11
    _FORMAT_INDEX = "<Q"
+
class YaraAddressing(object):
    """Base marker class; defines no attributes or behaviour."""


class YaraAddressingBit(YaraAddressing):
    """Empty marker subclass of YaraAddressing."""


class YaraAddressingNibble(YaraAddressing):
    """Empty marker subclass of YaraAddressing."""


class YaraAddressingByte(YaraAddressing):
    """Empty marker subclass of YaraAddressing."""
+
class YaraIndexMap(object):
    """List of indices that are all serialized with one index encoder."""

    def __init__(self, index = None, indices = None):
        """Store the encoder and the index list.

        Args:
            index: object providing ``compile_index(value) -> bytes``;
                a plain ``YaraIndex`` is created when omitted.
            indices: list of integers to serialize; a new empty list is
                created when omitted.
        """
        # Defaults are built per instance so maps never share one list or
        # one encoder object.
        self.index = YaraIndex() if index is None else index
        self.indices = list() if indices is None else indices

    def compile(self):
        """Return the encodings of all indices, concatenated in order.

        The result length is the sum of the encoded widths. Sizing the
        output by the number of indices would cut off every encoding wider
        than one byte.
        """
        compile_index = self.index.compile_index
        return b"".join(compile_index(index) for index in self.indices)
+
class StringBlock(object):
    """Base class of all string blocks; produces the shared 3-byte header.

    The header holds one modifiers byte followed by ``mask_left`` and
    ``mask_right``. The modifiers byte carries the index width code in its
    upper nibble and the block type (``_TYPE``) in its lower nibble.
    """

    # little endian ('<'): modifiers, mask_left, mask_right
    __FORMAT = "<BBB"

    _TYPE = 0 # should not occur

    def __init__(self, mask_left = 0xFF, mask_right = 0xFF):
        """Store the two mask bytes (each must fit in one unsigned byte)."""
        self.mask_left = mask_left
        self.mask_right = mask_right

    def __str__(self):
        """Return the concrete class name followed by both masks."""
        # type(self).__name__ names the actual block class; formatting the
        # bare super() proxy only printed an opaque "<super: ...>" object.
        return "{}, mask_left = {}, mask_right = {}".format(type(self).__name__, self.mask_left, self.mask_right)

    def _compile(self, index):
        """Return the packed header.

        Args:
            index: object whose ``compile_modifier()`` returns a bytes-like
                value; its first byte is used as the width code.
        """
        modifiers = (index.compile_modifier()[0] << 4) | self._TYPE
        return struct.pack(self.__FORMAT, modifiers, self.mask_left, self.mask_right)
+
class StringBlockText(StringBlock):
    """String block that carries literal bytes behind a length field."""

    # Layout ('<', no padding): StringBlock header, text length, text bytes.
    __FORMAT = "<{size_super}s{size_text_data}s{size_text}s"

    _TYPE = 0b0000

    def __init__(self, text, mask_left = 0xFF, mask_right = 0xFF):
        """Store ``text`` (a bytes-like value) and forward both masks."""
        super().__init__(mask_left, mask_right)
        self.text = text

    def __str__(self):
        """Extend the base description with the text payload."""
        base_description = super().__str__()
        return "{}, text = {}".format(base_description, self.text)

    def compile(self):
        """Return header, length field and text packed back to back.

        The width of the length field is chosen from ``len(self.text)``.
        """
        text_length = len(self.text)
        width = YaraIndex.from_size(text_length)
        header = super()._compile(width)
        length_field = width.compile_index(text_length)
        layout = self.__FORMAT.format(
            size_super=len(header),
            size_text_data=len(length_field),
            size_text=text_length,
        )
        logger.debug("{}: fmt = {}, super_data = {}, size_text_data = {}, text = {}".format("StringBlockText", layout, header, length_field, self.text))
        return struct.pack(layout, header, length_field, self.text)
+
+class StringBlockRange(StringBlock):
+
+ # little endian ('<'), super_data, length_min, length_max
+ __FORMAT = "<{size_super}s{size_length_min_data}s{size_length_max_data}s"
+
+ _TYPE = 0b0001
+
+ def __init__(self, length_min, length_max, mask_left = 0xFF, mask_right = 0xFF):
+ super().__init__(mask_left, mask_right)
+ self.length_min = length_min
+ self.length_max = length_max
+
+ def __str__(self):
+ return "{}, length_min = {}, length_max = {}".format(super().__str__(), self.length_min, self.length_max)
- __FORMAT_HEADER = "=3sccI"
- __FORMAT_ENTRY = "=c{size_id}sc"
- __FORMAT_STRING = "=c{size_id}scH{size_text}scII"
- __FORMAT_WILDCARD = "=Ic"
- __FORMAT_RANGE = "=II"
- __FORMAT_OPERATOR = "=c"
- __FORMAT_OPERATOR_OF = "=cc"
- __FORMAT_OPERATOR_OF_ELEMENT = "=c"
- __FORMAT_OPERATOR_SINGLE = "=c"
+ def compile(self):
+ index = YaraIndex.from_size(self.length_max)
+ super_data = super()._compile(index)
+ length_min_data = index.compile_index(self.length_min)
+ length_max_data = index.compile_index(self.length_max)
+ fmt = self.__FORMAT.format(size_super=len(super_data), size_length_min_data=len(length_min_data), size_length_max_data=len(length_max_data))
+ logger.debug("{}: fmt = {}, super_data = {}, length_min_data = {}, length_max_data = {}".format("StringBlockRange", fmt, super_data, length_min_data, length_max_data))
+ return struct.pack(fmt, super_data, length_min_data, length_max_data)
+
+class YaraSignature(object):
+
+ # little endian ('<'), modifiers, n_blocks_data, index_map_data, blocks_data
+ __FORMAT = "<H{size_n_blocks_data}s{size_index_map_data}s{size_blocks_data}s"
__STRING_TYPE_STRING = 0
__STRING_TYPE_HEX = 1
@@ -42,6 +186,153 @@ class YaraDatabase(object):
__PATTERN_WILDCARD_HIGH = re.compile(r"^\?[0-9A-Fa-f]$")
__PATTERN_WILDCARD_LOW = re.compile(r"^[0-9A-Fa-f]\?$")
__PATTERN_WILDCARD_BOTH = re.compile(r"^\?\?$")
+
+ @staticmethod
+ def build_blocks(stringg):
+ blocks = list()
+ block = StringBlock()
+ if stringg["type"] == YaraSignature.__STRING_TYPE_STRING:
+ block = StringBlockText(s["text"].encode("utf-8"))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ elif stringg["type"] == YaraSignature.__STRING_TYPE_HEX:
+ for symbol in stringg["text"].strip().split(' '):
+ logger.debug("Building block for symbol: {}".format(symbol))
+ match = re.match(YaraSignature.__PATTERN_RANGE_VARIABLE, symbol)
+ if match:
+ block = StringBlockRange(int(match.group(1)) * 8, int(match.group(2)) * 8)
+ if len(blocks) > 0:
+ if isinstance(blocks[-1], StringBlockRange):
+ block = blocks.pop()
+ logger.debug("Extending block: {}".format(block))
+ block.length_max += int(match.group(2)) * 8
+ logger.debug("Extended block: {}".format(block))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ continue
+ match = re.match(YaraSignature.__PATTERN_RANGE_FIXED, symbol)
+ if match:
+ block = StringBlockRange(int(match.group(1)) * 8, int(match.group(1)) * 8)
+ if len(blocks) > 0:
+ if isinstance(blocks[-1], StringBlockRange):
+ block = blocks.pop()
+ logger.debug("Extending block: {}".format(block))
+ block.length_max += int(match.group(1)) * 8
+ logger.debug("Extended block: {}".format(block))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ continue
+ if re.match(YaraSignature.__PATTERN_WILDCARD_HIGH, symbol):
+ block = StringBlockRange(4, 4, 0xF0, 0xF0)
+ if len(blocks) > 0:
+ if isinstance(blocks[-1], StringBlockRange):
+ block = blocks.pop()
+ logger.debug("Extending block: {}".format(block))
+ block.length_max += 4
+ block.mask_right = 0xF0
+ logger.debug("Extended block: {}".format(block))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ symbol = symbol.replace('?', '0')
+ block = StringBlockText(bytearray.fromhex(symbol), 0x0F, 0x0F)
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ continue
+ if re.match(YaraSignature.__PATTERN_WILDCARD_LOW, symbol):
+ symbol = symbol.replace('?', '0')
+ block = StringBlockText(bytearray.fromhex(symbol), 0x0F, 0x0F)
+ if len(blocks) > 0:
+ if isinstance(blocks[-1], StringBlockText):
+ block = blocks.pop()
+ logger.debug("Extending block: {}".format(block))
+ block.mask_right = 0xF0
+ block.text.extend(bytearray.fromhex(symbol))
+ logger.debug("Extended block: {}".format(block))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ block = StringBlockRange(4, 4, 0x0F, 0x0F)
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ continue
+ if re.match(YaraSignature.__PATTERN_WILDCARD_BOTH, symbol):
+ block = StringBlockRange(8, 8, 0xFF, 0xFF)
+ if len(blocks) > 0:
+ if isinstance(blocks[-1], StringBlockRange):
+ block = blocks.pop()
+ logger.debug("Extending block: {}".format(block))
+ block.length_max += 8
+ block.mask_right = 0xFF
+ logger.debug("Extended block: {}".format(block))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ continue
+ block = StringBlockText(bytearray.fromhex(symbol))
+ if len(blocks) > 0:
+ if isinstance(blocks[-1], StringBlockText):
+ block = blocks.pop()
+ logger.debug("Extending block: {}".format(block))
+ block.text.extend(bytearray.fromhex(symbol))
+ block.mask_right = 0xFF
+ logger.debug("Extended block: {}".format(block))
+ logger.debug("Appending block: {}".format(block))
+ blocks.append(block)
+ continue
+ elif s["type"] == YaraDatabase.__STRING_TYPE_REGEX:
+ logger.error("Regex not supported yet!")
+ logger.error("Unsupported block type: {}".format(s["type"]))
+ block = StringBlockText(bytearray([0]))
+ logger.info("Appending stub block: {}".format(block))
+ blocks.append(block)
+ else:
+ logger.error("Unsupported block type: {}".format(s["type"]))
+ block = StringBlockText(bytearray([0]))
+ logger.info("Appending stub block: {}".format(block))
+ blocks.append(block)
+ return blocks
+
+ @staticmethod
+ def from_dict(dictt):
+ modifiers_origin = (((1 if dictt["modifiers"]["nocase"] else 0) << 6) |
+ ((1 if dictt["modifiers"]["ascii"] else 0) << 5) |
+ ((1 if dictt["modifiers"]["wide"] else 0) << 4) |
+ ((1 if dictt["modifiers"]["fullword"] else 0) << 3) |
+ ((1 if dictt["modifiers"]["private"] else 0) << 2) |
+ ((1 if dictt["modifiers"]["i"] else 0) << 1) |
+ ((1 if dictt["modifiers"]["s"] else 0) << 0))
+ return YaraSignature(dictt["id"], modifiers_origin, YaraSignature.build_blocks(dictt))
+
+ def __init__(self, identifier, modifiers_origin = 0, blocks = list()):
+ self.identifier = identifier
+ self.modifiers_origin = modifiers_origin
+ self.blocks = blocks
+
+ def compile(self, store_identifier_signature, store_index_map_string_blocks):
+ blocks_data = bytearray()
+ indices = list()
+ for block in self.blocks:
+ indices.append(len(blocks_data))
+ blocks_data.extend(block.compile())
+ index_elements = YaraIndex.from_size(len(self.blocks))
+ index_data = YaraIndex.from_size(len(blocks_data))
+ index_map = YaraIndexMap(index_data, indices)
+ index_map_data = index_map.compile()
+ n_blocks_data = index_elements.compile_index(len(self.blocks))
+ modifiers = self.modifiers_origin | (index_elements.compile_modifier()[0] << 8) | (index_data.compile_modifier()[0] << 10) | ((1 if store_index_map_string_blocks else 0) << 12)
+ fmt = self.__FORMAT.format(size_n_blocks_data=len(n_blocks_data), size_index_map_data=(len(index_map_data) if store_index_map_string_blocks else 0), size_blocks_data=len(blocks_data))
+ logger.debug("{}: fmt = {}, modifiers = {}, n_blocks_data = {}, index_map_data = {}, blocks_data = {}".format("YaraSignature", fmt, modifiers, n_blocks_data, index_map_data, blocks_data))
+ return struct.pack(fmt, modifiers, n_blocks_data, index_map_data, blocks_data)
+
+class YaraCondition(object):
+
+ # little endian ('<'), modifiers, condition_size, condition_data
+ __FORMAT = "<B{size_size_data}s{size_data}s"
+
+ __FORMAT_OPERATOR = "<c"
+ __FORMAT_OPERATOR_OF = "<cc"
+ __FORMAT_OPERATOR_OF_ELEMENT = "<c"
+ __FORMAT_OPERATOR_SINGLE = "<c"
+ __FORMAT_CONDITION_SIZE = "<H"
+
__PATTERN_OF = re.compile(r"((\d+)|(all)|(any))\s+of\s+([\w\_\(\)\$\*\,]+)")
__PATTERN_AND = re.compile(r"(.*)\s+and\s+(.*)")
__PATTERN_OR = re.compile(r"(.*)\s+or\s+(.*)")
@@ -54,30 +345,23 @@ class YaraDatabase(object):
__CONDITION_OPERATOR_FALSE = 5
@staticmethod
- def parse_file(file):
- container = json.load(file)
- entries = list()
- entries.extend(container["rules"])
- return entries
-
- @staticmethod
def build_tree(condition, parent):
node = OperatorTree()
node.data = condition
logger.debug("Parsing condition = {}".format(condition))
- match = re.findall(YaraDatabase.__PATTERN_OR, condition)
+ match = re.findall(YaraCondition.__PATTERN_OR, condition)
if match:
- node.left = YaraDatabase.build_tree(match[0][0], node)
- node.right = YaraDatabase.build_tree(match[0][1], node)
- node.operator = YaraDatabase.__CONDITION_OPERATOR_OR
+ node.left = YaraCondition.build_tree(match[0][0], node)
+ node.right = YaraCondition.build_tree(match[0][1], node)
+ node.operator = YaraCondition.__CONDITION_OPERATOR_OR
return node
- match = re.findall(YaraDatabase.__PATTERN_AND, condition)
+ match = re.findall(YaraCondition.__PATTERN_AND, condition)
if match:
- node.left = YaraDatabase.build_tree(match[0][0], node)
- node.right = YaraDatabase.build_tree(match[0][1], node)
- node.operator = YaraDatabase.__CONDITION_OPERATOR_AND
+ node.left = YaraCondition.build_tree(match[0][0], node)
+ node.right = YaraCondition.build_tree(match[0][1], node)
+ node.operator = YaraCondition.__CONDITION_OPERATOR_AND
return node
- match = re.findall(YaraDatabase.__PATTERN_OF, condition)
+ match = re.findall(YaraCondition.__PATTERN_OF, condition)
if match:
logger.debug("Leaf: OperatorOf, match = {}, n = {}, pattern = {}".format(match, match[0][0], match[0][4]))
return OperatorOf(parent, match[0][0], match[0][4])
@@ -87,16 +371,13 @@ class YaraDatabase(object):
@staticmethod
def compile_tree(node, strings):
if isinstance(node, OperatorTree):
- data_left = YaraDatabase.compile_tree(node.left, strings)
- data_right = YaraDatabase.compile_tree(node.right, strings)
- logger.debug("Compiling OperatorTree, left = {}, right = {}".format(data_left, data_right))
- data_left += data_right
- data_left += struct.pack(YaraDatabase.__FORMAT_OPERATOR, node.operator.to_bytes(1))
- return data_left
+ data = bytearray(struct.pack(YaraCondition.__FORMAT_OPERATOR, node.operator.to_bytes(1)))
+ data += YaraCondition.compile_tree(node.left, strings)
+ data += YaraCondition.compile_tree(node.right, strings)
+ return data
elif isinstance(node, OperatorOf):
logger.debug("Compiling OperatorOf, n = {}, pattern = {}".format(node.n, node.pattern))
- data = bytearray()
- data += struct.pack(YaraDatabase.__FORMAT_OPERATOR, YaraDatabase.__CONDITION_OPERATOR_OF.to_bytes(1))
+ data = bytearray(struct.pack(YaraCondition.__FORMAT_OPERATOR, YaraCondition.__CONDITION_OPERATOR_OF.to_bytes(1)))
of_elements = list()
pattern = str()
if node.pattern.strip() == "them":
@@ -135,9 +416,9 @@ class YaraDatabase(object):
n = 0
if n == "any":
n = 1
- data += struct.pack(YaraDatabase.__FORMAT_OPERATOR_OF, int(n).to_bytes(1), len(of_elements).to_bytes(1))
+ data += struct.pack(YaraCondition.__FORMAT_OPERATOR_OF, int(n).to_bytes(1), len(of_elements).to_bytes(1))
for e in of_elements:
- data += struct.pack(YaraDatabase.__FORMAT_OPERATOR_OF_ELEMENT, e.to_bytes(1))
+ data += struct.pack(YaraCondition.__FORMAT_OPERATOR_OF_ELEMENT, e.to_bytes(1))
return data
else:
logger.debug("Compiling single identifier {}".format(node))
@@ -150,97 +431,117 @@ class YaraDatabase(object):
c += 1
else:
logger.warning("Single identifier {} not found, defaulting to true".format(node))
- return bytearray(struct.pack(YaraDatabase.__FORMAT_OPERATOR, YaraDatabase.__CONDITION_OPERATOR_TRUE.to_bytes(1)))
+ return bytearray(struct.pack(YaraCondition.__FORMAT_OPERATOR, YaraDatabase.__CONDITION_OPERATOR_TRUE.to_bytes(1)))
+
+ @staticmethod
+ def from_string(signature_ids, stringg):
+ return YaraCondition(signature_ids, YaraCondition.build_tree(stringg, None))
+
+ def __init__(self, signature_ids = list(), node = OperatorTree()):
+ self.signature_ids = signature_ids
+ self.node = node
+
+ def compile(self):
+ data = self.compile_tree(self.node, self.signature_ids)
+ index = YaraIndex.from_size(len(data))
+ modifiers = index.compile_modifier()[0]
+ size_data = index.compile_index(len(data))
+ fmt = self.__FORMAT.format(size_size_data=len(size_data), size_data=len(data))
+ logger.debug("{}: fmt = {}, modifiers = {}, size_data = {}, data = {}".format("YaraCondition", fmt, modifiers, size_data, data))
+ return struct.pack(fmt, modifiers, size_data, data)
+
class YaraEntry(object):
    """One rule entry: an identifier, its signatures and one condition."""

    # little endian ('<'), modifiers, identifier_size, identifier, signatures_size, index_map, signatures, condition
    __FORMAT = "<B{size_identifier_size_data}s{size_identifier}s{size_n_signatures_data}s{size_index_map_data}s{size_signatures_data}s{size_condition_data}s"

    @staticmethod
    def from_dict(dictt):
        """Build an entry from a rule dictionary.

        Reads ``dictt["identifier"]``, ``dictt["strings"]`` (each element
        is passed to ``YaraSignature.from_dict`` and its ``"id"`` is
        collected) and ``dictt["condition"]``.
        """
        return YaraEntry(dictt["identifier"], [YaraSignature.from_dict(s) for s in dictt["strings"]], YaraCondition.from_string([s["id"] for s in dictt["strings"]], dictt["condition"]))

    def __init__(self, identifier = "Entry", signatures = None, condition = None):
        """Store identifier, signatures and condition.

        Args:
            identifier: entry name (a ``str``; it is UTF-8 encoded on compile).
            signatures: list of signature objects; a new empty list is
                created when omitted.
            condition: condition object; a new ``YaraCondition`` is created
                when omitted.
        """
        # Defaults are built per call so entries never share one list or
        # one condition object.
        self.identifier = identifier
        self.signatures = list() if signatures is None else signatures
        self.condition = YaraCondition() if condition is None else condition

    def compile(self, store_identifier_entry = False, store_identifier_signature = False, store_index_map_signatures = False, store_index_map_string_blocks = False):
        """Serialize the entry with all signatures and the condition.

        Output order: modifiers byte, identifier size and identifier (only
        when ``store_identifier_entry``), signature count, index map of
        signature offsets (only when ``store_index_map_signatures``),
        signature data, condition data.

        Modifiers byte: bits 0-1 width code of the identifier size,
        bits 2-3 width code of the signature count, bits 4-5 width code of
        the signature offsets, bit 6 identifier stored, bit 7 index map
        stored.
        """
        signatures_data = bytearray()
        indices = list()
        for signature in self.signatures:
            # Offset of a signature is the data length before it is added.
            indices.append(len(signatures_data))
            signatures_data.extend(signature.compile(store_identifier_signature, store_index_map_string_blocks))
        identifier_data = self.identifier.encode("utf-8")
        # Sizes are taken from the encoded bytes. The character count is
        # smaller for non-ASCII identifiers and would truncate them.
        identifier_size = len(identifier_data) if store_identifier_entry else 0
        index_identifier = YaraIndex.from_size(identifier_size)
        index_signatures = YaraIndex.from_size(len(self.signatures))
        index_signatures_data = YaraIndex.from_size(len(signatures_data))
        index_map = YaraIndexMap(index_signatures_data, indices)
        index_map_data = index_map.compile()
        modifiers = (index_identifier.compile_modifier()[0] << 0) | (index_signatures.compile_modifier()[0] << 2) | (index_signatures_data.compile_modifier()[0] << 4) | ((1 if store_identifier_entry else 0) << 6) | ((1 if store_index_map_signatures else 0) << 7)
        identifier_size_data = index_identifier.compile_index(identifier_size)
        n_signatures_data = index_signatures.compile_index(len(self.signatures))
        condition_data = self.condition.compile()
        # Zero-width fields drop the optional parts from the output.
        fmt = self.__FORMAT.format(
            size_identifier_size_data=(len(identifier_size_data) if store_identifier_entry else 0),
            size_identifier=identifier_size,
            size_n_signatures_data=len(n_signatures_data),
            size_index_map_data=(len(index_map_data) if store_index_map_signatures else 0),
            size_signatures_data=len(signatures_data),
            size_condition_data=len(condition_data))
        logger.debug("{}: fmt = {}, modifiers = {}, identifier_size_data = {}, identifier = {}, n_signatures_data = {}, index_map_data = {}, signatures_data = {}, condition_data = {}".format("YaraEntry", fmt, modifiers, identifier_size_data, identifier_data, n_signatures_data, index_map_data, signatures_data, condition_data))
        return struct.pack(fmt, modifiers, identifier_size_data, identifier_data, n_signatures_data, index_map_data, signatures_data, condition_data)
+
+class YaraDatabase(object):
+
+ __MAGIC = "YAC"
+ __VERSION_FORMAT = 0x0100
+
+ # little endian ('<'), magic[0], magic[1], magic[2], version_format, version_database, modifiers, hash, size_entries, index_map_data, entries_data
+ __FORMAT_HEADER = "<BBBHHB"
+ __FORMAT_BODY = "<{size_n_entries_data}s{size_index_map_data}s{size_entries_data}s"
+ __FORMAT = "<{size_header}s{size_hash}s{size_body}s"
+
+ @staticmethod
+ def from_json(filename, version):
+ db = YaraDatabase(version)
+ db.add_file(filename)
+ return db
+
+ def __init__(self, version = 0x0000, entries = list()):
+ self.__version = version
+ self.__entries = entries
def add_file(self, filename):
- f = open(filename, 'r')
- self.__entries.extend(YaraDatabase.parse_file(f))
- f.close()
-
- def write_file(self, filename):
- f = open(filename, 'wb')
- header = struct.pack(self.__FORMAT_HEADER, "YAC".encode("utf-8"), b'\x00', b'\x00', len(self.__entries))
- logger.debug("Header data = {}".format(header))
- f.write(header)
- for entry in self.__entries:
- logger.debug("Compiling entry {}".format(entry["identifier"]))
- entry_data = bytearray(struct.pack(self.__FORMAT_ENTRY.format(size_id=len(entry["identifier"])), len(entry["identifier"]).to_bytes(1), entry["identifier"].encode("utf-8"), len(entry["strings"]).to_bytes(1)))
- logger.debug("Entry data = {}".format(entry_data))
- string_data = bytearray()
- for s in entry["strings"]:
- logger.debug("Compiling string {}".format(s["id"]))
- # first parse text
- text = bytearray()
- wildcards = list()
- ranges = list()
- if s["type"] == YaraDatabase.__STRING_TYPE_STRING:
- logger.debug("String type string, text = {}".format(s["text"]))
- text += s["text"].encode("utf-8")
- elif s["type"] == YaraDatabase.__STRING_TYPE_HEX:
- bn = 0
- for block in s["text"].strip().split(' '):
- logger.debug("Compiling block = {}".format(block))
- match = re.match(self.__PATTERN_RANGE_VARIABLE, block)
- if match:
- for i in range(int(match.group(1)), int(match.group(2)), 1):
- logger.debug("Appending range = {}".format((bn, i)))
- ranges.append((bn, i))
- bn += 1
- continue
- match = re.match(self.__PATTERN_RANGE_FIXED, block)
- if match:
- logger.debug("Appending range = {}".format((bn, int(match.group(1)))))
- ranges.append((bn, int(match.group(1))))
- bn += 1
- continue
- if re.match(self.__PATTERN_WILDCARD_HIGH, block):
- wildcards.append((bn, 1))
- block = block.replace('?', '0')
- text += bytearray.fromhex(block)
- bn += 1
- continue
- if re.match(self.__PATTERN_WILDCARD_LOW, block):
- wildcards.append((bn, 0))
- block = block.replace('?', '0')
- text += bytearray.fromhex(block)
- bn += 1
- continue
- if re.match(self.__PATTERN_WILDCARD_BOTH, block):
- wildcards.append((bn, 0))
- wildcards.append((bn, 1))
- block = block.replace('?', '0')
- text += bytearray.fromhex(block)
- bn += 1
- continue
- text += bytearray.fromhex(block)
- bn += 1
- continue
- elif s["type"] == YaraDatabase.__STRING_TYPE_REGEX:
- text += s["text"].encode("utf-8")
- # parse modifiers
- modifiers = (((1 if s["modifiers"]["nocase"] else 0) << 6) |
- ((1 if s["modifiers"]["ascii"] else 0) << 5) |
- ((1 if s["modifiers"]["wide"] else 0) << 4) |
- ((1 if s["modifiers"]["fullword"] else 0) << 3) |
- ((1 if s["modifiers"]["private"] else 0) << 2) |
- ((1 if s["modifiers"]["i"] else 0) << 1) |
- ((1 if s["modifiers"]["s"] else 0) << 0))
- string_data += struct.pack(self.__FORMAT_STRING.format(size_id=len(s["id"]), size_text=len(text)), len(s["id"]).to_bytes(1), s["id"].encode("utf-8"), s["type"].to_bytes(1), len(text), text, modifiers.to_bytes(1), len(wildcards), len(ranges))
- for wildcard in wildcards:
- string_data += struct.pack(self.__FORMAT_WILDCARD, wildcard[0], wildcard[1].to_bytes(1))
- for r in ranges:
- string_data += struct.pack(self.__FORMAT_RANGE, r[0], r[1])
- logger.debug("Building conditional operator tree for entry = {}, condition = {}".format(entry["identifier"], entry["condition"]))
- node = YaraDatabase.build_tree(entry["condition"], None)
- logger.debug("Compiling conditional operator tree for entry = {}, condition = {}".format(entry["identifier"], entry["condition"]))
- condition_data = YaraDatabase.compile_tree(node, [s["id"] for s in entry["strings"]])
- logger.debug("Compilation done for entry {}".format(entry["identifier"]))
- f.write(entry_data)
- f.write(string_data)
- f.write(condition_data)
- logger.debug("Compilation done for file {}".format(filename))
- f.close()
+ file = open(filename, 'r')
+ container = json.load(file)
+ entries_dicts = list()
+ entries_dicts.extend(container["rules"])
+ for entry_dict in entries_dicts:
+ self.__entries.append(YaraEntry.from_dict(entry_dict))
+ file.close()
- def __init__(self):
- self.__entries = list()
- pass \ No newline at end of file
+ def compile(self, store_identifier_entry = False, store_identifier_signature = False, store_index_map_entries = False, store_index_map_signatures = False, store_index_map_string_blocks = False, store_hash = False, hash_salt = bytearray()):
+ magic = self.__MAGIC.encode("utf-8")
+ entries_data = bytearray()
+ indices = list()
+ for entry in self.__entries:
+ indices.append(len(entries_data))
+ entries_data.extend(entry.compile(store_identifier_entry, store_identifier_signature, store_index_map_signatures, store_index_map_string_blocks))
+ index_entries = YaraIndex.from_size(len(self.__entries))
+ index_entries_data = YaraIndex.from_size(len(entries_data))
+ n_entries_data = index_entries.compile_index(len(self.__entries))
+ index_map_data = YaraIndexMap(index_entries_data, indices).compile()
+ modifiers = (index_entries.compile_modifier()[0] << 0) | (index_entries_data.compile_modifier()[0] << 2) | ((1 if store_index_map_entries else 0) << 4) | ((1 if store_hash else 0) << 5)
+ fmt = self.__FORMAT_HEADER
+ logger.debug("{}: Header, fmt = {}, magic[0] = {}, magic[1] = {}, magic[2] = {}, version_format = {}, version_database = {}, modifiers = {}".format("YaraDatabase", fmt, magic[0], magic[1], magic[2], self.__VERSION_FORMAT, self.__version, modifiers))
+ header_data = struct.pack(fmt, magic[0], magic[1], magic[2], self.__VERSION_FORMAT, self.__version, modifiers)
+ fmt = self.__FORMAT_BODY.format(size_n_entries_data=len(n_entries_data), size_index_map_data=len(index_map_data), size_entries_data=len(entries_data))
+ logger.debug("{}: Body, fmt = {}, n_entries_data = {}, index_map_data = {}, entries_data = {}".format("YaraDatabase", fmt, n_entries_data, index_map_data, entries_data))
+ body_data = struct.pack(fmt, n_entries_data, index_map_data, entries_data)
+ h = sha256()
+ h.update(header_data)
+ h.update(body_data)
+ h.update(hash_salt)
+ digest = h.digest()
+ fmt = self.__FORMAT.format(size_header=len(header_data), size_hash=(len(digest) if store_hash else 0), size_body=len(body_data))
+ logger.debug("{}: fmt = {}, header_data = {}, digest = {}, body_data = {}".format("YaraDatabase", fmt, header_data, digest, body_data))
+ return struct.pack(fmt, header_data, digest, body_data) \ No newline at end of file