From a23d53f2ae50aec982004581546ea5e51e8a6da3 Mon Sep 17 00:00:00 2001 From: Leonard Kugis Date: Wed, 23 Apr 2025 18:44:45 +0200 Subject: yara: Refactored entire YAC structure --- yara.py | 555 +++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 428 insertions(+), 127 deletions(-) (limited to 'yara.py') diff --git a/yara.py b/yara.py index 9252c68..1ff6a36 100644 --- a/yara.py +++ b/yara.py @@ -4,9 +4,29 @@ import os import struct import re import logging +from hashlib import sha256 logger = logging.getLogger(__name__) +MODIFIER_INDEX_SIZE_8_BIT = 0b00 +MODIFIER_INDEX_SIZE_16_BIT = 0b01 +MODIFIER_INDEX_SIZE_32_BIT = 0b10 +MODIFIER_INDEX_SIZE_64_BIT = 0b11 + +MAP_MODIFIER_FORMAT = { + MODIFIER_INDEX_SIZE_8_BIT: "B", + MODIFIER_INDEX_SIZE_16_BIT: "H", + MODIFIER_INDEX_SIZE_32_BIT: "I", + MODIFIER_INDEX_SIZE_64_BIT: "Q", +} + +class YaraCompileConfig(object): + def __init__(self, store_identifier_entry, store_identifier_signature, store_index_map_entries, store_index_map_signatures): + self.store_identifier_entry = store_identifier_entry + self.store_identifier_signature = store_identifier_signature + self.store_index_map_entries = store_index_map_entries + self.store_index_map_signatures = store_index_map_signatures + class OperatorTree(object): def __init__(self): self.left = None @@ -21,17 +41,141 @@ class OperatorOf(object): self.n = n self.pattern = pattern -class YaraDatabase(object): +class YaraIndex(object): + + _MODIFIER = 0b00 + __FORMAT_MODIFIER = "= 0) and (size < (2 ** 8)): + return YaraIndex8() + elif (size >= (2 ** 8)) and (size < (2 ** 16)): + return YaraIndex16() + elif (size >= (2 ** 16)) and (size < (2 ** 32)): + return YaraIndex32() + elif (size >= (2 ** 32)) and (size < (2 ** 64)): + return YaraIndex64() + return YaraIndex() + + def compile_index(self, index): + return struct.pack(self._FORMAT_INDEX, index) + + def compile_modifier(self): + return struct.pack(self.__FORMAT_MODIFIER, self._MODIFIER) + +class YaraIndex8(YaraIndex): + _MODIFIER = 0b00 + _FORMAT_INDEX = "B" + +class YaraIndex16(YaraIndex): + _MODIFIER = 0b01 + _FORMAT_INDEX = "H" + +class YaraIndex32(YaraIndex): + _MODIFIER = 0b10 + _FORMAT_INDEX = "I" + +class YaraIndex64(YaraIndex): + _MODIFIER = 0b11 + _FORMAT_INDEX = "Q" + +class YaraAddressing(object): + pass + +class YaraAddressingBit(YaraAddressing): + pass + +class YaraAddressingNibble(YaraAddressing): + pass + +class YaraAddressingByte(YaraAddressing): + pass + +class YaraIndexMap(object): + + __FORMAT = "<{size}s" + + def __init__(self, index = YaraIndex(), indices = list()): + self.index = index + self.indices = indices + + def compile(self): + indices_data = bytearray() + for index in self.indices: + indices_data.extend(self.index.compile_index(index)) + fmt = self.__FORMAT.format(size=len(self.indices)) + return struct.pack(fmt, indices_data) + +class StringBlock(object): + + # big endian, modifiers, mask_left, mask_right + __FORMAT = " 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += int(match.group(2)) * 8 + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + match = re.match(YaraSignature.__PATTERN_RANGE_FIXED, symbol) + if match: + block = StringBlockRange(int(match.group(1)) * 8, int(match.group(1)) * 8) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += int(match.group(1)) * 8 + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + if re.match(YaraSignature.__PATTERN_WILDCARD_HIGH, symbol): + block = StringBlockRange(4, 4, 0xF0, 0xF0) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += 4 + block.mask_right = 0xF0 + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + symbol = symbol.replace('?', '0') + block = StringBlockText(bytearray.fromhex(symbol), 0x0F, 0x0F) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + if re.match(YaraSignature.__PATTERN_WILDCARD_LOW, symbol): + symbol = symbol.replace('?', '0') + block = StringBlockText(bytearray.fromhex(symbol), 0x0F, 0x0F) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockText): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.mask_right = 0xF0 + block.text.extend(bytearray.fromhex(symbol)) + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + block = StringBlockRange(4, 4, 0x0F, 0x0F) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + if re.match(YaraSignature.__PATTERN_WILDCARD_BOTH, symbol): + block = StringBlockRange(8, 8, 0xFF, 0xFF) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += 8 + block.mask_right = 0xFF + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + block = StringBlockText(bytearray.fromhex(symbol)) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockText): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.text.extend(bytearray.fromhex(symbol)) + block.mask_right = 0xFF + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + elif s["type"] == YaraDatabase.__STRING_TYPE_REGEX: + logger.error("Regex not supported yet!") + logger.error("Unsupported block type: {}".format(s["type"])) + block = StringBlockText(bytearray([0])) + logger.info("Appending stub block: {}".format(block)) + blocks.append(block) + else: + logger.error("Unsupported block type: {}".format(s["type"])) + block = StringBlockText(bytearray([0])) + logger.info("Appending stub block: {}".format(block)) + blocks.append(block) + return blocks + + @staticmethod + def from_dict(dictt): + modifiers_origin = (((1 if dictt["modifiers"]["nocase"] else 0) << 6) | + ((1 if dictt["modifiers"]["ascii"] else 0) << 5) | + ((1 if dictt["modifiers"]["wide"] else 0) << 4) | + ((1 if dictt["modifiers"]["fullword"] else 0) << 3) | + ((1 if dictt["modifiers"]["private"] else 0) << 2) | + ((1 if dictt["modifiers"]["i"] else 0) << 1) | + ((1 if dictt["modifiers"]["s"] else 0) << 0)) + return YaraSignature(dictt["id"], modifiers_origin, YaraSignature.build_blocks(dictt)) + + def __init__(self, identifier, modifiers_origin = 0, blocks = list()): + self.identifier = identifier + self.modifiers_origin = modifiers_origin + self.blocks = blocks + + def compile(self, store_identifier_signature, store_index_map_string_blocks): + blocks_data = bytearray() + indices = list() + for block in self.blocks: + indices.append(len(blocks_data)) + blocks_data.extend(block.compile()) + index_elements = YaraIndex.from_size(len(self.blocks)) + index_data = YaraIndex.from_size(len(blocks_data)) + index_map = YaraIndexMap(index_data, indices) + index_map_data = index_map.compile() + n_blocks_data = index_elements.compile_index(len(self.blocks)) + modifiers = self.modifiers_origin | (index_elements.compile_modifier()[0] << 8) | (index_data.compile_modifier()[0] << 10) | ((1 if store_index_map_string_blocks else 0) << 12) + fmt = self.__FORMAT.format(size_n_blocks_data=len(n_blocks_data), size_index_map_data=(len(index_map_data) if store_index_map_string_blocks else 0), size_blocks_data=len(blocks_data)) + logger.debug("{}: fmt = {}, modifiers = {}, n_blocks_data = {}, index_map_data = {}, blocks_data = {}".format("YaraSignature", fmt, modifiers, n_blocks_data, index_map_data, blocks_data)) + return struct.pack(fmt, modifiers, n_blocks_data, index_map_data, blocks_data) + +class YaraCondition(object): + + # big endian, modifiers, condition_size, condition_data + __FORMAT = "