diff options
author | Leonard Kugis <leonard@kug.is> | 2025-04-23 16:44:45 +0000 |
---|---|---|
committer | Leonard Kugis <leonard@kug.is> | 2025-04-23 16:44:45 +0000 |
commit | a23d53f2ae50aec982004581546ea5e51e8a6da3 (patch) | |
tree | 720b0d43fe518093c03ce7a48dd7479b4b17202c | |
parent | 1919efbe0c565e3fc4a74d5a075df0456effaa06 (diff) | |
download | yara-compiler-a23d53f2ae50aec982004581546ea5e51e8a6da3.tar.gz yara-compiler-a23d53f2ae50aec982004581546ea5e51e8a6da3.tar.bz2 yara-compiler-a23d53f2ae50aec982004581546ea5e51e8a6da3.zip |
yara: Refactored entire YAC structure
-rw-r--r-- | yara.py | 555 |
1 files changed, 428 insertions, 127 deletions
@@ -4,9 +4,29 @@ import os import struct import re import logging +from hashlib import sha256 logger = logging.getLogger(__name__) +MODIFIER_INDEX_SIZE_8_BIT = 0b00 +MODIFIER_INDEX_SIZE_16_BIT = 0b01 +MODIFIER_INDEX_SIZE_32_BIT = 0b10 +MODIFIER_INDEX_SIZE_64_BIT = 0b11 + +MAP_MODIFIER_FORMAT = { + MODIFIER_INDEX_SIZE_8_BIT: "B", + MODIFIER_INDEX_SIZE_16_BIT: "H", + MODIFIER_INDEX_SIZE_32_BIT: "I", + MODIFIER_INDEX_SIZE_64_BIT: "Q", +} + +class YaraCompileConfig(object): + def __init__(self, store_identifier_entry, store_identifier_signature, store_index_map_entries, store_index_map_signatures): + self.store_identifier_entry = store_identifier_entry + self.store_identifier_signature = store_identifier_signature + self.store_index_map_entries = store_index_map_entries + self.store_index_map_signatures = store_index_map_signatures + class OperatorTree(object): def __init__(self): self.left = None @@ -21,17 +41,141 @@ class OperatorOf(object): self.n = n self.pattern = pattern -class YaraDatabase(object): +class YaraIndex(object): + + _MODIFIER = 0b00 + __FORMAT_MODIFIER = "<B" + _FORMAT_INDEX = "<B" + + @staticmethod + def from_size(size): + if (size >= 0) and (size < (2 ** 8)): + return YaraIndex8() + elif (size >= (2 ** 8)) and (size < (2 ** 16)): + return YaraIndex16() + elif (size >= (2 ** 16)) and (size < (2 ** 32)): + return YaraIndex32() + elif (size >= (2 ** 32)) and (size < (2 ** 64)): + return YaraIndex64() + return YaraIndex() + + def compile_index(self, index): + return struct.pack(self._FORMAT_INDEX, index) + + def compile_modifier(self): + return struct.pack(self.__FORMAT_MODIFIER, self._MODIFIER) + +class YaraIndex8(YaraIndex): + _MODIFIER = 0b00 + _FORMAT_INDEX = "B" + +class YaraIndex16(YaraIndex): + _MODIFIER = 0b01 + _FORMAT_INDEX = "H" + +class YaraIndex32(YaraIndex): + _MODIFIER = 0b10 + _FORMAT_INDEX = "I" + +class YaraIndex64(YaraIndex): + _MODIFIER = 0b11 + _FORMAT_INDEX = "Q" + +class YaraAddressing(object): + pass + +class YaraAddressingBit(YaraAddressing): + pass + +class YaraAddressingNibble(YaraAddressing): + pass + +class YaraAddressingByte(YaraAddressing): + pass + +class YaraIndexMap(object): + + __FORMAT = "<{size}s" + + def __init__(self, index = YaraIndex(), indices = list()): + self.index = index + self.indices = indices + + def compile(self): + indices_data = bytearray() + for index in self.indices: + indices_data.extend(self.index.compile_index(index)) + fmt = self.__FORMAT.format(size=len(self.indices)) + return struct.pack(fmt, indices_data) + +class StringBlock(object): + + # big endian, modifiers, mask_left, mask_right + __FORMAT = "<BBB" + + _TYPE = 0 # should not occur + + def __init__(self, mask_left = 0xFF, mask_right = 0xFF): + self.mask_left = mask_left + self.mask_right = mask_right + + def __str__(self): + return "{}, mask_left = {}, mask_right = {}".format(super(), self.mask_left, self.mask_right) + + def _compile(self, index): + modifiers = (index.compile_modifier()[0] << 4) | self._TYPE + return struct.pack(self.__FORMAT, modifiers, self.mask_left, self.mask_right) + +class StringBlockText(StringBlock): + + # big endian, super_data, size_text, text + __FORMAT = "<{size_super}s{size_text_data}s{size_text}s" + + _TYPE = 0b0000 + + def __init__(self, text, mask_left = 0xFF, mask_right = 0xFF): + super().__init__(mask_left, mask_right) + self.text = text + + def __str__(self): + return "{}, text = {}".format(super().__str__(), self.text) + + def compile(self): + index = YaraIndex.from_size(len(self.text)) + super_data = super()._compile(index) + size_text_data = index.compile_index(len(self.text)) + fmt = self.__FORMAT.format(size_super=len(super_data), size_text_data=len(size_text_data), size_text=len(self.text)) + logger.debug("{}: fmt = {}, super_data = {}, size_text_data = {}, text = {}".format("StringBlockText", fmt, super_data, size_text_data, self.text)) + return struct.pack(fmt, super_data, size_text_data, self.text) + +class StringBlockRange(StringBlock): + + # big endian, super_data, length_min, length_max + __FORMAT = "<{size_super}s{size_length_min_data}s{size_length_max_data}s" + + _TYPE = 0b0001 + + def __init__(self, length_min, length_max, mask_left = 0xFF, mask_right = 0xFF): + super().__init__(mask_left, mask_right) + self.length_min = length_min + self.length_max = length_max + + def __str__(self): + return "{}, length_min = {}, length_max = {}".format(super().__str__(), self.length_min, self.length_max) - __FORMAT_HEADER = "=3sccI" - __FORMAT_ENTRY = "=c{size_id}sc" - __FORMAT_STRING = "=c{size_id}scH{size_text}scII" - __FORMAT_WILDCARD = "=Ic" - __FORMAT_RANGE = "=II" - __FORMAT_OPERATOR = "=c" - __FORMAT_OPERATOR_OF = "=cc" - __FORMAT_OPERATOR_OF_ELEMENT = "=c" - __FORMAT_OPERATOR_SINGLE = "=c" + def compile(self): + index = YaraIndex.from_size(self.length_max) + super_data = super()._compile(index) + length_min_data = index.compile_index(self.length_min) + length_max_data = index.compile_index(self.length_max) + fmt = self.__FORMAT.format(size_super=len(super_data), size_length_min_data=len(length_min_data), size_length_max_data=len(length_max_data)) + logger.debug("{}: fmt = {}, super_data = {}, length_min_data = {}, length_max_data = {}".format("StringBlockRange", fmt, super_data, length_min_data, length_max_data)) + return struct.pack(fmt, super_data, length_min_data, length_max_data) + +class YaraSignature(object): + + # big endian, modifiers, n_blocks_data, index_map_data, blocks_data + __FORMAT = "<H{size_n_blocks_data}s{size_index_map_data}s{size_blocks_data}s" __STRING_TYPE_STRING = 0 __STRING_TYPE_HEX = 1 @@ -42,6 +186,153 @@ class YaraDatabase(object): __PATTERN_WILDCARD_HIGH = re.compile(r"^\?[0-9A-Fa-f]$") __PATTERN_WILDCARD_LOW = re.compile(r"^[0-9A-Fa-f]\?$") __PATTERN_WILDCARD_BOTH = re.compile(r"^\?\?$") + + @staticmethod + def build_blocks(stringg): + blocks = list() + block = StringBlock() + if stringg["type"] == YaraSignature.__STRING_TYPE_STRING: + block = StringBlockText(s["text"].encode("utf-8")) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + elif stringg["type"] == YaraSignature.__STRING_TYPE_HEX: + for symbol in stringg["text"].strip().split(' '): + logger.debug("Building block for symbol: {}".format(symbol)) + match = re.match(YaraSignature.__PATTERN_RANGE_VARIABLE, symbol) + if match: + block = StringBlockRange(int(match.group(1)) * 8, int(match.group(2)) * 8) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += int(match.group(2)) * 8 + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + match = re.match(YaraSignature.__PATTERN_RANGE_FIXED, symbol) + if match: + block = StringBlockRange(int(match.group(1)) * 8, int(match.group(1)) * 8) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += int(match.group(1)) * 8 + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + if re.match(YaraSignature.__PATTERN_WILDCARD_HIGH, symbol): + block = StringBlockRange(4, 4, 0xF0, 0xF0) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += 4 + block.mask_right = 0xF0 + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + symbol = symbol.replace('?', '0') + block = StringBlockText(bytearray.fromhex(symbol), 0x0F, 0x0F) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + if re.match(YaraSignature.__PATTERN_WILDCARD_LOW, symbol): + symbol = symbol.replace('?', '0') + block = StringBlockText(bytearray.fromhex(symbol), 0x0F, 0x0F) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockText): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.mask_right = 0xF0 + block.text.extend(bytearray.fromhex(symbol)) + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + block = StringBlockRange(4, 4, 0x0F, 0x0F) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + if re.match(YaraSignature.__PATTERN_WILDCARD_BOTH, symbol): + block = StringBlockRange(8, 8, 0xFF, 0xFF) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockRange): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.length_max += 8 + block.mask_right = 0xFF + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + block = StringBlockText(bytearray.fromhex(symbol)) + if len(blocks) > 0: + if isinstance(blocks[-1], StringBlockText): + block = blocks.pop() + logger.debug("Extending block: {}".format(block)) + block.text.extend(bytearray.fromhex(symbol)) + block.mask_right = 0xFF + logger.debug("Extended block: {}".format(block)) + logger.debug("Appending block: {}".format(block)) + blocks.append(block) + continue + elif s["type"] == YaraDatabase.__STRING_TYPE_REGEX: + logger.error("Regex not supported yet!") + logger.error("Unsupported block type: {}".format(s["type"])) + block = StringBlockText(bytearray([0])) + logger.info("Appending stub block: {}".format(block)) + blocks.append(block) + else: + logger.error("Unsupported block type: {}".format(s["type"])) + block = StringBlockText(bytearray([0])) + logger.info("Appending stub block: {}".format(block)) + blocks.append(block) + return blocks + + @staticmethod + def from_dict(dictt): + modifiers_origin = (((1 if dictt["modifiers"]["nocase"] else 0) << 6) | + ((1 if dictt["modifiers"]["ascii"] else 0) << 5) | + ((1 if dictt["modifiers"]["wide"] else 0) << 4) | + ((1 if dictt["modifiers"]["fullword"] else 0) << 3) | + ((1 if dictt["modifiers"]["private"] else 0) << 2) | + ((1 if dictt["modifiers"]["i"] else 0) << 1) | + ((1 if dictt["modifiers"]["s"] else 0) << 0)) + return YaraSignature(dictt["id"], modifiers_origin, YaraSignature.build_blocks(dictt)) + + def __init__(self, identifier, modifiers_origin = 0, blocks = list()): + self.identifier = identifier + self.modifiers_origin = modifiers_origin + self.blocks = blocks + + def compile(self, store_identifier_signature, store_index_map_string_blocks): + blocks_data = bytearray() + indices = list() + for block in self.blocks: + indices.append(len(blocks_data)) + blocks_data.extend(block.compile()) + index_elements = YaraIndex.from_size(len(self.blocks)) + index_data = YaraIndex.from_size(len(blocks_data)) + index_map = YaraIndexMap(index_data, indices) + index_map_data = index_map.compile() + n_blocks_data = index_elements.compile_index(len(self.blocks)) + modifiers = self.modifiers_origin | (index_elements.compile_modifier()[0] << 8) | (index_data.compile_modifier()[0] << 10) | ((1 if store_index_map_string_blocks else 0) << 12) + fmt = self.__FORMAT.format(size_n_blocks_data=len(n_blocks_data), size_index_map_data=(len(index_map_data) if store_index_map_string_blocks else 0), size_blocks_data=len(blocks_data)) + logger.debug("{}: fmt = {}, modifiers = {}, n_blocks_data = {}, index_map_data = {}, blocks_data = {}".format("YaraSignature", fmt, modifiers, n_blocks_data, index_map_data, blocks_data)) + return struct.pack(fmt, modifiers, n_blocks_data, index_map_data, blocks_data) + +class YaraCondition(object): + + # big endian, modifiers, condition_size, condition_data + __FORMAT = "<B{size_size_data}s{size_data}s" + + __FORMAT_OPERATOR = "<c" + __FORMAT_OPERATOR_OF = "<cc" + __FORMAT_OPERATOR_OF_ELEMENT = "<c" + __FORMAT_OPERATOR_SINGLE = "<c" + __FORMAT_CONDITION_SIZE = "<H" + __PATTERN_OF = re.compile(r"((\d+)|(all)|(any))\s+of\s+([\w\_\(\)\$\*\,]+)") __PATTERN_AND = re.compile(r"(.*)\s+and\s+(.*)") __PATTERN_OR = re.compile(r"(.*)\s+or\s+(.*)") @@ -54,30 +345,23 @@ class YaraDatabase(object): __CONDITION_OPERATOR_FALSE = 5 @staticmethod - def parse_file(file): - container = json.load(file) - entries = list() - entries.extend(container["rules"]) - return entries - - @staticmethod def build_tree(condition, parent): node = OperatorTree() node.data = condition logger.debug("Parsing condition = {}".format(condition)) - match = re.findall(YaraDatabase.__PATTERN_OR, condition) + match = re.findall(YaraCondition.__PATTERN_OR, condition) if match: - node.left = YaraDatabase.build_tree(match[0][0], node) - node.right = YaraDatabase.build_tree(match[0][1], node) - node.operator = YaraDatabase.__CONDITION_OPERATOR_OR + node.left = YaraCondition.build_tree(match[0][0], node) + node.right = YaraCondition.build_tree(match[0][1], node) + node.operator = YaraCondition.__CONDITION_OPERATOR_OR return node - match = re.findall(YaraDatabase.__PATTERN_AND, condition) + match = re.findall(YaraCondition.__PATTERN_AND, condition) if match: - node.left = YaraDatabase.build_tree(match[0][0], node) - node.right = YaraDatabase.build_tree(match[0][1], node) - node.operator = YaraDatabase.__CONDITION_OPERATOR_AND + node.left = YaraCondition.build_tree(match[0][0], node) + node.right = YaraCondition.build_tree(match[0][1], node) + node.operator = YaraCondition.__CONDITION_OPERATOR_AND return node - match = re.findall(YaraDatabase.__PATTERN_OF, condition) + match = re.findall(YaraCondition.__PATTERN_OF, condition) if match: logger.debug("Leaf: OperatorOf, match = {}, n = {}, pattern = {}".format(match, match[0][0], match[0][4])) return OperatorOf(parent, match[0][0], match[0][4]) @@ -87,16 +371,13 @@ class YaraDatabase(object): @staticmethod def compile_tree(node, strings): if isinstance(node, OperatorTree): - data_left = YaraDatabase.compile_tree(node.left, strings) - data_right = YaraDatabase.compile_tree(node.right, strings) - logger.debug("Compiling OperatorTree, left = {}, right = {}".format(data_left, data_right)) - data_left += data_right - data_left += struct.pack(YaraDatabase.__FORMAT_OPERATOR, node.operator.to_bytes(1)) - return data_left + data = bytearray(struct.pack(YaraCondition.__FORMAT_OPERATOR, node.operator.to_bytes(1))) + data += YaraCondition.compile_tree(node.left, strings) + data += YaraCondition.compile_tree(node.right, strings) + return data elif isinstance(node, OperatorOf): logger.debug("Compiling OperatorOf, n = {}, pattern = {}".format(node.n, node.pattern)) - data = bytearray() - data += struct.pack(YaraDatabase.__FORMAT_OPERATOR, YaraDatabase.__CONDITION_OPERATOR_OF.to_bytes(1)) + data = bytearray(struct.pack(YaraCondition.__FORMAT_OPERATOR, YaraCondition.__CONDITION_OPERATOR_OF.to_bytes(1))) of_elements = list() pattern = str() if node.pattern.strip() == "them": @@ -135,9 +416,9 @@ class YaraDatabase(object): n = 0 if n == "any": n = 1 - data += struct.pack(YaraDatabase.__FORMAT_OPERATOR_OF, int(n).to_bytes(1), len(of_elements).to_bytes(1)) + data += struct.pack(YaraCondition.__FORMAT_OPERATOR_OF, int(n).to_bytes(1), len(of_elements).to_bytes(1)) for e in of_elements: - data += struct.pack(YaraDatabase.__FORMAT_OPERATOR_OF_ELEMENT, e.to_bytes(1)) + data += struct.pack(YaraCondition.__FORMAT_OPERATOR_OF_ELEMENT, e.to_bytes(1)) return data else: logger.debug("Compiling single identifier {}".format(node)) @@ -150,97 +431,117 @@ class YaraDatabase(object): c += 1 else: logger.warning("Single identifier {} not found, defaulting to true".format(node)) - return bytearray(struct.pack(YaraDatabase.__FORMAT_OPERATOR, YaraDatabase.__CONDITION_OPERATOR_TRUE.to_bytes(1))) + return bytearray(struct.pack(YaraCondition.__FORMAT_OPERATOR, YaraDatabase.__CONDITION_OPERATOR_TRUE.to_bytes(1))) + + @staticmethod + def from_string(signature_ids, stringg): + return YaraCondition(signature_ids, YaraCondition.build_tree(stringg, None)) + + def __init__(self, signature_ids = list(), node = OperatorTree()): + self.signature_ids = signature_ids + self.node = node + + def compile(self): + data = self.compile_tree(self.node, self.signature_ids) + index = YaraIndex.from_size(len(data)) + modifiers = index.compile_modifier()[0] + size_data = index.compile_index(len(data)) + fmt = self.__FORMAT.format(size_size_data=len(size_data), size_data=len(data)) + logger.debug("{}: fmt = {}, modifiers = {}, size_data = {}, data = {}".format("YaraCondition", fmt, modifiers, size_data, data)) + return struct.pack(fmt, modifiers, size_data, data) + +class YaraEntry(object): + + # big endian, modifiers, identifier_size, identifier, signatures_size, index_map, signatures, condition + __FORMAT = "<B{size_identifier_size_data}s{size_identifier}s{size_n_signatures_data}s{size_index_map_data}s{size_signatures_data}s{size_condition_data}s" + + @staticmethod + def from_dict(dictt): + return YaraEntry(dictt["identifier"], [YaraSignature.from_dict(s) for s in dictt["strings"]], YaraCondition.from_string([s["id"] for s in dictt["strings"]], dictt["condition"])) + + def __init__(self, identifier = "Entry", signatures = list(), condition = YaraCondition()): + self.identifier = identifier + self.signatures = signatures + self.condition = condition + + def compile(self, store_identifier_entry = False, store_identifier_signature = False, store_index_map_signatures = False, store_index_map_string_blocks = False): + signatures_data = bytearray() + indices = list() + for signature in self.signatures: + indices.append(len(signatures_data)) + signatures_data.extend(signature.compile(store_identifier_signature, store_index_map_string_blocks)) + index_identifier = YaraIndex.from_size(len(self.identifier) if store_identifier_entry else 0) + index_signatures = YaraIndex.from_size(len(self.signatures)) + index_signatures_data = YaraIndex.from_size(len(signatures_data)) + index_map = YaraIndexMap(index_signatures_data, indices) + index_map_data = index_map.compile() + modifiers = (index_identifier.compile_modifier()[0] << 0) | (index_signatures.compile_modifier()[0] << 2) | (index_signatures_data.compile_modifier()[0] << 4) | ((1 if store_identifier_entry else 0) << 6) | ((1 if store_index_map_signatures else 0) << 7) + identifier_size_data = index_identifier.compile_index(len(self.identifier) if store_identifier_entry else 0) + n_signatures_data = index_signatures.compile_index(len(self.signatures)) + condition_data = self.condition.compile() + identifier_data = self.identifier.encode("utf-8") + fmt = self.__FORMAT.format( + size_identifier_size_data=(len(identifier_size_data) if store_identifier_entry else 0), + size_identifier=(len(self.identifier) if store_identifier_entry else 0), + size_n_signatures_data=len(n_signatures_data), + size_index_map_data=(len(index_map_data) if store_index_map_signatures else 0), + size_signatures_data=len(signatures_data), + size_condition_data=len(condition_data)) + logger.debug("{}: fmt = {}, modifiers = {}, identifier_size_data = {}, identifier = {}, n_signatures_data = {}, index_map_data = {}, signatures_data = {}, condition_data = {}".format("YaraEntry", fmt, modifiers, identifier_size_data, identifier_data, n_signatures_data, index_map_data, signatures_data, condition_data)) + return struct.pack(fmt, modifiers, identifier_size_data, identifier_data, n_signatures_data, index_map_data, signatures_data, condition_data) + +class YaraDatabase(object): + + __MAGIC = "YAC" + __VERSION_FORMAT = 0x0100 + + # big endian, magic[0], magic[1], magic[2], version_format, version_database, modifiers, hash, size_entries, index_map_data, entries_data + __FORMAT_HEADER = "<BBBHHB" + __FORMAT_BODY = "<{size_n_entries_data}s{size_index_map_data}s{size_entries_data}s" + __FORMAT = "<{size_header}s{size_hash}s{size_body}s" + + @staticmethod + def from_json(filename, version): + db = YaraDatabase(version) + db.add_file(filename) + return db + + def __init__(self, version = 0x0000, entries = list()): + self.__version = version + self.__entries = entries def add_file(self, filename): - f = open(filename, 'r') - self.__entries.extend(YaraDatabase.parse_file(f)) - f.close() - - def write_file(self, filename): - f = open(filename, 'wb') - header = struct.pack(self.__FORMAT_HEADER, "YAC".encode("utf-8"), b'\x00', b'\x00', len(self.__entries)) - logger.debug("Header data = {}".format(header)) - f.write(header) - for entry in self.__entries: - logger.debug("Compiling entry {}".format(entry["identifier"])) - entry_data = bytearray(struct.pack(self.__FORMAT_ENTRY.format(size_id=len(entry["identifier"])), len(entry["identifier"]).to_bytes(1), entry["identifier"].encode("utf-8"), len(entry["strings"]).to_bytes(1))) - logger.debug("Entry data = {}".format(entry_data)) - string_data = bytearray() - for s in entry["strings"]: - logger.debug("Compiling string {}".format(s["id"])) - # first parse text - text = bytearray() - wildcards = list() - ranges = list() - if s["type"] == YaraDatabase.__STRING_TYPE_STRING: - logger.debug("String type string, text = {}".format(s["text"])) - text += s["text"].encode("utf-8") - elif s["type"] == YaraDatabase.__STRING_TYPE_HEX: - bn = 0 - for block in s["text"].strip().split(' '): - logger.debug("Compiling block = {}".format(block)) - match = re.match(self.__PATTERN_RANGE_VARIABLE, block) - if match: - for i in range(int(match.group(1)), int(match.group(2)), 1): - logger.debug("Appending range = {}".format((bn, i))) - ranges.append((bn, i)) - bn += 1 - continue - match = re.match(self.__PATTERN_RANGE_FIXED, block) - if match: - logger.debug("Appending range = {}".format((bn, int(match.group(1))))) - ranges.append((bn, int(match.group(1)))) - bn += 1 - continue - if re.match(self.__PATTERN_WILDCARD_HIGH, block): - wildcards.append((bn, 1)) - block = block.replace('?', '0') - text += bytearray.fromhex(block) - bn += 1 - continue - if re.match(self.__PATTERN_WILDCARD_LOW, block): - wildcards.append((bn, 0)) - block = block.replace('?', '0') - text += bytearray.fromhex(block) - bn += 1 - continue - if re.match(self.__PATTERN_WILDCARD_BOTH, block): - wildcards.append((bn, 0)) - wildcards.append((bn, 1)) - block = block.replace('?', '0') - text += bytearray.fromhex(block) - bn += 1 - continue - text += bytearray.fromhex(block) - bn += 1 - continue - elif s["type"] == YaraDatabase.__STRING_TYPE_REGEX: - text += s["text"].encode("utf-8") - # parse modifiers - modifiers = (((1 if s["modifiers"]["nocase"] else 0) << 6) | - ((1 if s["modifiers"]["ascii"] else 0) << 5) | - ((1 if s["modifiers"]["wide"] else 0) << 4) | - ((1 if s["modifiers"]["fullword"] else 0) << 3) | - ((1 if s["modifiers"]["private"] else 0) << 2) | - ((1 if s["modifiers"]["i"] else 0) << 1) | - ((1 if s["modifiers"]["s"] else 0) << 0)) - string_data += struct.pack(self.__FORMAT_STRING.format(size_id=len(s["id"]), size_text=len(text)), len(s["id"]).to_bytes(1), s["id"].encode("utf-8"), s["type"].to_bytes(1), len(text), text, modifiers.to_bytes(1), len(wildcards), len(ranges)) - for wildcard in wildcards: - string_data += struct.pack(self.__FORMAT_WILDCARD, wildcard[0], wildcard[1].to_bytes(1)) - for r in ranges: - string_data += struct.pack(self.__FORMAT_RANGE, r[0], r[1]) - logger.debug("Building conditional operator tree for entry = {}, condition = {}".format(entry["identifier"], entry["condition"])) - node = YaraDatabase.build_tree(entry["condition"], None) - logger.debug("Compiling conditional operator tree for entry = {}, condition = {}".format(entry["identifier"], entry["condition"])) - condition_data = YaraDatabase.compile_tree(node, [s["id"] for s in entry["strings"]]) - logger.debug("Compilation done for entry {}".format(entry["identifier"])) - f.write(entry_data) - f.write(string_data) - f.write(condition_data) - logger.debug("Compilation done for file {}".format(filename)) - f.close() + file = open(filename, 'r') + container = json.load(file) + entries_dicts = list() + entries_dicts.extend(container["rules"]) + for entry_dict in entries_dicts: + self.__entries.append(YaraEntry.from_dict(entry_dict)) + file.close() - def __init__(self): - self.__entries = list() - pass
\ No newline at end of file + def compile(self, store_identifier_entry = False, store_identifier_signature = False, store_index_map_entries = False, store_index_map_signatures = False, store_index_map_string_blocks = False, store_hash = False, hash_salt = bytearray()): + magic = self.__MAGIC.encode("utf-8") + entries_data = bytearray() + indices = list() + for entry in self.__entries: + indices.append(len(entries_data)) + entries_data.extend(entry.compile(store_identifier_entry, store_identifier_signature, store_index_map_signatures, store_index_map_string_blocks)) + index_entries = YaraIndex.from_size(len(self.__entries)) + index_entries_data = YaraIndex.from_size(len(entries_data)) + n_entries_data = index_entries.compile_index(len(self.__entries)) + index_map_data = YaraIndexMap(index_entries_data, indices).compile() + modifiers = (index_entries.compile_modifier()[0] << 0) | (index_entries_data.compile_modifier()[0] << 2) | ((1 if store_index_map_entries else 0) << 4) | ((1 if store_hash else 0) << 5) + fmt = self.__FORMAT_HEADER + logger.debug("{}: Header, fmt = {}, magic[0] = {}, magic[1] = {}, magic[2] = {}, version_format = {}, version_database = {}, modifiers = {}".format("YaraDatabase", fmt, magic[0], magic[1], magic[2], self.__VERSION_FORMAT, self.__version, modifiers)) + header_data = struct.pack(fmt, magic[0], magic[1], magic[2], self.__VERSION_FORMAT, self.__version, modifiers) + fmt = self.__FORMAT_BODY.format(size_n_entries_data=len(n_entries_data), size_index_map_data=len(index_map_data), size_entries_data=len(entries_data)) + logger.debug("{}: Body, fmt = {}, n_entries_data = {}, index_map_data = {}, entries_data = {}".format("YaraDatabase", fmt, n_entries_data, index_map_data, entries_data)) + body_data = struct.pack(fmt, n_entries_data, index_map_data, entries_data) + h = sha256() + h.update(header_data) + h.update(body_data) + h.update(hash_salt) + digest = h.digest() + fmt = self.__FORMAT.format(size_header=len(header_data), size_hash=(len(digest) if store_hash else 0), size_body=len(body_data)) + logger.debug("{}: fmt = {}, header_data = {}, digest = {}, body_data = {}".format("YaraDatabase", fmt, header_data, digest, body_data)) + return struct.pack(fmt, header_data, digest, body_data)
\ No newline at end of file |