From bb830befb5598472cad0f90ee54c4c01d196e3f6 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 27 Dec 2024 21:38:33 +0100 Subject: [PATCH] Yara improvements (#1212) --- core/database_arango.py | 2 +- core/schemas/indicators/yara.py | 132 ++++++++++++++++-- plugins/feeds/public/artifacts.py | 32 ++--- plugins/feeds/public/signaturebase.py | 36 +---- plugins/feeds/public/yaraforge.py | 45 ++++++ poetry.lock | 29 +++- pyproject.toml | 1 + tests/apiv2/indicators.py | 8 +- tests/feeds.py | 6 + tests/schemas/indicator.py | 36 ----- tests/schemas/yararule.py | 190 ++++++++++++++++++++++++++ 11 files changed, 421 insertions(+), 96 deletions(-) create mode 100644 plugins/feeds/public/yaraforge.py create mode 100644 tests/schemas/yararule.py diff --git a/core/database_arango.py b/core/database_arango.py index e1dd78c8e..ec08dc31d 100644 --- a/core/database_arango.py +++ b/core/database_arango.py @@ -515,7 +515,7 @@ def tag( if not isinstance(tags, (list, set, tuple)): raise ValueError("Tags must be of type list, set or tuple.") - tags = [t.strip() for t in tags if t.strip()] + tags = list({t.strip() for t in tags if t.strip()}) if strict: self.clear_tags() diff --git a/core/schemas/indicators/yara.py b/core/schemas/indicators/yara.py index 958c414c5..0f48a7286 100644 --- a/core/schemas/indicators/yara.py +++ b/core/schemas/indicators/yara.py @@ -1,7 +1,10 @@ -from typing import ClassVar, Literal +from typing import Any, ClassVar, Literal +import plyara +import plyara.exceptions +import plyara.utils import yara -from pydantic import BaseModel, PrivateAttr, field_validator +from pydantic import BaseModel, PrivateAttr, model_validator from core.schemas import indicator @@ -111,16 +114,52 @@ class Yara(indicator.Indicator): _type_filter: ClassVar[str] = "yara" _compiled_pattern: yara.Match | None = PrivateAttr(None) + + name: str = "" # gets overridden during validation type: Literal["yara"] = "yara" + dependencies: list[str] = [] + private: bool = False - @field_validator("pattern") + @model_validator(mode="before") @classmethod - def validate_yara(cls, value) -> str: + def validate_yara(cls, data: Any): + rule = data.get("pattern") + if not rule: + raise ValueError("Yara rule body is required.") try: - yara.compile(source=value, externals=ALLOWED_EXTERNALS) - except yara.SyntaxError as error: - raise ValueError(f"Invalid Yara rule: {error}") - return value + rules = plyara.Plyara().parse_string(rule) + except plyara.exceptions.ParseTypeError as error: + raise ValueError(str(error)) from error + if len(rules) > 1: + raise ValueError("Only one Yara rule is allowed in the rule body.") + if not rules: + raise ValueError("No valid Yara rules found in the rule body.") + parsed_rule = rules[0] + rule_deps = set(plyara.utils.detect_dependencies(parsed_rule)) + data["dependencies"] = rule_deps - ALLOWED_EXTERNALS.keys() + data["name"] = parsed_rule["rule_name"] + data["private"] = "private" in parsed_rule.get("scopes", []) + + return data + + def save(self): + self = super().save() + nodes, relationships, _ = self.neighbors( + link_types=["depends"], direction="outbound", max_hops=1 + ) + + for edge in relationships: + for rel in edge: + if nodes[rel.target].name not in self.dependencies: + rel.delete() + + for dependency in self.dependencies: + dep = Yara.find(name=dependency) + if not dep: + raise ValueError(f"Rule depends on unknown dependency '{dependency}'") + self.link_to(dep, "depends", "Depends on") + + return self @property def compiled_pattern(self): @@ -134,3 +173,80 @@ def match(self, value: str | bytes) -> YaraMatch | None: if result: return YaraMatch(matches=yaramatch.matches) return None + + @classmethod + def import_bulk_rules(cls, bulk_rule_text: str, tags: list[str] | None = None): + """Import bulk rules from a rule body. + + Args: + bulk_rule_text: The text containing the bulk rules. + tags: A list of tags to apply to the imported rules. + """ + if not tags: + tags = [] + + try: + yara.compile(source=bulk_rule_text, externals=ALLOWED_EXTERNALS) + except yara.SyntaxError as error: + raise ValueError(str(error)) from error + + parsed_rules = plyara.Plyara().parse_string(bulk_rule_text) + # all_rule_names = {rule["rule_name"] for rule in parsed_rules} + + for rule in parsed_rules: + raw_rule = plyara.utils.rebuild_yara_rule(rule) + print(f'Processing {rule["rule_name"]}') + yara_object = Yara( + name=rule["rule_name"], + pattern=raw_rule, + diamond=indicator.DiamondModel.capability, + location=rule.get("scan_context", "N/A"), + ).save() + + rule_tags = rule.get("tags", []) + try: + if rule_tags and isinstance(rule_tags, str): + rule_tags = rule_tags.split(",") + except ValueError: + rule_tags = [] + + if tags + rule_tags: + yara_object.tag(tags + rule_tags) + + def rule_with_dependencies( + self, resolved: set[str] | None = None, seen: set[str] | None = None + ) -> str: + """ + Find dependencies in a Yara rule. + + Returns: + A string containing the original rule text with dependencies added. + """ + if resolved is None: + resolved = set() + if seen is None: + seen = set() + + if self.name in seen: + raise ValueError(f"Circular dependency detected: {self.name}") + + seen.add(self.name) + + concatenated_rules = "" + + parsed_rule = plyara.Plyara().parse_string(self.pattern)[0] + dependencies = plyara.utils.detect_dependencies(parsed_rule) + + for dependency in dependencies: + dep_rule = Yara.find(name=dependency) + if not dep_rule: + raise ValueError(f"Rule depends on unknown dependency '{dependency}'") + if dep_rule.name not in resolved: + concatenated_rules += dep_rule.rule_with_dependencies(resolved, seen) + + if self.name not in resolved: + concatenated_rules += self.pattern + "\n\n" + resolved.add(self.name) + + seen.remove(self.name) + return concatenated_rules diff --git a/plugins/feeds/public/artifacts.py b/plugins/feeds/public/artifacts.py index 7dffde670..c511e0c05 100644 --- a/plugins/feeds/public/artifacts.py +++ b/plugins/feeds/public/artifacts.py @@ -36,22 +36,22 @@ def run(self): tempdir, "artifacts-main", "artifacts", "data" ) - data_files_glob = glob.glob(os.path.join(artifacts_datadir, "*.yaml")) - artifacts_dict = {} - for file in data_files_glob: - result = validator_object.CheckFile(file) - if not result: - logging.error("Failed to validate %s, skipping", file) - continue - logging.info("Processing %s", file) - with open(file, "r") as f: - yaml_string = f.read() - - forensic_indicators = indicator.ForensicArtifact.from_yaml_string( - yaml_string, update_parents=False - ) - for fi in forensic_indicators: - artifacts_dict[fi.name] = fi + data_files_glob = glob.glob(os.path.join(artifacts_datadir, "*.yaml")) + artifacts_dict = {} + for file in data_files_glob: + result = validator_object.CheckFile(file) + if not result: + logging.error("Failed to validate %s, skipping", file) + continue + logging.info("Processing %s", file) + with open(file, "r") as f: + yaml_string = f.read() + + forensic_indicators = indicator.ForensicArtifact.from_yaml_string( + yaml_string, update_parents=False + ) + for fi in forensic_indicators: + artifacts_dict[fi.name] = fi for artifact in artifacts_dict.values(): artifact.update_parents(artifacts_dict) diff --git a/plugins/feeds/public/signaturebase.py b/plugins/feeds/public/signaturebase.py index 106b7ae77..95d80275a 100644 --- a/plugins/feeds/public/signaturebase.py +++ b/plugins/feeds/public/signaturebase.py @@ -1,5 +1,4 @@ import glob -import json import logging import os import tempfile @@ -7,18 +6,8 @@ from io import BytesIO from zipfile import ZipFile -import yara - from core import taskmanager -from core.schemas import entity, indicator, task - -ALLOWED_EXTERNALS = { - "filename": "", - "filepath": "", - "extension": "", - "filetype": "", - "owner": "", -} +from core.schemas import indicator, task class Neo23x0SignatureBase(task.FeedTask): @@ -41,24 +30,13 @@ def run(self): ZipFile(BytesIO(response.content)).extractall(path=tempdir) rules_path = os.path.join(tempdir, "signature-base-master", "yara") - for file in glob.glob(f"{rules_path}/*.yar"): - with open(file, "r") as f: - rule = f.read() - - try: - yara.compile(source=rule, externals=ALLOWED_EXTERNALS) - except Exception as e: - logging.warning(f"Error compiling rule {file}: {e}") - raise - - yara_object = indicator.Yara( - name=f"Neo23x0: {os.path.basename(file)}", - pattern=rule, - diamond=indicator.DiamondModel.capability, - location="filesystem", - ).save() + for file in glob.glob(f"{rules_path}/*.yar"): + with open(file, "r") as f: + rule = f.read() - yara_object.tag(["Neo23x0", "signature-base"]) + indicator.Yara.import_bulk_rules( + rule, tags=["Neo23x0", "signature-base"] + ) taskmanager.TaskManager.register_task(Neo23x0SignatureBase) diff --git a/plugins/feeds/public/yaraforge.py b/plugins/feeds/public/yaraforge.py new file mode 100644 index 000000000..088c98cd7 --- /dev/null +++ b/plugins/feeds/public/yaraforge.py @@ -0,0 +1,45 @@ +import glob +import json +import logging +import os +import tempfile +from datetime import timedelta +from io import BytesIO +from zipfile import ZipFile + +from core import taskmanager +from core.schemas import indicator, task + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class YaraForge(task.FeedTask): + _defaults = { + "name": "YaraForge", + "frequency": timedelta(days=1), + "type": "feed", + "description": "Collection of community Yara rules: https://yarahq.github.io/", + } + + _SOURCE_ZIP = "https://github.com/YARAHQ/yara-forge/releases/latest/download/yara-forge-rules-core.zip" + + def run(self): + response = self._make_request(self._SOURCE_ZIP, no_cache=True) + if not response: + logging.info(f"No response: skipping {self.name} update") + return + + with tempfile.TemporaryDirectory() as tempdir: + ZipFile(BytesIO(response.content)).extractall(path=tempdir) + + rules_path = os.path.join( + tempdir, "packages", "core", "yara-rules-core.yar" + ) + with open(rules_path, "r") as f: + rules = f.read() + + indicator.Yara.import_bulk_rules(rules, tags=["yara-forge-core"]) + + +taskmanager.TaskManager.register_task(YaraForge) diff --git a/poetry.lock b/poetry.lock index 01971d369..5b987e3e7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aenum" @@ -1649,6 +1649,31 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.11.2)"] +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +files = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] + +[[package]] +name = "plyara" +version = "2.2.1" +description = "Parse YARA rules" +optional = false +python-versions = ">=3.10" +files = [ + {file = "plyara-2.2.1-py3-none-any.whl", hash = "sha256:3df3ded8cc1439ac6c12a1d39b18d03d0c677f0a909bf0e4a240c9d813126128"}, + {file = "plyara-2.2.1.tar.gz", hash = "sha256:4a05fd93f8174127b7f510dfdc7effba8f2d1f762c0314e7944f0dd66fd3f466"}, +] + +[package.dependencies] +ply = ">=3.11" + [[package]] name = "prompt-toolkit" version = "3.0.48" @@ -2941,4 +2966,4 @@ s3 = ["boto3"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "4be03d5e3b07e52b51ebe98e7ccb4a204d704a8b32e37291c94bfd9081541fb7" +content-hash = "cb4997aded60dde00945ad8882d098a9fbfdebdc2be64fd011813facf226bc17" diff --git a/pyproject.toml b/pyproject.toml index 974df6068..9aa367264 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ idstools = "^0.6.5" aenum = "^3.1.15" boto3 = { version = "^1.35.22", optional = true } tqdm = "^4.67.1" +plyara = "2.2.1" # while https://github.com/plyara/plyara/issues/143 is addressed [tool.poetry.group.dev.dependencies] pylint = "^2.16.1" diff --git a/tests/apiv2/indicators.py b/tests/apiv2/indicators.py index 0fc54aed5..b62823630 100644 --- a/tests/apiv2/indicators.py +++ b/tests/apiv2/indicators.py @@ -162,7 +162,6 @@ def test_bad_regex(self): def test_bad_yara(self): indicator_dict = { - "name": "badYara", "type": "yara", "pattern": "rule test {", "location": "filesystem", @@ -174,11 +173,12 @@ def test_bad_yara(self): ) self.assertEqual(response.status_code, 422) data = response.json() - self.assertIn("Value error, Invalid Yara rule", data["detail"][0]["msg"]) + self.assertIn( + "No valid Yara rules found in the rule body", data["detail"][0]["msg"] + ) def test_new_yara(self): indicator_dict = { - "name": "yara", "type": "yara", "pattern": 'rule test { strings: $a = "test" condition: $a }', "location": "filesystem", @@ -190,5 +190,5 @@ def test_new_yara(self): ) self.assertEqual(response.status_code, 200) data = response.json() - self.assertEqual(data["name"], "yara") + self.assertEqual(data["name"], "test") self.assertEqual(data["type"], "yara") diff --git a/tests/feeds.py b/tests/feeds.py index 4e76efa3a..563123405 100644 --- a/tests/feeds.py +++ b/tests/feeds.py @@ -19,6 +19,7 @@ threatfox, timesketch, tor_exit_nodes, + yaraforge, yaraify, ) @@ -124,3 +125,8 @@ def test_neo23_signaturebase(self): defaults = signaturebase.Neo23x0SignatureBase._defaults.copy() feed = signaturebase.Neo23x0SignatureBase(**defaults) feed.run() + + def test_yara_forge(self): + defaults = yaraforge.YaraForge._defaults.copy() + feed = yaraforge.YaraForge(**defaults) + feed.run() diff --git a/tests/schemas/indicator.py b/tests/schemas/indicator.py index 3378e8322..1e4b21b5f 100644 --- a/tests/schemas/indicator.py +++ b/tests/schemas/indicator.py @@ -8,7 +8,6 @@ Indicator, Query, Regex, - Yara, ) @@ -279,38 +278,3 @@ def test_forensic_artifacts_parent_extraction(self): self.assertEqual(vertices[artifacts[1].extended_id].name, "Artifact2") self.assertEqual(vertices[artifacts[2].extended_id].name, "Artifact3") - - def test_yara_creation(self): - yara = Yara( - name="yara1", - pattern='rule test { strings: $a = "test" condition: $a }', - location="any", - diamond=DiamondModel.capability, - ).save() - - self.assertIsNotNone(yara.id) - self.assertIsNotNone(yara.created) - self.assertEqual(yara.name, "yara1") - self.assertEqual(yara.type, "yara") - - def test_yara_match(self): - rule = Yara( - name="yara1", - pattern='rule test_rule { strings: $a = "Ba" condition: $a }', - location="any", - diamond=DiamondModel.capability, - ).save() - - result = rule.match("ThisIsAReallyBaaaadStringIsntIt") - self.assertIsNotNone(result) - self.assertEqual(result.matches[0].rule, "test_rule") - self.assertEqual(result.matches[0].strings[0].identifier, "$a") - self.assertEqual(result.matches[0].strings[0].instances[0].offset, 13) - self.assertEqual(result.matches[0].strings[0].instances[0].matched_data, b"Ba") - - result = rule.match(b"ThisIsAReallyBaaaadStringIsntIt") - self.assertIsNotNone(result) - self.assertEqual(result.matches[0].rule, "test_rule") - self.assertEqual(result.matches[0].strings[0].identifier, "$a") - self.assertEqual(result.matches[0].strings[0].instances[0].offset, 13) - self.assertEqual(result.matches[0].strings[0].instances[0].matched_data, b"Ba") diff --git a/tests/schemas/yararule.py b/tests/schemas/yararule.py new file mode 100644 index 000000000..b1ab6de31 --- /dev/null +++ b/tests/schemas/yararule.py @@ -0,0 +1,190 @@ +import unittest + +from core import database_arango +from core.schemas.indicator import DiamondModel +from core.schemas.indicators.yara import Yara + + +class YaraIndicatorTest(unittest.TestCase): + def setUp(self) -> None: + database_arango.db.connect(database="yeti_test") + database_arango.db.truncate() + + def test_yara_creation(self): + yara = Yara( + pattern='rule test { strings: $a = "test" condition: $a }', + location="any", + diamond=DiamondModel.capability, + ).save() + + self.assertIsNotNone(yara.id) + self.assertIsNotNone(yara.created) + self.assertEqual(yara.name, "test") + self.assertEqual(yara.type, "yara") + + def test_yara_name_and_deps(self): + yara = Yara( + name="blah", + pattern='rule test { strings: $a = "test" condition: $a and dep }', + location="any", + diamond=DiamondModel.capability, + ) + + self.assertEqual(yara.name, "test") + self.assertEqual(yara.dependencies, ["dep"]) + + def test_invalid_yara_rule(self): + with self.assertRaises(ValueError) as error: + Yara( + pattern='rule test { wooo: $a = "test" fooo: $a and dep }', + location="any", + diamond=DiamondModel.capability, + ).save() + + self.assertIn("Unknown text wooo", str(error.exception)) + + def test_fail_on_more_than_one_rule(self): + with self.assertRaises(ValueError) as error: + Yara( + pattern="rule test { condition: true } rule test2 { condition: true }", + location="any", + diamond=DiamondModel.capability, + ).save() + + self.assertIn( + "Only one Yara rule is allowed in the rule body.", str(error.exception) + ) + + def test_dependency_calculation(self): + Yara( + pattern="rule dep0 { condition: true }", + location="any", + diamond=DiamondModel.capability, + ).save() + + Yara( + pattern="rule dep1 { condition: true and dep0 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + Yara( + pattern="rule dep2 { condition: true and dep1 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + yara_rule = Yara( + pattern="rule test { condition: true and dep2 and dep1 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + deps = yara_rule.rule_with_dependencies() + self.assertEqual( + deps, + ( + "rule dep0 { condition: true }\n\n" + "rule dep1 { condition: true and dep0 }\n\n" + "rule dep2 { condition: true and dep1 }\n\n" + "rule test { condition: true and dep2 and dep1 }\n\n" + ), + ) + + def test_yara_dependency_creates_links(self): + dep0 = Yara( + pattern="rule dep0 { condition: true }", + location="any", + diamond=DiamondModel.capability, + ).save() + + dep1 = Yara( + pattern="rule dep1 { condition: true and dep0 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + dep2 = Yara( + pattern="rule dep2 { condition: true and dep1 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + yara_rule = Yara( + pattern="rule test { condition: true and dep2 and dep1 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + vertices, _, total = yara_rule.neighbors() + self.assertEqual(total, 2) + self.assertEqual(len(vertices), 2) + + self.assertEqual(vertices[dep1.extended_id].name, "dep1") + self.assertEqual(vertices[dep2.extended_id].name, "dep2") + + vertices, _, total = dep1.neighbors() + self.assertEqual(total, 3) + self.assertEqual(len(vertices), 3) + self.assertEqual(vertices[dep0.extended_id].name, "dep0") + self.assertEqual(vertices[dep2.extended_id].name, "dep2") + self.assertEqual(vertices[yara_rule.extended_id].name, "test") + + def test_yara_links_get_updated_when_deps_change(self): + dep1 = Yara( + pattern="rule dep1 { condition: true }", + location="any", + diamond=DiamondModel.capability, + ).save() + + dep2 = Yara( + pattern="rule dep2 { condition: true }", + location="any", + diamond=DiamondModel.capability, + ).save() + + yara_rule = Yara( + pattern="rule test { condition: dep2 and dep1 }", + location="any", + diamond=DiamondModel.capability, + ).save() + + self.assertCountEqual(yara_rule.dependencies, ["dep2", "dep1"]) + + vertices, _, total = yara_rule.neighbors() + self.assertEqual(total, 2) + self.assertEqual(len(vertices), 2) + + self.assertEqual(vertices[dep1.extended_id].name, "dep1") + self.assertEqual(vertices[dep2.extended_id].name, "dep2") + + yara_rule.pattern = "rule test { condition: true and dep1 }" + yara_rule = yara_rule.save() + self.assertEquals(yara_rule.dependencies, ["dep1"]) + + vertices, _, total = yara_rule.neighbors() + self.assertEqual(total, 1) + self.assertEqual(len(vertices), 1) + self.assertEqual(vertices[dep1.extended_id].name, "dep1") + + def test_yara_match(self): + rule = Yara( + name="yara1", + pattern='rule test_rule { strings: $a = "Ba" condition: $a }', + location="any", + diamond=DiamondModel.capability, + ).save() + + result = rule.match("ThisIsAReallyBaaaadStringIsntIt") + self.assertIsNotNone(result) + self.assertEqual(result.matches[0].rule, "test_rule") + self.assertEqual(result.matches[0].strings[0].identifier, "$a") + self.assertEqual(result.matches[0].strings[0].instances[0].offset, 13) + self.assertEqual(result.matches[0].strings[0].instances[0].matched_data, b"Ba") + + result = rule.match(b"ThisIsAReallyBaaaadStringIsntIt") + self.assertIsNotNone(result) + self.assertEqual(result.matches[0].rule, "test_rule") + self.assertEqual(result.matches[0].strings[0].identifier, "$a") + self.assertEqual(result.matches[0].strings[0].instances[0].offset, 13) + self.assertEqual(result.matches[0].strings[0].instances[0].matched_data, b"Ba")