diff --git a/CHANGELOG.md b/CHANGELOG.md index b3a71c9f9..d33c912d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New Features - allow call as valid subscope for call scoped rules @mr-tz +- vmray: record command line details @mr-tz ### Breaking Changes diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index a8976cd8c..93f0330ee 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -13,7 +13,15 @@ from dataclasses import dataclass from capa.exceptions import UnsupportedFormatError -from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall, xml_to_dict +from capa.features.extractors.vmray.models import ( + File, + Flog, + SummaryV2, + StaticData, + FunctionCall, + xml_to_dict, + sanitize_string, +) logger = logging.getLogger(__name__) @@ -35,6 +43,8 @@ class VMRayMonitorProcess: ppid: int # parent process ID assigned by OS monitor_id: int # unique ID assigned to process by VMRay image_name: str + filename: str + cmd_line: str class VMRayAnalysis: @@ -160,7 +170,12 @@ def _compute_monitor_processes(self): self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0 ) self.monitor_processes[process.monitor_id] = VMRayMonitorProcess( - process.os_pid, ppid, process.monitor_id, process.image_name + process.os_pid, + ppid, + process.monitor_id, + process.image_name, + sanitize_string(process.filename), + sanitize_string(process.cmd_line), ) # not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394 @@ -170,6 +185,8 @@ def _compute_monitor_processes(self): monitor_process.os_parent_pid, monitor_process.process_id, monitor_process.image_name, + monitor_process.filename, + monitor_process.cmd_line, ) if monitor_process.process_id not in self.monitor_processes: diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py index a9f0491c9..7f40f25da 100644 --- a/capa/features/extractors/vmray/extractor.py +++ b/capa/features/extractors/vmray/extractor.py @@ -86,7 +86,7 @@ def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, def get_process_name(self, ph) -> str: monitor_process: VMRayMonitorProcess = ph.inner - return monitor_process.image_name + return f"{monitor_process.image_name} ({monitor_process.cmd_line})" def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]: diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index c2d6551aa..728361c69 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -136,11 +136,20 @@ class FunctionReturn(BaseModel): from_addr: HexInt = Field(alias="from") +def sanitize_string(value: str) -> str: + # e.g. "cmd_line": "\"C:\\Users\\38lTTV5Kii\\Desktop\\filename.exe\" ", + return value.replace("\\\\", "\\").strip(' "') + + +# unify representation +SanitizedString = Annotated[str, BeforeValidator(sanitize_string)] + + class MonitorProcess(BaseModel): ts: HexInt process_id: int image_name: str - filename: str + filename: SanitizedString # page_root: HexInt os_pid: HexInt # os_integrity_level: HexInt @@ -148,7 +157,7 @@ class MonitorProcess(BaseModel): monitor_reason: str parent_id: int os_parent_pid: HexInt - # cmd_line: str + cmd_line: SanitizedString # cur_dir: str # os_username: str # bitness: int @@ -308,6 +317,7 @@ class Process(BaseModel): os_pid: int filename: str image_name: str + cmd_line: str ref_parent_process: Optional[GenericReference] = None