Skip to content

Commit

Permalink
Decouples Filesystem implementation
Browse files Browse the repository at this point in the history
This refactors the way that the parser handles paths in general,
removing the filesystem-specific implementation details from the parsing
fuctions, and providing a new interface centered around providing the
correct readers through the `FileProvider` trait. Default
implementations for live-system and logarchive formats are provided, as
are utilities for categorizing files by path. This requird updates
throughout the repo's tests, and if merged, will require updates to the
larger tests that are currently shipping in the test archive.
  • Loading branch information
dgmcdona committed Dec 20, 2024
1 parent f9ce1c9 commit 8a4a0e0
Show file tree
Hide file tree
Showing 17 changed files with 647 additions and 970 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ plist = "1.7.0"
regex = "1.10.6"
base64 = "0.22.1"
chrono = "0.4.38"
walkdir = "2.5.0"

[dev-dependencies]
simplelog = "0.12.2"
Expand Down
9 changes: 8 additions & 1 deletion examples/parse_tracev3/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,14 @@ fn main() {

// Parse single tracev3 file
fn parse_trace_file(path: &str) {
let log_data = parse_log(path).unwrap();
let handle = match std::fs::File::open(&path) {
Ok(handle) => handle,
Err(e) => {
log::error!("Failed to open tracev3 file {}: {:?}", path, e);
return;
}
};
let log_data = parse_log(handle).unwrap();
let filename = Path::new(path);
// Pass empty UUID, UUID cache, timesync files
let string_results: Vec<UUIDText> = Vec::new();
Expand Down
258 changes: 59 additions & 199 deletions examples/unifiedlog_iterator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@
use chrono::{SecondsFormat, TimeZone, Utc};
use log::LevelFilter;
use macos_unifiedlogs::dsc::SharedCacheStrings;
use macos_unifiedlogs::filesystem::{LiveSystemProvider, LogarchiveProvider};
use macos_unifiedlogs::iterator::UnifiedLogIterator;
use macos_unifiedlogs::parser::{
build_log, collect_shared_strings, collect_shared_strings_system, collect_strings,
collect_strings_system, collect_timesync, collect_timesync_system,
build_log, collect_shared_strings, collect_strings, collect_timesync,
};
use macos_unifiedlogs::timesync::TimesyncBoot;
use macos_unifiedlogs::traits::FileProvider;
use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData};
use macos_unifiedlogs::uuidtext::UUIDText;
use simplelog::{Config, SimpleLogger};
use std::error::Error;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::PathBuf;
use std::{fs, io};
use std::fs;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};

use clap::Parser;
use csv::Writer;
Expand All @@ -30,16 +30,16 @@ use csv::Writer;
#[clap(version, about, long_about = None)]
struct Args {
/// Run on live system
#[clap(short, long, default_value = "false")]
live: String,
#[clap(short, long)]
live: bool,

/// Path to logarchive formatted directory
#[clap(short, long, default_value = "")]
input: String,
#[clap(short, long)]
input: Option<PathBuf>,

/// Path to output file. Any directories must already exist
#[clap(short, long, default_value = "")]
output: String,
#[clap(short, long)]
output: Option<PathBuf>,

/// Output format. Options: csv, jsonl. Default is autodetect.
#[clap(short, long, default_value = "auto")]
Expand All @@ -58,50 +58,53 @@ fn main() {
.expect("Failed to initialize simple logger");

let args = Args::parse();
let output_format = if args.format.is_empty() || args.format == "auto" {
std::path::Path::new(&args.output)
.extension()
.and_then(std::ffi::OsStr::to_str)
.unwrap_or("csv")
.to_string()
let output_format = match args.format.as_str() {
"csv" => "csv",
"jsonl" => "jsonl",
"auto" => "auto",
_ => "auto",
}
.to_string();

let handle: Box<dyn Write> = if let Some(path) = args.output {
Box::new(
fs::OpenOptions::new()
.write(true)
.append(true)
.open(path)
.unwrap(),
)
} else {
args.format.clone()
Box::new(std::io::stdout())
};

let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();
let mut writer = OutputWriter::new(Box::new(handle), &output_format).unwrap();

if !args.input.is_empty() {
parse_log_archive(&args.input, &mut writer);
} else if args.live != "false" {
if let Some(path) = args.input {
parse_log_archive(&path, &mut writer);
} else if args.live {
parse_live_system(&mut writer);
}
}

// Parse a provided directory path. Currently, expect the path to follow macOS log collect structure
fn parse_log_archive(path: &str, writer: &mut OutputWriter) {
let mut archive_path = PathBuf::from(path);
fn parse_log_archive(path: &Path, writer: &mut OutputWriter) {
let provider = LogarchiveProvider::new(path);

// Parse all UUID files which contain strings and other metadata
let string_results = collect_strings(&archive_path.display().to_string()).unwrap();

archive_path.push("dsc");
let string_results = collect_strings(&provider).unwrap();
// Parse UUID cache files which also contain strings and other metadata
let shared_strings_results =
collect_shared_strings(&archive_path.display().to_string()).unwrap();
archive_path.pop();

archive_path.push("timesync");
let shared_strings_results = collect_shared_strings(&provider).unwrap();
// Parse all timesync files
let timesync_data = collect_timesync(&archive_path.display().to_string()).unwrap();
archive_path.pop();
let timesync_data = collect_timesync(&provider).unwrap();

// Keep UUID, UUID cache, timesync files in memory while we parse all tracev3 files
// Allows for faster lookups
parse_trace_file(
&string_results,
&shared_strings_results,
&timesync_data,
path,
&provider,
writer,
);

Expand All @@ -110,17 +113,12 @@ fn parse_log_archive(path: &str, writer: &mut OutputWriter) {

// Parse a live macOS system
fn parse_live_system(writer: &mut OutputWriter) {
let strings = collect_strings_system().unwrap();
let shared_strings = collect_shared_strings_system().unwrap();
let timesync_data = collect_timesync_system().unwrap();
let provider = LiveSystemProvider::default();
let strings = collect_strings(&provider).unwrap();
let shared_strings = collect_shared_strings(&provider).unwrap();
let timesync_data = collect_timesync(&provider).unwrap();

parse_trace_file(
&strings,
&shared_strings,
&timesync_data,
"/private/var/db/diagnostics",
writer,
);
parse_trace_file(&strings, &shared_strings, &timesync_data, &provider, writer);

eprintln!("\nFinished parsing Unified Log data.");
}
Expand All @@ -131,7 +129,7 @@ fn parse_trace_file(
string_results: &[UUIDText],
shared_strings_results: &[SharedCacheStrings],
timesync_data: &[TimesyncBoot],
path: &str,
provider: &dyn FileProvider,
writer: &mut OutputWriter,
) {
// We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries)
Expand All @@ -145,146 +143,19 @@ fn parse_trace_file(

let mut missing_data: Vec<UnifiedLogData> = Vec::new();

let mut archive_path = PathBuf::from(path);
archive_path.push("Persist");

// Loop through all tracev3 files in Persist directory
let mut log_count = 0;
if archive_path.exists() {
let paths = fs::read_dir(&archive_path).unwrap();

// Loop through all tracev3 files in Persist directory
for log_path in paths {
let data = log_path.unwrap();
let full_path = data.path().display().to_string();
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
shared_strings_results,
timesync_data,
writer,
&mut oversize_strings,
);
log_count += count;
} else {
eprintln!("File {} no longer on disk", full_path);
continue;
};
}
}

archive_path.pop();
archive_path.push("Special");

if archive_path.exists() {
let paths = fs::read_dir(&archive_path).unwrap();

// Loop through all tracev3 files in Special directory
for log_path in paths {
let data = log_path.unwrap();
let full_path = data.path().display().to_string();
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
shared_strings_results,
timesync_data,
writer,
&mut oversize_strings,
);
log_count += count;
} else {
eprintln!("File {} no longer on disk", full_path);
continue;
};
}
}

archive_path.pop();
archive_path.push("Signpost");

if archive_path.exists() {
let paths = fs::read_dir(&archive_path).unwrap();

// Loop through all tracev3 files in Signpost directory
for log_path in paths {
let data = log_path.unwrap();
let full_path = data.path().display().to_string();
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
shared_strings_results,
timesync_data,
writer,
&mut oversize_strings,
);
log_count += count;
} else {
eprintln!("File {} no longer on disk", full_path);
continue;
};
}
}
archive_path.pop();
archive_path.push("HighVolume");

if archive_path.exists() {
let paths = fs::read_dir(&archive_path).unwrap();

// Loop through all tracev3 files in HighVolume directory
for log_path in paths {
let data = log_path.unwrap();
let full_path = data.path().display().to_string();
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
shared_strings_results,
timesync_data,
writer,
&mut oversize_strings,
);
log_count += count;
} else {
eprintln!("File {} no longer on disk", full_path);
continue;
};
}
}
archive_path.pop();

archive_path.push("logdata.LiveData.tracev3");

// Check if livedata exists. We only have it if 'log collect' was used
if archive_path.exists() {
eprintln!("Parsing: logdata.LiveData.tracev3");

let count = iterate_chunks(
&archive_path.display().to_string(),
for reader in provider.tracev3_files() {
log_count += iterate_chunks(
reader,
&mut missing_data,
string_results,
shared_strings_results,
timesync_data,
writer,
&mut oversize_strings,
);
log_count += count;
archive_path.pop();
}

let include_missing = false;
println!("Oversize cache size: {}", oversize_strings.oversize.len());
println!("Logs with missing Oversize strings: {}", missing_data.len());
Expand Down Expand Up @@ -313,17 +184,23 @@ fn parse_trace_file(
}

fn iterate_chunks(
path: &str,
mut reader: impl Read,
missing: &mut Vec<UnifiedLogData>,
strings_data: &[UUIDText],
shared_strings: &[SharedCacheStrings],
timesync_data: &[TimesyncBoot],
writer: &mut OutputWriter,
oversize_strings: &mut UnifiedLogData,
) -> usize {
let log_bytes = fs::read(path).unwrap();
let mut buf = Vec::new();

if let Err(e) = reader.read(&mut buf) {
log::error!("Failed to read tracev3 file: {:?}", e);
return 0;
}

let log_iterator = UnifiedLogIterator {
data: log_bytes,
data: buf,
header: Vec::new(),
};

Expand Down Expand Up @@ -367,24 +244,7 @@ enum OutputWriterEnum {
}

impl OutputWriter {
pub fn new(
output_path: &str,
output_format: &str,
append: bool,
) -> Result<Self, Box<dyn Error>> {
let writer: Box<dyn Write> = if !output_path.is_empty() {
Box::new(
OpenOptions::new()
.write(true)
.create(true)
.truncate(!append)
.append(append)
.open(output_path)?,
)
} else {
Box::new(io::stdout())
};

pub fn new(writer: Box<dyn Write>, output_format: &str) -> Result<Self, Box<dyn Error>> {
let writer_enum = match output_format {
"csv" => {
let mut csv_writer = Writer::from_writer(writer);
Expand Down
Loading

0 comments on commit 8a4a0e0

Please sign in to comment.