Skip to content

Commit

Permalink
Merge pull request #43 from jgarzik/endian
Browse files Browse the repository at this point in the history
Mixed Endian support
  • Loading branch information
jgarzik authored Feb 1, 2024
2 parents d43bb82 + 9305a7d commit 1e6eb9e
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 52 deletions.
26 changes: 19 additions & 7 deletions src/avail.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use byteorder::{LittleEndian, ReadBytesExt};
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use std::io::{self, Read};

use crate::ser::{w32, woff_t};
Expand All @@ -10,14 +10,26 @@ pub struct AvailElem {
}

impl AvailElem {
pub fn from_reader(is_lfs: bool, rdr: &mut impl Read) -> io::Result<Self> {
let elem_sz = rdr.read_u32::<LittleEndian>()?;
pub fn from_reader(is_lfs: bool, is_le: bool, rdr: &mut impl Read) -> io::Result<Self> {
let elem_sz: u32;
let elem_ofs: u64;
if is_lfs {
let _padding = rdr.read_u32::<LittleEndian>()?;
elem_ofs = rdr.read_u64::<LittleEndian>()?;

if is_le {
elem_sz = rdr.read_u32::<LittleEndian>()?;
if is_lfs {
let _padding = rdr.read_u32::<LittleEndian>()?;
elem_ofs = rdr.read_u64::<LittleEndian>()?;
} else {
elem_ofs = rdr.read_u32::<LittleEndian>()? as u64;
}
} else {
elem_ofs = rdr.read_u32::<LittleEndian>()? as u64;
elem_sz = rdr.read_u32::<BigEndian>()?;
if is_lfs {
let _padding = rdr.read_u32::<BigEndian>()?;
elem_ofs = rdr.read_u64::<BigEndian>()?;
} else {
elem_ofs = rdr.read_u32::<BigEndian>()? as u64;
}
}

Ok(AvailElem {
Expand Down
62 changes: 47 additions & 15 deletions src/bucket.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use byteorder::{LittleEndian, ReadBytesExt};
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use std::collections::HashMap;
use std::io::{self, Error, ErrorKind, Read};

Expand All @@ -17,22 +17,40 @@ pub struct BucketElement {
}

impl BucketElement {
pub fn from_reader(is_lfs: bool, rdr: &mut impl Read) -> io::Result<Self> {
let hash = rdr.read_u32::<LittleEndian>()?;
pub fn from_reader(is_lfs: bool, is_le: bool, rdr: &mut impl Read) -> io::Result<Self> {
let hash;
if is_le {
hash = rdr.read_u32::<LittleEndian>()?;
} else {
hash = rdr.read_u32::<BigEndian>()?;
}

let mut key_start = [0; KEY_SMALL];
rdr.read(&mut key_start)?;

let data_ofs: u64;
if is_lfs {
data_ofs = rdr.read_u64::<LittleEndian>()?;
let (key_size, data_size);

if is_le {
if is_lfs {
data_ofs = rdr.read_u64::<LittleEndian>()?;
} else {
data_ofs = rdr.read_u32::<LittleEndian>()? as u64;
}

key_size = rdr.read_u32::<LittleEndian>()?;
data_size = rdr.read_u32::<LittleEndian>()?;
} else {
data_ofs = rdr.read_u32::<LittleEndian>()? as u64;
if is_lfs {
data_ofs = rdr.read_u64::<BigEndian>()?;
} else {
data_ofs = rdr.read_u32::<BigEndian>()? as u64;
}

key_size = rdr.read_u32::<BigEndian>()?;
data_size = rdr.read_u32::<BigEndian>()?;
}

let key_size = rdr.read_u32::<LittleEndian>()?;
let data_size = rdr.read_u32::<LittleEndian>()?;

Ok(BucketElement {
hash,
key_start,
Expand Down Expand Up @@ -67,19 +85,33 @@ pub struct Bucket {
impl Bucket {
pub fn from_reader(header: &Header, rdr: &mut impl Read) -> io::Result<Self> {
// read avail section
let av_count = rdr.read_u32::<LittleEndian>()?;
let _padding = rdr.read_u32::<LittleEndian>()?;
let av_count;
if header.is_le {
av_count = rdr.read_u32::<LittleEndian>()?;
let _padding = rdr.read_u32::<LittleEndian>()?;
} else {
av_count = rdr.read_u32::<BigEndian>()?;
let _padding = rdr.read_u32::<BigEndian>()?;
}

let mut avail = Vec::new();
for _idx in 0..BUCKET_AVAIL {
let av_elem = AvailElem::from_reader(header.is_lfs, rdr)?;
let av_elem = AvailElem::from_reader(header.is_lfs, header.is_le, rdr)?;
avail.push(av_elem);
}

// todo: validate and assure-sorted avail[]

// read misc. section
let bits = rdr.read_u32::<LittleEndian>()?;
let count = rdr.read_u32::<LittleEndian>()?;
let (bits, count);

if header.is_le {
bits = rdr.read_u32::<LittleEndian>()?;
count = rdr.read_u32::<LittleEndian>()?;
} else {
bits = rdr.read_u32::<BigEndian>()?;
count = rdr.read_u32::<BigEndian>()?;
}

if !(count <= header.bucket_elems && bits <= header.dir_bits) {
return Err(Error::new(ErrorKind::Other, "invalid bucket c/b"));
Expand All @@ -88,7 +120,7 @@ impl Bucket {
// read bucket elements section
let mut tab = Vec::new();
for _idx in 0..header.bucket_elems {
let bucket_elem = BucketElement::from_reader(header.is_lfs, rdr)?;
let bucket_elem = BucketElement::from_reader(header.is_lfs, header.is_le, rdr)?;
tab.push(bucket_elem);
}

Expand Down
29 changes: 22 additions & 7 deletions src/dir.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use byteorder::{LittleEndian, ReadBytesExt};
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use std::io::{self, Seek, SeekFrom};

use crate::ser::woff_t;
Expand Down Expand Up @@ -44,6 +44,26 @@ pub fn dirent_elem_size(is_lfs: bool) -> usize {
}
}

/// Reads a single offset value from `f` at its current position.
///
/// The on-disk width is selected by `is_lfs` (64-bit when true, 32-bit when
/// false) and the byte order by `is_le` (little-endian when true, big-endian
/// when false). A 32-bit value is widened to `u64` before being returned.
///
/// # Errors
///
/// Propagates any I/O error reported by the underlying read.
fn roff_t(f: &mut std::fs::File, is_lfs: bool, is_le: bool) -> io::Result<u64> {
    // One arm per (byte order, offset width) combination.
    match (is_le, is_lfs) {
        (true, true) => f.read_u64::<LittleEndian>(),
        (true, false) => f.read_u32::<LittleEndian>().map(u64::from),
        (false, true) => f.read_u64::<BigEndian>(),
        (false, false) => f.read_u32::<BigEndian>().map(u64::from),
    }
}

// Read C-struct-based bucket directory (a vector of storage offsets)
pub fn dir_reader(f: &mut std::fs::File, header: &Header) -> io::Result<Vec<u64>> {
let is_lfs = header.is_lfs;
Expand All @@ -55,12 +75,7 @@ pub fn dir_reader(f: &mut std::fs::File, header: &Header) -> io::Result<Vec<u64>
let _pos = f.seek(SeekFrom::Start(header.dir_ofs))?;

for _idx in 0..dirent_count {
let ofs: u64;
if is_lfs {
ofs = f.read_u64::<LittleEndian>()?;
} else {
ofs = f.read_u32::<LittleEndian>()? as u64;
}
let ofs = roff_t(f, header.is_lfs, header.is_le)?;
dir.push(ofs);
}

Expand Down
91 changes: 73 additions & 18 deletions src/header.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use byteorder::{LittleEndian, ReadBytesExt};
use byteorder::{BigEndian, LittleEndian, NativeEndian, ReadBytesExt};
use std::io::{self, Error, ErrorKind, Read};

use crate::dir::build_dir_size;
Expand All @@ -24,7 +24,8 @@ pub struct Header {
pub avail: AvailBlock,

// following fields are calculated, not stored
pub is_lfs: bool,
pub is_lfs: bool, // using 64-bit off_t?
pub is_le: bool, // metadata endianness is big (false) or little (true)
pub dirty: bool,
}

Expand All @@ -36,26 +37,79 @@ impl Header {
pub fn from_reader(metadata: &std::fs::Metadata, mut rdr: impl Read) -> io::Result<Self> {
let file_sz = metadata.len();

let magic = rdr.read_u32::<LittleEndian>()?;
let magic = rdr.read_u32::<NativeEndian>()?;

// determine db file version, intrinsics from magic number
let (is_lfs, need_swap) = match magic {
GDBM_OMAGIC => (false, false),
GDBM_OMAGIC_SWAP => (false, true),
GDBM_MAGIC32 => (false, false),
GDBM_MAGIC32_SWAP => (false, true),
GDBM_MAGIC64 => (true, false),
GDBM_MAGIC64_SWAP => (true, true),
_ => {
return Err(Error::new(ErrorKind::Other, "Unknown/invalid magic number"));
}
};

let is_lfs = match magic {
GDBM_MAGIC64 | GDBM_MAGIC64_SWAP => true,
_ => false,
// detect db file endianness
let is_le = match need_swap {
true => {
if cfg!(target_endian = "little") {
false
} else {
true
}
}
false => {
if cfg!(target_endian = "little") {
true
} else {
false
}
}
};

// fixme: read offset fields as u32, not u64, if !is_lfs (32-bit off_t)

let block_sz = rdr.read_u32::<LittleEndian>()?;
let dir_ofs = rdr.read_u64::<LittleEndian>()?;
let dir_sz = rdr.read_u32::<LittleEndian>()?;
let dir_bits = rdr.read_u32::<LittleEndian>()?;
let bucket_sz = rdr.read_u32::<LittleEndian>()?;
let bucket_elems = rdr.read_u32::<LittleEndian>()?;
let next_block = rdr.read_u64::<LittleEndian>()?;

let avail_sz = rdr.read_u32::<LittleEndian>()?;
let avail_count = rdr.read_u32::<LittleEndian>()?;
let avail_next_block = rdr.read_u64::<LittleEndian>()?;
let (
block_sz,
dir_ofs,
dir_sz,
dir_bits,
bucket_sz,
bucket_elems,
next_block,
avail_sz,
avail_count,
avail_next_block,
);

if is_le {
block_sz = rdr.read_u32::<LittleEndian>()?;
dir_ofs = rdr.read_u64::<LittleEndian>()?;
dir_sz = rdr.read_u32::<LittleEndian>()?;
dir_bits = rdr.read_u32::<LittleEndian>()?;
bucket_sz = rdr.read_u32::<LittleEndian>()?;
bucket_elems = rdr.read_u32::<LittleEndian>()?;
next_block = rdr.read_u64::<LittleEndian>()?;

avail_sz = rdr.read_u32::<LittleEndian>()?;
avail_count = rdr.read_u32::<LittleEndian>()?;
avail_next_block = rdr.read_u64::<LittleEndian>()?;
} else {
block_sz = rdr.read_u32::<BigEndian>()?;
dir_ofs = rdr.read_u64::<BigEndian>()?;
dir_sz = rdr.read_u32::<BigEndian>()?;
dir_bits = rdr.read_u32::<BigEndian>()?;
bucket_sz = rdr.read_u32::<BigEndian>()?;
bucket_elems = rdr.read_u32::<BigEndian>()?;
next_block = rdr.read_u64::<BigEndian>()?;

avail_sz = rdr.read_u32::<BigEndian>()?;
avail_count = rdr.read_u32::<BigEndian>()?;
avail_next_block = rdr.read_u64::<BigEndian>()?;
}

if !(block_sz > 0 && block_sz > GDBM_HDR_SZ && block_sz - GDBM_HDR_SZ >= GDBM_AVAIL_ELEM_SZ)
{
Expand Down Expand Up @@ -99,7 +153,7 @@ impl Header {

let mut elems: Vec<AvailElem> = Vec::new();
for _idx in 0..avail_count {
let av_elem = AvailElem::from_reader(is_lfs, &mut rdr)?;
let av_elem = AvailElem::from_reader(is_lfs, is_le, &mut rdr)?;
elems.push(av_elem);
}

Expand Down Expand Up @@ -141,6 +195,7 @@ impl Header {
elems,
},
is_lfs,
is_le,
dirty: false,
})
}
Expand Down
9 changes: 4 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ const GDBM_AVAIL_HDR_SZ: u32 = 16;
const GDBM_AVAIL_ELEM_SZ: u32 = 16;
const KEY_SMALL: usize = 4;
const IGNORE_SMALL: usize = 4;
const DEF_IS_LE: bool = true;

pub enum ExportBinMode {
ExpNative,
Expand Down Expand Up @@ -527,7 +526,7 @@ impl Gdbm {
self.header.dirty = true;

// write extension block to storage (immediately)
let ext_bytes = ext_blk.serialize(self.header.is_lfs, DEF_IS_LE);
let ext_bytes = ext_blk.serialize(self.header.is_lfs, self.header.is_le);
write_ofs(&mut self.f, new_blk_ofs, &ext_bytes)?;

Ok(())
Expand Down Expand Up @@ -582,7 +581,7 @@ impl Gdbm {
}

fn write_bucket(&mut self, bucket_ofs: u64, bucket: &Bucket) -> io::Result<()> {
let bytes = bucket.serialize(self.header.is_lfs, DEF_IS_LE);
let bytes = bucket.serialize(self.header.is_lfs, self.header.is_le);
write_ofs(&mut self.f, bucket_ofs, &bytes)?;

Ok(())
Expand Down Expand Up @@ -612,7 +611,7 @@ impl Gdbm {
return Ok(());
}

let bytes = self.dir.serialize(self.header.is_lfs, DEF_IS_LE);
let bytes = self.dir.serialize(self.header.is_lfs, self.header.is_le);
write_ofs(&mut self.f, self.header.dir_ofs, &bytes)?;

self.dir_dirty = false;
Expand All @@ -626,7 +625,7 @@ impl Gdbm {
return Ok(());
}

let bytes = self.header.serialize(self.header.is_lfs, DEF_IS_LE);
let bytes = self.header.serialize(self.header.is_lfs, self.header.is_le);
write_ofs(&mut self.f, 0, &bytes)?;

self.header.dirty = false;
Expand Down

0 comments on commit 1e6eb9e

Please sign in to comment.