Skip to content

Commit

Permalink
Increment to beta version, updated dependencies and the code that use…
Browse files Browse the repository at this point in the history
…d them, fixed minor grammar errors.
  • Loading branch information
Tim Evans committed May 29, 2024
1 parent a31c197 commit e3b724c
Show file tree
Hide file tree
Showing 15 changed files with 53 additions and 57 deletions.
29 changes: 16 additions & 13 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "omf"
version = "0.1.0-alpha.2"
version = "0.1.0-beta.1"
description = "File reader and writer for Open Mining Format."
authors = ["Tim Evans <[email protected]>"]
license = "MIT"
Expand Down Expand Up @@ -34,15 +34,18 @@ omf1 = ["parquet"]
members = ["omf-c"]

[workspace.dependencies]
bytes = "1.4.0"
cbindgen = { version = "0.24.5", default-features = false }
chrono = { version = "0.4.30", default-features = false, features = ["serde"] }
flate2 = "1.0.27"
image = { version = "0.24.7", default-features = false, features = ["png", "jpeg"] }
parquet = { version = "46.0.0", default-features = false, features = ["flate2"] }
regex = "1.9.3"
schemars = { version = "0.8.12", features = ["chrono"] }
serde = { version = "1.0.188", features = ["derive"] }
serde_json = { version = "1.0.107", features = ["float_roundtrip"] }
thiserror = "1.0.47"
zip = { version = "0.6.6", default-features = false }
bytes = "1"
cbindgen = { version = "0.26", default-features = false }
chrono = { version = "0.4", default-features = false, features = ["serde"] }
flate2 = "1.0"
image = { version = "0.25", default-features = false, features = [
"png",
"jpeg",
] }
parquet = { version = "51", default-features = false, features = ["flate2"] }
regex = "1"
schemars = { version = "0.8", features = ["chrono"] }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1", features = ["float_roundtrip"] }
thiserror = "1"
zip = { version = "2", default-features = false }
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
A library for reading and writing files in Open Mining Format 2.0.
Also supports translating OMF 1 files to OMF 2.

OMF file version: 2.0-alpha.2
OMF file version: 2.0-beta.1

Crate version: 0.1.0-alpha.2
Crate version: 0.1.0-beta.1

**Warning:** this is pre-release code.

Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Home

Version 0.1.0-alpha.1
Version 0.1.0-beta.1

Specification and library for Open Mining Format version 2,
a standard for mining data interchange backed by the
Expand Down
2 changes: 1 addition & 1 deletion docs/start.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ and contains details for linking them together into rich objects.
Images may use PNG or JPEG encoding, while arrays use Apache Parquet encoding.

> WARNING:
> When OMF files, beware of "zip bombs" where data is maliciously crafted to expand to an
> When reading OMF files, beware of "zip bombs" where data is maliciously crafted to expand to an
> excessive size when decompressed, leading to a potential denial of service attack.
> Use the limits provided by the C and Rust APIs, and check sizes before allocating memory.
Expand Down
2 changes: 1 addition & 1 deletion omf-c/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "omfc"
version = "0.1.0-alpha.2"
version = "0.1.0-beta.1"
description = "C bindings for `omf`."
authors = ["Tim Evans <[email protected]>"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion omf.schema.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "https://github.com/gmggroup/omf-rust/blob/main/omf.schema.json",
"title": "Open Mining Format 2.0-alpha.2",
"title": "Open Mining Format 2.0-beta.1",
"type": "object",
"required": [
"date"
Expand Down
15 changes: 8 additions & 7 deletions src/date_time.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Utility functions for date and date-time conversion.
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, TimeZone, Utc};
use chrono::{DateTime, Duration, NaiveDate, TimeZone, Utc};

/// Convert a date to the number of days since the epoch.
pub fn date_to_f64(date: NaiveDate) -> f64 {
Expand Down Expand Up @@ -29,8 +29,8 @@ pub fn date_time_to_i64(date_time: DateTime<Utc>) -> i64 {

/// Convert a number of days since the epoch back to a date.
pub fn i64_to_date(value: i64) -> NaiveDate {
NaiveDate::default()
.checked_add_signed(Duration::days(value))
Duration::try_days(value)
.and_then(|d| NaiveDate::default().checked_add_signed(d))
.unwrap_or(if value < 0 {
NaiveDate::MIN
} else {
Expand All @@ -51,8 +51,8 @@ pub fn i64_to_date_time(value: i64) -> DateTime<Utc> {

/// Convert a number of milliseconds since the epoch back to a date-time.
pub fn i64_milli_to_date_time(value: i64) -> DateTime<Utc> {
DateTime::<Utc>::default()
.checked_add_signed(Duration::milliseconds(value))
Duration::try_milliseconds(value)
.and_then(|d| DateTime::<Utc>::default().checked_add_signed(d))
.unwrap_or(if value < 0 {
DateTime::<Utc>::MIN_UTC
} else {
Expand All @@ -76,7 +76,8 @@ pub fn utc_now() -> DateTime<Utc> {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("valid system time");
let naive = NaiveDateTime::from_timestamp_opt(now.as_secs() as i64, now.subsec_nanos())
.expect("valid timestamp");
let naive = DateTime::from_timestamp(now.as_secs() as i64, now.subsec_nanos())
.expect("valid timestamp")
.naive_utc();
Utc.from_utc_datetime(&naive)
}
8 changes: 4 additions & 4 deletions src/file/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl Writer {
image: &image::DynamicImage,
) -> Result<Array<array_type::Image>, Error> {
let mut bytes = Vec::new();
image.write_to(&mut Cursor::new(&mut bytes), image::ImageOutputFormat::Png)?;
image.write_to(&mut Cursor::new(&mut bytes), image::ImageFormat::Png)?;
self.image_bytes(&bytes)
}

Expand All @@ -53,10 +53,10 @@ impl Writer {
quality: u8,
) -> Result<Array<array_type::Image>, Error> {
let mut bytes = Vec::new();
image.write_to(
image.write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
&mut Cursor::new(&mut bytes),
image::ImageOutputFormat::Jpeg(quality.clamp(1, 100)),
)?;
quality.clamp(1, 100),
))?;
self.image_bytes(&bytes)
}
}
6 changes: 3 additions & 3 deletions src/file/zip_container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{

use zip::{
read::{ZipArchive, ZipFile},
write::{FileOptions, ZipWriter},
write::{FullFileOptions, ZipWriter},
};

use crate::{error::Error, FORMAT_NAME};
Expand Down Expand Up @@ -54,8 +54,8 @@ impl Builder {
FileType::Jpeg => format!("{}{JPEG_EXT}", self.id()),
};
self.zip_writer.start_file(
&name,
FileOptions::default()
name.clone(),
FullFileOptions::default()
.large_file(true)
.compression_method(zip::CompressionMethod::Stored),
)?;
Expand Down
8 changes: 4 additions & 4 deletions src/pqarray/array_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ mod tests {
#[test]
fn date_overflow() {
let min = NaiveDate::from_parquet(i32::MIN, &None);
assert_eq!(min.to_string(), "-262144-01-01");
assert_eq!(min.to_string(), "-262143-01-01");
let max = NaiveDate::from_parquet(i32::MAX, &None);
assert_eq!(max.to_string(), "+262143-12-31");
assert_eq!(max.to_string(), "+262142-12-31");
}

#[test]
Expand Down Expand Up @@ -281,8 +281,8 @@ mod tests {
#[test]
fn date_time_overflow() {
let min = DateTime::<Utc>::from_parquet(i64::MIN, &DATE_TIME_MILLI);
assert_eq!(min.to_string(), "-262144-01-01 00:00:00 UTC");
assert_eq!(min.to_string(), "-262143-01-01 00:00:00 UTC");
let max = DateTime::<Utc>::from_parquet(i64::MAX, &DATE_TIME_MILLI);
assert_eq!(max.to_string(), "+262143-12-31 23:59:59.999999999 UTC");
assert_eq!(max.to_string(), "+262142-12-31 23:59:59.999999999 UTC");
}
}
16 changes: 9 additions & 7 deletions src/pqarray/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,14 @@ impl Counter {
/// Reads a block of data from a column reader.
fn read_column_chunk<D: DataType>(
column: &mut ColumnReaderImpl<D>,
values: &mut [D::T],
def_levels: Option<&mut [i16]>,
values: &mut Vec<D::T>,
mut def_levels: Option<&mut Vec<i16>>,
) -> Result<(usize, usize), Error> {
let mut max_records = values.len();
if let Some(d) = &def_levels {
max_records = max_records.max(d.len());
values.clear();
let mut max_records = values.capacity();
if let Some(d) = &mut def_levels {
d.clear();
max_records = max_records.max(d.capacity());
}
let (n_val, n_def, _n_rep) = column.read_records(max_records, def_levels, None, values)?;
Ok((n_val, n_def))
Expand Down Expand Up @@ -193,8 +195,8 @@ impl<P: PqArrayType> GroupValues<P> for NullableGroupValues<P> {
len: 0,
index: Counter::new(),
value_index: Counter::new(),
values: vec![Default::default(); CHUNK_SIZE],
def_levels: vec![0; CHUNK_SIZE],
values: Vec::with_capacity(CHUNK_SIZE),
def_levels: Vec::with_capacity(CHUNK_SIZE),
logical_type,
}
}
Expand Down
10 changes: 0 additions & 10 deletions src/pqarray/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ pub trait Source {
fn buffer(&mut self, size: usize) -> usize;
/// Write data from the last `buffer()` call.
fn write(&mut self, row_group: &mut dyn RowGrouper) -> Result<(), ParquetError>;
/// Size hint from the underlying iterator.
fn size_hint(&self) -> (usize, Option<usize>);
}

fn single_type<P: PqArrayType>(name: &str, nullable: bool) -> Type {
Expand Down Expand Up @@ -108,10 +106,6 @@ impl<R: PqArrayRow, I: Iterator<Item = R>> Source for RowSource<R, I> {
fn write(&mut self, row_group: &mut dyn RowGrouper) -> Result<(), ParquetError> {
R::write_buffer(&self.buffer, row_group, &self.def_levels[..self.count])
}

fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}

pub struct NullableRowSource<R: PqArrayRow, I: Iterator<Item = Option<R>>> {
Expand Down Expand Up @@ -182,10 +176,6 @@ impl<R: PqArrayRow, I: Iterator<Item = Option<R>>> Source for NullableRowSource<
&self.def_levels[..self.def_levels.len()],
)
}

fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}

impl<P: PqArrayType, const N: usize> PqArrayRow for [P; N] {
Expand Down
2 changes: 1 addition & 1 deletion src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ impl Visitor for TweakSchema {
// Change references to the generics Array_for_* to just Array.
if let Some(r) = schema.reference.as_mut() {
if r.starts_with("#/definitions/Array_for_") {
*r = "#/definitions/Array".to_owned();
"#/definitions/Array".clone_into(r);
}
}
// Then delegate to default implementation to visit any subschemas.
Expand Down
2 changes: 1 addition & 1 deletion src/version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub const FORMAT_VERSION_MINOR: u32 = 0;
///
/// This will always be `None` in release versions of the crate. Pre-release formats
/// may contain experimental changes, so they can't be opened by release versions.
pub const FORMAT_VERSION_PRERELEASE: Option<&str> = Some("alpha.2");
pub const FORMAT_VERSION_PRERELEASE: Option<&str> = Some("beta.1");

/// Returns a string containing the file format version that this crate produces.
pub fn format_version() -> String {
Expand Down
2 changes: 1 addition & 1 deletion tests/conversion_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fn convert_omf1() {
assert!(metadata.starts_with(
r#"{
"OMF1 conversion": {
"by": "omf 0.1.0-alpha.2",
"by": "omf 0.1.0-beta.1",
"from": "OMF-v0.9.0",
"on": "#
));
Expand Down

0 comments on commit e3b724c

Please sign in to comment.