From fb073911f4405db16b7f743cf6475f6b360446b3 Mon Sep 17 00:00:00 2001 From: Hendrik Sollich Date: Tue, 13 Aug 2024 21:30:16 +0200 Subject: [PATCH] fix: consider value type when escaping and unescaping this fixes #104 (introduced in 0.16.2) where URL properties got unnecessarily escaped --- src/components/alarm.rs | 2 +- src/components/date_time.rs | 3 +- src/lib.rs | 4 +- src/parser/parsed_string.rs | 9 +++ src/parser/properties.rs | 110 ++++++++++++++++++++++++---- src/properties.rs | 124 +++++++++++--------------------- src/value_types.rs | 139 ++++++++++++++++++++++++++++++++++++ 7 files changed, 291 insertions(+), 100 deletions(-) create mode 100644 src/value_types.rs diff --git a/src/components/alarm.rs b/src/components/alarm.rs index e978789..2f72a85 100644 --- a/src/components/alarm.rs +++ b/src/components/alarm.rs @@ -25,7 +25,7 @@ use super::*; /// When it is time for the Alarm to occur we have to define what is actually supposed to happen. /// The RFC5545 know three different actions, two of which are currently implemented. /// -/// 1. Display +/// 1. Display /// 2. Audio /// 3. Email (not yet implemented) /// diff --git a/src/components/date_time.rs b/src/components/date_time.rs index b467bce..f9cdbe7 100644 --- a/src/components/date_time.rs +++ b/src/components/date_time.rs @@ -1,7 +1,6 @@ -#![allow(dead_code, unused)] use std::str::FromStr; -use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Offset, TimeZone as _, Utc}; +use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, TimeZone as _, Utc}; use crate::{Property, ValueType}; diff --git a/src/lib.rs b/src/lib.rs index 17bcee1..3e953a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,6 +100,7 @@ mod components; #[cfg(feature = "parser")] pub mod parser; mod properties; +mod value_types; pub use crate::{ calendar::{Calendar, CalendarComponent}, @@ -108,7 +109,8 @@ pub use crate::{ date_time::{CalendarDateTime, DatePerhapsTime}, Component, Event, EventLike, Todo, Venue, }, - properties::{Class, EventStatus, Parameter, Property, TodoStatus, ValueType}, + properties::{Class, EventStatus, Parameter, Property, TodoStatus}, + value_types::ValueType, }; #[cfg(feature = "chrono-tz")] diff --git a/src/parser/parsed_string.rs b/src/parser/parsed_string.rs index ade6dbf..f7af4b5 100644 --- a/src/parser/parsed_string.rs +++ b/src/parser/parsed_string.rs @@ -1,5 +1,7 @@ use std::borrow::Cow; +use crate::ValueType; + /// A zero-copy string parsed from an iCal input. #[derive(Debug, Eq, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -26,6 +28,13 @@ impl ParseString<'_> { } impl<'a> ParseString<'a> { + pub fn unescape_by_value_type(self, value_type: ValueType) -> ParseString<'a> { + match value_type { + ValueType::Text => self.unescape_text(), + _ => self, + } + } + pub fn unescape_text(self) -> ParseString<'a> { if self.0.contains(r#"\\"#) || self.0.contains(r#"\,"#) diff --git a/src/parser/properties.rs b/src/parser/properties.rs index d033c40..074b9f1 100644 --- a/src/parser/properties.rs +++ b/src/parser/properties.rs @@ -3,7 +3,7 @@ use std::{ str::FromStr, }; -use crate::{parser::utils::valid_key_sequence_cow, properties::fold_line}; +use crate::{parser::utils::valid_key_sequence_cow, properties::fold_line, value_types::ValueType}; use super::{ parameters::{parameters, Parameter}, @@ -124,7 +124,7 @@ impl FromStr for crate::Property { } #[test] -fn test_property() { +fn properties() { assert_parser!( property, "KEY:VALUE\n", @@ -164,31 +164,97 @@ fn test_property() { params: vec![Parameter::new_ref("foo", Some("bar"))] } ); +} + +#[test] +fn unescape_text() { + assert_eq!( + "DESCRIPTION:https\\://example.com\n" + .parse::() + .map(|p| p.val) + .unwrap(), + "https://example.com" + ); + + assert_parser!( + property, + "DESCRIPTION:https\\://example.com\n", + Property { + name: "DESCRIPTION".into(), + val: "https://example.com".into(), + params: Default::default() + } + ); +} + +#[test] +fn property_escape_url_as_url() { + assert_eq!( + "URL:https://example.com\n" + .parse::() + .map(|p| p.val) + .unwrap(), + "https://example.com" + ); + + assert_parser!( + property, + "URL:https://example.com\n", + Property { + name: "URL".into(), + val: "https://example.com".into(), + params: Default::default() + } + ); +} - // TODO: newlines followed by spaces must be ignored +#[test] +fn property_escape_description_as_text() { assert_parser!( property, - "KEY4;foo=bar:VALUE\\n newline separated\n", + "DESCRIPTION;foo=bar:VALUE\\n newline separated\n", Property { - name: "KEY4".into(), + name: "DESCRIPTION".into(), val: "VALUE\n newline separated".into(), params: vec![Parameter::new_ref("foo", Some("bar"))] } ); +} +#[test] +fn property_escape_by_value_param() { assert_parser!( property, - "KEY3;foo=bar:VALUE\\n newline separated\n", + "FOO;VALUE=TEXT:VALUE\\n newline separated\n", Property { - name: "KEY3".into(), + name: "FOO".into(), val: "VALUE\n newline separated".into(), - params: vec![Parameter::new_ref("foo", Some("bar"))] + params: vec![Parameter::new_ref("VALUE", Some("TEXT"))] + } + ); + + assert_parser!( + property, + "FOO_AS_TEXT;VALUE=TEXT:https\\://example.com\n", + Property { + name: "FOO_AS_TEXT".into(), + val: "https://example.com".into(), + params: vec![Parameter::new_ref("VALUE", Some("TEXT"))] + } + ); + assert_parser!( + property, + "FOO_AS_URL;VALUE=URL:https\\://example.com\n", + Property { + name: "FOO_AS_URL".into(), + val: "https\\://example.com".into(), + params: vec![Parameter::new_ref("VALUE", Some("URL"))] } ); } #[test] -fn test_property_with_dash() { +fn property_with_dash() { assert_parser!( property, "X-HOODIE-KEY:VALUE\n", @@ -318,8 +384,7 @@ pub fn property<'a, E: ParseError<&'a str> + ContextError<&'a str>>( // this is for single line prop parsing, just so I can leave off the '\n' take_while(|_| true), )) - .map(ParseString::from) - .map(ParseString::unescape_text), + .map(ParseString::from), ), ), context( @@ -329,10 +394,25 @@ pub fn property<'a, E: ParseError<&'a str> + ContextError<&'a str>>( )), opt(line_ending), )) - .map(|(((key, params), val), _)| Property { - name: key, - val, - params, + .map(|(((name, params), val), _)| { + let value_type = params + .iter() + .find(|param| param.key == "VALUE") + .and_then(|Parameter { val, .. }| val.as_ref()) + .and_then(|typ| ValueType::from_str(typ.as_str()).ok()) + .or_else(|| ValueType::by_name(name.as_str())); + + if let Some(value_type) = value_type { + eprintln!("escaping for {value_type:?}"); + Property { + name, + val: val.unescape_by_value_type(value_type), + params, + } + } else { + eprintln!("NOT escaping"); + Property { name, val, params } + } })), ) .parse(input) diff --git a/src/properties.rs b/src/properties.rs index 757eb62..6d8b80f 100644 --- a/src/properties.rs +++ b/src/properties.rs @@ -2,8 +2,11 @@ use std::{ collections::HashMap, fmt::{self, Write}, mem, + str::FromStr, }; +use crate::value_types::ValueType; + #[derive(Clone, Debug, PartialEq, Eq)] /// key-value pairs inside of `Property`s pub struct Parameter { @@ -89,7 +92,10 @@ impl Property { /// Returns the `VALUE` parameter, if any is specified. pub fn value_type(&self) -> Option { - ValueType::from_str(&self.params.get("VALUE")?.val) + self.params + .get("VALUE") + .and_then(|p| ValueType::from_str(&p.val).ok()) + .or_else(|| ValueType::by_name(self.key())) } // /// Returns the value as a certain type @@ -171,7 +177,17 @@ impl Property { for Parameter { key, val } in self.params.values() { write!(line, ";{}={}", key, Self::quote_if_contains_colon(val))?; } - write!(line, ":{}", Self::escape_text(&self.val))?; + let value_type = self.value_type(); + eprintln!( + "{}:{} value type of {key} = {value_type:?}", + file!(), + line!(), + key = self.key + ); + match value_type { + Some(ValueType::Text) => write!(line, ":{}", Self::escape_text(&self.val))?, + _ => write!(line, ":{}", self.val)?, + } write_crlf!(out, "{}", fold_line(&line))?; Ok(()) } @@ -224,85 +240,6 @@ impl From for Property { } } -/// see 8.3.4. [Value Data Types Registry](https://tools.ietf.org/html/rfc5545#section-8.3.4) -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum ValueType { - /// [`Binary`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.1) - Binary, - /// [`Boolean`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.2) - Boolean, - /// [`CalAddress`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.3) - CalAddress, - /// [`Date`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.4) - Date, - /// [`DateTime`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.5) - DateTime, - /// [`Duration`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.6) - Duration, - /// [`Float`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.7) - Float, - /// [`Integer`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.8) - Integer, - /// [`Period`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.9) - Period, - /// [`Recur`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.10) - Recur, - /// [`Text`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.11) - Text, - /// [`Time`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.12) - Time, - /// [`Uri`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.13) - Uri, - /// [`UtcOffset`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.14) - UtcOffset, -} - -impl ValueType { - pub(crate) fn from_str(s: &str) -> Option { - match s { - "BINARY" => Some(Self::Binary), - "BOOLEAN" => Some(Self::Boolean), - "CAL-ADDRESS" => Some(Self::CalAddress), - "DATE" => Some(Self::Date), - "DATE-TIME" => Some(Self::DateTime), - "DURATION" => Some(Self::Duration), - "FLOAT" => Some(Self::Float), - "INTEGER" => Some(Self::Integer), - "PERIOD" => Some(Self::Period), - "RECUR" => Some(Self::Recur), - "TEXT" => Some(Self::Text), - "TIME" => Some(Self::Time), - "URI" => Some(Self::Uri), - "UTC-OFFSET" => Some(Self::UtcOffset), - _ => None, - } - } -} - -impl From for Parameter { - fn from(val: ValueType) -> Self { - Parameter { - key: String::from("VALUE"), - val: String::from(match val { - ValueType::Binary => "BINARY", - ValueType::Boolean => "BOOLEAN", - ValueType::CalAddress => "CAL-ADDRESS", - ValueType::Date => "DATE", - ValueType::DateTime => "DATE-TIME", - ValueType::Duration => "DURATION", - ValueType::Float => "FLOAT", - ValueType::Integer => "INTEGER", - ValueType::Period => "PERIOD", - ValueType::Recur => "RECUR", - ValueType::Text => "TEXT", - ValueType::Time => "TIME", - ValueType::Uri => "URI", - ValueType::UtcOffset => "UTC-OFFSET", - }), - } - } -} - #[derive(Copy, Clone, Debug, Eq, PartialEq)] /// Encodes the status of an `Event` /// [RFC 5545, Section 3.8.1.11](https://datatracker.ietf.org/doc/html/rfc5545#section-3.8.1.11) @@ -375,6 +312,31 @@ impl From for Property { } } +// TODO: why do we add this? +impl From for Parameter { + fn from(val: ValueType) -> Self { + Parameter { + key: String::from("VALUE"), + val: String::from(match val { + ValueType::Binary => "BINARY", + ValueType::Boolean => "BOOLEAN", + ValueType::CalAddress => "CAL-ADDRESS", + ValueType::Date => "DATE", + ValueType::DateTime => "DATE-TIME", + ValueType::Duration => "DURATION", + ValueType::Float => "FLOAT", + ValueType::Integer => "INTEGER", + ValueType::Period => "PERIOD", + ValueType::Recur => "RECUR", + ValueType::Text => "TEXT", + ValueType::Time => "TIME", + ValueType::Uri => "URI", + ValueType::UtcOffset => "UTC-OFFSET", + }), + } + } +} + impl From for Property { fn from(val: TodoStatus) -> Self { Property::new_pre_alloc( diff --git a/src/value_types.rs b/src/value_types.rs new file mode 100644 index 0000000..5ebf962 --- /dev/null +++ b/src/value_types.rs @@ -0,0 +1,139 @@ +use std::str::FromStr; + +use crate::Parameter; + +/// see 8.3.4. [Value Data Types Registry](https://tools.ietf.org/html/rfc5545#section-8.3.4) +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ValueType { + /// [`Binary`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.1) + Binary, + /// [`Boolean`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.2) + Boolean, + /// [`CalAddress`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.3) + CalAddress, + /// [`Date`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.4) + Date, + /// [`DateTime`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.5) + DateTime, + /// [`Duration`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.6) + Duration, + /// [`Float`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.7) + Float, + /// [`Integer`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.8) + Integer, + /// [`Period`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.9) + Period, + /// [`Recur`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.10) + Recur, + /// [`Text`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.11) + Text, + /// [`Time`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.12) + Time, + /// [`Uri`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.13) + Uri, + /// [`UtcOffset`](https://datatracker.ietf.org/doc/html/rfc5545#section-3.3.14) + UtcOffset, +} + +impl ValueType { + pub(crate) fn by_name(name: &str) -> Option { + if name.chars().any(|c| c.is_lowercase()) { + eprintln!("property_name must be uppercase"); + return None; + } + use ValueType::*; + let value_type = match name { + // 3.7.0 calendar properties + "CALSCALE" => Some(Text), // 3.7.1 + "METHOD" => Some(Text), // 3.7.2 + "PRODID" => Some(Text), // 3.7.3 + "VERSION" => Some(Text), //3.7.4 + + // 3.8.0 component properties + "ATTACH" => Some(Uri), // or BINARY // 3.8.1.1 + "CATEGORIES" => Some(Text), // 3.8.1.2 + "CLASS" => Some(Text), // 3.8.1.3 + "COMMENT" => Some(Text), // 3.8.1.4 + "DESCRIPTION" => Some(Text), // 3.8.1.5 + "GEO" => Some(Float), // 3.8.1.6 + "LOCATION" => Some(Text), // 3.8.1.7 + "PERCENT-COMPLETE" => Some(Integer), // 3.8.1.8 + "PRIORITY" => Some(Integer), // 3.8.1.9 + "RESOURCES" => Some(Text), // 3.8.1.10 + "STATUS" => Some(Text), // 3.8.1.11 + "SUMMARY" => Some(Text), // 3.8.8.1.12 + "COMPLETED" => Some(DateTime), // 3.8.2.1 + "DTEND" => Some(DateTime), // or DATE // 3.8.2.2 + "DUE" => Some(DateTime), // or DATE // 3.8.2.3 + "DTSTART" => Some(DateTime), // or DATE 3.8.2.4 + "DURATION" => Some(Duration), // 3.8.2.5 + "FREEBUSY" => Some(Period), // 3.8.2.6 + "TRANSP" => Some(Text), // 3.8.2.7 + "TZID" => Some(Text), // 3.8.3.1 + "TZNAME" => Some(Text), // 3.8.3.2 + "TZOFFSETFROM" => Some(UtcOffset), // + "TZOFFSETTO" => Some(UtcOffset), // + "TZURL" => Some(Uri), // + "ATTENDEE" => Some(CalAddress), // 3.8.4.1 + "CONTACT" => Some(Text), // 3.8.4.2 + "ORGANIZER" => Some(CalAddress), // 3.8.4.3 + "RECURRENCE-ID" => Some(DateTime), // 3.8.4.4 + "RELATED-TO" => Some(Text), // + "URL" => Some(Uri), // + "UID" => Some(Text), // + "EXDATE" => Some(DateTime), // or DATE-TIME // + "RDATE" => Some(DateTime), // or PERIOD // or DATE-TIME // or DATE // + "RRULE" => Some(Recur), // + "ACTION" => Some(Text), // + "REPEAT" => Some(Integer), // + "TRIGGER" => Some(Duration), // or DATE-TIME (must be UTC) // 3.8.6.3 + "CREATED" => Some(DateTime), // 3.8.7.1 + "DTSTAMP" => Some(DateTime), // 3.8.7.2 + "LAST-MODIFIED" => Some(DateTime), // 3.8.7.3 + "SEQUENCE" => Some(Integer), // 3.8.7.4 + // 3.8.8.1 + // "An IANA-registered property name" => Any parameter can be specified on this property. + // "X-"/* ... */ => Some(Text), // any type 3.8.8.2 + "REQUEST-STATUS" => Some(Text), // 3.8.8.3 + _ => None, + }; + eprintln!("ValueType {value_type:?} by_name({name:?})"); + return value_type; + } +} + +impl TryFrom for ValueType { + type Error = (); + + fn try_from(par: Parameter) -> Result { + if par.key() != "VALUE" { + Err(()) + } else { + FromStr::from_str(par.value()) + } + } +} + +impl FromStr for ValueType { + type Err = (); + + fn from_str(val: &str) -> Result { + match val { + "BINARY" => Ok(Self::Binary), + "BOOLEAN" => Ok(Self::Boolean), + "CAL-ADDRESS" => Ok(Self::CalAddress), + "DATE" => Ok(Self::Date), + "DATE-TIME" => Ok(Self::DateTime), + "DURATION" => Ok(Self::Duration), + "FLOAT" => Ok(Self::Float), + "INTEGER" => Ok(Self::Integer), + "PERIOD" => Ok(Self::Period), + "RECUR" => Ok(Self::Recur), + "TEXT" => Ok(Self::Text), + "TIME" => Ok(Self::Time), + "URI" => Ok(Self::Uri), + "UTC-OFFSET" => Ok(Self::UtcOffset), + _ => Err(()), + } + } +}