From 41408a887b571d24483cdae9416642171abfe5f4 Mon Sep 17 00:00:00 2001 From: mochi-neko Date: Mon, 19 Feb 2024 20:20:51 +0900 Subject: [PATCH] Improve rules of srt parser --- src/lib.rs | 1 - src/srt.rs | 122 +++++++++++++++++++++++++++++++++++++- src/str_parser.rs | 84 +++++++++++--------------- {src => tests}/general.rs | 2 +- 4 files changed, 155 insertions(+), 54 deletions(-) rename {src => tests}/general.rs (99%) diff --git a/src/lib.rs b/src/lib.rs index 2a92390..d484114 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ pub mod vtt; // Internal modules. mod error; -mod general; mod result; mod str_parser; mod vtt_parser; diff --git a/src/srt.rs b/src/srt.rs index 227f929..10828b1 100644 --- a/src/srt.rs +++ b/src/srt.rs @@ -64,6 +64,7 @@ use std::cmp::Ordering; use std::fmt::{Display, Formatter}; use std::ops::{Add, Sub}; +use std::time::Duration; use crate::str_parser; use crate::ParseResult; @@ -269,7 +270,7 @@ pub struct SrtSubtitle { pub start: SrtTimestamp, /// The end timestamp. pub end: SrtTimestamp, - /// The text. + /// The subtitle text. 
pub text: Vec<String>, } @@ -356,6 +357,11 @@ impl Display for SrtSubtitle { /// seconds: 1, /// ..Default::default() /// }; +/// +/// assert_eq!( +/// timestamp.to_string(), +/// "00:00:01,000".to_string() +/// ); /// ``` #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct SrtTimestamp { @@ -466,6 +472,35 @@ impl Sub for SrtTimestamp { } } +impl From<Duration> for SrtTimestamp { + fn from(duration: Duration) -> Self { + let seconds = duration.as_secs(); + let milliseconds = duration.subsec_millis() as u16; + + let hours = (seconds / 3600) as u8; + let minutes = ((seconds % 3600) / 60) as u8; + let seconds = (seconds % 60) as u8; + + Self { + hours, + minutes, + seconds, + milliseconds, + } + } +} + +impl Into<Duration> for SrtTimestamp { + fn into(self) -> Duration { + Duration::new( + u64::from(self.hours) * 3600 + + u64::from(self.minutes) * 60 + + u64::from(self.seconds), + self.milliseconds as u32 * 1_000_000, + ) + } +} + #[cfg(test)] mod tests { use super::*; @@ -765,4 +800,89 @@ This is a test.
}; assert!(subtitle1 < subtitle2); } + + #[test] + fn display_timestamp() { + let timestamp = SrtTimestamp { + hours: 0, + minutes: 0, + seconds: 1, + milliseconds: 0, + }; + let displayed = format!("{}", timestamp); + let expected = "00:00:01,000"; + assert_eq!(displayed, expected); + } + + #[test] + fn from_duration_to_timestamp() { + let duration = Duration::new(1, 0); + let timestamp: SrtTimestamp = duration.into(); + assert_eq!( + timestamp, + SrtTimestamp { + hours: 0, + minutes: 0, + seconds: 1, + milliseconds: 0, + } + ); + + let duration = Duration::new(3661, 0); + let timestamp: SrtTimestamp = duration.into(); + assert_eq!( + timestamp, + SrtTimestamp { + hours: 1, + minutes: 1, + seconds: 1, + milliseconds: 0, + } + ); + + let duration = Duration::new(3661, 500 * 1_000_000); + let timestamp: SrtTimestamp = duration.into(); + assert_eq!( + timestamp, + SrtTimestamp { + hours: 1, + minutes: 1, + seconds: 1, + milliseconds: 500, + } + ); + } + + #[test] + fn from_timestamp_to_duration() { + let timestamp = SrtTimestamp { + hours: 0, + minutes: 0, + seconds: 1, + milliseconds: 0, + }; + let duration: Duration = timestamp.into(); + assert_eq!(duration, Duration::new(1, 0)); + + let timestamp = SrtTimestamp { + hours: 1, + minutes: 1, + seconds: 1, + milliseconds: 0, + }; + let duration: Duration = timestamp.into(); + assert_eq!(duration, Duration::new(3661, 0)); + + let timestamp = SrtTimestamp { + hours: 1, + minutes: 1, + seconds: 1, + milliseconds: 500, + }; + let duration: Duration = timestamp.into(); + assert_eq!( + duration, + Duration::new(3661, 500 * 1_000_000) + ); + } } diff --git a/src/str_parser.rs b/src/str_parser.rs index 2eb02a7..06d5321 100644 --- a/src/str_parser.rs +++ b/src/str_parser.rs @@ -12,65 +12,38 @@ peg::parser! { /// Whitespace. rule whitespace() = [' ' | '\t'] - /// Zero or more whitespaces. - pub(crate) rule whitespaces() = quiet!{ whitespace()* } - - /// One or more whitespaces. 
- pub(crate) rule some_whitespaces() = whitespace()+ - /// Newline. - pub(crate) rule newline() = "\r\n" / "\n" / "\r" - - /// Zero or more newlines. - pub(crate) rule newlines() = quiet!{ newline()* } - - /// One or more newlines. - pub(crate) rule some_newlines() = newline()+ - - /// Whitespace or newline. - pub(crate) rule whitespace_or_newline() = [' ' | '\t' | '\r' | '\n'] - - /// Zero or more whitespaces or newlines. - pub(crate) rule whitespaces_or_newlines() = quiet!{ whitespace_or_newline()* } + rule newline() = "\r\n" / "\n" / "\r" - /// One or more whitespaces or one newline. - pub(crate) rule some_whitespaces_or_newline() = some_whitespaces() / newline() - - /// One or more whitespaces or newlines. - pub(crate) rule some_whitespaces_or_newlines() = whitespace_or_newline()+ + /// Whitespaces and/or newline without two or more newlines. + rule separator() = !(newline() newline()) (whitespace() / newline())+ /// Any-digit number. - pub(crate) rule number() -> u32 + rule number() -> u32 = n:$(['0'..='9']+) {? - n.parse().or(Err("number")) + n.parse().or(Err("number in u32")) } /// Two-digit number. - pub(crate) rule two_number() -> u8 + rule two_number() -> u8 = n:$(['0'..='9']['0'..='9']) {? n.parse().or(Err("two-digit number")) } /// Three-digit number. - pub(crate) rule three_number() -> u16 + rule three_number() -> u16 = n:$(['0'..='9']['0'..='9']['0'..='9']) {? n.parse().or(Err("three-digit number")) } /// Multiple lines block of text. - pub(crate) rule multiline() -> Vec<String> - = !whitespace_or_newline() lines:$((!newline() [_])+ newline()) ** () - {? - let lines = lines + rule multiline() -> Vec<String> + = !((whitespace() / newline())+) lines:$((!newline() [_])+ newline()) ++ () + { + lines .iter() .map(|l| l.to_string().trim().to_string()) - .collect::<Vec<String>>(); - - if !lines.is_empty() { - Ok(lines) - } else { - Err("Empty multiline") - } + .collect::<Vec<String>>() } /// Timestamp. @@ -87,18 +60,18 @@ peg::parser! { /// Single subtitle entry.
pub(crate) rule subtitle() -> SrtSubtitle - = whitespaces() sequence:number() whitespaces() newline() - whitespaces() start:timestamp() whitespaces() "-->" whitespaces() end:timestamp() whitespaces() newline() - whitespaces() text:multiline() + = sequence:number() separator() + start:timestamp() whitespace()* "-->" whitespace()* end:timestamp() separator() + text:multiline() { SrtSubtitle { sequence, start, end, text } } /// The entire SRT. pub(crate) rule srt() -> SubRip - = whitespaces_or_newlines() - subtitles:subtitle() ** some_whitespaces_or_newlines() - whitespaces_or_newlines() + = (whitespace() / newline())* + subtitles:subtitle() ** (newline()+) + (whitespace() / newline())* { SubRip { subtitles, } } @@ -211,7 +184,7 @@ mod test { // Allow whitespaces. assert_eq!( srt_parser::subtitle( - " 1 \n 00:00:00,000 --> 00:00:01,000 \n \tHello, world! \n" + "1 \n 00:00:00,000 --> 00:00:01,000 \n \tHello, world! \n" ) .unwrap(), subtitle @@ -226,16 +199,20 @@ mod test { subtitle ); + // Allow separator with whitespaces. + assert_eq!( + srt_parser::subtitle( + "1 00:00:00,000 --> 00:00:01,000 Hello, world!\n" + ) + .unwrap(), + subtitle + ); + // Prohibit spaces or new lines in header. assert!(srt_parser::subtitle( "\n1\n00:00:00,000 --> 00:00:01,000\nHello, world!\n" ) .is_err()); - // Must be separated by newlines. - assert!(srt_parser::subtitle( - "1 00:00:00,000 --> 00:00:01,000 Hello, world!\n" - ) - .is_err()); // Prohibit two or more newlines. assert!(srt_parser::subtitle( "1\n\n00:00:00,000 --> 00:00:01,000\nHello, world!\n" @@ -249,6 +226,11 @@ mod test { "1\n00:00:00,000 --> 00:00:01,000\nHello, world!\n\n" ) .is_err()); + // Prohibit empty text. 
+ assert!( + srt_parser::subtitle("1\n00:00:00,000 --> 00:00:01,000\n\n") + .is_err() + ); } #[test] diff --git a/src/general.rs b/tests/general.rs similarity index 99% rename from src/general.rs rename to tests/general.rs index 9a7c94b..e4e413c 100644 --- a/src/general.rs +++ b/tests/general.rs @@ -33,7 +33,7 @@ peg::parser! { /// Any-digit number. pub(crate) rule number() -> u32 = n:$(['0'..='9']+) {? - n.parse().or(Err("number")) + n.parse().or(Err("number in u32")) } /// Signed integer.