diff --git a/web/http/src/request.rs b/web/http/src/request.rs index 12e9062b..f5a0eeb9 100644 --- a/web/http/src/request.rs +++ b/web/http/src/request.rs @@ -141,8 +141,7 @@ impl Request { Host::Domain(host) | Host::OpaqueHost(host) => dns::Domain::new(host.as_str()) .lookup() .map_err(HTTPError::DNS)?, - Host::IPv4(_ip) => todo!(), - Host::IPv6(_ip) => todo!(), + Host::Ip(_ip) => todo!(), Host::EmptyHost => todo!(), }; diff --git a/web/url/src/host.rs b/web/url/src/host.rs index e913bbec..7e61433d 100644 --- a/web/url/src/host.rs +++ b/web/url/src/host.rs @@ -1,10 +1,11 @@ -use std::str::FromStr; +use std::net; use sl_std::{ascii, punycode}; use crate::{ + ip::{ipv4_parse, ipv6_parse}, percent_encode::{is_c0_control, percent_encode}, - util, IPParseError, Ipv4Address, Ipv6Address, ValidationError, ValidationErrorHandler, + util, IPParseError, ValidationError, ValidationErrorHandler, }; /// @@ -45,8 +46,7 @@ fn is_forbidden_domain_code_point(c: ascii::Char) -> bool { #[derive(PartialEq, Clone, Debug)] pub enum Host { Domain(ascii::String), - IPv4(Ipv4Address), - IPv6(Ipv6Address), + Ip(net::IpAddr), OpaqueHost(ascii::String), EmptyHost, } @@ -63,13 +63,13 @@ impl ToString for Host { // fn to_string(&self) -> String { match self { - Self::IPv4(ipv4) => { + Self::Ip(net::IpAddr::V4(ipv4)) => { // 1. If host is an IPv4 address, return the result of running the IPv4 serializer on host. ipv4.to_string() }, - Self::IPv6(ipv6) => { + Self::Ip(net::IpAddr::V6(ipv6)) => { // 2. Otherwise, if host is an IPv6 address, return U+005B ([), followed by the result of running the IPv6 serializer on host, followed by U+005D (]). - format!("[{}]", ipv6.to_string()) + format!("[{ipv6}]") }, Self::Domain(host) | Self::OpaqueHost(host) => { // 3. Otherwise, host is a domain, opaque host, or empty host, return host. @@ -105,8 +105,9 @@ where // Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed. 
let ipv6_text = &input[1..input.len() - 1]; - let parsed_ip = Host::IPv6(Ipv6Address::from_str(ipv6_text).map_err(HostParseError::IP)?); - return Ok(parsed_ip); + let ipv6 = ipv6_parse(ipv6_text).map_err(HostParseError::IP)?; + let host = Host::Ip(net::IpAddr::V6(ipv6)); + return Ok(host); } // If isNotSpecial is true @@ -150,9 +151,8 @@ where .is_some_and(|&c| ascii::Char::Digit0 <= c && c <= ascii::Char::Digit9) { // then return the result of IPv4 parsing asciiDomain. - return Ok(Host::IPv4( - Ipv4Address::from_str(input).map_err(HostParseError::IP)?, - )); + let ipv4 = ipv4_parse(input).map_err(HostParseError::IP)?; + return Ok(Host::Ip(net::IpAddr::V4(ipv4))); } // Return asciiDomain. diff --git a/web/url/src/ip.rs b/web/url/src/ip.rs index 8e4f4a70..5ddea403 100644 --- a/web/url/src/ip.rs +++ b/web/url/src/ip.rs @@ -1,12 +1,4 @@ -use std::str::FromStr; - -/// -#[derive(PartialEq, Clone, Copy, Debug)] -pub struct Ipv4Address(u32); - -/// -#[derive(PartialEq, Clone, Copy, Debug)] -pub struct Ipv6Address([u16; 8]); +use std::net; #[derive(Clone, Copy, Debug)] pub enum IPParseError { @@ -21,173 +13,8 @@ pub enum IPParseError { Generic, } -impl FromStr for Ipv4Address { - type Err = IPParseError; - fn from_str(input: &str) -> Result { - Ok(Self(ipv4_parse(input)?)) - } -} - -impl FromStr for Ipv6Address { - type Err = IPParseError; - - fn from_str(input: &str) -> Result { - Ok(Self(ipv6_parse(input)?)) - } -} - -impl ToString for Ipv4Address { - fn to_string(&self) -> String { - // 1. Let output be the empty string. - let mut octets = [0; 4]; - - // 2. Let n be the value of address. - let mut n = self.0; - - // 3. For each i in the range 1 to 4, inclusive: - for i in 0..4 { - // 1. Prepend n % 256, serialized, to output. - octets[i] = n % 256; - // 2. If i is not 4, then prepend U+002E (.) to output. - // NOTE: the actual serialization happens later in the code - - // 3. Set n to floor(n / 256). - n /= 256; - } - - // 4 Return output. 
- format!("{}.{}.{}.{}", octets[0], octets[1], octets[2], octets[3]) - } -} - -impl ToString for Ipv6Address { - fn to_string(&self) -> String { - // 1. Let output be the empty string. - let mut output = String::new(); - - // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0. - let mut longest_sequence_length = 0; - let mut longest_sequence_start = 0; - let mut current_sequence_length = 0; - let mut current_sequence_start = 0; - - for (index, &piece) in self.0.iter().enumerate() { - if piece == 0 { - if current_sequence_length == 0 { - current_sequence_start = index; - } - - current_sequence_length += 1; - if current_sequence_length > longest_sequence_length { - longest_sequence_length = current_sequence_length; - longest_sequence_start = current_sequence_start; - } - } else { - current_sequence_length = 0; - } - } - - // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null. - let compress = if longest_sequence_length == 1 { - 0 - } else { - longest_sequence_start - }; - - // 4. Let ignore0 be false. - let mut ignore0 = false; - - // 5. For each pieceIndex in the range 0 to 7, inclusive: - for piece_index in 0..8 { - // 1. If ignore0 is true and address[pieceIndex] is 0, then continue. - if ignore0 && self.0[piece_index] == 0 { - continue; - } - - // 2. Otherwise, if ignore0 is true, set ignore0 to false. - ignore0 = false; - - // 3. If compress is pieceIndex, then: - if compress == piece_index { - // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise. - let seperator = if piece_index == 0 { "::" } else { ":" }; - - // 2. Append separator to output. - output.push_str(seperator); - - // 3. Set ignore0 to true and continue. - ignore0 = true; - continue; - } - - // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output. 
- output.push_str(&format!("{:x}", self.0[piece_index])); - - // If pieceIndex is not 7, then append U+003A (:) to output. - output.push(':') - } - - // 6. Return output. - output - } -} - -/// -fn ipv4_number_parse(mut input: &str) -> Result<(u32, bool), IPParseError> { - // If input is the empty string, - if input.is_empty() { - // then return failure - return Err(IPParseError::Empty); - } - - // Let validationError be false. - let mut validation_error = false; - - // Let R be 10. - let mut radix = 10; - - // If input contains at least two code points - // and the first two code points are either "0X" or "0x", then: - if 2 <= input.len() && (input.starts_with("0x") || input.starts_with("0X")) { - // Set validationError to true. - validation_error = true; - - // Remove the first two code points from input. - input = &input[2..]; - - radix = 16; - } - // Otherwise, if input contains at least two code points - // and the first code point is U+0030 (0), then: - else if 2 <= input.len() && input.starts_with('0') { - // Set validationError to true. - validation_error = true; - - // Remove the first code point from input. - input = &input[1..]; - - // Set R to 8. - radix = 8; - } - - // If input is the empty string, then return (0, true). - if input.is_empty() { - return Ok((0, true)); - } - - // If input contains a code point that is not a radix-R digit, - // NOTE: rust takes care of that - - // Let output be the mathematical integer value that is represented by input - // in radix-R notation, using ASCII hex digits for digits with values 0 through 15. - let output = u32::from_str_radix(input, radix).map_err(|_| IPParseError::InvalidDigit)?; - - // Return (output, validationError). - Ok((output, validation_error)) -} - /// -fn ipv4_parse(input: &str) -> Result { +pub(crate) fn ipv4_parse(input: &str) -> Result { // Let validationError be false. 
let mut validation_error = false; @@ -213,10 +40,10 @@ fn ipv4_parse(input: &str) -> Result { } // Let numbers be an empty list. - let mut numbers = vec![]; + let mut numbers = [0; 4]; // For each part of parts: - for part in parts { + for (index, part) in parts.iter().enumerate() { // Let result be the result of parsing part. // If result is failure, // validation error, return failure. @@ -229,7 +56,7 @@ fn ipv4_parse(input: &str) -> Result { } // Append result[0] to numbers. - numbers.push(result.0); + numbers[index] = result.0; } // If validationError is true, @@ -259,14 +86,14 @@ fn ipv4_parse(input: &str) -> Result { // Let ipv4 be the last item in numbers. // Remove the last item from numbers. - let mut ipv4 = numbers.pop().expect("numbers must not be empty"); + let mut ipv4 = numbers[parts.len() - 1]; // only the first parts.len() slots of the fixed-size array are populated // Let counter be 0. let mut counter = 0; // For each n of numbers: #[allow(clippy::explicit_counter_loop)] // Let's follow the spec comments - for n in numbers { + for n in numbers.iter().take(parts.len() - 1) { // Increment ipv4 by n × 256^(3 − counter). ipv4 += n * 256_u32.pow(3 - counter); @@ -275,11 +102,65 @@ fn ipv4_parse(input: &str) -> Result { } // Return ipv4. - Ok(ipv4) + Ok(net::Ipv4Addr::from_bits(ipv4)) +} + +/// +fn ipv4_number_parse(mut input: &str) -> Result<(u32, bool), IPParseError> { + // If input is the empty string, + if input.is_empty() { + // then return failure + return Err(IPParseError::Empty); + } + + // Let validationError be false. + let mut validation_error = false; + + // Let R be 10. + let mut radix = 10; + + // If input contains at least two code points + // and the first two code points are either "0X" or "0x", then: + if 2 <= input.len() && (input.starts_with("0x") || input.starts_with("0X")) { + // Set validationError to true. + validation_error = true; + + // Remove the first two code points from input.
+ input = &input[2..]; + + radix = 16; + } + // Otherwise, if input contains at least two code points + // and the first code point is U+0030 (0), then: + else if 2 <= input.len() && input.starts_with('0') { + // Set validationError to true. + validation_error = true; + + // Remove the first code point from input. + input = &input[1..]; + + // Set R to 8. + radix = 8; + } + + // If input is the empty string, then return (0, true). + if input.is_empty() { + return Ok((0, true)); + } + + // If input contains a code point that is not a radix-R digit, + // NOTE: rust takes care of that + + // Let output be the mathematical integer value that is represented by input + // in radix-R notation, using ASCII hex digits for digits with values 0 through 15. + let output = u32::from_str_radix(input, radix).map_err(|_| IPParseError::InvalidDigit)?; + + // Return (output, validationError). + Ok((output, validation_error)) } /// -fn ipv6_parse(input: &str) -> Result<[u16; 8], IPParseError> { +pub(crate) fn ipv6_parse(input: &str) -> Result { // Let address be a new IPv6 address whose IPv6 pieces are all 0. let mut address = [0_u16; 8]; @@ -520,5 +401,32 @@ fn ipv6_parse(input: &str) -> Result<[u16; 8], IPParseError> { } // Return address. - Ok(address) + Ok(net::Ipv6Addr::new( + address[0], address[1], address[2], address[3], address[4], address[5], address[6], + address[7], + )) +} + +#[cfg(test)] +mod tests { + use std::net; + + use super::{ipv4_parse, ipv6_parse}; + + #[test] + fn test_ipv4_parse() { + assert_eq!(ipv4_parse("127.0.0.1").unwrap(), net::Ipv4Addr::LOCALHOST); + + // Test parsing with hex numbers + // This is explicitly forbidden in https://datatracker.ietf.org/doc/html/rfc6943#section-3.1.1 + // but the URL specification allows for it, so we should too. 
+ let with_hex = net::Ipv4Addr::new(255, 1, 2, 3); + assert_eq!(ipv4_parse("0xff.1.0x2.3").unwrap(), with_hex); + } + + #[test] + fn test_ipv6_parse() { + let ipv6 = net::Ipv6Addr::new(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq!(ipv6_parse("0:1:2:3:4:5:6:7").unwrap(), ipv6); // IPv6 pieces are colon-separated + } } diff --git a/web/url/src/lib.rs b/web/url/src/lib.rs index da7f2b80..07b890e9 100644 --- a/web/url/src/lib.rs +++ b/web/url/src/lib.rs @@ -14,7 +14,8 @@ let_chains, option_get_or_insert_default, ascii_char, - ascii_char_variants + ascii_char_variants, + ip_bits )] mod host; @@ -25,7 +26,7 @@ mod url; mod util; mod validation_error; -pub use crate::ip::{IPParseError, Ipv4Address, Ipv6Address}; +pub use crate::ip::IPParseError; pub use crate::parser::*; pub use crate::url::*; pub use host::Host;