Skip to content

Commit

Permalink
Merge pull request #2 from opcodesio/refactor
Browse files Browse the repository at this point in the history
big refactor to better handle headers and content separation
  • Loading branch information
arukompas authored Dec 31, 2024
2 parents 639ef31 + e5b2281 commit 5725f66
Show file tree
Hide file tree
Showing 6 changed files with 316 additions and 156 deletions.
42 changes: 42 additions & 0 deletions src/HasHeaders.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php

namespace Opcodes\MailParser;

/**
 * Shared header storage with case-insensitive lookup by header name.
 *
 * Header names are kept in the casing they were stored with, but every
 * accessor in this trait compares names case-insensitively, so
 * "Content-Type" and "content-type" refer to the same header.
 */
trait HasHeaders
{
    /**
     * Stored headers, keyed by header name in its original casing.
     *
     * @var array<string, mixed>
     */
    protected array $headers = [];

    /**
     * Return all stored headers, keyed by header name.
     *
     * @return array<string, mixed>
     */
    public function getHeaders(): array
    {
        return $this->headers;
    }

    /**
     * Look up a header by name, ignoring case.
     *
     * @param string $header  Header name to look for.
     * @param mixed  $default Value returned when no header matches.
     *
     * @return mixed The first stored value whose name matches, or $default.
     */
    public function getHeader(string $header, mixed $default = null): mixed
    {
        $needle = strtolower($header);

        foreach ($this->headers as $key => $value) {
            if (strtolower((string) $key) === $needle) {
                return $value;
            }
        }

        return $default;
    }

    /**
     * Store a header, replacing any existing header with the same name.
     *
     * The value is passed through Utils::decodeHeader() before being stored.
     * Entries whose name differs from $header only by case are dropped first;
     * otherwise getHeader() would keep returning the older, stale entry
     * because it matches case-insensitively and returns the first hit.
     *
     * @param string $header Header name; stored with exactly this casing.
     * @param mixed  $value  Raw header value to decode and store.
     */
    public function setHeader(string $header, mixed $value): void
    {
        // Decode before touching the array so a failing decode leaves the
        // stored headers untouched.
        $decoded = Utils::decodeHeader($value);
        $needle = strtolower($header);

        foreach (array_keys($this->headers) as $key) {
            // An exact-key match is overwritten in place below, which keeps
            // its position in the array; only case variants are removed.
            if ((string) $key !== $header && strtolower((string) $key) === $needle) {
                unset($this->headers[$key]);
            }
        }

        $this->headers[$header] = $decoded;
    }

    /**
     * Remove every header whose name matches, ignoring case.
     *
     * @param string $header Header name to remove.
     */
    public function removeHeader(string $header): void
    {
        $needle = strtolower($header);

        foreach (array_keys($this->headers) as $key) {
            if (strtolower((string) $key) === $needle) {
                unset($this->headers[$key]);
            }
        }
    }
}
183 changes: 46 additions & 137 deletions src/Message.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

class Message implements \JsonSerializable
{
protected string $message;
use HasHeaders;

protected string $boundary;
protected string $rawMessage;

protected array $headers = [];
protected string $boundary;

/**
* @var MessagePart[]
Expand All @@ -17,7 +17,8 @@ class Message implements \JsonSerializable

public function __construct(string $message)
{
$this->message = $message;
$this->rawMessage = Utils::cleanUntilFirstHeader($message);
$this->rawMessage = Utils::normaliseLineEndings($this->rawMessage, true);

$this->parse();
}
Expand All @@ -32,27 +33,9 @@ public static function fromFile($path): self
return new self(file_get_contents($path));
}

public function getBoundary(): string
{
return $this->boundary;
}

public function getHeaders(): array
{
return $this->headers;
}

public function getHeader(string $header, $default = null): ?string
public function getBoundary(): ?string
{
$header = strtolower($header);

foreach ($this->headers as $key => $value) {
if (strtolower($key) === $header) {
return $value;
}
}

return $default;
return $this->boundary ?? null;
}

public function getContentType(): string
Expand Down Expand Up @@ -102,7 +85,7 @@ public function getParts(): array

public function getHtmlPart(): ?MessagePart
{
foreach ($this->parts as $part) {
foreach ($this->getParts() as $part) {
if ($part->isHtml()) {
return $part;
}
Expand All @@ -113,7 +96,7 @@ public function getHtmlPart(): ?MessagePart

public function getTextPart(): ?MessagePart
{
foreach ($this->parts as $part) {
foreach ($this->getParts() as $part) {
if ($part->isText()) {
return $part;
}
Expand All @@ -132,7 +115,7 @@ public function getAttachments(): array

public function getSize(): int
{
return strlen($this->message);
return strlen($this->rawMessage);
}

public function toArray(): array
Expand All @@ -154,131 +137,57 @@ public function jsonSerialize(): mixed
return $this->toArray();
}

/**
* Parse the email message into headers and body parts.
*/
protected function parse(): void
{
$lines = explode("\n", $this->message);
$headerInProgress = null;

$collectingBody = false;
$currentBody = '';
$currentBodyHeaders = [];
$currentBodyHeaderInProgress = null;

foreach ($lines as $line) {
$line = rtrim($line, "\r\n ");

if ($headerInProgress) {
$this->headers[$headerInProgress] .= PHP_EOL . $line;
$headerInProgress = str_ends_with($this->headers[$headerInProgress], ';');
continue;
}

if ($currentBodyHeaderInProgress) {
$currentBodyHeaders[$currentBodyHeaderInProgress] .= PHP_EOL . $line;
$currentBodyHeaderInProgress = str_ends_with($currentBodyHeaders[$currentBodyHeaderInProgress], ';');
continue;
}

if (isset($this->boundary) && str_ends_with($line, '--'.$this->boundary.'--')) {
$line = str_replace('--'.$this->boundary.'--', '', $line);
$currentBody .= $line;
// We've reached the end of the message
break;
}

if (isset($this->boundary) && str_ends_with($line, '--'.$this->boundary)) {
$line = str_replace('--'.$this->boundary, '', $line);
// Split email into headers and body
[$rawHeaders, $body] = explode("\r\n\r\n", $this->rawMessage, 2);

if ($collectingBody) {
// We've reached the end of a part, add it and reset the variables
$this->addPart($currentBody . $line, $currentBodyHeaders);
}
// Parse top-level headers
$this->headers = Utils::parseHeaders($rawHeaders);
$this->headers = Utils::decodeHeaders($this->headers);

$collectingBody = true;
$currentBody = '';
$currentBodyHeaders = [];
continue;
}

if ($collectingBody && preg_match('/^(?<key>[A-Za-z\-0-9]+): (?<value>.*)$/', $line, $matches)) {
$currentBodyHeaders[$matches['key']] = $matches['value'];

// if the last character is a semicolon, then the header is continued on the next line
if (str_ends_with($currentBodyHeaders[$matches['key']], ';')) {
$currentBodyHeaderInProgress = $matches['key'];
}

continue;
}

if ($collectingBody) {
$currentBody .= $line . PHP_EOL;
continue;
}
// Get boundary if this is a multipart email
$contentType = $this->getHeader('Content-Type');
if ($contentType && preg_match('/boundary="?([^";\r\n]+)"?/', $contentType, $matches)) {
$this->boundary = $matches[1];
}

if (preg_match("/^Content-Type: (?<contenttype>multipart\/.*); boundary=(?<boundary>.*)$/", $line, $matches)) {
$this->headers['Content-Type'] = $matches['contenttype']."; boundary=".$matches['boundary'];
$this->boundary = trim($matches['boundary'], '"');
continue;
if (!isset($this->boundary) && str_contains($contentType, 'multipart/')) {
// multipart email, perhaps the boundary is corrupted in the header.
// Let's attempt to find a boundary in the body.
if (preg_match("~^--(?<boundary>[0-9A-Za-z'()+_,-./:=?]{0,68}[0-9A-Za-z'()+_,-./=?])~", $body, $matches)) {
$this->boundary = trim($matches['boundary']);
}
}

if (preg_match('/^(?<key>[A-Za-z\-0-9]+): (?<value>.*)$/', $line, $matches)) {
if (strtolower($matches['key']) === 'content-type' && !isset($this->boundary) && !str_contains($matches['value'], 'multipart/mixed')) {
// this might be a single-part message. Let's start collecting the body.
$collectingBody = true;
$currentBody = '';
$currentBodyHeaders = [
$matches['key'] => $matches['value'],
];

if (str_ends_with($currentBodyHeaders[$matches['key']], ';')) {
$currentBodyHeaderInProgress = $matches['key'];
}

continue;
}

$this->headers[$matches['key']] = $matches['value'];

// if the last character is a semicolon, then the header is continued on the next line
if (str_ends_with($this->headers[$matches['key']], ';')) {
$headerInProgress = $matches['key'];
}

continue;
// If no boundary, treat the entire body as a single part
if (!isset($this->boundary)) {
$part = $this->addPart($body ?? '');
if ($contentType = $this->getHeader('Content-Type')) {
$part->setHeader('Content-Type', $contentType);
}

if (preg_match("~^--(?<boundary>[0-9A-Za-z'()+_,-./:=?]{0,68}[0-9A-Za-z'()+_,-./=?])~", $line, $matches)) {
$this->boundary = trim($matches['boundary']);
$collectingBody = true;
$currentBody = '';
$currentBodyHeaders = [];
continue;
if ($contentTransferEncoding = $this->getHeader('Content-Transfer-Encoding')) {
$part->setHeader('Content-Transfer-Encoding', $contentTransferEncoding);
$this->removeHeader('Content-Transfer-Encoding');
}

// The line is not part of the email message. Let's remove it altogether.
$this->message = ltrim(substr($this->message, strlen($line)));
return;
}

if (!empty($currentBody) || !empty($currentBodyHeaders)) {
$this->addPart($currentBody, $currentBodyHeaders);
}
// Split body into parts using boundary
$parts = preg_split("/--" . preg_quote($this->boundary) . "(?:--|(?:\r\n|$))/", $body);

if (! $this->getContentType() && ($part = $this->getParts()[0] ?? null)) {
foreach ($part->getHeaders() as $key => $value) {
if (strtolower($key) === 'content-type') {
$this->headers[$key] = $value;
break;
}
}
// Process each part
foreach ($parts as $rawPart) {
if (empty(trim($rawPart))) continue;

$this->addPart($rawPart);
}
}

protected function addPart(string $currentBody, array $currentBodyHeaders): void
protected function addPart(string $rawMessage): MessagePart
{
$this->parts[] = new MessagePart(trim($currentBody), $currentBodyHeaders);
$this->parts[] = $part = new MessagePart($rawMessage);

return $part;
}
}
42 changes: 26 additions & 16 deletions src/MessagePart.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,48 @@

class MessagePart implements \JsonSerializable
{
protected string $content;
use HasHeaders;

protected array $headers;
protected string $rawMessage;

public function __construct(string $content, array $headers = [])
{
$this->content = $content;
$this->headers = $headers;
}
protected string $content;

public function getContentType(): string
public function __construct(string $message)
{
return $this->headers['Content-Type'] ?? '';
$this->rawMessage = $message;

$this->parse();
}

public function getHeaders(): array
protected function parse(): void
{
return $this->headers;
// Split part into headers and content
if (strpos($this->rawMessage, "\r\n\r\n") !== false) {
[$headers, $content] = explode("\r\n\r\n", $this->rawMessage, 2);

// Parse part headers
$this->headers = Utils::parseHeaders($headers);
$this->headers = Utils::decodeHeaders($this->headers);

$this->content = trim($content);
} else {
// No headers, just content
$this->content = trim($this->rawMessage);
}
}

public function getHeader(string $name, $default = null): mixed
public function getContentType(): string
{
return $this->headers[$name] ?? $default;
return $this->getHeader('Content-Type', '');
}

public function getContent(): string
{
if (strtolower($this->getHeader('Content-Transfer-Encoding', '')) === 'base64') {
return base64_decode($this->content);
return Utils::normaliseLineEndings(base64_decode($this->content));
}

return $this->content;
return Utils::normaliseLineEndings($this->content);
}

public function isHtml(): bool
Expand Down Expand Up @@ -73,7 +83,7 @@ public function getFilename(): string

public function getSize(): int
{
return strlen($this->getContent());
return strlen($this->rawMessage);
}

public function toArray(): array
Expand Down
Loading

0 comments on commit 5725f66

Please sign in to comment.