[Web] Add html2text converter; Show quarantine html elements as text, do not escape html

This commit is contained in:
andre.peters
2018-01-18 10:09:30 +01:00
parent 9ba0786fbf
commit 79cb929911
78 changed files with 28586 additions and 1247 deletions

View File

@@ -0,0 +1,10 @@
# EditorConfig is awesome: http://EditorConfig.org
# don't add newlines to test files
[*]
insert_final_newline = false
trim_trailing_whitespace = false
[**.txt]
insert_final_newline = false
trim_trailing_whitespace = false

View File

@@ -0,0 +1,9 @@
<?php
/**
 * Component test hook for the html2text package.
 *
 * Extends the shared \ComponentTests\ComponentTest base class and only
 * supplies the directory roots; how the base class uses them is defined
 * outside this file.
 */
class Html2TextComponentTest extends \ComponentTests\ComponentTest {
    /**
     * Lists the root directories for this component.
     *
     * @return array a single-element array holding the parent of the
     *               directory this file lives in
     */
    public function getRoots() {
        $packageRoot = __DIR__ . "/..";

        return array($packageRoot);
    }
}

View File

@@ -0,0 +1,101 @@
<?php
/**
 * Fixture-driven tests for Html2Text\Html2Text::convert().
 *
 * Every test names a pair of files that live next to this class:
 * "<name>.html" is the input and "<name>.txt" is the expected plain text.
 */
class Html2TextTest extends PHPUnit_Framework_TestCase {
    /**
     * Converts one HTML fixture and compares it with its expected text.
     *
     * When the converted text differs from the expectation it is also
     * written to "<name>.output" beside the fixtures, so the actual result
     * can be inspected after the failure.
     *
     * @param string $test           fixture base name, without extension
     * @param bool   $ignoreXmlError forwarded as the second argument of
     *                               Html2Text::convert()
     */
    public function doTest($test, $ignoreXmlError = false) {
        $htmlFile = __DIR__ . "/$test.html";
        $textFile = __DIR__ . "/$test.txt";

        $this->assertTrue(file_exists($htmlFile), "File '$test.html' did not exist");
        $this->assertTrue(file_exists($textFile), "File '$test.txt' did not exist");

        $input = file_get_contents($htmlFile);
        // The expectation goes through the library's own newline fixer
        // before it is compared with the converter output.
        $expected = Html2Text\Html2Text::fixNewlines(file_get_contents($textFile));
        $output = Html2Text\Html2Text::convert($input, $ignoreXmlError);

        // Strict comparison: a loose "!=" treats numeric-looking strings
        // such as "1e1" and "10" as equal and would skip the dump.
        if ($output !== $expected) {
            file_put_contents(__DIR__ . "/$test.output", $output);
        }

        // PHPUnit takes the expected value first and the actual value second;
        // the order decides how a failure diff is labelled.
        $this->assertEquals($expected, $output);
    }

    public function testBasic() {
        $this->doTest("basic");
    }

    public function testAnchors() {
        $this->doTest("anchors");
    }

    public function testMoreAnchors() {
        $this->doTest("more-anchors");
    }

    public function test3() {
        $this->doTest("test3");
    }

    public function test4() {
        $this->doTest("test4");
    }

    public function testTable() {
        $this->doTest("table");
    }

    public function testNbsp() {
        $this->doTest("nbsp");
    }

    public function testLists() {
        $this->doTest("lists");
    }

    public function testPre() {
        $this->doTest("pre");
    }

    public function testFullEmail() {
        $this->doTest("full_email");
    }

    public function testImages() {
        $this->doTest("images");
    }

    public function testNonBreakingSpaces() {
        $this->doTest("non-breaking-spaces");
    }

    public function testUtf8Example() {
        $this->doTest("utf8-example");
    }

    public function testWindows1252Example() {
        $this->doTest("windows-1252-example");
    }

    public function testMsoffice() {
        $this->doTest("msoffice");
    }

    public function testDOMProcessing() {
        $this->doTest("dom-processing");
    }

    public function testEmpty() {
        $this->doTest("empty");
    }

    public function testHugeMsoffice() {
        $this->doTest("huge-msoffice");
    }

    /**
     * The "invalid" fixture, converted without ignoring XML errors, is
     * expected to raise a PHPUnit warning error.
     *
     * @expectedException PHPUnit_Framework_Error_Warning
     */
    public function testInvalidXML() {
        $this->doTest("invalid", false);
    }

    /**
     * The same "invalid" fixture with XML errors ignored must convert to
     * the expected text.
     */
    public function testInvalidXMLIgnore() {
        $this->doTest("invalid", true);
    }
}

View File

@@ -0,0 +1,12 @@
A document without any HTML open/closing tags.
<hr>
We try and use the representation given by common browsers of the
HTML document, so that it looks similar when converted to plain text.
<a href="http://foo.com">visit foo.com</a> - or <a href="http://www.foo.com">http://www.foo.com</a>
<a href="http://foo.com" title="a link with a title">link</a>
<h2><a name="anchor">An anchor which will not appear</a></h2>

View File

@@ -0,0 +1,5 @@
A document without any HTML open/closing tags.
---------------------------------------------------------------
We try and use the representation given by common browsers of the HTML document, so that it looks similar when converted to plain text. [visit foo.com](http://foo.com) - or http://www.foo.com [link](http://foo.com)
[An anchor which will not appear]

View File

@@ -0,0 +1,21 @@
<html>
<title>Ignored Title</title>
<body>
<h1>Hello, World!</h1>
<p>This is some e-mail content.
Even though it has whitespace and newlines, the e-mail converter
will handle it correctly.
<p>Even mismatched tags.</p>
<div>A div</div>
<div>Another div</div>
<div>A div<div>within a div</div></div>
<p>Another line<br />Yet another line</p>
<a href="http://foo.com">A link</a>
</body>
</html>

View File

@@ -0,0 +1,15 @@
Hello, World!
This is some e-mail content. Even though it has whitespace and newlines, the e-mail converter will handle it correctly.
Even mismatched tags.
A div
Another div
A div
within a div
Another line
Yet another line
[A link](http://foo.com)

View File

@@ -0,0 +1,8 @@
<html>
<body>
<?a
I am a random piece of code
?>
Hello
</body>
</html>

View File

@@ -0,0 +1 @@
Hello

View File

@@ -0,0 +1,220 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=680">
</head>
<body class="cat-update-email cat-update" style="background: #ffccee; color: blue; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; text-align: center" bgcolor="#ffccee">
<style type="text/css">
body.cat-update-email {
margin: 0; padding: 0; background: #ffccee; color: blue; text-align: center;
}
body.cat-update-email {
font-size: 12px; font-family: Times New Roman; font-weight: normal;
}
body.cat-update-email th {
font-size: 12px; font-family: Times New Roman; font-weight: normal;
}
body.cat-update-email td {
font-size: 12px; font-family: Times New Roman; font-weight: normal;
}
</style>
<table class="header-wrapper" style="border-spacing: 0; border: none; margin: 0; width: 100%">
<tr>
<td class="header" style="background: none; color: #999; font-family: Times New Roman; font-size: 12px; font-weight: normal; padding: 15px 0">
<table cellspacing="0" cellpadding="0" border="0" style="margin: 0 auto; padding: 0 20px; width: 640px">
<tr>
<th style="font-family: Times New Roman; font-size: 12px; font-weight: normal">
<a class="logo" href="http://localhost/home" style="color: red; text-decoration: none">
<img border="0" height="32" src="test.png" width="200" style="display: block">
</a> </th>
<td class="account-number" style="color: white; font-family: Times New Roman; font-size: 12px; font-weight: normal; text-align: right" align="right">
16 December 2015<br>
Account 123
</td>
</tr>
</table>
</td>
</tr>
</table>
<table class="section-wrapper" style="border-spacing: 0; border: none; margin: 0 auto 20px; width: 640px">
<tr>
<td class="salutation section" style="background: white; color: black; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0 auto 20px; padding: 40px 20px; text-align: left; width: 600px" align="left" bgcolor="white">
<h1 class="user_greeting" style="font-family: Times New Roman; font-size: 1.8; font-weight: normal; line-height: 1.2; margin: 0 0 1em">
Hi Susan
</h1>
<p class="message" style="font-size: 1.5em; line-height: 1.2; margin: 0">
Here is your cat report.
</p>
</td>
</tr>
</table>
<table class="section-wrapper" style="border-spacing: 0; border: none; margin: 0 auto 20px; width: 640px">
<tr>
<td class="balance section" style="background: white; color: black; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0 auto 20px; padding: 40px 20px; text-align: left; width: 600px" align="left" bgcolor="white">
<div class="account-status-heading" style="font-size: 2.5em; line-height: 1em; padding: 30px 20px; text-align: center" align="center">You have found <span class="status-cats-negative" style="color: #df0000">5 cats</span> less than anyone else</div>
<div id="cat-update-action-buttons">
<div id="buy-button" style="text-align: center" align="center">
<a class="btn-alert" href="http://localhost/cats" id="buy-cats-button" style="-moz-appearance: none; -webkit-appearance: none; background: #DF0000; border-radius: 3px; border: 11px solid #df0000; color: #fff; cursor: pointer; display: block; font-size: 16px; height: 16px; line-height: 16px; margin: 0 auto; text-decoration: none; transition: background-color .15s; width: 120px">Find more cats</a>
</div>
</div>
</td>
</tr>
</table>
<table class="section-wrapper" style="border-spacing: 0; border: none; margin: 0 auto 20px; width: 640px">
<tr>
<td class="cats section" id="cats" style="background: white; color: black; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0 auto 20px; padding: 40px 20px; text-align: left; width: 600px" align="left" bgcolor="white">
<div class="cats-usage">
<h2 style="font-family: Times New Roman; font-size: 1.8; font-weight: normal; line-height: 1.2; margin: 0">Down the road</h2>
<p class="fine-print" style="margin: 0">Across the hall</p>
<h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 10px 0 0">Your achievements</h3>
<table class="current-usage with-icon-left" style="border-collapse: collapse; border-spacing: 0; margin-bottom: 20px; margin-top: 20px; width: 100%">
<tr>
<th style="border: none; font-family: Times New Roman; font-size: 14px; font-weight: bold; margin: 0; padding: 0; text-align: left; vertical-align: middle; width: 50px" align="left" valign="middle"><img src="test.png"></th>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; vertical-align: top; width: 550px" valign="top">
<div class="top">You're currently finding about</div>
<div class="large" style="color: black; font-size: 18px; padding: 4px 0">12 cats</div>
<div class="bottom">per day</div>
</td>
</tr>
<tr><td colspan="2" style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; vertical-align: top; width: 550px" valign="top"> </td></tr>
<tr>
<td colspan="2" style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; vertical-align: top; width: 550px" valign="top"><img alt="Number of cats found" src="test.png"></td>
</tr>
</table>
</div>
<div class="summary">
<hr class="fine-print" style="border-bottom-color: #eee; border-bottom-style: solid; border-width: 0 0 1px; margin: 20px 0">
<h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 10px 0 0">Your last cat was found two days ago.</h3>
<p class="fine-print" style="margin: 0">One type of cat is a kitten.</p>
<table class="readings" style="border-collapse: collapse; border-spacing: 0; margin: 10px 0; width: 100%">
<tr style="color: #BD236C">
<td class="left-column" style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; width: 5%">
<img src="test.png" style="padding-top: 10px">
</td>
<td class="center-column" style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; width: 60%">
<h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 10px 0 0">Special account <span class="nickname" style="font-size: 12px"></span> <span class="fine-print">A1</span>
</h3>
</td>
<td class="right-column" style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0; width: 20%">
<h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 10px 0 0">12.345</h3>
</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 0"></td>
</tr>
</table>
</div>
</td>
</tr>
</table>
<div class="banner" style="margin: 0 auto 20px; padding: 10px; text-align: center; width: 640px" align="center">
<a href="http://localhost/logout" style="color: red; text-decoration: none">
<img alt="" border="0" height="177" src="http://localhost/photo1.png" width="600">
</a>
</div>
<table class="section-wrapper" style="border-spacing: 0; border: none; margin: 0 auto 20px; width: 640px">
<tr>
<td class="tips section" style="background: white; color: black; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0 auto 20px; padding: 40px 20px; text-align: left; width: 600px" align="left" bgcolor="white">
<table style="border-collapse: collapse; border-spacing: 0; width: 100%">
<tr>
<td colspan="3" style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0; vertical-align: top; width: 200px" valign="top"><h2 style="font-family: Times New Roman; font-size: 1.8; font-weight: normal; line-height: 1.2; margin: 0 0 10px">How can you find more cats?</h2></td>
</tr>
<tr class="icon">
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0; vertical-align: top; width: 200px" valign="top"><img height="40" src="http://localhost/photo1.png" width="40"></td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top"><img height="40" src="http://localhost/photo2.png" width="40"></td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top"><img height="40" src="http://localhost/photo3.png" width="40"></td>
</tr>
<tr class="subtitle">
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0; vertical-align: top; width: 200px" valign="top"><h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 0 0 5px">Look in trash cans</h3></td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top"><h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 0 0 5px">Start meowing</h3></td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top"><h3 style="font-family: Times New Roman; font-size: 18px; font-weight: normal; line-height: 2em; margin: 0 0 5px">Eat cat food</h3></td>
</tr>
<tr class="body" style="color: green">
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0; vertical-align: top; width: 200px" valign="top">Some cats like to hang out in trash cans. Some cats do not.</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top">Some cats are attracted to similar tones.</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top">So one day your tears may smell like cat food, attracting more cats.</td>
</tr>
<tr class="image">
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0; vertical-align: top; width: 200px" valign="top">
<a href="https://localhost/about" style="color: red; text-decoration: none">
<img border="0" height="130" src="http://localhost/photo1.png" style="display: block; margin: 10px 0" width="165">
</a>
</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top">
<a href="https://localhost/about" style="color: red; text-decoration: none">
<img border="0" height="130" src="http://localhost/photo2.png" style="display: block; margin: 10px 0" width="165">
</a>
</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top">
<a href="https://localhost/about" style="color: red; text-decoration: none">
<img border="0" height="130" src="http://localhost/photo3.png" style="display: block; margin: 10px 0" width="165">
</a>
</td>
</tr>
<tr class="tips-footer" style="color: green">
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0; vertical-align: top; width: 200px" valign="top">
<a href="https://github.com/soundasleep/html2text_ruby" style="color: red; text-decoration: none">Cats are great.</a>
</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top">
<a href="https://github.com/soundasleep/html2text_ruby" style="color: red; text-decoration: none">Find more cats.</a>
</td>
<td style="border: none; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0; padding: 5px 0 0 17px; vertical-align: top; width: 200px" valign="top">
<a href="https://github.com/soundasleep/html2text_ruby" style="color: red; text-decoration: none">Do more things.</a>
</td>
</tr>
</table>
</td>
</tr>
</table>
<table class="footer-wrapper" style="margin: 0 auto 20px">
<tr>
<td class="footer" style="color: #9B9B9B; font-family: Times New Roman; font-size: 12px; font-weight: normal; margin: 0 auto 4em; text-align: left; width: 600px" align="left">
<h3 style="font-family: Times New Roman; font-size: 1.2; font-weight: normal; line-height: 2em; margin: 0">
<a href="http://localhost/contact" style="color: red; text-decoration: none">Contact us</a>
</h3>
<p style="margin: 0 0 1em">
cats@cats.com<br>
Monday and Friday
</p>
<p style="margin: 0 0 1em"><a href="https://github.com/soundasleep/html2text" style="color: red; text-decoration: none"><img align="absmiddle" height="26" src="test.png" width="26"></a>
<a href="https://github.com/soundasleep/html2text_ruby" style="color: red; text-decoration: none"><img align="absmiddle" height="26" src="test.png" width="26"></a>
</p>
<p class="message no-web-display" style="margin: 0">Having trouble seeing this email?
<a href="http://localhost/view_it_online" style="color: red; text-decoration: none">View it online</a>.
</p>
</td>
</tr>
</table>
<script async type="text/javascript" id="profiler" src="/profiler.js" data-version="1.0"></script>
</body>
</html>

View File

@@ -0,0 +1,54 @@
http://localhost/home 16 December 2015
Account 123
Hi Susan
Here is your cat report.
You have found 5 cats less than anyone else
[Find more cats](http://localhost/cats)
Down the road
Across the hall
Your achievements
You're currently finding about
12 cats
per day
[Number of cats found]
---------------------------------------------------------------
Your last cat was found two days ago.
One type of cat is a kitten.
Special account A1
12.345
http://localhost/logout
How can you find more cats?
Look in trash cans
Start meowing
Eat cat food
Some cats like to hang out in trash cans. Some cats do not. Some cats are attracted to similar tones. So one day your tears may smell like cat food, attracting more cats.
https://localhost/about https://localhost/about https://localhost/about
[Cats are great.](https://github.com/soundasleep/html2text_ruby) [Find more cats.](https://github.com/soundasleep/html2text_ruby) [Do more things.](https://github.com/soundasleep/html2text_ruby)
[Contact us](http://localhost/contact)
cats@cats.com
Monday and Friday
https://github.com/soundasleep/html2text https://github.com/soundasleep/html2text_ruby
Having trouble seeing this email? [View it online](http://localhost/view_it_online).

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,54 @@
<body>
<p>
One: <img src="one.png">
</p>
<p>
Two: <img src="two.png" alt="two">
</p>
<p>
Three: <img src="three.png" title="three">
</p>
<p>
Four: <img src="four.png" title="four" alt="four alt">
</p>
<h1>With links</h1>
<p>
One: <a href="http://localhost"><img src="one.png"></a>
</p>
<p>
Two: <a href="http://localhost"><img src="two.png" alt="two"></a>
</p>
<p>
Three: <a href="http://localhost"><img src="three.png" title="three"></a>
</p>
<p>
Four: <a href="http://localhost"><img src="four.png" title="four" alt="four alt"></a>
</p>
<h1>With links with titles</h1>
<p>
One: <a href="http://localhost" title="one link"><img src="one.png"></a>
</p>
<p>
Two: <a href="http://localhost" title="two link"><img src="two.png" alt="two"></a>
</p>
<p>
Three: <a href="http://localhost" title="three link"><img src="three.png" title="three"></a>
</p>
<p>
Four: <a href="http://localhost" title="four link"><img src="four.png" title="four" alt="four alt"></a>
</p>
</body>
</html>

View File

@@ -0,0 +1,27 @@
One:
Two: [two]
Three: [three]
Four: [four]
With links
One: http://localhost
Two: [two](http://localhost)
Three: [three](http://localhost)
Four: [four](http://localhost)
With links with titles
One: [one link](http://localhost)
Two: [two link](http://localhost)
Three: [three link](http://localhost)
Four: [four link](http://localhost)

View File

@@ -0,0 +1,4 @@
<b>Hello &nbsnbsp; world</b>
<div class=">
Error
</div>

View File

@@ -0,0 +1 @@
Hello &nbsnbsp; world

View File

@@ -0,0 +1,24 @@
<h1>List tests</h1>
<p>
Add some lists.
</p>
<ol>
<li>one</li>
<li>two
<li>three
</ol>
<h2>An unordered list</h2>
<ul>
<li>one
<li>two</li>
<li>three</li>
</ul>
<ul>
<li>one
<li>two</li>
<li>three</li>
</ul>

View File

@@ -0,0 +1,17 @@
List tests
Add some lists.
- one
- two
- three
An unordered list
- one
- two
- three
- one
- two
- three

View File

@@ -0,0 +1,14 @@
<h1>Anchor tests</h1>
<p>
Visit http://openiaml.org or <a href="http://openiaml.org">openiaml.org</a> or <a href="http://openiaml.org">http://openiaml.org</a>.
</p>
<p>
To visit with SSL, visit https://openiaml.org or <a href="https://openiaml.org">openiaml.org</a> or <a href="https://openiaml.org">https://openiaml.org</a>.
</p>
<p>
To mail, email support@openiaml.org or mailto:support@openiaml.org
or <a href="mailto:support@openiaml.org">support@openiaml.org</a> or <a href="mailto:support@openiaml.org">mailto:support@openiaml.org</a>.
</p>

View File

@@ -0,0 +1,7 @@
Anchor tests
Visit http://openiaml.org or openiaml.org or http://openiaml.org.
To visit with SSL, visit https://openiaml.org or openiaml.org or https://openiaml.org.
To mail, email support@openiaml.org or mailto:support@openiaml.org or support@openiaml.org or mailto:support@openiaml.org.

View File

@@ -0,0 +1 @@
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii"><meta name=Generator content="Microsoft Word 15 (filtered medium)"><style><!-- /* Font Definitions */ @font-face {font-family:"Cambria Math"; panose-1:2 4 5 3 5 4 6 3 2 4;} @font-face {font-family:Calibri; panose-1:2 15 5 2 2 2 4 3 2 4;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal {margin:0cm; margin-bottom:.0001pt; font-size:11.0pt; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} a:link, span.MsoHyperlink {mso-style-priority:99; color:#0563C1; text-decoration:underline;} a:visited, span.MsoHyperlinkFollowed {mso-style-priority:99; color:#954F72; text-decoration:underline;} span.EmailStyle17 {mso-style-type:personal-compose; font-family:"Calibri",sans-serif; color:windowtext;} .MsoChpDefault {mso-style-type:export-only; font-family:"Calibri",sans-serif; mso-fareast-language:EN-US;} @page WordSection1 {size:612.0pt 792.0pt; margin:72.0pt 72.0pt 72.0pt 72.0pt;} div.WordSection1 {page:WordSection1;} --></style><!--[if gte mso 9]><xml> <o:shapedefaults v:ext="edit" spidmax="1026" /> </xml><![endif]--><!--[if gte mso 9]><xml> <o:shapelayout v:ext="edit"> <o:idmap v:ext="edit" data="1" /> </o:shapelayout></xml><![endif]--></head><body lang=EN-GB link="#0563C1" vlink="#954F72"><div class=WordSection1><p class=MsoNormal>Dear html2text,<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>This is an example email that can be used to test html2text conversion of outlook / exchange emails.<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>The addition of &lt;o:p&gt; tags is very annoying!<o:p></o:p></p><p class=MsoNormal>This is a single 
line return<o:p></o:p></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal><b>This is bold<o:p></o:p></b></p><p class=MsoNormal><i>This is italic<o:p></o:p></i></p><p class=MsoNormal><u>This is underline<o:p></o:p></u></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>Andrew<o:p></o:p></p></div></body></html>

View File

@@ -0,0 +1,12 @@
Dear html2text,
This is an example email that can be used to test html2text conversion of outlook / exchange emails.
The addition of <o:p> tags is very annoying!
This is a single line return
This is bold
This is italic
This is underline
Andrew

View File

@@ -0,0 +1 @@
hello &nbsp; world &amp; people &lt; &gt; &NBSP;

View File

@@ -0,0 +1 @@
hello world & people < > &NBSP;

View File

@@ -0,0 +1,31 @@
<html>
<body>
<div>
Hello
<br>
</div>
<div>
How are you?
<br>
</div>
<p>
How are you?
<br>
<p></p>
</p>
<p>
How are you?
<br>
</p>
This is not the end!
<div>
How are you again?
<br>
</div>
This is the end!
<br>
</body>
</html>

View File

@@ -0,0 +1,10 @@
Hello
How are you?
How are you?
How are you?
This is not the end!
How are you again?
This is the end!

View File

@@ -0,0 +1 @@
these spaces are non-breaking

View File

@@ -0,0 +1 @@
these spaces are non-breaking

View File

@@ -0,0 +1,10 @@
Here is the code
<pre>
#include &lt;stdlib.h&gt;
#include &lt;stdio.h&gt;
int main(){
return 0;
};
</pre>

View File

@@ -0,0 +1,8 @@
Here is the code
#include <stdlib.h>
#include <stdio.h>
int main(){
return 0;
};

View File

@@ -0,0 +1,53 @@
<html>
<title>Ignored Title</title>
<body>
<h1>Hello, World!</h1>
<table>
<thead>
<tr>
<th>Col A</th>
<th>Col B</th>
</tr>
</thead>
<tbody>
<tr>
<td>
Data A1
</td>
<td>
Data B1
</td>
</tr>
<tr>
<td>
Data A2
</td>
<td>
Data B2
</td>
</tr>
<tr>
<td>
Data A3
</td>
<td>
Data B4
</td>
</tr>
</tbody>
<tfoot>
<tr>
<td>
Total A
</td>
<td>
Total B
</td>
</tr>
</tfoot>
</table>
</body>
</html>

View File

@@ -0,0 +1,7 @@
Hello, World!
Col A Col B
Data A1 Data B1
Data A2 Data B2
Data A3 Data B4
Total A Total B

View File

@@ -0,0 +1 @@
test one<br />test two

View File

@@ -0,0 +1,2 @@
test one
test two

View File

@@ -0,0 +1 @@
1<br />2<br />3<br />4<br />5 6

View File

@@ -0,0 +1,5 @@
1
2
3
4
5 6

View File

@@ -0,0 +1,4 @@
<ul>
<li>ÅÄÖ</li>
<li>åäö</li>
</ul>

View File

@@ -0,0 +1,2 @@
- ÅÄÖ
- åäö

View File

@@ -0,0 +1,4 @@
<ul>
<li><EFBFBD><EFBFBD><EFBFBD></li>
<li><EFBFBD><EFBFBD><EFBFBD></li>
</ul>

View File

@@ -0,0 +1,2 @@
- ÅÄÖ
- åäö