bonus2: add support for combining multiple html files into one LaTeX doc

git-svn-id: svn+ssh://atelier.inf.usi.ch/home/bevilj/group-1@210 a672b425-5310-4d7a-af5c-997e18724b81
This commit is contained in:
bevilj 2018-11-15 15:11:32 +00:00
parent d98e9e5da7
commit 6aeb5370cc
9 changed files with 269 additions and 62 deletions

View File

@ -19,7 +19,7 @@ The output jar will be available at this path: `build/libs/bonus2.jar`
## Execute
```
java -jar bonus2.jar [file1.html] [file2.html] [directory]
java -jar bonus2.jar [-s, --singlepage] [file1.html] [file2.html] [directory]
```
Both files and directories can be passed as parameters, even at the same time.
@ -33,7 +33,10 @@ subdirectories matching the absolute path of the files passed as argument.
Note that the program will not work with plain html files,
it requires Jekyll html files instead.
A test file is available for reference under the directory `res`.
The `-s` or `--singlepage` flag creates one single output LaTeX file
instead of many. It will be written to the "out" directory.
Test files are available for reference under the directory `res`.
## Supported tags
@ -62,4 +65,5 @@ A test file is available for reference under the directory `res`.
* comments are removed
* jekyll author -> LaTeX author
* jekyll title -> LaTeX title
* jekyll highlight -> verbatim

View File

@ -8,8 +8,12 @@ title: The Tragedy
previous-page: pages/cmd/basic/open.html
next-page: pages/cmd/basic/ls.html
---
<p>
Did you ever hear the tragedy of <b>Darth Plagueis</b> <i>The Wise</i>?<br>
Did you ever hear the tragedy of <!--
~ Copyright (c) 2018 Bevilacqua Joey.
-->
<b>Darth Plagueis</b> <i>The Wise</i>?<br>
<code>I thought not</code>. Its not a story the Jedi would tell you.</br>
Its a Sith legend. Darth Plagueis was a Dark Lord of the Sith,
so powerful and so wise he could use the Force to influence the midichlorians to <u>create life…</u><br>
@ -19,13 +23,6 @@ He became so powerful… the only thing he was afraid of was losing his power, w
Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.<br>
<b>Ironic</b>. He could save others from death, but not himself.<br>
<h1>Surprise</h1>
<pre>
A surprise to be sure,
but a welcome one.
</pre>
<table>
<tr>
<td>Take a</td>
@ -44,9 +41,7 @@ A surprise to be sure,
<h1>Fish</h1>
There's always <a href="{{ site.baseurl }}/bigger.html">bigger</a> fish<br>
[visible confusion]
There's always <a href="{{ site.baseurl }}/bigger.html">bigger</a> fish
<ul>
<li><u>Hello there</u></li>
@ -78,6 +73,4 @@ Dew it.
<td>that's a</td>
<td>good trick</td>
</tr>
</table>
</p>
</table>

85
bonus2/res/test2.html Normal file
View File

@ -0,0 +1,85 @@
---
layout: page
category-title: Knowledge
category-page: basic
tags: tragedy know
author: Sheev Palpatine
title: The Tragedy
previous-page: pages/cmd/basic/open.html
next-page: pages/cmd/basic/ls.html
---
Did you ever hear the tragedy of <!--
~ Copyright (c) 2018 Bevilacqua Joey.
-->
<b>Darth Plagueis</b> <i>The Wise</i>?<br>
<code>I thought not</code>. Its not a story the Jedi would tell you.</br>
Its a Sith legend. Darth Plagueis was a Dark Lord of the Sith,
so powerful and so wise he could use the Force to influence the midichlorians to <u>create life…</u><br>
He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.<br>
The dark side of the Force is a pathway to many abilities some consider to be unnatural.<br>
He became so powerful… the only thing he was afraid of was losing his power, which eventually, of course, he did.<br>
Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.<br>
<b>Ironic</b>. He could save others from death, but not himself.<br>
<h1>Surprise</h1>
<pre>
A surprise to be sure,
but a welcome one.
</pre>
<table>
<tr>
<td>Take a</td>
<td><b>seat</b></td>
</tr>
<tr>
<td><a href="https://google.com">This</a> is where</td>
<td>The fun begins</td>
</tr>
</table>
<h3>Fish</h3>
<h2>Fish</h2>
<h1>Fish</h1>
There's always <a href="{{ site.baseurl }}/bigger.html">bigger</a> fish<br>
[visible confusion]
<ul>
<li><u>Hello there</u></li>
<li>General Kenobi</li>
<li>You're a <b>bold</b> one</li>
</ul>
{% highlight bash %}
Dew it.
{% endhighlight %}
<table>
<tr>
<th>The</th>
<th>negotiations</th>
</tr>
<tr>
<td>where</td>
<td>short</td>
</tr>
<tr>
<td>I'll try</td>
<td><i>spinning</i></td>
</tr>
<tr>
<td>that's a</td>
<td>good trick</td>
</tr>
</table>

View File

@ -8,29 +8,48 @@ import ch.usi.inf.atelier.group1.jekyll.HtmlToLatexWriter
import ch.usi.inf.atelier.group1.jekyll.JekyllPage
import ch.usi.inf.atelier.group1.util.Log
import ch.usi.inf.atelier.group1.util.extensions.insertJekyllHeader
import ch.usi.inf.atelier.group1.util.extensions.writeTo
import java.io.File
import java.io.FileWriter
import java.lang.StringBuilder
import java.text.SimpleDateFormat
import java.util.*
class HtmlParser(private val input: File) {
private val file = JekyllPage(input)
private val output = HtmlToLatexWriter(file.content)
class HtmlParser(private val singlePage: Boolean) {
private val content = StringBuilder()
private var outName = "${SimpleDateFormat("yyyy-MM-dd_hh:mm").format(Date())}.html"
fun parse(): String {
/**
* Parse a jekyll html file to a LaTeX document
*
* @param path The path of the html file
*/
fun parse(path: File) {
val file = JekyllPage(path)
val output = HtmlToLatexWriter(file.content, singlePage)
// Make sure this is a jekyll html file
if (!file.isValid()) {
Log.e(IllegalArgumentException("This file is not valid"))
Log.e(IllegalArgumentException("This file is not valid"), false)
return
}
/*
if (file.header["author"] == "Marwan Announ") {
throw IllegalStateException("Invalid fuckery. Please don\'t attemp to parse this shit. Kthxbye")
}
*/
output.run {
start()
insertJekyllHeader(file)
beginDocument()
if (singlePage) {
// In singlePage mode, insert the beginning of the document only once
if (content.isEmpty()) {
start()
addSinglePageInfo("Documentation", "Group 1")
beginDocument()
}
insertJekyllHeader(file)
} else {
start()
insertJekyllHeader(file)
beginDocument()
}
// Convert html elements
changeBold()
changeBr()
changeCode()
@ -46,14 +65,60 @@ class HtmlParser(private val input: File) {
changeTable()
changeUnderline()
// Strip html comments
stripComments()
endDocument()
// Store the converted document
commit()
writeTo(input.absolutePath.replace(".html", ".tex"))
if (!singlePage) {
// End the document if singlePage is not enabled
endDocument()
}
}
return output.toString()
Log.i("Parsed: $path")
content.append(output.toString())
// Set the file name basing on the original html file name if not running in singlePage mode
if (!singlePage) {
outName = path.absolutePath
}
}
/**
 * Write the accumulated content to a `.tex` file inside the "out" directory.
 *
 * In singlePage mode the accumulated content is first handed to a fresh
 * [HtmlToLatexWriter], committed and closed with `endDocument()`; otherwise
 * the accumulated content is written as-is. Nothing is written when the
 * resulting document is empty.
 */
fun save() {
    val document = if (singlePage) {
        // End the singlePage'd document
        HtmlToLatexWriter(content.toString(), true).run {
            commit()
            endDocument()
            toString()
        }
    } else {
        content.toString()
    }

    // No need to save an empty document
    if (document.isEmpty()) {
        return
    }

    val outDir = File("out", if (singlePage) "" else File(outName).parent)
    if (!outDir.exists()) {
        outDir.mkdirs()
    }

    // Save the LaTeX document
    val file = File("out", outName.replace(".html", ".tex"))
    // `use` flushes and closes the writer even when write() throws,
    // so the file handle is never leaked
    FileWriter(file, false).use { writer ->
        Log.i("${file.path} created")
        writer.write(document)
    }
}
}

View File

@ -3,14 +3,25 @@
*/
package ch.usi.inf.atelier.group1
import ch.usi.inf.atelier.group1.util.Log
import java.io.File
object Main {
private lateinit var parser: HtmlParser
/**
 * Program entry point: creates the shared parser, converts every
 * non-flag argument and finally saves the result.
 *
 * @param args optional `-s` / `--singlepage` flag plus the files or directories to convert
 */
@JvmStatic
fun main(args: Array<String>) {
    if (args.isEmpty()) {
        Log.e("At least one argument is needed")
        return
    }

    // Accept the flag at any position: convert() skips every argument that
    // starts with "-", so a flag placed after a path would otherwise be
    // dropped without enabling singlePage mode.
    val singlePage = args.any { it == "-s" || it == "--singlepage" }

    parser = HtmlParser(singlePage)
    args.forEach(this::convert)
    parser.save()
}
/**
@ -19,10 +30,15 @@ object Main {
* @param path of the file (or directory containing files) to be converted
*/
private fun convert(path: String) {
// Ignore the flags
if (path.startsWith("-")) {
return
}
val file = File(path)
if (!file.isDirectory) {
HtmlParser(file).parse()
parser.parse(file)
return
}

View File

@ -7,7 +7,7 @@ import org.jsoup.Jsoup
import org.jsoup.parser.Parser
import java.util.regex.Pattern
class HtmlToLatexWriter(private var content: String) {
class HtmlToLatexWriter(private var content: String, private val singlePage: Boolean) {
private val document = StringBuilder()
/**
@ -30,11 +30,18 @@ class HtmlToLatexWriter(private var content: String) {
insert("\\tableofcontents\\")
}
/**
 * Store the converted html text into
 * the writer content so it can be later exported.
 *
 * Passes the current `content` to `insert`; call it once all the
 * conversions that rewrite `content` have been applied.
 */
fun commit() {
insert(content)
}
/**
* End the document
*/
fun endDocument() {
insert(content)
insert("\\end{document}", afterLine = true)
}
@ -116,26 +123,36 @@ class HtmlToLatexWriter(private var content: String) {
.replaceTag("<h5>", "</h5>", "", "")
.replaceTag("<p>", "</p>", "\n", "")
.replaceTag("<a>", "</a>", "", "")
if (singlePage) {
content = content.replaceTag("<h3>", "</h3>", "", "")
}
}
/**
* Replace <h1> with LaTeX \section
*/
fun changeSection() {
content = content.replaceTag("<h1>", "</h1>", "\\section{", "}\n")
content = content.replaceTag("<h1>", "</h1>",
if (singlePage) "\\subsection{" else "\\section{", "}\n")
}
/**
* Replace <h2> with LaTeX \subsection
*/
fun changeSubSection() {
content = content.replaceTag("<h2>", "</h2>", "\\subsection{", "}\n")
content = content.replaceTag("<h2>", "</h2>",
if (singlePage) "\\subsubsection{" else "\\subsection{", "}\n")
}
/**
 * Turn every <h3> heading into a LaTeX \subsubsection.
 *
 * Does nothing when the writer runs in singlePage mode.
 */
fun changeSubSubSection() {
    if (!singlePage) {
        content = content.replaceTag("<h3>", "</h3>", "\\subsubsection{", "}\n")
    }
}
@ -197,7 +214,7 @@ class HtmlToLatexWriter(private var content: String) {
* @param author the text being inserted
*/
fun addAuthor(author: String) {
insert(AUTHOR.format(author), true, true)
insert(if (singlePage) "\\large $author \\normalsize\\\\" else AUTHOR.format(author), true, true)
}
/**
@ -206,7 +223,19 @@ class HtmlToLatexWriter(private var content: String) {
* @param title the text being inserted
*/
fun addTitle(title: String) {
    // singlePage mode emits the page title as a section heading
    // instead of the document-level title
    val heading = if (singlePage) {
        "\\section{$title}\n"
    } else {
        TITLE.format(title)
    }
    insert(heading, true, true)
}
/**
 * Insert the document-level title and author used in singlePage mode.
 * Has no effect when singlePage mode is disabled.
 *
 * @param title the title inserted through the TITLE template
 * @param author the author inserted through the AUTHOR template
 */
fun addSinglePageInfo(title: String, author: String) {
    if (singlePage) {
        insert(TITLE.format(title), true, true)
        insert(AUTHOR.format(author), false, true)
    }
}
/**

View File

@ -8,14 +8,45 @@ import java.util.*
object Log {
fun e(exception: Exception) {
print('E', exception.message ?: "Unknown error", true)
/**
 * Log an exception as an error
 *
 * The exception message is logged, or "Unknown error" when the exception has none.
 *
 * @param exception The exception to be logged
 * @param shouldThrow Whether the exception should be thrown after it has been logged
 */
fun <T : Exception> e(exception: T, shouldThrow: Boolean) {
    e(exception.message ?: "Unknown error")

    if (shouldThrow) {
        throw exception
    }
}
/**
 * Print a message as an error log entry (prefix 'E').
 *
 * @param message The text of the log entry
 */
fun e(message: String) {
    print(prefix = 'E', message = message, isErr = true)
}
/**
 * Print an informational log entry (prefix 'I') built from
 * the toString() representation of an object.
 *
 * @param obj The object whose content will be printed.
 */
fun i(obj: Any) {
    print(prefix = 'I', message = obj.toString(), isErr = false)
}
/**
* Print a log with a date and prefix
*
* @param prefix The prefix of the log. Helps differentiating the various types of logs
* @param message The message that will be displayed in the log
* @param isErr Whether the log should be printed as an error
*/
private fun print(prefix: Char, message: String, isErr: Boolean) {
val time = SimpleDateFormat("yyyy-MM-dd hh:mm").format(Date())

View File

@ -26,7 +26,7 @@ fun File.getContent(): String {
line = reader.readLine()
}
} catch (e: IOException) {
Log.e(exception = e)
Log.e(e, true)
}
return content.toString()

View File

@ -11,6 +11,8 @@ import java.io.FileWriter
/**
* Insert author and title from Jekyll file
*
* @param file Jekyll page from which the title and the author will be read
*/
fun HtmlToLatexWriter.insertJekyllHeader(file: JekyllPage) {
val title = file.header["title"] ?: "Unknown title"
@ -18,22 +20,4 @@ fun HtmlToLatexWriter.insertJekyllHeader(file: JekyllPage) {
addTitle(title)
addAuthor(author)
}
fun HtmlToLatexWriter.writeTo(path: String) {
val outDir = File("out", File(path).parent)
if (!outDir.exists()) {
outDir.mkdirs()
}
val file = File("out", path)
val writer = FileWriter(file, false)
Log.i("${file.path} created")
writer.write(toString())
writer.flush()
writer.close()
}