bonus2: add support for combining multiple html files into one LaTeX doc
git-svn-id: svn+ssh://atelier.inf.usi.ch/home/bevilj/group-1@210 a672b425-5310-4d7a-af5c-997e18724b81
This commit is contained in:
parent
1889e4544e
commit
628d204803
9 changed files with 269 additions and 62 deletions
bonus2
|
@ -19,7 +19,7 @@ The output jar will be available at this path: `build/libs/bonus2.jar`
|
|||
## Execute
|
||||
|
||||
```
|
||||
java -jar bonus2.jar [file1.html] [file2.html] [directory]
|
||||
java -jar bonus2.jar [-s, --singlepage] [file1.html] [file2.html] [directory]
|
||||
```
|
||||
|
||||
Both files and directories can be passed as parameters, even at the same time.
|
||||
|
@ -33,7 +33,10 @@ subdirectories matching the absolute path of the files passed as argument.
|
|||
Note that the program will not work with plain html files,
|
||||
it requires Jekyll html files instead.
|
||||
|
||||
A test file is available for reference under the directory `res`.
|
||||
The `-s` or `--singlepage` flag creates one single output LaTeX file
|
||||
instead of many. It will be written to the "out" directory.
|
||||
|
||||
Test files are available for reference under the directory `res`.
|
||||
|
||||
## Supported tags
|
||||
|
||||
|
@ -62,4 +65,5 @@ A test file is available for reference under the directory `res`.
|
|||
* comments are removed
|
||||
* jekyll author -> LaTeX author
|
||||
* jekyll title -> LaTeX title
|
||||
* jekyll highlight -> verbatim
|
||||
|
||||
|
|
|
@ -8,8 +8,12 @@ title: The Tragedy
|
|||
previous-page: pages/cmd/basic/open.html
|
||||
next-page: pages/cmd/basic/ls.html
|
||||
---
|
||||
<p>
|
||||
Did you ever hear the tragedy of <b>Darth Plagueis</b> <i>The Wise</i>?<br>
|
||||
|
||||
Did you ever hear the tragedy of <!--
|
||||
~ Copyright (c) 2018 Bevilacqua Joey.
|
||||
-->
|
||||
|
||||
<b>Darth Plagueis</b> <i>The Wise</i>?<br>
|
||||
<code>I thought not</code>. It’s not a story the Jedi would tell you.</br>
|
||||
It’s a Sith legend. Darth Plagueis was a Dark Lord of the Sith,
|
||||
so powerful and so wise he could use the Force to influence the midichlorians to <u>create life…</u><br>
|
||||
|
@ -19,13 +23,6 @@ He became so powerful… the only thing he was afraid of was losing his power, w
|
|||
Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.<br>
|
||||
<b>Ironic</b>. He could save others from death, but not himself.<br>
|
||||
|
||||
<h1>Surprise</h1>
|
||||
|
||||
<pre>
|
||||
A surprise to be sure,
|
||||
but a welcome one.
|
||||
</pre>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td>Take a</td>
|
||||
|
@ -44,9 +41,7 @@ A surprise to be sure,
|
|||
|
||||
<h1>Fish</h1>
|
||||
|
||||
There's always <a href="{{ site.baseurl }}/bigger.html">bigger</a> fish<br>
|
||||
|
||||
[visible confusion]
|
||||
There's always <a href="{{ site.baseurl }}/bigger.html">bigger</a> fish
|
||||
|
||||
<ul>
|
||||
<li><u>Hello there</u></li>
|
||||
|
@ -78,6 +73,4 @@ Dew it.
|
|||
<td>that's a</td>
|
||||
<td>good trick</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
</p>
|
||||
</table>
|
85
bonus2/res/test2.html
Normal file
85
bonus2/res/test2.html
Normal file
|
@ -0,0 +1,85 @@
|
|||
---
|
||||
layout: page
|
||||
category-title: Knowledge
|
||||
category-page: basic
|
||||
tags: tragedy know
|
||||
author: Sheev Palpatine
|
||||
title: The Tragedy
|
||||
previous-page: pages/cmd/basic/open.html
|
||||
next-page: pages/cmd/basic/ls.html
|
||||
---
|
||||
|
||||
Did you ever hear the tragedy of <!--
|
||||
~ Copyright (c) 2018 Bevilacqua Joey.
|
||||
-->
|
||||
|
||||
<b>Darth Plagueis</b> <i>The Wise</i>?<br>
|
||||
<code>I thought not</code>. It’s not a story the Jedi would tell you.</br>
|
||||
It’s a Sith legend. Darth Plagueis was a Dark Lord of the Sith,
|
||||
so powerful and so wise he could use the Force to influence the midichlorians to <u>create life…</u><br>
|
||||
He had such a knowledge of the dark side that he could even keep the ones he cared about from dying.<br>
|
||||
The dark side of the Force is a pathway to many abilities some consider to be unnatural.<br>
|
||||
He became so powerful… the only thing he was afraid of was losing his power, which eventually, of course, he did.<br>
|
||||
Unfortunately, he taught his apprentice everything he knew, then his apprentice killed him in his sleep.<br>
|
||||
<b>Ironic</b>. He could save others from death, but not himself.<br>
|
||||
|
||||
<h1>Surprise</h1>
|
||||
|
||||
<pre>
|
||||
A surprise to be sure,
|
||||
but a welcome one.
|
||||
</pre>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td>Take a</td>
|
||||
<td><b>seat</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://google.com">This</a> is where</td>
|
||||
<td>The fun begins</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
<h3>Fish</h3>
|
||||
|
||||
<h2>Fish</h2>
|
||||
|
||||
<h1>Fish</h1>
|
||||
|
||||
There's always <a href="{{ site.baseurl }}/bigger.html">bigger</a> fish<br>
|
||||
|
||||
[visible confusion]
|
||||
|
||||
<ul>
|
||||
<li><u>Hello there</u></li>
|
||||
<li>General Kenobi</li>
|
||||
<li>You're a <b>bold</b> one</li>
|
||||
</ul>
|
||||
|
||||
{% highlight bash %}
|
||||
|
||||
Dew it.
|
||||
|
||||
{% endhighlight %}
|
||||
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>The</th>
|
||||
<th>negotiations</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>where</td>
|
||||
<td>short</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>I'll try</td>
|
||||
<td><i>spinning</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>that's a</td>
|
||||
<td>good trick</td>
|
||||
</tr>
|
||||
</table>
|
|
@ -8,29 +8,48 @@ import ch.usi.inf.atelier.group1.jekyll.HtmlToLatexWriter
|
|||
import ch.usi.inf.atelier.group1.jekyll.JekyllPage
|
||||
import ch.usi.inf.atelier.group1.util.Log
|
||||
import ch.usi.inf.atelier.group1.util.extensions.insertJekyllHeader
|
||||
import ch.usi.inf.atelier.group1.util.extensions.writeTo
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.lang.StringBuilder
|
||||
import java.text.SimpleDateFormat
|
||||
import java.util.*
|
||||
|
||||
class HtmlParser(private val input: File) {
|
||||
private val file = JekyllPage(input)
|
||||
private val output = HtmlToLatexWriter(file.content)
|
||||
class HtmlParser(private val singlePage: Boolean) {
|
||||
private val content = StringBuilder()
|
||||
private var outName = "${SimpleDateFormat("yyyy-MM-dd_hh:mm").format(Date())}.html"
|
||||
|
||||
fun parse(): String {
|
||||
/**
|
||||
* Parse a jekyll html file to a LaTeX Document
|
||||
*
|
||||
* @param path The path of the html file
|
||||
*/
|
||||
fun parse(path: File) {
|
||||
val file = JekyllPage(path)
|
||||
val output = HtmlToLatexWriter(file.content, singlePage)
|
||||
|
||||
// Make sure this is a jekyll html file
|
||||
if (!file.isValid()) {
|
||||
Log.e(IllegalArgumentException("This file is not valid"))
|
||||
Log.e(IllegalArgumentException("This file is not valid"), false)
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
if (file.header["author"] == "Marwan Announ") {
|
||||
throw IllegalStateException("Invalid fuckery. Please don\'t attemp to parse this shit. Kthxbye")
|
||||
}
|
||||
*/
|
||||
|
||||
output.run {
|
||||
start()
|
||||
insertJekyllHeader(file)
|
||||
beginDocument()
|
||||
if (singlePage) {
|
||||
// In singlePage mode, insert the beginning of the document only once
|
||||
if (content.isEmpty()) {
|
||||
start()
|
||||
addSinglePageInfo("Documentation", "Group 1")
|
||||
beginDocument()
|
||||
}
|
||||
|
||||
insertJekyllHeader(file)
|
||||
} else {
|
||||
start()
|
||||
insertJekyllHeader(file)
|
||||
beginDocument()
|
||||
}
|
||||
|
||||
// Convert html elements
|
||||
changeBold()
|
||||
changeBr()
|
||||
changeCode()
|
||||
|
@ -46,14 +65,60 @@ class HtmlParser(private val input: File) {
|
|||
changeTable()
|
||||
changeUnderline()
|
||||
|
||||
// Strip html comments
|
||||
stripComments()
|
||||
|
||||
endDocument()
|
||||
// Store the converted document
|
||||
commit()
|
||||
|
||||
writeTo(input.absolutePath.replace(".html", ".tex"))
|
||||
if (!singlePage) {
|
||||
// End the document if singlePage is not enabled
|
||||
endDocument()
|
||||
}
|
||||
}
|
||||
|
||||
return output.toString()
|
||||
Log.i("Parsed: $path")
|
||||
|
||||
content.append(output.toString())
|
||||
|
||||
// Set the file name based on the original html file name if not running in singlePage mode
|
||||
if (!singlePage) {
|
||||
outName = path.absolutePath
|
||||
}
|
||||
}
|
||||
|
||||
fun save() {
|
||||
val document = if (singlePage) {
|
||||
// End the singlePage'd document
|
||||
HtmlToLatexWriter(content.toString(), true).run {
|
||||
commit()
|
||||
endDocument()
|
||||
toString()
|
||||
}
|
||||
} else {
|
||||
content.toString()
|
||||
}
|
||||
|
||||
// No need to save an empty document
|
||||
if (document.isEmpty()) {
|
||||
return
|
||||
}
|
||||
|
||||
val outDir = File("out", if (singlePage) "" else File(outName).parent)
|
||||
|
||||
if (!outDir.exists()) {
|
||||
outDir.mkdirs()
|
||||
}
|
||||
|
||||
// Save the LaTeX document
|
||||
val file = File("out", outName.replace(".html", ".tex"))
|
||||
|
||||
val writer = FileWriter(file, false)
|
||||
|
||||
Log.i("${file.path} created")
|
||||
|
||||
writer.write(document)
|
||||
writer.flush()
|
||||
writer.close()
|
||||
}
|
||||
}
|
|
@ -3,14 +3,25 @@
|
|||
*/
|
||||
package ch.usi.inf.atelier.group1
|
||||
|
||||
import ch.usi.inf.atelier.group1.util.Log
|
||||
import java.io.File
|
||||
|
||||
object Main {
|
||||
private lateinit var parser: HtmlParser
|
||||
|
||||
@JvmStatic
|
||||
fun main(args: Array<String>) {
|
||||
|
||||
if (args.isEmpty()) {
|
||||
Log.e("At least one argument is needed")
|
||||
return
|
||||
}
|
||||
|
||||
parser = HtmlParser(arrayOf("-s", "--singlepage").contains(args[0]))
|
||||
|
||||
args.forEach(this::convert)
|
||||
|
||||
parser.save()
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -19,10 +30,15 @@ object Main {
|
|||
* @param path of the file (or directory containing files) to be converted
|
||||
*/
|
||||
private fun convert(path: String) {
|
||||
// Ignore the flags
|
||||
if (path.startsWith("-")) {
|
||||
return
|
||||
}
|
||||
|
||||
val file = File(path)
|
||||
|
||||
if (!file.isDirectory) {
|
||||
HtmlParser(file).parse()
|
||||
parser.parse(file)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import org.jsoup.Jsoup
|
|||
import org.jsoup.parser.Parser
|
||||
import java.util.regex.Pattern
|
||||
|
||||
class HtmlToLatexWriter(private var content: String) {
|
||||
class HtmlToLatexWriter(private var content: String, private val singlePage: Boolean) {
|
||||
private val document = StringBuilder()
|
||||
|
||||
/**
|
||||
|
@ -30,11 +30,18 @@ class HtmlToLatexWriter(private var content: String) {
|
|||
insert("\\tableofcontents\\")
|
||||
}
|
||||
|
||||
/**
|
||||
* Store the converted html text into
|
||||
* the writer content so it can be later exported
|
||||
*/
|
||||
fun commit() {
|
||||
insert(content)
|
||||
}
|
||||
|
||||
/**
|
||||
* End the document
|
||||
*/
|
||||
fun endDocument() {
|
||||
insert(content)
|
||||
insert("\\end{document}", afterLine = true)
|
||||
}
|
||||
|
||||
|
@ -116,26 +123,36 @@ class HtmlToLatexWriter(private var content: String) {
|
|||
.replaceTag("<h5>", "</h5>", "", "")
|
||||
.replaceTag("<p>", "</p>", "\n", "")
|
||||
.replaceTag("<a>", "</a>", "", "")
|
||||
|
||||
if (singlePage) {
|
||||
content = content.replaceTag("<h3>", "</h3>", "", "")
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace <h1> with LaTeX \section
|
||||
*/
|
||||
fun changeSection() {
|
||||
content = content.replaceTag("<h1>", "</h1>", "\\section{", "}\n")
|
||||
content = content.replaceTag("<h1>", "</h1>",
|
||||
if (singlePage) "\\subsection{" else "\\section{", "}\n")
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace <h2> with LaTeX \subsection
|
||||
*/
|
||||
fun changeSubSection() {
|
||||
content = content.replaceTag("<h2>", "</h2>", "\\subsection{", "}\n")
|
||||
content = content.replaceTag("<h2>", "</h2>",
|
||||
if (singlePage) "\\subsubsection{" else "\\subsection{", "}\n")
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace <h3> with LaTeX \subsubsection
|
||||
*/
|
||||
fun changeSubSubSection() {
|
||||
if (singlePage) {
|
||||
return
|
||||
}
|
||||
|
||||
content = content.replaceTag("<h3>", "</h3>", "\\subsubsection{", "}\n")
|
||||
}
|
||||
|
||||
|
@ -197,7 +214,7 @@ class HtmlToLatexWriter(private var content: String) {
|
|||
* @param author the text being inserted
|
||||
*/
|
||||
fun addAuthor(author: String) {
|
||||
insert(AUTHOR.format(author), true, true)
|
||||
insert(if (singlePage) "\\large $author \\normalsize\\\\" else AUTHOR.format(author), true, true)
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -206,7 +223,19 @@ class HtmlToLatexWriter(private var content: String) {
|
|||
* @param title the text being inserted
|
||||
*/
|
||||
fun addTitle(title: String) {
|
||||
insert(if (singlePage) "\\section{$title}\n" else TITLE.format(title), true, true)
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a header for the html file in singlePage mode
|
||||
*/
|
||||
fun addSinglePageInfo(title: String, author: String) {
|
||||
if (!singlePage) {
|
||||
return
|
||||
}
|
||||
|
||||
insert(TITLE.format(title), true, true)
|
||||
insert(AUTHOR.format(author), false, true)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -8,14 +8,45 @@ import java.util.*
|
|||
|
||||
object Log {
|
||||
|
||||
fun e(exception: Exception) {
|
||||
print('E', exception.message ?: "Unknown error", true)
|
||||
/**
|
||||
* Log an exception as an error
|
||||
*
|
||||
* @param exception The exception whose message will be logged
|
||||
* @param shouldThrow Whether the exception should be thrown
|
||||
*/
|
||||
fun <T : Exception> e(exception: T, shouldThrow: Boolean) {
|
||||
e(exception.message ?: "Unknown error")
|
||||
|
||||
if (shouldThrow) {
|
||||
throw exception
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an error
|
||||
*
|
||||
* @param message The log message
|
||||
*/
|
||||
fun e(message: String) {
|
||||
print('E', message , true)
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an object using its toString() method.
|
||||
*
|
||||
* @param obj The object which content will be printed.
|
||||
*/
|
||||
fun i(obj: Any) {
|
||||
print('I', obj.toString(), false)
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a log with a date and prefix
|
||||
*
|
||||
* @param prefix The prefix of the log. Helps differentiating the various types of logs
|
||||
* @param message The message that will be displayed in the log
|
||||
* @param isErr Whether the log should be printed as an error
|
||||
*/
|
||||
private fun print(prefix: Char, message: String, isErr: Boolean) {
|
||||
val time = SimpleDateFormat("yyyy-MM-dd hh:mm").format(Date())
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ fun File.getContent(): String {
|
|||
line = reader.readLine()
|
||||
}
|
||||
} catch (e: IOException) {
|
||||
Log.e(exception = e)
|
||||
Log.e(e, true)
|
||||
}
|
||||
|
||||
return content.toString()
|
||||
|
|
|
@ -11,6 +11,8 @@ import java.io.FileWriter
|
|||
|
||||
/**
|
||||
* Insert author and title from Jekyll file
|
||||
*
|
||||
* @param file File from which the author will be read
|
||||
*/
|
||||
fun HtmlToLatexWriter.insertJekyllHeader(file: JekyllPage) {
|
||||
val title = file.header["title"] ?: "Unknown title"
|
||||
|
@ -18,22 +20,4 @@ fun HtmlToLatexWriter.insertJekyllHeader(file: JekyllPage) {
|
|||
|
||||
addTitle(title)
|
||||
addAuthor(author)
|
||||
}
|
||||
|
||||
fun HtmlToLatexWriter.writeTo(path: String) {
|
||||
val outDir = File("out", File(path).parent)
|
||||
|
||||
if (!outDir.exists()) {
|
||||
outDir.mkdirs()
|
||||
}
|
||||
|
||||
val file = File("out", path)
|
||||
|
||||
val writer = FileWriter(file, false)
|
||||
|
||||
Log.i("${file.path} created")
|
||||
|
||||
writer.write(toString())
|
||||
writer.flush()
|
||||
writer.close()
|
||||
}
|
Loading…
Reference in a new issue