Started report
This commit is contained in:
parent
fab188aba7
commit
e2d01e39ea
3 changed files with 369 additions and 0 deletions
284
report/.gitignore
vendored
Normal file
284
report/.gitignore
vendored
Normal file
|
@ -0,0 +1,284 @@
|
|||
## Core latex/pdflatex auxiliary files:
|
||||
*.aux
|
||||
*.lof
|
||||
*.log
|
||||
*.lot
|
||||
*.fls
|
||||
*.out
|
||||
*.toc
|
||||
*.fmt
|
||||
*.fot
|
||||
*.cb
|
||||
*.cb2
|
||||
.*.lb
|
||||
|
||||
## Intermediate documents:
|
||||
*.dvi
|
||||
*.xdv
|
||||
*-converted-to.*
|
||||
# these rules might exclude image files for figures etc.
|
||||
# *.ps
|
||||
# *.eps
|
||||
# *.pdf
|
||||
|
||||
## Generated if empty string is given at "Please type another file name for output:"
|
||||
.pdf
|
||||
|
||||
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||
*.bbl
|
||||
*.bcf
|
||||
*.blg
|
||||
*-blx.aux
|
||||
*-blx.bib
|
||||
*.run.xml
|
||||
|
||||
## Build tool auxiliary files:
|
||||
*.fdb_latexmk
|
||||
*.synctex
|
||||
*.synctex(busy)
|
||||
*.synctex.gz
|
||||
*.synctex.gz(busy)
|
||||
*.pdfsync
|
||||
|
||||
## Build tool directories for auxiliary files
|
||||
# latexrun
|
||||
latex.out/
|
||||
|
||||
## Auxiliary and intermediate files from other packages:
|
||||
# algorithms
|
||||
*.alg
|
||||
*.loa
|
||||
|
||||
# achemso
|
||||
acs-*.bib
|
||||
|
||||
# amsthm
|
||||
*.thm
|
||||
|
||||
# beamer
|
||||
*.nav
|
||||
*.pre
|
||||
*.snm
|
||||
*.vrb
|
||||
|
||||
# changes
|
||||
*.soc
|
||||
|
||||
# comment
|
||||
*.cut
|
||||
|
||||
# cprotect
|
||||
*.cpt
|
||||
|
||||
# elsarticle (documentclass of Elsevier journals)
|
||||
*.spl
|
||||
|
||||
# endnotes
|
||||
*.ent
|
||||
|
||||
# fixme
|
||||
*.lox
|
||||
|
||||
# feynmf/feynmp
|
||||
*.mf
|
||||
*.mp
|
||||
*.t[1-9]
|
||||
*.t[1-9][0-9]
|
||||
*.tfm
|
||||
|
||||
#(r)(e)ledmac/(r)(e)ledpar
|
||||
*.end
|
||||
*.?end
|
||||
*.[1-9]
|
||||
*.[1-9][0-9]
|
||||
*.[1-9][0-9][0-9]
|
||||
*.[1-9]R
|
||||
*.[1-9][0-9]R
|
||||
*.[1-9][0-9][0-9]R
|
||||
*.eledsec[1-9]
|
||||
*.eledsec[1-9]R
|
||||
*.eledsec[1-9][0-9]
|
||||
*.eledsec[1-9][0-9]R
|
||||
*.eledsec[1-9][0-9][0-9]
|
||||
*.eledsec[1-9][0-9][0-9]R
|
||||
|
||||
# glossaries
|
||||
*.acn
|
||||
*.acr
|
||||
*.glg
|
||||
*.glo
|
||||
*.gls
|
||||
*.glsdefs
|
||||
*.lzo
|
||||
*.lzs
|
||||
|
||||
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||
# *.ist
|
||||
|
||||
# gnuplottex
|
||||
*-gnuplottex-*
|
||||
|
||||
# gregoriotex
|
||||
*.gaux
|
||||
*.gtex
|
||||
|
||||
# htlatex
|
||||
*.4ct
|
||||
*.4tc
|
||||
*.idv
|
||||
*.lg
|
||||
*.trc
|
||||
*.xref
|
||||
|
||||
# hyperref
|
||||
*.brf
|
||||
|
||||
# knitr
|
||||
*-concordance.tex
|
||||
# *.tikz
|
||||
*-tikzDictionary
|
||||
|
||||
# listings
|
||||
*.lol
|
||||
|
||||
# luatexja-ruby
|
||||
*.ltjruby
|
||||
|
||||
# makeidx
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
|
||||
# minitoc
|
||||
*.maf
|
||||
*.mlf
|
||||
*.mlt
|
||||
*.mtc[0-9]*
|
||||
*.slf[0-9]*
|
||||
*.slt[0-9]*
|
||||
*.stc[0-9]*
|
||||
|
||||
# minted
|
||||
_minted*
|
||||
*.pyg
|
||||
|
||||
# morewrites
|
||||
*.mw
|
||||
|
||||
# nomencl
|
||||
*.nlg
|
||||
*.nlo
|
||||
*.nls
|
||||
|
||||
# pax
|
||||
*.pax
|
||||
|
||||
# pdfpcnotes
|
||||
*.pdfpc
|
||||
|
||||
# sagetex
|
||||
*.sagetex.sage
|
||||
*.sagetex.py
|
||||
*.sagetex.scmd
|
||||
|
||||
# scrwfile
|
||||
*.wrt
|
||||
|
||||
# sympy
|
||||
*.sout
|
||||
*.sympy
|
||||
sympy-plots-for-*.tex/
|
||||
|
||||
# pdfcomment
|
||||
*.upa
|
||||
*.upb
|
||||
|
||||
# pythontex
|
||||
*.pytxcode
|
||||
pythontex-files-*/
|
||||
|
||||
# tcolorbox
|
||||
*.listing
|
||||
|
||||
# thmtools
|
||||
*.loe
|
||||
|
||||
# TikZ & PGF
|
||||
*.dpth
|
||||
*.md5
|
||||
*.auxlock
|
||||
|
||||
# todonotes
|
||||
*.tdo
|
||||
|
||||
# vhistory
|
||||
*.hst
|
||||
*.ver
|
||||
|
||||
# easy-todo
|
||||
*.lod
|
||||
|
||||
# xcolor
|
||||
*.xcp
|
||||
|
||||
# xmpincl
|
||||
*.xmpi
|
||||
|
||||
# xindy
|
||||
*.xdy
|
||||
|
||||
# xypic precompiled matrices and outlines
|
||||
*.xyc
|
||||
*.xyd
|
||||
|
||||
# endfloat
|
||||
*.ttt
|
||||
*.fff
|
||||
|
||||
# Latexian
|
||||
TSWLatexianTemp*
|
||||
|
||||
## Editors:
|
||||
# WinEdt
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Texpad
|
||||
.texpadtmp
|
||||
|
||||
# LyX
|
||||
*.lyx~
|
||||
|
||||
# Kile
|
||||
*.backup
|
||||
|
||||
# gummi
|
||||
.*.swp
|
||||
|
||||
# KBibTeX
|
||||
*~[0-9]*
|
||||
|
||||
# TeXnicCenter
|
||||
*.tps
|
||||
|
||||
# auto folder when using emacs and auctex
|
||||
./auto/*
|
||||
*.el
|
||||
|
||||
# expex forward references with \gathertags
|
||||
*-tags.tex
|
||||
|
||||
# standalone packages
|
||||
*.sta
|
||||
|
||||
# Makeindex log files
|
||||
*.lpz
|
||||
|
||||
# xwatermark package
|
||||
*.xwm
|
||||
|
||||
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
|
||||
# Uncomment the next line to have this generated file ignored.
|
||||
#*Notes.bib
|
||||
!*.pdf
|
BIN
report/report.pdf
Normal file
BIN
report/report.pdf
Normal file
Binary file not shown.
85
report/report.tex
Normal file
85
report/report.tex
Normal file
|
@ -0,0 +1,85 @@
|
|||
% vim: set ts=2 sw=2 et tw=80:
|
||||
|
||||
\documentclass{scrartcl}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{parskip}
|
||||
\usepackage{minted}
|
||||
\usepackage[utf8]{inputenc}
|
||||
|
||||
\setlength{\parindent}{0pt}
|
||||
|
||||
\usepackage[margin=2.5cm]{geometry}
|
||||
|
||||
\title{\textit{Image Search IR System} \\\vspace{0.3cm}
|
||||
\Large{WS2020-21 Information Retrieval Project}}
|
||||
\author{Claudio Maggioni}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
\tableofcontents
|
||||
\newpage
|
||||
|
||||
\section{Introduction}
|
||||
This report is a summary of the work I have done to create the ``Image Search IR
|
||||
System'', a proof-of-concept IR system implementing the ``Image
|
||||
Search Engine'' project (project \#13).
|
||||
|
||||
The project is built on a simple
|
||||
\textit{Scrapy}-\textit{Solr}-\textit{HTML5+CSS+JS} stack. Installation
|
||||
instructions, an in-depth look at the project components for scraping, indexing,
|
||||
and displaying the results, and finally the user evaluation report, can all be
|
||||
found in the following sections.
|
||||
|
||||
\section{Installation instructions}
|
||||
|
||||
\subsection{Project repository}
|
||||
The project Git repository is located here:
|
||||
\url{https://git.maggioni.xyz/maggicl/IRProject}.
|
||||
|
||||
\subsection{Solr installation}
|
||||
The installation of the project and population of the test collection with the
|
||||
scraped documents is automated by a single script. The script requires that you have
|
||||
downloaded \textit{Solr} version 8.6.2 as a ZIP file, i.e.\ the same
|
||||
\textit{Solr} ZIP we had to download during lab lectures. Should you need to
|
||||
download a copy of the ZIP file, you can find it here (on USI's OneDrive
|
||||
hosting): \url{http://to-do.com/file}.
|
||||
|
||||
Clone the project's git repository and position yourself with a shell on the
|
||||
project's root directory. Then execute this command:
|
||||
|
||||
% linenos
|
||||
\begin{minted}[frame=lines,framesep=2mm]{bash}
|
||||
./solr_install.sh <ZIP path>
|
||||
\end{minted}
|
||||
|
||||
where \texttt{<ZIP path>} is the path of the ZIP file mentioned earlier. This
|
||||
will install, start, and update \textit{Solr} with the test collection.
|
||||
|
||||
\subsection{UI installation}
|
||||
In order to start the UI, open with your browser of choice the file
|
||||
\texttt{ui/index.html}. In order to use the UI, it is necessary to bypass
|
||||
Cross-Origin Resource Sharing (CORS) security checks by downloading and
|
||||
enabling a ``CORS everywhere'' extension. I suggest
|
||||
\href{https://addons.mozilla.org/en-US/firefox/addon/cors-everywhere/}{this one} for
|
||||
Mozilla Firefox and derivatives.
|
||||
|
||||
\subsection{Run the website scrapers}
|
||||
A prerequisite to run the Flickr crawler is to have a working Scrapy Splash
|
||||
instance listening on \texttt{localhost:8050}. This can be achieved by
|
||||
executing this Docker command, should a Docker installation be available:
|
||||
|
||||
\begin{minted}[frame=lines,framesep=2mm]{bash}
|
||||
docker run -p 8050:8050 scrapinghub/splash
|
||||
\end{minted}
|
||||
|
||||
In order to run all the website scrapers, execute the script \texttt{./scrape.sh} with
|
||||
no arguments.
|
||||
|
||||
\section{Scraping}
|
||||
|
||||
\section{Indexing and \textit{Solr} configuration}
|
||||
|
||||
\section{User interface}
|
||||
|
||||
\section{User evaluation}
|
||||
\end{document}
|
Reference in a new issue