Started report
This commit is contained in:
parent
fab188aba7
commit
e2d01e39ea
3 changed files with 369 additions and 0 deletions
284
report/.gitignore
vendored
Normal file
284
report/.gitignore
vendored
Normal file
|
@ -0,0 +1,284 @@
|
||||||
|
## Core latex/pdflatex auxiliary files:
|
||||||
|
*.aux
|
||||||
|
*.lof
|
||||||
|
*.log
|
||||||
|
*.lot
|
||||||
|
*.fls
|
||||||
|
*.out
|
||||||
|
*.toc
|
||||||
|
*.fmt
|
||||||
|
*.fot
|
||||||
|
*.cb
|
||||||
|
*.cb2
|
||||||
|
.*.lb
|
||||||
|
|
||||||
|
## Intermediate documents:
|
||||||
|
*.dvi
|
||||||
|
*.xdv
|
||||||
|
*-converted-to.*
|
||||||
|
# these rules might exclude image files for figures etc.
|
||||||
|
# *.ps
|
||||||
|
# *.eps
|
||||||
|
# *.pdf
|
||||||
|
|
||||||
|
## Generated if empty string is given at "Please type another file name for output:"
|
||||||
|
.pdf
|
||||||
|
|
||||||
|
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||||
|
*.bbl
|
||||||
|
*.bcf
|
||||||
|
*.blg
|
||||||
|
*-blx.aux
|
||||||
|
*-blx.bib
|
||||||
|
*.run.xml
|
||||||
|
|
||||||
|
## Build tool auxiliary files:
|
||||||
|
*.fdb_latexmk
|
||||||
|
*.synctex
|
||||||
|
*.synctex(busy)
|
||||||
|
*.synctex.gz
|
||||||
|
*.synctex.gz(busy)
|
||||||
|
*.pdfsync
|
||||||
|
|
||||||
|
## Build tool directories for auxiliary files
|
||||||
|
# latexrun
|
||||||
|
latex.out/
|
||||||
|
|
||||||
|
## Auxiliary and intermediate files from other packages:
|
||||||
|
# algorithms
|
||||||
|
*.alg
|
||||||
|
*.loa
|
||||||
|
|
||||||
|
# achemso
|
||||||
|
acs-*.bib
|
||||||
|
|
||||||
|
# amsthm
|
||||||
|
*.thm
|
||||||
|
|
||||||
|
# beamer
|
||||||
|
*.nav
|
||||||
|
*.pre
|
||||||
|
*.snm
|
||||||
|
*.vrb
|
||||||
|
|
||||||
|
# changes
|
||||||
|
*.soc
|
||||||
|
|
||||||
|
# comment
|
||||||
|
*.cut
|
||||||
|
|
||||||
|
# cprotect
|
||||||
|
*.cpt
|
||||||
|
|
||||||
|
# elsarticle (documentclass of Elsevier journals)
|
||||||
|
*.spl
|
||||||
|
|
||||||
|
# endnotes
|
||||||
|
*.ent
|
||||||
|
|
||||||
|
# fixme
|
||||||
|
*.lox
|
||||||
|
|
||||||
|
# feynmf/feynmp
|
||||||
|
*.mf
|
||||||
|
*.mp
|
||||||
|
*.t[1-9]
|
||||||
|
*.t[1-9][0-9]
|
||||||
|
*.tfm
|
||||||
|
|
||||||
|
#(r)(e)ledmac/(r)(e)ledpar
|
||||||
|
*.end
|
||||||
|
*.?end
|
||||||
|
*.[1-9]
|
||||||
|
*.[1-9][0-9]
|
||||||
|
*.[1-9][0-9][0-9]
|
||||||
|
*.[1-9]R
|
||||||
|
*.[1-9][0-9]R
|
||||||
|
*.[1-9][0-9][0-9]R
|
||||||
|
*.eledsec[1-9]
|
||||||
|
*.eledsec[1-9]R
|
||||||
|
*.eledsec[1-9][0-9]
|
||||||
|
*.eledsec[1-9][0-9]R
|
||||||
|
*.eledsec[1-9][0-9][0-9]
|
||||||
|
*.eledsec[1-9][0-9][0-9]R
|
||||||
|
|
||||||
|
# glossaries
|
||||||
|
*.acn
|
||||||
|
*.acr
|
||||||
|
*.glg
|
||||||
|
*.glo
|
||||||
|
*.gls
|
||||||
|
*.glsdefs
|
||||||
|
*.lzo
|
||||||
|
*.lzs
|
||||||
|
|
||||||
|
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||||
|
# *.ist
|
||||||
|
|
||||||
|
# gnuplottex
|
||||||
|
*-gnuplottex-*
|
||||||
|
|
||||||
|
# gregoriotex
|
||||||
|
*.gaux
|
||||||
|
*.gtex
|
||||||
|
|
||||||
|
# htlatex
|
||||||
|
*.4ct
|
||||||
|
*.4tc
|
||||||
|
*.idv
|
||||||
|
*.lg
|
||||||
|
*.trc
|
||||||
|
*.xref
|
||||||
|
|
||||||
|
# hyperref
|
||||||
|
*.brf
|
||||||
|
|
||||||
|
# knitr
|
||||||
|
*-concordance.tex
|
||||||
|
# *.tikz
|
||||||
|
*-tikzDictionary
|
||||||
|
|
||||||
|
# listings
|
||||||
|
*.lol
|
||||||
|
|
||||||
|
# luatexja-ruby
|
||||||
|
*.ltjruby
|
||||||
|
|
||||||
|
# makeidx
|
||||||
|
*.idx
|
||||||
|
*.ilg
|
||||||
|
*.ind
|
||||||
|
|
||||||
|
# minitoc
|
||||||
|
*.maf
|
||||||
|
*.mlf
|
||||||
|
*.mlt
|
||||||
|
*.mtc[0-9]*
|
||||||
|
*.slf[0-9]*
|
||||||
|
*.slt[0-9]*
|
||||||
|
*.stc[0-9]*
|
||||||
|
|
||||||
|
# minted
|
||||||
|
_minted*
|
||||||
|
*.pyg
|
||||||
|
|
||||||
|
# morewrites
|
||||||
|
*.mw
|
||||||
|
|
||||||
|
# nomencl
|
||||||
|
*.nlg
|
||||||
|
*.nlo
|
||||||
|
*.nls
|
||||||
|
|
||||||
|
# pax
|
||||||
|
*.pax
|
||||||
|
|
||||||
|
# pdfpcnotes
|
||||||
|
*.pdfpc
|
||||||
|
|
||||||
|
# sagetex
|
||||||
|
*.sagetex.sage
|
||||||
|
*.sagetex.py
|
||||||
|
*.sagetex.scmd
|
||||||
|
|
||||||
|
# scrwfile
|
||||||
|
*.wrt
|
||||||
|
|
||||||
|
# sympy
|
||||||
|
*.sout
|
||||||
|
*.sympy
|
||||||
|
sympy-plots-for-*.tex/
|
||||||
|
|
||||||
|
# pdfcomment
|
||||||
|
*.upa
|
||||||
|
*.upb
|
||||||
|
|
||||||
|
# pythontex
|
||||||
|
*.pytxcode
|
||||||
|
pythontex-files-*/
|
||||||
|
|
||||||
|
# tcolorbox
|
||||||
|
*.listing
|
||||||
|
|
||||||
|
# thmtools
|
||||||
|
*.loe
|
||||||
|
|
||||||
|
# TikZ & PGF
|
||||||
|
*.dpth
|
||||||
|
*.md5
|
||||||
|
*.auxlock
|
||||||
|
|
||||||
|
# todonotes
|
||||||
|
*.tdo
|
||||||
|
|
||||||
|
# vhistory
|
||||||
|
*.hst
|
||||||
|
*.ver
|
||||||
|
|
||||||
|
# easy-todo
|
||||||
|
*.lod
|
||||||
|
|
||||||
|
# xcolor
|
||||||
|
*.xcp
|
||||||
|
|
||||||
|
# xmpincl
|
||||||
|
*.xmpi
|
||||||
|
|
||||||
|
# xindy
|
||||||
|
*.xdy
|
||||||
|
|
||||||
|
# xypic precompiled matrices and outlines
|
||||||
|
*.xyc
|
||||||
|
*.xyd
|
||||||
|
|
||||||
|
# endfloat
|
||||||
|
*.ttt
|
||||||
|
*.fff
|
||||||
|
|
||||||
|
# Latexian
|
||||||
|
TSWLatexianTemp*
|
||||||
|
|
||||||
|
## Editors:
|
||||||
|
# WinEdt
|
||||||
|
*.bak
|
||||||
|
*.sav
|
||||||
|
|
||||||
|
# Texpad
|
||||||
|
.texpadtmp
|
||||||
|
|
||||||
|
# LyX
|
||||||
|
*.lyx~
|
||||||
|
|
||||||
|
# Kile
|
||||||
|
*.backup
|
||||||
|
|
||||||
|
# gummi
|
||||||
|
.*.swp
|
||||||
|
|
||||||
|
# KBibTeX
|
||||||
|
*~[0-9]*
|
||||||
|
|
||||||
|
# TeXnicCenter
|
||||||
|
*.tps
|
||||||
|
|
||||||
|
# auto folder when using emacs and auctex
|
||||||
|
/auto/*
|
||||||
|
*.el
|
||||||
|
|
||||||
|
# expex forward references with \gathertags
|
||||||
|
*-tags.tex
|
||||||
|
|
||||||
|
# standalone packages
|
||||||
|
*.sta
|
||||||
|
|
||||||
|
# Makeindex log files
|
||||||
|
*.lpz
|
||||||
|
|
||||||
|
# xwatermark package
|
||||||
|
*.xwm
|
||||||
|
|
||||||
|
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||||
|
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
|
||||||
|
# Uncomment the next line to have this generated file ignored.
|
||||||
|
#*Notes.bib
|
||||||
|
!*.pdf
|
BIN
report/report.pdf
Normal file
BIN
report/report.pdf
Normal file
Binary file not shown.
85
report/report.tex
Normal file
85
report/report.tex
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
% vim: set ts=2 sw=2 et tw=80:
|
||||||
|
|
||||||
|
\documentclass{scrartcl}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage{parskip}
|
||||||
|
\usepackage{minted}
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
|
||||||
|
\setlength{\parindent}{0pt}
|
||||||
|
|
||||||
|
\usepackage[margin=2.5cm]{geometry}
|
||||||
|
|
||||||
|
\title{\textit{Image Search IR System} \\\vspace{0.3cm}
|
||||||
|
\Large{WS2020-21 Information Retrieval Project}}
|
||||||
|
\author{Claudio Maggioni}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\maketitle
|
||||||
|
\tableofcontents
|
||||||
|
\newpage
|
||||||
|
|
||||||
|
\section{Introduction}
|
||||||
|
This report is a summary of the work I have done to create the ``Image Search IR
|
||||||
|
system'', a proof-of-concept IR system implementing the ``Image
|
||||||
|
Search Engine'' project (project \#13).
|
||||||
|
|
||||||
|
The project is built on a simple
|
||||||
|
\textit{Scrapy}-\textit{Solr}-\textit{HTML5+CSS+JS} stack. Installation
|
||||||
|
instructions, an in-depth look at the project components for scraping, indexing,
|
||||||
|
and displaying the results, and finally the user evaluation report, can all be
|
||||||
|
found in the following sections.
|
||||||
|
|
||||||
|
\section{Installation instructions}
|
||||||
|
|
||||||
|
\subsection{Project repository}
|
||||||
|
The project Git repository is located here:
|
||||||
|
\url{https://git.maggioni.xyz/maggicl/IRProject}.
|
||||||
|
|
||||||
|
\subsection{Solr installation}
|
||||||
|
The installation of the project and population of the test collection with the
|
||||||
|
scraped documents is automated by a single script. The script requires you have
|
||||||
|
downloaded \textit{Solr} version 8.6.2 as a ZIP file, i.e.\ the same
|
||||||
|
\textit{Solr} ZIP we had to download during lab lectures. Should you need to
|
||||||
|
download a copy of the ZIP file, you can find it here (on USI's OneDrive
|
||||||
|
hosting): \url{http://to-do.com/file}.
|
||||||
|
|
||||||
|
Clone the project's git repository and position yourself with a shell on the
|
||||||
|
project's root directory. Then execute this command:
|
||||||
|
|
||||||
|
% linenos
|
||||||
|
\begin{minted}[frame=lines,framesep=2mm]{bash}
|
||||||
|
./solr_install.sh <ZIP path>
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
where \texttt{<ZIP path>} is the path of the ZIP file mentioned earlier. This
|
||||||
|
will install, start, and update \textit{Solr} with the test collection.
|
||||||
|
|
||||||
|
\subsection{UI installation}
|
||||||
|
In order to start the UI, open with your browser of choice the file
|
||||||
|
\texttt{ui/index.html}. In order to use the UI, it is necessary to bypass
|
||||||
|
\texttt{Cross-Origin Resource Sharing} (CORS) security checks by downloading and
|
||||||
|
enabling a ``CORS everywhere'' extension. I suggest
|
||||||
|
\href{https://addons.mozilla.org/en-US/firefox/addon/cors-everywhere/}{this one} for
|
||||||
|
Mozilla Firefox and derivatives.
|
||||||
|
|
||||||
|
\subsection{Run the website scrapers}
|
||||||
|
A prerequisite to run the Flickr crawler is to have a working Scrapy Splash
|
||||||
|
instance listening on \texttt{localhost:8050}. This can be achieved by
|
||||||
|
executing this Docker command, should a Docker installation be available:
|
||||||
|
|
||||||
|
\begin{minted}[frame=lines,framesep=2mm]{bash}
|
||||||
|
docker run -p 8050:8050 scrapinghub/splash
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
To run all the website scrapers, execute the script \texttt{./scrape.sh} with
|
||||||
|
no arguments.
|
||||||
|
|
||||||
|
\section{Scraping}
|
||||||
|
|
||||||
|
\section{Indexing and \textit{Solr} configuration}
|
||||||
|
|
||||||
|
\section{User interface}
|
||||||
|
|
||||||
|
\section{User evaluation}
|
||||||
|
\end{document}
|
Reference in a new issue