diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9917ead --- /dev/null +++ b/.gitignore @@ -0,0 +1,459 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ +**/.DS_Store +out/model/*.pt + +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs +*.slg +*.slo +*.sls + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) +# *.ist + +# gnuplot +*.gnuplot +*.table + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.glog +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# *.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# newpax +*.newpax + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# svg +svg-inkscape/ + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# titletoc +*.ptc + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# xwatermark package +*.xwm + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. +#*Notes.bib \ No newline at end of file diff --git a/README.md b/README.md index 7f0f854..69e7888 100644 --- a/README.md +++ b/README.md @@ -10,5 +10,12 @@ In this repository, you can find the following files: Note: Feel free to modify this file according to the project's necessities. +## Environment setup - +To install the required dependencies make sure `python3` points to a Python 3.10 or 3.11 installation and then run: + +```shell +python3 -m venv env +source env/bin/activate +pip install -r requirements.txt +``` diff --git a/instrumentor.py b/instrumentor.py new file mode 100644 index 0000000..7756334 --- /dev/null +++ b/instrumentor.py @@ -0,0 +1,145 @@ +from collections import defaultdict +from dataclasses import dataclass +from typing import TypeVar, Callable, Optional +from typing import Generic + +from nltk import edit_distance + +T = TypeVar('T') +U = TypeVar('U') + + +@dataclass +class CmpOp(Generic[T]): + operator: str + name: str + test: Callable[[T, T], bool] + true_dist: Callable[[T, T], int] + false_dist: Callable[[T, T], int] + + def __init__(self, operator: str, name: str, test: Callable[[T, T], bool], true_dist: Callable[[T, T], int], + false_dist: Callable[[T, T], int]): + self.operator = operator + self.name = name + self.test = test + self.true_dist = true_dist + self.false_dist = false_dist + + +@dataclass +class InstrState: + min_true_dist: Optional[int] + min_false_dist: Optional[int] + + def __init__(self): + self.min_true_dist = None + self.min_false_dist = None + + def update(self, op: CmpOp[U], lhs: U, rhs: U): + true_dist = op.true_dist(lhs, rhs) + self.min_true_dist = true_dist if self.min_true_dist is None else min(true_dist, self.min_true_dist) + + false_dist = op.false_dist(lhs, rhs) + self.min_false_dist = false_dist if self.min_false_dist is None else min(false_dist, self.min_false_dist) + + +instrumentation_states: defaultdict[int, InstrState] = defaultdict(InstrState) + +# Operands for these must both be integers or strings of length 1 +int_str_ops: list[CmpOp[int | str]] = [ + CmpOp(operator='<', + name='Lt', + test=lambda lhs, rhs: lhs < rhs, + true_dist=lambda lhs, rhs: lhs - rhs + 1 if lhs >= rhs else 0, + false_dist=lambda lhs, rhs: rhs - lhs if lhs < rhs else 0), + CmpOp(operator='>', + name='Gt', + test=lambda lhs, rhs: lhs > rhs, + true_dist=lambda lhs, rhs: rhs - lhs + 1 if lhs <= rhs else 0, + false_dist=lambda lhs, rhs: lhs - rhs if lhs > rhs else 0), + CmpOp(operator='<=', + name='LtE', + test=lambda lhs, rhs: lhs <= rhs, + true_dist=lambda lhs, rhs: lhs - rhs if lhs > rhs else 0, + false_dist=lambda lhs, rhs: rhs - lhs + 1 if lhs <= rhs else 0), + CmpOp(operator='>=', + name='GtE', + test=lambda lhs, rhs: lhs >= rhs, + true_dist=lambda lhs, rhs: rhs - lhs if lhs < rhs else 0, + false_dist=lambda lhs, rhs: lhs - rhs + 1 if lhs >= rhs else 0), + CmpOp(operator='==', + name='Eq', + test=lambda lhs, rhs: lhs == rhs, + true_dist=lambda lhs, rhs: abs(lhs - rhs), + false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0), + CmpOp(operator='!=', + name='NotEq', + test=lambda lhs, rhs: lhs == rhs, + true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0, + false_dist=lambda lhs, rhs: abs(lhs - rhs)), +] + +int_str_by_name: dict[str, CmpOp[int | str]] = {c.name: c for c in int_str_ops} + + +def int_str_check(a: any, b: any) -> bool: + if type(a) == int and type(b) == int: + return True + if type(a) != str or type(b) != str: + return False + return len(a) == 1 or len(b) == 1 + + +def int_str_convert(x: int | str) -> int: + if type(x) == int: + return x + if len(x) == 1: + return ord(x) + + raise ValueError("x must be int or len(str) == 1") + + +# Operands for these must both be strings +str_ops: list[CmpOp[str]] = [ + CmpOp(operator='==', + name='Eq', + test=lambda lhs, rhs: lhs == rhs, + true_dist=lambda lhs, rhs: edit_distance(lhs, rhs), + false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0), + CmpOp(operator='!=', + name='NotEq', + test=lambda lhs, rhs: lhs == rhs, + true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0, + false_dist=lambda lhs, rhs: edit_distance(lhs, rhs)), +] + +str_by_name: dict[str, CmpOp[int | str]] = {c.name: c for c in str_ops} + + +def str_check(a: any, b: any) -> bool: + return type(a) == str and type(b) == str + + +def evaluate_condition(cmp_id: int, name: str, lhs: any, rhs: any) -> bool: + if int_str_check(lhs, rhs): + lhs_int = int_str_convert(lhs) + rhs_int = int_str_convert(rhs) + + if name not in int_str_by_name: + raise ValueError(f"'{name}' is not a valid CmpOp name for 'int_str' operators") + + op = int_str_by_name[name] + + instrumentation_states[cmp_id].update(op, lhs_int, rhs_int) + return op.test(lhs_int, rhs_int) + + if str_check(lhs, rhs): + if name not in str_by_name: + raise ValueError(f"'{name}' is not a valid CmpOp name for 'str' operators") + + op = int_str_by_name[name] + + instrumentation_states[cmp_id].update(op, lhs, rhs) + return op.test(lhs, rhs) + + raise ValueError(f"'{lhs}' and '{rhs}' are not suitable for both 'int_str' and 'str' operators") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1a170a4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +nltk==3.8.1 \ No newline at end of file