wip instrumentor

This commit is contained in:
Claudio Maggioni 2023-11-13 14:45:51 +01:00
parent 24600885b9
commit fb409cb714
4 changed files with 613 additions and 1 deletions

459
.gitignore vendored Normal file
View file

@ -0,0 +1,459 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
**/.DS_Store
out/model/*.pt
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are the stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

View file

@ -10,5 +10,12 @@ In this repository, you can find the following files:
Note: Feel free to modify this file according to the project's necessities. Note: Feel free to modify this file according to the project's necessities.
## Environment setup
To install the required dependencies make sure `python3` points to a Python 3.10 or 3.11 installation and then run:
```shell
python3 -m venv env
source env/bin/activate
pip install -r requirements.txt
```

145
instrumentor.py Normal file
View file

@ -0,0 +1,145 @@
from collections import defaultdict
from dataclasses import dataclass
from typing import TypeVar, Callable, Optional
from typing import Generic
from nltk import edit_distance
T = TypeVar('T')
U = TypeVar('U')
@dataclass
class CmpOp(Generic[T]):
operator: str
name: str
test: Callable[[T, T], bool]
true_dist: Callable[[T, T], int]
false_dist: Callable[[T, T], int]
def __init__(self, operator: str, name: str, test: Callable[[T, T], bool], true_dist: Callable[[T, T], int],
false_dist: Callable[[T, T], int]):
self.operator = operator
self.name = name
self.test = test
self.true_dist = true_dist
self.false_dist = false_dist
@dataclass
class InstrState:
min_true_dist: Optional[int]
min_false_dist: Optional[int]
def __init__(self):
self.min_true_dist = None
self.min_false_dist = None
def update(self, op: CmpOp[U], lhs: U, rhs: U):
true_dist = op.true_dist(lhs, rhs)
self.min_true_dist = true_dist if self.min_true_dist is None else min(true_dist, self.min_true_dist)
false_dist = op.false_dist(lhs, rhs)
self.min_false_dist = false_dist if self.min_false_dist is None else min(false_dist, self.min_false_dist)
instrumentation_states: defaultdict[int, InstrState] = defaultdict(InstrState)
# Operands for these must both be integers or strings of length 1
int_str_ops: list[CmpOp[int | str]] = [
CmpOp(operator='<',
name='Lt',
test=lambda lhs, rhs: lhs < rhs,
true_dist=lambda lhs, rhs: lhs - rhs + 1 if lhs >= rhs else 0,
false_dist=lambda lhs, rhs: rhs - lhs if lhs < rhs else 0),
CmpOp(operator='>',
name='Gt',
test=lambda lhs, rhs: lhs > rhs,
true_dist=lambda lhs, rhs: rhs - lhs + 1 if lhs <= rhs else 0,
false_dist=lambda lhs, rhs: lhs - rhs if lhs > rhs else 0),
CmpOp(operator='<=',
name='LtE',
test=lambda lhs, rhs: lhs <= rhs,
true_dist=lambda lhs, rhs: lhs - rhs if lhs > rhs else 0,
false_dist=lambda lhs, rhs: rhs - lhs + 1 if lhs <= rhs else 0),
CmpOp(operator='>=',
name='GtE',
test=lambda lhs, rhs: lhs >= rhs,
true_dist=lambda lhs, rhs: rhs - lhs if lhs < rhs else 0,
false_dist=lambda lhs, rhs: lhs - rhs + 1 if lhs >= rhs else 0),
CmpOp(operator='==',
name='Eq',
test=lambda lhs, rhs: lhs == rhs,
true_dist=lambda lhs, rhs: abs(lhs - rhs),
false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0),
CmpOp(operator='!=',
name='NotEq',
test=lambda lhs, rhs: lhs == rhs,
true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0,
false_dist=lambda lhs, rhs: abs(lhs - rhs)),
]
int_str_by_name: dict[str, CmpOp[int | str]] = {c.name: c for c in int_str_ops}
def int_str_check(a: any, b: any) -> bool:
if type(a) == int and type(b) == int:
return True
if type(a) != str or type(b) != str:
return False
return len(a) == 1 or len(b) == 1
def int_str_convert(x: int | str) -> int:
if type(x) == int:
return x
if len(x) == 1:
return ord(x)
raise ValueError("x must be int or len(str) == 1")
# Operands for these must both be strings
str_ops: list[CmpOp[str]] = [
CmpOp(operator='==',
name='Eq',
test=lambda lhs, rhs: lhs == rhs,
true_dist=lambda lhs, rhs: edit_distance(lhs, rhs),
false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0),
CmpOp(operator='!=',
name='NotEq',
test=lambda lhs, rhs: lhs == rhs,
true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0,
false_dist=lambda lhs, rhs: edit_distance(lhs, rhs)),
]
str_by_name: dict[str, CmpOp[int | str]] = {c.name: c for c in str_ops}
def str_check(a: any, b: any) -> bool:
return type(a) == str and type(b) == str
def evaluate_condition(cmp_id: int, name: str, lhs: any, rhs: any) -> bool:
if int_str_check(lhs, rhs):
lhs_int = int_str_convert(lhs)
rhs_int = int_str_convert(rhs)
if name not in int_str_by_name:
raise ValueError(f"'{name}' is not a valid CmpOp name for 'int_str' operators")
op = int_str_by_name[name]
instrumentation_states[cmp_id].update(op, lhs_int, rhs_int)
return op.test(lhs_int, rhs_int)
if str_check(lhs, rhs):
if name not in str_by_name:
raise ValueError(f"'{name}' is not a valid CmpOp name for 'str' operators")
op = int_str_by_name[name]
instrumentation_states[cmp_id].update(op, lhs, rhs)
return op.test(lhs, rhs)
raise ValueError(f"'{lhs}' and '{rhs}' are not suitable for both 'int_str' and 'str' operators")

1
requirements.txt Normal file
View file

@ -0,0 +1 @@
nltk==3.8.1