wip instrumentor
This commit is contained in:
parent
24600885b9
commit
fb409cb714
4 changed files with 613 additions and 1 deletions
459
.gitignore
vendored
Normal file
459
.gitignore
vendored
Normal file
|
@ -0,0 +1,459 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
.idea/
|
||||
**/.DS_Store
|
||||
out/model/*.pt
|
||||
|
||||
## Core latex/pdflatex auxiliary files:
|
||||
*.aux
|
||||
*.lof
|
||||
*.lot
|
||||
*.fls
|
||||
*.out
|
||||
*.toc
|
||||
*.fmt
|
||||
*.fot
|
||||
*.cb
|
||||
*.cb2
|
||||
.*.lb
|
||||
|
||||
## Intermediate documents:
|
||||
*.dvi
|
||||
*.xdv
|
||||
*-converted-to.*
|
||||
# these rules might exclude image files for figures etc.
|
||||
# *.ps
|
||||
# *.eps
|
||||
# *.pdf
|
||||
|
||||
## Generated if empty string is given at "Please type another file name for output:"
|
||||
|
||||
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||
*.bbl
|
||||
*.bcf
|
||||
*.blg
|
||||
*-blx.aux
|
||||
*-blx.bib
|
||||
*.run.xml
|
||||
|
||||
## Build tool auxiliary files:
|
||||
*.fdb_latexmk
|
||||
*.synctex
|
||||
*.synctex(busy)
|
||||
*.synctex.gz
|
||||
*.synctex.gz(busy)
|
||||
*.pdfsync
|
||||
|
||||
## Build tool directories for auxiliary files
|
||||
# latexrun
|
||||
latex.out/
|
||||
|
||||
## Auxiliary and intermediate files from other packages:
|
||||
# algorithms
|
||||
*.alg
|
||||
*.loa
|
||||
|
||||
# achemso
|
||||
acs-*.bib
|
||||
|
||||
# amsthm
|
||||
*.thm
|
||||
|
||||
# beamer
|
||||
*.nav
|
||||
*.pre
|
||||
*.snm
|
||||
*.vrb
|
||||
|
||||
# changes
|
||||
*.soc
|
||||
|
||||
# comment
|
||||
*.cut
|
||||
|
||||
# cprotect
|
||||
*.cpt
|
||||
|
||||
# elsarticle (documentclass of Elsevier journals)
|
||||
*.spl
|
||||
|
||||
# endnotes
|
||||
*.ent
|
||||
|
||||
*.lox
|
||||
|
||||
# feynmf/feynmp
|
||||
*.mf
|
||||
*.mp
|
||||
*.t[1-9]
|
||||
*.t[1-9][0-9]
|
||||
*.tfm
|
||||
|
||||
#(r)(e)ledmac/(r)(e)ledpar
|
||||
*.end
|
||||
*.?end
|
||||
*.[1-9]
|
||||
*.[1-9][0-9]
|
||||
*.[1-9][0-9][0-9]
|
||||
*.[1-9]R
|
||||
*.[1-9][0-9]R
|
||||
*.[1-9][0-9][0-9]R
|
||||
*.eledsec[1-9]
|
||||
*.eledsec[1-9]R
|
||||
*.eledsec[1-9][0-9]
|
||||
*.eledsec[1-9][0-9]R
|
||||
*.eledsec[1-9][0-9][0-9]
|
||||
*.eledsec[1-9][0-9][0-9]R
|
||||
|
||||
# glossaries
|
||||
*.acn
|
||||
*.acr
|
||||
*.glg
|
||||
*.glo
|
||||
*.gls
|
||||
*.glsdefs
|
||||
*.lzo
|
||||
*.lzs
|
||||
*.slg
|
||||
*.slo
|
||||
*.sls
|
||||
|
||||
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||
# *.ist
|
||||
|
||||
# gnuplot
|
||||
*.gnuplot
|
||||
*.table
|
||||
|
||||
# gnuplottex
|
||||
*-gnuplottex-*
|
||||
|
||||
# gregoriotex
|
||||
*.gaux
|
||||
*.glog
|
||||
*.gtex
|
||||
|
||||
# htlatex
|
||||
*.4ct
|
||||
*.4tc
|
||||
*.idv
|
||||
*.lg
|
||||
*.trc
|
||||
*.xref
|
||||
|
||||
# hyperref
|
||||
*.brf
|
||||
|
||||
# knitr
|
||||
*-concordance.tex
|
||||
# *.tikz
|
||||
*-tikzDictionary
|
||||
|
||||
# listings
|
||||
*.lol
|
||||
|
||||
# luatexja-ruby
|
||||
*.ltjruby
|
||||
|
||||
# makeidx
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
|
||||
# minitoc
|
||||
*.maf
|
||||
*.mlf
|
||||
*.mlt
|
||||
*.mtc[0-9]*
|
||||
*.slf[0-9]*
|
||||
*.slt[0-9]*
|
||||
*.stc[0-9]*
|
||||
|
||||
# minted
|
||||
_minted*
|
||||
*.pyg
|
||||
|
||||
# morewrites
|
||||
*.mw
|
||||
|
||||
# newpax
|
||||
*.newpax
|
||||
|
||||
# nomencl
|
||||
*.nlg
|
||||
*.nlo
|
||||
*.nls
|
||||
|
||||
# pax
|
||||
*.pax
|
||||
|
||||
# pdfpcnotes
|
||||
*.pdfpc
|
||||
|
||||
# sagetex
|
||||
*.sagetex.sage
|
||||
*.sagetex.py
|
||||
*.sagetex.scmd
|
||||
|
||||
# scrwfile
|
||||
*.wrt
|
||||
|
||||
# svg
|
||||
svg-inkscape/
|
||||
|
||||
# sympy
|
||||
*.sout
|
||||
*.sympy
|
||||
sympy-plots-for-*.tex/
|
||||
|
||||
# pdfcomment
|
||||
*.upa
|
||||
*.upb
|
||||
|
||||
# pythontex
|
||||
*.pytxcode
|
||||
pythontex-files-*/
|
||||
|
||||
# tcolorbox
|
||||
*.listing
|
||||
|
||||
# thmtools
|
||||
*.loe
|
||||
|
||||
# TikZ & PGF
|
||||
*.dpth
|
||||
*.md5
|
||||
*.auxlock
|
||||
|
||||
# titletoc
|
||||
*.ptc
|
||||
|
||||
# todonotes
|
||||
*.tdo
|
||||
|
||||
# vhistory
|
||||
*.hst
|
||||
*.ver
|
||||
|
||||
*.lod
|
||||
|
||||
# xcolor
|
||||
*.xcp
|
||||
|
||||
# xmpincl
|
||||
*.xmpi
|
||||
|
||||
# xindy
|
||||
*.xdy
|
||||
|
||||
# xypic precompiled matrices and outlines
|
||||
*.xyc
|
||||
*.xyd
|
||||
|
||||
# endfloat
|
||||
*.ttt
|
||||
*.fff
|
||||
|
||||
# Latexian
|
||||
TSWLatexianTemp*
|
||||
|
||||
## Editors:
|
||||
# WinEdt
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Texpad
|
||||
.texpadtmp
|
||||
|
||||
# LyX
|
||||
*.lyx~
|
||||
|
||||
# Kile
|
||||
*.backup
|
||||
|
||||
# gummi
|
||||
.*.swp
|
||||
|
||||
# KBibTeX
|
||||
*~[0-9]*
|
||||
|
||||
# TeXnicCenter
|
||||
*.tps
|
||||
|
||||
# auto folder when using emacs and auctex
|
||||
./auto/*
|
||||
*.el
|
||||
|
||||
# expex forward references with \gathertags
|
||||
*-tags.tex
|
||||
|
||||
# standalone packages
|
||||
*.sta
|
||||
|
||||
# Makeindex log files
|
||||
*.lpz
|
||||
|
||||
# xwatermark package
|
||||
*.xwm
|
||||
|
||||
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
|
||||
# Uncomment the next line to have this generated file ignored.
|
||||
#*Notes.bib
|
|
@ -10,5 +10,12 @@ In this repository, you can find the following files:
|
|||
|
||||
Note: Feel free to modify this file according to the project's necessities.
|
||||
|
||||
## Environment setup
|
||||
|
||||
|
||||
To install the required dependencies, make sure `python3` points to a Python 3.10 or 3.11 installation, then run:
|
||||
|
||||
```shell
|
||||
python3 -m venv env
|
||||
source env/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
|
145
instrumentor.py
Normal file
145
instrumentor.py
Normal file
|
@ -0,0 +1,145 @@
|
|||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import TypeVar, Callable, Optional
|
||||
from typing import Generic
|
||||
|
||||
from nltk import edit_distance
|
||||
|
||||
T = TypeVar('T')  # operand type handled by a CmpOp
U = TypeVar('U')  # operand type bound per call to InstrState.update


@dataclass
class CmpOp(Generic[T]):
    """Description of one comparison operator and its distance functions.

    The constructor is the one generated by ``@dataclass``; it takes the five
    fields below, positionally or by keyword, in declaration order.
    """

    # Source spelling of the operator, e.g. '<'.
    operator: str
    # Name the operator is looked up by, e.g. 'Lt'.
    name: str
    # Evaluates the comparison itself on (lhs, rhs).
    test: Callable[[T, T], bool]
    # Distance of (lhs, rhs) from making the comparison true.
    true_dist: Callable[[T, T], int]
    # Distance of (lhs, rhs) from making the comparison false.
    false_dist: Callable[[T, T], int]
|
||||
|
||||
|
||||
@dataclass
class InstrState:
    """Running minima of the true/false distances recorded via ``update``."""

    # Smallest true-distance seen so far; None until the first update.
    min_true_dist: Optional[int]
    # Smallest false-distance seen so far; None until the first update.
    min_false_dist: Optional[int]

    def __init__(self):
        # Takes no arguments so the class can serve as a defaultdict factory.
        self.min_true_dist = self.min_false_dist = None

    def update(self, op: CmpOp[U], lhs: U, rhs: U):
        """Fold the distances of one comparison into the stored minima.

        Args:
            op: operator whose ``true_dist`` / ``false_dist`` are evaluated.
            lhs: left operand passed to both distance functions.
            rhs: right operand passed to both distance functions.
        """
        # The true-distance is computed and stored before the false-distance
        # function is called at all.
        observed_true = op.true_dist(lhs, rhs)
        if self.min_true_dist is None or observed_true <= self.min_true_dist:
            self.min_true_dist = observed_true

        observed_false = op.false_dist(lhs, rhs)
        if self.min_false_dist is None or observed_false <= self.min_false_dist:
            self.min_false_dist = observed_false
|
||||
|
||||
|
||||
# Module-level registry of InstrState objects keyed by comparison id (the
# `cmp_id` passed to evaluate_condition). Being a defaultdict, it creates a
# fresh InstrState the first time an id is accessed.
instrumentation_states: defaultdict[int, InstrState] = defaultdict(InstrState)
|
||||
|
||||
# Operands for these must both be integers or strings of length 1.
# evaluate_condition converts the operands with int_str_convert before calling
# any of these lambdas, so they receive plain ints.
#
# For every entry, true_dist is 0 when the comparison already holds and
# otherwise says by how much the operands miss it; false_dist is the mirror
# image for the negated comparison.
int_str_ops: list[CmpOp[int | str]] = [
    CmpOp(operator='<',
          name='Lt',
          test=lambda lhs, rhs: lhs < rhs,
          true_dist=lambda lhs, rhs: lhs - rhs + 1 if lhs >= rhs else 0,
          false_dist=lambda lhs, rhs: rhs - lhs if lhs < rhs else 0),
    CmpOp(operator='>',
          name='Gt',
          test=lambda lhs, rhs: lhs > rhs,
          true_dist=lambda lhs, rhs: rhs - lhs + 1 if lhs <= rhs else 0,
          false_dist=lambda lhs, rhs: lhs - rhs if lhs > rhs else 0),
    CmpOp(operator='<=',
          name='LtE',
          test=lambda lhs, rhs: lhs <= rhs,
          true_dist=lambda lhs, rhs: lhs - rhs if lhs > rhs else 0,
          false_dist=lambda lhs, rhs: rhs - lhs + 1 if lhs <= rhs else 0),
    CmpOp(operator='>=',
          name='GtE',
          test=lambda lhs, rhs: lhs >= rhs,
          true_dist=lambda lhs, rhs: rhs - lhs if lhs < rhs else 0,
          false_dist=lambda lhs, rhs: lhs - rhs + 1 if lhs >= rhs else 0),
    CmpOp(operator='==',
          name='Eq',
          test=lambda lhs, rhs: lhs == rhs,
          true_dist=lambda lhs, rhs: abs(lhs - rhs),
          false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0),
    CmpOp(operator='!=',
          name='NotEq',
          # Must be the inequality test; the distances below are already the
          # mirror image of the 'Eq' entry.
          test=lambda lhs, rhs: lhs != rhs,
          true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0,
          false_dist=lambda lhs, rhs: abs(lhs - rhs)),
]
|
||||
|
||||
# Lookup table from CmpOp.name (e.g. 'Lt') to its entry in int_str_ops.
int_str_by_name: dict[str, CmpOp[int | str]] = {c.name: c for c in int_str_ops}
|
||||
|
||||
|
||||
def int_str_check(a: object, b: object) -> bool:
    """Return True when both operands qualify for the int/char operators.

    The operands qualify when both are exactly ``int`` or both are exactly
    ``str`` with length 1. Exact types are compared, so ``bool`` values and
    subclasses of ``int``/``str`` do not qualify.

    Args:
        a: left operand.
        b: right operand.

    Returns:
        True if both are ints or both are single-character strings.
    """
    if type(a) is int and type(b) is int:
        return True
    if type(a) is not str or type(b) is not str:
        return False
    # Both strings must be single characters: int_str_convert rejects any
    # longer string, so accepting a pair where only one side has length 1
    # would make the conversion raise.
    return len(a) == 1 and len(b) == 1
|
||||
|
||||
|
||||
def int_str_convert(x: int | str) -> int:
|
||||
if type(x) == int:
|
||||
return x
|
||||
if len(x) == 1:
|
||||
return ord(x)
|
||||
|
||||
raise ValueError("x must be int or len(str) == 1")
|
||||
|
||||
|
||||
# Operands for these must both be strings.
# Only equality and inequality are defined; the distance between two strings
# is their edit distance as computed by nltk's edit_distance.
str_ops: list[CmpOp[str]] = [
    CmpOp(operator='==',
          name='Eq',
          test=lambda lhs, rhs: lhs == rhs,
          true_dist=lambda lhs, rhs: edit_distance(lhs, rhs),
          false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0),
    CmpOp(operator='!=',
          name='NotEq',
          # Must be the inequality test; the distances below are already the
          # mirror image of the 'Eq' entry.
          test=lambda lhs, rhs: lhs != rhs,
          true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0,
          false_dist=lambda lhs, rhs: edit_distance(lhs, rhs)),
]
|
||||
|
||||
# Lookup table from CmpOp.name ('Eq' / 'NotEq') to its entry in str_ops.
str_by_name: dict[str, CmpOp[str]] = {c.name: c for c in str_ops}
|
||||
|
||||
|
||||
def str_check(a: object, b: object) -> bool:
    """Return True when both operands are exactly of type ``str``.

    Exact types are compared, so instances of ``str`` subclasses do not
    qualify.

    Args:
        a: left operand.
        b: right operand.
    """
    return type(a) is str and type(b) is str
|
||||
|
||||
|
||||
def evaluate_condition(cmp_id: int, name: str, lhs: object, rhs: object) -> bool:
    """Evaluate the comparison ``name`` on two operands and record its distances.

    Operands accepted by ``int_str_check`` are converted with
    ``int_str_convert`` and handled by the int/char operator table; operands
    accepted by ``str_check`` are handled by the string operator table. In both
    cases the state stored under ``cmp_id`` in ``instrumentation_states`` is
    updated before the comparison result is returned.

    Args:
        cmp_id: key of the InstrState to update in ``instrumentation_states``.
        name: CmpOp name of the comparison, e.g. 'Lt' or 'Eq'.
        lhs: left operand.
        rhs: right operand.

    Returns:
        The result of the selected operator's ``test`` on the operands.

    Raises:
        ValueError: if ``name`` is unknown for the selected operator table, or
            if the operands fit neither table.
    """
    if int_str_check(lhs, rhs):
        lhs_int = int_str_convert(lhs)
        rhs_int = int_str_convert(rhs)

        op = int_str_by_name.get(name)
        if op is None:
            raise ValueError(f"'{name}' is not a valid CmpOp name for 'int_str' operators")

        instrumentation_states[cmp_id].update(op, lhs_int, rhs_int)
        return op.test(lhs_int, rhs_int)

    if str_check(lhs, rhs):
        # Use the string table here: the int/char operators compute distances
        # with integer arithmetic and cannot be applied to strings.
        op = str_by_name.get(name)
        if op is None:
            raise ValueError(f"'{name}' is not a valid CmpOp name for 'str' operators")

        instrumentation_states[cmp_id].update(op, lhs, rhs)
        return op.test(lhs, rhs)

    raise ValueError(f"'{lhs}' and '{rhs}' are not suitable for both 'int_str' and 'str' operators")
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
nltk==3.8.1
|
Reference in a new issue