Skip to content

Commit

Permalink
Updating de-dup script and instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
James Kukucka committed Jan 24, 2022
1 parent 15762cb commit c8d55e1
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 17 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,10 @@ However, some manual analysis is still needed, as a shortcoming of a stack analy

Before running the de-duplication script, ensure that you have Python 3 installed on your machine.
You may access the tarball of failures from the CONFETTI experiments by downloading it from the following URL: **TODO URL**.
First, extract the tarball.
Afterwards, you may perform the de-duplication by running `scripts/unique.py` as follows:

`python3 scripts/unique.py /path/to/failures.tgz`
`python3 scripts/unique.py /path/to/failures/directory`

This will create a directory within the `scripts/` directory called `bugs`.
The failures within the tarball will be de-duplicated, and the `bugs` directory will contain a directory hierarchy corresponding to the target+fuzzer, the bug class, and the trials that found that bug.
Expand Down
49 changes: 33 additions & 16 deletions scripts/unique.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python3

# Command line arguments: tar.gz files
# Command line arguments: Directory containing CONFETTI result tarballs

import sys
import tarfile
Expand All @@ -18,7 +18,7 @@
"bcelgen-knarr-z3",
"bcelgen-jqf",
"bcelgen-knarr-z3-no-global-hint",
"closure-knarr-z3",
"closure-knarr-z3"
"closure-jqf",
"closure-knarr-z3-no-global-hint",
"maven-knarr-z3",
Expand All @@ -36,20 +36,32 @@

shutil.rmtree(outputdir,True)

for fname in sys.argv[1:]:
fnames =[]
for project in projects:
for i in range(1,21):
fnames.append(os.path.join(sys.argv[1], "%s-%d.tgz" % (project, i)))



for fname in fnames:
print(fname)
with tarfile.open(fname) as tgz:
for project in projects:
bugs = {}
for tgzfile in tgz.getmembers():
# Is this file in the archive a fail?
if not failregex.match(tgzfile.name):
continue
if not project in tgzfile.name:
continue
elif project in tgzfile.name:
if "-no-global-hint" in tgzfile.name and "no-global-hint" not in project:
if "-no-global-hint" in tgzfile.name and "-no-global-hint" not in project:
continue
if "-no-global-hint" in project and "-no-global-hint" not in tgzfile.name:
continue

if project not in bugs:
bugs[project] = {}

#print(tgzfile.name)
with tgz.extractfile(tgzfile.name) as f:
# hash the contents of the fail trace
Expand All @@ -68,26 +80,31 @@
cwd = os.path.join(outputdir,project,md5,"")

# use hash to reason about the uniqueness of the fail
if md5 in bugs:
b = bugs[md5]
if md5 in bugs[project]:
b = bugs[project][md5]
else:
b = []
bugs[md5] = b
os.makedirs(cwd)
bugs[project][md5] = b
if not os.path.exists(cwd):
os.makedirs(cwd)

# extract stacktrace to the correct dir
tgz.extract(tgzfile, cwd)
# extract offending input file to the correct dir
tgz.extract(re.sub(r'\.trace$', '.input', tgzfile.name), cwd)
# register failure in our index
b.append(tgzfile.name)


print("Found %d unique bugs for project %s" % (len(bugs), project))

i=0
for b,fs in bugs.items():
i += 1
cwd = os.path.join(outputdir,project,b,"final-failures")
#print(cwd)
# Print out information about bugs for each project
for project in projects:
i=0
print("Found %d unique bugs for project %s" % (len(bugs[project]), project))
for b,fs in bugs[project].items():
i += 1
cwd = os.path.join(outputdir,project,b)
#print(cwd)

print("Bug {} was found {} times".format(b, len(next(os.walk(cwd))[1])))
print("Bug {} was found {} times".format(b, len(next(os.walk(cwd))[1])))
print("\n\n")

0 comments on commit c8d55e1

Please sign in to comment.