docs

timm · Jul 30, 2024 · a266301 · a266301
1 parent a40a2e5
commit a266301
Show file tree

Hide file tree

Showing 4 changed files with 107 additions and 12 deletions.
diff --git a/docs/Makefile b/docs/Makefile
@@ -18,3 +18,18 @@ all:
 	$(foreach f,$(subst .md,,$(wildcard [A-Z]*.md)),$(MAKE) eg=$f;) ## run all */md files as lua
 
 -include ../Makefile
+
+~/tmp/%.pdf : %.md
+	pandoc  \
+	-V fontsize=10pt \
+	-t beamer  \
+	-V theme:Warsaw \
+	-V "header-includes:\usepackage{xcolor}" \
+	-V "header-includes:\usecolortheme{whale}\definecolor{customred}{HTML}{CC0000}" \
+	-V "header-includes:\setbeamercolor{structure}{fg=customred}" \
+  -V "header-includes:\setbeamercolor{palette primary}{bg=customred, fg=white}" \
+	-V "header-includes:\usepackage{etoolbox}\makeatletter\patchcmd{\@makefntext}{\normalfont}{\footnotesize}{}{}\makeatother" \
+	$< -o $@
+	open $@
+
+
diff --git a/docs/header.tex b/docs/header.tex
@@ -0,0 +1,3 @@
+\usepackage{anyfontsize}
+\fontsize{10pt}{12pt}\selectfont
+
diff --git a/docs/se4ai00.md b/docs/se4ai00.md
@@ -0,0 +1,82 @@
+% Intro SE 4 AI
+% Tim Menzies
+% March 22, 2024
+
+
+#  Issues with SE 4 AI
+
+- The more we use AI in SE, the more code will be auto-generated. 
+- The more we auto-generate code, the less time software engineers 
+  spend writing and reviewing new code, written by someone or something
+  else (the internals of which they may not understand).
+- The less we understand code, the more we will use black-box components,
+  where, once a system is assembled, its control settings are tuned. 
+- In this scenario, it becomes very important to reduce the human effort
+  and CPU effort required for that tuning.
+
+# Software Review
+
+- We define “software review” as a panel of SMEs (subject matter experts),
+  looking at examples of behavior to recommend how to improve software.
+- SME time is usually very limited, so such reviews must complete after
+  looking at just a small number of very informative examples. 
+- To support the software review process, we explore methods that train 
+  a predictive model to guess if some oracle will like/dislike the next example. 
+- These predictive models work with SMEs to guide them as they explore the examples. Afterwards, the models
+  can handle new examples while the panelists are busy elsewhere.
+
+# How many questions can we ask a human?
+
+What | N
+----:|-------
+Standard theory: |  more is always better
+Cognitive Science: | 7 plus or minus 2
+From human studies (cost estimation, rep grids) : |  10 to 20 examples per 1-4 hours
+Regression theory| 10 examples per attribute
+Semi-supervised learning | $\sqrt{N}$
+Zhu et al. [^zhu16] | 100 images
+Menzies et al. 2008 [^Me08] | 50 examples
+Chessboard model    | 200 examples
+Probable Correctness theory | simpler cases: 50 to 6 (if we can binary chop)<br> safety-critical cases: 272 to 8 (if we can binary chop)
+
+
+[^zhu16]: Zhu, X., Vondrick, C., Fowlkes, C.C. et al. Do We Need More Training Data?
+Int J Comput Vis 119, 76–92 (2016). https://doi.org/10.1007/s11263-015-0812-2
+
+[^Me08]: Menzies, T., Turhan, B., Bener, A., Gay, G., Cukic, B., &
+Jiang, Y. (2008). Implications of ceiling effects in defect
+predictors. In Proceedings of the 4th international workshop on
+Predictor models in software engineering (pp. 47-54).
+
+# Maths: Gaussians
+
+
+# Maths: Probability Theory
+
+- Confidence $C$ of seeing an event of probability $p$ at least once in $n$ trials: $C = 1 - (1 - p)^n$.
+  - So $n = \frac{\log(1-C)}{\log(1-p)}$
+- If we have any trick for ordering examples from best to worst, we can do a binary chop
+  - So $n = \log_2\left( \frac{\log(1-C)}{\log(1-p)}\right)$
+- Gaussians
+
+# adas
+
+[.column]
+
+### The First column
+
+[.column]
+
+### Second column.
+
+# aasdas
+
+asdada
+
+```mermaid
+pie showData
+    title Key elements in Product X
+    "Calcium" : 42.96
+    "Potassium" : 50.05
+    "Magnesium" : 10.01
+    "Iron" :  5
+```
diff --git a/src/ezr2.py b/src/ezr2.py
@@ -14,7 +14,6 @@
 from time import time
 import stats
 R  = random.random
-
 # ## Data Types
 
 # All programs have magic control options, which we keep in the `the` variable.
@@ -42,8 +41,8 @@ class CONFIG:
 def LIST(): return field(default_factory=list)
 def DICT(): return field(default_factory=dict)
 
-# NUMs and SYMs are both COLumns. All COLumns count `n` (items seen),
-# `at` (their column number) and `txt` (column name).
+# NUMs and SYMs are COLumns. COLumns know  `n` (items seen), 
+# `at` (their column pos) and `txt` (column name).
 @dataclass
 class COL:
   n   : int = 0
@@ -57,8 +56,7 @@ class SYM(COL):
   mode : atom=None
   most : int=0
 
-# NUMs tracks  `lo,hi` seen so far, as well the `mu` (mean) and `sd` (standard deviation),
-# using Welford's algorithm.
+# NUMs track `lo,hi`; `mu` (mean); `sd` (standard deviation), using Welford's algorithm.
 @dataclass
 class NUM(COL):
   mu : number =  0
@@ -68,13 +66,11 @@ class NUM(COL):
   hi : number = -1E32
   goal : number = 1
 
-  # A minus sign at end of a NUM's name says "this is a column to minimize"
-  # (all other goals are to be maximizes).
+  # "+"/"-" at end of name denotes column to maximize/minimize.
   def __post_init__(self:COLS) -> None:  
     if  self.txt and self.txt[-1] == "-": self.goal=0
 
-# COLS are a factory that reads some `names` from the first
-# row , the creates the appropriate columns.
+# COLS are factories that turn  `names` from row1 into the appropriate columns.
 @dataclass
 class COLS:
   names: list[str]   # column names
@@ -362,7 +358,7 @@ def cli(d:dict):
       if arg in ["-"+k[0], "--"+k]:
         d[k] = coerce("False" if v=="True" else ("True" if v=="False" else after))
 
-# ## Examples
+# ## Examples
 
 class egs: # sassdddsf
   def all():
@@ -474,8 +470,7 @@ def smos():
                   stats.SOME(mqs4,"mqs4"),
                   stats.SOME(mqs1000,"mqs1000")])
 
-
-# ## Start-up
+# ## Start-up
 
 if __name__ == "__main__" and len(sys.argv)> 1:
   cli(the.__dict__)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		\usepackage{anyfontsize}
		\fontsize{10pt}{12pt}\selectfont