diff --git a/literate_notebooks/src-PT-BR/01_constructors.jl b/literate_notebooks/src-PT-BR/01_constructors.jl
new file mode 100644
index 0000000..333a81e
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/01_constructors.jl
@@ -0,0 +1,143 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+# 
+# Let's get started by loading the `DataFrames` package.
+
+using DataFrames
+
+# ## Constructors and conversion
+
+#-
+
+# ### Constructors
+# 
+# In this section, you'll see many ways to create a `DataFrame` using the `DataFrame()` constructor.
+# 
+# First, we could create an empty DataFrame,
+
+DataFrame() # empty DataFrame
+
+# Or we could call the constructor using keyword arguments to add columns to the `DataFrame`.
+
+DataFrame(A=1:3, B=rand(3), C=randstring.([3,3,3]))
+
+# We can create a `DataFrame` from a dictionary, in which case keys from the dictionary will be sorted to create the `DataFrame` columns.
+
+x = Dict("A" => [1,2], "B" => [true, false], "C" => ['a', 'b'])
+DataFrame(x)
+
+# Rather than explicitly creating a dictionary first, as above, we could pass `DataFrame` arguments with the syntax of dictionary key-value pairs. 
+# 
+# Note that in this case, we use symbols to denote the column names and arguments are not sorted. For example, `:A`, the symbol, produces `A`, the name of the first column here:
+
+DataFrame(:A => [1,2], :B => [true, false], :C => ['a', 'b'])
+
+# Here we create a `DataFrame` from a vector of vectors, and each vector becomes a column.
+
+DataFrame([rand(3) for i in 1:3])
+
+#  For now we can construct a single `DataFrame` from a `Vector` of atoms, creating a `DataFrame` with a single row. In future releases of DataFrames.jl, this will throw an error.
+
+DataFrame(rand(3))
+
+# Instead use a transposed vector if you have a vector of atoms (in this way you effectively pass a two dimensional array to the constructor which is supported).
+
+DataFrame(transpose([1, 2, 3]))
+
+# Pass a second argument to give the columns names.
+
+DataFrame([1:3, 4:6, 7:9], [:A, :B, :C])
+
+# Here we create a `DataFrame` from a matrix,
+
+DataFrame(rand(3,4))
+
+# and here we do the same but also pass column names.
+
+DataFrame(rand(3,4), Symbol.('a':'d'))
+
+# We can also construct an uninitialized DataFrame.
+# 
+# Here we pass column types, names and number of rows; we get `missing` in column :C because `Any >: Missing`.
+
+DataFrame([Int, Float64, Any], [:A, :B, :C], 1)
+
+# Here we create a `DataFrame`, but column `:C` is #undef and Jupyter has a problem displaying it. (This works OK at the REPL.)
+# 
+# This will be fixed in next release of DataFrames!
+
+DataFrame([Int, Float64, String], [:A, :B, :C], 1)
+
+# To initialize a `DataFrame` with column names, but no rows use
+
+DataFrame([Int, Float64, String], [:A, :B, :C], 0) 
+
+# This syntax gives us a quick way to create a homogeneous `DataFrame`.
+
+DataFrame(Int, 3, 5)
+
+# This example is similar, but has nonhomogenous columns.
+
+DataFrame([Int, Float64], 4)
+
+# Finally, we can create a `DataFrame` by copying an existing `DataFrame`.
+# 
+# Note that `copy` creates a shallow copy.
+
+y = DataFrame(x) # NOTE(review): `x` is still the `Dict` defined earlier, so this builds a new `DataFrame` from it
+z = copy(x) # NOTE(review): this copies that `Dict`, not a `DataFrame` - confirm it matches the text above
+(x === y), (x === z), isequal(x, z)
+
+# ### Conversion to a matrix
+# 
+# Let's start by creating a `DataFrame` with two rows and two columns.
+
+x = DataFrame(x=1:2, y=["A", "B"])
+
+# We can create a matrix by passing this `DataFrame` to `Matrix`.
+
+Matrix(x)
+
+# This would work even if the `DataFrame` had some `missing`s:
+
+x = DataFrame(x=1:2, y=[missing,"B"])
+
+#-
+
+Matrix(x)
+
+# In the two previous matrix examples, Julia created matrices with elements of type `Any`. We can see more clearly that the type of matrix is inferred when we pass, for example, a `DataFrame` of integers to `Matrix`, creating a 2D `Array` of `Int64`s:
+
+x = DataFrame(x=1:2, y=3:4)
+
+#-
+
+Matrix(x)
+
+# In this next example, Julia correctly identifies that `Union` is needed to express the type of the resulting `Matrix` (which contains `missing`s).
+
+x = DataFrame(x=1:2, y=[missing,4])
+
+#-
+
+Matrix(x)
+
+# Note that we can't force a conversion of `missing` values to `Int`s!
+
+Matrix{Int}(x)
+
+# ### Handling of duplicate column names
+# 
+# We can pass the `makeunique` keyword argument to allow passing duplicate names (they get deduplicated)
+
+df = DataFrame(:a=>1, :a=>2, :a_1=>3; makeunique=true)
+
+# Otherwise, duplicates will not be allowed in the future.
+
+df = DataFrame(:a=>1, :a=>2, :a_1=>3)
+
+# A constructor that is passed column names as keyword arguments is a corner case.
+# You cannot pass `makeunique` to allow duplicates here.
+
+df = DataFrame(a=1, a=2, makeunique=true)
+
diff --git a/literate_notebooks/src-PT-BR/02_basicinfo.jl b/literate_notebooks/src-PT-BR/02_basicinfo.jl
new file mode 100644
index 0000000..6cde7c6
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/02_basicinfo.jl
@@ -0,0 +1,76 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Getting basic information about a data frame
+# 
+# Let's start by creating a `DataFrame` object, `x`, so that we can learn how to get information on that data frame.
+
+x = DataFrame(A = [1, 2], B = [1.0, missing], C = ["a", "b"])
+
+# The standard `size` function works to get dimensions of the `DataFrame`,
+
+size(x), size(x, 1), size(x, 2)
+
+# as well as `nrow` and `ncol` from R; `length` gives number of columns.
+
+nrow(x), ncol(x), length(x)
+
+# `describe` gives basic summary statistics of data in your `DataFrame`.
+
+describe(x)
+
+# Use `showcols` to get information about columns stored in a DataFrame.
+
+showcols(x)
+
+# `names` will return the names of all columns,
+
+names(x)
+
+# and `eltypes` returns their types.
+
+eltypes(x)
+
+# Here we create some large DataFrame
+
+y = DataFrame(rand(1:10, 1000, 10));
+
+# and then we can use `head` to peek into its top rows
+
+head(y)
+
+# and `tail` to see its bottom rows.
+
+tail(y, 3)
+
+# ### Most elementary get and set operations
+# 
+# Given the `DataFrame`, `x`, here are three ways to grab one of its columns as a `Vector`:
+
+x[1], x[:A], x[:, 1]
+
+# To grab one row as a DataFrame, we can index as follows.
+
+x[1, :]
+
+# We can grab a single cell or element with the same syntax used to grab an element of an array.
+
+x[1, 1]
+
+# Assignment can be done in ranges to a scalar,
+
+x[1:2, 1:2] = 1
+x
+
+# to a vector of length equal to the number of assigned rows,
+
+x[1:2, 1:2] = [1,2]
+x
+
+# or to another data frame of matching size.
+
+x[1:2, 1:2] = DataFrame([5 6; 7 8])
+x
+
diff --git a/literate_notebooks/src-PT-BR/03_missingvalues.jl b/literate_notebooks/src-PT-BR/03_missingvalues.jl
new file mode 100644
index 0000000..1e17d97
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/03_missingvalues.jl
@@ -0,0 +1,112 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Handling missing values
+# 
+# A singleton type `Missings.Missing` allows us to deal with missing values.
+
+missing, typeof(missing)
+
+# Arrays automatically create an appropriate union type.
+
+x = [1, 2, missing, 3]
+
+# `ismissing` checks if passed value is missing.
+
+ismissing(1), ismissing(missing), ismissing(x), ismissing.(x)
+
+# We can extract the type combined with Missing from a `Union` via
+# 
+# (This is useful for arrays!)
+
+eltype(x), Missings.T(eltype(x))
+
+# `missing` comparisons produce `missing`.
+
+missing == missing, missing != missing, missing < missing
+
+# This is also true when `missing`s are compared with values of other types.
+
+1 == missing, 1 != missing, 1 < missing
+
+# `isequal`, `isless`, and `===` produce results of type `Bool`.
+
+isequal(missing, missing), missing === missing, isequal(1, missing), isless(1, missing)
+
+# In the next few examples, we see that many (not all) functions handle `missing`.
+
+map(x -> x(missing), [sin, cos, zero, sqrt]) # part 1
+
+#-
+
+map(x -> x(missing, 1), [+, - , *, /, div]) # part 2 
+
+#-
+
+map(x -> x([1,2,missing]), [minimum, maximum, extrema, mean, any, float]) # part 3
+
+# `skipmissing` returns iterator skipping missing values. We can use `collect` and `skipmissing` to create an array that excludes these missing values.
+
+collect(skipmissing([1, missing, 2, missing]))
+
+# Similarly, here we combine `collect` and `Missings.replace` to create an array that replaces all missing values with some value (`NaN` in this case).
+
+collect(Missings.replace([1.0, missing, 2.0, missing], NaN))
+
+# Another way to do this:
+
+coalesce.([1.0, missing, 2.0, missing], NaN)
+
+# Caution: `nothing` would also be replaced here (for Julia 0.7 a more sophisticated behavior of `coalesce` that avoids this problem is planned).
+
+coalesce.([1.0, missing, nothing, missing], NaN)
+
+# You can use `recode` if you have homogenous output types.
+
+recode([1.0, missing, 2.0, missing], missing=>NaN)
+
+# You can use `unique` or `levels` to get unique values with or without missings, respectively.
+
+unique([1, missing, 2, missing]), levels([1, missing, 2, missing])
+
+# In this next example, we convert `x` to `y` with `allowmissing`, where `y` has a type that accepts missings.
+
+x = [1,2,3]
+y = allowmissing(x)
+
+# Then, we convert back with `disallowmissing`. This would fail if `y` contained missing values!
+
+z = disallowmissing(y)
+x,y,z
+
+# In this next example, we show that the type of each column in `x` is initially `Int64`. After using `allowmissing!` to accept missing values in columns 1 and 3, the types of those columns become `Union`s of `Int64` and `Missings.Missing`.
+
+x = DataFrame(Int, 2, 3)
+println("Before: ", eltypes(x))
+allowmissing!(x, 1) # make first column accept missings
+allowmissing!(x, :x3) # make :x3 column accept missings
+println("After: ", eltypes(x))
+
+# In this next example, we'll use `completecases` to find all the rows of a `DataFrame` that have complete data.
+
+x = DataFrame(A=[1, missing, 3, 4], B=["A", "B", missing, "C"])
+println(x)
+println("Complete cases:\n", completecases(x))
+
+# We can use `dropmissing` or `dropmissing!` to remove the rows with incomplete data from a `DataFrame` and either create a new `DataFrame` or mutate the original in-place.
+
+y = dropmissing(x)
+dropmissing!(x)
+[x, y]
+
+# When we call `showcols` on a `DataFrame` with dropped missing values, the columns still allow missing values.
+
+showcols(x)
+
+# Since we've excluded missing values, we can safely use `disallowmissing!` so that the columns will no longer accept missing values.
+
+disallowmissing!(x)
+showcols(x)
+
diff --git a/literate_notebooks/src-PT-BR/04_loadsave.jl b/literate_notebooks/src-PT-BR/04_loadsave.jl
new file mode 100644
index 0000000..d166830
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/04_loadsave.jl
@@ -0,0 +1,64 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Load and save DataFrames
+# We do not cover all features of the packages. Please refer to their documentation to learn them.
+# 
+# Here we'll load `CSV` to read and write CSV files and `JLD`, which allows us to work with a Julia native binary format.
+
+using CSV
+using JLD
+
+# Let's create a simple `DataFrame` for testing purposes,
+
+x = DataFrame(A=[true, false, true], B=[1, 2, missing],
+              C=[missing, "b", "c"], D=['a', missing, 'c'])
+
+
+# and use `eltypes` to look at the columnwise types.
+
+eltypes(x)
+
+# Let's use `CSV` to save `x` to disk; make sure `x.csv` does not conflict with some file in your working directory.
+
+CSV.write("x.csv", x)
+
+# Now we can see how it was saved by reading `x.csv`.
+
+print(read("x.csv", String))
+
+# We can also load it back. `use_mmap=false` disables memory mapping so that on Windows the file can be deleted in the same session.
+
+y = CSV.read("x.csv", use_mmap=false)
+
+# When loading in a `DataFrame` from a `CSV`, all columns allow `Missing` by default. Note that the column types have changed!
+
+eltypes(y)
+
+# Now let's save `x` to a file in a binary format; make sure that `x.jld` does not exist in your working directory.
+
+save("x.jld", "x", x)
+
+# After loading in `x.jld` as `y`, `y` is identical to `x`.
+
+y = load("x.jld", "x")
+
+# Note that the column types of `y` are the same as those of `x`!
+
+eltypes(y)
+
+# Next, we'll create the files `bigdf.csv` and `bigdf.jld`, so be careful that you don't already have these files on disc!
+# 
+# In particular, we'll time how long it takes us to write a `DataFrame` with 10^3 rows and 10^2 columns to `.csv` and `.jld` files.  *You can expect JLD to be faster!* Use `compress=true` to reduce file sizes.
+
+bigdf = DataFrame(Bool, 10^3, 10^2)
+@time CSV.write("bigdf.csv", bigdf)
+@time save("bigdf.jld", "bigdf", bigdf)
+getfield.(stat.(["bigdf.csv", "bigdf.jld"]), :size)
+
+# Finally, let's clean up. Do not run the next cell unless you are sure that it will not erase your important files.
+
+foreach(rm, ["x.csv", "x.jld", "bigdf.csv", "bigdf.jld"])
+
diff --git a/literate_notebooks/src-PT-BR/05_columns.jl b/literate_notebooks/src-PT-BR/05_columns.jl
new file mode 100644
index 0000000..f32e02a
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/05_columns.jl
@@ -0,0 +1,187 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Manipulating columns of DataFrame
+
+#-
+
+# ### Renaming columns
+# 
+# Let's start with a `DataFrame` of `Bool`s that has default column names.
+
+x = DataFrame(Bool, 3, 4)
+
+# With `rename`, we create new `DataFrame`; here we rename the column `:x1` to `:A`. (`rename` also accepts collections of Pairs.)
+
+rename(x, :x1 => :A)
+
+# With `rename!` we do an in place transformation. 
+# 
+# This time we've applied a function to every column name.
+
+rename!(c -> Symbol(string(c)^2), x)
+
+# We can also change the name of a particular column without knowing the original.
+# 
+# Here we change the name of the third column, creating a new `DataFrame`.
+
+rename(x, names(x)[3] => :third)
+
+# With `names!`, we can change the names of all variables.
+
+names!(x, [:a, :b, :c, :d])
+
+# We get an error when we try to provide duplicate names
+
+names!(x, fill(:a, 4))
+
+#  unless we pass `makeunique=true`, which allows us to handle duplicates in passed names.
+
+names!(x, fill(:a, 4), makeunique=true)
+
+# ### Reordering columns
+
+#-
+
+# We can reorder the names(x) vector as needed, creating a new DataFrame.
+
+srand(1234)
+x[shuffle(names(x))]
+
+# also `permutecols!` will be introduced in next release of DataFrames
+
+#-
+
+# ### Merging/adding columns
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:4])
+
+# With `hcat` we can merge two `DataFrame`s. Also [x y] syntax is supported but only when DataFrames have unique column names.
+
+hcat(x, x, makeunique=true)
+
+# We can also use `hcat` to add a new column; a default name `:x1` will be used for this column, so `makeunique=true` is needed.
+
+y = hcat(x, [1,2,3], makeunique=true)
+
+# You can also prepend a vector with `hcat`.
+
+hcat([1,2,3], x, makeunique=true)
+
+# Alternatively you could append a vector with the following syntax. This is a bit more verbose but cleaner.
+
+y = [x DataFrame(A=[1,2,3])]
+
+# Here we do the same but add column `:A` to the front.
+
+y = [DataFrame(A=[1,2,3]) x]
+
+# A column can also be added in the middle. Here a brute-force method is used and a new DataFrame is created.
+
+using BenchmarkTools
+@btime [$x[1:2] DataFrame(A=[1,2,3]) $x[3:4]]
+
+# We could also do this with a specialized in place method `insert!`. Let's add `:newcol` to the `DataFrame` `y`.
+
+insert!(y, 2, [1,2,3], :newcol)
+
+# If you want to insert the same column name several times `makeunique=true` is needed as usual.
+
+insert!(y, 2, [1,2,3], :newcol, makeunique=true)
+
+# We can see how much faster it is to insert a column with `insert!` than with `hcat` using `@btime`.
+
+@btime insert!(copy($x), 3, [1,2,3], :A)
+
+# Let's use `insert!` to append a column in place,
+
+insert!(x, ncol(x)+1, [1,2,3], :A)
+
+# and to in place prepend a column.
+
+insert!(x, 1, [1,2,3], :B)
+
+# With `merge!`, let's merge the second DataFrame into first, but overwriting duplicates.
+
+df1 = DataFrame(x=1:3, y=4:6)
+df2 = DataFrame(x='a':'c', z = 'd':'f', new=11:13)
+df1, df2, merge!(df1, df2)
+
+#  For comparison: merge two `DataFrame`s but renaming duplicate names via `hcat`.
+
+df1 = DataFrame(x=1:3, y=4:6)
+df2 = DataFrame(x='a':'c', z = 'd':'f', new=11:13)
+hcat(df1, df2, makeunique=true)
+
+# ### Subsetting/removing columns
+# 
+# Let's create a new `DataFrame` `x` and show a few ways to create DataFrames with a subset of `x`'s columns.
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:5])
+
+# First we could do this by index
+
+x[[1,2,4,5]]
+
+# or by column name.
+
+x[[:x1, :x4]]
+
+# We can also choose to keep or exclude columns by `Bool`. (We need a vector whose length is the number of columns in the original `DataFrame`.)
+
+x[[true, false, true, false, true]]
+
+# Here we create a single column `DataFrame`,
+
+x[[:x1]]
+
+# and here we access the vector contained in column `:x1`.
+
+x[:x1]
+
+# We could grab the same vector by column number
+
+x[1]
+
+# and remove everything from a `DataFrame` with `empty!`.
+
+empty!(y)
+
+# Here we create a copy of `x` and delete the 3rd column from the copy with `delete!`.
+
+z = copy(x)
+x, delete!(z, 3)
+
+# ### Modify column by name
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:5])
+
+# With the following syntax, the existing column is modified without performing any copying.
+
+x[:x1] = x[:x2]
+x
+
+# We can also use the following syntax to add a new column at the end of a `DataFrame`.
+
+x[:A] = [1,2,3]
+x
+
+# A new column name will be added to our `DataFrame` with the following syntax as well (7 is equal to `ncol(x)+1`).
+
+x[7] = 11:13
+x
+
+# ### Find column name
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:5])
+
+# We can check if a column with a given name exists via
+
+:x1 in names(x) 
+
+# and determine its index via
+
+findfirst(names(x), :x2)
+
diff --git a/literate_notebooks/src-PT-BR/06_rows.jl b/literate_notebooks/src-PT-BR/06_rows.jl
new file mode 100644
index 0000000..3660e40
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/06_rows.jl
@@ -0,0 +1,177 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+srand(1);
+
+# ## Manipulating rows of DataFrame
+
+#-
+
+# ### Reordering rows
+
+x = DataFrame(id=1:10, x = rand(10), y = [zeros(5); ones(5)]) # and we hope that x[:x] is not sorted :)
+
+#-
+
+issorted(x), issorted(x, :x) # check if a DataFrame or a subset of its columns is sorted
+
+#-
+
+sort!(x, :x) # sort x in place
+
+#-
+
+y = sort(x, :id) # new DataFrame
+
+#-
+
+sort(x, (:y, :x), rev=(true, false)) # sort by two columns, first is decreasing, second is increasing
+
+#-
+
+sort(x, (order(:y, rev=true), :x)) # the same as above
+
+#-
+
+sort(x, (order(:y, rev=true), order(:x, by=v->-v))) # some more fancy sorting stuff
+
+#-
+
+x[shuffle(1:10), :] # reorder rows (here randomly)
+
+#-
+
+sort!(x, :id)
+x[[1,10],:] = x[[10,1],:] # swap rows
+x
+
+#-
+
+x[1,:], x[10,:] = x[10,:], x[1,:] # and swap again
+x
+
+# ### Merging/adding rows
+
+x = DataFrame(rand(3, 5))
+
+#-
+
+[x; x] # merge by rows - data frames must have the same column names; the same is vcat
+
+#-
+
+y = x[reverse(names(x))] # get y with other order of names
+
+#-
+
+vcat(x, y) # we get what we want as vcat does column name matching
+
+#-
+
+vcat(x, y[1:3]) # but column names must still match
+
+#-
+
+append!(x, x) # the same but modifies x
+
+#-
+
+append!(x, y) # here column names must match exactly
+
+#-
+
+push!(x, 1:5) # add one row to x at the end; must give correct number of values and correct types
+x
+
+#-
+
+push!(x, Dict(:x1=> 11, :x2=> 12, :x3=> 13, :x4=> 14, :x5=> 15)) # also works with dictionaries
+x
+
+# ### Subsetting/removing rows
+
+x = DataFrame(id=1:10, val='a':'j')
+
+#-
+
+x[1:2, :] # by index
+
+#-
+
+view(x, 1:2) # the same but a view
+
+#-
+
+x[repmat([true, false], 5), :] # by Bool, exact length required
+
+#-
+
+view(x, repmat([true, false], 5), :) # view again
+
+#-
+
+deleterows!(x, 7) # delete one row
+
+#-
+
+deleterows!(x, 6:7) # delete a collection of rows
+
+#-
+
+x = DataFrame([1:4, 2:5, 3:6])
+
+#-
+
+filter(r -> r[:x1] > 2.5, x) # create a new DataFrame where filtering function operates on DataFrameRow
+
+#-
+
+## in place modification of x, an example with do-block syntax
+filter!(x) do r # the do-block body is the predicate; `r` is one row of `x`
+    if r[:x1] > 2.5
+        return r[:x2] < 4.5 # rows with x1 > 2.5 are judged on x2
+    end
+    r[:x3] < 3.5 # all remaining rows are judged on x3
+end
+
+# ### Deduplicating
+
+x = DataFrame(A=[1,2], B=["x","y"])
+append!(x, x)
+x[:C] = 1:4
+x
+
+#-
+
+unique(x, [1,2]) # get first unique rows for given index
+
+#-
+
+unique(x) # now we look at whole rows
+
+#-
+
+nonunique(x, :A) # get indicators of non-unique rows
+
+#-
+
+unique!(x, :B) # modify x in place
+
+# ### Extracting one row from `DataFrame` into a vector
+
+x = DataFrame(x=[1,missing,2], y=["a", "b", missing], z=[true,false,true])
+
+#-
+
+cols = [:x, :y]
+[x[1, col] for col in cols] # subset of columns
+
+#-
+
+[[x[i, col] for col in names(x)] for i in 1:nrow(x)] # vector of vectors, each entry contains one full row of x
+
+#-
+
+Tuple(x[1, col] for col in cols) # similar construct for Tuples, when ported to Julia 0.7 NamedTuples will be added
+
diff --git a/literate_notebooks/src-PT-BR/07_factors.jl b/literate_notebooks/src-PT-BR/07_factors.jl
new file mode 100644
index 0000000..a3ff03c
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/07_factors.jl
@@ -0,0 +1,231 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+
+# ## Working with CategoricalArrays
+
+#-
+
+# ### Constructor
+
+x = categorical(["A", "B", "B", "C"]) # unordered
+
+#-
+
+y = categorical(["A", "B", "B", "C"], ordered=true) # ordered, by default order is sorting order
+
+#-
+
+z = categorical(["A","B","B","C", missing]) # unordered with missings
+
+#-
+
+c = cut(1:10, 5) # ordered, into equal counts, possible to rename labels and give custom breaks
+
+#-
+
+by(DataFrame(x=cut(randn(100000), 10)), :x, d -> DataFrame(n=nrow(d)), sort=true) # just to make sure it works right
+
+#-
+
+v = categorical([1,2,2,3,3]) # contains integers not strings
+
+#-
+
+Vector{Union{String, Missing}}(z) # sometimes you need to convert back to a standard vector
+
+# ### Managing levels
+
+arr = [x,y,z,c,v]
+
+#-
+
+isordered.(arr) # check if categorical array is ordered
+
+#-
+
+ordered!(x, true), isordered(x) # make x ordered
+
+#-
+
+ordered!(x, false), isordered(x) # and unordered again
+
+#-
+
+levels.(arr) # list levels
+
+#-
+
+unique.(arr) # missing will be included
+
+#-
+
+y[1] < y[2] # can compare as y is ordered
+
+#-
+
+v[1] < v[2] # not comparable, v is unordered although it contains integers
+
+#-
+
+levels!(y, ["C", "B", "A"]) # you can reorder levels, mostly useful for ordered CategoricalArrays
+
+#-
+
+y[1] < y[2] # observe that the order is changed
+
+#-
+
+levels!(z, ["A", "B"]) # you have to specify all levels that are present
+
+#-
+
+levels!(z, ["A", "B"], allow_missing=true) # unless the underlying array allows for missings and force removal of levels
+
+#-
+
+z[1] = "B"
+z # now z has only "B" entries
+
+#-
+
+levels(z) # but it remembers the levels it had (the reason is mostly performance)
+
+#-
+
+droplevels!(z) # this way we can clean it up
+levels(z)
+
+# ### Data manipulation
+
+x, levels(x)
+
+#-
+
+x[2] = "0"
+x, levels(x) # new level added at the end (works only for unordered)
+
+#-
+
+v, levels(v)
+
+#-
+
+v[1] + v[2] # even though underlying data is Int, we cannot operate on it
+
+#-
+
+Vector{Int}(v) # you have either to retrieve the data by conversion (may be expensive)
+
+#-
+
+get(v[1]) + get(v[2]) # or get a single value
+
+#-
+
+get.(v) # this will work for arrays without missings
+
+#-
+
+get.(z) # but will fail on missing values
+
+#-
+
+Vector{Union{String, Missing}}(z) # you have to do the conversion
+
+#-
+
+z[1]*z[2], z.^2 # the only exception are CategoricalArrays based on String - you can operate on them normally
+
+#-
+
+recode([1,2,3,4,5,missing], 1=>10) # recode some values in an array; has also in place recode! equivalent
+
+#-
+
+recode([1,2,3,4,5,missing], "a", 1=>10, 2=>20) # here we provided a default value for not mapped recodings
+
+#-
+
+recode([1,2,3,4,5,missing], 1=>10, missing=>"missing") # to recode Missing you have to do it explicitly
+
+#-
+
+t = categorical([1:5; missing])
+t, levels(t)
+
+#-
+
+recode!(t, [1,3]=>2)
+t, levels(t) # note that the levels are dropped after recode
+
+#-
+
+t = categorical([1,2,3], ordered=true)
+levels(recode(t, 2=>0, 1=>-1)) # and if you introduce new levels they are added at the end in the order of appearance
+
+#-
+
+t = categorical([1,2,3,4,5], ordered=true) # when using default it becomes the last level
+levels(recode(t, 300, [1,2]=>100, 3=>200))
+
+# ### Comparisons
+
+x = categorical([1,2,3])
+xs = [x, categorical(x), categorical(x, ordered=true), categorical(x, ordered=true)]
+levels!(xs[2], [3,2,1])
+levels!(xs[4], [2,3,1])
+[a == b for a in xs, b in xs] # all are equal - comparison only by contents
+
+#-
+
+signature(x::CategoricalArray) = (x, levels(x), isordered(x)) # this is actually the full signature of CategoricalArray
+## all are different, notice that xs[1] and xs[2] are unordered but have a different order of levels
+[signature(a) == signature(b) for a in xs, b in xs]
+
+#-
+
+x[1] < x[2] # you cannot compare elements of unordered CategoricalArray
+
+#-
+
+t[1] < t[2] # but you can do it for an ordered one
+
+#-
+
+isless(x[1], x[2]) # isless works within the same CategoricalArray even if it is not ordered
+
+#-
+
+y = deepcopy(x) # but not across categorical arrays
+isless(x[1], y[2])
+
+#-
+
+isless(get(x[1]), get(y[2])) # you can use get to make a comparison of the contents of CategoricalArray
+
+#-
+
+x[1] == y[2] # equality tests work OK across CategoricalArrays
+
+# ### Categorical columns in a DataFrame
+
+df = DataFrame(x = 1:3, y = 'a':'c', z = ["a","b","c"])
+
+#-
+
+categorical!(df) # converts all eltype(AbstractString) columns to categorical
+
+#-
+
+showcols(df)
+
+#-
+
+categorical!(df, :x) # manually convert to categorical column :x
+
+#-
+
+showcols(df)
+
diff --git a/literate_notebooks/src-PT-BR/08_joins.jl b/literate_notebooks/src-PT-BR/08_joins.jl
new file mode 100644
index 0000000..e52bc22
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/08_joins.jl
@@ -0,0 +1,76 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2017**
+
+using DataFrames # load package
+
+# ## Joining DataFrames
+
+#-
+
+# ### Preparing DataFrames for a join
+
+x = DataFrame(ID=[1,2,3,4,missing], name = ["Alice", "Bob", "Conor", "Dave","Zed"])
+y = DataFrame(id=[1,2,5,6,missing], age = [21,22,23,24,99])
+x,y
+
+#-
+
+rename!(x, :ID=>:id) # names of columns on which we want to join must be the same
+
+# ### Standard joins: inner, left, right, outer, semi, anti
+
+join(x, y, on=:id) # :inner join by default, missing is joined
+
+#-
+
+join(x, y, on=:id, kind=:left)
+
+#-
+
+join(x, y, on=:id, kind=:right)
+
+#-
+
+join(x, y, on=:id, kind=:outer)
+
+#-
+
+join(x, y, on=:id, kind=:semi)
+
+#-
+
+join(x, y, on=:id, kind=:anti)
+
+# ### Cross join
+
+## cross-join does not require on argument
+## it produces a Cartesian product of arguments
+function expand_grid(;xs...) # a simple replacement for expand.grid in R
+    reduce((x,y) -> join(x, DataFrame(Pair(y...)), kind=:cross), # cross-join the grid built so far with the next keyword turned into a DataFrame
+           DataFrame(Pair(xs[1]...)), xs[2:end]) # start from the first keyword, fold over the rest (assumes each entry of `xs` splats to name, values - TODO confirm for your Julia version)
+end
+
+expand_grid(a=[1,2], b=["a","b","c"], c=[true,false])
+
+# ### Complex cases of joins
+
+x = DataFrame(id1=[1,1,2,2,missing,missing],
+              id2=[1,11,2,21,missing,99],
+              name = ["Alice", "Bob", "Conor", "Dave","Zed", "Zoe"])
+y = DataFrame(id1=[1,1,3,3,missing,missing],
+              id2=[11,1,31,3,missing,999],
+              age = [21,22,23,24,99, 100])
+x,y
+
+#-
+
+join(x, y, on=[:id1, :id2]) # joining on two columns
+
+#-
+
+join(x, y, on=[:id1], makeunique=true) # with duplicates all combinations are produced (here :inner join)
+
+#-
+
+join(x, y, on=[:id1], kind=:semi) # but not by :semi join (as it would duplicate rows)
+
diff --git a/literate_notebooks/src-PT-BR/09_reshaping.jl b/literate_notebooks/src-PT-BR/09_reshaping.jl
new file mode 100644
index 0000000..d6ec25b
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/09_reshaping.jl
@@ -0,0 +1,90 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+
+# ## Reshaping DataFrames
+
+#-
+
+# ### Wide to long
+
+x = DataFrame(id=[1,2,3,4], id2=[1,1,2,2], M1=[11,12,13,14], M2=[111,112,113,114])
+
+#-
+
+melt(x, :id, [:M1, :M2]) # first pass id-variables and then measure variables; meltdf makes a view
+
+#-
+
+## optionally you can rename columns; melt and stack are identical but order of arguments is reversed
+stack(x, [:M1, :M2], :id, variable_name=:key, value_name=:observed) # first measures and then id-s; stackdf creates view
+
+#-
+
+## if second argument is omitted in melt or stack , all other columns are assumed to be the second argument
+## but measure variables are selected only if they are <: AbstractFloat
+melt(x, [:id, :id2])
+
+#-
+
+melt(x, [1, 2]) # you can use index instead of symbol
+
+#-
+
+bigx = DataFrame(rand(10^6, 10)) # a test comparing creation of new DataFrame and a view
+bigx[:id] = 1:10^6
+@time melt(bigx, :id)
+@time melt(bigx, :id)
+@time meltdf(bigx, :id)
+@time meltdf(bigx, :id);
+
+#-
+
+x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))
+
+#-
+
+melt(x)
+
+#-
+
+melt(DataFrame(rand(3,2))) # by default stack and melt treats floats as value columns
+
+#-
+
+df = DataFrame(rand(3,2))
+df[:key] = [1,1,1]
+mdf = melt(df) # duplicates in key are silently accepted
+
+# ### Long to wide
+
+x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))
+
+#-
+
+y = melt(x, [1,2])
+display(x)
+display(y)
+
+#-
+
+unstack(y, :id2, :variable, :value) # standard unstack with a unique key
+
+#-
+
+unstack(y, :variable, :value) # all other columns are treated as keys
+
+#-
+
+## by default :id, :variable and :value names are assumed; in this case it produces duplicate keys
+unstack(y)
+
+#-
+
+df = stack(DataFrame(rand(3,2)))
+
+#-
+
+unstack(df, :variable, :value) # unable to unstack when no key column is present
+
diff --git a/literate_notebooks/src-PT-BR/10_transforms.jl b/literate_notebooks/src-PT-BR/10_transforms.jl
new file mode 100644
index 0000000..3b5b4aa
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/10_transforms.jl
@@ -0,0 +1,80 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+
+# ## Split-apply-combine
+
+x = DataFrame(id=[1,2,3,4,1,2,3,4], id2=[1,2,1,2,1,2,1,2], v=rand(8))
+
+#-
+
+gx1 = groupby(x, :id)
+
+#-
+
+gx2 = groupby(x, [:id, :id2])
+
+#-
+
+vcat(gx2...) # back to the original DataFrame
+
+#-
+
+x = DataFrame(id = [missing, 5, 1, 3, missing], x = 1:5)
+
+#-
+
+showall(groupby(x, :id)) # by default groups include missing values and are not sorted
+
+#-
+
+showall(groupby(x, :id, sort=true, skipmissing=true)) # but we can change it :)
+
+#-
+
+x = DataFrame(id=rand('a':'d', 100), v=rand(100));
+by(x, :id, y->mean(y[:v])) # apply a function to each group of a data frame
+
+#-
+
+by(x, :id, y->mean(y[:v]), sort=true) # we can sort the output
+
+#-
+
+by(x, :id, y->DataFrame(res=mean(y[:v]))) # this way we can set a name for a column - DataFramesMeta @by is better
+
+#-
+
+x = DataFrame(id=rand('a':'d', 100), x1=rand(100), x2=rand(100))
+aggregate(x, :id, sum) # apply a function over all columns of a data frame in groups given by id
+
+#-
+
+aggregate(x, :id, sum, sort=true) # also can be sorted
+
+# *We omit the discussion of map/combine as I do not find them very useful (better to use by)*
+
+x = DataFrame(rand(3, 5))
+
+#-
+
+map(mean, eachcol(x)) # map a function over each column and return a data frame
+
+#-
+
+foreach(c -> println(c[1], ": ", mean(c[2])), eachcol(x)) # a raw iteration returns a tuple with column name and values
+
+#-
+
+colwise(mean, x) # colwise is similar, but produces a vector
+
+#-
+
+x[:id] = [1,1,2]
+colwise(mean,groupby(x, :id)) # and works on GroupedDataFrame
+
+#-
+
+map(r -> r[:x1]/r[:x2], eachrow(x)) # now the returned value is DataFrameRow which works similarly to a one-row DataFrame
+
diff --git a/literate_notebooks/src-PT-BR/11_performance.jl b/literate_notebooks/src-PT-BR/11_performance.jl
new file mode 100644
index 0000000..005e877
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/11_performance.jl
@@ -0,0 +1,135 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames
+using BenchmarkTools
+
+# ## Performance tips
+
+#-
+
+# ### Access by column number is faster than by name
+
+x = DataFrame(rand(5, 1000))
+@btime x[500];
+@btime x[:x500];
+
+# ### When working with data in a `DataFrame` use barrier functions or type annotations
+
+function f_bad() # this function will be slow
+    srand(1); x = DataFrame(rand(1000000,2)) # fixed seed, then a DataFrame built from a 1000000x2 random matrix
+    y, z = x[1], x[2] # first and second column, extracted without any type annotation
+    p = 0.0
+    for i in 1:nrow(x) # accumulate the sum of the elementwise products y[i]*z[i]
+        p += y[i]*z[i]
+    end
+    p # the value of the last expression is returned
+end
+
+@btime f_bad();
+
+#-
+
+@code_warntype f_bad() # the reason is that Julia does not know the types of columns in `DataFrame`
+
+#-
+
+## solution 1 is to use barrier function (it should be possible to use it in almost any code)
+function f_inner(y,z) # kernel: returns the sum of y[k]*z[k] over k in 1:length(y)
+   total = 0.0
+   for k in 1:length(y)
+       total += y[k]*z[k]
+   end
+   return total
+end
+
+function f_barrier() # extract the work to an inner function (f_inner does the loop)
+    srand(1); df = DataFrame(rand(1000000,2))
+    return f_inner(df[1], df[2])
+end
+
+function f_inbuilt() # or use inbuilt function if possible (here: dot on the two columns)
+    srand(1); df = DataFrame(rand(1000000,2))
+    return dot(df[1], df[2])
+end
+
+@btime f_barrier();
+@btime f_inbuilt();
+
+#-
+
+## solution 2 is to provide the types of extracted columns
+## it is simpler but there are cases in which you will not know these types
+function f_typed() # same loop as f_bad, but the extracted columns carry type annotations
+    srand(1); x = DataFrame(rand(1000000,2)) # fixed seed, then a DataFrame built from a 1000000x2 random matrix
+    y::Vector{Float64}, z::Vector{Float64} = x[1], x[2] # declare both extracted columns as Vector{Float64}
+    p = 0.0
+    for i in 1:nrow(x) # accumulate the sum of the elementwise products y[i]*z[i]
+        p += y[i]*z[i]
+    end
+    p # the value of the last expression is returned
+end
+
+@btime f_typed();
+
+# ### Consider using delayed `DataFrame` creation technique
+
+function f1() # create the DataFrame first, then fill it cell by cell
+    df = DataFrame(Float64, 10^4, 100) # we work with DataFrame directly
+    for col in 1:ncol(df)
+        colvec = df[col]
+        for row in 1:nrow(df)
+            colvec[row] = rand()
+        end
+    end
+    return df
+end
+
+function f2() # fill plain vectors first, wrap them in a DataFrame at the end
+    cols = Vector{Any}(100)
+    for c in 1:length(cols)
+        colvec = Vector{Float64}(10^4)
+        for r in 1:length(colvec)
+            colvec[r] = rand()
+        end
+        cols[c] = colvec
+    end
+    return DataFrame(cols) # we delay creation of DataFrame after we have our job done
+end
+
+@btime f1();
+@btime f2();
+
+# ### You can add rows to a `DataFrame` in place and it is fast
+
+x = DataFrame(rand(10^6, 5))
+y = DataFrame(transpose(1.0:5.0))
+z = [1.0:5.0;]
+
+@btime vcat($x, $y); # creates a new DataFrame - slow
+@btime append!($x, $y); # in place - fast
+
+x = DataFrame(rand(10^6, 5)) # reset to the same starting point
+@btime push!($x, $z); # add a single row in place - fastest
+
+# ### Allowing `missing` as well as `categorical` slows down computations
+
+using StatsBase
+
+function test(data) # uses countmap function to test performance: raw sample vs. its categorical version
+    println(eltype(data))
+    sample = rand(data, 10^6)
+    sample_cat = categorical(sample)
+    println(" raw:")
+    @btime countmap($sample)
+    println(" categorical:")
+    @btime countmap($sample_cat)
+    return nothing
+end
+
+test(1:10)
+test([randstring() for i in 1:10])
+test(allowmissing(1:10))
+test(allowmissing([randstring() for i in 1:10]))
+
+
diff --git a/literate_notebooks/src-PT-BR/12_pitfalls.jl b/literate_notebooks/src-PT-BR/12_pitfalls.jl
new file mode 100644
index 0000000..8eb5e79
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/12_pitfalls.jl
@@ -0,0 +1,73 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames
+
+# ## Possible pitfalls
+
+#-
+
+# ### Know what is copied when creating a `DataFrame`
+
+x = DataFrame(rand(3, 5))
+
+#-
+
+y = DataFrame(x)
+x === y # no copying performed
+
+#-
+
+y = copy(x)
+x === y # not the same object
+
+#-
+
+all(x[i] === y[i] for i in 1:ncol(x)) # but the columns are the same (1:ncol(x) checks every column, not just the last)
+
+#-
+
+x = 1:3; y = [1, 2, 3]; df = DataFrame(x=x,y=y) # the same when creating arrays or assigning columns, except ranges
+
+#-
+
+y === df[:y] # the same object
+
+#-
+
+typeof(x), typeof(df[:x]) # range is converted to a vector
+
+# ### Do not modify the parent of `GroupedDataFrame`
+
+x = DataFrame(id=repeat([1,2], outer=3), x=1:6)
+g = groupby(x, :id)
+
+#-
+
+x[1:3, 1]=[2,2,2]
+g # well - it is wrong now, g is only a view
+
+# ### Remember that you can filter columns of a `DataFrame` using booleans
+
+srand(1)
+x = DataFrame(rand(5, 5))
+
+#-
+
+x[x[:x1] .< 0.25] # well - we have filtered columns not rows by accident as you can select columns using booleans
+
+#-
+
+x[x[:x1] .< 0.25, :] # probably this is what we wanted
+
+# ### Column selection for DataFrame creates aliases unless explicitly copied
+
+x = DataFrame(a=1:3)
+x[:b] = x[1] # alias
+x[:c] = x[:, 1] # also alias
+x[:d] = x[1][:] # copy
+x[:e] = copy(x[1]) # explicit copy
+display(x)
+x[1,1] = 100
+display(x)
+
diff --git a/literate_notebooks/src-PT-BR/13_extras.jl b/literate_notebooks/src-PT-BR/13_extras.jl
new file mode 100644
index 0000000..5140a31
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/13_extras.jl
@@ -0,0 +1,198 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 13, 2018**
+
+using DataFrames
+
+# ## Extras - selected functionalities of selected packages
+
+#-
+
+# ### FreqTables: creating cross tabulations
+
+using FreqTables
+df = DataFrame(a=rand('a':'d', 1000), b=rand(["x", "y", "z"], 1000))
+ft = freqtable(df, :a, :b) # observe that dimensions are sorted if possible
+
+#-
+
+ft[1,1], ft['b', "z"] # you can index the result using numbers or names
+
+#-
+
+prop(ft, 1) # getting proportions - 1 means we want to calculate them in rows (first dimension)
+
+#-
+
+prop(ft, 2) # and columns are normalized to 1.0 now
+
+#-
+
+x = categorical(rand(1:3, 10))
+levels!(x, [3, 1, 2, 4]) # reordering levels and adding an extra level
+freqtable(x) # order is preserved and not-used level is shown
+
+#-
+
+freqtable([1,1,2,3,missing]) # by default missings are listed
+
+#-
+
+freqtable([1,1,2,3,missing], skipmissing=true) # but we can skip them
+
+# ### DataFramesMeta - working on `DataFrame`
+
+using DataFramesMeta
+df = DataFrame(x=1:8, y='a':'h', z=repeat([true,false], outer=4))
+
+#-
+
+@with(df, :x+:z) # expressions with columns of DataFrame
+
+#-
+
+@with df begin # you can define code blocks
+    a = :x[:z]
+    b = :x[.!:z]
+    :y + [a; b]
+end
+
+#-
+
+a # @with creates hard scope so variables do not leak out
+
+#-
+
+df2 = DataFrame(a = [:a, :b, :c])
+@with(df2, :a .== ^(:a)) # sometimes we want to work on raw Symbol, ^() escapes it
+
+#-
+
+df2 = DataFrame(x=1:3, y=4:6, z=7:9)
+@with(df2, _I_(2:3)) # _I_(expression) is translated to df2[expression]
+
+#-
+
+@where(df, :x .< 4, :z .== true) # very useful macro for filtering
+
+#-
+
+@select(df, :x, y = 2*:x, z=:y) # create a new DataFrame based on the old one
+
+#-
+
+@transform(df, a=1, x = 2*:x, y=:x) # create a new DataFrame adding columns based on the old one
+
+#-
+
+@transform(df, a=1, b=:a) # old DataFrame is used and :a is not present there
+
+#-
+
+@orderby(df, :z, -:x) # sorting into a new data frame, less powerful than sort, but lightweight
+
+#-
+
+@linq df |> # chaining of operations on DataFrame
+    where(:x .< 5) |>
+    orderby(:z) |>
+    transform(x²=:x.^2) |>
+    select(:z, :x, :x²)
+
+#-
+
+f(df, col) = getindex(df, col) # you can define your own functions and put them in the chain
+@linq df |> where(:x .<= 4) |> f(:x)
+
+# ### DataFramesMeta - working on grouped `DataFrame`
+
+df = DataFrame(a = 1:12, b = repeat('a':'d', outer=3))
+g = groupby(df, :b)
+
+#-
+
+@by(df, :b, first=first(:a), last=last(:a), mean=mean(:a)) # more convenient than by from DataFrames
+
+#-
+
+@based_on(g, first=first(:a), last=last(:a), mean=mean(:a)) # the same as by but on grouped DataFrame
+
+#-
+
+@where(g, mean(:a) > 6.5) # filter groups on aggregate conditions
+
+#-
+
+@orderby(g, -sum(:a)) # order groups on aggregate conditions
+
+#-
+
+@transform(g, center = mean(:a), centered = :a - mean(:a)) # perform operations within a group and return ungrouped DataFrame
+
+#-
+
+DataFrame(g) # a nice convenience function not defined in DataFrames
+
+#-
+
+@transform(g) # actually this is the same
+
+#-
+
+@linq df |> groupby(:b) |> where(mean(:a) > 6.5) |> DataFrame # you can do chaining on grouped DataFrames as well
+
+# ### DataFramesMeta - rowwise operations on `DataFrame`
+
+df = DataFrame(a = 1:12, b = repeat(1:4, outer=3))
+
+#-
+
+## such conditions are often needed but are complex to write
+@transform(df, x = ifelse.((:a .> 6) .& (:b .== 4), "yes", "no"))
+
+#-
+
+## one option is to use a function that works on a single observation and broadcast it
+myfun(a, b) = if a > 6 && b == 4; "yes"; else "no"; end # "yes" only when a > 6 and b == 4
+@transform(df, x = myfun.(:a, :b))
+
+#-
+
+## or you can use @byrow! macro that allows you to process DataFrame rowwise
+@byrow! df begin
+    @newcol x::Vector{String}
+    :x = :a > 6 && :b == 4 ? "yes" : "no"
+end
+
+# ### Visualizing data with StatPlots
+
+using StatPlots # you might need to setup Plots package and some plotting backend first
+
+#-
+
+## we present only a minimal functionality of the package
+
+#-
+
+srand(1)
+df = DataFrame(x = sort(randn(1000)), y=randn(1000), z = [fill("b", 500); fill("a", 500)])
+
+#-
+
+@df df plot(:x, :y, legend=:topleft, label="y(x)") # a most basic plot
+
+#-
+
+@df df density(:x, label="") # density plot
+
+#-
+
+@df df histogram(:y, label="y") # and a histogram
+
+#-
+
+@df df boxplot(:z, :x, label="x")
+
+#-
+
+@df df violin(:z, :y, label="y")
+
diff --git a/literate_notebooks/src-PT-BR/README.md b/literate_notebooks/src-PT-BR/README.md
new file mode 100644
index 0000000..4733e5c
--- /dev/null
+++ b/literate_notebooks/src-PT-BR/README.md
@@ -0,0 +1,147 @@
+# An Introduction to DataFrames
+
+[Bogumił Kamiński](http://bogumilkaminski.pl/about/), November 2020
+
+**The tutorial is for DataFrames 0.22.1**
+
+A brief introduction to basic usage of [DataFrames](https://github.com/JuliaData/DataFrames.jl).
+
+The tutorial contains a specification of the project environment version under
+which it should be run. In order to prepare this environment, before using the
+tutorial notebooks, while in the project folder run the following command in the
+command line:
+
+```
+julia -e 'using Pkg; Pkg.activate("."); Pkg.instantiate()'
+```
+
+Tested under Julia 1.5.3. The project dependencies are the following:
+
+```
+  [69666777] Arrow v1.0.1
+  [6e4b80f9] BenchmarkTools v0.5.0
+  [336ed68f] CSV v0.8.2
+  [324d7699] CategoricalArrays v0.9.0
+  [944b1d66] CodecZlib v0.7.0
+  [a93c6f00] DataFrames v0.22.1
+  [1313f7d8] DataFramesMeta v0.6.0
+  [5789e2e9] FileIO v1.4.4
+  [da1fdf0e] FreqTables v0.4.2
+  [7073ff75] IJulia v1.23.0
+  [babc3d20] JDF v0.2.20
+  [9da8a3cd] JLSO v2.4.0
+  [b9914132] JSONTables v1.0.0
+  [86f7a689] NamedArrays v0.9.4
+  [b98c9c47] Pipe v1.3.0
+  [2dfb63ee] PooledArrays v0.5.3
+  [f3b207a7] StatsPlots v0.14.17
+  [bd369af6] Tables v1.2.1
+  [a5390f91] ZipFile v0.9.3
+  [9a3f8284] Random
+  [10745b16] Statistics
+```
+
+I will try to keep the material up to date as the packages evolve.
+
+This tutorial covers
+[DataFrames](https://github.com/JuliaData/DataFrames.jl)
+and [CategoricalArrays](https://github.com/JuliaData/CategoricalArrays.jl),
+as they constitute the core of [DataFrames](https://github.com/JuliaData/DataFrames.jl)
+along with selected file reading and writing packages.
+
+The last [extras](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/13_extras.ipynb)
+part mentions *selected* functionalities of *selected* packages that I find useful for data manipulation, currently those are:
+[FreqTables](https://github.com/nalimilan/FreqTables.jl),
+[DataFramesMeta](https://github.com/JuliaStats/DataFramesMeta.jl) (pending its update to support DataFrames.jl 0.22 release),
+[StatsPlots](https://github.com/JuliaPlots/StatsPlots.jl).
+
+# Setting up Jupyter Notebook for work with DataFrames.jl
+
+By default Jupyter Notebook will limit the number of rows and columns when
+displaying a data frame to roughly fit the screen size (like in the REPL).
+
+You can override this behavior by setting `ENV["COLUMNS"]` or `ENV["LINES"]`
+variables to hold the maximum width and height of output in characters
+respectively when running a notebook. Alternatively you can add the following
+entry `"COLUMNS": "1000", "LINES": "100"` to `"env"` variable in your Jupyter
+kernel file. See
+[here](https://jupyter-client.readthedocs.io/en/stable/kernels.html) for
+information about location and specification of Jupyter kernels.
+
+# TOC
+
+| File                                                                                                              | Topic                             |
+|-------------------------------------------------------------------------------------------------------------------|-----------------------------------|
+| [01_constructors.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/01_constructors.ipynb)   | Creating DataFrame and conversion |
+| [02_basicinfo.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/02_basicinfo.ipynb)         | Getting summary information       |
+| [03_missingvalues.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/03_missingvalues.ipynb) | Handling missing values           |
+| [04_loadsave.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/04_loadsave.ipynb)           | Loading and saving DataFrames     |
+| [05_columns.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/05_columns.ipynb)             | Working with columns of DataFrame |
+| [06_rows.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/06_rows.ipynb)                   | Working with rows of DataFrame    |
+| [07_factors.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/07_factors.ipynb)             | Working with categorical data     |
+| [08_joins.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/08_joins.ipynb)                 | Joining DataFrames                |
+| [09_reshaping.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/09_reshaping.ipynb)         | Reshaping DataFrames              |
+| [10_transforms.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/10_transforms.ipynb)       | Transforming DataFrames           |
+| [11_performance.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/11_performance.ipynb)     | Performance tips                  |
+| [12_pitfalls.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/12_pitfalls.ipynb)           | Possible pitfalls                 |
+| [13_extras.ipynb](https://github.com/bkamins/Julia-DataFrames-Tutorial/blob/master/13_extras.ipynb)               | Additional interesting packages   |
+
+Changelog:
+
+| Date       | Changes                                                      |
+| ---------- | ------------------------------------------------------------ |
+| 2017-12-05 | Initial release                                              |
+| 2017-12-06 | Added description of `insert!`, `merge!`, `empty!`, `categorical!`, `delete!`, `DataFrames.index` |
+| 2017-12-09 | Added performance tips                                       |
+| 2017-12-10 | Added pitfalls                                               |
+| 2017-12-18 | Added additional worthwhile packages: *FreqTables* and *DataFramesMeta* |
+| 2017-12-29 | Added description of `filter` and `filter!`                  |
+| 2017-12-31 | Added description of conversion to `Matrix`                  |
+| 2018-04-06 | Added example of extracting a row from a `DataFrame`         |
+| 2018-04-21 | Major update of whole tutorial                               |
+| 2018-05-01 | Added `byrow!` example                                       |
+| 2018-05-13 | Added `StatPlots` package to extras                          |
+| 2018-05-23 | Improved comments in sections 1 to 5 by [Jane Herriman](https://github.com/xorJane) |
+| 2018-07-25 | Update to 0.11.7 release                                     |
+| 2018-08-25 | Update to Julia 1.0 release: sections 1 to 10                |
+| 2018-08-29 | Update to Julia 1.0 release: sections 11, 12 and 13          |
+| 2018-09-05 | Update to Julia 1.0 release: FreqTables section              |
+| 2018-09-10 | Added CSVFiles section to chapter on load/save               |
+| 2018-09-26 | Updated to DataFrames 0.14.0                                 |
+| 2018-10-04 | Updated to DataFrames 0.14.1, added `haskey` and `repeat`    |
+| 2018-12-08 | Updated to DataFrames 0.15.2                                 |
+| 2019-01-03 | Updated to DataFrames 0.16.0, added serialization instructions |
+| 2019-01-18 | Updated to DataFrames 0.17.0, added `passmissing` |
+| 2019-01-27 | Added Feather.jl file read/write |
+| 2019-01-30 | Renamed StatPlots.jl to StatsPlots.jl and added Tables.jl|
+| 2019-02-08 | Added `groupvars` and `groupindices` functions|
+| 2019-04-27 | Updated to DataFrames 0.18.0, dropped JLD2.jl |
+| 2019-04-30 | Updated handling of missing values description |
+| 2019-07-16 | Updated to DataFrames 0.19.0 |
+| 2019-08-14 | Added JSONTables.jl and `Tables.columnindex` |
+| 2019-08-16 | Added Project.toml and Manifest.toml |
+| 2019-08-26 | Update to Julia 1.2 and DataFrames 0.19.3 |
+| 2019-08-29 | Add example how to compress/decompress CSV file using CodecZlib |
+| 2019-08-30 | Add examples of JLSO.jl and ZipFile.jl by [xiaodaigh](https://github.com/xiaodaigh) |
+| 2019-11-03 | Add examples of JDF.jl by [xiaodaigh](https://github.com/xiaodaigh) |
+| 2019-12-08 | Updated to DataFrames 0.20.0 |
+| 2020-05-06 | Updated to DataFrames 0.21.0 (except load/save and extras) |
+| 2020-11-20 | Updated to DataFrames 0.22.0 (except DataFramesMeta.jl which does not work yet) |
+| 2020-11-26 | Updated to DataFramesMeta.jl 0.6; update by @pdeffebach |
+
+# Core functions summary
+
+1. Constructors: `DataFrame`, `DataFrame!`, `Tables.rowtable`, `Tables.columntable`, `Matrix`, `eachcol`, `eachrow`, `Tables.namedtupleiterator`, `empty`, `empty!`
+2. Getting summary: `size`, `nrow`, `ncol`, `describe`, `names`, `eltypes`, `first`, `last`, `getindex`, `setindex!`, `@view`, `isapprox`
+3. Handling missing: `missing` (singleton instance of `Missing`), `ismissing`, `nonmissingtype`, `skipmissing`, `replace`, `replace!`, `coalesce`, `allowmissing`, `disallowmissing`, `allowmissing!`, `completecases`, `dropmissing`, `dropmissing!`, `disallowmissing!`, `passmissing`
+4. Loading and saving: `CSV` (package), `CSVFiles` (package), `Serialization` (module), `CSV.read`, `CSV.write`, `save`, `load`, `serialize`, `deserialize`, `Arrow.write`, `Arrow.Table` (from Arrow.jl package), `JSONTables` (package), `arraytable`, `objecttable`, `jsontable`, `CodecZlib` (module), `GzipCompressorStream`, `GzipDecompressorStream`, `JDF.jl` (package), `JDF.savejdf`, `JDF.loadjdf`, `JLSO.jl` (package), `JLSO.save`, `JLSO.load`, `ZipFile.jl` (package), `ZipFile.reader`, `ZipFile.writer`, `ZipFile.addfile`
+5. Working with columns: `rename`, `rename!`, `hcat`, `insertcols!`, `categorical!`, `columnindex`, `hasproperty`, `select`, `select!`, `transform`, `transform!`, `combine`, `Not`, `All`, `Between`, `ByRow`, `AsTable`
+6. Working with rows: `sort!`, `sort`, `issorted`, `append!`, `vcat`, `push!`, `view`, `filter`, `filter!`, `delete!`, `unique`, `nonunique`, `unique!`, `repeat`, `parent`, `parentindices`, `flatten`, `@pipe` (from `Pipe` package), `only`
+7. Working with categorical: `categorical`, `cut`, `isordered`, `ordered!`, `levels`, `unique`, `levels!`, `droplevels!`, `get`, `recode`, `recode!`
+8. Joining: `innerjoin`, `leftjoin`, `rightjoin`, `outerjoin`, `semijoin`, `antijoin`, `crossjoin`
+9. Reshaping: `stack`, `unstack`
+10. Transforming: `groupby`, `mapcols`, `parent`, `groupcols`, `valuecols`, `groupindices`, `keys` (for `GroupedDataFrame`), `combine`, `select`, `select!`, `transform`, `transform!`, `@pipe` (from `Pipe` package)
+11. Extras:
+    * [FreqTables](https://github.com/nalimilan/FreqTables.jl): `freqtable`, `prop`, `Name`
+    * [DataFramesMeta](https://github.com/JuliaStats/DataFramesMeta.jl): `@with`, `@where`, `@select`, `@transform`, `@orderby`, `@linq`, `@by`, `@combine`, `@eachrow`, `@newcol`, `^`, `cols`
+    * [StatsPlots](https://github.com/JuliaPlots/StatsPlots.jl): `@df`, `plot`, `density`, `histogram`,`boxplot`, `violin`
diff --git a/literate_notebooks/src/01_constructors.jl b/literate_notebooks/src/01_constructors.jl
new file mode 100644
index 0000000..333a81e
--- /dev/null
+++ b/literate_notebooks/src/01_constructors.jl
@@ -0,0 +1,143 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+# 
+# Let's get started by loading the `DataFrames` package.
+
+using DataFrames
+
+# ## Constructors and conversion
+
+#-
+
+# ### Constructors
+# 
+# In this section, you'll see many ways to create a `DataFrame` using the `DataFrame()` constructor.
+# 
+# First, we could create an empty DataFrame,
+
+DataFrame() # empty DataFrame
+
+# Or we could call the constructor using keyword arguments to add columns to the `DataFrame`.
+
+DataFrame(A=1:3, B=rand(3), C=randstring.([3,3,3]))
+
+# We can create a `DataFrame` from a dictionary, in which case keys from the dictionary will be sorted to create the `DataFrame` columns.
+
+x = Dict("A" => [1,2], "B" => [true, false], "C" => ['a', 'b'])
+DataFrame(x)
+
+# Rather than explicitly creating a dictionary first, as above, we could pass `DataFrame` arguments with the syntax of dictionary key-value pairs. 
+# 
+# Note that in this case, we use symbols to denote the column names and arguments are not sorted. For example, `:A`, the symbol, produces `A`, the name of the first column here:
+
+DataFrame(:A => [1,2], :B => [true, false], :C => ['a', 'b'])
+
+# Here we create a `DataFrame` from a vector of vectors, and each vector becomes a column.
+
+DataFrame([rand(3) for i in 1:3])
+
+#  For now we can construct a single `DataFrame` from a `Vector` of atoms, creating a `DataFrame` with a single row. In future releases of DataFrames.jl, this will throw an error.
+
+DataFrame(rand(3))
+
+# Instead use a transposed vector if you have a vector of atoms (in this way you effectively pass a two dimensional array to the constructor which is supported).
+
+DataFrame(transpose([1, 2, 3]))
+
+# Pass a second argument to give the columns names.
+
+DataFrame([1:3, 4:6, 7:9], [:A, :B, :C])
+
+# Here we create a `DataFrame` from a matrix,
+
+DataFrame(rand(3,4))
+
+# and here we do the same but also pass column names.
+
+DataFrame(rand(3,4), Symbol.('a':'d'))
+
+# We can also construct an uninitialized DataFrame.
+# 
+# Here we pass column types, names and number of rows; we get `missing` in column :C because `Any >: Missing`.
+
+DataFrame([Int, Float64, Any], [:A, :B, :C], 1)
+
+# Here we create a `DataFrame`, but column `:C` is #undef and Jupyter has problem with displaying it. (This works OK at the REPL.)
+# 
+# This will be fixed in next release of DataFrames!
+
+DataFrame([Int, Float64, String], [:A, :B, :C], 1)
+
+# To initialize a `DataFrame` with column names, but no rows use
+
+DataFrame([Int, Float64, String], [:A, :B, :C], 0) 
+
+# This syntax gives us a quick way to create homogenous `DataFrame`.
+
+DataFrame(Int, 3, 5)
+
+# This example is similar, but has nonhomogenous columns.
+
+DataFrame([Int, Float64], 4)
+
+# Finally, we can create a `DataFrame` by copying an existing `DataFrame`.
+# 
+# Note that `copy` creates a shallow copy.
+
+y = DataFrame(x)
+z = copy(x)
+(x === y), (x === z), isequal(x, z)
+
+# ### Conversion to a matrix
+# 
+# Let's start by creating a `DataFrame` with two rows and two columns.
+
+x = DataFrame(x=1:2, y=["A", "B"])
+
+# We can create a matrix by passing this `DataFrame` to `Matrix`.
+
+Matrix(x)
+
+# This would work even if the `DataFrame` had some `missing`s:
+
+x = DataFrame(x=1:2, y=[missing,"B"])
+
+#-
+
+Matrix(x)
+
+# In the two previous matrix examples, Julia created matrices with elements of type `Any`. We can see more clearly that the type of matrix is inferred when we pass, for example, a `DataFrame` of integers to `Matrix`, creating a 2D `Array` of `Int64`s:
+
+x = DataFrame(x=1:2, y=3:4)
+
+#-
+
+Matrix(x)
+
+# In this next example, Julia correctly identifies that `Union` is needed to express the type of the resulting `Matrix` (which contains `missing`s).
+
+x = DataFrame(x=1:2, y=[missing,4])
+
+#-
+
+Matrix(x)
+
+# Note that we can't force a conversion of `missing` values to `Int`s!
+
+Matrix{Int}(x)
+
+# ### Handling of duplicate column names
+# 
+# We can pass the `makeunique` keyword argument to allow passing duplicate names (they get deduplicated)
+
+df = DataFrame(:a=>1, :a=>2, :a_1=>3; makeunique=true)
+
+# Otherwise, duplicates will not be allowed in the future.
+
+df = DataFrame(:a=>1, :a=>2, :a_1=>3)
+
+# A constructor that is passed column names as keyword arguments is a corner case.
+# You cannot pass `makeunique` to allow duplicates here.
+
+df = DataFrame(a=1, a=2, makeunique=true)
+
diff --git a/literate_notebooks/src/02_basicinfo.jl b/literate_notebooks/src/02_basicinfo.jl
new file mode 100644
index 0000000..6cde7c6
--- /dev/null
+++ b/literate_notebooks/src/02_basicinfo.jl
@@ -0,0 +1,76 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Getting basic information about a data frame
+# 
+# Let's start by creating a `DataFrame` object, `x`, so that we can learn how to get information on that data frame.
+
+x = DataFrame(A = [1, 2], B = [1.0, missing], C = ["a", "b"])
+
+# The standard `size` function works to get dimensions of the `DataFrame`,
+
+size(x), size(x, 1), size(x, 2)
+
+# as well as `nrow` and `ncol` from R; `length` gives number of columns.
+
+nrow(x), ncol(x), length(x)
+
+# `describe` gives basic summary statistics of data in your `DataFrame`.
+
+describe(x)
+
+# Use `showcols` to get information about columns stored in a DataFrame.
+
+showcols(x)
+
+# `names` will return the names of all columns,
+
+names(x)
+
+# and `eltypes` returns their types.
+
+eltypes(x)
+
+# Here we create some large DataFrame
+
+y = DataFrame(rand(1:10, 1000, 10));
+
+# and then we can use `head` to peek into its top rows
+
+head(y)
+
+# and `tail` to see its bottom rows.
+
+tail(y, 3)
+
+# ### Most elementary get and set operations
+# 
+# Given the `DataFrame`, `x`, here are three ways to grab one of its columns as a `Vector`:
+
+x[1], x[:A], x[:, 1]
+
+# To grab one row as a DataFrame, we can index as follows.
+
+x[1, :]
+
+# We can grab a single cell or element with the same syntax used to grab an element of an array.
+
+x[1, 1]
+
+# Assignment can be done in ranges to a scalar,
+
+x[1:2, 1:2] = 1
+x
+
+# to a vector of length equal to the number of assigned rows,
+
+x[1:2, 1:2] = [1,2]
+x
+
+# or to another data frame of matching size.
+
+x[1:2, 1:2] = DataFrame([5 6; 7 8])
+x
+
diff --git a/literate_notebooks/src/03_missingvalues.jl b/literate_notebooks/src/03_missingvalues.jl
new file mode 100644
index 0000000..1e17d97
--- /dev/null
+++ b/literate_notebooks/src/03_missingvalues.jl
@@ -0,0 +1,112 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Handling missing values
+# 
+# A singleton type `Missings.Missing` allows us to deal with missing values.
+
+missing, typeof(missing)
+
+# Arrays automatically create an appropriate union type.
+
+x = [1, 2, missing, 3]
+
+# `ismissing` checks if passed value is missing.
+
+ismissing(1), ismissing(missing), ismissing(x), ismissing.(x)
+
+# We can extract the type combined with Missing from a `Union` via `Missings.T`.
+# 
+# (This is useful for arrays!)
+
+eltype(x), Missings.T(eltype(x))
+
+# `missing` comparisons produce `missing`.
+
+missing == missing, missing != missing, missing < missing
+
+# This is also true when `missing`s are compared with values of other types.
+
+1 == missing, 1 != missing, 1 < missing
+
+# `isequal`, `isless`, and `===` produce results of type `Bool`.
+
+isequal(missing, missing), missing === missing, isequal(1, missing), isless(1, missing)
+
+# In the next few examples, we see that many (not all) functions handle `missing`.
+
+map(x -> x(missing), [sin, cos, zero, sqrt]) # part 1
+
+#-
+
+map(x -> x(missing, 1), [+, - , *, /, div]) # part 2 
+
+#-
+
+map(x -> x([1,2,missing]), [minimum, maximum, extrema, mean, any, float]) # part 3
+
+# `skipmissing` returns iterator skipping missing values. We can use `collect` and `skipmissing` to create an array that excludes these missing values.
+
+collect(skipmissing([1, missing, 2, missing]))
+
+# Similarly, here we combine `collect` and `Missings.replace` to create an array that replaces all missing values with some value (`NaN` in this case).
+
+collect(Missings.replace([1.0, missing, 2.0, missing], NaN))
+
+# Another way to do this:
+
+coalesce.([1.0, missing, 2.0, missing], NaN)
+
+# Caution: `nothing` would also be replaced here (for Julia 0.7 a more sophisticated behavior of `coalesce` that allows to avoid this problem is planned).
+
+coalesce.([1.0, missing, nothing, missing], NaN)
+
+# You can use `recode` if you have homogenous output types.
+
+recode([1.0, missing, 2.0, missing], missing=>NaN)
+
+# You can use `unique` or `levels` to get unique values with or without missings, respectively.
+
+unique([1, missing, 2, missing]), levels([1, missing, 2, missing])
+
+# In this next example, we convert `x` to `y` with `allowmissing`, where `y` has a type that accepts missings.
+
+x = [1,2,3]
+y = allowmissing(x)
+
+# Then, we convert back with `disallowmissing`. This would fail if `y` contained missing values!
+
+z = disallowmissing(y)
+x,y,z
+
+# In this next example, we show that the type of each column in `x` is initially `Int64`. After using `allowmissing!` to accept missing values in columns 1 and 3, the types of those columns become `Union`s of `Int64` and `Missings.Missing`.
+
+x = DataFrame(Int, 2, 3)
+println("Before: ", eltypes(x))
+allowmissing!(x, 1) # make first column accept missings
+allowmissing!(x, :x3) # make :x3 column accept missings
+println("After: ", eltypes(x))
+
+# In this next example, we'll use `completecases` to find all the rows of a `DataFrame` that have complete data.
+
+x = DataFrame(A=[1, missing, 3, 4], B=["A", "B", missing, "C"])
+println(x)
+println("Complete cases:\n", completecases(x))
+
+# We can use `dropmissing` or `dropmissing!` to remove the rows with incomplete data from a `DataFrame` and either create a new `DataFrame` or mutate the original in-place.
+
+y = dropmissing(x)
+dropmissing!(x)
+[x, y]
+
+# When we call `showcols` on a `DataFrame` with dropped missing values, the columns still allow missing values.
+
+showcols(x)
+
+# Since we've excluded missing values, we can safely use `disallowmissing!` so that the columns will no longer accept missing values.
+
+disallowmissing!(x)
+showcols(x)
+
diff --git a/literate_notebooks/src/04_loadsave.jl b/literate_notebooks/src/04_loadsave.jl
new file mode 100644
index 0000000..d166830
--- /dev/null
+++ b/literate_notebooks/src/04_loadsave.jl
@@ -0,0 +1,64 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Load and save DataFrames
+# We do not cover all features of the packages. Please refer to their documentation to learn them.
+# 
+# Here we'll load `CSV` to read and write CSV files and `JLD`, which allows us to work with a Julia native binary format.
+
+using CSV
+using JLD
+
+# Let's create a simple `DataFrame` for testing purposes,
+
+x = DataFrame(A=[true, false, true], B=[1, 2, missing],
+              C=[missing, "b", "c"], D=['a', missing, 'c'])
+
+
+# and use `eltypes` to look at the columnwise types.
+
+eltypes(x)
+
+# Let's use `CSV` to save `x` to disk; make sure `x.csv` does not conflict with some file in your working directory.
+
+CSV.write("x.csv", x)
+
+# Now we can see how it was saved by reading `x.csv`.
+
+print(read("x.csv", String))
+
+# We can also load it back. `use_mmap=false` disables memory mapping so that on Windows the file can be deleted in the same session.
+
+y = CSV.read("x.csv", use_mmap=false)
+
+# When loading in a `DataFrame` from a `CSV`, all columns allow `Missing` by default. Note that the column types have changed!
+
+eltypes(y)
+
+# Now let's save `x` to a file in a binary format; make sure that `x.jld` does not exist in your working directory.
+
+save("x.jld", "x", x)
+
+# After loading in `x.jld` as `y`, `y` is identical to `x`.
+
+y = load("x.jld", "x")
+
+# Note that the column types of `y` are the same as those of `x`!
+
+eltypes(y)
+
+# Next, we'll create the files `bigdf.csv` and `bigdf.jld`, so be careful that you don't already have these files on disk!
+# 
+# In particular, we'll time how long it takes us to write a `DataFrame` with 10^3 rows and 10^2 columns to `.csv` and `.jld` files.  *You can expect JLD to be faster!* Use `compress=true` to reduce file sizes.
+
+bigdf = DataFrame(Bool, 10^3, 10^2)
+@time CSV.write("bigdf.csv", bigdf)
+@time save("bigdf.jld", "bigdf", bigdf)
+getfield.(stat.(["bigdf.csv", "bigdf.jld"]), :size)
+
+# Finally, let's clean up. Do not run the next cell unless you are sure that it will not erase your important files.
+
+foreach(rm, ["x.csv", "x.jld", "bigdf.csv", "bigdf.jld"])
+
diff --git a/literate_notebooks/src/05_columns.jl b/literate_notebooks/src/05_columns.jl
new file mode 100644
index 0000000..f32e02a
--- /dev/null
+++ b/literate_notebooks/src/05_columns.jl
@@ -0,0 +1,187 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**
+
+using DataFrames # load package
+
+# ## Manipulating columns of DataFrame
+
+#-
+
+# ### Renaming columns
+# 
+# Let's start with a `DataFrame` of `Bool`s that has default column names.
+
+x = DataFrame(Bool, 3, 4)
+
+# With `rename`, we create a new `DataFrame`; here we rename the column `:x1` to `:A`. (`rename` also accepts collections of Pairs.)
+
+rename(x, :x1 => :A)
+
+# With `rename!` we do an in place transformation. 
+# 
+# This time we've applied a function to every column name.
+
+rename!(c -> Symbol(string(c)^2), x)
+
+# We can also change the name of a particular column without knowing the original.
+# 
+# Here we change the name of the third column, creating a new `DataFrame`.
+
+rename(x, names(x)[3] => :third)
+
+# With `names!`, we can change the names of all variables.
+
+names!(x, [:a, :b, :c, :d])
+
+# We get an error when we try to provide duplicate names
+
+names!(x, fill(:a, 4))
+
+#  unless we pass `makeunique=true`, which allows us to handle duplicates in passed names.
+
+names!(x, fill(:a, 4), makeunique=true)
+
+# ### Reordering columns
+
+#-
+
+# We can reorder the names(x) vector as needed, creating a new DataFrame.
+
+srand(1234)
+x[shuffle(names(x))]
+
+# Also, `permutecols!` will be introduced in the next release of DataFrames.
+
+#-
+
+# ### Merging/adding columns
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:4])
+
+# With `hcat` we can merge two `DataFrame`s. Also [x y] syntax is supported but only when DataFrames have unique column names.
+
+hcat(x, x, makeunique=true)
+
+# We can also use `hcat` to add a new column; a default name `:x1` will be used for this column, so `makeunique=true` is needed.
+
+y = hcat(x, [1,2,3], makeunique=true)
+
+# You can also prepend a vector with `hcat`.
+
+hcat([1,2,3], x, makeunique=true)
+
+# Alternatively you could append a vector with the following syntax. This is a bit more verbose but cleaner.
+
+y = [x DataFrame(A=[1,2,3])]
+
+# Here we do the same but add column `:A` to the front.
+
+y = [DataFrame(A=[1,2,3]) x]
+
+# A column can also be added in the middle. Here a brute-force method is used and a new DataFrame is created.
+
+using BenchmarkTools
+@btime [$x[1:2] DataFrame(A=[1,2,3]) $x[3:4]]
+
+# We could also do this with a specialized in place method `insert!`. Let's add `:newcol` to the `DataFrame` `y`.
+
+insert!(y, 2, [1,2,3], :newcol)
+
+# If you want to insert the same column name several times `makeunique=true` is needed as usual.
+
+insert!(y, 2, [1,2,3], :newcol, makeunique=true)
+
+# We can see how much faster it is to insert a column with `insert!` than with `hcat` using `@btime`.
+
+@btime insert!(copy($x), 3, [1,2,3], :A)
+
+# Let's use `insert!` to append a column in place,
+
+insert!(x, ncol(x)+1, [1,2,3], :A)
+
+# and to in place prepend a column.
+
+insert!(x, 1, [1,2,3], :B)
+
+# With `merge!`, let's merge the second `DataFrame` into the first one, overwriting duplicate columns.
+
+df1 = DataFrame(x=1:3, y=4:6)
+df2 = DataFrame(x='a':'c', z = 'd':'f', new=11:13)
+df1, df2, merge!(df1, df2)
+
+# For comparison: merge two `DataFrame`s with `hcat`, renaming duplicate column names.
+
+df1 = DataFrame(x=1:3, y=4:6)
+df2 = DataFrame(x='a':'c', z = 'd':'f', new=11:13)
+hcat(df1, df2, makeunique=true)
+
+# ### Subsetting/removing columns
+# 
+# Let's create a new `DataFrame` `x` and show a few ways to create DataFrames with a subset of `x`'s columns.
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:5])
+
+# First we could do this by index
+
+x[[1,2,4,5]]
+
+# or by column name.
+
+x[[:x1, :x4]]
+
+# We can also choose to keep or exclude columns by `Bool`. (We need a vector whose length is the number of columns in the original `DataFrame`.)
+
+x[[true, false, true, false, true]]
+
+# Here we create a single column `DataFrame`,
+
+x[[:x1]]
+
+# and here we access the vector contained in column `:x1`.
+
+x[:x1]
+
+# We could grab the same vector by column number
+
+x[1]
+
+# and remove everything from a `DataFrame` with `empty!`.
+
+empty!(y)
+
+# Here we create a copy of `x` and delete the 3rd column from the copy with `delete!`.
+
+z = copy(x)
+x, delete!(z, 3)
+
+# ### Modify column by name
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:5])
+
+# With the following syntax, the existing column is modified without performing any copying.
+
+x[:x1] = x[:x2]
+x
+
+# We can also use the following syntax to add a new column at the end of a `DataFrame`.
+
+x[:A] = [1,2,3]
+x
+
+# A new column name will be added to our `DataFrame` with the following syntax as well (7 is equal to `ncol(x)+1`).
+
+x[7] = 11:13
+x
+
+# ### Find column name
+
+x = DataFrame([(i,j) for i in 1:3, j in 1:5])
+
+# We can check if a column with a given name exists via
+
+:x1 in names(x) 
+
+# and determine its index via
+
+findfirst(names(x), :x2)
+
diff --git a/literate_notebooks/src/06_rows.jl b/literate_notebooks/src/06_rows.jl
new file mode 100644
index 0000000..3660e40
--- /dev/null
+++ b/literate_notebooks/src/06_rows.jl
@@ -0,0 +1,177 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+srand(1);
+
+# ## Manipulating rows of DataFrame
+
+#-
+
+# ### Reordering rows
+
+x = DataFrame(id=1:10, x = rand(10), y = [zeros(5); ones(5)]) # and we hope that x[:x] is not sorted :)
+
+#-
+
+issorted(x), issorted(x, :x) # check if a DataFrame or a subset of its columns is sorted
+
+#-
+
+sort!(x, :x) # sort x in place
+
+#-
+
+y = sort(x, :id) # new DataFrame
+
+#-
+
+sort(x, (:y, :x), rev=(true, false)) # sort by two columns, first is decreasing, second is increasing
+
+#-
+
+sort(x, (order(:y, rev=true), :x)) # the same as above
+
+#-
+
+sort(x, (order(:y, rev=true), order(:x, by=v->-v))) # some more fancy sorting stuff
+
+#-
+
+x[shuffle(1:10), :] # reorder rows (here randomly)
+
+#-
+
+sort!(x, :id)
+x[[1,10],:] = x[[10,1],:] # swap rows
+x
+
+#-
+
+x[1,:], x[10,:] = x[10,:], x[1,:] # and swap again
+x
+
+# ### Merging/adding rows
+
+x = DataFrame(rand(3, 5))
+
+#-
+
+[x; x] # merge by rows - data frames must have the same column names; this is the same as vcat
+
+#-
+
+y = x[reverse(names(x))] # get y with other order of names
+
+#-
+
+vcat(x, y) # we get what we want as vcat does column name matching
+
+#-
+
+vcat(x, y[1:3]) # but column names must still match
+
+#-
+
+append!(x, x) # the same but modifies x
+
+#-
+
+append!(x, y) # here column names must match exactly
+
+#-
+
+push!(x, 1:5) # add one row to x at the end; must give correct number of values and correct types
+x
+
+#-
+
+push!(x, Dict(:x1=> 11, :x2=> 12, :x3=> 13, :x4=> 14, :x5=> 15)) # also works with dictionaries
+x
+
+# ### Subsetting/removing rows
+
+x = DataFrame(id=1:10, val='a':'j')
+
+#-
+
+x[1:2, :] # by index
+
+#-
+
+view(x, 1:2) # the same but a view
+
+#-
+
+x[repmat([true, false], 5), :] # by Bool, exact length required
+
+#-
+
+view(x, repmat([true, false], 5), :) # view again
+
+#-
+
+deleterows!(x, 7) # delete one row
+
+#-
+
+deleterows!(x, 6:7) # delete a collection of rows
+
+#-
+
+x = DataFrame([1:4, 2:5, 3:6])
+
+#-
+
+filter(r -> r[:x1] > 2.5, x) # create a new DataFrame where filtering function operates on DataFrameRow
+
+#-
+
+## in place modification of x, an example with do-block syntax
+filter!(x) do r
+    if r[:x1] > 2.5
+        return r[:x2] < 4.5
+    end
+    r[:x3] < 3.5
+end
+
+# ### Deduplicating
+
+x = DataFrame(A=[1,2], B=["x","y"])
+append!(x, x)
+x[:C] = 1:4
+x
+
+#-
+
+unique(x, [1,2]) # get first unique rows for given index
+
+#-
+
+unique(x) # now we look at whole rows
+
+#-
+
+nonunique(x, :A) # get indicators of non-unique rows
+
+#-
+
+unique!(x, :B) # modify x in place
+
+# ### Extracting one row from `DataFrame` into a vector
+
+x = DataFrame(x=[1,missing,2], y=["a", "b", missing], z=[true,false,true])
+
+#-
+
+cols = [:x, :y]
+[x[1, col] for col in cols] # subset of columns
+
+#-
+
+[[x[i, col] for col in names(x)] for i in 1:nrow(x)] # vector of vectors, each entry contains one full row of x
+
+#-
+
+Tuple(x[1, col] for col in cols) # similar construct for Tuples, when ported to Julia 0.7 NamedTuples will be added
+
diff --git a/literate_notebooks/src/07_factors.jl b/literate_notebooks/src/07_factors.jl
new file mode 100644
index 0000000..a3ff03c
--- /dev/null
+++ b/literate_notebooks/src/07_factors.jl
@@ -0,0 +1,231 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+
+# ## Working with CategoricalArrays
+
+#-
+
+# ### Constructor
+
+x = categorical(["A", "B", "B", "C"]) # unordered
+
+#-
+
+y = categorical(["A", "B", "B", "C"], ordered=true) # ordered, by default order is sorting order
+
+#-
+
+z = categorical(["A","B","B","C", missing]) # unordered with missings
+
+#-
+
+c = cut(1:10, 5) # ordered, into equal counts, possible to rename labels and give custom breaks
+
+#-
+
+by(DataFrame(x=cut(randn(100000), 10)), :x, d -> DataFrame(n=nrow(d)), sort=true) # just to make sure it works right
+
+#-
+
+v = categorical([1,2,2,3,3]) # contains integers not strings
+
+#-
+
+Vector{Union{String, Missing}}(z) # sometimes you need to convert back to a standard vector
+
+# ### Managing levels
+
+arr = [x,y,z,c,v]
+
+#-
+
+isordered.(arr) # check if each categorical array is ordered
+
+#-
+
+ordered!(x, true), isordered(x) # make x ordered
+
+#-
+
+ordered!(x, false), isordered(x) # and unordered again
+
+#-
+
+levels.(arr) # list levels
+
+#-
+
+unique.(arr) # missing will be included
+
+#-
+
+y[1] < y[2] # can compare as y is ordered
+
+#-
+
+v[1] < v[2] # not comparable, v is unordered although it contains integers
+
+#-
+
+levels!(y, ["C", "B", "A"]) # you can reorder levels, mostly useful for ordered CategoricalArrays
+
+#-
+
+y[1] < y[2] # observe that the order is changed
+
+#-
+
+levels!(z, ["A", "B"]) # you have to specify all levels that are present
+
+#-
+
+levels!(z, ["A", "B"], allow_missing=true) # unless the underlying array allows for missings and force removal of levels
+
+#-
+
+z[1] = "B"
+z # now z has only "B" entries
+
+#-
+
+levels(z) # but it remembers the levels it had (the reason is mostly performance)
+
+#-
+
+droplevels!(z) # this way we can clean it up
+levels(z)
+
+# ### Data manipulation
+
+x, levels(x)
+
+#-
+
+x[2] = "0"
+x, levels(x) # new level added at the end (works only for unordered)
+
+#-
+
+v, levels(v)
+
+#-
+
+v[1] + v[2] # even though underlying data is Int, we cannot operate on it
+
+#-
+
+Vector{Int}(v) # you have either to retrieve the data by conversion (may be expensive)
+
+#-
+
+get(v[1]) + get(v[2]) # or get a single value
+
+#-
+
+get.(v) # this will work for arrays without missings
+
+#-
+
+get.(z) # but will fail on missing values
+
+#-
+
+Vector{Union{String, Missing}}(z) # you have to do the conversion
+
+#-
+
+z[1]*z[2], z.^2 # the only exception are CategoricalArrays based on String - you can operate on them normally
+
+#-
+
+recode([1,2,3,4,5,missing], 1=>10) # recode some values in an array; has also in place recode! equivalent
+
+#-
+
+recode([1,2,3,4,5,missing], "a", 1=>10, 2=>20) # here we provided a default value for not mapped recodings
+
+#-
+
+recode([1,2,3,4,5,missing], 1=>10, missing=>"missing") # to recode Missing you have to do it explicitly
+
+#-
+
+t = categorical([1:5; missing])
+t, levels(t)
+
+#-
+
+recode!(t, [1,3]=>2)
+t, levels(t) # note that the levels are dropped after recode
+
+#-
+
+t = categorical([1,2,3], ordered=true)
+levels(recode(t, 2=>0, 1=>-1)) # and if you introduce new levels, they are added at the end in the order of appearance
+
+#-
+
+t = categorical([1,2,3,4,5], ordered=true) # when using default it becomes the last level
+levels(recode(t, 300, [1,2]=>100, 3=>200))
+
+# ### Comparisons
+
+x = categorical([1,2,3])
+xs = [x, categorical(x), categorical(x, ordered=true), categorical(x, ordered=true)]
+levels!(xs[2], [3,2,1])
+levels!(xs[4], [2,3,1])
+[a == b for a in xs, b in xs] # all are equal - comparison only by contents
+
+#-
+
+signature(x::CategoricalArray) = (x, levels(x), isordered(x)) # this is actually the full signature of CategoricalArray
+## all are different; notice that xs[1] and xs[2] are both unordered but have a different order of levels
+[signature(a) == signature(b) for a in xs, b in xs]
+
+#-
+
+x[1] < x[2] # you cannot compare elements of unordered CategoricalArray
+
+#-
+
+t[1] < t[2] # but you can do it for an ordered one
+
+#-
+
+isless(x[1], x[2]) # isless works within the same CategoricalArray even if it is not ordered
+
+#-
+
+y = deepcopy(x) # but not across categorical arrays
+isless(x[1], y[2])
+
+#-
+
+isless(get(x[1]), get(y[2])) # you can use get to make a comparison of the contents of CategoricalArray
+
+#-
+
+x[1] == y[2] # equality tests work OK across CategoricalArrays
+
+# ### Categorical columns in a DataFrame
+
+df = DataFrame(x = 1:3, y = 'a':'c', z = ["a","b","c"])
+
+#-
+
+categorical!(df) # converts all eltype(AbstractString) columns to categorical
+
+#-
+
+showcols(df)
+
+#-
+
+categorical!(df, :x) # manually convert to categorical column :x
+
+#-
+
+showcols(df)
+
diff --git a/literate_notebooks/src/08_joins.jl b/literate_notebooks/src/08_joins.jl
new file mode 100644
index 0000000..e52bc22
--- /dev/null
+++ b/literate_notebooks/src/08_joins.jl
@@ -0,0 +1,76 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2017**
+
+using DataFrames # load package
+
+# ## Joining DataFrames
+
+#-
+
+# ### Preparing DataFrames for a join
+
+x = DataFrame(ID=[1,2,3,4,missing], name = ["Alice", "Bob", "Conor", "Dave","Zed"])
+y = DataFrame(id=[1,2,5,6,missing], age = [21,22,23,24,99])
+x,y
+
+#-
+
+rename!(x, :ID=>:id) # names of columns on which we want to join must be the same
+
+# ### Standard joins: inner, left, right, outer, semi, anti
+
+join(x, y, on=:id) # :inner join by default, missing is joined
+
+#-
+
+join(x, y, on=:id, kind=:left)
+
+#-
+
+join(x, y, on=:id, kind=:right)
+
+#-
+
+join(x, y, on=:id, kind=:outer)
+
+#-
+
+join(x, y, on=:id, kind=:semi)
+
+#-
+
+join(x, y, on=:id, kind=:anti)
+
+# ### Cross join
+
+## a cross join does not need the `on` argument;
+## it produces the Cartesian product of its arguments
+function expand_grid(;xs...) # a simple replacement for expand.grid in R
+    todf = kv -> DataFrame(Pair(kv...)) # one single-column DataFrame per keyword argument
+    reduce((acc, kv) -> join(acc, todf(kv), kind=:cross), todf(xs[1]), xs[2:end])
+end
+
+expand_grid(a=[1,2], b=["a","b","c"], c=[true,false])
+
+# ### Complex cases of joins
+
+x = DataFrame(id1=[1,1,2,2,missing,missing],
+              id2=[1,11,2,21,missing,99],
+              name = ["Alice", "Bob", "Conor", "Dave","Zed", "Zoe"])
+y = DataFrame(id1=[1,1,3,3,missing,missing],
+              id2=[11,1,31,3,missing,999],
+              age = [21,22,23,24,99, 100])
+x,y
+
+#-
+
+join(x, y, on=[:id1, :id2]) # joining on two columns
+
+#-
+
+join(x, y, on=[:id1], makeunique=true) # with duplicates all combinations are produced (here :inner join)
+
+#-
+
+join(x, y, on=[:id1], kind=:semi) # but not by :semi join (as it would duplicate rows)
+
diff --git a/literate_notebooks/src/09_reshaping.jl b/literate_notebooks/src/09_reshaping.jl
new file mode 100644
index 0000000..d6ec25b
--- /dev/null
+++ b/literate_notebooks/src/09_reshaping.jl
@@ -0,0 +1,90 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+
+# ## Reshaping DataFrames
+
+#-
+
+# ### Wide to long
+
+x = DataFrame(id=[1,2,3,4], id2=[1,1,2,2], M1=[11,12,13,14], M2=[111,112,113,114])
+
+#-
+
+melt(x, :id, [:M1, :M2]) # first pass id-variables and then measure variables; meltdf makes a view
+
+#-
+
+## optionally you can rename columns; melt and stack are identical but order of arguments is reversed
+stack(x, [:M1, :M2], :id, variable_name=:key, value_name=:observed) # first measures and then id-s; stackdf creates view
+
+#-
+
+## if second argument is omitted in melt or stack , all other columns are assumed to be the second argument
+## but measure variables are selected only if they are <: AbstractFloat
+melt(x, [:id, :id2])
+
+#-
+
+melt(x, [1, 2]) # you can use index instead of symbol
+
+#-
+
+bigx = DataFrame(rand(10^6, 10)) # a test comparing creation of new DataFrame and a view
+bigx[:id] = 1:10^6
+@time melt(bigx, :id)
+@time melt(bigx, :id)
+@time meltdf(bigx, :id)
+@time meltdf(bigx, :id);
+
+#-
+
+x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))
+
+#-
+
+melt(x)
+
+#-
+
+melt(DataFrame(rand(3,2))) # by default stack and melt treat floats as value columns
+
+#-
+
+df = DataFrame(rand(3,2))
+df[:key] = [1,1,1]
+mdf = melt(df) # duplicates in key are silently accepted
+
+# ### Long to wide
+
+x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))
+
+#-
+
+y = melt(x, [1,2])
+display(x)
+display(y)
+
+#-
+
+unstack(y, :id2, :variable, :value) # standard unstack with a unique key
+
+#-
+
+unstack(y, :variable, :value) # all other columns are treated as keys
+
+#-
+
+## by default :id, :variable and :value names are assumed; in this case it produces duplicate keys
+unstack(y)
+
+#-
+
+df = stack(DataFrame(rand(3,2)))
+
+#-
+
+unstack(df, :variable, :value) # unable to unstack when no key column is present
+
diff --git a/literate_notebooks/src/10_transforms.jl b/literate_notebooks/src/10_transforms.jl
new file mode 100644
index 0000000..3b5b4aa
--- /dev/null
+++ b/literate_notebooks/src/10_transforms.jl
@@ -0,0 +1,80 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames # load package
+
+# ## Split-apply-combine
+
+x = DataFrame(id=[1,2,3,4,1,2,3,4], id2=[1,2,1,2,1,2,1,2], v=rand(8))
+
+#-
+
+gx1 = groupby(x, :id)
+
+#-
+
+gx2 = groupby(x, [:id, :id2])
+
+#-
+
+vcat(gx2...) # back to the original DataFrame
+
+#-
+
+x = DataFrame(id = [missing, 5, 1, 3, missing], x = 1:5)
+
+#-
+
+showall(groupby(x, :id)) # by default groups include missing values and are not sorted
+
+#-
+
+showall(groupby(x, :id, sort=true, skipmissing=true)) # but we can change it :)
+
+#-
+
+x = DataFrame(id=rand('a':'d', 100), v=rand(100));
+by(x, :id, y->mean(y[:v])) # apply a function to each group of a data frame
+
+#-
+
+by(x, :id, y->mean(y[:v]), sort=true) # we can sort the output
+
+#-
+
+by(x, :id, y->DataFrame(res=mean(y[:v]))) # this way we can set a name for a column - DataFramesMeta @by is better
+
+#-
+
+x = DataFrame(id=rand('a':'d', 100), x1=rand(100), x2=rand(100))
+aggregate(x, :id, sum) # apply a function over all columns of a data frame in groups given by id
+
+#-
+
+aggregate(x, :id, sum, sort=true) # also can be sorted
+
+# *We omit the discussion of map/combine as I do not find them very useful (better to use by)*
+
+x = DataFrame(rand(3, 5))
+
+#-
+
+map(mean, eachcol(x)) # map a function over each column and return a data frame
+
+#-
+
+foreach(c -> println(c[1], ": ", mean(c[2])), eachcol(x)) # a raw iteration returns a tuple with column name and values
+
+#-
+
+colwise(mean, x) # colwise is similar, but produces a vector
+
+#-
+
+x[:id] = [1,1,2]
+colwise(mean,groupby(x, :id)) # and works on GroupedDataFrame
+
+#-
+
+map(r -> r[:x1]/r[:x2], eachrow(x)) # now the returned value is DataFrameRow which works similarly to a one-row DataFrame
+
diff --git a/literate_notebooks/src/11_performance.jl b/literate_notebooks/src/11_performance.jl
new file mode 100644
index 0000000..005e877
--- /dev/null
+++ b/literate_notebooks/src/11_performance.jl
@@ -0,0 +1,135 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames
+using BenchmarkTools
+
+# ## Performance tips
+
+#-
+
+# ### Access by column number is faster than by name
+
+x = DataFrame(rand(5, 1000))
+@btime x[500];
+@btime x[:x500];
+
+# ### When working with data in a `DataFrame` use barrier functions or type annotations
+
+function f_bad() # deliberately slow baseline: the hot loop runs directly on columns pulled out of a DataFrame
+    srand(1); x = DataFrame(rand(1000000,2)) # fixed seed; 1000000 rows, 2 random columns
+    y, z = x[1], x[2] # first and second column; their types are not known to the compiler here
+    p = 0.0 # running sum of the elementwise products
+    for i in 1:nrow(x)
+        p += y[i]*z[i]
+    end
+    p
+end
+
+@btime f_bad();
+
+#-
+
+@code_warntype f_bad() # the reason is that Julia does not know the types of columns in `DataFrame`
+
+#-
+
+## solution 1 is to use a barrier function (it should be possible to use it in almost any code)
+function f_inner(y,z) # kernel: returns the sum of y[i]*z[i] for i in 1:length(y)
+   p = 0.0 # accumulator
+   for i in 1:length(y) # z is indexed with the indices of y
+       p += y[i]*z[i]
+   end
+   p
+end
+
+function f_barrier() # barrier version of f_bad: build the data here, do the work in an inner function
+    srand(1); x = DataFrame(rand(1000000,2)) # same seed and size as in f_bad
+    f_inner(x[1], x[2]) # the loop lives in f_inner, which receives the two columns as arguments
+end
+
+function f_inbuilt() # or use a built-in function if possible
+    srand(1); x = DataFrame(rand(1000000,2)) # same seed and size as in f_bad
+    dot(x[1], x[2]) # dot product of the two columns replaces the hand-written loop
+end
+
+@btime f_barrier();
+@btime f_inbuilt();
+
+#-
+
+## solution 2 is to provide the types of the extracted columns
+## it is simpler, but there are cases in which you will not know these types
+function f_typed()
+    srand(1); x = DataFrame(rand(1000000,2)) # same seed and size as in f_bad
+    y::Vector{Float64}, z::Vector{Float64} = x[1], x[2] # annotate the column types at extraction
+    p = 0.0 # running sum of the elementwise products
+    for i in 1:nrow(x)
+        p += y[i]*z[i]
+    end
+    p
+end
+
+@btime f_typed();
+
+# ### Consider using delayed `DataFrame` creation technique
+
+function f1() # fill an already constructed DataFrame with random numbers and return it
+    x = DataFrame(Float64, 10^4, 100) # we work with DataFrame directly
+    for c in 1:ncol(x)
+        d = x[c] # column c, fetched from the DataFrame
+        for r in 1:nrow(x)
+            d[r] = rand()
+        end
+    end
+    x
+end
+
+function f2() # build the columns as plain vectors first and wrap them in a DataFrame at the end
+    x = Vector{Any}(100) # container for the columns; every slot is assigned in the loop below
+    for c in 1:length(x)
+        d = Vector{Float64}(10^4) # one column, filled element by element
+        for r in 1:length(d)
+            d[r] = rand()
+        end
+        x[c] = d
+    end
+    DataFrame(x) # we delay creation of the DataFrame until our job is done
+end
+
+@btime f1();
+@btime f2();
+
+# ### You can add rows to a `DataFrame` in place and it is fast
+
+x = DataFrame(rand(10^6, 5))
+y = DataFrame(transpose(1.0:5.0))
+z = [1.0:5.0;]
+
+@btime vcat($x, $y); # creates a new DataFrame - slow
+@btime append!($x, $y); # in place - fast
+
+x = DataFrame(rand(10^6, 5)) # reset to the same starting point
+@btime push!($x, $z); # add a single row in place - fastest
+
+# ### Allowing `missing` as well as `categorical` slows down computations
+
+using StatsBase
+
+function test(data) # uses countmap function to test performance; prints timings and returns nothing
+    println(eltype(data)) # label the run with the element type of the input
+    x = rand(data, 10^6) # 10^6 values sampled from data
+    y = categorical(x) # the same values as a categorical array
+    println(" raw:")
+    @btime countmap($x)
+    println(" categorical:")
+    @btime countmap($y)
+    nothing
+end
+
+test(1:10)
+test([randstring() for i in 1:10])
+test(allowmissing(1:10))
+test(allowmissing([randstring() for i in 1:10]))
+
+
diff --git a/literate_notebooks/src/12_pitfalls.jl b/literate_notebooks/src/12_pitfalls.jl
new file mode 100644
index 0000000..8eb5e79
--- /dev/null
+++ b/literate_notebooks/src/12_pitfalls.jl
@@ -0,0 +1,73 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**
+
+using DataFrames
+
+# ## Possible pitfalls
+
+#-
+
+# ### Know what is copied when creating a `DataFrame`
+
+x = DataFrame(rand(3, 5))
+
+#-
+
+y = DataFrame(x)
+x === y # no copying performed
+
+#-
+
+y = copy(x)
+x === y # not the same object
+
+#-
+
+all(x[i] === y[i] for i in 1:ncol(x)) # but the columns are the same objects (every column index is checked)
+
+#-
+
+x = 1:3; y = [1, 2, 3]; df = DataFrame(x=x,y=y) # the same when creating arrays or assigning columns, except ranges
+
+#-
+
+y === df[:y] # the same object
+
+#-
+
+typeof(x), typeof(df[:x]) # range is converted to a vector
+
+# ### Do not modify the parent of `GroupedDataFrame`
+
+x = DataFrame(id=repeat([1,2], outer=3), x=1:6)
+g = groupby(x, :id)
+
+#-
+
+x[1:3, 1]=[2,2,2]
+g # well - it is wrong now, g is only a view
+
+# ### Remember that you can filter columns of a `DataFrame` using booleans
+
+srand(1)
+x = DataFrame(rand(5, 5))
+
+#-
+
+x[x[:x1] .< 0.25] # well - we have filtered columns not rows by accident as you can select columns using booleans
+
+#-
+
+x[x[:x1] .< 0.25, :] # probably this is what we wanted
+
+# ### Column selection for DataFrame creates aliases unless explicitly copied
+
+x = DataFrame(a=1:3)
+x[:b] = x[1] # alias
+x[:c] = x[:, 1] # also alias
+x[:d] = x[1][:] # copy
+x[:e] = copy(x[1]) # explicit copy
+display(x)
+x[1,1] = 100
+display(x)
+
diff --git a/literate_notebooks/src/13_extras.jl b/literate_notebooks/src/13_extras.jl
new file mode 100644
index 0000000..5140a31
--- /dev/null
+++ b/literate_notebooks/src/13_extras.jl
@@ -0,0 +1,198 @@
+# # Introduction to DataFrames
+# **[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 13, 2018**
+
+using DataFrames
+
+# ## Extras - selected functionalities of selected packages
+
+#-
+
+# ### FreqTables: creating cross tabulations
+
+using FreqTables
+df = DataFrame(a=rand('a':'d', 1000), b=rand(["x", "y", "z"], 1000))
+ft = freqtable(df, :a, :b) # observe that dimensions are sorted if possible
+
+#-
+
+ft[1,1], ft['b', "z"] # you can index the result using numbers or names
+
+#-
+
+prop(ft, 1) # getting proportions - 1 means we want to calculate them in rows (first dimension)
+
+#-
+
+prop(ft, 2) # and columns are normalized to 1.0 now
+
+#-
+
+x = categorical(rand(1:3, 10))
+levels!(x, [3, 1, 2, 4]) # reordering levels and adding an extra level
+freqtable(x) # order is preserved and not-used level is shown
+
+#-
+
+freqtable([1,1,2,3,missing]) # by default missings are listed
+
+#-
+
+freqtable([1,1,2,3,missing], skipmissing=true) # but we can skip them
+
+# ### DataFramesMeta - working on `DataFrame`
+
+using DataFramesMeta
+df = DataFrame(x=1:8, y='a':'h', z=repeat([true,false], outer=4))
+
+#-
+
+@with(df, :x+:z) # expressions with columns of DataFrame
+
+#-
+
+@with df begin # you can define code blocks
+    a = :x[:z]
+    b = :x[.!:z]
+    :y + [a; b]
+end
+
+#-
+
+a # @with creates hard scope so variables do not leak out
+
+#-
+
+df2 = DataFrame(a = [:a, :b, :c])
+@with(df2, :a .== ^(:a)) # sometimes we want to work on raw Symbol, ^() escapes it
+
+#-
+
+df2 = DataFrame(x=1:3, y=4:6, z=7:9)
+@with(df2, _I_(2:3)) # _I_(expression) is translated to df2[expression]
+
+#-
+
+@where(df, :x .< 4, :z .== true) # very useful macro for filtering
+
+#-
+
+@select(df, :x, y = 2*:x, z=:y) # create a new DataFrame based on the old one
+
+#-
+
+@transform(df, a=1, x = 2*:x, y=:x) # create a new DataFrame adding columns based on the old one
+
+#-
+
+@transform(df, a=1, b=:a) # old DataFrame is used and :a is not present there
+
+#-
+
+@orderby(df, :z, -:x) # sorting into a new data frame, less powerful than sort, but lightweight
+
+#-
+
+@linq df |> # chaining of operations on DataFrame
+    where(:x .< 5) |>
+    orderby(:z) |>
+    transform(x²=:x.^2) |>
+    select(:z, :x, :x²)
+
+#-
+
+f(df, col) = getindex(df, col) # a user-defined helper like this one can be used as a step in the chain
+@linq df |> where(:x .<= 4) |> f(:x)
+
+# ### DataFramesMeta - working on grouped `DataFrame`
+
+df = DataFrame(a = 1:12, b = repeat('a':'d', outer=3))
+g = groupby(df, :b)
+
+#-
+
+@by(df, :b, first=first(:a), last=last(:a), mean=mean(:a)) # more convenient than by from DataFrames
+
+#-
+
+@based_on(g, first=first(:a), last=last(:a), mean=mean(:a)) # the same as by but on grouped DataFrame
+
+#-
+
+@where(g, mean(:a) > 6.5) # filter groups on aggregate conditions
+
+#-
+
+@orderby(g, -sum(:a)) # order groups on aggregate conditions
+
+#-
+
+@transform(g, center = mean(:a), centered = :a - mean(:a)) # perform operations within a group and return ungrouped DataFrame
+
+#-
+
+DataFrame(g) # a nice convenience function not defined in DataFrames
+
+#-
+
+@transform(g) # actually this is the same
+
+#-
+
+@linq df |> groupby(:b) |> where(mean(:a) > 6.5) |> DataFrame # you can do chaining on grouped DataFrames as well
+
+# ### DataFramesMeta - rowwise operations on `DataFrame`
+
+df = DataFrame(a = 1:12, b = repeat(1:4, outer=3))
+
+#-
+
+## such conditions are often needed but are complex to write
+@transform(df, x = ifelse.((:a .> 6) .& (:b .== 4), "yes", "no"))
+
+#-
+
+## alternatively, define a scalar function for a single observation and broadcast it over the columns
+function myfun(a, b); return (a > 6 && b == 4) ? "yes" : "no"; end
+@transform(df, x = myfun.(:a, :b))
+
+#-
+
+## or you can use @byrow! macro that allows you to process DataFrame rowwise
+@byrow! df begin
+    @newcol x::Vector{String}
+    :x = :a > 6 && :b == 4 ? "yes" : "no"
+end
+
+# ### Visualizing data with StatPlots
+
+using StatPlots # you might need to setup Plots package and some plotting backend first
+
+#-
+
+## we present only a minimal functionality of the package
+
+#-
+
+srand(1)
+df = DataFrame(x = sort(randn(1000)), y=randn(1000), z = [fill("b", 500); fill("a", 500)])
+
+#-
+
+@df df plot(:x, :y, legend=:topleft, label="y(x)") # a most basic plot
+
+#-
+
+@df df density(:x, label="") # density plot
+
+#-
+
+@df df histogram(:y, label="y") # and a histogram
+
+#-
+
+@df df boxplot(:z, :x, label="x")
+
+#-
+
+@df df violin(:z, :y, label="y")
+