import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the cats data; the first CSV column (index_col=0) becomes the row index.
# NOTE: the bare expressions below (e.g. `df["age"]`) only display their value
# in an interactive/notebook session; in a plain script they are evaluated and
# discarded.
df = pd.read_csv("../data/cats.csv", index_col=0)
# Read one column
df["age"]
# Read multiple columns
df[["age", "gender"]]
# Read one row: the row whose index label is 113
df.loc[113]
# Read all rows that fulfill a condition (here: age greater than 4)
df[df["age"] > 4]
#...
# Get the first 10 rows
df.head(10)
# Get the last 10 rows
df.tail(10)
# Some info about the data
df.info()
# Plot a histogram of the "age" column using 6 bins
df["age"].plot(kind="hist", bins=6)
# Scatter plot of "width" (y axis) against "height" (x axis)
df.plot(kind="scatter", x="height", y="width", xlabel="Height", ylabel="Width")
# Create a column; this is a silly example
df["age_size"] = df["age"] * df["size"]
df
# Create a label column by applying a function to each row
def create_label(row, threshold: float = 5) -> str:
    """Classify one row as "old" or "young" based on its "age" value.

    Args:
        row: Any object indexable with the key "age", e.g. the row Series
            that ``DataFrame.apply(..., axis=1)`` passes in, or a dict.
        threshold: Age strictly above which the row is labelled "old".
            Defaults to 5, the cutoff that was previously hard-coded.

    Returns:
        "old" if ``row["age"] > threshold``, otherwise "young".
    """
    # The comparison is strict, so an age equal to the threshold is "young".
    # A NaN age compares False as well and therefore also yields "young".
    return "old" if row["age"] > threshold else "young"
# Call create_label once per row (axis=1) and store the results in a new
# "label" column
df["label"] = df.apply(create_label, axis=1)
df
# Count how many rows received each label
df["label"].value_counts()