Create some variables
# ---- Personal details: replace these values with your own ----
myName <- "Gilles"         # character values are written between quotation marks
myAge <- 22                # numeric values are assigned directly, e.g. x <- 2
myBirthDate <- "05/10/96"  # stored as text in day/month/year order
# ---- Vectors that will become the columns of the data frame ----
# Each vector starts with your own value, followed by a second person's value.
Name <- c(myName, "Quentin")
Age <- c(myAge, 22)
birthDate <- c(myBirthDate, "01/01/97")  # more values can be appended to a vector
Create the dataframe
# Combine the three vectors into the data frame `df`, one column per vector.
# Naming each argument spells out the resulting column names.
df <- data.frame(
  Name = Name,
  Age = Age,
  birthDate = birthDate,
  stringsAsFactors = FALSE  # keep text columns as character, not factor
)
df
## Name Age birthDate
## 1 Gilles 22 05/10/96
## 2 Quentin 22 01/01/97
Access data in our DataFrame
# Read your own age out of the data frame in three equivalent ways.
# 1) By position: row 1, column 2.
a1 <- df[1, 2]
# 2) By condition on the data frame's own Name column. Filtering with
#    df$Name (rather than the free-standing Name vector) keeps the lookup
#    tied to the rows of df, so it stays correct if df is reordered,
#    filtered, or extended.
a2 <- df$Age[df$Name == "Gilles"]
# 3) By row position and column name.
a3 <- df[1, "Age"]
c(a1 = a1, a2 = a2, a3 = a3)  # all three entries must show the same value
## a1 a2 a3
## 22 22 22
# Get both the names and the ages.
df[1:2, 1:2]  # rows 1 to 2 and columns 1 to 2
## Name Age
## 1 Gilles 22
## 2 Quentin 22
df[, 1:2]  # leaving the row (or column) index blank selects all of them
## Name Age
## 1 Gilles 22
## 2 Quentin 22
Add Data to our DataFrame
# Set Quentin's birth date to 27 August 1996: address the cell by row number
# and column name, then assign the new value to it.
df[2, "birthDate"] <- "27/08/96"
# cbind() binds the columns passed as arguments onto the data frame.
# I() stores each vector "as is" (see the 'AsIs' class in the str() output).
df <- cbind(
  df,
  Country = I(c("LEB", "FR")),
  Speciality = I(c("Hummus", "Cheese"))
)
# A column can also be added by assigning to a new column name.
# Change the first value to a hobby that you enjoy; NA marks an unknown value.
df[["Sport"]] <- c("your hobby", NA)
# Try adding a column with data of your choice. A column name containing a
# space has to be quoted.
df[["Best Achievement"]] <- c("Climbed Mt Everest", "Surfed a 35m high wave")
# str() gives a compact overview of the structure of the data.
str(df)
## 'data.frame': 2 obs. of 7 variables:
## $ Name : chr "Gilles" "Quentin"
## $ Age : num 22 22
## $ birthDate : chr "05/10/96" "27/08/96"
## $ Country :Class 'AsIs' chr [1:2] "LEB" "FR"
## $ Speciality :Class 'AsIs' chr [1:2] "Hummus" "Cheese"
## $ Sport : chr "your hobby" NA
## $ Best Achievement: chr "Climbed Mt Everest" "Surfed a 35m high wave"
df
## Name Age birthDate Country Speciality Sport
## 1 Gilles 22 05/10/96 LEB Hummus your hobby
## 2 Quentin 22 27/08/96 FR Cheese <NA>
## Best Achievement
## 1 Climbed Mt Everest
## 2 Surfed a 35m high wave
Modify data in our DataFrame
# Change the speciality of the person whose country is FR to Wine.
# The logical condition selects the matching rows of the Speciality column.
df$Speciality[df$Country == "FR"] <- "Wine"
df
## Name Age birthDate Country Speciality Sport
## 1 Gilles 22 05/10/96 LEB Hummus your hobby
## 2 Quentin 22 27/08/96 FR Wine <NA>
## Best Achievement
## 1 Climbed Mt Everest
## 2 Surfed a 35m high wave
# Rename columns: Name -> FirstName, birthDate -> DateOfBirth,
# Speciality -> FavFood, Sport -> Hobby.
# Each column is looked up by its current name, so the renaming does not
# depend on the order or the number of columns in df; all other columns
# keep their names.
names(df)[names(df) == "Name"] <- "FirstName"
names(df)[names(df) == "birthDate"] <- "DateOfBirth"
names(df)[names(df) == "Speciality"] <- "FavFood"
names(df)[names(df) == "Sport"] <- "Hobby"
df
## FirstName Age DateOfBirth Country FavFood Hobby
## 1 Gilles 22 05/10/96 LEB Hummus your hobby
## 2 Quentin 22 27/08/96 FR Wine <NA>
## Best Achievement
## 1 Climbed Mt Everest
## 2 Surfed a 35m high wave
# Remove the rows that contain NA values; the result is stored in df2 and
# df itself is left unchanged.
df2 <- na.omit(df)
df2
## FirstName Age DateOfBirth Country FavFood Hobby Best Achievement
## 1 Gilles 22 05/10/96 LEB Hummus your hobby Climbed Mt Everest
Delete data
# A column is deleted by assigning NULL to it; here the FavFood column is removed.
df[["FavFood"]] <- NULL
df
## FirstName Age DateOfBirth Country Hobby Best Achievement
## 1 Gilles 22 05/10/96 LEB your hobby Climbed Mt Everest
## 2 Quentin 22 27/08/96 FR <NA> Surfed a 35m high wave
dplyr library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Create a new data frame holding only the rows whose Age equals 22.
newdf <- df %>%
  filter(Age == 22)
newdf
## FirstName Age DateOfBirth Country Hobby Best Achievement
## 1 Gilles 22 05/10/96 LEB your hobby Climbed Mt Everest
## 2 Quentin 22 27/08/96 FR <NA> Surfed a 35m high wave
# Create a copy of df without the Age column.
# The column is dropped by its exact name: contains("Age") is a
# case-insensitive substring match and would also drop any other column
# whose name merely contains "age".
newdf2 <- df %>%
  select(-Age)
newdf2
## FirstName DateOfBirth Country Hobby Best Achievement
## 1 Gilles 05/10/96 LEB your hobby Climbed Mt Everest
## 2 Quentin 27/08/96 FR <NA> Surfed a 35m high wave