The factorial of a non-negative integer \(n\) is donated \(n!\), and \[n! = n(n-1)(n-2)・・・1,\] where \(0! = 1\)
kaijo <- function(n) {## function to calculate factorials
## Arguments:
## n = non-negative integer
a <- c(1:n) ## To create a vector in R
if (n == 0){ ## if n = 0, return 1
return(1)
}
else{
prod <- prod(a) ## if n >= 1, return manipulation of all factor in vector 'a'
return(prod)
}
}
To check my own function kaijyo()
, run this command
kaijo(0) ## n = 0
## [1] 1
kaijo(3) ## n = 3
## [1] 6
kaijo(5) ## n = 5
## [1] 120
The result is compared with the function factorial()
in R
factorial(0) ## n = 0
## [1] 1
factorial(3) ## n = 3
## [1] 6
factorial(5) ## n = 5
## [1] 120
Therefore, I find my function kaijo()
right!!!
There are \({n \choose k}\) possible ways to choose \(k\) elements from \(n\), and \[{n \choose k} = \frac{n!}{k!(n - k)!}\]
combi <- function(n, k) {## function to calculate combinations
## Arguments:
## n = non-negative integer
## k = a element choose from n
comb <- kaijo(n) / (kaijo(k) * kaijo(n-k)) ## use my own function "kaijo"
return(comb)
}
Let’s check my own function combi()
combi(5, 1) ## n = 5, k = 1
## [1] 5
combi(10, 3) ## n = 5, k = 2
## [1] 120
The result is compared with the function choose()
in R
choose(5, 1) ## n = 5, k = 1
## [1] 5
choose(10, 3) ## n = 10, k = 3
## [1] 120
Therefore, I find my function combi()
right!!!
The PMF(Probability Mass Function) of the binominal distribution \(f(x)\) can be written as \[f(x) = {n \choose x} p^x (1-p)^{n-x} (x = 0, 1, 2, ..., n),\]
pmf_bin <- function(x, n, p){
## Arguments:
## n = non-negative integer
## x = a element choose from n
## p = the probability of success for each trial
dbin <- combi(n, x)*(p^x)*((1-p)^(n-x))
return(dbin)
}
Let’s examine my own function pmf_bin()
pmf_bin(2, 5, 0.5) ## x = 2, n = 5, p = 0.5
## [1] 0.3125
pmf_bin(4, 10, 0.3) ## x = 4, n = 10, p = 0.3
## [1] 0.2001209
The result is compared with the function dbinom()
in R
dbinom(2, 5, 0.5) ## x = 2, n = 5, p = 0.5
## [1] 0.3125
dbinom(4, 10, 0.3) ## x = 4, n = 10, p = 0.3
## [1] 0.2001209
Therefore, I find my function pmf_bin()
right!!!
First, load the packages we will use.
library(ggplot2) ## install the package 'ggplot2' to create gragh
library(gridExtra) ## install the packege 'gridExtra' to merge various graphs
n <- 2 ## the number of independent Bernoulli trials
p <- 0.5 ## the probability of success for each trial
x2 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x2.'
## repeat this for each n using "for loop"
bin2 <- pmf_bin(i, n, p)
x2[i] <- bin2
}
# However, x2 is not including the case(n = 0),
d1 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d1 <- c(0, d1) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df1 <- data.frame(x = x, pmf = x2) ## create data.frame (n = 1 to n)
df1 <- rbind(d1, df1) ## integrate the vector(n = 0) in this data.frame
str(df1) ## check the structure of this data
## 'data.frame': 3 obs. of 2 variables:
## $ x : num 0 1 2
## $ pmf: num 0.25 0.5 0.25
# To make a bar graph of the variable pmf,
p <- ggplot(df1, aes(x = factor(x), y = pmf))
p1 <- p + geom_bar(width = .25, stat = "identity", fill='white', color='black') +
ggtitle("n = 2") + labs(x = "x", y = "PMF")
print(p1)
n <- 5 ## the number of independent Bernoulli trials
p <- 0.5 ## the probability of success for each trial
x5 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x5.'
## repeat this for each n using "for loop"
bin5 <- pmf_bin(i, n, p)
x5[i] <- bin5
}
# However, x5 is not including the case(n = 0),
d2 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d2 <- c(0, d2) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df2 <- data.frame(x = x, pmf = x5) ## create data.frame (n = 1 to n)
df2 <- rbind(d2, df2) ## integrate the vector(n = 0) in this data.frame
str(df2) ## check the structure of this data
## 'data.frame': 6 obs. of 2 variables:
## $ x : num 0 1 2 3 4 5
## $ pmf: num 0.0312 0.1562 0.3125 0.3125 0.1562 ...
# To make a bar graph of the variable pmf,
p <- ggplot(df2, aes(x = factor(x), y = pmf))
p2 <- p + geom_bar(width = .25, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 5)") + labs(x = "x", y = "PMF")
print(p2)
n <- 10 ## the number of independent Bernoulli trials
p <- 0.5 ## the probability of success for each trial
x10 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x10.'
## repeat this for each n using "for loop"
bin10 <- pmf_bin(i, n, p)
x10[i] <- bin10
}
# However, x10 is not including the case(n = 0),
d3 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d3 <- c(0, d3) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df3 <- data.frame(x = x, pmf = x10) ## create data.frame (n = 1 to n)
df3 <- rbind(d3, df3) ## integrate the vector(n = 0) in this data.frame
str(df3) ## check the structure of this data
## 'data.frame': 11 obs. of 2 variables:
## $ x : num 0 1 2 3 4 5 6 7 8 9 ...
## $ pmf: num 0.000977 0.009766 0.043945 0.117188 0.205078 ...
# To make a bar graph of the variable pmf,
p <- ggplot(df3, aes(x = factor(x), y = pmf))
p3 <- p + geom_bar(width = .25, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 10)") + labs(x = "x", y = "PMF")
print(p3)
n <- 50 ## the number of independent Bernoulli trials
p <- 0.5 ## the probability of success for each trial
x50 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x50.'
## repeat this for each n using "for loop"
bin50 <- pmf_bin(i, n, p)
x50[i] <- bin50
}
# However, x50 is not including the case(n = 0),
d4 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d4 <- c(0, d4) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df4 <- data.frame(x = x, pmf = x50) ## create data.frame (n = 1 to n)
df4 <- rbind(d4, df4) ## integrate the vector(n = 0) in this data.frame
str(df4) ## check the structure of this data
## 'data.frame': 51 obs. of 2 variables:
## $ x : num 0 1 2 3 4 5 6 7 8 9 ...
## $ pmf: num 8.88e-16 4.44e-14 1.09e-12 1.74e-11 2.05e-10 ...
# To make a bar graph of the variable pmf,
p <- ggplot(df4, aes(x = factor(x), y = pmf))
p4 <- p + geom_bar(width = .5, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 50)") + labs(x = "x", y = "PMF")
print(p4)
# To merge the graphs(n = 2, 5, 10, 50),
grid.arrange(p1, p2, p3, p4, nrow = 2, top = "PMF of Binominal (p = 0.5)")
n <- 2 ## the number of independent Bernoulli trials
p <- 0.8 ## the probability of success for each trial
x2 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x2.'
## repeat this for each n using "for loop"
bin2_1 <- pmf_bin(i, n, p)
x2[i] <- bin2_1
}
# However, x2 is not including the case(n = 0),
d2_1 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d2_1 <- c(0, d2_1) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df2_1 <- data.frame(x = x, pmf = x2) ## create data.frame (n = 1 to n)
df2_1 <- rbind(d2_1, df2_1) ## integrate the vector(n = 0) in this data.frame
str(df2_1) ## check the structure of this data
## 'data.frame': 3 obs. of 2 variables:
## $ x : num 0 1 2
## $ pmf: num 0.04 0.32 0.64
# To make a bar graph of the variable pmf,
p <- ggplot(df2_1, aes(x = factor(x), y = pmf))
p5 <- p + geom_bar(width = .25, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 2)") + labs(x = "x", y = "PMF")
print(p5)
n <- 5 ## the number of independent Bernoulli trials
p <- 0.8 ## the probability of success for each trial
x5 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x5.'
## repeat this for each n using "for loop"
bin5_1 <- pmf_bin(i, n, p)
x5[i] <- bin5_1
}
# However, x5 is not including the case(n = 0),
d5_1 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d5_1 <- c(0, d5_1) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df5_1 <- data.frame(x = x, pmf = x5) ## create data.frame (n = 1 to n)
df5_1 <- rbind(d5_1, df5_1) ## integrate the vector(n = 0) in this data.frame
str(df5_1) ## check the structure of this data
## 'data.frame': 6 obs. of 2 variables:
## $ x : num 0 1 2 3 4 5
## $ pmf: num 0.00032 0.0064 0.0512 0.2048 0.4096 ...
# To make a bar graph of the variable pmf,
p <- ggplot(df5_1, aes(x = factor(x), y = pmf))
p6 <- p + geom_bar(width = .25, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 5)") + labs(x = "x", y = "PMF")
print(p6)
n <- 10 ## the number of independent Bernoulli trials
p <- 0.8 ## the probability of success for each trial
x10 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x10.'
## repeat this for each n using "for loop"
bin10 <- pmf_bin(i, n, p)
x10[i] <- bin10
}
# However, x10 is not including the case(n = 0),
d10_1 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d10_1 <- c(0, d10_1) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df10_1 <- data.frame(x = x, pmf = x10) ## create data.frame (n = 1 to n)
df10_1 <- rbind(d10_1, df10_1) ## integrate the vector(n = 0) in this data.frame
str(df10_1) ## check the structure of this data
## 'data.frame': 11 obs. of 2 variables:
## $ x : num 0 1 2 3 4 5 6 7 8 9 ...
## $ pmf: num 1.02e-07 4.10e-06 7.37e-05 7.86e-04 5.51e-03 ...
# To make a bar graph of the variable pmf,
p <- ggplot(df10_1, aes(x = factor(x), y = pmf))
p7 <- p + geom_bar(width = .25, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 10)") + labs(x = "x", y = "PMF")
print(p7)
n <- 50 ## the number of independent Bernoulli trials
p <- 0.8 ## the probability of success for each trial
x50 <- rep(NA, n) ## create a vector to save the simulation result.
## the length of the vector should match n
for(i in 1:n){
## every time we n value, calculate the PMF, and save it to the vector 'x50.'
## repeat this for each n using "for loop"
bin50 <- pmf_bin(i, n, p)
x50[i] <- bin50
}
# However, x50 is not including the case(n = 0),
d50_1 <- pmf_bin(0, n, p) ## if n = 0, calculate the PMF
d50_1 <- c(0, d50_1) ## create the vector
# To transform data.frame from vector in order to use the package "ggplot2"
x <- c(1:n) ## create a vector from 1 to n
df50_1 <- data.frame(x = x, pmf = x50) ## create data.frame (n = 1 to n)
df50_1 <- rbind(d50_1, df50_1) ## integrate the vector(n = 0) in this data.frame
str(df50_1) ## check the structure of this data
## 'data.frame': 51 obs. of 2 variables:
## $ x : num 0 1 2 3 4 5 6 7 8 9 ...
## $ pmf: num 1.13e-35 2.25e-33 2.21e-31 1.41e-29 6.64e-28 ...
# To make a bar graph of the variable pmf,
p <- ggplot(df50_1, aes(x = factor(x), y = pmf))
p8 <- p + geom_bar(width = .5, stat = "identity", fill='white', color='black') +
ggtitle("PMF of Binominal (n = 50)") + labs(x = "x", y = "PMF")
print(p8)
# To merge the graphs(n = 2, 5, 10, 50),
grid.arrange(p5, p6, p7, p8, nrow = 2, top = "PMF of Binominal (p = 0.8)")