# Professional Documents
# Culture Documents
# RStudio download: http://www.rstudio.com/products/rstudio/download/
# Install the required packages before running this script.
# Set the working directory (machine-specific path; adjust for your machine).
setwd("D:/Bappa/PGPBA/Data/")
# Import file: choose the CSV interactively; its first row holds column names.
# TRUE is spelled out because the shorthand T is an ordinary, reassignable variable.
DE2 <- read.csv(file.choose(), header = TRUE)
# Put DE2's columns on the search path so the code below can refer to them
# by bare name (e.g. MPCE, HH_Size).
attach(DE2)
#Load Packages (install any that are missing first)
#e.g. install.packages("MASS")
library(epiR)
library(het.test)
library(car)
library(lmtest)
library(sandwich)
library(caret)
library(e1071)
library(nortest)
library(MASS)
# Reading and verifying the data: print the number of rows and columns.
dim(DE2)

# Create derived variables.
# Helper: code a logical condition as a 1/0 indicator.
to_indicator <- function(condition) ifelse(condition, 1, 0)

# 1 when MPCE is below pov_line, 0 otherwise.
poverty <- to_indicator(MPCE < pov_line)

# Religion indicators.
Hindu <- to_indicator(Religion == "Hindu")
Islam <- to_indicator(Religion == "Islam")

# Social group indicators ("Others" is stored as Gen).
ST <- to_indicator(Social_Group == "ST")
SC <- to_indicator(Social_Group == "SC")
OBC <- to_indicator(Social_Group == "OBC")
Gen <- to_indicator(Social_Group == "Others")

# Cooking fuel indicators.
LPG <- to_indicator(Cook == "LPG")
Kerosene <- to_indicator(Cook == "Kerosene")

# 1 when Salary_income equals "1", 0 otherwise.
Salary <- to_indicator(Salary_income == "1")

# Natural log of MPCE.
logMPC <- log(MPCE)
# Summary statistics for the key variables.
summary(HH_Size)
summary(poverty)
summary(MPCE)
summary(logMPC)

# Descriptive statistics: mean, median, mode, variance, standard deviation,
# skewness, kurtosis.
mean(poverty)
median(HH_Size)
mean(HH_Size)
# Statistical mode (most frequent value) of HH_Size.
# The original called mode(HH_size): that name is misspelt (R is case-sensitive),
# and base::mode() reports an object's storage mode, not its most frequent value.
# With ties, which.max() picks the first (smallest) of the tied values.
hh_size_freq <- table(HH_Size)
as.numeric(names(hh_size_freq)[which.max(hh_size_freq)])
var(HH_Size)
sd(HH_Size)
mean(MPCE)
sd(MPCE)
mean(logMPC)
sd(logMPC)
# skewness() and kurtosis() are not base R; they come from an attached package
# (e1071, loaded via library(e1071), exports both).
skewness(logMPC)
kurtosis(logMPC)
# Graphical representations.

# Pie chart of the Religion frequency table.
count <- table(Religion)
count
pie(count, main = "Religion")

# Histogram of HH_Size on the density scale, so the kernel density curve
# drawn by lines() shares the same y-axis.
# FALSE is spelled out because the shorthand F is an ordinary, reassignable variable.
hist(HH_Size, freq = FALSE)
lines(density(HH_Size))

# Histograms of MPCE and of its log.
hist(MPCE)
hist(logMPC)

# Scatter plot of logMPC against HH_Size, followed by their correlation.
plot(HH_Size, logMPC, xlab = "Size", ylab = "MPC")
cor(HH_Size, logMPC)
# Normality tests on logMPC.

# Shapiro-Wilk test
shapiro.test(logMPC)

# Anderson-Darling test
ad.test(logMPC)

# Kolmogorov-Smirnov test against a normal distribution with fixed parameters.
# NOTE(review): mean = 7.775 and sd = 0.64 look like rounded sample estimates
# of logMPC -- confirm they still match the data in use.
ks.test(
  logMPC,
  pnorm,
  mean = 7.775,
  sd = 0.64
)
### t distribution
# Find the 2.5th and 97.5th percentiles of the Student t distribution with
# 10 degrees of freedom.
qt(c(.025, .975), df = 10)
### Chi-squared distribution
# Find the 95th percentile of the Chi-Squared distribution with
# 10 degrees of freedom.
qchisq(.95, df = 10)
#### F distribution
# Find the 95th percentile of the F distribution with (5, 2) degrees of freedom.
qf(.95, df1 = 5, df2 = 2)
### General rule: d* functions (dpois, dbinom, ...) give the point mass/density;
### p* functions (ppois, pbinom, ...) give the cumulative probability.
#### Hypothesis test
# One-sample z test, applicable when n > 30 and the population variance is known.
# Setup: hypothesised mean mu = 7.5, population sd = 0.66.
# NOTE(review): the original comment states n = 100, but the standard error
# below divides by sqrt(10); the divisor is kept as written -- confirm the
# intended sample size.
# NOTE(review): the original comment words the null as "xbar(meanlogMPC) > mu";
# confirm the intended direction of the test before reading off a p-value.

# Test statistic: (sample mean - mu) / standard error.
z_stat <- (mean(logMPC) - 7.5) / (0.66 / sqrt(10))
z_stat
# Lower-tail standard-normal probability of the statistic. Computed from z_stat
# rather than the hand-copied constant 1.319, so it cannot go stale when the
# data change.
pnorm(z_stat)
#### ANOVA
# One-way ANOVA of logMPC across the Social_Group categories.

# Box plot of logMPC within each group.
boxplot(logMPC ~ Social_Group)

# Fit the model and print the ANOVA table.
aov.mpc <- aov(logMPC ~ Social_Group)
summary(aov.mpc)

# Tukey HSD pairwise comparisons between groups: print them, then plot them.
tk <- TukeyHSD(aov.mpc)
tk
plot(tk)
##### Regression
# Column-bind related variables into matrices for use in the models below.

# Outcome: one-column matrix holding the poverty indicator.
Y <- cbind(poverty)

# Household asset variables.
Assets <- cbind(
  bedstead, almirah, chair,
  radio, tv, fan,
  fridge, cycle, car
)

# Religion and social-group indicators plus household size.
Demography <- cbind(
  Hindu, Islam,
  SC, ST, OBC,
  HH_Size
)

# LPG indicator and the electricity variable.
Access <- cbind(LPG, electricity)
AC,
stove,