####################### # Composite Indicator # ####################### Dataset=read.csv("Passito.csv",header=TRUE,sep=";") head(Dataset) dim(Dataset) X=Dataset[,17:22] head(X) summary(X) dim(X) n=dim(X)[1] n k=dim(X)[2] k cor(X) #normalization of the k=6 variables Z=data.frame(matrix(0,nrow=n,ncol=k)) Z[,1:5]=(X[,1:5]-min(X[,1:5])+1/n)/(max(X[,1:5])-min(X[,1:5])+2/n) Z[,6]=(X[,6]-min(X[,6])+1/n)/(max(X[,6])-min(X[,6])+2/n) dim(Z) head(Z) w=rep(1/k,times=k) w #Fisher combination as product between the matrix of transformed values and #vector of weights Z=as.matrix(Z) ncol(Z) nrow(Z) yF=-log(1-Z,base=2)%*%w yF summary(yF) #final index normalization yF_norm=(yF-min(yF))/(max(yF)-min(yF)) summary(yF_norm) hist(yF_norm,freq=FALSE) lines(density(yF_norm), col ="blue", lty = 2, lwd = 2) hist(yF_norm,freq=FALSE,ylim=c(0,7)) lines(density(yF_norm), col ="blue", lty = 2, lwd = 2) #Additive combination yA=Z%*%w hist(yA,freq=FALSE,ylim=c(0,7)) lines(density(yA), col ="red", lty = 1, lwd = 2) lines(density(yF_norm), col ="blue", lty = 2, lwd = 2) plot(yA,yF_norm) abline(a=0,b=1,col="green",lty=1,lwd=2) plot(Z[,1],yF_norm) plot(Z[,2],yF_norm) plot(Z[,3],yF_norm) plot(Z[,4],yF_norm) plot(Z[,5],yF_norm) #################### # Cluster Analysis # #################### Dataset=read.csv("Passito.csv",header=TRUE,sep=";") head(Dataset) #hierarchical CA X=Dataset[,-seq(1:5)] dim(X) n=dim(X)[1] head(X) summary(X) X=scale(X) #standardization of the variables d=dist(X,method="euclidean") HCA=hclust(d,method="ward.D") plot(HCA) groups=cutree(HCA,k=5) #rect.hclust(HCA,k=5,border="red") HCA$merge # "-" indicates a unit; no sign indicates the group created at that step HCA$height # dissimilarity HCA$dist.method #k-means nonhierarchical CA X=Dataset[,-seq(1:5)] dim(X) n=dim(X)[1] head(X) summary(X) X=scale(X) #standardization of the variables g=5 NCA=kmeans(X,centers=g,iter.max=100) NCA$iter NCA$size NCA$centers TD=NCA$totss WD=NCA$tot.withinss BD=NCA$betweenss R_squared=BD/TD R_squared groups=matrix(0,nrow=n,ncol=2) units=seq(from=1,to=n,by=1) groups[,1]=units NCA$cluster groups[,2]=NCA$cluster colnames(groups)=c("unit","cluster") #see the units belonging to each cluster for (c in 1:g){ print(c("Cluster",c)) print(groups[groups[,2]==c,1]) } Data=cbind(Dataset[,seq(1:5)],groups) head(Data) T.age=table(Data$AGE_CLAS,Data$cluster) apply(T.age,2,function(x) 100*x/margin.table(T.age,1)) T.sex=table(Data$SEX,Data$cluster) apply(T.sex,2,function(x) 100*x/margin.table(T.sex,1)) ########################### # Factor Analysis and PCA # ########################### X=read.csv("Passito.csv",header=TRUE,sep=";") head(X) X=X[,-c(1,2,3,4,5)] head(X) dim(X) F=factanal(X,factors=5,rotation="none",scores="regression") R=F$correlation R e=eigen(R) e$values barplot(rep(1,ncol(X)),col="yellow",ylim=c(0,max(e$values)+2)) barplot(e$values,main="Scree plot",ylab="eigenvalue",add=TRUE) F F=factanal(X,factors=5,rotation="varimax",scores="regression") F F=factanal(X,factors=5,rotation="promax",scores="regression") F L=F$loadings L U=F$uniquenesses U Comm=1-U Comm R=F$correlation R G=F$rotmat G FScores=F$scores FScores # factor by factor p=F$PVAL p PC=prcomp(X,retx =TRUE,center=TRUE,scale.=TRUE) e_values=PC$sdev^2 e_values e_vectors=PC$rotation e_vectors YScores=PC$x YScores ######################## # Regression Analysis # ######################## Dataset=read.csv("Passito.csv",header=TRUE,sep=";") head(Dataset) attach(Dataset) plot(LIKE_AROMA,LIKE_PAS) plot(LIKE_SWEET,LIKE_PAS) plot(LIKE_ALCOHOL,LIKE_PAS) plot(LIKE_TASTE,LIKE_PAS) # investigation on multicollinearity reg1=lm(LIKE_AROMA ~ LIKE_SWEET+LIKE_ALCOHOL+LIKE_TASTE) summary(reg1) e1=reg1$residuals SST=sum((LIKE_AROMA-mean(LIKE_AROMA))^2) SSE=sum(e1^2) R1=1-SSE/SST VIF1=1/(1-R1) VIF1 reg2=lm(LIKE_SWEET ~ LIKE_AROMA+LIKE_ALCOHOL+LIKE_TASTE) summary(reg2) e2=reg2$residuals SST=sum((LIKE_SWEET-mean(LIKE_SWEET))^2) SSE=sum(e2^2) R2=1-SSE/SST VIF2=1/(1-R2) VIF2 reg3=lm(LIKE_ALCOHOL ~ LIKE_AROMA+LIKE_SWEET+LIKE_TASTE) summary(reg3) e3=reg3$residuals SST=sum((LIKE_ALCOHOL-mean(LIKE_ALCOHOL))^2) SSE=sum(e3^2) R3=1-SSE/SST VIF3=1/(1-R3) VIF3 reg4=lm(LIKE_TASTE ~ LIKE_AROMA+LIKE_SWEET+LIKE_ALCOHOL) summary(reg4) e4=reg4$residuals SST=sum((LIKE_TASTE-mean(LIKE_TASTE))^2) SSE=sum(e4^2) R4=1-SSE/SST VIF4=1/(1-R4) VIF4 # regression analysis reg=lm(LIKE_PAS ~ LIKE_AROMA+LIKE_SWEET+LIKE_ALCOHOL+LIKE_TASTE) summary(reg) reg=lm(LIKE_PAS ~ LIKE_AROMA+LIKE_SWEET+LIKE_TASTE) summary(reg) # diagnostic analysis e=reg$residuals plot(e) pred=reg$fitted.values plot(pred,e) plot(LIKE_AROMA,e) plot(LIKE_SWEET,e) plot(LIKE_TASTE,e) hist(e) qqnorm(e) LIKE_AROMA=5 LIKE_SWEET=6 LIKE_ALCOHOL=5 LIKE_TASTE=6 LIKE_PAS=0.128+0.429*LIKE_AROMA+0.197*LIKE_SWEET+0.248*LIKE_TASTE LIKE_PAS