Tidyverse (ggplot)

2019. 3. 19. 00:25분석 R/EDA

Tidyverse (ggplot)

<html 필요하시면 댓글에 글 남겨주세요.>

2. Data Visualization

2.1 Grammar of Graphics by ggplot2 package

  • developed by Hadley Wickham
  • based on the philosophy of hierarchical architecture


Library Loading

library(tidyverse)
library(MASS)
#library(ggplot2)

2.2 Understanding ggplot2 via iris data

2.2.1 iris data






# Preview the first rows of the built-in iris data set (console output follows).
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Show the structure: 150 rows, four numeric measurements and the Species factor.
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...


2.2.2 Point plot = scatter plot



# Scatter plot of sepal width against sepal length, written as one expression.
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point()

# Store the base (data + aesthetic mapping) once so different geoms can be
# layered onto it.
myplot <- ggplot(iris, aes(Sepal.Length, Sepal.Width))

# Points only.
myplot +
  geom_point()

# Lines only.
myplot +
  geom_line()

# Points and lines layered on the same base.
myplot +
  geom_point() +
  geom_line()


size

# Constant point size (set outside aes); colour is mapped to Species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point(size = 3)

# Same settings for the petal measurements.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, colour = Species)) +
  geom_point(size = 3)

# Point size mapped to a variable (inside aes), which adds a size legend.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(mapping = aes(size = Sepal.Length))


color

# One fixed colour for every point (set outside aes).
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(colour = "skyblue")

# Colour mapped to Species (inside aes), with a constant 50% transparency.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(mapping = aes(colour = Species), alpha = 0.5)


alpha

# Constant transparency for every point.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(alpha = 0.6)

# Transparency mapped to a variable.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(mapping = aes(alpha = Sepal.Length))


shape

# One fixed plotting symbol (shape code 5) for every point.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(shape = 5)

# Plotting symbol mapped to Species.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(mapping = aes(shape = Species))


Example

# Combined example: shape by species, transparency and fill by sepal length,
# with a fixed red outline.
#
# `fill` is only drawn for point shapes 21-25. The default discrete shape
# palette uses solid glyphs that ignore `fill`, so without an explicit shape
# scale the fill mapping adds a legend but never appears on the points.
# Fillable circle / triangle / square shapes are therefore selected here.
ggplot(iris, aes(Petal.Length, Petal.Width)) +
  geom_point(
    aes(shape = Species, alpha = Sepal.Length, fill = Sepal.Length),
    color = "red"
  ) +
  scale_shape_manual(
    values = c(setosa = 21, versicolor = 24, virginica = 22)
  )


2.2.3 Box plot



# Basic box plot of petal length per species.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_boxplot()

# Box plot with the raw observations overlaid.
# - outlier.shape = NA: the jitter layer already draws every observation, so
#   the box plot's own outlier markers would plot those points a second time.
# - height = 0: by default geom_jitter() also adds vertical noise, which would
#   shift the displayed Petal.Length values; only horizontal spread is wanted.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(height = 0)

# Same plot with the boxes filled by species.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_boxplot(aes(fill = Species), outlier.shape = NA) +
  geom_jitter(height = 0)

# Same plot with the axes flipped (horizontal boxes). The jitter is applied
# before the flip, so it still only spreads points across the species axis.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_boxplot(aes(fill = Species), outlier.shape = NA) +
  geom_jitter(height = 0) +
  coord_flip()


2.2.4 Bar plot



  • stat = count
    • 해당 값의 빈도를 나타냄
  • stat = identity
    • aes(x,y) x에 해당하는 y값
# stat = "count": bar height is the number of rows at each x value.
ggplot(iris, aes(x = Species)) +
  geom_bar(stat = "count")

# Counting on a numeric x: one bar per distinct petal length.
ggplot(iris, aes(x = Petal.Length)) +
  geom_bar(stat = "count")

# Counts again, with the bars filled by species.
ggplot(iris, aes(x = Species)) +
  geom_bar(mapping = aes(fill = Species), stat = "count")

# stat = "identity": the supplied y values are used as bar heights; rows that
# share an x value are stacked on top of each other.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_bar(stat = "identity")

# The same plot with the stacking position spelled out.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_bar(stat = "identity", position = "stack")

# Identity bars filled by species.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_bar(mapping = aes(fill = Species), stat = "identity")

# Identity bars filled by species, drawn horizontally.
ggplot(iris, aes(x = Species, y = Petal.Length)) +
  geom_bar(mapping = aes(fill = Species), stat = "identity") +
  coord_flip()


gather를 사용해 변수를 모아서 한번에 표현하기

# gather() comes from tidyr, which library(tidyverse) already attaches.
# Collect the four measurement columns into key/value pairs, keeping Species.
long_iris <- gather(
  iris,
  key = "key",
  value = "value",
  Sepal.Length:Petal.Width
)
head(long_iris)
##   Species          key value
## 1  setosa Sepal.Length   5.1
## 2  setosa Sepal.Length   4.9
## 3  setosa Sepal.Length   4.7
## 4  setosa Sepal.Length   4.6
## 5  setosa Sepal.Length   5.0
## 6  setosa Sepal.Length   5.4


  • position
    • stack
    • fill
    • dodge
# position = "stack": segments for each measurement are piled on one another.
ggplot(long_iris, aes(x = Species, y = value)) +
  geom_bar(mapping = aes(fill = key), stat = "identity", position = "stack")

# position = "fill": stacked, then rescaled so every bar has the same height.
ggplot(long_iris, aes(x = Species, y = value)) +
  geom_bar(mapping = aes(fill = key), stat = "identity", position = "fill")

# position = "dodge": measurements are placed side by side.
ggplot(long_iris, aes(x = Species, y = value)) +
  geom_bar(mapping = aes(fill = key), stat = "identity", position = "dodge")


  • 주의
    • Default : position = stack
    • y value를 누적해서 쌓은 값이 y축값이 됨.
## No position given, so geom_bar() falls back to its default, "stack".
## The red outline marks each individual stacked segment.
ggplot(long_iris, aes(x = Species, y = value)) +
  geom_bar(
    mapping = aes(fill = key),
    stat = "identity",
    alpha = 0.9,
    colour = "red"
  )


Example

  • 주의
    • 그림에서 보이는 바와 같이 아래쪽에는 진하게 표시되고 위로 갈수록 하얘짐
    • 이유 : y값들이 중복적으로 겹쳐서 진하게 표시되는 것.
# Drop vs, am, cyl and carb. select() is namespaced because MASS, loaded
# after the tidyverse, also exports a select().
mtcars2 <- dplyr::select(mtcars, -vs, -am, -cyl, -carb)

# Long format: every column except gear becomes a (variable, value) pair.
long <- gather(mtcars2, key = "variable", value = "value", -gear)

# Shared base: gear on x, value on y, fill by the original column name.
base <- ggplot(long, aes(gear, value, fill = variable))

# Dodged, almost transparent bars: where several rows overlap, the
# overplotting shows up as a darker region.
base +
  geom_bar(
    stat = "identity",
    position = "dodge",
    colour = "yellow",
    alpha = 0.1
  )

# List the rows drawn on top of each other for one bar (disp at gear == 3).
filter(long, variable == "disp", gear == 3)
##    gear variable value
## 1     3     disp 258.0
## 2     3     disp 360.0
## 3     3     disp 225.0
## 4     3     disp 360.0
## 5     3     disp 275.8
## 6     3     disp 275.8
## 7     3     disp 275.8
## 8     3     disp 472.0
## 9     3     disp 460.0
## 10    3     disp 440.0
## 11    3     disp 120.1
## 12    3     disp 318.0
## 13    3     disp 304.0
## 14    3     disp 350.0
## 15    3     disp 400.0


2.2.5 Trellis plot


  • facet_grid(x~y)
    • 격자 표시로 비교하기가 용이함.
    • ncol 조정 불가능
    • 꽉 찬 격자형태로만 나옴.
  • facet_wrap(x~y)
    • 개별적으로 그려주기 때문에 비교하기가 용이하지 않음.
    • 빈공간이 나올 수 있음.
    • ncol 조정 가능
# One panel per species, laid out as columns.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point(size = 3) +
  facet_grid(. ~ Species)

## A separate linear regression is fitted inside each panel.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point(mapping = aes(colour = Species), size = 3) +
  facet_grid(. ~ Species) +
  stat_smooth(method = "lm")

## Theme elements can be customised too (here: gray major grid lines).
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point(mapping = aes(colour = Species), size = 3) +
  facet_grid(. ~ Species) +
  stat_smooth(method = "lm") +
  theme(panel.grid.major = element_line(colour = "gray"))

## facet_grid: cyl defines the rows and am the columns of a full grid.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(colour = "black", shape = 21) +
  facet_grid(cyl ~ am)

## facet_wrap: one panel per cyl/am combination, wrapped into three columns.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(colour = "black", shape = 21) +
  facet_wrap(cyl ~ am, ncol = 3)


  • 비교 facet_wrap , facet_grid


# Cars93 is the data set shipped with MASS.
# facet_grid: Origin defines the rows and Type the columns of a full grid.
ggplot(Cars93, aes(x = Weight, y = MPG.highway)) +
  geom_point(colour = "black", shape = 21) +
  facet_grid(Origin ~ Type)

# facet_wrap: one panel per Origin/Type combination, wrapped into six columns.
ggplot(Cars93, aes(x = Weight, y = MPG.highway)) +
  geom_point(colour = "black", shape = 21) +
  facet_wrap(Origin ~ Type, ncol = 6)





********************************


728x90