[R] ggplot Geom_Bar Tips

2019. 10. 20. 17:21분석 R/구현


  • 필요 패키지 : tidyverse , RColorBrewer
  • 필요 데이터 : NHIS.RData

 

Library Load

library(tidyverse)
library(RColorBrewer)
#library(ggplot2)
#library(dplyr)

NHIS Data Load

# Restore the objects stored in the RData file (the path is relative to the
# working directory), then print the structure of the NHIS data frame.
load(file = "./../Data/NHIS.RData")
str(object = NHIS)
## 'data.frame':    2047953 obs. of  10 variables:
##  $ IDV_ID        : int  262352 463733 605007 605007 605007 616529 571811 964981 29823 128251 ...
##  $ SEX           : Factor w/ 2 levels "1","2": 2 1 2 2 2 2 2 2 2 2 ...
##  $ AGE_GROUP     : Factor w/ 18 levels "1","2","3","4",..: 13 16 11 11 11 7 4 1 17 12 ...
##  $ FORM_CD       : Factor w/ 2 levels "2","3": 2 2 2 2 2 2 2 2 2 2 ...
##  $ DSBJT_CD      : Factor w/ 30 levels "0","1","10","11",..: 5 5 5 5 5 5 5 7 7 7 ...
##  $ MAIN_SICK     : Factor w/ 1431 levels "A02","A03","A04",..: 466 452 452 455 455 68 455 731 71 731 ...
##  $ VSCN          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ RECN          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ EDEC_TRAMT    : int  5000 16880 12880 12880 10720 16880 16880 14670 14000 15550 ...
##  $ EDEC_SBRDN_AMT: int  1500 5000 3800 3800 3200 5000 5000 3000 1500 4600 ...

 

geom_bar()

옵션 설명
alpha 막대 투명도
color 막대 테두리 색깔
fill 막대 내부 색깔
group 그룹 지정
linetype 막대 테두리 선 종류
size 막대 테두리 두께

 

Recall
  • stat = "count" : 해당 값의 빈도를 나타냄
  • stat = "identity" : aes(x, y)에서 x에 해당하는 y값을 그대로 막대 높이로 사용

 

formula

ggplot(data , 
       aes(x= variable_1 , 
           y= variable_2 ,
           alpha , color , ...
           )) +
  geom_bar(
    aes(
      alpha ,  
      color ,  
      fill , 
      group , 
      size  ,  
      linetype ) , 
    position = ... , 
    stat = "identity", ...
    )

Aesthetics 0

step 1
  • SEX , FORM_CD 결합하여 변수 하나 생성하기
# Add a `combine` column holding one label per row that joins SEX and
# FORM_CD, e.g. "SEX : 2 / FORM_CD : 3".
NHIS2 <- mutate(
  NHIS,
  combine = paste0("SEX : ", SEX, " / ", "FORM_CD : ", FORM_CD)
)
# Preview the first rows of the result.
head(NHIS2)
##   IDV_ID SEX AGE_GROUP FORM_CD DSBJT_CD MAIN_SICK VSCN RECN EDEC_TRAMT
## 1 262352   2        13       3       12       H25    1    1       5000
## 2 463733   1        16       3       12       H04    1    1      16880
## 3 605007   2        11       3       12       H04    1    1      12880
## 4 605007   2        11       3       12       H10    1    1      12880
## 5 605007   2        11       3       12       H10    1    1      10720
## 6 616529   2         7       3       12       B30    1    1      16880
##   EDEC_SBRDN_AMT               combine
## 1           1500 SEX : 2 / FORM_CD : 3
## 2           5000 SEX : 1 / FORM_CD : 3
## 3           3800 SEX : 2 / FORM_CD : 3
## 4           3800 SEX : 2 / FORM_CD : 3
## 5           3200 SEX : 2 / FORM_CD : 3
## 6           5000 SEX : 2 / FORM_CD : 3
step 2
  • 위에서 만든 변수를 이용하여 AGE_GROUP에 따른 비율 BarGraph 생성하기
# Share of each AGE_GROUP within every SEX / FORM_CD combination.
# position = "fill" rescales every bar to a total of 1, so the value axis
# shows proportions instead of raw counts.
ggplot(NHIS2, aes(x = combine, fill = AGE_GROUP)) +
  # `stat` is written in full rather than relying on partial argument
  # matching.
  geom_bar(stat = "count", alpha = 0.5, position = "fill") +
  theme_classic() +
  # Swap the axes so the `combine` labels run along the vertical axis.
  coord_flip() +
  labs(y = "ratio")

Aesthetics 1

  • NHIS 데이터를 활용해 나이별로 Bar Graph 시각화

  • Hint
    • x = AGE_GROUP
    • fill = AGE_GROUP
    • stat = count
    • color = black
    • alpha = 0.5
step 1 AGE_GROUP 빈도 Bar Graph
# Bar chart of the number of records in each age group; stat = "count"
# tallies the rows for every x value.
NHIS %>%
  ggplot(mapping = aes(x = AGE_GROUP)) +
  geom_bar(stat = "count")
step 2
  • 테두리 : black
  • 음영 : 0.5
  • theme : theme_classic()
# Frequency chart coloured by age group, with black outlines and
# half-transparent fills on the classic theme.
# AGE_GROUP is already a factor, so it is mapped to `fill` directly; wrapping
# it in factor() would only turn the legend title into "factor(AGE_GROUP)".
ggplot(NHIS, aes(x = AGE_GROUP, fill = AGE_GROUP)) +
  geom_bar(stat = "count", color = "black", alpha = 0.5) +
  theme_classic()
step 3 fill 색깔 Set3로 변경
# Recolour the bars with the ColorBrewer "Set3" palette.
# NOTE: Set3 offers at most 12 colours while AGE_GROUP has 18 levels, so this
# palette alone cannot give every age group its own colour.
# AGE_GROUP is already a factor, so it is mapped to `fill` without factor().
ggplot(NHIS, aes(x = AGE_GROUP, fill = AGE_GROUP)) +
  geom_bar(stat = "count", color = "black", alpha = 0.5) +
  theme_classic() +
  scale_fill_brewer(palette = "Set3")
step 4 Set3 색깔 늘리기
# Number of distinct age groups = number of fill colours required.
colourCount <- length(unique(NHIS$AGE_GROUP))
# colorRampPalette() returns a function that interpolates between the 12
# Set3 base colours, so any number of colours can be requested.
getPalette <- colorRampPalette(brewer.pal(12, "Set3"))

# AGE_GROUP is already a factor, so it is mapped to `fill` without factor().
ggplot(NHIS, aes(x = AGE_GROUP, fill = AGE_GROUP)) +
  geom_bar(stat = "count", color = "black", alpha = 0.5) +
  theme_classic() +
  # Supply one interpolated colour per age group.
  scale_fill_manual(values = getPalette(colourCount))
step 5 정렬하기
  • 빈도(count)가 큰 순서대로 age_group 정렬하기
# One interpolated Set3 colour per distinct age group.
colourCount <- length(unique(NHIS$AGE_GROUP))
getPalette <- colorRampPalette(brewer.pal(12, "Set3"))

# Tally the rows per age group into a `count` column, then draw the bars
# from the pre-computed values (stat = "identity").
NHIS %>%
  count(AGE_GROUP, name = "count") %>%
  # reorder() by the negated count places the most frequent group first.
  ggplot(aes(x = reorder(AGE_GROUP, -count), y = count, fill = AGE_GROUP)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.5) +
  theme_classic() +
  scale_fill_manual(values = getPalette(colourCount)) +
  labs(x = "age_group")
step 6
  • 빈도 text 추가 및 돌리기
  • geom_text
    • hjust = 1.1
    • size = 3
  • coord_flip()
# One interpolated Set3 colour per distinct age group.
colourCount <- length(unique(NHIS$AGE_GROUP))
getPalette <- colorRampPalette(brewer.pal(12, "Set3"))

# Tally the rows per age group into a `count` column, then draw the bars
# from the pre-computed values (stat = "identity").
NHIS %>%
  count(AGE_GROUP, name = "count") %>%
  # reorder() by the negated count places the most frequent group first.
  ggplot(aes(x = reorder(AGE_GROUP, -count), y = count, fill = AGE_GROUP)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.5) +
  theme_classic() +
  scale_fill_manual(values = getPalette(colourCount)) +
  labs(x = "age_group") +
  # Print each frequency as text; hjust = 1.1 right-aligns the label with a
  # small offset from the bar's value.
  geom_text(
    aes(label = count, group = AGE_GROUP),
    hjust = 1.1, size = 3
  ) +
  # Turn the chart sideways so the bars run horizontally.
  coord_flip()

 

Aesthetics 2

step 1
  • AGE_GROUP별로 성별 비율 BAR GRAPH 시각화
  • color : white
  • alpha : 0.5
# Proportion of each sex within every age group: position = "fill" stacks
# the counts and rescales each bar to a total of 1.
NHIS %>%
  ggplot(mapping = aes(x = AGE_GROUP, fill = SEX)) +
  geom_bar(stat = "count", position = "fill", color = "white", alpha = 0.5)
step 2
  • x axis 1~9 / 18~10으로 바꾸기
# Re-level AGE_GROUP so the x axis runs 1..9 followed by 18 down to 10.
# The column is overwritten in NHIS, so every later plot uses this order.
NHIS <- NHIS %>%
  mutate(AGE_GROUP = factor(AGE_GROUP, levels = c(1:9, 18:10)))
NHIS %>%
  ggplot(mapping = aes(x = AGE_GROUP, fill = SEX)) +
  geom_bar(stat = "count", position = "fill", color = "white", alpha = 0.5)

step 3

  • 성별 색깔 뒤집기
# fct_rev() reverses the level order of SEX, which swaps the order in which
# the two sexes are coloured and stacked.
NHIS %>%
  ggplot(mapping = aes(x = AGE_GROUP, fill = fct_rev(SEX))) +
  geom_bar(stat = "count", position = "fill", color = "white", alpha = 0.5)

 

Aesthetics 3

  • 성별 비율 시각화
    • x = factor(1)
# One stacked bar for the whole data set: x is the constant factor(1), and
# position = "fill" rescales the counts per sex to proportions.
# `stat` is a layer argument, not an aesthetic, so it is passed to
# geom_bar() rather than to aes().
fig <- ggplot(data = NHIS, aes(x = factor(1), fill = SEX)) +
  geom_bar(stat = "count", position = "fill")
fig
  • AGE_GROUP 별로 나눠서 그리기
    • facet_wrap(. ~ AGE_GROUP , ncol = 6)
# Split the plot into one panel per age group, six panels per row.
fig <- fig +
  facet_wrap(facets = . ~ AGE_GROUP, ncol = 6)
print(fig)
  • 원형으로 만들기
    • coord_polar(theta = "y")
# Map the stacked proportions onto the angle so each panel becomes a pie.
fig <- fig +
  coord_polar(theta = "y")
# Display the chart with titles and the legend underneath; this decorated
# version is only printed, `fig` itself is not reassigned.
fig +
  theme(legend.position = "bottom") +
  labs(x = "", y = "AGE GROUP", title = "AGE GROUP별 성별의 비율")

Aesthetics 3-2

  • (2)와 같은 비율 시각화를 원형그래프로 시각화 및 비율 넣기
    • aes(x = factor(1), y = n, fill = SEX) , stat = "identity"
    • mutate(ratio = n/sum(n) , pos = cumsum(ratio) - ratio / 2)
    • geom_text(aes(label= , y= ), size = 3)
    • facet_wrap(. ~ AGE_GROUP , ncol = 6)
    • coord_polar(theta = "y")
step 1
  • AGE_GROUP , SEX 별로 빈도수 세기
# Number of rows for every AGE_GROUP x SEX combination, stored in column `n`.
# NOTE: the result is kept under the name `table`, which is also the name of
# a base R function.
table <- summarise(group_by(NHIS, AGE_GROUP, SEX), n = n())

# Preview the first rows.
head(table)
## # A tibble: 6 x 3
## # Groups:   AGE_GROUP [3]
##   AGE_GROUP SEX       n
##   <fct>     <fct> <int>
## 1 1         1     85673
## 2 1         2     78309
## 3 2         1     52679
## 4 2         2     46670
## 5 3         1     29980
## 6 3         2     27571
step 2
  • AGE_GROUP 별로 ratio 변수 만들기
  • pos = cumsum(ratio) - ratio / 2
# Within each age group, compute every sex's share of the rows (`ratio`) and
# the midpoint of its segment on the cumulative 0-1 scale (`pos`).
table2 <- mutate(
  group_by(table, AGE_GROUP),
  ratio = n / sum(n),
  pos = cumsum(ratio) - ratio / 2
)

# Preview the first rows.
head(table2)
## # A tibble: 6 x 5
## # Groups:   AGE_GROUP [3]
##   AGE_GROUP SEX       n ratio   pos
##   <fct>     <fct> <int> <dbl> <dbl>
## 1 1         1     85673 0.522 0.261
## 2 1         2     78309 0.478 0.761
## 3 2         1     52679 0.530 0.265
## 4 2         2     46670 0.470 0.765
## 5 3         1     29980 0.521 0.260
## 6 3         2     27571 0.479 0.760
step 3
  • step 2에서 만든 테이블을 활용하여 성별 기준으로 bargraph 그리기
  • position = position_fill(reverse = TRUE)
# Stacked bar built from the pre-computed counts (stat = "identity") and
# rescaled to proportions; reverse = TRUE flips the stacking order of SEX.
vis <- ggplot(
  data = table2,
  mapping = aes(x = factor(1), y = n, fill = SEX)
) +
  geom_bar(stat = "identity", position = position_fill(reverse = TRUE))

print(vis)
step 4
  • text 중앙에 표시하기
    • geom_text(aes(label = paste0(round(ratio, 2) * 100, "%") , y = pos), size = 3)
    • facet_wrap(.~AGE_GROUP , ncol = 6 )
# Write each share as a percentage at the pre-computed `pos` height, then
# split the plot into one panel per age group, six panels per row.
vis <- vis +
  geom_text(
    mapping = aes(y = pos, label = paste0(round(ratio, 2) * 100, "%")),
    size = 3
  ) +
  facet_wrap(facets = . ~ AGE_GROUP, ncol = 6)

print(vis)
step 5
  • 원형으로 만들기
    • coord_polar
    • theme(legend.position = "bottom")
# Map the stacked proportions onto the angle so each panel becomes a pie,
# then add titles and place the legend underneath.
vis <- vis +
  coord_polar(theta = "y") +
  theme(legend.position = "bottom") +
  labs(x = "", y = "AGE GROUP", title = "AGE GROUP별 성별의 비율")

print(vis)

 

728x90