Please note: This notebook uses open access data
The following notebook was created by converting the Python code in Data Exploration of Framingham Heart Study Teaching Dataset into R. If you wish to perform any of the data visualization or manipulation done in this notebook in Python, please cite the original notebook.
Original Author: Qiong Liu
Notebook Author: Owen Dominguez
The dataset used in the following notebook was developed using the longitudinal Framingham heart study teaching dataset as the data source. The teaching dataset includes three clinical examinations and 20-year follow-up data based on a subset of the original Framingham cohort participants. This dataset was created for teaching and training purposes, and certain measures were used to create anonymized versions; thus the data is unsuitable for publication. Detailed documentation on the variables can be found here.
In this notebook, we will demonstrate how to pull the object file of the Framingham teaching dataset from BioData Catalyst data commons into a BRH workspace, and perform data exploration and visualization equivalent to the original Python code using R packages.
# Option to suppress warnings (-1 = suppressed, 0 = unsuppressed)
options(warn=-1)
#Installing required libraries
install.packages(c('tidyverse', 'tidyr', 'haven', 'reshape'))
#Importing libraries
library(tidyverse)
library(tidyr)
library(haven)
library(reshape)
# Pull the .sav object file into the working directory with the gen3 client
system("gen3 drs-pull object dg.4503/c97507dd-bb79-45ce-9186-3d14ad823f81")
# Read the file into a data frame with haven when the pull produced it;
# otherwise print a hint pointing at the gen3 command above
if (!file.exists("frmgham2.sav")) {
  print("check your gen3 command")
} else {
  print("file successfully created")
  frm_data <- read_sav("frmgham2.sav", user_na = TRUE)
}
[1] "file successfully created"
At the moment the Framingham data is in a state where graphing any value in relation to the patient's demographic information — sex, age, education, bmi, etc — will result in a graph where there are too many bins to sort participants into. Due to this, we will be unable to make any meaningful conclusions with the data in this form. Thus data manipulation is required.
The manipulation we will do is the following:
# Changing patient ID column type to string
#
# Convert every element of a vector to its character representation.
#
# col: vector whose values should be turned into strings.
# Returns a plain character vector of the same length as `col`. Empty input
# yields character(0); the previous element-wise `1:length(col)` loop
# returned a length-one NA in that case.
col_to_char <- function(col) {
  as.character(col)
}
# Store the patient ID as a string
frm_data$RANDID <- col_to_char(frm_data$RANDID)
# Mapping gender value onto male or female: SEX code 1 becomes "Male", every
# other code becomes "Female". The comparison is vectorised, so a missing SEX
# code produces NA instead of aborting a scalar `if`.
frm_data$SEX_GROUP <- ifelse(frm_data$SEX == 1, "Male", "Female")
# Filtering subset of data for first visit participants
visit_1 <- frm_data %>% filter(PERIOD == 1)
head(visit_1, n = 5)
RANDID | SEX | TOTCHOL | AGE | SYSBP | DIABP | CURSMOKE | CIGPDAY | BMI | DIABETES | ⋯ | HYPERTEN | TIMEAP | TIMEMI | TIMEMIFC | TIMECHD | TIMESTRK | TIMECVD | TIMEDTH | TIMEHYP | SEX_GROUP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> |
2448 | 1 | 195 | 39 | 106.0 | 70 | 0 | 0 | 26.97 | 0 | ⋯ | 0 | 8766 | 6438 | 6438 | 6438 | 8766 | 6438 | 8766 | 8766 | Male |
6238 | 2 | 250 | 46 | 121.0 | 81 | 0 | 0 | 28.73 | 0 | ⋯ | 0 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | Female |
9428 | 1 | 245 | 48 | 127.5 | 80 | 1 | 20 | 25.34 | 0 | ⋯ | 0 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | Male |
10552 | 2 | 225 | 61 | 150.0 | 95 | 1 | 30 | 28.58 | 0 | ⋯ | 1 | 2956 | 2956 | 2956 | 2956 | 2089 | 2089 | 2956 | 0 | Female |
11252 | 2 | 285 | 46 | 130.0 | 84 | 1 | 23 | 23.10 | 0 | ⋯ | 1 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | 4285 | Female |
# Functions to categorize age, BMI, and education value into groups

# Bin ages into labelled groups.
#
# age_list: numeric vector of ages (may contain NA).
# Returns a character vector of the same length. Ages up to 40 get the label
# "<youngest observed age>-40 yr"; older ages fall into "41-50 yr" through
# "81-90 yr" (upper bounds inclusive). Missing ages and ages above 90 are
# labelled with the string "NA".
age_group <- function(age_list) {
  ages <- as.numeric(age_list)
  known_ages <- ages[!is.na(ages)]
  # Missing values are left out of the minimum so that a single NA cannot
  # turn the first label into "NA-40 yr".
  youngest <- if (length(known_ages) > 0) min(known_ages) else NA
  labels <- c(
    paste0(as.character(youngest), "-40 yr"),
    "41-50 yr",
    "51-60 yr",
    "61-70 yr",
    "71-80 yr",
    "81-90 yr"
  )
  # left.open = TRUE makes each upper bound inclusive: 40 -> 0, 41..50 -> 1,
  # ..., 81..90 -> 5, anything above 90 -> 6 (no label, becomes "NA" below).
  bin <- findInterval(ages, c(40, 50, 60, 70, 80, 90), left.open = TRUE) + 1L
  groups <- labels[bin]
  groups[is.na(groups)] <- "NA"
  groups
}
# Bin BMI values into weight categories.
#
# bmi_list: numeric vector of BMI values (may contain NA).
# Returns a character vector of the same length: "underweight" below 18.5,
# "healthy" from 18.5 to below 25, "overweight" from 25 to below 30, and
# "obese" from 30 upward. Missing values are labelled with the string "NA".
# Empty input returns character(0).
bmi_group <- function(bmi_list) {
  labels <- c("underweight", "healthy", "overweight", "obese")
  # findInterval() counts how many cut points are <= the value (0..3).
  bin <- findInterval(as.numeric(bmi_list), c(18.5, 25, 30)) + 1L
  groups <- labels[bin]
  groups[is.na(groups)] <- "NA"
  groups
}
# Map education codes onto descriptive labels.
#
# edu_list: numeric vector of education codes (may contain NA).
# Returns a character vector of the same length. Codes 1 to 4 map to the four
# labels below, in order; missing values and any other code are labelled with
# the string "NA". Empty input returns character(0).
educ_group <- function(edu_list) {
  labels <- c(
    "0-11 years",
    "High School Diploma, GED",
    "Some College, Vocational School",
    "College (BS, BA) degree or more"
  )
  groups <- labels[match(as.numeric(edu_list), 1:4)]
  groups[is.na(groups)] <- "NA"
  groups
}
# Function to turn the counts into a table in the readable form
#
# odata: long-format counts whose first column is the group, whose second
#        column is the sex ("Female" / "Male") and which has a count column
#        named `n`.
# data:  kept for backward compatibility with existing calls; the result is
#        sized from `odata` itself, so this argument is not read.
# Returns a matrix with one row per distinct group (in order of first
# appearance), columns "Female" and "Male", and the counts as cell values.
# Each count is placed by its own group and sex, so a group that is missing
# one sex gets a 0 in that cell instead of shifting every later count.
to_nice_table <- function(odata, data) {
  groups <- as.character(odata[[1]])
  sexes <- as.character(odata[[2]])
  sex_levels <- c("Female", "Male")
  group_levels <- unique(groups)
  nice <- matrix(
    0L,
    nrow = length(group_levels),
    ncol = length(sex_levels),
    dimnames = list(group_levels, sex_levels)
  )
  # Two-column (row, column) index matrix: one cell per row of odata.
  cells <- cbind(match(groups, group_levels), match(sexes, sex_levels))
  # Rows whose sex is neither "Female" nor "Male" have no column to go to.
  usable <- !is.na(cells[, 2])
  nice[cells[usable, , drop = FALSE]] <- odata$n[usable]
  nice
}
# Adding the desired categories to the visit_1 data
visit_1 <- visit_1 %>%
  mutate(
    AGE_GROUP = age_group(AGE),
    BMI_GROUP = bmi_group(BMI),
    EDUC_GROUP = educ_group(EDUC)
  )
# Show each raw value next to its derived group for the first five rows
visit_1 %>%
  select(AGE, AGE_GROUP, BMI, BMI_GROUP, EDUC, EDUC_GROUP) %>%
  head(n = 5)
AGE | AGE_GROUP | BMI | BMI_GROUP | EDUC | EDUC_GROUP |
---|---|---|---|---|---|
<dbl> | <chr> | <dbl> | <chr> | <dbl> | <chr> |
39 | 32-40 yr | 26.97 | overweight | 4 | College (BS, BA) degree or more |
46 | 41-50 yr | 28.73 | overweight | 2 | High School Diploma, GED |
48 | 41-50 yr | 25.34 | overweight | 1 | 0-11 years |
61 | 61-70 yr | 28.58 | overweight | 3 | Some College, Vocational School |
46 | 41-50 yr | 23.10 | healthy | 3 | Some College, Vocational School |
# Placing counts into desired table format: count participants per
# (group, sex) pair for each demographic, reshape to a group x sex table,
# then stack the three tables

# Age groups
odata <- visit_1 %>% count(AGE_GROUP, SEX_GROUP)
data <- matrix(table(odata$AGE_GROUP, odata$SEX_GROUP), ncol = 2)
counts1 <- to_nice_table(odata, data)

# BMI groups
odata <- visit_1 %>% count(BMI_GROUP, SEX_GROUP)
data <- matrix(table(odata$BMI_GROUP, odata$SEX_GROUP), ncol = 2)
counts2 <- to_nice_table(odata, data)

# Education groups
odata <- visit_1 %>% count(EDUC_GROUP, SEX_GROUP)
data <- matrix(table(odata$EDUC_GROUP, odata$SEX_GROUP), ncol = 2)
counts3 <- to_nice_table(odata, data)

binder <- rbind(counts1, counts2, counts3)
print(binder)
Female Male 32-40 yr 415 339 41-50 yr 908 731 51-60 yr 795 584 61-70 yr 372 290 healthy 1233 703 NA 14 5 obese 345 232 overweight 853 992 underweight 45 12 0-11 years 979 843 College (BS, BA) degree or more 206 296 High School Diploma, GED 772 509 NA 56 57 Some College, Vocational School 477 239
# Graphing individual counts of each demographic by sex (stacked bars)
options(repr.plot.width = 15, repr.plot.height = 8)

ggplot(visit_1, aes(x = AGE_GROUP, fill = SEX_GROUP)) +
  geom_bar() +
  labs(x = "Age Group", y = "Count", fill = "Sex Group") +
  ggtitle("Age Group Characteristics of Participants in the Framingham Heart Study")

ggplot(visit_1, aes(x = BMI_GROUP, fill = SEX_GROUP)) +
  geom_bar() +
  labs(x = "BMI Group", y = "Count", fill = "Sex Group") +
  ggtitle("BMI Group Characteristics of Participants in the Framingham Heart Study")

# Education labels are long, so the x-axis text is rotated 45 degrees
ggplot(visit_1, aes(x = EDUC_GROUP, fill = SEX_GROUP)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Education", y = "Count", fill = "Sex Group") +
  ggtitle("Education Group Characteristics of Participants in the Framingham Heart Study")
Demographic information of FMS participants at first visit
We will now explore the risk factors of the participants at different visits
# Adding demographic information to our initial frm_data (all visits)
frm_data$AGE_GROUP <- age_group(frm_data$AGE)
frm_data$BMI_GROUP <- bmi_group(frm_data$BMI)
frm_data$EDUC_GROUP <- educ_group(frm_data$EDUC)
frm_data %>% head(n = 5)
RANDID | SEX | TOTCHOL | AGE | SYSBP | DIABP | CURSMOKE | CIGPDAY | BMI | DIABETES | ⋯ | TIMEMIFC | TIMECHD | TIMESTRK | TIMECVD | TIMEDTH | TIMEHYP | SEX_GROUP | AGE_GROUP | BMI_GROUP | EDUC_GROUP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | <chr> | <chr> |
2448 | 1 | 195 | 39 | 106 | 70.0 | 0 | 0 | 26.97 | 0 | ⋯ | 6438 | 6438 | 8766 | 6438 | 8766 | 8766 | Male | 32-40 yr | overweight | College (BS, BA) degree or more |
2448 | 1 | 209 | 52 | 121 | 66.0 | 0 | 0 | NA | 0 | ⋯ | 6438 | 6438 | 8766 | 6438 | 8766 | 8766 | Male | 51-60 yr | NA | College (BS, BA) degree or more |
6238 | 2 | 250 | 46 | 121 | 81.0 | 0 | 0 | 28.73 | 0 | ⋯ | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | Female | 41-50 yr | overweight | High School Diploma, GED |
6238 | 2 | 260 | 52 | 105 | 69.5 | 0 | 0 | 29.43 | 0 | ⋯ | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | Female | 51-60 yr | overweight | High School Diploma, GED |
6238 | 2 | 237 | 58 | 108 | 66.0 | 0 | 0 | 28.50 | 0 | ⋯ | 8766 | 8766 | 8766 | 8766 | 8766 | 8766 | Female | 51-60 yr | overweight | High School Diploma, GED |
# Extract a subset of data for risk factor exploration: identifiers, visit
# information and the numeric risk-factor measurements
rf_data <- frm_data %>%
  select(
    RANDID, SEX_GROUP, TIME, PERIOD,
    BMI, AGE, SYSBP, DIABP, CIGPDAY, TOTCHOL, HDLC, LDLC, GLUCOSE
  )
# Pivoting data to longer format: one row per (participant visit, measurement)
rf_data_melt <- rf_data %>%
  pivot_longer(
    c(BMI, AGE, SYSBP, DIABP, CIGPDAY, TOTCHOL, HDLC, LDLC, GLUCOSE),
    names_to = "variable",
    values_to = "value"
  )
rf_data_melt %>% head(n = 5)
RANDID | SEX_GROUP | TIME | PERIOD | variable | value |
---|---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <chr> | <dbl> |
2448 | Male | 0 | 1 | BMI | 26.97 |
2448 | Male | 0 | 1 | AGE | 39.00 |
2448 | Male | 0 | 1 | SYSBP | 106.00 |
2448 | Male | 0 | 1 | DIABP | 70.00 |
2448 | Male | 0 | 1 | CIGPDAY | 0.00 |
# Box and violin plot of AGE and BMI factors at the three visits
dodge <- position_dodge(width = 0.4)
options(repr.plot.width = 13, repr.plot.height = 8)
rf_data_melt %>%
  filter(variable %in% c("BMI", "AGE")) %>%
  drop_na() %>%
  ggplot(aes(x = factor(PERIOD), y = value, fill = variable)) +
  geom_violin(position = dodge) +
  geom_boxplot(width = .1, position = dodge) +
  labs(x = "Visit", y = "Value", fill = "Variable")
Age and BMI risk factors at different visits
# Box plot of more risk factors at the three visits
options(repr.plot.width = 13, repr.plot.height = 8)
rf_data_melt %>%
  filter(variable %in% c("SYSBP", "DIABP", "TOTCHOL", "GLUCOSE")) %>%
  drop_na() %>%
  ggplot(aes(x = factor(PERIOD), y = value, fill = variable)) +
  geom_boxplot(width = .7) +
  labs(x = "Visit", y = "Value", fill = "Variable")
For reference:
DIABP, GLUCOSE, SYSBP, and TOTCHOL risk factors at different visits
For the next analysis we need to categorize whether a patient falls into a risk factor by converting its numeric value into a binary value using a threshold. For instance, we consider serum cholesterol > 200 to be a risk factor, and thus it would have a binary value of 1.
# Take only the final visit and the known risk factors for cardiovascular disease
# Keeps the last row recorded for each RANDID (assumes each participant's rows
# are in visit order -- TODO confirm)
visit_fin <- frm_data[!duplicated(frm_data$RANDID, fromLast = TRUE), ]
visit_rf <- visit_fin %>%
  select(
    RANDID, AGE, SEX_GROUP, CURSMOKE, TOTCHOL, BMI, GLUCOSE,
    PREVCHD, PREVSTRK, PREVHYP, DIABETES, HEARTRTE
  )
# Turn a numeric column into a 0/1 risk-factor flag using a threshold.
#
# column:   numeric vector of measurements (may contain NA).
# criteria: numeric threshold.
# eorg:     TRUE flags values >= criteria ("equal or greater");
#           FALSE flags values strictly > criteria.
# Returns a numeric vector of the same length holding 1 where the value meets
# the threshold and 0 otherwise. Missing values are flagged 0.
#
# The comparison is done on the whole vector at once. The earlier loop only
# advanced its position counter for non-missing values when eorg was TRUE, so
# every element after an NA was written to the wrong slot.
col_rf <- function(column, criteria, eorg) {
  values <- as.numeric(column)
  meets <- if (eorg) values >= criteria else values > criteria
  as.numeric(!is.na(meets) & meets)
}
# Derive the binary risk-factor flags, then drop the raw measurements they
# were built from (and SEX_GROUP)
visit_rf <- visit_rf %>%
  mutate(
    TOTCHOL_RF = col_rf(TOTCHOL, 200, FALSE),   # total cholesterol > 200
    BMI_RF = col_rf(BMI, 25, FALSE),            # BMI > 25
    GLUCOSEL_RF = col_rf(GLUCOSE, 200, FALSE),  # glucose > 200
    HEARTRTE_RF = col_rf(HEARTRTE, 100, FALSE), # heart rate > 100
    AGE_RF = col_rf(AGE, 60, TRUE)              # age >= 60
  ) %>%
  select(-AGE, -TOTCHOL, -BMI, -GLUCOSE, -HEARTRTE, -SEX_GROUP)
head(visit_rf, n = 6)
RANDID | CURSMOKE | PREVCHD | PREVSTRK | PREVHYP | DIABETES | TOTCHOL_RF | BMI_RF | GLUCOSEL_RF | HEARTRTE_RF | AGE_RF |
---|---|---|---|---|---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
2448 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
6238 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
9428 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
10552 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 |
11252 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
11263 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
The next few blocks of code combine the risk factor binary values with the binary event data and convert the data into a format that we can graph using a heat map.
# Combining risk factors with event data
event_data <- visit_fin %>%
  select(ANGINA, HOSPMI, MI_FCHD, ANYCHD, STROKE, CVD, HYPERTEN, DEATH)
# Both tables are derived from visit_fin, so they are bound column-wise by
# row position
visit_erf <- cbind(visit_rf, event_data)
head(visit_erf, n = 6)
RANDID | CURSMOKE | PREVCHD | PREVSTRK | PREVHYP | DIABETES | TOTCHOL_RF | BMI_RF | GLUCOSEL_RF | HEARTRTE_RF | AGE_RF | ANGINA | HOSPMI | MI_FCHD | ANYCHD | STROKE | CVD | HYPERTEN | DEATH | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | 2448 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 |
2 | 6238 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 9428 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 10552 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 |
5 | 11252 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
6 | 11263 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 0 |
# Formatting data for heat map
# Drop the ID column so only the 0/1 risk-factor and event columns remain
visit_rf_noid <- select(visit_erf, -RANDID)
# Pairwise correlation matrix of those columns
vrn <- cor(as.matrix(visit_rf_noid))
# Long format: one row per (row variable, column variable, correlation)
heat_data <- melt(vrn)
names(heat_data) <- c("x", "y", "value")
# Graphing the correlation heat map (x-axis labels rotated 45 degrees)
options(repr.plot.width = 9.5, repr.plot.height = 8)
ggplot(heat_data, aes(x = x, y = y, fill = value)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Creating column that combines all of the risk factors
# Adds the ten 0/1 risk-factor columns together, left to right
visit_erf$RF_SUM <- Reduce(
  `+`,
  visit_erf[c(
    "CURSMOKE", "PREVCHD", "PREVSTRK", "PREVHYP", "DIABETES",
    "TOTCHOL_RF", "BMI_RF", "GLUCOSEL_RF", "HEARTRTE_RF", "AGE_RF"
  )]
)
tail(visit_erf, n = 6)
RANDID | CURSMOKE | PREVCHD | PREVSTRK | PREVHYP | DIABETES | TOTCHOL_RF | BMI_RF | GLUCOSEL_RF | HEARTRTE_RF | AGE_RF | ANGINA | HOSPMI | MI_FCHD | ANYCHD | STROKE | CVD | HYPERTEN | DEATH | RF_SUM | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
4429 | 9989287 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 |
4430 | 9990894 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 4 |
4431 | 9993179 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 3 |
4432 | 9995546 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 2 |
4433 | 9998212 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 |
4434 | 9999312 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 |
The next few blocks of code will manipulate the data into a form in which we can visualize the composition of disease events and the risk factor sum.
# Creating column that combines all of the event risk factors
visit_erf$EVENT_SUM <- with(visit_erf, (ANYCHD + STROKE + CVD + HYPERTEN))
# Number of participants for each observed (RF_SUM, EVENT_SUM) combination
rf_count <- visit_erf %>% count(RF_SUM, EVENT_SUM)

# Cross-table of the counts: rows are RF_SUM values, columns are EVENT_SUM
# values. The table keeps at least the 9 x 5 layout (RF_SUM 0-8, EVENT_SUM
# 0-4) and grows if larger sums occur, so a larger sum cannot index outside
# the matrix.
rf_levels <- 0:max(8, rf_count$RF_SUM, na.rm = TRUE)
event_levels <- 0:max(4, rf_count$EVENT_SUM, na.rm = TRUE)
# Combinations that never occur stay at 0
tabled_count <- matrix(
  0,
  nrow = length(rf_levels),
  ncol = length(event_levels)
)
# Skip combinations with a missing sum; they have no cell in the table
observed <- !is.na(rf_count$RF_SUM) & !is.na(rf_count$EVENT_SUM)
# Sums start at 0 while matrix indices start at 1, hence the + 1
tabled_count[cbind(
  rf_count$RF_SUM[observed] + 1,
  rf_count$EVENT_SUM[observed] + 1
)] <- rf_count$n[observed]
rownames(tabled_count) <- sprintf("%.1f", rf_levels)
colnames(tabled_count) <- sprintf("%.1f", event_levels)
print(tabled_count)
0.0 1.0 2.0 3.0 4.0 0.0 60 15 1 0 0 1.0 250 152 19 14 2 2.0 413 472 77 56 4 3.0 203 741 123 233 28 4.0 31 552 105 297 52 5.0 0 114 73 169 44 6.0 0 13 20 41 32 7.0 0 2 1 11 10 8.0 0 0 0 2 2
In the table above, the event sum is represented in columns and the RF sum is represented in rows.
# Graphing: stacked bars of participant counts per risk-factor sum, filled by
# event sum (one manual colour per event-sum level)
options(repr.plot.width = 20, repr.plot.height = 10)
ggplot(rf_count, aes(x = factor(RF_SUM), y = n, fill = factor(EVENT_SUM))) +
  geom_bar(stat = "identity") +
  labs(x = "RF Sum", y = "count", fill = "Event Sum") +
  scale_fill_manual(
    values = c("#1cd600", "#a11e11", "#80180d", "#60120a", "#400c06")
  ) +
  ggtitle("Risk Factors and Disease Events")
Risk Factors and Events
From this histogram we can see that: