# solution WS 3 Graphs
#
# Worked solution for the graphs worksheet: imports the study table, derives
# categorical follow-up-time and age-group variables, and draws the requested
# pie chart, bar plot, histogram, boxplots, QQ-plot and spaghetti plot.
#
# NOTE(review): the original script imported the table as `data` but applied
# the type conversions and the boxplots to an object called `mgus2` that is
# never created here. Both names presumably refer to the same imported table,
# so `data` is used throughout -- confirm against the worksheet.

# packages ----
library(readxl)
library(ggplot2)

# importing data ----
# "path" is a placeholder for the location of the Excel file.
data <- read_excel("path")

# categorical variables
data$sex <- factor(data$sex,
                   levels = c("F", "M"),
                   labels = c("female", "male"))
data$pstat <- factor(data$pstat,
                     levels = c("0", "1"),
                     labels = c("no", "yes"))
data$death <- factor(data$death,
                     levels = c("0", "1"),
                     labels = c("no", "yes"))

# continuous variables
data$hgb <- as.numeric(data$hgb)
data$creat <- as.numeric(data$creat)
data$mspike <- as.numeric(data$mspike)

# categorical variable "fu_cat"
# Bins are closed on the left ([0, 60), [60, 120), [120, 180), [180, Inf)),
# matching the original `<` / `>=` comparisons; cut() returns a factor, so no
# separate factor() call is needed and missing futime stays NA.
data$fu_cat <- cut(
  data$futime,
  breaks = c(-Inf, 60, 120, 180, Inf),
  right = FALSE,
  labels = c("< 5 yrs", "5 - 10 yrs", "10 - 15 yrs", "> 15 yrs")
)

# ex. 1 ----

# a. pie chart for fu_cat
# frequency table of the categorical variable, one row per category
df <- data.frame(table(data$fu_cat))
names(df) <- c("futime", "frequency")
# add column with percentages
df$percent <- round(df$frequency / sum(df$frequency) * 100, 2)

# A pie chart in ggplot2 is a single stacked bar drawn in polar coordinates.
pie <- ggplot(df, aes(x = "", y = frequency, fill = futime)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  # refer to the column by name; `df$percent` inside aes() bypasses the
  # data argument of the plot
  geom_text(aes(label = paste(percent, "%")),
            position = position_stack(vjust = 0.5)) +
  ggtitle("follow-up time (death)") +
  theme(plot.title = element_text(hjust = 0.5))

# print pie chart
pie

# b. barplot for creat, separating age_qt and sex
# categorical variable from age, again with left-closed bins
data$age_qt <- cut(
  data$age,
  breaks = c(-Inf, 63, 72, 79, Inf),
  right = FALSE,
  labels = c("< 63 yrs", "63 - 71 yrs", "72 - 78 yrs", "> 78 yrs")
)

# NOTE(review): one bar is drawn per row and the bars of a group overlap, so
# the visible height is presumably the largest creat value in each age/sex
# group rather than a mean -- confirm this is what the exercise asks for.
plot <- ggplot(data = data, aes(x = age_qt, y = creat, fill = sex)) +
  geom_col(position = position_dodge()) +
  ggtitle("creatinine for age groups, split by sex") +
  theme(plot.title = element_text(hjust = 0.5))

# print plot
plot

# ex. 2 ----

# a. advanced histogram for age
ggplot(data = data, aes(x = age)) +
  # after_stat(density) replaces the deprecated `..density..` notation
  geom_histogram(aes(y = after_stat(density)), bins = 50) + # number of bars
  geom_density()

# b. boxplots for hgb
# Reserve two lines of outer margin at the top; without it the shared title
# written by mtext(outer = TRUE) has no space to be drawn in.
old_par <- par(mfrow = c(1, 3), oma = c(0, 0, 2, 0))

female <- subset(data, sex == "female")
male <- subset(data, sex == "male")

boxplot(data$hgb,
        # range determines how far the whiskers extend;
        # if set to 0 they extend to both extremes
        # range = max(data$hgb, na.rm = TRUE),
        main = "overall hgb levels",
        ylab = "hgb (g / dl)")
boxplot(female$hgb,
        # range = max(female$hgb, na.rm = TRUE),
        main = "hgb levels females",
        ylab = "hgb (g / dl)")
boxplot(male$hgb,
        # range = max(male$hgb, na.rm = TRUE),
        main = "hgb levels males",
        ylab = "hgb (g / dl)")
mtext("boxplots for haemoglobin, split by sex",
      side = 3, outer = TRUE, cex = 0.9)

# restore the layout and outer margin that were active before the boxplots
par(old_par)

# other option for nicer plot
ggplot(data = data, aes(x = sex, y = hgb)) + # x: factor, y: variable
  geom_boxplot() + # produces a boxplot with data from above
  geom_jitter(colour = "black", size = 1, shape = 1) + # adds data points
  # adds the mean as a red triangle (shape 17);
  # `fun` replaces the deprecated `fun.y` argument
  stat_summary(fun = mean, geom = "point",
               shape = 17, size = 4, colour = "red")

# c. to change the whiskers of the boxplots, remove '#' before range in the
#    boxplot() calls above

# d. qq-plot creatinine
par(mfrow = c(1, 1))
qqnorm(data$creat, pch = 1, frame.plot = FALSE)
qqline(data$creat, col = "steelblue", lwd = 2)
# interpretation: the data is not normally distributed, since the points do
# not follow the reference line

# ex. 3 ----

# a. import data
rats_weight <- read_excel(
  "E:/Malawi 2024/Summer School Malawi/Data/ratsweight.xlsx"
)

# b. convert to factor
rats_weight$Diet <- factor(rats_weight$Diet)

# c. spaghetti plot: one line per rat over time
spaghetti <- ggplot(data = rats_weight,
                    aes(x = Time, y = weight, group = Rat))
spaghetti +
  geom_line() +
  facet_grid(. ~ Diet) # add this parameter for d. separated plots