Utilidad de los graficos:
R permite crear una gran variedad de graficos:
Paquete graphics
Paquete ggplot2
Material extra para aprender sobre graphics:
Trabaja en capas
Los comandos basicos son:
Entendamos ggplot2 con ejemplos!
head(airquality,5)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
Cual es la relacion entre radiacion solar y temperatura?
library(ggplot2)
qplot(x=Solar.R, y=Temp, data=airquality)
## Warning: Removed 7 rows containing missing values (geom_point).
geom_point
ggplot(aes(x = Solar.R, y = Temp), data=airquality) +
geom_point()
## Warning: Removed 7 rows containing missing values (geom_point).
Etiquetas del grafico
ggplot(data=airquality, aes(x=Solar.R, y=Temp)) +
geom_point() +
labs(title = "Titulo", x = "xlab", y = "ylab")
## Warning: Removed 7 rows containing missing values (geom_point).
ggplot(data=airquality, aes(x=Solar.R, y=Temp)) +
geom_point() +
labs(title = "Radiacion Solar vs Temperatura", x = "Radiacion Solar", y = "Temperatura")
## Warning: Removed 7 rows containing missing values (geom_point).
Otra funcion para modificar el titulo ggtitle()
ggplot(data=airquality, aes(x=Solar.R, y=Temp)) +
geom_point() +
labs(x = "Radiacion Solar", y = "Temperatura") +
ggtitle("Radiacion Solar vs Temperatura")
## Warning: Removed 7 rows containing missing values (geom_point).
Si el titulo es muy largo, se puede dividir en multiples lineas con \n
# Scatter plot of solar radiation vs temperature with a two-line title:
# the "\n" embedded in the title string forces the line break.
ggplot(data = airquality, aes(x = Solar.R, y = Temp)) +
  geom_point() +
  labs(x = "Radiacion Solar", y = "Temperatura") +
  ggtitle("Relacion de la radiacion solar con \n la temperatura ambiental")
## Warning: Removed 7 rows containing missing values (geom_point).
Reducir el espaciado y utilizar negrita en el titulo
# Same two-line title, now rendered in bold with tighter line spacing:
# lineheight < 1 pulls the two title lines closer together.
ggplot(data = airquality, aes(x = Solar.R, y = Temp)) +
  geom_point() +
  labs(x = "Radiacion Solar", y = "Temperatura") +
  ggtitle("Relacion de la radiacion solar con \n la temperatura ambiental") +
  theme(plot.title = element_text(lineheight = 0.8, face = "bold"))
## Warning: Removed 7 rows containing missing values (geom_point).
Tamano
ggplot(data=airquality, aes(x=Solar.R, y=Temp)) +
geom_point(size=5)
## Warning: Removed 7 rows containing missing values (geom_point).
Color
ggplot(data=airquality, aes(x=Solar.R, y=Temp)) +
geom_point(size=5,col="blue")
## Warning: Removed 7 rows containing missing values (geom_point).
Forma
ggplot(data=airquality, aes(x=Solar.R, y=Temp)) +
geom_point(size=5,col="blue",pch=4)
## Warning: Removed 7 rows containing missing values (geom_point).
Modificar el color por algun factor
ggplot(data=airquality, aes(x=Solar.R, y=Temp, col=factor(Month))) +
geom_point()
## Warning: Removed 7 rows containing missing values (geom_point).
Modificar el color y la forma por algun factor
ggplot (data=airquality, aes(x=Solar.R, y=Temp, col=factor(Month), shape=factor(Month))) +
geom_point()
## Warning: Removed 7 rows containing missing values (geom_point).
Los patrones de los colores se modifican con:
scale_fill_brewer ( ) : para barras y boxplots
scale_colour_brewer ( ) : para lineas y puntos
ggplot (data=airquality, aes(x=Solar.R, y=Temp, col=factor(Month))) +
geom_point() +
scale_colour_brewer(palette="Set1")
## Warning: Removed 7 rows containing missing values (geom_point).
Codigos de palette
Tambien se puede modificar el fondo o tema del grafico con:
Modificar el fondo o tema del grafico con: theme_*( ), por ejemplo theme_classic( ) o theme_bw( )
ggplot(airquality, aes(Solar.R, Temp, col=factor(Month))) +
geom_point() +
scale_colour_brewer(palette="Set1") +
theme_classic() #theme_bw()
## Warning: Removed 7 rows containing missing values (geom_point).
geom_smooth()
Tipos de modelos: lm, glm, gam
ggplot(airquality, aes(Solar.R, Temp)) +
geom_point(size=3) +
geom_smooth(method="lm", col="red")
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
Mostrar la relacion lineal por mes (factor)
ggplot(airquality,aes(Solar.R, Temp, colour=factor(Month))) +
geom_point(size=3) +
geom_smooth(method = "lm")
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
Visualmente no es muy facil de entender
Graficar los meses por separado, con un solo color
facet_wrap()
ggplot(airquality, aes(Solar.R, Temp)) +
geom_point(size=2) +
geom_smooth (method = "lm") +
facet_wrap(~Month)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
facet_grid()
ggplot(airquality, aes(Solar.R, Temp, col = factor(Month))) +
geom_point(size=2) +
geom_smooth (method = "lm") +
facet_grid (~Month)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
geom=“bar”
qplot(factor(cyl), data=mtcars, geom="bar")
Como seria la funcion con ggplot: geom_bar( )
ggplot(mtcars, aes(factor(cyl))) +
geom_bar()
Modificar graficos de barras con:
fill
qplot(factor(cyl), data=mtcars, fill=factor(cyl), geom="bar")
Intentenlo con ggplot
ggplot(mtcars, aes(factor(cyl), fill=factor(cyl))) +
geom_bar(col="black")
Modificar graficos de barras con:
width
ggplot(mtcars, aes(factor(cyl))) +
geom_bar(width=.2)
coord_flip ( )
ggplot(mtcars, aes(factor(cyl))) +
geom_bar() +
coord_flip()
qplot(factor(cyl), data=mtcars, geom="bar", fill=factor(gear))
ggplot(mtcars, aes(factor(cyl), fill=factor(gear))) +
geom_bar()
ggplot(mtcars,aes(factor(cyl), fill=factor(gear))) +
geom_bar() +
labs(title="Grafico de barras", y="Cantidad de casos", x="# de cilindros") +
scale_fill_brewer(palette="Dark2") # + scale_fill_grey()
Calcular la media de mpg para cada nivel de cyl y generar nueva base de datos:
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: Factor w/ 3 levels "3gears","4gears",..: 2 2 2 1 1 1 1 2 2 2 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
library(plyr)
summary <- ddply(mtcars, "cyl", summarise, mmpg = mean(mpg))
Utilizar stat=“identity”
ggplot(summary, aes(x = factor(cyl), y = mmpg)) + geom_bar(stat = "identity")
alt text
Calcular EE y generar base de datos
summary <- ddply(mtcars, "cyl", summarise, mmpg = mean(mpg), se = sd(mpg)/sqrt(length(mpg)))
View(summary)
Graficar
ggplot(data=summary, aes(x = factor(cyl), y = mmpg)) +
geom_bar(stat = "identity", width =.2) +
geom_errorbar(width=.1, aes(ymin=mmpg-se, ymax=mmpg+se))
ggplot(summary, aes(x = factor(cyl), y = mmpg, colour=factor(cyl))) +
geom_point() +
geom_errorbar(width=.1, aes(ymin=mmpg-se, ymax=mmpg+se))
Otra funcion util para obtener promedios e IC95%: stat_summary( )
# Mean mpg per cylinder count, with a 95% confidence interval drawn as error
# bars. stat_summary() computes both summaries directly from the raw data, so
# no pre-aggregated data frame is needed.
# `fun` replaces the deprecated `fun.y` argument (requires ggplot2 >= 3.3.0).
# NOTE(review): "mean_cl_normal" relies on the Hmisc package being installed.
ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = 0.1)
alt text
Brinda informacion sobre valores minimo y maximo, los cuartiles Q1, Q2 o mediana y Q3. Tambien sobre la presencia de outliers y la simetria de la distribucion.
Leer sobre uso de boxplots:
http://blogs.nature.com/methagora/2014/01/bring-on-the-box-plots-boxplotr.html
Funcion qplot
qplot(factor(cyl), mpg, data = mtcars, geom = "boxplot")
Funcion ggplot
ggplot(mtcars,aes(factor(cyl),mpg)) +
geom_boxplot()
Agregar observaciones con geom_jitter( )
ggplot(mtcars,aes(factor(cyl),mpg)) +
geom_boxplot() +
geom_jitter()
Agregar color distinto a cada factor de cyl
ggplot(mtcars,aes(factor(cyl), mpg)) +
geom_boxplot(aes(fill = factor(cyl)))
Agregar otra variable factor (gear)
Igual que se hizo con los graficos de barras
ggplot(mtcars,aes(factor(cyl), mpg, fill=factor(gear))) +
geom_boxplot()
Cambiar el titulo de la leyenda
scale_fill_hue
ggplot(mtcars,aes(factor(cyl), mpg, fill=factor(gear))) +
geom_boxplot() +
scale_fill_hue("Gear")
Cambiar etiquetas de la leyenda
ggplot(mtcars,aes(factor(cyl), mpg, fill=factor(gear))) +
geom_boxplot() +
scale_fill_hue(name="Gear", labels=c("Three","Four","Five")) +
ylab("Millas")
Posicion de leyendas
theme(legend.position = "top") (opciones: "top", "bottom", "left", "right")
# Box plot of mpg by cylinder count, filled by gear, with the legend hidden.
# "none" is the value ggplot2 documents for suppressing the legend; an empty
# string is not a valid legend position.
ggplot(mtcars, aes(factor(cyl), mpg, fill = factor(gear))) +
  geom_boxplot() +
  scale_fill_hue(name = "Gear", labels = c("Three", "Four", "Five")) +
  theme(legend.position = "none")
Si no se desea mostrar la leyenda: theme(legend.position = "none")
Mostrar leyenda dentro de grafico
ggplot(mtcars,aes(factor(cyl), mpg, fill=factor(gear))) +
geom_boxplot() +
scale_fill_hue(name="Gear", labels=c("Three","Four","Five")) +
theme(legend.position=c(.9,.9))
La posicion va desde x,y = 0,0 (abajo-izquierda) a x,y = 1,1 (arriba-derecha)
ggplot2 tiene diferentes opciones de fondos, theme_gray ( ) = default
plot <- ggplot(mtcars, aes(factor(cyl), mpg)) +
geom_boxplot(aes(fill=factor(am))) +
scale_fill_hue(name="Transmission", labels=c("Automatic","Manual"))
plot + theme_bw()
plot + theme_classic()
Tamano de letra
plot + theme_classic(base_size = 20)
library(MASS)
head(birthwt)
## low age lwt race smoke ptl ht ui ftv bwt
## 85 0 19 182 2 0 0 0 1 0 2523
## 86 0 33 155 3 0 0 0 0 3 2551
## 87 0 20 105 1 1 0 0 0 1 2557
## 88 0 21 108 1 1 0 0 1 2 2594
## 89 0 18 107 1 1 0 0 1 0 2600
## 91 0 21 124 3 0 0 0 0 0 2622
geom = “histogram”
qplot(age, data = birthwt, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
geom_histogram ( )
ggplot(birthwt, aes(age)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stat_bin: binwidth defaulted to range/30. Use ‘binwidth = x’ to adjust this.
binwidth = x
ggplot(birthwt,aes(age)) +
geom_histogram(binwidth=5, fill="white",col="black")
Mostrar la distribucion de edades pero en fumadoras y no fumadoras
Dos graficas utilizando facet_grid( ) o facet_wrap()
ggplot(birthwt,aes(age)) +
geom_histogram(binwidth=5, fill="white",col="black")+
facet_grid(~smoke)
Una sola grafica
ggplot(birthwt,aes(age, fill=factor(smoke))) +
geom_histogram(binwidth=5)
alpha = x
ggplot(birthwt,aes(age, fill=factor(smoke))) +
geom_histogram(col="black", binwidth=5,
alpha=0.5)
ggsave ( )
library(ggplot2)
plot2 <- ggplot(birthwt,aes(age,fill=factor(smoke))) +
geom_histogram(binwidth=5) +
theme_classic()
plot2
ggsave("plot.jpg", plot2, width = 10, height = 5, units = "cm", dpi = 400)
plot_grid ( )
# Load the grid packages with library(): it stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script go on
# to fail later with a less obvious message.
library(grid)
library(gridExtra)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
figure <- plot_grid(plot, plot2, labels = c("A", "B"))
ggsave("Figure.jpg", figure, width = 200, height = 110, units="mm",dpi = 600)
Mas informacion sobre paquete cowplot:
https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html