Ulm University

Graficos

Utilidad de los graficos:

Graficos en R

R permite crear una gran variedad de graficos:

{ graphics } { ggplot2 } { lattice }

Paquete graphics

Paquete ggplot2

Paquete graphics

Paquete ggplot2

Paquete graphics

Paquete ggplot2

Material extra para aprender sobre graphics:

ggplot2

Trabaja en capas

Los comandos basicos son:

Entendamos ggplot2 con ejemplos!

# Show the first 5 rows of the airquality data frame.
head(airquality,5)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5

Graficos de dispersion (Scatterplot)

Cual es la relacion entre radiacion solar y temperatura?

library(ggplot2)
# Quick scatterplot of temperature against solar radiation using qplot().
qplot(Solar.R, Temp, data = airquality)
## Warning: Removed 7 rows containing missing values (geom_point).

geom_point

# Same scatterplot built in layers: data + aesthetic mapping, then points.
ggplot(data = airquality, mapping = aes(x = Solar.R, y = Temp)) +
  geom_point()
## Warning: Removed 7 rows containing missing values (geom_point).

Tipos de geom_:

https://ggplot2.tidyverse.org/reference/

Modificar graficos

Etiquetas del grafico

# Scatterplot with placeholder title and axis labels set through labs().
ggplot(airquality, aes(Solar.R, Temp)) +
  geom_point() +
  labs(x = "xlab", y = "ylab", title = "Titulo")
## Warning: Removed 7 rows containing missing values (geom_point).

# Scatterplot with descriptive title and axis labels set through labs().
ggplot(airquality, aes(Solar.R, Temp)) +
  geom_point() +
  labs(
    x = "Radiacion Solar",
    y = "Temperatura",
    title = "Radiacion Solar vs Temperatura"
  )
## Warning: Removed 7 rows containing missing values (geom_point).

Otra funcion para modificar el titulo: ggtitle()

# Axis labels come from labs(); the title is added separately with ggtitle().
ggplot(airquality, aes(Solar.R, Temp)) +
  geom_point() +
  labs(y = "Temperatura", x = "Radiacion Solar") +
  ggtitle(label = "Radiacion Solar vs Temperatura")
## Warning: Removed 7 rows containing missing values (geom_point).

Si el titulo es muy largo se puede dividir en multiples lineas con "\n"

# Scatterplot whose long title is split over two lines; "\n" inside the
# string marks the line break.
ggplot(data = airquality, aes(x = Solar.R, y = Temp)) +
  geom_point() +
  labs(x = "Radiacion Solar", y = "Temperatura") +
  # No spaces around "\n": they would be drawn as a trailing/leading blank
  # and shift the two title lines relative to each other.
  ggtitle("Relacion de la radiacion solar con\nla temperatura ambiental")
## Warning: Removed 7 rows containing missing values (geom_point).

Reducir el espaciado y utilizar negrita en el titulo

# Two-line title drawn in bold with reduced spacing between its lines.
ggplot(data = airquality, aes(x = Solar.R, y = Temp)) +
  geom_point() +
  labs(x = "Radiacion Solar", y = "Temperatura") +
  # No spaces around "\n": they would be drawn as a trailing/leading blank
  # and shift the two title lines relative to each other.
  ggtitle("Relacion de la radiacion solar con\nla temperatura ambiental") +
  # lineheight < 1 tightens the gap between the two title lines.
  theme(plot.title = element_text(lineheight = 0.8, face = "bold"))
## Warning: Removed 7 rows containing missing values (geom_point).

Tamano

# Enlarge every point by setting a fixed size on the layer.
ggplot(airquality, aes(Solar.R, Temp)) +
  geom_point(size = 5)
## Warning: Removed 7 rows containing missing values (geom_point).

Color

# Fixed point colour (`col` is an alias of `colour`).
ggplot(airquality, aes(Solar.R, Temp)) +
  geom_point(colour = "blue", size = 5)
## Warning: Removed 7 rows containing missing values (geom_point).

Forma

# Fixed point shape (`pch` is an alias of `shape`), here shape 4.
ggplot(airquality, aes(Solar.R, Temp)) +
  geom_point(shape = 4, colour = "blue", size = 5)
## Warning: Removed 7 rows containing missing values (geom_point).

Modificar el color por algun factor

# Map colour to Month; factor() makes the month a discrete variable.
ggplot(
  airquality,
  aes(Solar.R, Temp, colour = factor(Month))
) +
  geom_point()
## Warning: Removed 7 rows containing missing values (geom_point).

Modificar el color y la forma por algun factor

# Map both colour and shape to Month.
ggplot(
  airquality,
  aes(Solar.R, Temp, colour = factor(Month), shape = factor(Month))
) +
  geom_point()
## Warning: Removed 7 rows containing missing values (geom_point).

Los patrones de los colores se modifican con:

scale_fill_brewer ( ) : para barras y boxplots

scale_colour_brewer ( ) : para lineas y puntos

# Colour the months with the ColorBrewer "Set1" palette.
ggplot(
  airquality,
  aes(Solar.R, Temp, colour = factor(Month))
) +
  geom_point() +
  scale_colour_brewer(palette = "Set1")
## Warning: Removed 7 rows containing missing values (geom_point).

Codigos de palette

alt text alt text

Tambien se puede modificar el fondo o tema del grafico con:

Modificar el fondo o tema del grafico con: theme_*() (por ejemplo theme_classic() o theme_bw())

# Same coloured scatterplot, drawn with the classic theme.
ggplot(
  data = airquality,
  mapping = aes(x = Solar.R, y = Temp, colour = factor(Month))
) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  theme_classic() # alternative: theme_bw()
## Warning: Removed 7 rows containing missing values (geom_point).

Grafico de un modelo sencillo

geom_smooth()

Tipos de modelos: lm, glm, gam

# Overlay a linear-model fit, drawn in red, on the scatterplot.
ggplot(data = airquality, mapping = aes(x = Solar.R, y = Temp)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", colour = "red")
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Mostrar la relacion lineal por mes (factor)

# Colour is mapped to Month, so one linear fit is drawn per month.
ggplot(
  data = airquality,
  mapping = aes(x = Solar.R, y = Temp, colour = factor(Month))
) +
  geom_point(size = 3) +
  geom_smooth(method = "lm")
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Visualmente no es muy facil de entender

Graficar meses por separado un solo color

facet_wrap()

# One panel per month, each with its own points and linear fit.
ggplot(data = airquality, mapping = aes(x = Solar.R, y = Temp)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm") +
  facet_wrap(~ Month)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

facet_grid()

# Months laid out as columns of a grid, additionally coloured by month.
ggplot(
  data = airquality,
  mapping = aes(x = Solar.R, y = Temp, colour = factor(Month))
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm") +
  facet_grid(. ~ Month)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Grafico de barras

geom = "bar"

# Bar chart counting the cars in each cylinder class, via qplot().
qplot(x = factor(cyl), data = mtcars, geom = "bar")

Como seria la funcion con ggplot: geom_bar( )

# ggplot() version of the bar chart: geom_bar() counts rows per cyl level.
ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar()

Modificar graficos de barras con:

fill

# Fill each bar with a colour chosen by its cylinder class.
qplot(
  x = factor(cyl),
  data = mtcars,
  geom = "bar",
  fill = factor(cyl)
)

Intentenlo con ggplot

# Filled bars with a black outline (`col` is an alias of `colour`).
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), fill = factor(cyl))
) +
  geom_bar(colour = "black")

Modificar graficos de barras con:

width

# Narrow bars: width is set on the layer.
ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar(width = 0.2)

coord_flip ( )

# Horizontal bars: coord_flip() swaps the x and y axes.
ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar() +
  coord_flip()

Graficos de barra apilados (stack bar charts)

# Stacked bars: each cylinder bar is split by gear.
qplot(
  x = factor(cyl),
  data = mtcars,
  fill = factor(gear),
  geom = "bar"
)

# ggplot() version of the stacked bar chart.
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), fill = factor(gear))
) +
  geom_bar()

Grafico de barras mas estetico

# Stacked bar chart with title, axis labels and the "Dark2" palette.
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), fill = factor(gear))
) +
  geom_bar() +
  labs(
    x = "# de cilindros",
    y = "Cantidad de casos",
    title = "Grafico de barras"
  ) +
  scale_fill_brewer(palette = "Dark2") # alternative: scale_fill_grey()

Graficos de barras cuando el eje y tiene valores (i.e. media)

Calcular la media de mpg para cada nivel de cyl y generar nueva base de datos:

# Display the structure of mtcars: column names, types and first values.
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: Factor w/ 3 levels "3gears","4gears",..: 2 2 2 1 1 1 1 2 2 2 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
library(plyr)
# One row per cyl value with the mean mpg of that group (column mmpg).
# NOTE(review): the object shares its name with base::summary(); the name
# is kept because later snippets refer to it.
summary <- ddply(
  .data = mtcars,
  .variables = "cyl",
  .fun = summarise,
  mmpg = mean(mpg)
)

Utilizar stat = "identity"

# stat = "identity" makes bar heights equal the mmpg values instead of counts.
ggplot(data = summary, mapping = aes(x = factor(cyl), y = mmpg)) +
  geom_bar(stat = "identity")

Como agregar barras de error al grafico anterior?

alt text

alt text

Calcular EE y generar base de datos

# Per-cyl mean of mpg plus its standard error: sd divided by sqrt(n).
summary <- ddply(
  .data = mtcars,
  .variables = "cyl",
  .fun = summarise,
  mmpg = mean(mpg),
  se = sd(mpg) / sqrt(length(mpg))
)

# Open the summary table in the data viewer (intended for interactive use).
View(summary)

Graficar

# Bars at the group means with error bars spanning mean +/- one SE.
ggplot(summary, aes(x = factor(cyl), y = mmpg)) +
  geom_bar(width = 0.2, stat = "identity") +
  geom_errorbar(
    mapping = aes(ymin = mmpg - se, ymax = mmpg + se),
    width = 0.1
  )

Grafico de barra con puntos

# Group means as points, coloured by cyl, with mean +/- one SE error bars.
ggplot(
  data = summary,
  mapping = aes(x = factor(cyl), y = mmpg, colour = factor(cyl))
) +
  geom_point() +
  geom_errorbar(
    mapping = aes(ymin = mmpg - se, ymax = mmpg + se),
    width = 0.1
  )

Otra funcion util para obtener promedios e IC95%: stat_summary()

# Mean of mpg per cylinder class (point) with its confidence interval
# (error bar), computed by stat_summary() directly from the raw data.
ggplot(mtcars, aes(x = factor(cyl), y = mpg)) +
  # `fun` replaces `fun.y`, which is deprecated since ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point") +
  # mean_cl_normal() relies on the Hmisc package being installed.
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = 0.1)

Graficas de Caja (Boxplot)

alt text

alt text

Brinda informacion sobre valores minimo y maximo, los cuartiles Q1, Q2 o mediana y Q3. Tambien sobre la presencia de outliers y la simetria de la distribucion.

Leer sobre uso de boxplots:
http://blogs.nature.com/methagora/2014/01/bring-on-the-box-plots-boxplotr.html

Funcion qplot

# Boxplot of mpg for each cylinder class, via qplot().
qplot(x = factor(cyl), y = mpg, data = mtcars, geom = "boxplot")

Funcion ggplot

# ggplot() version of the boxplot.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot()

Agregar observaciones con geom_jitter( )

# Boxplot with the individual observations overlaid as jittered points.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot() +
  geom_jitter()

Agregar color distinto a cada factor de cyl

# Fill is mapped inside the boxplot layer, one colour per cyl level.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(mapping = aes(fill = factor(cyl)))

Agregar otra variable factor (gear)

Igual que se hizo con los graficos de barras

# Split each cylinder class into one box per gear via the fill aesthetic.
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), y = mpg, fill = factor(gear))
) +
  geom_boxplot()

Modificar leyendas

Cambiar el titulo de la leyenda

scale_fill_hue

# Rename the fill legend by passing a title to the fill scale.
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), y = mpg, fill = factor(gear))
) +
  geom_boxplot() +
  scale_fill_hue(name = "Gear")

Cambiar etiquetas de la leyenda

# Set the legend title and the label of each gear level; relabel the y axis.
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), y = mpg, fill = factor(gear))
) +
  geom_boxplot() +
  scale_fill_hue(labels = c("Three", "Four", "Five"), name = "Gear") +
  labs(y = "Millas")

Posicion de leyendas

theme(legend.position = "top"); valores posibles: "top", "bottom", "left", "right"

# Boxplot by cylinder class and gear with the legend suppressed.
ggplot(mtcars, aes(factor(cyl), mpg, fill = factor(gear))) +
  geom_boxplot() +
  scale_fill_hue(name = "Gear", labels = c("Three", "Four", "Five")) +
  # "none" is the documented value for hiding the legend; an empty string
  # is not a valid position. Use "top", "bottom", "left" or "right" to
  # move the legend instead.
  theme(legend.position = "none")

Si no se desea mostrar la leyenda: theme(legend.position = "none")

Mostrar leyenda dentro de grafico

# Place the legend inside the panel at relative coordinates (0.9, 0.9).
# NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
# in favour of legend.position.inside -- confirm the target version.
ggplot(
  data = mtcars,
  mapping = aes(x = factor(cyl), y = mpg, fill = factor(gear))
) +
  geom_boxplot() +
  scale_fill_hue(labels = c("Three", "Four", "Five"), name = "Gear") +
  theme(legend.position = c(0.9, 0.9))

La posicion va desde x,y = 0,0 (abajo-izquierda) a x,y = 1,1 (arriba-derecha)

Modificar fondo de graficos

ggplot2 tiene diferentes opciones de fondos, theme_gray ( ) = default

# Store a boxplot of mpg by cylinder class, filled by transmission (am),
# so that different themes can be added to it afterwards.
plot <- ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(mapping = aes(fill = factor(am))) +
  scale_fill_hue(
    labels = c("Automatic", "Manual"),
    name = "Transmission"
  )

Blanco y negro

# Draw the stored boxplot with the black-and-white theme.
plot + theme_bw()

Listo para articulo cientifico!

# Draw the stored boxplot with the classic theme.
plot + theme_classic()

Modificar Tamano de letra de graficos

Tamano de letra

# Classic theme with the base font size set to 20.
plot + theme_classic(base_size = 20)

http://www.cookbook-r.com/Graphs/Fonts/

Histogramas

# MASS provides the birthwt data set used in the histogram examples.
library(MASS)
# Preview the first rows of birthwt.
head(birthwt)
##    low age lwt race smoke ptl ht ui ftv  bwt
## 85   0  19 182    2     0   0  0  1   0 2523
## 86   0  33 155    3     0   0  0  0   3 2551
## 87   0  20 105    1     1   0  0  0   1 2557
## 88   0  21 108    1     1   0  0  1   2 2594
## 89   0  18 107    1     1   0  0  1   0 2600
## 91   0  21 124    3     0   0  0  0   0 2622

geom = "histogram"

# Histogram of age with the default binning, via qplot().
qplot(x = age, data = birthwt, geom = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

geom_histogram ( )

# ggplot() version of the histogram with the default binning.
ggplot(data = birthwt, mapping = aes(x = age)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

binwidth = x

# Histogram with 5-unit-wide bins, white fill and black outline.
ggplot(data = birthwt, mapping = aes(x = age)) +
  geom_histogram(colour = "black", fill = "white", binwidth = 5)

Mostrar la distribucion de edades pero en fumadoras y no fumadoras

Dos graficas utilizando facet_grid( ) o facet_wrap()

# One histogram panel per value of smoke, laid out side by side.
ggplot(data = birthwt, mapping = aes(x = age)) +
  geom_histogram(colour = "black", fill = "white", binwidth = 5) +
  facet_grid(. ~ smoke)

Una sola grafica

# Single panel: bars are filled by smoking status.
ggplot(data = birthwt, mapping = aes(x = age, fill = factor(smoke))) +
  geom_histogram(binwidth = 5)

Modificar la transparencia de los colores

alpha = x

# Semi-transparent fill (alpha = 0.5) with black bar outlines.
ggplot(data = birthwt, mapping = aes(x = age, fill = factor(smoke))) +
  geom_histogram(binwidth = 5, alpha = 0.5, colour = "black")

Guardar grafico con buena calidad de impresion

ggsave ( )

library(ggplot2)
# Build the histogram filled by smoking status with the classic theme,
# keep it in plot2, then display it.
plot2 <- ggplot(data = birthwt, mapping = aes(x = age, fill = factor(smoke))) +
  geom_histogram(binwidth = 5) +
  theme_classic()
plot2

# Write plot2 to plot.jpg at 10 x 5 cm and 400 dpi.
ggsave(
  filename = "plot.jpg",
  plot = plot2,
  width = 10,
  height = 5,
  units = "cm",
  dpi = 400
)

Unir graficos y guardarlo con buena calidad de impresion

plot_grid ( )

# Load the packages used to combine plots. library() stops with an error if
# a package is missing, whereas require() only returns FALSE and lets the
# script fail later at the first call that needs the package.
library(grid)
library(gridExtra)
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
# Combine the two stored plots into one figure, labelled "A" and "B".
figure <- plot_grid(
  plot,
  plot2,
  labels = c("A", "B")
)

# Write the combined figure to Figure.jpg at 200 x 110 mm and 600 dpi.
ggsave(
  filename = "Figure.jpg",
  plot = figure,
  width = 200,
  height = 110,
  units = "mm",
  dpi = 600
)

Mas informacion sobre paquete cowplot:

https://cran.r-project.org/web/packages/cowplot/vignettes/introduction.html

Material extra para continuar aprendiendo ggplot2: