# charger les librairies utiles
# install.packages("plyr")
library("plyr")
library("knitr")
knitr::opts_chunk$set(
fig.width = 5, fig.height = 5,
fig.path = 'figures/StressNet_',
fig.align = "center",
size = "tiny",
echo = TRUE, eval = TRUE,
warning = FALSE, message = FALSE,
results = TRUE, comment = "")
library("ggplot2")
library("magrittr")
library("ggpubr")
library("reshape2")
library("ggcorrplot")
library("factoextra")
library("kableExtra")
dir.base <- "http://ktakafka.free.fr/R/"
pathFile <- file.path(dir.base, 'anonym_meuf.csv')
File <- read.table(file = pathFile, header = TRUE, as.is = TRUE, sep = ";")
kable(head(File))
annee_descente | sexe | annee_naissance | derniere_descente | origine | age_1er_descent | X | ckzone |
---|---|---|---|---|---|---|---|
1972 | H | na | 1981 | na | na | NA | https://ckzone.org/showthread.php?tid=16771&page=66 |
1972 | H | na | na | na | na | NA | |
1977 | H | 1963 | na | paris | 14 | NA | |
1978 | H | na | na | na | na | NA | |
1978 | H | 1959 | na | na | 19 | NA | |
1980 | H | na | na | na | na | NA |
empile : permet de mettre une meme barre par annee avec une separation H/F
dodge : cote a cote H/F
geom_bar(stat=“count”) : permet de faire un comptage quand on ne presente pas un phenotype
Nindiv <- nrow(File)#362
print(Nindiv)
[1] 521
# Barplots empiles avec plusieurs groupes
g <- ggplot(File, aes(annee_descente, group = sexe, fill=sexe))
g <- g + geom_bar(stat="count")+
ylab("Annee de premiere descente")+
# geom_text(stat='count', aes(label=..count..),hjust=-1.6, color="white", size=3.5)+
scale_fill_manual(values = c("chartreuse","chartreuse4")) +
scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
coord_flip()+
labs(title="Barplots empiles H/F 1ere descente")
g
barplot of 1rst descent
# Barplot dodge
gg <- ggplot(File, aes(annee_descente, group = sexe, fill=sexe)) +
geom_bar(stat="count", position = position_dodge2(preserve = "single"))+
# geom_text(stat='count', aes(label=..count..), color="white", size=2)+
scale_fill_manual(values = c("chartreuse","chartreuse4")) +
scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
coord_flip()+
theme_minimal()+
labs(title="Barplots dodge H/F 1ere descente")
gg
barplot of 1rst descent
# Barplot FACET
ggg <- ggplot(File, aes(x= annee_descente, group=sexe)) +
geom_bar(aes(fill = factor(sexe)), stat="count") +
labs(y = "Percent", fill="Annee de premiere descente") +
facet_grid(~sexe) + coord_flip()+
scale_fill_manual(values = c("chartreuse","chartreuse4")) +
scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
labs(title="Barplots FACET H/F 1ere descente")
ggg
barplot of 1rst descent
# barplot pyramide inversee (avec le count h/f)
gggg <- ggplot(File) +
aes(x=annee_descente,fill=sexe) +
geom_bar(data = subset(File,sexe=="F"),aes(y=..count..*(-1))) +
geom_bar(data = subset(File,sexe=="H")) +
scale_fill_manual(values = c("chartreuse","chartreuse4")) +
scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
coord_flip()+
# geom_freqpoly(data = subset(File,sexe=="F",
# aes(y=..count..*(-1)),
# stat = "count", bins = 5))+
# geom_freqpoly(data = subset(File,sexe=="H",
# stat = "count", bins = 5))+
labs(title="barplot pyramide inversee H/F 1ere descente")
gggg
barplot of 1rst descent
L histogramme permet de regler le binwidth, la lagueur des intervalles pris (qui est de 1 an pour le barplot, la j ai regroupe par 4 ans par exemple)
nrow(File)#362
[1] 521
# histogrammes supperposes
ee <- ggplot(File, aes(File$annee_descente, fill = sexe)) +
geom_histogram( binwidth = 5)+
labs(title="histogram supperposes H/F 1ere descente")+
scale_fill_manual(values = c("chartreuse","chartreuse4")) +
coord_flip()
ee
histo of 1rst descent
# density de kernel plot
# eee <- ggplot(File, aes(x = File$annee_descente, fill = sexe, color=sexe)) +
# geom_density(aes(y=..density..), alpha = 0.1, color="black")+
# labs(title="density plot H/F 1ere descente")+
# scale_fill_manual(values = c("chartreuse","chartreuse4")) +
# scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
# labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020))+
# coord_flip()
# eee
# split h et f
Filehm <- split.data.frame(File, File$sexe)
# View(Filehm[1])
## FEMMES
s.80f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente <= 1980,])
s.80_85f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1981 & Filehm[[1]]$annee_descente <= 1985,])
s.85_90f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1986 & Filehm[[1]]$annee_descente <= 1990,])
s.90_95f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1991 & Filehm[[1]]$annee_descente <= 1995,])
s.95_00f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1996 & Filehm[[1]]$annee_descente <= 2000,])
s.00_05f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2001 & Filehm[[1]]$annee_descente <= 2005,])
s.05_10f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2006 & Filehm[[1]]$annee_descente <= 2010,])
s.10_15f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2011 & Filehm[[1]]$annee_descente <= 2015,])
s.15_20f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2016 & Filehm[[1]]$annee_descente <= 2020,])
femme <- data.frame(
a1970_1980 = s.80f,
a1980_1985 = s.80_85f,
a1985_1990 = s.85_90f,
a1990_1995 = s.90_95f,
a1995_2000 = s.95_00f,
a2000_2005 = s.00_05f,
a2005_2010 = s.05_10f,
a2010_2015 = s.10_15f,
a2015_2020 = s.15_20f
)
### hommes
s.80h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente <= 1980,])
s.80_85h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1981 & Filehm[[2]]$annee_descente <= 1985,])
s.85_90h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1986 & Filehm[[2]]$annee_descente <= 1990,])
s.90_95h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1991 & Filehm[[2]]$annee_descente <= 1995,])
s.95_00h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1996 & Filehm[[2]]$annee_descente <= 2000,])
s.00_05h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2001 & Filehm[[2]]$annee_descente <= 2005,])
s.05_10h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2006 & Filehm[[2]]$annee_descente <= 2010,])
s.10_15h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2011 & Filehm[[2]]$annee_descente <= 2015,])
s.15_20h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2016 & Filehm[[2]]$annee_descente <= 2020,])
homme <- data.frame(
a1970_1980 = s.80h,
a1980_1985 = s.80_85h,
a1985_1990 = s.85_90h,
a1990_1995 = s.90_95h,
a1995_2000 = s.95_00h,
a2000_2005 = s.00_05h,
a2005_2010 = s.05_10h,
a2010_2015 = s.10_15h,
a2015_2020 = s.15_20h
)
classe5ans <- merge(t(femme), t(homme), by = "row.names")
names(classe5ans) <- c("classe", "femme", "homme" )
totfemme <- sum(classe5ans$femme)
tothomme <- sum(classe5ans$homme)
tot_an <- classe5ans$femme+classe5ans$homme
classe5ans$per_f <- (classe5ans$femme*100)/tot_an
classe5ans$per_h <- (classe5ans$homme*100)/tot_an
# View(classe5ans)
per <- classe5ans[,c(1,4,5)]
melt5an <- melt(per, id=c("classe"), measured = c(per_f, per_h) )
str(melt5an)
'data.frame': 18 obs. of 3 variables:
$ classe : 'AsIs' chr "a1970_1980" "a1980_1985" "a1985_1990" "a1990_1995" ...
$ variable: Factor w/ 2 levels "per_f","per_h": 1 1 1 1 1 1 1 1 1 2 ...
$ value : num 0 13.6 13.6 16.7 11.4 ...
# Barplot dodge
gg <- ggplot(melt5an, aes(y = value, x = as.vector(classe), fill = variable)) +
geom_bar(stat="identity")+
scale_fill_manual(values = c("chartreuse","chartreuse4")) +
theme_minimal()+
labs(title="Barplots empile H/F % classe de 5 annees")+
geom_hline(yintercept =50,colour="lightgrey", linetype="dashed")+
geom_hline(yintercept =75,colour="lightgrey", linetype="dashed")+
geom_hline(yintercept =25,colour="lightgrey", linetype="dashed")+
ylab("Pourcentage d'hommes/femmes par classe")+
xlab("classe")+
theme(axis.text.x = element_text(size=10, angle=90))+
geom_text(aes(y=103),label=c(tot_an, tot_an))+
annotate("text", hjust = 0, x = 0.5, y = 108, label = "Nombre d'individus echantillonnes par classe :")+
scale_y_discrete(limits=c(25,50,75,100),
labels=c(25,50,75,100))
gg
barplot of age by class
# View(File)
File$age <- 2019 - as.numeric(File$annee_naissance)
# barplot pyramide inversee (avec le count h/f)
gggg <- ggplot(File) +
aes(x=age,fill=sexe) +
geom_bar(data = subset(File,sexe=="F"),aes(y=..count..*(-1))) +
geom_bar(data = subset(File,sexe=="H")) +
scale_fill_manual(values = c("coral","coral4")) +
scale_x_discrete(limits=c(15,20,25,30,35,40,45,50,55,60,65,70),
labels=c(15,20,25,30,35,40,45,50,55,60,65,70)) +
coord_flip()+
labs(title="barplot pyramide age inversee H/F ")
gggg
barplot of age
On constate que sur les donnees d’age de premiere descente (y a pas beaucoup d’individus pour lesquel j’ai l info), les femmes decouvrent plus tard les ktas.
# View(File)
##boxplot summary genotype
my_comparisons <- list(c("H", "F"))
p <- ggplot(File, aes(x = sexe, y = as.numeric(age_1er_descent), fill = factor(sexe)))
p <- p+ geom_violin(width=.8)+
ggtitle("Age de premiere descente") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
geom_dotplot(binaxis='y', stackdir='center', dotsize=0.3)
p <- p + geom_boxplot(width=0.1, fill="white")
p + stat_compare_means( comparisons = my_comparisons, test = "kruskal.test",
label.y = c(150) +
theme(legend.position = "none"))
barplot of age decouverte