library

# charger les librairies utiles


# install.packages("plyr")

library("plyr")
library("knitr")

knitr::opts_chunk$set(
  fig.width = 5, fig.height = 5, 
  fig.path = 'figures/StressNet_',
  fig.align = "center", 
  size = "tiny", 
  echo = TRUE, eval = TRUE, 
  warning = FALSE, message = FALSE, 
  results = TRUE, comment = "")

library("ggplot2")
library("magrittr")
library("ggpubr")
library("reshape2")
library("ggcorrplot")
library("factoextra")
library("kableExtra")

importer fichier

dir.base <- "http://ktakafka.free.fr/R/"

pathFile <- file.path(dir.base, 'anonym_meuf.csv')

File <- read.table(file = pathFile, header = TRUE, as.is = TRUE, sep = ";")

kable(head(File))
annee_descente sexe annee_naissance derniere_descente origine age_1er_descent X ckzone
1972 H na 1981 na na NA https://ckzone.org/showthread.php?tid=16771&page=66
1972 H na na na na NA
1977 H 1963 na paris 14 NA
1978 H na na na na NA
1978 H 1959 na na 19 NA
1980 H na na na na NA

barplot count H/F

empile : permet de mettre une meme barre par annee avec une separation H/F
dodge : cote a cote H/F
geom_bar(stat=“count”) : permet de faire un comptage quand on ne presente pas un phenotype

Nindiv <- nrow(File)#362
print(Nindiv)
[1] 521
# Barplots empiles avec plusieurs groupes
g <- ggplot(File, aes(annee_descente, group = sexe, fill=sexe))
g <- g + geom_bar(stat="count")+
  ylab("Annee de premiere descente")+
  # geom_text(stat='count', aes(label=..count..),hjust=-1.6, color="white", size=3.5)+
  scale_fill_manual(values = c("chartreuse","chartreuse4")) + 
   scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
                     labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
  coord_flip()+
  labs(title="Barplots empiles H/F 1ere descente")
g
barplot of 1rst descent

barplot of 1rst descent

# Barplot dodge
gg <- ggplot(File, aes(annee_descente, group = sexe, fill=sexe)) +
  geom_bar(stat="count", position = position_dodge2(preserve = "single"))+
  # geom_text(stat='count', aes(label=..count..), color="white", size=2)+
  scale_fill_manual(values = c("chartreuse","chartreuse4")) + 
  scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
                     labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
  coord_flip()+
  theme_minimal()+
  labs(title="Barplots dodge H/F 1ere descente")
gg
barplot of 1rst descent

barplot of 1rst descent

# Barplot FACET
ggg <- ggplot(File, aes(x= annee_descente,  group=sexe)) + 
    geom_bar(aes(fill = factor(sexe)), stat="count") +
    labs(y = "Percent", fill="Annee de premiere descente") +
    facet_grid(~sexe) + coord_flip()+
  scale_fill_manual(values = c("chartreuse","chartreuse4")) + 
   scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
                     labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
  labs(title="Barplots FACET H/F 1ere descente")
ggg
barplot of 1rst descent

barplot of 1rst descent

# barplot pyramide inversee (avec le count h/f)
gggg <-  ggplot(File) +
    aes(x=annee_descente,fill=sexe) +
    geom_bar(data = subset(File,sexe=="F"),aes(y=..count..*(-1))) +
    geom_bar(data = subset(File,sexe=="H")) +
    scale_fill_manual(values = c("chartreuse","chartreuse4")) + 
    scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
                     labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020)) +
    coord_flip()+
   # geom_freqpoly(data = subset(File,sexe=="F", 
   #                             aes(y=..count..*(-1)), 
   #                             stat = "count", bins = 5))+
   # geom_freqpoly(data = subset(File,sexe=="H", 
   #                             stat = "count", bins = 5))+

   labs(title="barplot pyramide inversee H/F 1ere descente")
gggg
barplot of 1rst descent

barplot of 1rst descent

histogram count H/F

L histogramme permet de regler le binwidth, la lagueur des intervalles pris (qui est de 1 an pour le barplot, la j ai regroupe par 4 ans par exemple)

nrow(File)#362
[1] 521
# histogrammes supperposes
ee <- ggplot(File, aes(File$annee_descente, fill = sexe)) +
  geom_histogram( binwidth = 5)+   
  labs(title="histogram supperposes H/F 1ere descente")+
  scale_fill_manual(values = c("chartreuse","chartreuse4")) + 
 coord_flip()
ee
histo of 1rst descent

histo of 1rst descent

# density de kernel plot
# eee <- ggplot(File, aes(x = File$annee_descente, fill = sexe, color=sexe)) +
#   geom_density(aes(y=..density..), alpha = 0.1, color="black")+
#   labs(title="density plot H/F 1ere descente")+
#   scale_fill_manual(values = c("chartreuse","chartreuse4")) +
#   scale_x_discrete(limits=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020),
#                   labels=c(1970,1975,1980,1985,1990,1995,2000,2005,2010,2015,2020))+
#   coord_flip()
# eee

definir classe de 5 ans

# split h et f
Filehm <- split.data.frame(File, File$sexe)
# View(Filehm[1])


## FEMMES
s.80f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente <= 1980,])

s.80_85f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1981 & Filehm[[1]]$annee_descente <= 1985,])

s.85_90f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1986 & Filehm[[1]]$annee_descente <= 1990,])

s.90_95f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1991 & Filehm[[1]]$annee_descente <= 1995,])

s.95_00f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 1996 & Filehm[[1]]$annee_descente <= 2000,])

s.00_05f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2001 & Filehm[[1]]$annee_descente <= 2005,])

s.05_10f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2006 & Filehm[[1]]$annee_descente <= 2010,])

s.10_15f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2011 & Filehm[[1]]$annee_descente <= 2015,])

s.15_20f <- nrow(Filehm[[1]][Filehm[[1]]$annee_descente >= 2016 & Filehm[[1]]$annee_descente <= 2020,])


femme <- data.frame(
  a1970_1980 = s.80f,
  a1980_1985 = s.80_85f,
  a1985_1990 = s.85_90f,
  a1990_1995 = s.90_95f,
  a1995_2000 = s.95_00f,
  a2000_2005 = s.00_05f,
  a2005_2010 = s.05_10f,
  a2010_2015 = s.10_15f,
  a2015_2020 = s.15_20f
  )

### hommes
s.80h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente <= 1980,])

s.80_85h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1981 & Filehm[[2]]$annee_descente <= 1985,])

s.85_90h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1986 & Filehm[[2]]$annee_descente <= 1990,])

s.90_95h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1991 & Filehm[[2]]$annee_descente <= 1995,])

s.95_00h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 1996 & Filehm[[2]]$annee_descente <= 2000,])

s.00_05h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2001 & Filehm[[2]]$annee_descente <= 2005,])

s.05_10h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2006 & Filehm[[2]]$annee_descente <= 2010,])

s.10_15h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2011 & Filehm[[2]]$annee_descente <= 2015,])

s.15_20h <- nrow(Filehm[[2]][Filehm[[2]]$annee_descente >= 2016 & Filehm[[2]]$annee_descente <= 2020,])


homme <- data.frame(
  a1970_1980 = s.80h,
  a1980_1985 = s.80_85h,
  a1985_1990 = s.85_90h,
  a1990_1995 = s.90_95h,
  a1995_2000 = s.95_00h,
  a2000_2005 = s.00_05h,
  a2005_2010 = s.05_10h,
  a2010_2015 = s.10_15h,
  a2015_2020 = s.15_20h
  )


classe5ans <- merge(t(femme), t(homme), by = "row.names")
names(classe5ans) <- c("classe", "femme", "homme" )

totfemme <- sum(classe5ans$femme)
tothomme <- sum(classe5ans$homme)

tot_an <- classe5ans$femme+classe5ans$homme


classe5ans$per_f <- (classe5ans$femme*100)/tot_an

classe5ans$per_h <- (classe5ans$homme*100)/tot_an



# View(classe5ans)

per <- classe5ans[,c(1,4,5)]

melt5an <- melt(per, id=c("classe"), measured = c(per_f, per_h) )
str(melt5an)
'data.frame':   18 obs. of  3 variables:
 $ classe  : 'AsIs' chr  "a1970_1980" "a1980_1985" "a1985_1990" "a1990_1995" ...
 $ variable: Factor w/ 2 levels "per_f","per_h": 1 1 1 1 1 1 1 1 1 2 ...
 $ value   : num  0 13.6 13.6 16.7 11.4 ...
# Barplot dodge
gg <- ggplot(melt5an, aes(y = value, x = as.vector(classe), fill = variable)) +
  geom_bar(stat="identity")+
  scale_fill_manual(values = c("chartreuse","chartreuse4")) + 
  theme_minimal()+
  labs(title="Barplots empile H/F % classe de 5 annees")+
  geom_hline(yintercept =50,colour="lightgrey", linetype="dashed")+
    geom_hline(yintercept =75,colour="lightgrey", linetype="dashed")+
      geom_hline(yintercept =25,colour="lightgrey", linetype="dashed")+
  ylab("Pourcentage d'hommes/femmes par classe")+
  xlab("classe")+
  theme(axis.text.x = element_text(size=10, angle=90))+
  geom_text(aes(y=103),label=c(tot_an, tot_an))+
  annotate("text", hjust = 0, x = 0.5, y = 108, label = "Nombre d'individus echantillonnes par classe :")+
 scale_y_discrete(limits=c(25,50,75,100),
                     labels=c(25,50,75,100)) 
gg
barplot of age by class

barplot of age by class

barplot age H/F

# View(File)

File$age <- 2019 - as.numeric(File$annee_naissance)



# barplot pyramide inversee (avec le count h/f)
gggg <-  ggplot(File) +
    aes(x=age,fill=sexe) +
    geom_bar(data = subset(File,sexe=="F"),aes(y=..count..*(-1))) +
    geom_bar(data = subset(File,sexe=="H")) +
    scale_fill_manual(values = c("coral","coral4")) + 
    scale_x_discrete(limits=c(15,20,25,30,35,40,45,50,55,60,65,70),
                     labels=c(15,20,25,30,35,40,45,50,55,60,65,70)) +
    coord_flip()+

   labs(title="barplot pyramide age inversee H/F ")
gggg
barplot of age

barplot of age

violin age

On constate que sur les donnees d’age de premiere descente (y a pas beaucoup d’individus pour lesquel j’ai l info), les femmes decouvrent plus tard les ktas.

# View(File)

##boxplot summary genotype
my_comparisons <- list(c("H", "F"))

p <- ggplot(File, aes(x = sexe, y = as.numeric(age_1er_descent), fill = factor(sexe)))

p <- p+ geom_violin(width=.8)+ 
  ggtitle("Age de premiere descente") + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_dotplot(binaxis='y', stackdir='center', dotsize=0.3)

p <- p + geom_boxplot(width=0.1, fill="white")
p + stat_compare_means( comparisons = my_comparisons, test = "kruskal.test", 
                        label.y = c(150) +
theme(legend.position = "none"))
barplot of age decouverte

barplot of age decouverte