diff --git a/docs/articles/index.html b/docs/articles/index.html index 12bc309..f762ff1 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -23,14 +23,17 @@ - + + - + + + @@ -49,10 +52,34 @@ @@ -58,7 +82,7 @@

2017-04-12

- Number formatting

+Number formatting

format_num() formats numbers according to a locale (either French or English). format_num() is just a wrapper for the base-R format() function. Therefore, it is possible to use standard arguments of the format() function such as scientific = TRUE/FALSE.

library(tricky)
 format_num(x = 1233.123, locale = "fr")
@@ -75,7 +99,8 @@

@@ -65,17 +89,17 @@

2017-04-12

library(knitr)

- Find keys

+Find keys
  • find_keys() looks at a table and returns a data frame with the name of all available keys.
-
read_csv(
+
read_csv(
   system.file(
     "extdata", 
     "table_deputes.csv", 
     package = "tricky")
-  ) %>% 
+  ) %>% 
   find_keys() 
## Parsed with column specification:
 ## cols(
@@ -105,18 +129,18 @@ 

- Standardize table names

+Standardize table names

Many datasets have non-standard variable names, including accents (e.g. é, è, à, …), spaces and so on.

The French IT Dashboard is an example of a dataset with column names in natural language:

-
read_excel(
+
read_excel(
   path = system.file(
     "extdata", 
     "panorama.xlsx", 
     package = "tricky"
     )
-  ) %>% 
+  ) %>% 
   names()
-
##  [1] "Ministère porteur "                           
+
##  [1] "Ministère porteur"                            
 ##  [2] "Ministère nom complet"                        
 ##  [3] "Nom du projet"                                
 ##  [4] "Projet interministériel"                      
@@ -135,16 +159,16 @@ 

  • set_standard_names() takes a table and returns the same table with standardized names.
  • -
    read_excel(
    +
    read_excel(
       path = system.file(
         "extdata", 
         "panorama.xlsx", 
         package = "tricky"
         )
    -  ) %>% 
    -  set_standard_names() %>% 
    -  glimpse()
    -
    ## Observations: 293
    +  ) %>% 
    +  set_standard_names() %>% 
    +  glimpse()
    +
    ## Observations: 57
     ## Variables: 15
     ## $ ministere_porteur                             <chr> "SPM", "SPM", "S...
     ## $ ministere_nom_complet                         <chr> "Premier ministr...
    @@ -152,76 +176,64 @@ 

    ## $ projet_interministeriel <chr> "Oui", "Oui", "O... ## $ description_et_objectifs_du_projet <chr> "FranceConnect o... ## $ en_savoir_plus_sur_le_projet <chr> "https://doc.int... -## $ debut <dbl> 41456, 40725, 41... +## $ debut <dttm> 2013-07-01, 201... ## $ duree_previsionnelle_en_annee <chr> "3.7506849315068... ## $ phase_du_projet_en_cours <chr> "Déploiement", "... ## $ cout_estime <chr> "7", "52", "26.3... ## $ cout_estime_par_tranche <chr> "entre 5 et 9 M€... ## $ zone_fonctionnelle <chr> "Gestion et cont... -## $ financement_programmes <chr> "Coordination du... +## $ financement_programme_s_ <chr> "Coordination du... ## $ lien_vers_les_projets_annuels_de_performances <chr> "http://www.perf... -## $ date_de_publication <dbl> 42677, 42677, 42...

    +## $ date_de_publication <dttm> 2016-11-03, 201...

    - Detect missing values

    +Detect missing values
    • count_na() returns a table of missing and non-missing values in a vector
    • detect_na() returns a table with the number and the share of missing values for each variable
    -
    read_excel(
    +
    read_excel(
       path = system.file(
         "extdata", 
         "panorama.xlsx", 
         package = "tricky"
         )
    -  ) %>% 
    -  set_standard_names() %>% 
    -  .$ministere_porteur %>% 
    +  ) %>% 
    +  set_standard_names() %>% 
    +  .$ministere_porteur %>% 
       count_na()
    -
    ## # A tibble: 2 × 2
    +
    ## # A tibble: 1 x 2
     ##        f     n
     ##   <fctr> <int>
    -## 1  FALSE    57
    -## 2   TRUE   236
    -
    read_excel(
    +## 1  FALSE    57
    +
    read_excel(
       path = system.file(
         "extdata", 
         "panorama.xlsx", 
         package = "tricky"
         )
    -  ) %>% 
    -  set_standard_names() %>% 
    +  ) %>% 
    +  set_standard_names() %>% 
       detect_na()
    -
    ## Source: local data frame [15 x 3]
    -## Groups: variable [15]
    -## 
    -##                                         variable n_missing share_missing
    -##                                           <fctr>     <int>         <dbl>
    -## 1                              ministere_porteur       236      80.54608
    -## 2                          ministere_nom_complet       236      80.54608
    -## 3                                  nom_du_projet       236      80.54608
    -## 4                        projet_interministeriel       236      80.54608
    -## 5             description_et_objectifs_du_projet       236      80.54608
    -## 6                   en_savoir_plus_sur_le_projet       274      93.51536
    -## 7                                          debut       237      80.88737
    -## 8                  duree_previsionnelle_en_annee       236      80.54608
    -## 9                       phase_du_projet_en_cours       236      80.54608
    -## 10                                   cout_estime       236      80.54608
    -## 11                       cout_estime_par_tranche       236      80.54608
    -## 12                            zone_fonctionnelle       236      80.54608
    -## 13                        financement_programmes       237      80.88737
    -## 14 lien_vers_les_projets_annuels_de_performances       242      82.59386
    -## 15                           date_de_publication       236      80.54608
    +
    ## # A tibble: 4 x 3
    +## # Groups:   variable [4]
    +##                                        variable n_missing share_missing
    +##                                          <fctr>     <int>         <dbl>
    +## 1                  en_savoir_plus_sur_le_projet        38     66.666667
    +## 2                                         debut         1      1.754386
    +## 3                      financement_programme_s_         1      1.754386
    +## 4 lien_vers_les_projets_annuels_de_performances         6     10.526316