ScaDaMaLe Course site and book

// NOTE(review): bare string literal with no effect on its own; it looks like the
// remnant of a `%run "./DataPreprocess"` notebook magic whose prefix was lost in
// export. Presumably that notebook defines valid_distinct_features and
// df_cleaned_time_series used below; confirm against the original notebook.
"./DataPreprocess"
// Show the summary statistics (count, mean, stddev, min, max) of every column of
// valid_distinct_features.
display(valid_distinct_features.describe())
summary iso_code location population population_density median_age aged_65_older aged_70_older gdp_per_capita cardiovasc_death_rate diabetes_prevalence female_smokers male_smokers hospital_beds_per_thousand life_expectancy human_development_index
count 126 126 126 126 126 126 126 126 126 126 126 126 126 126 126
mean null null 5.448349223809524E7 227.40285714285716 32.72619047619046 10.100476190476186 6.477539682539681 22517.798674603175 249.51723809523807 7.5796031746031725 10.470634920634918 32.03650793650794 3.1610476190476198 74.50119047619042 0.7466904761904759
stddev null null 1.8094803349108434E8 737.9315775155493 8.810413643605422 6.507230389299932 4.50202071901193 21194.388486506883 120.66734039269643 3.8247238647083845 10.346516843539705 13.459477571879843 2.4548683837864473 6.634961896757854 0.1496665875490299
min ALB Albania 98340.0 1.98 15.1 1.144 0.526 752.788 79.37 0.99 0.1 7.7 0.1 59.31 0.354
max ZWE Zimbabwe 1.439323774E9 7915.731 48.2 27.049 18.493 116935.6 724.417 22.02 44.0 78.1 13.05 84.63 0.953
// One cell per feature: each call shows that single feature column next to the
// iso_code column of valid_distinct_features.
display(valid_distinct_features.select("iso_code", "population"))

display(valid_distinct_features.select("iso_code", "population_density"))

// Age-related features.
display(valid_distinct_features.select("iso_code", "median_age"))

display(valid_distinct_features.select("iso_code", "aged_65_older"))

display(valid_distinct_features.select("iso_code", "aged_70_older"))

display(valid_distinct_features.select("iso_code", "gdp_per_capita"))

// Health-related features.
display(valid_distinct_features.select("iso_code", "cardiovasc_death_rate"))

display(valid_distinct_features.select("iso_code", "diabetes_prevalence"))

display(valid_distinct_features.select("iso_code", "female_smokers"))

display(valid_distinct_features.select("iso_code", "male_smokers"))

display(valid_distinct_features.select("iso_code", "hospital_beds_per_thousand"))

display(valid_distinct_features.select("iso_code", "life_expectancy"))

display(valid_distinct_features.select("iso_code", "human_development_index"))

Correlation between invariant features

Some pairs of features are highly correlated, namely: 1. median_age, aged_65_older; 2. median_age, human_development_index; 3. median_age, life_expectancy; 4. gdp_per_capita, human_development_index; 5. gdp_per_capita, life_expectancy; 6. human_development_index, life_expectancy.

// Display every column of valid_distinct_features except the two string
// identifier columns (iso_code, location).
display(
  valid_distinct_features.drop("iso_code", "location")
)

// Display df_cleaned_time_series with the columns listed below removed;
// every column not named here is kept.
display(
  df_cleaned_time_series.drop(
    // country/continent labels and the date column
    "iso_code",
    "continent",
    "location",
    "date",
    // ICU and hospital columns
    "icu_patients",
    "icu_patients_per_million",
    "hosp_patients",
    "hosp_patients_per_million",
    "weekly_icu_admissions",
    "weekly_icu_admissions_per_million",
    "weekly_hosp_admissions",
    "weekly_hosp_admissions_per_million",
    // testing columns
    "total_tests",
    "new_tests",
    "total_tests_per_thousand",
    "new_tests_per_thousand",
    "new_tests_smoothed",
    "new_tests_smoothed_per_thousand",
    "positive_rate",
    "tests_per_case",
    "tests_units",
    // remaining dropped columns
    "extreme_poverty",
    "handwashing_facilities"
  )
)