diff --git a/Exploratory_FA.R b/Exploratory_FA.R deleted file mode 100644 index f7f43b0..0000000 --- a/Exploratory_FA.R +++ /dev/null @@ -1,466 +0,0 @@ -######## Define the parameter lists ########### - -### 02.10.2023 - -## Este script contiene el flujo de trabajo para ejecutar la función search_fa_parameters para realizar -## varios análisis factoriales exportando sus resultados a un excel - - -library(psych) -library(GPArotation) -library(dplyr) -library(corrplot) - -########################################################################################### - - -# 1- Cargar datos completos - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - -lapply(lista_datos,KMO) - -fa(lista_datos$df_datos_PTrans.csv,4, rotate = "varimax", fm = "minchi") - -completos_PTRANS <- lista_datos$df_datos_PTrans.csv - - -################################################################################################# - -# 2.- Crear distintos set de datos - -# Los set de datos se van a crear eliminando progresivamente variables utilizando como criterio la distribucion original de la variable - -# Que tan alejada de la distribución normal se encuentra la variable, y el número de outliers que existen - -# Primer intento: Eliminar SQUARE_MN, AREA_AM, DIVISION, Vehiculos, SHAPE_MD, PobH. - -# Finalmente evaluamos el adecuación del dataset para realizar un análisis factorial utilizando -# el test de Kaiser-Meyer-Olkin. Esta se realaciona a la propoporcion de varianza entre las variables que podria ser -# varianza común. Mientras más alto el valor, más adecuado el dataset o la variable para realizar un análisis factorial. 
- - -lista_sel_44 <- lapply(lista_datos, dplyr::select, -c("SQUARE_MN","AREA_AM", "DIVISION", "Vehiculos","SHAPE_MD","PobH")) - -lapply(lista_sel_44,KMO) - -PTRANS_44 <- lista_sel_44$df_datos_PTrans.csv -NORM_44 <- lista_sel_44$df_datos_Normalizer.csv - -######################################################################################################## - -lista_sel_varios <- lapply(lista_datos, dplyr::select, -c("SQUARE_MN","AREA_AM", "DIVISION", "Vehiculos","SHAPE_MD","PobH","PobM","LPI","SQUARE_MD","FRAC_MD","T_Viv_Prin", - "T_Viv_Sec","Viv_vacias","MESH","PAFRAC","AREA_MN",)) -lapply(lista_sel_varios,KMO) - -PTRANS32<- lista_sel_varios$df_datos_PTrans.csv -NORM32<- lista_sel_varios$df_datos_Normalizer.csv - -######################################################################### - - -lista_ocio <- lapply(lista_datos, dplyr::select, -c("SQUARE_MN","AREA_AM", "DIVISION", "Vehiculos","SHAPE_MD","PobH","PobM","LPI","SQUARE_MD","FRAC_MD","T_Viv_Prin", - - "T_Viv_Sec","Viv_vacias","MESH","PAFRAC","AREA_MN","OCIO")) -Maxabs_ocio <- lista_ocio$df_datos_Maxabs.csv -MinMax_ocio <- lista_ocio$df_datos_MinMax.csv -Norm_ocio <- lista_ocio$df_datos_Normalizer.csv -Ptrans_ocio <- lista_ocio$df_datos_PTrans.csv -Rscaler_ocio <- lista_ocio$df_datos_Rscaler.csv -Std_ocio <- lista_ocio$df_datos_std.csv - - -####################################################### - -lista_sel1 <- lapply(lista_datos, dplyr::select, c("LPI","AREA_MN","AREA_AM","GYRATE_MN","SHEI","NP","DIVISION","SPLIT","SHAPE_MN", "FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","PobT","Vehiculos","T_Viviendas","T_Viv_Sec","COM","ED_SING","EQUIP","IND","OCIO","OFI","RES_PLU", "RES_UNI")) - -lapply(lista_sel1,KMO) - -Norm_sel1 <- lista_sel1$df_datos_Normalizer.csv -PTrans_sel1 <- lista_sel1$df_datos_PTrans.csv - -################################################################################ -################################################################################ - -lista_sel2 <- lapply(lista_datos, 
dplyr::select, c("LPI","AREA_MN","GYRATE_MN","SHEI","NP","SPLIT","SHAPE_MN", "FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","PobT","T_Viviendas","T_Viv_Sec","COM","ED_SING","EQUIP","IND","OCIO","OFI","RES_PLU","RES_UNI")) -lapply(lista_sel2,KMO) - - -Norm_sel2 <- lista_sel2$df_datos_Normalizer.csv -PTrans_sel2 <- lista_sel2$df_datos_PTrans.csv - -################################################################################ -################################################################################# - -lista_sel3 <- lapply(lista_datos, dplyr::select, c("LPI","AREA_MN","GYRATE_MN","SHEI","SPLIT","SHAPE_MN", "FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","T_Viviendas","COM","ED_SING","EQUIP","IND","OCIO","OFI","RES_PLU","RES_UNI")) -lapply(lista_sel3,KMO) - -Norm_sel3 <- lista_sel3$df_datos_Normalizer.csv -PTrans_sel3 <- lista_sel3$df_datos_PTrans.csv - -###################################################################################################### - -lista_sel4 <- lapply(lista_datos, dplyr::select, c("LPI","AREA_MN","GYRATE_MN","SHEI","SPLIT","SHAPE_MN", "FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","T_Viviendas","COM","ED_SING","EQUIP","IND","OCIO","OFI","RES_PLU")) - -lapply(lista_sel4,KMO) - -Norm_sel4 <- lista_sel4$df_datos_Normalizer.csv - -## Nuevas pruebas de seleccion de variables #### - -### 26.06.2023 ### - - -lista_seleccion_general1 <- lapply(lista_datos, dplyr::select, c("TA","LPI","AREA_MN","AREA_AM","AREA_MD","GYRATE_MN","GYRATE_AM","GYRATE_MD","SHEI","SIDI","SPLIT","MESH","SHAPE_MN", "PAFRAC", "SHAPE_MD","PARA_MN","PARA_MD","FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","PobT", "Vehiculos", "T_Viv_Prin","T_Viv_Sec", "COM","ED_SING","EQUIP","IND","OCIO","OFI","RES_PLU","RES_UNI")) - - -lista_seleccion_general <- lapply(lista_datos, dplyr::select, c("TA","LPI","AREA_MN","AREA_AM","AREA_MD","GYRATE_MN","GYRATE_AM","GYRATE_MD","SHEI","SIDI","SPLIT","MESH","SHAPE_MN", "PAFRAC", 
"SHAPE_MD","PARA_MN","PARA_MD","FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","PobT", "Vehiculos", "T_Viv_Prin","T_Viv_Sec", "COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - -datos_seleccion_general <- list(lista_seleccion_general$df_datos_Maxabs.csv,lista_seleccion_general$df_datos_MinMax.csv,lista_seleccion_general$df_datos_Normalizer.csv, lista_seleccion_general$df_datos_Normalizer.csv, - lista_seleccion_general$df_datos_PTrans.csv, lista_seleccion_general$df_datos_Rscaler.csv, lista_seleccion_general$df_datos_std.csv) - -nombres_seleccion_general <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", "df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - - - -n_factors <- c(3,4,5) -fm_methods <- c("minchi") -#rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") -rotate_methods <- c("promax","oblimin","bentlerQ","geominQ") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_general, nombres_seleccion_general, n_factors, fm_methods, rotate_methods) - - -######################################################### - -### Nuevas pruebas de seleccion de variables #### - -### 27.06.2023 ### - - -### Cargar datos completos #### - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - - - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - - -lista_seleccion_1 <- lapply(lista_datos, dplyr::select, c("TA","LPI","AREA_MN","AREA_AM","AREA_MD","GYRATE_MN","GYRATE_AM","GYRATE_MD","SHEI","SIDI","SPLIT","MESH","SHAPE_MN","PAFRAC","SHAPE_MD","FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","PobT", "Vehiculos","T_Viv_Prin", "COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - - -datos_seleccion_1 <- 
list(lista_seleccion_1$df_datos_Maxabs.csv,lista_seleccion_1$df_datos_MinMax.csv,lista_seleccion_1$df_datos_Normalizer.csv, lista_seleccion_1$df_datos_Normalizer.csv, - lista_seleccion_1$df_datos_PTrans.csv, lista_seleccion_1$df_datos_Rscaler.csv, lista_seleccion_1$df_datos_std.csv) - -nombres_seleccion_1 <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", "df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - - -n_factors <- c(4,5) -fm_methods <- c("minchi") -rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") -#rotate_methods <- c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_1, nombres_seleccion_1, n_factors, fm_methods, rotate_methods) - - -################################################################################################################ - - -# Nuevas pruebas de seleccion de variables - -### 28.06.2023 - -## Cargar datos completos - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - -lista_seleccion_2 <- lapply(lista_datos, dplyr::select, c("TA","LPI","AREA_MN","AREA_AM","SHEI","SIDI","SPLIT","MESH","DIVISION", "SHAPE_MN","PAFRAC","IJI","LSI","TE","ED", - "RNMDP_2020","PobT", "Vehiculos","T_Viv_Prin", "COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - - -datos_seleccion_2 <- list(lista_seleccion_2$df_datos_Maxabs.csv,lista_seleccion_2$df_datos_MinMax.csv,lista_seleccion_2$df_datos_Normalizer.csv, lista_seleccion_2$df_datos_Normalizer.csv, - lista_seleccion_2$df_datos_PTrans.csv, lista_seleccion_2$df_datos_Rscaler.csv, lista_seleccion_2$df_datos_std.csv) - -nombres_seleccion_2 <- 
c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", "df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - -n_factors <- c(4,5) -fm_methods <- c("minchi") - -#rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") - -rotate_methods <- c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_2, nombres_seleccion_2, n_factors, fm_methods, rotate_methods) - -################################################################################################################ - -# Nuevas pruebas de seleccion de variables #### - -### 28.06.2023 ### - -## Parte II. Seleccion de un menor numero de variables ## - -## Cargar datos completos #### - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - -lista_seleccion_3 <- lapply(lista_datos, dplyr::select, c("TA","AREA_MN","AREA_AM","SIDI","NP","SPLIT","DIVISION", "SHAPE_MN","PAFRAC","IJI","LSI","TE","ED", - "RNMDP_2020","T_Viviendas", "COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - - -datos_seleccion_3 <- list(lista_seleccion_3$df_datos_Maxabs.csv,lista_seleccion_3$df_datos_MinMax.csv,lista_seleccion_3$df_datos_Normalizer.csv, lista_seleccion_3$df_datos_Normalizer.csv, - lista_seleccion_3$df_datos_PTrans.csv, lista_seleccion_3$df_datos_Rscaler.csv, lista_seleccion_3$df_datos_std.csv) - -nombres_seleccion_3 <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - -n_factors <- c(5) -fm_methods <- c("minchi") - -rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") - -#rotate_methods <- 
c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_3, nombres_seleccion_3, n_factors, fm_methods, rotate_methods) - - -########################################################################################################## - -# Nuevas pruebas de seleccion de variables #### - -## 29.06.2023 #### - -## Parte I. Seleccion de un menor numero de variables ## - -## Cargar datos completos #### - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - -lista_seleccion_4 <- lapply(lista_datos, dplyr::select, c("AREA_MN","GYRATE_AM","SIDI","SPLIT","MESH","DIVISION", "SHAPE_MN","PAFRAC","IJI","LSI","ED", - "RNMDP_2020","T_Viviendas", "COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - - -datos_seleccion_4 <- list(lista_seleccion_4$df_datos_Maxabs.csv,lista_seleccion_4$df_datos_MinMax.csv,lista_seleccion_4$df_datos_Normalizer.csv, - lista_seleccion_4$df_datos_PTrans.csv, lista_seleccion_4$df_datos_Rscaler.csv, lista_seleccion_4$df_datos_std.csv) - -nombres_seleccion_4 <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - -n_factors <- c(5) -fm_methods <- c("minchi") - -#rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") - -rotate_methods <- c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_4, nombres_seleccion_4, n_factors, fm_methods, rotate_methods) - - 
-############################################################################################################################# -############################################################################################################################# - -## 29.06.2023 #### - -## Parte II ## - -## Cargar datos completos #### - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - -lista_seleccion_5 <- lapply(lista_datos, dplyr::select, c("GYRATE_AM","SIDI", "SPLIT","DIVISION", "SHAPE_MN","PAFRAC","IJI","LSI","ED", - "RNMDP_2020","T_Viviendas", "COM","ED_SING","EQUIP","OFI","RES_PLU","RES_UNI")) - - -datos_seleccion_5 <- list(lista_seleccion_5$df_datos_Maxabs.csv,lista_seleccion_5$df_datos_MinMax.csv,lista_seleccion_5$df_datos_Normalizer.csv, - lista_seleccion_5$df_datos_PTrans.csv, lista_seleccion_5$df_datos_Rscaler.csv, lista_seleccion_5$df_datos_std.csv) - -nombres_seleccion_5 <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - -n_factors <- c(5) -fm_methods <- c("minchi") -#rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") -rotate_methods <- c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_5, nombres_seleccion_5, n_factors, fm_methods, rotate_methods) - -############################################################################################## - -## Analysis 04.07.2023 #### - - -## Cargar datos completos #### - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - - - -lista_datos <- lapply(archivos, read.csv, 
fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - - -lista_seleccion_ohne_AM <- lapply(lista_datos, dplyr::select, c("TA","LPI","AREA_MN","AREA_MD","GYRATE_MN","GYRATE_MD","SHEI","SIDI","SPLIT","MESH","SHAPE_MN","PAFRAC","SHAPE_MD","FRAC_MD","IJI","LSI","TE","ED", - "RNMDP_2020","PobT", "Vehiculos","T_Viv_Prin", "COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - - -datos_seleccion_ohne_AM <- list(lista_seleccion_ohne_AM$df_datos_Maxabs.csv,lista_seleccion_ohne_AM$df_datos_MinMax.csv,lista_seleccion_ohne_AM$df_datos_Normalizer.csv, lista_seleccion_ohne_AM$df_datos_Normalizer.csv, - lista_seleccion_ohne_AM$df_datos_PTrans.csv, lista_seleccion_ohne_AM$df_datos_Rscaler.csv, lista_seleccion_ohne_AM$df_datos_std.csv) - -nombres_seleccion_ohne_AM <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", "df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - - -n_factors <- c(5) -fm_methods <- c("minchi") -rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") - -#rotate_methods <- c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_ohne_AM, nombres_seleccion_ohne_AM, n_factors, fm_methods, rotate_methods) - - - -############################################################################################################ - -## Analysis 05.07.2023 #### - -## Cargar datos completos #### - -setwd("C:/Users/CRISTIAN/github/clustering-metrics/datos/datos_completos") - -archivos <- list.files(path = ".", pattern = "*.csv$") - - - -lista_datos <- lapply(archivos, read.csv, fileEncoding = "ISO-8859-1") %>% lapply(dplyr::select, -c("Ciudades","X")) - -names(lista_datos) = archivos - - -lista_seleccion_ohne_AM <- lapply(lista_datos, dplyr::select, 
c("TA","LPI","AREA_MN","SIDI","SPLIT","MESH","SHAPE_MN","PAFRAC","IJI","LSI","ED", - "RNMDP_2020","T_Viviendas","COM","ED_SING","EQUIP","IND","OFI","RES_PLU","RES_UNI")) - - - -datos_seleccion_ohne_AM <- list(lista_seleccion_ohne_AM$df_datos_Maxabs.csv,lista_seleccion_ohne_AM$df_datos_MinMax.csv,lista_seleccion_ohne_AM$df_datos_Normalizer.csv, lista_seleccion_ohne_AM$df_datos_Normalizer.csv, - lista_seleccion_ohne_AM$df_datos_PTrans.csv, lista_seleccion_ohne_AM$df_datos_Rscaler.csv, lista_seleccion_ohne_AM$df_datos_std.csv) - -nombres_seleccion_ohne_AM <- c("df_datos_Maxabs","df_datos_MinMax","df_datos_Normalizer", "df_datos_Normalizer", - "df_datos_PTrans", "df_datos_Rscaler", "df_datos_std") - - -n_factors <- c(5) - -fm_methods <- c("minchi") - -#rotate_methods <- c("varimax","quartimax","bentlerT","equamax","varimin","geominT") - -rotate_methods <- c("varimax","equamax") - -#rotate_methods <- c("promax","oblimin") - -setwd("C:/Users/CRISTIAN/github/clustering-metrics") - -source("function_search_FA_parameters.R") - -library(dplyr) - -factor_analysis_export(datos_seleccion_ohne_AM, nombres_seleccion_ohne_AM, n_factors, fm_methods, rotate_methods) - - diff --git a/calculo_porcentaje_uso_ciudades.ipynb b/calculo_porcentaje_uso_ciudades.ipynb deleted file mode 100644 index ce31478..0000000 --- a/calculo_porcentaje_uso_ciudades.ipynb +++ /dev/null @@ -1,622 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Cálculo de porcentajes de uso del suelo" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Este script abarca el flujo de trabajo completo para obtener una tabla con el porcentaje de ocupación de uso del suelo para cada una de las ciudades de más de 100.000 habitantes de España, excluyendo el uso sin edificar y otros. 
Los pasos que contempla este flujo de trabajo son los siguientes: \n", - "\n", - "1.- Crear nueva columna para la categoría uso agregado.\n", - "2.- Reclasificar los usos a una nueva clase uso agregado.\n", - "3.- Asignar un nuevo código numérico a los usos de la categoría uso agregado.\n", - "4.- Exportar los shapefile con las modificaciones realizadas.\n", - "5.- Eliminar las clases sin edificar y otros.\n", - "6.- Calcular los porcentajes de ocupación de cada uso respecto al total, sin considerar sin edificar y otros. " - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import geopandas as gpd\n", - "import os\n", - "import glob\n", - "import string" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " ### Incorporar una barra de progreso por medio de la libraría tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "from tqdm import tqdm, trange \n", - "from time import sleep\n", - "from tqdm.notebook import tqdm" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Definir el directorio utilizando la librería os\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "path =(r\"F:/Respaldo toshiba/projectos/shapes/marcos_clasificacion_ag/\")\n", - "# Definir directorio de trabajo\n", - "os.chdir(path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Leer de forma iterativa cada uno de los shapefiles y almacenarlos en un diccionario con clave igual al nombre del archivo, y valor correspondiente al shapefile." 
- ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e1dd2897ff1347f7ac73f5738b11a5c1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/72 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
USOUSO_AG
0COMCOM
1ED_SINGED_SING
2EQUIP_EDUEQUIP
3EQUIP_OTREQUIP
4EQUIP_SANIEQUIP
5HOS_RESTOCIO
6INDIND
7IND_MXIND
8OCIO_ESPOCIO
9OFIOFI
10RES_PLURES_PLU
11RES_PLU_MXRES_PLU
12RES_UNIRES_UNI
13RES_UNI_MXRES_UNI
14SIN_EDIFSIN_EDIF
15OTROSOTROS
\n", - "" - ], - "text/plain": [ - " USO USO_AG\n", - "0 COM COM\n", - "1 ED_SING ED_SING\n", - "2 EQUIP_EDU EQUIP\n", - "3 EQUIP_OTR EQUIP\n", - "4 EQUIP_SANI EQUIP\n", - "5 HOS_REST OCIO\n", - "6 IND IND\n", - "7 IND_MX IND\n", - "8 OCIO_ESP OCIO\n", - "9 OFI OFI\n", - "10 RES_PLU RES_PLU\n", - "11 RES_PLU_MX RES_PLU\n", - "12 RES_UNI RES_UNI\n", - "13 RES_UNI_MX RES_UNI\n", - "14 SIN_EDIF SIN_EDIF\n", - "15 OTROS OTROS" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clases_agregadas = pd.read_csv(r\"F:/Respaldo toshiba/projectos/shapes/clases_agregadas.csv\")\n", - "clases_agregadas" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Definimos una función para hacer una join usando la función merge de pandas. Ejecuta un leftjoin a partir de la columna común uso." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "def uso_agregado(data_in, clases):\n", - " data_out = {}\n", - " for key, value in data_in.items():\n", - " data_out[key] = value.merge(clases, on = 'USO', how = 'left')\n", - " return data_out\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Aplicar la función y obtenemos un nuevo diccionario con el uso agregado" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "uso_agregado = uso_agregado(dict_areas,clases_agregadas)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Definimos una función para asignar un codigo por categoria agregada" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [], - "source": [ - "def asignacion_codigo_categorias(data_in):\n", - " data_out = {}\n", - " for key, df in data_in.items():\n", - " df = df.reset_index()\n", - " usos = df['USO_AG'].unique()\n", - " cod_usos = np.arange(len(usos))\n", - " cod_map = 
pd.Series(cod_usos, index=usos)\n", - " df['cod_usos_a'] = df['USO_AG'].map(cod_map)\n", - " data_out[key] = df\n", - " return data_out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Aplicamos la función para obtener un código númerico asociado a cada categoría." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "uso_agregado_cat = asignacion_codigo_categorias(uso_agregado)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "Calcular la suma de las áreas de cada categoría agregada" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "df_final = pd.DataFrame()\n", - "for key, value in uso_agregado_cat.items():\n", - " temp = uso_agregado_cat[key] \n", - " temp_cat = temp.groupby('USO_AG')['Area'].sum()\n", - " temp_area = temp.groupby('USO_AG')['Area'].sum().sum()\n", - " temp_porcentaje = (temp_cat/temp_area)*100\n", - " df_temp = temp_porcentaje.to_frame().rename(columns = {'Area':key})\n", - " df_final = pd.concat([df_final,df_temp], axis = 1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exportar los datos a un csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Definimos una función para eliminar la categoría otros de todos los shapesfiles." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "dict_filter_sin_otros = {}\n", - "for key, value in uso_agregado_gtp_cat.items():\n", - " ciudad = uso_agregado_gtp_cat[key].copy()\n", - " ciudad1 = ciudad[ciudad.loc[:,'USO_AG'] != \"OTROS\"].copy()\n", - " dict_filter_sin_otros[key] = ciudad1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Calcular los porcentajes sin considerar la categoría otros. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "df_sin_otros = pd.DataFrame()\n", - "for key, value in dict_filter_otros.items():\n", - " temp = dict_filter_otros[key] \n", - " temp_cat = temp.groupby('USO_AG')['Area'].sum()\n", - " temp_area = temp.groupby('USO_AG')['Area'].sum().sum()\n", - " temp_porcentaje = (temp_cat/temp_area)*100\n", - " df_temp = temp_porcentaje.to_frame().rename(columns = {'Area':key})\n", - " df_otros = pd.concat([df_sin_otros,df_temp], axis = 1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finalmente, volvemos a calcular los porcentajes de cada categor铆a respecto al total, pero sin considerar la categor铆a otros, y sin edificaci贸n." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dict_filter = {}\n", - "for key, value in uso_agregado_gtp_cat.items():\n", - " ciudad = uso_agregado_gtp_cat[key].copy()\n", - " ciudad1 = ciudad[ciudad.loc[:,'USO_AG'] != \"SIN_EDIF\"].copy()\n", - " ciudad2 = ciudad1[ciudad1.loc[:,'USO_AG'] != \"OTROS\"].copy()\n", - " dict_filter[key] = ciudad2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter = pd.DataFrame()\n", - "for key, value in dict_filter.items():\n", - " temp = dict_filter[key] \n", - " temp_cat = temp.groupby('USO_AG')['Area'].sum()\n", - " temp_area = temp.groupby('USO_AG')['Area'].sum().sum()\n", - " temp_porcentaje = (temp_cat/temp_area)*100\n", - " df_temp = temp_porcentaje.to_frame().rename(columns = {'Area':key})\n", - " df_final_filter = pd.concat([df_final_filter,df_temp], axis = 1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Exportar la tabla final" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter.to_csv('')" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "La tabla tiene las variables en las filas y las observaciones en las columnas, por lo tanto tenemos que transformarla (Formato wide a long)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter_melt = df_final_filter.melt(id_vars = \"USO_AG\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter_pivot = df_final_filter_melt.pivot(columns = \"USO_AG\", values = 'value', index = 'variable')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter_pivot = df_final_filter_pivot.reset_index()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter_pivot.to_csv(\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Finalmente queremos corregir el nombre de las ciudades para dejar solamente el nombre de las ciudades. Esto lo haremos utilizando expresiones regulares." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter_pivot['Ciudad'] = df_final_filter_pivot['Ciudades'].str.replace('_', ' ')\n", - "df_final_filter_pivot['Ciudad'] = df_final_filter_pivot['Ciudad'].str.replace(r'\\.txt$', '')\n", - "df_final_filter_pivot['Ciudad'] = df_final_filter_pivot['Ciudad'].apply(lambda x: x.capitalize())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rep = {'Alcorcon':'Alcorc贸n','Ciudad real':'Ciudad Real','Bodajoz':'Badajoz','Dos hermanas':'Dos Hermanas','Fuentelabra':'Fuenlabrada',\n", - "'Leganes':'Legan茅s','Logronio':'Logro帽o','Mataro':'Matar贸','Mostoles':'M贸stoles','Santa coloma':'Santa Coloma','Santa cruz tenerife':'Santa Cruz de Tenerife',\n", - "'San cristobal':'San Crist贸bal','Terrasa':'Terrassa','Terragona':'Tarragona','Corunia':'Coru帽a','Gijon':'Gij贸n',\n", - "'Las palmas':'Palmas de Gran Canaria, Las'}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def replace_cities(df, replacements,col):\n", - " for city, replacement in replacements.items():\n", - " df[col] = df[col].str.replace(city, replacement)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "df_final_filter_pivot_replace = replace_cities(df_final_filter_pivot, rep,'Ciudad')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "datamecum", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, 
- "nbformat_minor": 2 -} diff --git a/datos_metricas_socioeconomicos_porcentajes.csv b/datos_metricas_socioeconomicos_porcentajes.csv deleted file mode 100644 index c505c02..0000000 --- a/datos_metricas_socioeconomicos_porcentajes.csv +++ /dev/null @@ -1,73 +0,0 @@ -Ciudades,TA,LPI,AREA_MN,AREA_AM,AREA_MD,GYRATE_MN,GYRATE_AM,GYRATE_MD,PRD,SHDI,SIDI,MSIDI,SHEI,SIEI,MSIEI,NP,DIVISION,SPLIT,MESH,PAFRAC,SHAPE_MN,SHAPE_MD,PARA_MN,PARA_MD,FRAC_MN,FRAC_MD,SQUARE_MN,SQUARE_MD,IJI,LSI,TE,ED,RNMDP_2020,PobT,PobH,PobM,Vehiculos,T_Viviendas,T_Viv_Prin,T_Viv_Sec,Viv_vacias,COM,ED_SING,EQUIP,IND,OCIO,OFI,RES_PLU,RES_UNI -Albacete,1605.293355,2.929497,0.116461,2.976411,0.03152,8.380829,36.057566,5.710261,0.498351,0.723601,0.755052,1.40671,0.745706,0.862917,0.676484,13784,0.998146,539.338537,29764.11375,1.036782,1.239875,1.196013,0.251356,0.242674,1.071821,1.06197,0.523948,0.500157,9.879637,79.687495,1546155.784,963.160895,12529,174336,85214,89122,125053,83250,62980,5168,15081,0.44501767,6.865099536,12.01865817,28.68764933,0.698910129,0.55346804,16.78948082,33.94171631 -Alcal de Henares,1615.304126,2.451039,0.136451,4.559123,0.019854,7.591469,55.583827,4.874116,0.495263,0.76967,0.746116,1.370878,0.771443,0.852704,0.659253,11838,0.997178,354.3015,45591.23029,1.025171,1.293679,1.267219,0.325155,0.328765,1.095561,1.092224,0.575723,0.578704,11.52813,63.226395,1220331.826,755.481155,13243,197562,96061,101501,124019,78947,71864,1255,5798,1.557488817,11.25917132,19.28088326,41.83691969,0.823272898,1.896483842,13.99431428,9.351465887 -Alcobendas,1386.626321,2.882984,0.289423,3.843857,0.069676,12.998986,45.886723,9.320043,0.495263,0.76967,0.746116,1.370878,0.771443,0.852704,0.659253,4791,0.997228,360.738228,38438.57433,1.028039,1.247419,1.185695,0.205145,0.164615,1.066683,1.054622,-3.814456,0.493816,15.371893,46.890853,848092.2295,611.622769,18684,118417,56554,61863,247035,41189,39101,566,1513,0.7796493,10.60864754,16.11861917,12.71888838,0.426557736,4.777436623,14.14253776,40.4276635 
-Alcorc髇,787.044792,3.295528,0.161214,3.023467,0.028205,9.644223,43.879007,6.0778,1.016461,0.714969,0.757634,1.417307,0.751111,0.865868,0.681581,4882,0.996158,260.312025,30234.66906,1.029029,1.248374,1.223028,0.262249,0.25072,1.068275,1.070708,0.539567,0.536001,8.229806,48.316227,606523.6182,770.634181,13522,172384,82959,89425,99385,70861,66066,1303,3488,1.917221812,6.864289345,33.29560565,25.02186498,0.369680718,0.344602126,23.83687954,8.349855836 -Algeciras,1063.319019,2.044908,0.0633,2.629819,0.013341,5.460983,38.065313,3.687046,0.752361,1.209382,0.625019,0.98088,0.626374,0.714308,0.471704,16798,0.997527,404.331644,26298.18949,1.048912,1.259996,1.214451,0.404055,0.386923,1.091095,1.080717,0.539164,0.521263,10.820873,82.217416,1289373.115,1212.59292,10665,123078,60729,62349,90188,54089,42692,4700,6686,1.155168688,2.049920947,13.42054536,14.52477325,0.86434838,0.355067089,10.73480668,56.89536961 -Alicante/Alacant,2644.25298,1.662929,0.13336,2.778957,0.026882,8.629026,35.448592,5.137771,0.302543,0.75845,0.742545,1.356912,0.741693,0.848623,0.652537,19828,0.998949,951.527179,27789.56859,1.024859,1.240818,1.204889,0.267953,0.262356,1.073578,1.066561,0.53536,0.518358,10.057429,98.014149,2347700.917,887.850343,11676,337482,163385,174097,235733,186558,132637,28707,25172,1.158215963,3.049092686,11.65871786,17.6506232,2.372703335,0.42090185,25.42464328,38.26510183 -Almer韆,1221.025597,2.647566,0.047095,3.179176,0.011569,4.981105,39.172147,3.608059,0.655187,0.713636,0.759126,1.42348,0.76275,0.867572,0.684549,25927,0.997396,384.069792,31791.76342,1.027705,1.267989,1.232491,0.416482,0.413503,1.097109,1.084962,0.555736,0.542673,10.81042,103.44117,1748123.034,1431.684183,11233,201322,98531,102791,142223,99692,69178,12276,18220,1.19786496,6.426123798,21.57405394,9.32361881,1.115114248,0.829088532,25.31985637,34.21427934 
-羦ila,1221.025597,2.647566,0.047095,3.179176,0.011569,4.981105,39.172147,3.608059,1.096773,0.664081,0.780506,1.516428,0.780474,0.892006,0.729248,7089,0.991158,113.102202,64491.48078,1.034888,1.25529,1.2124,0.321635,0.302125,1.082146,1.071951,0.538745,0.519508,10.374722,55.25446,693473.4489,950.728315,13209,58369,27784,30585,41695,34487,22770,3506,8189,0.473220582,7.168614595,24.10689063,16.73265346,1.775391427,0.24729183,19.54842686,29.94751061 -Badajoz,2459.10219,2.496544,0.117638,3.88053,0.017477,7.522677,39.867536,4.287598,0.325322,1.675347,0.54112,0.778966,0.551175,0.618423,0.374603,20904,0.998422,633.702638,38805.30146,1.008527,1.263527,1.219802,0.33739,0.338322,1.087256,1.074434,0.550015,0.535521,9.110097,88.067061,2143663.888,871.726233,11775,150984,73170,77814,116431,72067,56297,5161,10589,1.276354038,1.851106082,11.66369425,8.677849144,0.248779704,0.409059324,10.60708148,65.26607598 -Badalona,681.177151,4.079243,0.045908,2.010601,0.014343,5.476681,28.199631,4.179474,1.174438,0.667509,0.779475,1.511744,0.777582,0.890829,0.726995,14838,0.997048,338.792854,20106.00707,1.046864,1.336913,1.302643,0.388141,0.412724,1.108151,1.104945,0.622964,0.636074,14.459889,88.64089,1118574.663,1642.120059,12537,223166,110018,113148,126321,92894,81793,2199,8881,1.24626388,8.509623017,16.21313696,23.82652618,0.256721655,0.46583534,30.91355603,18.56833694 -Barcelona,4436.370992,1.165791,0.063782,4.135431,0.024406,6.204639,41.995155,5.018938,0.180328,0.830052,0.726807,1.297576,0.76185,0.830636,0.624002,69555,0.999068,1072.771035,41354.3137,1.053047,1.294586,1.247781,0.314192,0.289829,1.093063,1.080488,0.571572,0.56994,17.034972,176.428499,6127182.411,1381.124893,16750,1664182,790708,873474,968655,811521,684078,38769,88259,1.748788632,4.652241115,18.29327773,14.51636168,2.092078116,2.720612859,45.06742927,10.9092106 
-Burgos,1736.579705,2.081176,0.18573,5.610663,0.030097,8.833211,58.938853,5.566782,0.460676,1.032058,0.668666,1.104628,0.667596,0.764189,0.531214,9350,0.996769,309.514149,56106.63393,1.041695,1.256064,1.222159,0.285839,0.251415,1.078412,1.069578,0.548882,0.53238,8.134834,59.286097,1126346.242,648.600372,14421,176418,83995,92423,107558,92140,71076,6893,14107,0.791483702,8.259385879,14.40477962,52.3121759,0.307489852,0.136661612,14.84266721,8.945356237 -C醕eres,1004.264138,10.801445,0.087993,15.591491,0.017182,6.305038,78.210549,4.368,0.796603,0.676864,0.773811,1.486385,0.767844,0.884356,0.7148,11413,0.984475,64.411039,155914.9093,1.033562,1.254194,1.220952,0.339863,0.348555,1.084645,1.073838,0.543867,0.532931,9.008143,65.01955,981162.6486,976.996601,12815,96255,46104,50151,73858,50311,37048,5989,7256,0.434506667,5.610840978,20.21645876,27.46440934,1.403430191,0.404661328,16.04868791,28.41700482 -C醖iz,346.91323,4.314831,0.06941,1.354117,0.022331,6.344666,26.279749,4.66083,2.306052,0.847446,0.71318,1.248899,0.710609,0.815062,0.600593,4998,0.996097,256.191456,13541.17093,1.065735,1.242394,1.198581,0.295931,0.287829,1.076832,1.065849,0.507219,0.486576,10.370918,51.652735,448626.6019,1293.195426,12997,115439,54580,60859,70672,59204,46966,5346,6874,1.045978109,9.97986691,17.39455304,22.72792078,1.604226427,1.280809783,44.05111827,1.915526688 -Cartagena,2538.032439,5.180597,0.061825,12.172446,0.015923,5.60611,56.254443,4.020624,0.315205,0.790186,0.734601,1.326523,0.733105,0.839545,0.637923,41052,0.995204,208.506366,121724.4577,1.030318,1.24236,1.201139,0.352023,0.341692,1.083272,1.071986,0.527564,0.508556,12.057665,129.406205,3142335.059,1238.098856,11092,216108,107478,108630,159993,115780,76599,28332,10818,1.467330278,4.402338522,14.28780137,20.87064966,1.207123214,0.138342862,15.95147077,41.67494333 -Castell de la 
Plana,1065.590722,6.736165,0.072494,7.510539,0.015936,6.126644,50.773006,4.231785,0.750757,0.629566,0.796432,1.591756,0.803526,0.910208,0.765473,14699,0.992952,141.879403,75105.3851,0.995107,1.300942,1.239912,0.366698,0.353584,1.101108,1.081041,0.582947,0.565608,14.856298,77.022664,1235147.245,1159.119744,12785,174264,84319,89945,128463,90990,68917,8363,13696,1.462352126,12.12712797,14.61217847,23.72778937,0.834905352,0.157396688,24.26244076,22.81580926 -Ceuta,349.66179,3.370658,0.0513,2.274088,0.009016,4.6342,37.428848,3.044126,2.287925,0.620345,0.801459,1.616759,0.821489,0.915953,0.777497,6816,0.993496,153.759147,22740.87731,1.095603,1.262175,1.203947,0.47462,0.456466,1.095877,1.082542,0.488503,0.496238,10.172125,54.792736,456502.5037,1305.554445,12358,84202,42542,41660,67589,26664,24904,413,1335,0.878866735,19.96326745,12.05413647,18.74626709,2.046676515,0.597665077,18.38491296,27.32820769 -Ciudad Real,575.896521,7.185502,0.079963,4.279406,0.023164,6.736786,39.660272,5.021657,1.389138,0.713925,0.756267,1.411683,0.744301,0.864305,0.678876,7202,0.992569,134.573939,42794.05996,1.028727,1.284991,1.257552,0.298989,0.302434,1.089199,1.082145,0.573261,0.56893,11.548152,55.824071,669910.4258,1163.247912,13580,75504,35773,39731,48679,38018,29682,3070,5253,0.615294047,3.277289094,31.04549861,11.35236866,0.726024455,1.390794448,26.06445735,25.52827334 -C髍doba,3892.869064,1.245034,0.11267,2.730728,0.027762,8.262326,35.02702,5.389476,0.205504,1.082266,0.651208,1.053279,0.645673,0.744237,0.50652,34551,0.999299,1425.579338,27307.27755,1.028187,1.284991,1.213202,0.287245,0.269167,1.078779,1.068802,0.523638,0.500479,8.638293,127.167742,3794441.747,974.715996,11791,326039,156523,169516,225687,152772,121823,13280,17615,0.545181932,3.625494744,9.032239778,20.03786553,0.994550043,0.300080889,12.14712247,53.31746462 -"Coru馻, 
A",1141.314854,2.34648,0.078695,3.041818,0.01881,6.538037,35.02702,4.529875,0.700946,0.645963,0.787801,1.55023,0.793153,0.900344,0.745503,14503,0.997335,375.208183,30418.17601,1.068158,1.259186,1.210611,0.327174,0.322582,1.084438,1.074097,-369540.8625,0.539218,9.371166,80.963926,1298866.823,1138.044264,14591,247604,114776,132828,144917,135450,105584,10544,19283,1.042108506,6.741383266,23.87092748,22.19081973,0.875388518,0.810725057,25.62930555,18.83934189 -Cuenca,478.304019,3.312846,0.074433,1.729964,0.013484,6.117953,36.723471,3.713148,1.672576,0.638837,0.792325,1.571782,0.805076,0.905514,0.755867,6426,0.996383,276.482083,17299.63891,1.041051,1.257844,1.218278,0.41456,0.380563,1.085491,1.078784,0.542628,0.527547,8.71695,54.160052,523593.1765,1094.686968,13160,54621,25956,28665,40587,30958,21473,4594,4866,1.500509888,10.14447526,15.38808253,26.96001378,0.571314941,0.905879967,25.03576172,19.4939619 -Dos Hermanas,1925.880926,4.048958,0.081792,8.973081,0.01476,6.152659,56.908686,3.957091,0.415394,0.835171,0.713537,1.250146,0.692011,0.815471,0.601193,23546,0.995341,214.628729,89730.80787,1.018742,1.270227,1.230733,0.342839,0.368318,1.085491,1.08187,0.551995,0.54694,8.466449,90.426492,1983809.302,1030.078898,10994,135050,66330,68720,97507,50661,44299,1094,5258,1.019749879,5.436159202,17.73784075,26.50511794,0.282604548,0.211401794,6.716097268,42.09102862 -Elche/Elx,1986.561847,3.488935,0.111261,3.578773,0.025248,8.125683,34.893413,5.101094,0.402706,0.918435,0.693897,1.183835,0.683827,0.793026,0.569304,17855,0.998199,555.095863,35787.72568,1.014367,1.239454,1.206914,0.276196,0.269019,1.074923,1.06784,0.234472,0.519107,10.0178,90.785293,1935341.469,974.21657,9840,234765,116386,118379,164532,113351,86118,13660,13558,2.28447609,1.580429639,11.91945174,19.66095931,0.877565925,0.156869792,15.84665598,47.67359152 
-Fuenlabrada,986.88558,4.797541,0.123561,4.437921,0.034045,9.006354,42.055108,6.179378,0.810631,0.937728,0.687368,1.162728,0.687395,0.785563,0.559154,7987,0.995503,222.375662,44379.20819,1.027474,1.297684,1.253234,0.265427,0.233612,1.096833,1.07568,0.562036,0.554194,15.336302,66.95934,942800.8973,955.329489,11205,194514,95847,98667,122452,70841,66655,674,3506,2.143776295,4.22067063,22.22148147,48.25205318,1.178906775,0.210116081,15.54435271,6.228642869 -Getafe,1333.511318,3.280688,0.090377,4.404839,0.018878,6.498853,48.168808,4.376008,0.59992,0.791764,0.737726,1.338367,0.741321,0.843116,0.643618,14755,0.996697,302.737816,44048.38937,1.023921,1.277073,1.242009,0.319235,0.328134,1.089673,1.081304,0.575847,0.559715,6.651213,71.109092,1292087.672,968.936412,13638,185180,90403,94777,107745,69002,62509,1663,4812,1.391918878,8.96903095,14.62894766,42.7313536,0.411231219,0.267820787,15.14063568,16.45906123 -Gij髇,2071.146768,3.37409,0.141714,7.742569,0.036526,8.640796,59.908947,5.967553,0.386259,0.771771,0.735261,1.329011,0.708071,0.840298,0.639119,14615,0.996262,267.501224,77425.69323,1.061926,1.235201,1.187461,0.240152,0.225954,1.068407,1.059691,0.529323,0.50022,8.487592,80.800661,1742175.234,841.164548,13842,271717,127961,143756,167896,147212,122519,8079,16551,0.595710872,2.246498579,14.10405399,38.15103855,0.537690162,0.440848625,18.61450426,25.30965497 -Girona,679.734461,1.979426,0.072266,1.115294,0.026972,6.89068,23.841674,5.343331,1.17693,0.685813,0.769897,1.469227,0.768239,0.879882,0.706549,9406,0.998359,609.466839,11152.93595,1.031952,1.235201,1.221572,0.293199,0.270808,1.084289,1.071519,0.562663,0.543797,13.083362,70.786354,882978.1671,1299.004563,14750,103369,49809,53560,76071,47486,38245,2474,6727,1.969354894,3.52277132,23.44458903,16.06646087,0.909671049,0.53342316,22.20234348,31.3513862 
-Granada,1395.765302,4.125408,0.072981,4.751803,0.015907,5.810415,43.193601,3.975179,0.573162,0.667978,0.778922,1.509238,0.789183,0.890196,0.72579,19125,0.996596,293.733857,47518.02585,1.037741,1.244693,1.203264,0.36677,0.342883,1.083137,1.07286,0.512325,0.493546,10.861967,89.200138,1540392.448,1103.618528,13251,233648,107829,125819,173509,140027,96916,16029,27024,1.048003945,7.500218094,27.4712954,13.88726665,1.2477455,0.874957416,29.13292877,18.83758422 -Guadalajara,788.017019,3.07834,0.104845,3.024986,0.019503,7.150348,43.903155,4.658112,1.015207,0.677647,0.775695,1.494749,0.7899,0.886509,0.718822,7516,0.996161,260.502719,30249.85776,1.025042,1.281085,1.250415,0.314805,0.324792,1.089258,1.083527,0.567957,0.565557,13.057642,55.134655,742005.5915,941.611125,13514,87484,42091,45393,57686,40223,32538,2263,5398,1.474126118,5.185273581,16.5472776,33.08551424,2.224587785,0.599759579,18.94362021,21.93984089 -"Hospitalet de Llobregat, L'",629.336224,2.738342,0.060023,2.476622,0.017624,5.836993,37.187806,4.335084,1.271181,0.83472,0.705742,1.223298,0.669583,0.806562,0.588282,10485,0.996065,254.110685,24766.22439,1.043084,1.294451,1.26153,0.341749,0.361694,1.096029,1.090425,0.596134,0.595362,14.031637,66.061117,857621.5779,1362.739893,11726,269382,130289,139093,126843,112105,102031,1429,8605,1.007766588,2.463846665,20.79592604,37.89075394,0.397821571,0.812028081,32.37917847,4.252678643 -Huelva,723.771461,6.668003,0.072836,8.179238,0.018571,5.920937,61.218199,4.433064,1.105321,0.672091,0.777305,1.50195,0.784593,0.888348,0.722285,9937,0.988699,88.488862,81792.38016,1.035522,1.264199,1.227771,0.355944,0.318338,1.090463,1.078018,0.532601,0.516791,9.983486,64.017992,799234.623,1104.263799,11310,143837,69043,74794,98494,67752,55255,3979,8507,1.639460011,9.137241276,30.97576606,22.86379262,0.863142065,0.446594343,23.24439887,10.82960476 
-Huesca,388.279842,2.759404,0.100931,1.452965,0.036755,8.069576,30.759632,6.25183,2.06037,0.82612,0.72376,1.286486,0.727665,0.827154,0.618669,3847,0.996258,267.232724,14529.65173,1.045864,1.280859,1.231216,0.277026,0.231976,1.084043,1.073433,0.552099,0.534035,11.90866,46.307333,424146.7461,1092.373852,13630,53956,25702,28254,36057,28310,20786,2615,4883,1.476991681,8.280800709,14.77907294,43.77816207,0.317369712,0.766063212,21.92581989,8.675719798 -Ja閚,742.615915,2.867485,0.057688,2.734006,0.013795,5.840019,36.917021,4.181158,1.077273,0.634194,0.793492,1.577414,0.796878,0.906848,0.758576,12873,0.996318,271.621896,27340.06078,1.039976,1.250475,1.220862,0.375607,0.37575,1.087279,1.079303,0.536592,0.526757,7.97694,75.435438,964115.2493,1298.269037,12495,112757,54244,58513,82789,54642,43258,4123,7247,1.188939386,8.65018791,22.22008346,22.82412436,0.524742753,0.530847461,20.82115597,23.2399187 -Jerez de la Frontera,2182.981449,4.669319,0.078415,9.203425,0.015923,5.986853,53.919284,4.123398,0.366471,0.710242,0.76436,1.445449,0.770671,0.873554,0.695114,27839,0.995784,237.192296,92034.2474,1.039824,1.262895,1.22998,0.346752,0.345283,1.087721,1.078811,0.391348,0.532329,11.581823,102.536358,2336904.637,1070.510535,10049,213105,104086,109019,156740,92037,77378,4424,10213,0.942903803,13.89946354,12.21691798,23.12638675,0.818010371,0.540065787,11.9141192,36.54213256 -"Palmas de Gran Canaria, Las",1576.635765,2.406659,0.0416,2.056654,0.013085,4.793442,28.428436,3.592223,0.50741,0.687819,0.770551,1.472074,0.782022,0.880629,0.707918,37900,0.998696,766.602228,20566.54295,1.059347,1.226561,1.197519,0.367684,0.375381,1.080615,1.072834,0.523458,0.506516,14.176409,130.947028,2516826.563,1596.327205,12509,381223,183905,197318,275061,182930,144987,8299,29604,2.334813533,6.776839401,17.48749089,11.97842239,0.5785812,0.971454562,30.24429742,29.6281006 
-Legan閟,1189.409657,12.402124,0.143251,21.368705,0.025194,7.908193,85.224575,5.015887,0.672603,0.665155,0.779888,1.513618,0.776091,0.8913,0.727896,8303,0.982034,55.661289,213687.0481,1.02423,1.272742,1.225505,0.286329,0.293514,1.084037,1.071646,0.548894,0.527147,8.331189,57.201862,903347.5826,759.492389,12584,191114,92573,98541,111070,75525,70949,651,3914,0.644739285,23.15812653,19.62997727,30.14009363,0.586674277,0.556979416,17.66001629,7.623393303 -Le髇,940.397089,4.102869,0.093955,5.121285,0.0226,6.788706,64.186164,4.76932,0.850704,0.687343,0.768489,1.46313,0.770071,0.878274,0.703617,10009,0.994554,183.625222,51212.84974,1.070044,1.239692,1.19268,0.296222,0.289311,1.075591,1.066118,0.518728,0.497454,11.487682,65.560972,934193.8255,993.40357,14434,124028,56191,67837,78774,78776,57490,5889,15346,2.221450988,3.365608961,24.46232624,28.84845073,0.622502926,1.212641379,26.53995649,12.72706229 -Lleida,1021.769176,2.504677,0.112307,3.573076,0.024872,7.461297,45.294329,5.065696,0.782956,0.706056,0.764,1.443922,0.756322,0.873142,0.69438,9098,0.996503,285.963493,35730.7559,1.040533,1.257449,1.207681,0.29428,0.280211,1.080504,1.070304,0.545342,0.524194,12.846056,62.381167,932638.8447,912.768624,13303,140403,69541,70862,94148,66454,55792,3167,7456,0.59529133,7.719892586,17.73560412,35.09503345,0.290183155,0.618889635,23.06261972,14.88248601 -Logro駉,852.032302,2.157928,0.158311,2.262931,0.043755,9.731187,44.4711,6.557399,0.938932,0.761619,0.738439,1.341086,0.717721,0.84393,0.644926,5382,0.997344,376.517135,22629.31015,1.030207,1.251819,1.203456,0.234857,0.206249,1.072616,1.060387,0.531431,0.508794,9.947848,52.172066,706098.2178,828.722357,13791,152485,72341,80144,93436,77682,61904,4106,11640,1.138860474,12.01304102,15.42243682,34.77140623,0.248709409,0.176393749,31.65587283,4.57327947 
-Lugo,643.117386,2.530207,0.066575,1.534823,0.022438,7.14451,28.018985,5.367742,1.243941,0.668627,0.781168,1.519453,0.797964,0.892764,0.730703,9660,0.997613,419.017367,15348.22747,1.091946,1.336751,1.286445,0.31638,0.331624,1.101408,1.092393,0.571794,0.621237,10.901138,77.274943,915007.7323,1422.769391,12992,98519,45568,52951,71727,59647,40227,7026,12370,2.131199021,9.270127611,15.8464165,20.82275622,0.731994035,0.702100673,33.53081465,16.96459129 -Madrid,643.117386,2.530207,0.066575,1.534823,0.022438,7.14451,28.018985,5.367742,0.058042,0.663808,0.782504,1.525577,0.806105,0.894291,0.733647,123098,0.998446,643.361666,214237.1569,1.038102,1.25444,1.205531,0.301839,0.274237,inf,1.06819,0.350863,0.504962,10.248382,219.605314,12057792.36,874.818229,17059,3334730,1553899,1780831,2012707,1531490,1320531,57325,153101,0.786506149,13.96393448,27.12639268,13.70369557,0.909222196,3.08618411,30.89636255,9.527702268 -M醠aga,643.117386,2.530207,0.066575,1.534823,0.022438,7.14451,28.018985,5.367742,0.231482,0.676318,0.776218,1.497082,0.791311,0.887106,0.719944,46248,0.999177,1215.723799,28427.48739,1.045858,1.236386,1.195609,0.33595,0.313932,1.077701,1.068645,0.521593,0.501037,10.104268,144.608844,3989460.882,1154.358797,11246,578460,277789,300671,420160,254736,211358,14890,28410,1.864574895,5.110216948,18.10596646,18.32285617,1.004683791,1.346577432,21.18167968,33.06344462 -Marbella,3168.727803,2.135795,0.178239,5.418642,0.063617,10.20654,49.245705,8.446928,0.252467,0.998898,0.665668,1.09562,0.640649,0.760763,0.526882,17778,0.99829,584.782661,54186.41859,1.054742,1.250681,1.185366,0.247345,0.169418,1.070926,1.056655,0.211107,0.497154,10.688337,97.523648,2526590.195,797.351603,9993,147633,70833,76800,128691,86716,48791,26231,11678,0.62215373,0.984625537,19.57024139,5.143135168,2.663106728,0.194925208,20.96231564,49.8594966 
-Matar,594.463594,5.236858,0.051442,2.462405,0.015254,5.555791,29.73569,4.219238,1.345751,0.637263,0.793009,1.575078,0.813828,0.906296,0.757452,11556,0.995858,241.415842,24624.05073,1.015978,1.350681,1.312535,0.369457,0.403057,1.114026,1.104583,0.631793,0.645094,16.693761,71.813266,920497.1209,1548.449948,12201,129661,64476,65185,82443,54329,47081,1272,5957,1.037829449,4.016876473,17.27686117,19.15065419,6.103669416,0.467122565,26.58451317,25.36247357 -Melilla,401.005097,6.104528,0.041811,3.416999,0.00939,4.313018,40.89123,3.119782,1.994987,0.639054,0.79249,1.572577,0.793978,0.905703,0.75625,9591,0.991479,117.35592,34169.99307,1.070934,1.19913,1.157815,0.444843,0.437038,1.076771,1.067197,0.480239,0.446324,9.21604,60.528787,542851.8849,1353.728142,11665,87076,44162,42914,72353,26246,24666,200,1367,1.016856341,28.23870996,14.3167599,14.04272964,0.54393563,0.223297634,18.79531409,22.8223968 -M髎toles,882.374697,4.427847,0.109857,4.601716,0.029115,8.071251,45.552518,5.88974,0.906644,0.642828,0.789963,1.560473,0.793922,0.902815,0.750429,8032,0.994785,191.749053,46017.16061,1.017083,1.297285,1.263912,0.283876,0.264516,1.09021,1.080007,0.57319,0.570475,7.604313,62.325516,866535.2921,982.049118,12064,210309,102307,108002,127220,79599,74951,1649,2992,0.668677476,9.202293212,24.09781895,26.79997114,0.492700331,1.065648059,20.46323557,17.20965526 -Murcia,882.374697,4.427847,0.109857,4.601716,0.029115,8.071251,45.552518,5.88974,0.188628,0.735037,0.75396,1.402262,0.751272,0.861669,0.674345,57688,0.998245,569.772079,74436.04097,1.047906,1.268026,1.221349,0.317831,0.307044,1.086123,1.074292,-153655.9954,0.531412,11.48254,172.658116,5086644.201,1199.352739,11778,459403,224929,234474,358228,207618,156916,14611,36044,1.037802889,7.476055071,13.77790411,22.31853906,0.541381515,0.346356319,16.53097129,37.97098974 
-Ourense,1072.807433,1.343457,0.06342,1.306178,0.023652,6.570052,25.322967,5.014611,0.745707,1.208704,0.622217,0.973436,0.603775,0.711105,0.468124,16916,0.998782,821.333532,13061.77565,1.096012,1.280935,1.224703,0.326494,0.291726,1.087607,1.077016,0.56419,0.543765,6.478429,103.781409,1541938.731,1437.293109,13037,105643,47891,57752,73491,67104,43805,8707,14561,0.260536632,2.458586705,9.937318601,13.04664616,0.178122256,0.311044864,17.17025435,56.63749043 -Oviedo,981.507459,3.520464,0.11599,2.993032,0.032942,8.17277,37.143082,5.842881,0.815073,0.764665,0.737757,1.338482,0.705927,0.84315,0.643674,8462,0.996951,327.930774,29930.32486,1.057826,1.254438,1.205068,0.265896,0.243552,1.35325,1.065915,0.538619,0.514612,9.230418,66.354134,950174.9494,968.077156,14657,219910,101749,118161,133254,123730,97821,7283,18582,0.507020142,1.795108367,22.89803533,20.76770152,0.418780422,0.514738154,37.80021051,15.29840555 -Palencia,624.655113,3.783919,0.103471,3.707235,0.021156,7.332732,55.588778,4.845938,1.280707,0.737545,0.750623,1.388791,0.731803,0.857855,0.667867,6037,0.994065,168.496219,37072.35186,1.043739,1.245576,1.201339,0.295989,0.296904,0.661278,1.068949,0.549911,0.518849,9.575122,50.37555,602511.9897,964.551441,13662,78144,36875,41269,51281,44610,32691,4745,7137,1.093943754,4.112275441,20.23707835,37.20466967,0.20911061,0.214805186,18.47343359,18.4546834 -Palma de Mallorca,2946.774052,3.279668,0.083951,4.930098,0.028243,7.128906,37.14125,5.237095,0.271483,0.683856,0.773189,1.483637,0.791149,0.883644,0.713479,35101,0.998327,597.711104,49300.97558,1.027479,1.254301,1.214899,0.269904,0.262725,1.078541,1.06891,0.532558,0.529751,15.600723,125.961797,3460655.326,1174.387742,12998,422587,206654,215933,360254,182244,159316,6521,16349,0.927676765,6.242174717,16.01056755,13.67968868,3.027674339,0.815612142,26.73804607,32.55855973 
-Parla,633.212934,19.994841,0.096822,27.703034,0.019997,6.856993,109.147215,5.466245,1.263398,0.675798,0.776232,1.497147,0.791741,0.887123,0.719976,6540,0.95625,22.857169,277030.3385,1.043017,1.300935,1.28017,0.329106,0.336968,1.096462,1.092514,0.426996,0.608534,7.905136,47.406647,578738.9117,913.972031,9803,133482,66524,66958,66997,47097,43712,576,2805,2.334117454,5.324166237,19.0690841,32.24693509,0.377942902,1.990844833,24.28335942,14.37354996 -Pontevedra,604.755716,6.60961,0.085454,3.423965,0.035801,7.74214,38.137105,6.265001,1.322848,0.770596,0.735808,1.331078,0.710126,0.840923,0.640113,7077,0.994338,176.62441,34239.64548,1.090859,1.296588,1.230306,0.284937,0.243004,1.086863,1.07469,0.521571,0.548893,10.560709,69.490392,753826.0941,1246.496848,13047,83260,39344,43916,59044,39568,30946,1726,6886,0.681221751,1.489442517,14.01950383,23.7660659,1.078373903,0.428354474,20.23446423,38.3025734 -Sabadell,1090.068472,2.090116,0.043296,1.570535,0.013927,5.171963,27.855174,4.082364,0.733899,0.673577,0.773694,1.485865,0.764465,0.884221,0.71455,25177,0.998559,694.074811,15705.34551,1.083084,1.415343,1.360043,0.415406,0.435982,1.133652,1.122543,-123991.3169,0.687683,17.994628,107.511781,1910152.83,1752.323712,13520,216520,105388,111132,139550,93237,81596,1423,10184,1.392173289,2.515662469,19.89253037,23.5695134,1.087194583,0.598761951,26.95004489,23.99411905 -Salamanca,885.137174,3.267961,0.094073,4.048108,0.020095,6.581246,46.490574,4.532603,0.903815,0.714124,0.756183,1.411336,0.745841,0.864209,0.678709,9409,0.995427,218.654554,40481.07659,1.051635,1.257037,1.21058,0.312493,0.306654,1.082017,1.07135,0.549894,0.51992,11.288692,64.237809,882897.217,997.469367,13185,144825,65661,79164,79554,92491,64588,14907,12954,0.761127537,7.018541555,29.25256781,26.09774571,1.115083177,0.421178931,28.29401661,7.03973867 -San Crist骲al de La 
Laguna,1438.04495,0.814449,0.052825,1.018972,0.017127,6.116619,24.412439,4.596158,0.556311,0.932454,0.69106,1.174608,0.681847,0.789783,0.564867,27223,0.999291,1411.271008,10189.7151,1.082293,1.253427,1.205466,0.331662,0.335698,1.082882,1.072537,0.531807,0.518625,12.33727,123.810552,2160385.8,1502.307561,11481,158911,76703,82208,129699,71233,57555,4045,9621,0.925674134,3.908889416,11.64032344,15.55077719,0.533987354,0.243181842,18.80216244,48.39500419 -Santa Coloma de Gramenet,275.938786,9.182635,0.041476,2.952744,0.020184,5.446041,29.299202,4.79812,2.899194,0.850595,0.708782,1.233682,0.689198,0.810036,0.593276,6653,0.989299,93.451637,29527.44262,1.04622,1.254024,1.20779,0.264173,0.230264,1.076411,1.065345,0.536362,0.516802,16.338627,55.920871,510347.0832,1849.493832,10799,120443,59527,60916,57124,48527,45646,498,2371,2.440079648,2.179385759,25.5793274,8.273807802,0.314589044,0.278266951,43.15656515,17.77797824 -Santa Cruz de Tenerife,1017.423254,1.543634,0.048066,1.004617,0.013195,5.551545,24.463683,4.069826,0.7863,0.727781,0.750957,1.390129,0.728807,0.858236,0.668511,21167,0.999013,1012.747587,10046.16814,1.049767,1.235287,1.199565,0.374101,0.374048,1.082768,1.072958,0.532703,0.516691,9.025349,106.397217,1496086.987,1470.466672,12547,209194,99840,109354,176209,96313,78572,5125,12593,0.739465334,2.971492933,14.5187984,19.0880732,0.413263605,0.604738863,32.26989874,29.39426893 -Santander,1216.584547,3.359808,0.123049,3.951181,0.032875,8.443763,45.332585,5.853981,0.657579,0.672155,0.777239,1.501657,0.778447,0.888274,0.722145,9887,0.996752,307.904062,39511.80574,1.069452,1.275037,1.213704,0.273275,0.244573,1.080693,1.068258,0.556365,0.5255,13.140323,72.304405,1161190.899,954.467901,14202,173375,79936,93439,117011,92442,73395,8286,10742,0.771991578,6.903087829,19.01689502,17.76517482,0.843979824,0.698586902,31.63147502,22.368809 
-Segovia,550.996736,3.749205,0.102359,2.840016,0.024102,7.334539,41.711668,4.959739,1.451914,0.616999,0.802926,1.624175,0.815801,0.91763,0.781063,5383,0.994846,194.011864,28400.15682,1.064557,1.273825,1.215704,0.302834,0.295236,1.083889,1.070855,0.54725,0.518428,11.974658,50.979095,549244.4213,996.819737,13441,52057,24328,27729,36270,28894,21254,2649,4972,0.628881795,16.82703858,15.53102878,23.03909757,2.210847803,0.167142189,25.54959821,16.04636507 -Sevilla,550.996736,3.749205,0.102359,2.840016,0.024102,7.334539,41.711668,4.959739,0.214436,0.615403,0.803449,1.626831,0.829605,0.918227,0.782341,57878,0.997208,358.230217,104142.886,1.035909,1.267778,1.219951,0.450273,0.389583,1.096375,1.081786,0.340574,0.513197,13.41208,139.26766,4097269.579,1098.253798,12490,691395,328013,363382,485440,337303,268435,20612,48178,1.465233613,14.4821703,21.98988141,23.33898421,1.35333236,1.202196674,23.91609028,12.25211116 -Soria,379.935971,4.003721,0.110511,2.281415,0.033063,8.137307,34.455219,5.682173,2.105618,0.749789,0.745836,1.369775,0.737823,0.852384,0.658723,3438,0.993995,166.535215,22814.152,1.04622,1.254024,1.20779,0.264173,0.230264,1.076411,1.065345,0.536362,0.516802,9.009657,41.858176,374902.5855,986.752016,14573,39821,18788,21033,29588,23597,15965,3645,3977,1.02237651,3.16006562,16.31025216,37.93921379,0.674618665,0.947700241,23.57955247,16.36622055 -Tarragona,1313.091165,5.279116,0.138017,2.281415,0.040985,8.785371,55.511952,6.577365,0.609249,0.629911,0.797619,1.597604,0.823623,0.911565,0.768285,9514,0.994988,199.540585,65805.71883,1.057172,1.259214,1.206575,0.263796,0.216383,1.076287,1.065068,0.54812,0.528677,14.707599,65.374082,1138688.585,867.181667,13860,136496,66666,69830,94004,65723,52391,5548,7772,0.694231218,12.17989777,15.55301158,23.39023286,1.162169448,2.463067373,15.72582008,28.83156967 
-Terrassa,1253.75102,1.919933,0.049161,1.306412,0.01567,5.899534,23.790631,4.719074,0.638085,0.713778,0.757116,1.41517,0.749488,0.865275,0.680553,25503,0.998958,959.690368,13064.12008,1.035035,1.404373,1.348658,0.384417,0.417365,1.12768,1.119531,0.653962,0.675532,17.736428,111.673236,2090006.008,1667.002438,13243,223627,109948,113679,149620,101739,83249,2544,15908,2.146106915,4.494514233,9.861473011,27.15666178,0.459974344,0.44176089,23.71182654,31.72768228 -Teruel,519.992981,4.61585,0.087497,2.474775,0.020931,6.973207,34.569297,4.802547,1.538482,0.79297,0.728547,1.303966,0.708345,0.832625,0.627075,5943,0.995241,210.117254,24747.75254,1.040073,1.276532,1.227719,0.326296,0.311237,1.088724,1.077512,0.561993,0.539894,9.270194,56.084285,566438.277,1089.319082,14090,36240,17287,18953,28945,19022,13218,2755,3026,0.612204498,3.333225824,14.05755242,39.63495541,1.050809995,0.119447027,14.07670123,27.11510359 -Toledo,1699.657512,13.068388,0.217654,34.804116,0.022233,8.69961,110.391413,4.81292,0.470683,0.735045,0.756789,1.413825,0.765116,0.864902,0.679906,7809,0.979523,48.834958,348041.156,1.02563,1.259621,1.216064,0.311901,0.295576,1.084151,1.07396,-55179.7298,0.520097,10.888881,50.290169,946247.6269,556.728411,14326,85811,40941,44870,60442,37923,31386,2464,4030,0.314278985,16.67185861,17.11969161,15.40663401,1.700557438,0.645446531,8.880285029,39.26124778 -Torrej髇 de Ardoz,2003.737494,66.928393,0.339099,898.453967,0.024424,8.297531,821.120921,5.573816,0.399254,2.307808,0.45807,0.612619,0.471648,0.523509,0.294608,5909,0.551611,2.230206,8984539.673,1.046314,1.279729,1.224598,0.28411,0.283542,1.084836,1.072528,0.559403,0.53502,9.017763,31.137722,655656.7445,327.216887,12188,132853,65343,67510,86439,49216,44581,661,3967,0.300058007,72.04759211,4.894815203,11.73851288,0.234591027,0.22512086,7.597285006,2.962024905 
-Valencia,2164.215491,2.51541,0.065111,4.506163,0.024895,6.216411,38.408851,4.992987,0.369649,0.871006,0.710258,1.238763,0.723575,0.811723,0.595719,33239,0.997918,480.279041,45061.62681,1.042692,1.257228,1.213188,0.306693,0.276416,1.079785,1.070835,0.54507,0.5255,14.618656,125.303941,2864155.963,1323.415332,13873,800215,380293,419922,513747,419994,328979,33757,57193,2.379025634,4.969881312,20.81178784,15.18235964,0.892344167,0.835997541,46.08362235,8.844981523 -Valladolid,2764.692882,2.285334,0.170702,9.992102,0.028855,8.463257,74.230483,5.39219,0.289363,0.708775,0.762939,1.439438,0.757313,0.87193,0.692223,16196,0.996386,276.687811,99921.02196,1.029885,1.260936,1.224576,0.297016,0.25854,2.406995,1.07222,0.554657,0.537779,10.983938,75.829594,1873658.813,677.709566,14247,299265,140247,159018,182319,158259,129151,10260,18757,0.554565569,6.484726891,20.23269672,35.57945567,0.798932155,0.459606087,19.95669411,15.93332281 -Vigo,2673.672921,4.253406,0.076896,7.653004,0.03715,7.448687,40.795189,6.35969,0.299214,0.92435,0.691404,1.175722,0.67477,0.790176,0.565403,34770,0.997138,349.362521,76530.04423,1.109125,1.296248,1.232625,0.278694,0.239418,1.086155,1.074478,0.565968,0.551518,11.377132,151.527114,3592622.038,1343.702893,13164,296692,140164,156528,215138,143638,114455,10536,18588,0.428888023,3.261984294,15.13932598,20.63672613,0.557649427,0.404116363,11.82431516,47.74699461 -Zamora,551.778808,9.977708,0.075195,6.638329,0.018018,6.080244,52.258509,4.223122,1.449856,0.673657,0.774217,1.488181,0.758289,0.88482,0.715664,7338,0.987969,83.120135,66383.29042,1.047176,1.260158,1.217223,0.350769,0.330062,1.086805,1.076438,0.547378,0.528852,12.125267,55.257533,622534.684,1128.23232,12884,60988,28299,32689,41943,39079,27297,5092,6673,1.117132834,4.423835616,24.28850055,27.91758554,0.424894784,0.199479246,21.52089153,20.10767991 
-Zaragoza,4188.64345,1.57618,0.148265,6.420273,0.028926,8.365215,65.532054,5.374877,0.190993,0.765839,0.742138,1.35533,0.731288,0.848158,0.651776,28251,0.998467,652.408945,64202.72871,1.04527,1.245742,1.197868,0.269641,0.255703,1.074214,1.064998,0.036638,0.504216,12.7523,105.150657,3250830.304,776.105759,14220,681877,326103,355774,376213,327112,277588,14208,35136,0.759847791,7.166900881,14.00403742,39.66411238,0.305394384,0.273807024,23.48225866,14.34364147 diff --git a/escalar_variables.ipynb b/escalar_variables.ipynb deleted file mode 100644 index ca648e1..0000000 --- a/escalar_variables.ipynb +++ /dev/null @@ -1,4814 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ESCALADO DE CARACTER脥STICAS" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler,StandardScaler,RobustScaler,Normalizer,QuantileTransformer,PowerTransformer\n", - "from matplotlib import pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Desactivamos la notaci贸n cient铆fica" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "pd.set_option('display.float_format', '{:.2f}'.format)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Si es necesario podemos definir un diccionario definiendo el tipo de datos que deben tomar las variables al leer el archivo csv. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "#dtypes = {'NP':float, 'RNMDP_2020':float,'PobT':float,'PobH':float,'PobM':float,'Vehiculos':float,\n", - "#'T_Viviendas':float,'T_Viv_Prin':float,'T_Viv_Sec':float,'Viv_vacias':float,'T_viv_col':float}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Leer el archivo de datos" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "datos = pd.read_csv(r'C:\\Users\\crist\\Documents\\GitHub\\manifolds\\clustering-cities-spain\\datos_metricas_socioeconomicos_porcentajes.csv', encoding = \"ISO-8859-1\"\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CiudadesTALPIAREA_MNAREA_AMAREA_MDGYRATE_MNGYRATE_AMGYRATE_MDPRD...T_Viv_SecViv_vaciasCOMED_SINGEQUIPINDOCIOOFIRES_PLURES_UNI
0Albacete1605.2933552.9294970.1164612.9764110.0315208.38082936.0575665.7102610.498351...5168150810.4450186.86510012.01865828.6876490.6989100.55346816.78948133.941716
1Alcal谩 de Henares1615.3041262.4510390.1364514.5591230.0198547.59146955.5838274.8741160.495263...125557981.55748911.25917119.28088341.8369200.8232731.89648413.9943149.351466
2Alcobendas1386.6263212.8829840.2894233.8438570.06967612.99898645.8867239.3200430.495263...56615130.77964910.60864816.11861912.7188880.4265584.77743714.14253840.427664
3Alcorc贸n787.0447923.2955280.1612143.0234670.0282059.64422343.8790076.0778001.016461...130334881.9172226.86428933.29560625.0218650.3696810.34460223.8368808.349856
4Algeciras1063.3190192.0449080.0633002.6298190.0133415.46098338.0653133.6870460.752361...470066861.1551692.04992113.42054514.5247730.8643480.35506710.73480756.895370
..................................................................
67Valencia2164.2154912.5154100.0651114.5061630.0248956.21641138.4088514.9929870.369649...33757571932.3790264.96988120.81178815.1823600.8923440.83599846.0836228.844982
68Valladolid2764.6928822.2853340.1707029.9921020.0288558.46325774.2304835.3921900.289363...10260187570.5545666.48472720.23269735.5794560.7989320.45960619.95669415.933323
69Vigo2673.6729214.2534060.0768967.6530040.0371507.44868740.7951896.3596900.299214...10536185880.4288883.26198415.13932620.6367260.5576490.40411611.82431547.746995
70Zamora551.7788089.9777080.0751956.6383290.0180186.08024452.2585094.2231221.449856...509266731.1171334.42383624.28850127.9175860.4248950.19947921.52089220.107680
71Zaragoza4188.6434501.5761800.1482656.4202730.0289268.36521565.5320545.3748770.190993...14208351360.7598487.16690114.00403739.6641120.3053940.27380723.48225914.343641
\n", - "

72 rows 脳 49 columns

\n", - "
" - ], - "text/plain": [ - " Ciudades TA LPI AREA_MN AREA_AM AREA_MD \\\n", - "0 Albacete 1605.293355 2.929497 0.116461 2.976411 0.031520 \n", - "1 Alcal谩 de Henares 1615.304126 2.451039 0.136451 4.559123 0.019854 \n", - "2 Alcobendas 1386.626321 2.882984 0.289423 3.843857 0.069676 \n", - "3 Alcorc贸n 787.044792 3.295528 0.161214 3.023467 0.028205 \n", - "4 Algeciras 1063.319019 2.044908 0.063300 2.629819 0.013341 \n", - ".. ... ... ... ... ... ... \n", - "67 Valencia 2164.215491 2.515410 0.065111 4.506163 0.024895 \n", - "68 Valladolid 2764.692882 2.285334 0.170702 9.992102 0.028855 \n", - "69 Vigo 2673.672921 4.253406 0.076896 7.653004 0.037150 \n", - "70 Zamora 551.778808 9.977708 0.075195 6.638329 0.018018 \n", - "71 Zaragoza 4188.643450 1.576180 0.148265 6.420273 0.028926 \n", - "\n", - " GYRATE_MN GYRATE_AM GYRATE_MD PRD ... T_Viv_Sec Viv_vacias \\\n", - "0 8.380829 36.057566 5.710261 0.498351 ... 5168 15081 \n", - "1 7.591469 55.583827 4.874116 0.495263 ... 1255 5798 \n", - "2 12.998986 45.886723 9.320043 0.495263 ... 566 1513 \n", - "3 9.644223 43.879007 6.077800 1.016461 ... 1303 3488 \n", - "4 5.460983 38.065313 3.687046 0.752361 ... 4700 6686 \n", - ".. ... ... ... ... ... ... ... \n", - "67 6.216411 38.408851 4.992987 0.369649 ... 33757 57193 \n", - "68 8.463257 74.230483 5.392190 0.289363 ... 10260 18757 \n", - "69 7.448687 40.795189 6.359690 0.299214 ... 10536 18588 \n", - "70 6.080244 52.258509 4.223122 1.449856 ... 5092 6673 \n", - "71 8.365215 65.532054 5.374877 0.190993 ... 14208 35136 \n", - "\n", - " COM ED_SING EQUIP IND OCIO OFI RES_PLU \\\n", - "0 0.445018 6.865100 12.018658 28.687649 0.698910 0.553468 16.789481 \n", - "1 1.557489 11.259171 19.280883 41.836920 0.823273 1.896484 13.994314 \n", - "2 0.779649 10.608648 16.118619 12.718888 0.426558 4.777437 14.142538 \n", - "3 1.917222 6.864289 33.295606 25.021865 0.369681 0.344602 23.836880 \n", - "4 1.155169 2.049921 13.420545 14.524773 0.864348 0.355067 10.734807 \n", - ".. ... ... ... ... ... 
... ... \n", - "67 2.379026 4.969881 20.811788 15.182360 0.892344 0.835998 46.083622 \n", - "68 0.554566 6.484727 20.232697 35.579456 0.798932 0.459606 19.956694 \n", - "69 0.428888 3.261984 15.139326 20.636726 0.557649 0.404116 11.824315 \n", - "70 1.117133 4.423836 24.288501 27.917586 0.424895 0.199479 21.520892 \n", - "71 0.759848 7.166901 14.004037 39.664112 0.305394 0.273807 23.482259 \n", - "\n", - " RES_UNI \n", - "0 33.941716 \n", - "1 9.351466 \n", - "2 40.427664 \n", - "3 8.349856 \n", - "4 56.895370 \n", - ".. ... \n", - "67 8.844982 \n", - "68 15.933323 \n", - "69 47.746995 \n", - "70 20.107680 \n", - "71 14.343641 \n", - "\n", - "[72 rows x 49 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "datos" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Visualizaci贸n de la distribuci贸n de los datos en forma de tabla" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
TA72.01.318404e+039.184625e+02275.938786643.1173861.042544e+031.636392e+034.436371e+03
LPI72.04.985617e+008.019584e+000.8144492.4851683.304187e+004.474848e+006.692839e+01
AREA_MN72.09.916672e-025.319897e-020.0414760.0647798.647550e-021.161077e-013.390990e-01
AREA_AM72.01.743274e+011.054410e+021.0046172.4171583.420482e+004.977895e+008.984540e+02
AREA_MD72.02.392935e-021.060419e-020.0090160.0168292.243800e-022.839600e-026.967600e-02
GYRATE_MN72.07.094323e+001.488582e+004.3130186.0568967.051056e+008.128589e+001.299899e+01
GYRATE_AM72.05.511094e+019.310961e+0123.79063134.9936184.084321e+015.114438e+018.211209e+02
GYRATE_MD72.05.008917e+001.050683e+003.0441264.2736454.959739e+005.491379e+009.320043e+00
PRD72.08.524466e-015.863966e-010.0580420.4018437.398030e-011.175061e+002.899194e+00
SHDI72.07.892292e-012.464732e-010.6154030.6721397.145465e-018.012575e-012.307808e+00
SIDI72.07.421337e-015.828422e-020.4580700.7281127.565280e-017.772555e-018.034490e-01
MSIDI72.01.375261e+001.911268e-010.6126191.3023691.412754e+001.501730e+001.626831e+00
SHEI72.07.417274e-016.331482e-020.4716480.7104887.568175e-017.857405e-018.296050e-01
SIEI72.08.481527e-016.661054e-020.5235090.8321288.646035e-018.882925e-019.182270e-01
MSIEI72.06.613607e-019.191256e-020.2946080.6263076.793910e-017.221800e-017.823410e-01
NP72.01.828046e+041.855340e+043438.0000007471.5000001.148450e+042.176175e+041.230980e+05
DIVISION72.09.889365e-015.260589e-020.5516110.9948319.964445e-019.981592e-019.992990e-01
SPLIT72.03.901311e+023.149477e+022.230206193.4461612.813257e+025.432779e+021.425579e+03
MESH72.01.797694e+051.053929e+0610046.16814025915.1982153.775546e+046.482004e+048.984540e+06
PAFRAC72.01.044594e+002.184804e-020.9951071.0289541.041373e+001.053471e+001.109125e+00
SHAPE_MN72.01.269265e+003.491183e-021.1991301.2506301.259809e+001.280878e+001.415343e+00
SHAPE_MD72.01.225296e+003.445610e-021.1578151.2053671.217750e+001.230854e+001.360043e+00
PARA_MN72.03.192130e-015.362411e-020.2051450.2825813.133425e-013.477562e-014.746200e-01
PARA_MD72.03.062550e-016.350343e-020.1646150.2614022.995145e-013.434830e-014.564660e-01
FRAC_MD72.01.075643e+001.208133e-021.0546221.0687631.072909e+001.080127e+001.122543e+00
SQUARE_MN72.0-9.754677e+034.918178e+04-369540.8625000.5215875.393655e-015.571245e-016.539620e-01
SQUARE_MD72.05.364690e-014.159291e-020.4463240.5161715.273470e-015.445828e-016.876830e-01
IJI72.01.129271e+012.676121e+006.4784299.2268241.084142e+011.289895e+011.799463e+01
LSI72.08.309622e+013.564757e+0131.13772256.0434317.146118e+011.027626e+022.196053e+02
TE72.01.685269e+061.701765e+06374902.585500750870.9684501.122460e+062.010358e+061.205779e+07
ED72.01.097312e+032.901204e+02327.216887934.7013511.079915e+031.298453e+031.849494e+03
RNMDP_202072.01.292796e+041.569903e+039803.00000011787.7500001.301750e+041.369425e+041.868400e+04
PobT72.02.620163e+054.322667e+0536240.000000110978.5000001.656475e+052.261322e+053.334730e+06
PobH72.01.251140e+052.023449e+0517287.00000053135.2500007.831950e+041.099655e+051.553899e+06
PobM72.01.369024e+052.299629e+0518953.00000058322.7500008.566500e+041.182155e+051.780831e+06
Vehiculos72.01.739258e+052.613792e+0528945.00000073206.5000001.137505e+051.611278e+052.012707e+06
T_Viviendas72.01.247685e+052.020095e+0519022.00000049043.7500007.660350e+041.139582e+051.531490e+06
T_Viv_Prin72.01.016010e+051.727884e+0513218.00000042075.7500006.274450e+048.881750e+041.320531e+06
T_Viv_Sec72.08.011514e+039.963596e+03200.0000002247.0000004.722500e+039.095250e+035.732500e+04
Viv_vacias72.01.511262e+042.143417e+041335.0000005256.7500009.251000e+031.601825e+041.531010e+05
COM72.01.142787e+005.816195e-010.2605370.6780861.037816e+001.469029e+002.440080e+00
ED_SING72.08.086762e+009.199959e+000.9846263.4834816.455425e+009.153504e+007.204759e+01
EQUIP72.01.799633e+015.528186e+004.89481514.3095201.719828e+012.100235e+013.329561e+01
IND72.02.391042e+011.016568e+015.14313515.9375402.284396e+012.872785e+015.231218e+01
OCIO72.09.576019e-018.744678e-010.1781220.4233667.654631e-011.115091e+006.103669e+00
OFI72.07.223227e-017.620613e-010.1194470.2771524.909304e-018.189812e-014.777437e+00
RES_PLU72.02.251444e+018.517932e+006.71609716.4104002.172336e+012.655110e+014.608362e+01
RES_UNI72.02.466934e+011.454853e+011.91552714.3660732.259231e+013.328301e+016.526608e+01
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% \\\n", - "TA 72.0 1.318404e+03 9.184625e+02 275.938786 643.117386 \n", - "LPI 72.0 4.985617e+00 8.019584e+00 0.814449 2.485168 \n", - "AREA_MN 72.0 9.916672e-02 5.319897e-02 0.041476 0.064779 \n", - "AREA_AM 72.0 1.743274e+01 1.054410e+02 1.004617 2.417158 \n", - "AREA_MD 72.0 2.392935e-02 1.060419e-02 0.009016 0.016829 \n", - "GYRATE_MN 72.0 7.094323e+00 1.488582e+00 4.313018 6.056896 \n", - "GYRATE_AM 72.0 5.511094e+01 9.310961e+01 23.790631 34.993618 \n", - "GYRATE_MD 72.0 5.008917e+00 1.050683e+00 3.044126 4.273645 \n", - "PRD 72.0 8.524466e-01 5.863966e-01 0.058042 0.401843 \n", - "SHDI 72.0 7.892292e-01 2.464732e-01 0.615403 0.672139 \n", - "SIDI 72.0 7.421337e-01 5.828422e-02 0.458070 0.728112 \n", - "MSIDI 72.0 1.375261e+00 1.911268e-01 0.612619 1.302369 \n", - "SHEI 72.0 7.417274e-01 6.331482e-02 0.471648 0.710488 \n", - "SIEI 72.0 8.481527e-01 6.661054e-02 0.523509 0.832128 \n", - "MSIEI 72.0 6.613607e-01 9.191256e-02 0.294608 0.626307 \n", - "NP 72.0 1.828046e+04 1.855340e+04 3438.000000 7471.500000 \n", - "DIVISION 72.0 9.889365e-01 5.260589e-02 0.551611 0.994831 \n", - "SPLIT 72.0 3.901311e+02 3.149477e+02 2.230206 193.446161 \n", - "MESH 72.0 1.797694e+05 1.053929e+06 10046.168140 25915.198215 \n", - "PAFRAC 72.0 1.044594e+00 2.184804e-02 0.995107 1.028954 \n", - "SHAPE_MN 72.0 1.269265e+00 3.491183e-02 1.199130 1.250630 \n", - "SHAPE_MD 72.0 1.225296e+00 3.445610e-02 1.157815 1.205367 \n", - "PARA_MN 72.0 3.192130e-01 5.362411e-02 0.205145 0.282581 \n", - "PARA_MD 72.0 3.062550e-01 6.350343e-02 0.164615 0.261402 \n", - "FRAC_MD 72.0 1.075643e+00 1.208133e-02 1.054622 1.068763 \n", - "SQUARE_MN 72.0 -9.754677e+03 4.918178e+04 -369540.862500 0.521587 \n", - "SQUARE_MD 72.0 5.364690e-01 4.159291e-02 0.446324 0.516171 \n", - "IJI 72.0 1.129271e+01 2.676121e+00 6.478429 9.226824 \n", - "LSI 72.0 8.309622e+01 3.564757e+01 31.137722 56.043431 \n", - "TE 72.0 1.685269e+06 1.701765e+06 374902.585500 
750870.968450 \n", - "ED 72.0 1.097312e+03 2.901204e+02 327.216887 934.701351 \n", - "RNMDP_2020 72.0 1.292796e+04 1.569903e+03 9803.000000 11787.750000 \n", - "PobT 72.0 2.620163e+05 4.322667e+05 36240.000000 110978.500000 \n", - "PobH 72.0 1.251140e+05 2.023449e+05 17287.000000 53135.250000 \n", - "PobM 72.0 1.369024e+05 2.299629e+05 18953.000000 58322.750000 \n", - "Vehiculos 72.0 1.739258e+05 2.613792e+05 28945.000000 73206.500000 \n", - "T_Viviendas 72.0 1.247685e+05 2.020095e+05 19022.000000 49043.750000 \n", - "T_Viv_Prin 72.0 1.016010e+05 1.727884e+05 13218.000000 42075.750000 \n", - "T_Viv_Sec 72.0 8.011514e+03 9.963596e+03 200.000000 2247.000000 \n", - "Viv_vacias 72.0 1.511262e+04 2.143417e+04 1335.000000 5256.750000 \n", - "COM 72.0 1.142787e+00 5.816195e-01 0.260537 0.678086 \n", - "ED_SING 72.0 8.086762e+00 9.199959e+00 0.984626 3.483481 \n", - "EQUIP 72.0 1.799633e+01 5.528186e+00 4.894815 14.309520 \n", - "IND 72.0 2.391042e+01 1.016568e+01 5.143135 15.937540 \n", - "OCIO 72.0 9.576019e-01 8.744678e-01 0.178122 0.423366 \n", - "OFI 72.0 7.223227e-01 7.620613e-01 0.119447 0.277152 \n", - "RES_PLU 72.0 2.251444e+01 8.517932e+00 6.716097 16.410400 \n", - "RES_UNI 72.0 2.466934e+01 1.454853e+01 1.915527 14.366073 \n", - "\n", - " 50% 75% max \n", - "TA 1.042544e+03 1.636392e+03 4.436371e+03 \n", - "LPI 3.304187e+00 4.474848e+00 6.692839e+01 \n", - "AREA_MN 8.647550e-02 1.161077e-01 3.390990e-01 \n", - "AREA_AM 3.420482e+00 4.977895e+00 8.984540e+02 \n", - "AREA_MD 2.243800e-02 2.839600e-02 6.967600e-02 \n", - "GYRATE_MN 7.051056e+00 8.128589e+00 1.299899e+01 \n", - "GYRATE_AM 4.084321e+01 5.114438e+01 8.211209e+02 \n", - "GYRATE_MD 4.959739e+00 5.491379e+00 9.320043e+00 \n", - "PRD 7.398030e-01 1.175061e+00 2.899194e+00 \n", - "SHDI 7.145465e-01 8.012575e-01 2.307808e+00 \n", - "SIDI 7.565280e-01 7.772555e-01 8.034490e-01 \n", - "MSIDI 1.412754e+00 1.501730e+00 1.626831e+00 \n", - "SHEI 7.568175e-01 7.857405e-01 8.296050e-01 \n", - "SIEI 8.646035e-01 
8.882925e-01 9.182270e-01 \n", - "MSIEI 6.793910e-01 7.221800e-01 7.823410e-01 \n", - "NP 1.148450e+04 2.176175e+04 1.230980e+05 \n", - "DIVISION 9.964445e-01 9.981592e-01 9.992990e-01 \n", - "SPLIT 2.813257e+02 5.432779e+02 1.425579e+03 \n", - "MESH 3.775546e+04 6.482004e+04 8.984540e+06 \n", - "PAFRAC 1.041373e+00 1.053471e+00 1.109125e+00 \n", - "SHAPE_MN 1.259809e+00 1.280878e+00 1.415343e+00 \n", - "SHAPE_MD 1.217750e+00 1.230854e+00 1.360043e+00 \n", - "PARA_MN 3.133425e-01 3.477562e-01 4.746200e-01 \n", - "PARA_MD 2.995145e-01 3.434830e-01 4.564660e-01 \n", - "FRAC_MD 1.072909e+00 1.080127e+00 1.122543e+00 \n", - "SQUARE_MN 5.393655e-01 5.571245e-01 6.539620e-01 \n", - "SQUARE_MD 5.273470e-01 5.445828e-01 6.876830e-01 \n", - "IJI 1.084142e+01 1.289895e+01 1.799463e+01 \n", - "LSI 7.146118e+01 1.027626e+02 2.196053e+02 \n", - "TE 1.122460e+06 2.010358e+06 1.205779e+07 \n", - "ED 1.079915e+03 1.298453e+03 1.849494e+03 \n", - "RNMDP_2020 1.301750e+04 1.369425e+04 1.868400e+04 \n", - "PobT 1.656475e+05 2.261322e+05 3.334730e+06 \n", - "PobH 7.831950e+04 1.099655e+05 1.553899e+06 \n", - "PobM 8.566500e+04 1.182155e+05 1.780831e+06 \n", - "Vehiculos 1.137505e+05 1.611278e+05 2.012707e+06 \n", - "T_Viviendas 7.660350e+04 1.139582e+05 1.531490e+06 \n", - "T_Viv_Prin 6.274450e+04 8.881750e+04 1.320531e+06 \n", - "T_Viv_Sec 4.722500e+03 9.095250e+03 5.732500e+04 \n", - "Viv_vacias 9.251000e+03 1.601825e+04 1.531010e+05 \n", - "COM 1.037816e+00 1.469029e+00 2.440080e+00 \n", - "ED_SING 6.455425e+00 9.153504e+00 7.204759e+01 \n", - "EQUIP 1.719828e+01 2.100235e+01 3.329561e+01 \n", - "IND 2.284396e+01 2.872785e+01 5.231218e+01 \n", - "OCIO 7.654631e-01 1.115091e+00 6.103669e+00 \n", - "OFI 4.909304e-01 8.189812e-01 4.777437e+00 \n", - "RES_PLU 2.172336e+01 2.655110e+01 4.608362e+01 \n", - "RES_UNI 2.259231e+01 3.328301e+01 6.526608e+01 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "datos.describe().T" - 
] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "La variable Frac_mn tiene una dato con valor infinito, por lo que eliminaremos la variable usando el m茅todo drop." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "datos = datos.drop(columns='FRAC_MN')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "La libreria sklean proporciona una variedad de m茅todos de escalado y normalizaci贸n. Mayor informaci贸n sobre cada m茅todo se puede consultar en la [documentaci贸n de sklearn](https://scikit-learn.org/stable/modules/preprocessing.html)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Creamos un objeto para cada m茅todo importado." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "MinMax = MinMaxScaler()\n", - "Maxabs = MaxAbsScaler()\n", - "Std = StandardScaler()\n", - "Rscaler = RobustScaler()\n", - "Normalizer = Normalizer()\n", - "QuantileT = QuantileTransformer()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Indexaci贸n del dataframe excluyendo la variable ciudades que es de tipo string" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
TA72.001318.40918.46275.94643.121042.541636.394436.37
LPI72.004.998.020.812.493.304.4766.93
AREA_MN72.000.100.050.040.060.090.120.34
AREA_AM72.0017.43105.441.002.423.424.98898.45
AREA_MD72.000.020.010.010.020.020.030.07
GYRATE_MN72.007.091.494.316.067.058.1313.00
GYRATE_AM72.0055.1193.1123.7934.9940.8451.14821.12
GYRATE_MD72.005.011.053.044.274.965.499.32
PRD72.000.850.590.060.400.741.182.90
SHDI72.000.790.250.620.670.710.802.31
SIDI72.000.740.060.460.730.760.780.80
MSIDI72.001.380.190.611.301.411.501.63
SHEI72.000.740.060.470.710.760.790.83
SIEI72.000.850.070.520.830.860.890.92
MSIEI72.000.660.090.290.630.680.720.78
NP72.0018280.4618553.403438.007471.5011484.5021761.75123098.00
DIVISION72.000.990.050.550.991.001.001.00
SPLIT72.00390.13314.952.23193.45281.33543.281425.58
MESH72.00179769.411053928.6610046.1725915.2037755.4664820.048984539.67
PAFRAC72.001.040.021.001.031.041.051.11
SHAPE_MN72.001.270.031.201.251.261.281.42
SHAPE_MD72.001.230.031.161.211.221.231.36
PARA_MN72.000.320.050.210.280.310.350.47
PARA_MD72.000.310.060.160.260.300.340.46
FRAC_MD72.001.080.011.051.071.071.081.12
SQUARE_MN72.00-9754.6849181.78-369540.860.520.540.560.65
SQUARE_MD72.000.540.040.450.520.530.540.69
IJI72.0011.292.686.489.2310.8412.9017.99
LSI72.0083.1035.6531.1456.0471.46102.76219.61
TE72.001685269.281701764.56374902.59750870.971122460.452010358.4812057792.36
ED72.001097.31290.12327.22934.701079.911298.451849.49
RNMDP_202072.0012927.961569.909803.0011787.7513017.5013694.2518684.00
PobT72.00262016.32432266.7236240.00110978.50165647.50226132.253334730.00
PobH72.00125113.96202344.8517287.0053135.2578319.50109965.501553899.00
PobM72.00136902.36229962.9318953.0058322.7585665.00118215.501780831.00
Vehiculos72.00173925.78261379.2028945.0073206.50113750.50161127.752012707.00
T_Viviendas72.00124768.53202009.4619022.0049043.7576603.50113958.251531490.00
T_Viv_Prin72.00101600.97172788.3813218.0042075.7562744.5088817.501320531.00
T_Viv_Sec72.008011.519963.60200.002247.004722.509095.2557325.00
Viv_vacias72.0015112.6221434.171335.005256.759251.0016018.25153101.00
COM72.001.140.580.260.681.041.472.44
ED_SING72.008.099.200.983.486.469.1572.05
EQUIP72.0018.005.534.8914.3117.2021.0033.30
IND72.0023.9110.175.1415.9422.8428.7352.31
OCIO72.000.960.870.180.420.771.126.10
OFI72.000.720.760.120.280.490.824.78
RES_PLU72.0022.518.526.7216.4121.7226.5546.08
RES_UNI72.0024.6714.551.9214.3722.5933.2865.27
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% \\\n", - "TA 72.00 1318.40 918.46 275.94 643.12 1042.54 \n", - "LPI 72.00 4.99 8.02 0.81 2.49 3.30 \n", - "AREA_MN 72.00 0.10 0.05 0.04 0.06 0.09 \n", - "AREA_AM 72.00 17.43 105.44 1.00 2.42 3.42 \n", - "AREA_MD 72.00 0.02 0.01 0.01 0.02 0.02 \n", - "GYRATE_MN 72.00 7.09 1.49 4.31 6.06 7.05 \n", - "GYRATE_AM 72.00 55.11 93.11 23.79 34.99 40.84 \n", - "GYRATE_MD 72.00 5.01 1.05 3.04 4.27 4.96 \n", - "PRD 72.00 0.85 0.59 0.06 0.40 0.74 \n", - "SHDI 72.00 0.79 0.25 0.62 0.67 0.71 \n", - "SIDI 72.00 0.74 0.06 0.46 0.73 0.76 \n", - "MSIDI 72.00 1.38 0.19 0.61 1.30 1.41 \n", - "SHEI 72.00 0.74 0.06 0.47 0.71 0.76 \n", - "SIEI 72.00 0.85 0.07 0.52 0.83 0.86 \n", - "MSIEI 72.00 0.66 0.09 0.29 0.63 0.68 \n", - "NP 72.00 18280.46 18553.40 3438.00 7471.50 11484.50 \n", - "DIVISION 72.00 0.99 0.05 0.55 0.99 1.00 \n", - "SPLIT 72.00 390.13 314.95 2.23 193.45 281.33 \n", - "MESH 72.00 179769.41 1053928.66 10046.17 25915.20 37755.46 \n", - "PAFRAC 72.00 1.04 0.02 1.00 1.03 1.04 \n", - "SHAPE_MN 72.00 1.27 0.03 1.20 1.25 1.26 \n", - "SHAPE_MD 72.00 1.23 0.03 1.16 1.21 1.22 \n", - "PARA_MN 72.00 0.32 0.05 0.21 0.28 0.31 \n", - "PARA_MD 72.00 0.31 0.06 0.16 0.26 0.30 \n", - "FRAC_MD 72.00 1.08 0.01 1.05 1.07 1.07 \n", - "SQUARE_MN 72.00 -9754.68 49181.78 -369540.86 0.52 0.54 \n", - "SQUARE_MD 72.00 0.54 0.04 0.45 0.52 0.53 \n", - "IJI 72.00 11.29 2.68 6.48 9.23 10.84 \n", - "LSI 72.00 83.10 35.65 31.14 56.04 71.46 \n", - "TE 72.00 1685269.28 1701764.56 374902.59 750870.97 1122460.45 \n", - "ED 72.00 1097.31 290.12 327.22 934.70 1079.91 \n", - "RNMDP_2020 72.00 12927.96 1569.90 9803.00 11787.75 13017.50 \n", - "PobT 72.00 262016.32 432266.72 36240.00 110978.50 165647.50 \n", - "PobH 72.00 125113.96 202344.85 17287.00 53135.25 78319.50 \n", - "PobM 72.00 136902.36 229962.93 18953.00 58322.75 85665.00 \n", - "Vehiculos 72.00 173925.78 261379.20 28945.00 73206.50 113750.50 \n", - "T_Viviendas 72.00 124768.53 202009.46 19022.00 
49043.75 76603.50 \n", - "T_Viv_Prin 72.00 101600.97 172788.38 13218.00 42075.75 62744.50 \n", - "T_Viv_Sec 72.00 8011.51 9963.60 200.00 2247.00 4722.50 \n", - "Viv_vacias 72.00 15112.62 21434.17 1335.00 5256.75 9251.00 \n", - "COM 72.00 1.14 0.58 0.26 0.68 1.04 \n", - "ED_SING 72.00 8.09 9.20 0.98 3.48 6.46 \n", - "EQUIP 72.00 18.00 5.53 4.89 14.31 17.20 \n", - "IND 72.00 23.91 10.17 5.14 15.94 22.84 \n", - "OCIO 72.00 0.96 0.87 0.18 0.42 0.77 \n", - "OFI 72.00 0.72 0.76 0.12 0.28 0.49 \n", - "RES_PLU 72.00 22.51 8.52 6.72 16.41 21.72 \n", - "RES_UNI 72.00 24.67 14.55 1.92 14.37 22.59 \n", - "\n", - " 75% max \n", - "TA 1636.39 4436.37 \n", - "LPI 4.47 66.93 \n", - "AREA_MN 0.12 0.34 \n", - "AREA_AM 4.98 898.45 \n", - "AREA_MD 0.03 0.07 \n", - "GYRATE_MN 8.13 13.00 \n", - "GYRATE_AM 51.14 821.12 \n", - "GYRATE_MD 5.49 9.32 \n", - "PRD 1.18 2.90 \n", - "SHDI 0.80 2.31 \n", - "SIDI 0.78 0.80 \n", - "MSIDI 1.50 1.63 \n", - "SHEI 0.79 0.83 \n", - "SIEI 0.89 0.92 \n", - "MSIEI 0.72 0.78 \n", - "NP 21761.75 123098.00 \n", - "DIVISION 1.00 1.00 \n", - "SPLIT 543.28 1425.58 \n", - "MESH 64820.04 8984539.67 \n", - "PAFRAC 1.05 1.11 \n", - "SHAPE_MN 1.28 1.42 \n", - "SHAPE_MD 1.23 1.36 \n", - "PARA_MN 0.35 0.47 \n", - "PARA_MD 0.34 0.46 \n", - "FRAC_MD 1.08 1.12 \n", - "SQUARE_MN 0.56 0.65 \n", - "SQUARE_MD 0.54 0.69 \n", - "IJI 12.90 17.99 \n", - "LSI 102.76 219.61 \n", - "TE 2010358.48 12057792.36 \n", - "ED 1298.45 1849.49 \n", - "RNMDP_2020 13694.25 18684.00 \n", - "PobT 226132.25 3334730.00 \n", - "PobH 109965.50 1553899.00 \n", - "PobM 118215.50 1780831.00 \n", - "Vehiculos 161127.75 2012707.00 \n", - "T_Viviendas 113958.25 1531490.00 \n", - "T_Viv_Prin 88817.50 1320531.00 \n", - "T_Viv_Sec 9095.25 57325.00 \n", - "Viv_vacias 16018.25 153101.00 \n", - "COM 1.47 2.44 \n", - "ED_SING 9.15 72.05 \n", - "EQUIP 21.00 33.30 \n", - "IND 28.73 52.31 \n", - "OCIO 1.12 6.10 \n", - "OFI 0.82 4.78 \n", - "RES_PLU 26.55 46.08 \n", - "RES_UNI 33.28 65.27 " - ] - }, - 
"execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "datos.iloc[:,1:50].describe().T" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.12\n", - "1 0.14\n", - "2 0.29\n", - "3 0.16\n", - "4 0.06\n", - " ... \n", - "67 0.07\n", - "68 0.17\n", - "69 0.08\n", - "70 0.08\n", - "71 0.15\n", - "Name: AREA_MN, Length: 72, dtype: float64" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "datos.loc[:,\"AREA_MN\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([23., 20., 15., 7., 4., 1., 0., 0., 1., 1.]),\n", - " array([0.041476 , 0.0712383, 0.1010006, 0.1307629, 0.1605252, 0.1902875,\n", - " 0.2200498, 0.2498121, 0.2795744, 0.3093367, 0.339099 ]),\n", - " )" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.title(\"Histograma Area mn, datos originales\")\n", - "plt.hist(datos.loc[:,\"AREA_MN\"], bins = 10)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Primer m茅todo: StandardScaler. \n", - "\n", - "Muchos algoritmos pueden comportarse de forma negativa si las variables no muestran uns distribuci贸n Gaussiana normalizada con media cero, y varianza igual a uno. En la pr谩ctica se suele ignorar la forma de la distribuci贸n, y transformar para centrarlos removiendo el valor medio de cada caracter铆stica y dividiendolo por la desviaci贸n estandar. Muchos elementos de la funci贸n objetivo de un algoritmo de aprendizaje como RBF and SVM asumen que las car谩cteristicas est谩n centradas cerca de cero y tienen una varianza en la misma magnitud.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# Ajustar el objeto Std creado utilizando los datos num茅ricos\n", - "Std.fit(datos.iloc[:,1:50])\n", - "# Transformar los datos utilizando el objeto ajustado\n", - "datos_std = Std.transform(datos.iloc[:,1:50])\n", - "# Convertir datos_std que era un array a un dataframe utilizando los nombres de las columans\n", - "df_datos_std = pd.DataFrame(datos_std, columns = datos.iloc[:,1:50].columns)\n", - "# Agregar al dataframe la columna con el nombre de las ciudades\n", - "df_datos_std['Ciudades'] = datos['Ciudades']" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - "
TALPIAREA_MNAREA_AMAREA_MDGYRATE_MNGYRATE_AMGYRATE_MDPRDSHDI...T_Viv_SecViv_vaciasCOMED_SINGEQUIPINDOCIOOFIRES_PLURES_UNI
count72.0072.0072.0072.0072.0072.0072.0072.0072.0072.00...72.0072.0072.0072.0072.0072.0072.0072.0072.0072.00
mean0.000.000.00-0.00-0.00-0.000.000.000.000.00...0.000.000.000.000.00-0.000.00-0.00-0.00-0.00
std1.011.011.011.011.011.011.011.011.011.01...1.011.011.011.011.011.011.011.011.011.01
min-1.14-0.52-1.09-0.16-1.42-1.88-0.34-1.88-1.36-0.71...-0.79-0.65-1.53-0.78-2.39-1.86-0.90-0.80-1.87-1.57
25%-0.74-0.31-0.65-0.14-0.67-0.70-0.22-0.70-0.77-0.48...-0.58-0.46-0.80-0.50-0.67-0.79-0.62-0.59-0.72-0.71
50%-0.30-0.21-0.24-0.13-0.14-0.03-0.15-0.05-0.19-0.31...-0.33-0.28-0.18-0.18-0.15-0.11-0.22-0.31-0.09-0.14
75%0.35-0.060.32-0.120.420.70-0.040.460.550.05...0.110.040.560.120.550.480.180.130.480.60
max3.427.784.548.414.343.998.284.133.516.20...4.986.482.257.002.792.815.935.362.792.81
\n", - "

8 rows 脳 48 columns

\n", - "
" - ], - "text/plain": [ - " TA LPI AREA_MN AREA_AM AREA_MD GYRATE_MN GYRATE_AM GYRATE_MD \\\n", - "count 72.00 72.00 72.00 72.00 72.00 72.00 72.00 72.00 \n", - "mean 0.00 0.00 0.00 -0.00 -0.00 -0.00 0.00 0.00 \n", - "std 1.01 1.01 1.01 1.01 1.01 1.01 1.01 1.01 \n", - "min -1.14 -0.52 -1.09 -0.16 -1.42 -1.88 -0.34 -1.88 \n", - "25% -0.74 -0.31 -0.65 -0.14 -0.67 -0.70 -0.22 -0.70 \n", - "50% -0.30 -0.21 -0.24 -0.13 -0.14 -0.03 -0.15 -0.05 \n", - "75% 0.35 -0.06 0.32 -0.12 0.42 0.70 -0.04 0.46 \n", - "max 3.42 7.78 4.54 8.41 4.34 3.99 8.28 4.13 \n", - "\n", - " PRD SHDI ... T_Viv_Sec Viv_vacias COM ED_SING EQUIP IND \\\n", - "count 72.00 72.00 ... 72.00 72.00 72.00 72.00 72.00 72.00 \n", - "mean 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.00 -0.00 \n", - "std 1.01 1.01 ... 1.01 1.01 1.01 1.01 1.01 1.01 \n", - "min -1.36 -0.71 ... -0.79 -0.65 -1.53 -0.78 -2.39 -1.86 \n", - "25% -0.77 -0.48 ... -0.58 -0.46 -0.80 -0.50 -0.67 -0.79 \n", - "50% -0.19 -0.31 ... -0.33 -0.28 -0.18 -0.18 -0.15 -0.11 \n", - "75% 0.55 0.05 ... 0.11 0.04 0.56 0.12 0.55 0.48 \n", - "max 3.51 6.20 ... 
4.98 6.48 2.25 7.00 2.79 2.81 \n", - "\n", - " OCIO OFI RES_PLU RES_UNI \n", - "count 72.00 72.00 72.00 72.00 \n", - "mean 0.00 -0.00 -0.00 -0.00 \n", - "std 1.01 1.01 1.01 1.01 \n", - "min -0.90 -0.80 -1.87 -1.57 \n", - "25% -0.62 -0.59 -0.72 -0.71 \n", - "50% -0.22 -0.31 -0.09 -0.14 \n", - "75% 0.18 0.13 0.48 0.60 \n", - "max 5.93 5.36 2.79 2.81 \n", - "\n", - "[8 rows x 48 columns]" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_datos_std.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([23., 20., 15., 7., 4., 1., 0., 0., 1., 1.]),\n", - " array([-1.09204316, -0.5286646 , 0.03471396, 0.59809252, 1.16147108,\n", - " 1.72484964, 2.28822821, 2.85160677, 3.41498533, 3.97836389,\n", - " 4.54174245]),\n", - " )" - ] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.title(\"Histograma Area MN, datos escalados utilizando Standard scaler\")\n", - "plt.hist(df_datos_std.loc[:,\"AREA_MN\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Exportar los datos a un csv" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "df_datos_std.to_csv(r'C:\\Users\\crist\\Documents\\GitHub\\manifolds\\ciudades\\datos\\datos_completos\\df_datos_std.csv', encoding = 'ISO-8859-1')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### MinMaxScaler\n", - "\n", - "La alternativa a la estandarizaci贸n es escalar las caracteriscas para que se situan en un rango determinado, generalmente entre 0 y 1. La motivaci贸n para realizar esto es que es un procedimiento robusto cuando hay desviaciones estandar muy peque帽as de las caracter铆sticas y preservar las entradas con valor 0 de los datos \"sparse\".\n", - "\n", - "Los datos que son sparsity se refiere a aquellos en los que un porcentaje muy alto de los datos son datos perdidos o 0. 
Esta estructura a veces se utiliza para mejorar la eficiencia computacional y de almacenamiento.\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "MinMax.fit(datos.iloc[:,1:50])\n", - "datos_MinMax = MinMax.transform(datos.iloc[:,1:50])\n", - "df_datos_MinMax = pd.DataFrame(datos_MinMax, columns = datos.iloc[:,1:50].columns)\n", - "df_datos_MinMax['Ciudades'] = datos['Ciudades']" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "df_datos_MinMax.to_csv(r'C:\\Users\\crist\\Documents\\GitHub\\manifolds\\ciudades\\datos\\datos_completos\\df_datos_MinMax.csv', encoding = 'ISO-8859-1')" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - "
TALPIAREA_MNAREA_AMAREA_MDGYRATE_MNGYRATE_AMGYRATE_MDPRDSHDI...T_Viv_SecViv_vaciasCOMED_SINGEQUIPINDOCIOOFIRES_PLURES_UNI
count72.0072.0072.0072.0072.0072.0072.0072.0072.0072.00...72.0072.0072.0072.0072.0072.0072.0072.0072.0072.00
mean0.250.060.190.020.250.320.040.310.280.10...0.140.090.400.100.460.400.130.130.400.36
std0.220.120.180.120.170.170.120.170.210.15...0.170.140.270.130.190.220.150.160.220.23
min0.000.000.000.000.000.000.000.000.000.00...0.000.000.000.000.000.000.000.000.000.00
25%0.090.030.080.000.130.200.010.200.120.03...0.040.030.190.040.330.230.040.030.250.20
50%0.180.040.150.000.220.320.020.310.240.06...0.080.050.360.080.430.380.100.080.380.33
75%0.330.060.250.000.320.440.030.390.390.11...0.160.100.550.110.570.500.160.150.500.50
max1.001.001.001.001.001.001.001.001.001.00...1.001.001.001.001.001.001.001.001.001.00
\n", - "

8 rows 脳 48 columns

\n", - "
" - ], - "text/plain": [ - " TA LPI AREA_MN AREA_AM AREA_MD GYRATE_MN GYRATE_AM GYRATE_MD \\\n", - "count 72.00 72.00 72.00 72.00 72.00 72.00 72.00 72.00 \n", - "mean 0.25 0.06 0.19 0.02 0.25 0.32 0.04 0.31 \n", - "std 0.22 0.12 0.18 0.12 0.17 0.17 0.12 0.17 \n", - "min 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 \n", - "25% 0.09 0.03 0.08 0.00 0.13 0.20 0.01 0.20 \n", - "50% 0.18 0.04 0.15 0.00 0.22 0.32 0.02 0.31 \n", - "75% 0.33 0.06 0.25 0.00 0.32 0.44 0.03 0.39 \n", - "max 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 \n", - "\n", - " PRD SHDI ... T_Viv_Sec Viv_vacias COM ED_SING EQUIP IND \\\n", - "count 72.00 72.00 ... 72.00 72.00 72.00 72.00 72.00 72.00 \n", - "mean 0.28 0.10 ... 0.14 0.09 0.40 0.10 0.46 0.40 \n", - "std 0.21 0.15 ... 0.17 0.14 0.27 0.13 0.19 0.22 \n", - "min 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.00 0.00 \n", - "25% 0.12 0.03 ... 0.04 0.03 0.19 0.04 0.33 0.23 \n", - "50% 0.24 0.06 ... 0.08 0.05 0.36 0.08 0.43 0.38 \n", - "75% 0.39 0.11 ... 0.16 0.10 0.55 0.11 0.57 0.50 \n", - "max 1.00 1.00 ... 1.00 1.00 1.00 1.00 1.00 1.00 \n", - "\n", - " OCIO OFI RES_PLU RES_UNI \n", - "count 72.00 72.00 72.00 72.00 \n", - "mean 0.13 0.13 0.40 0.36 \n", - "std 0.15 0.16 0.22 0.23 \n", - "min 0.00 0.00 0.00 0.00 \n", - "25% 0.04 0.03 0.25 0.20 \n", - "50% 0.10 0.08 0.38 0.33 \n", - "75% 0.16 0.15 0.50 0.50 \n", - "max 1.00 1.00 1.00 1.00 \n", - "\n", - "[8 rows x 48 columns]" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_datos_MinMax.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0.5, 1.0, 'Histograma 脕rea mn, datos escalados utilizando MinMax scaler ')" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.hist(df_datos_MinMax.loc[:,\"AREA_MN\"])\n", - "plt.title(\"Histograma 脕rea mn, datos escalados utilizando MinMax scaler \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Maxabs\n", - "\n", - "Centrar los datos sparse destruir铆a la estructura sparseness de los datos, y por lo tanto raramente es un procedimiento adecuado. Sin embargo, puede tener sentido escalar entradas sparse, especialmente si las caracter铆sticas se encuentran en distintas escalas. El escalamiento de MaxAbsScaler fue desarrollado especialmente para escalar datos sparse, y es la forma recomendada de hacer esto.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [], - "source": [ - "Maxabs.fit(datos.iloc[:,1:50])\n", - "datos_Maxabs = Maxabs.transform(datos.iloc[:,1:50])\n", - "df_datos_Maxabs = pd.DataFrame(datos_Maxabs, columns = datos.iloc[:,1:50].columns)\n", - "df_datos_Maxabs['Ciudades'] = datos['Ciudades']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_datos_Maxabs.to_csv(r'C:\\Users\\crist\\Documents\\GitHub\\manifolds\\ciudades\\datos\\datos_completos\\df_datos_Maxabs.csv', encoding = 'ISO-8859-1')" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
TA72.000.300.210.060.140.230.371.00
LPI72.000.070.120.010.040.050.071.00
AREA_MN72.000.290.160.120.190.260.341.00
AREA_AM72.000.020.120.000.000.000.011.00
AREA_MD72.000.340.150.130.240.320.411.00
GYRATE_MN72.000.550.110.330.470.540.631.00
GYRATE_AM72.000.070.110.030.040.050.061.00
GYRATE_MD72.000.540.110.330.460.530.591.00
PRD72.000.290.200.020.140.260.411.00
SHDI72.000.340.110.270.290.310.351.00
SIDI72.000.920.070.570.910.940.971.00
MSIDI72.000.850.120.380.800.870.921.00
SHEI72.000.890.080.570.860.910.951.00
SIEI72.000.920.070.570.910.940.971.00
MSIEI72.000.850.120.380.800.870.921.00
NP72.000.150.150.030.060.090.181.00
DIVISION72.000.990.050.551.001.001.001.00
SPLIT72.000.270.220.000.140.200.381.00
MESH72.000.020.120.000.000.000.011.00
PAFRAC72.000.940.020.900.930.940.951.00
SHAPE_MN72.000.900.020.850.880.890.901.00
SHAPE_MD72.000.900.030.850.890.900.911.00
PARA_MN72.000.670.110.430.600.660.731.00
PARA_MD72.000.670.140.360.570.660.751.00
FRAC_MD72.000.960.010.940.950.960.961.00
SQUARE_MN72.00-0.030.13-1.000.000.000.000.00
SQUARE_MD72.000.780.060.650.750.770.791.00
IJI72.000.630.150.360.510.600.721.00
LSI72.000.380.160.140.260.330.471.00
TE72.000.140.140.030.060.090.171.00
ED72.000.590.160.180.510.580.701.00
RNMDP_202072.000.690.080.520.630.700.731.00
PobT72.000.080.130.010.030.050.071.00
PobH72.000.080.130.010.030.050.071.00
PobM72.000.080.130.010.030.050.071.00
Vehiculos72.000.090.130.010.040.060.081.00
T_Viviendas72.000.080.130.010.030.050.071.00
T_Viv_Prin72.000.080.130.010.030.050.071.00
T_Viv_Sec72.000.140.170.000.040.080.161.00
Viv_vacias72.000.100.140.010.030.060.101.00
COM72.000.470.240.110.280.430.601.00
ED_SING72.000.110.130.010.050.090.131.00
EQUIP72.000.540.170.150.430.520.631.00
IND72.000.460.190.100.300.440.551.00
OCIO72.000.160.140.030.070.130.181.00
OFI72.000.150.160.030.060.100.171.00
RES_PLU72.000.490.180.150.360.470.581.00
RES_UNI72.000.380.220.030.220.350.511.00
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% 75% max\n", - "TA 72.00 0.30 0.21 0.06 0.14 0.23 0.37 1.00\n", - "LPI 72.00 0.07 0.12 0.01 0.04 0.05 0.07 1.00\n", - "AREA_MN 72.00 0.29 0.16 0.12 0.19 0.26 0.34 1.00\n", - "AREA_AM 72.00 0.02 0.12 0.00 0.00 0.00 0.01 1.00\n", - "AREA_MD 72.00 0.34 0.15 0.13 0.24 0.32 0.41 1.00\n", - "GYRATE_MN 72.00 0.55 0.11 0.33 0.47 0.54 0.63 1.00\n", - "GYRATE_AM 72.00 0.07 0.11 0.03 0.04 0.05 0.06 1.00\n", - "GYRATE_MD 72.00 0.54 0.11 0.33 0.46 0.53 0.59 1.00\n", - "PRD 72.00 0.29 0.20 0.02 0.14 0.26 0.41 1.00\n", - "SHDI 72.00 0.34 0.11 0.27 0.29 0.31 0.35 1.00\n", - "SIDI 72.00 0.92 0.07 0.57 0.91 0.94 0.97 1.00\n", - "MSIDI 72.00 0.85 0.12 0.38 0.80 0.87 0.92 1.00\n", - "SHEI 72.00 0.89 0.08 0.57 0.86 0.91 0.95 1.00\n", - "SIEI 72.00 0.92 0.07 0.57 0.91 0.94 0.97 1.00\n", - "MSIEI 72.00 0.85 0.12 0.38 0.80 0.87 0.92 1.00\n", - "NP 72.00 0.15 0.15 0.03 0.06 0.09 0.18 1.00\n", - "DIVISION 72.00 0.99 0.05 0.55 1.00 1.00 1.00 1.00\n", - "SPLIT 72.00 0.27 0.22 0.00 0.14 0.20 0.38 1.00\n", - "MESH 72.00 0.02 0.12 0.00 0.00 0.00 0.01 1.00\n", - "PAFRAC 72.00 0.94 0.02 0.90 0.93 0.94 0.95 1.00\n", - "SHAPE_MN 72.00 0.90 0.02 0.85 0.88 0.89 0.90 1.00\n", - "SHAPE_MD 72.00 0.90 0.03 0.85 0.89 0.90 0.91 1.00\n", - "PARA_MN 72.00 0.67 0.11 0.43 0.60 0.66 0.73 1.00\n", - "PARA_MD 72.00 0.67 0.14 0.36 0.57 0.66 0.75 1.00\n", - "FRAC_MD 72.00 0.96 0.01 0.94 0.95 0.96 0.96 1.00\n", - "SQUARE_MN 72.00 -0.03 0.13 -1.00 0.00 0.00 0.00 0.00\n", - "SQUARE_MD 72.00 0.78 0.06 0.65 0.75 0.77 0.79 1.00\n", - "IJI 72.00 0.63 0.15 0.36 0.51 0.60 0.72 1.00\n", - "LSI 72.00 0.38 0.16 0.14 0.26 0.33 0.47 1.00\n", - "TE 72.00 0.14 0.14 0.03 0.06 0.09 0.17 1.00\n", - "ED 72.00 0.59 0.16 0.18 0.51 0.58 0.70 1.00\n", - "RNMDP_2020 72.00 0.69 0.08 0.52 0.63 0.70 0.73 1.00\n", - "PobT 72.00 0.08 0.13 0.01 0.03 0.05 0.07 1.00\n", - "PobH 72.00 0.08 0.13 0.01 0.03 0.05 0.07 1.00\n", - "PobM 72.00 0.08 0.13 0.01 0.03 0.05 0.07 1.00\n", - "Vehiculos 72.00 
0.09 0.13 0.01 0.04 0.06 0.08 1.00\n", - "T_Viviendas 72.00 0.08 0.13 0.01 0.03 0.05 0.07 1.00\n", - "T_Viv_Prin 72.00 0.08 0.13 0.01 0.03 0.05 0.07 1.00\n", - "T_Viv_Sec 72.00 0.14 0.17 0.00 0.04 0.08 0.16 1.00\n", - "Viv_vacias 72.00 0.10 0.14 0.01 0.03 0.06 0.10 1.00\n", - "COM 72.00 0.47 0.24 0.11 0.28 0.43 0.60 1.00\n", - "ED_SING 72.00 0.11 0.13 0.01 0.05 0.09 0.13 1.00\n", - "EQUIP 72.00 0.54 0.17 0.15 0.43 0.52 0.63 1.00\n", - "IND 72.00 0.46 0.19 0.10 0.30 0.44 0.55 1.00\n", - "OCIO 72.00 0.16 0.14 0.03 0.07 0.13 0.18 1.00\n", - "OFI 72.00 0.15 0.16 0.03 0.06 0.10 0.17 1.00\n", - "RES_PLU 72.00 0.49 0.18 0.15 0.36 0.47 0.58 1.00\n", - "RES_UNI 72.00 0.38 0.22 0.03 0.22 0.35 0.51 1.00" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_datos_Maxabs.describe().T" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([23., 20., 15., 7., 4., 1., 0., 0., 1., 1.]),\n", - " array([0.12231236, 0.21008113, 0.29784989, 0.38561865, 0.47338742,\n", - " 0.56115618, 0.64892495, 0.73669371, 0.82446247, 0.91223124,\n", - " 1. ]),\n", - " )" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.title(\"Histograma Area mn, datos escalados Maxabs\")\n", - "plt.hist(df_datos_Maxabs.loc[:,\"AREA_MN\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ROBUST SCALER\n", - "\n", - "Si los datos contienen datos at铆picos, escalar usando la media y la varianza de los datos es probable que no funcione bien. En estos casos, es posible utilizar el RobustScaler como una alternativa. Este m茅todo usa estimadores m谩s robustos para estimar la centralidad y rango de los valores.Este m茅todo de escalado no utiliza la media y escala los datos de acuerdo al rango cuant铆lico (Por defecto el rango intercuart铆lico). El rango intercuart铆lico se define como el rango entre el primer cuantil (25%) y el tercer cuantil (75%). \n", - "\n", - "[COMPARACI脫N DE DISTINTOS M脡TODOS DE ESCALADO APLICADO A UN DATASET CON\n", - "OUTLIERS](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#plot-all-scaling-robust-scaler-section)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "Rscaler.fit(datos.iloc[:,1:50])\n", - "datos_Rscaler = Rscaler.transform(datos.iloc[:,1:50])\n", - "df_datos_Rscaler = pd.DataFrame(datos_Rscaler, columns = datos.iloc[:,1:50].columns)\n", - "df_datos_Rscaler['Ciudades'] = datos['Ciudades']" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "df_datos_Rscaler\n", - "df_datos_Rscaler.to_csv(r'C:\\Users\\crist\\Documents\\GitHub\\manifolds\\ciudades\\datos\\datos_completos\\df_datos_Rscaler.csv', encoding = 'ISO-8859-1')" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
TA72.000.280.92-0.77-0.40-0.000.603.42
LPI72.000.854.03-1.25-0.410.000.5931.98
AREA_MN72.000.251.04-0.88-0.42-0.000.584.92
AREA_AM72.005.4741.18-0.94-0.390.000.61349.52
AREA_MD72.000.130.92-1.16-0.480.000.524.08
GYRATE_MN72.000.020.72-1.32-0.480.000.522.87
GYRATE_AM72.000.885.77-1.06-0.360.000.6448.31
GYRATE_MD72.000.040.86-1.57-0.560.000.443.58
PRD72.000.150.76-0.88-0.440.000.562.79
SHDI72.000.581.91-0.77-0.330.000.6712.34
SIDI72.00-0.291.19-6.07-0.58-0.000.420.95
MSIDI72.00-0.190.96-4.01-0.550.000.451.07
SHEI72.00-0.200.84-3.79-0.620.000.380.97
SIEI72.00-0.291.19-6.07-0.580.000.420.95
MSIEI72.00-0.190.96-4.01-0.55-0.000.451.07
NP72.000.481.30-0.56-0.280.000.727.81
DIVISION72.00-2.2615.80-133.64-0.480.000.520.86
SPLIT72.000.310.90-0.80-0.250.000.753.27
MESH72.003.6527.09-0.71-0.300.000.70229.97
PAFRAC72.000.130.89-1.89-0.51-0.000.492.76
SHAPE_MN72.000.311.15-2.01-0.30-0.000.705.14
SHAPE_MD72.000.301.35-2.35-0.490.000.515.58
PARA_MN72.000.090.82-1.66-0.470.000.532.47
PARA_MD72.000.080.77-1.64-0.46-0.000.541.91
FRAC_MD72.000.241.06-1.61-0.36-0.000.644.37
SQUARE_MN72.00-274508.731383959.91-10398778.79-0.500.000.503.22
SQUARE_MD72.000.321.46-2.85-0.390.000.615.64
IJI72.000.120.73-1.19-0.440.000.561.95
LSI72.000.250.76-0.86-0.330.000.673.17
TE72.000.451.35-0.59-0.30-0.000.708.68
ED72.000.050.80-2.07-0.400.000.602.12
RNMDP_202072.00-0.050.82-1.69-0.650.000.352.97
PobT72.000.843.75-1.12-0.470.000.5327.52
PobH72.000.823.56-1.07-0.440.000.5625.96
PobM72.000.863.84-1.11-0.460.000.5428.30
Vehiculos72.000.682.97-0.96-0.460.000.5421.60
T_Viviendas72.000.743.11-0.89-0.420.000.5822.41
T_Viv_Prin72.000.833.70-1.06-0.440.000.5626.91
T_Viv_Sec72.000.481.45-0.66-0.360.000.647.68
Viv_vacias72.000.541.99-0.74-0.370.000.6313.37
COM72.000.130.74-0.98-0.45-0.000.551.77
ED_SING72.000.291.62-0.96-0.520.000.4811.57
EQUIP72.000.120.83-1.84-0.430.000.572.41
IND72.000.080.79-1.38-0.540.000.462.30
OCIO72.000.281.26-0.85-0.49-0.000.517.72
OFI72.000.431.41-0.69-0.390.000.617.91
RES_PLU72.000.080.84-1.48-0.520.000.482.40
RES_UNI72.000.110.77-1.09-0.430.000.572.26
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% 75% max\n", - "TA 72.00 0.28 0.92 -0.77 -0.40 -0.00 0.60 3.42\n", - "LPI 72.00 0.85 4.03 -1.25 -0.41 0.00 0.59 31.98\n", - "AREA_MN 72.00 0.25 1.04 -0.88 -0.42 -0.00 0.58 4.92\n", - "AREA_AM 72.00 5.47 41.18 -0.94 -0.39 0.00 0.61 349.52\n", - "AREA_MD 72.00 0.13 0.92 -1.16 -0.48 0.00 0.52 4.08\n", - "GYRATE_MN 72.00 0.02 0.72 -1.32 -0.48 0.00 0.52 2.87\n", - "GYRATE_AM 72.00 0.88 5.77 -1.06 -0.36 0.00 0.64 48.31\n", - "GYRATE_MD 72.00 0.04 0.86 -1.57 -0.56 0.00 0.44 3.58\n", - "PRD 72.00 0.15 0.76 -0.88 -0.44 0.00 0.56 2.79\n", - "SHDI 72.00 0.58 1.91 -0.77 -0.33 0.00 0.67 12.34\n", - "SIDI 72.00 -0.29 1.19 -6.07 -0.58 -0.00 0.42 0.95\n", - "MSIDI 72.00 -0.19 0.96 -4.01 -0.55 0.00 0.45 1.07\n", - "SHEI 72.00 -0.20 0.84 -3.79 -0.62 0.00 0.38 0.97\n", - "SIEI 72.00 -0.29 1.19 -6.07 -0.58 0.00 0.42 0.95\n", - "MSIEI 72.00 -0.19 0.96 -4.01 -0.55 -0.00 0.45 1.07\n", - "NP 72.00 0.48 1.30 -0.56 -0.28 0.00 0.72 7.81\n", - "DIVISION 72.00 -2.26 15.80 -133.64 -0.48 0.00 0.52 0.86\n", - "SPLIT 72.00 0.31 0.90 -0.80 -0.25 0.00 0.75 3.27\n", - "MESH 72.00 3.65 27.09 -0.71 -0.30 0.00 0.70 229.97\n", - "PAFRAC 72.00 0.13 0.89 -1.89 -0.51 -0.00 0.49 2.76\n", - "SHAPE_MN 72.00 0.31 1.15 -2.01 -0.30 -0.00 0.70 5.14\n", - "SHAPE_MD 72.00 0.30 1.35 -2.35 -0.49 0.00 0.51 5.58\n", - "PARA_MN 72.00 0.09 0.82 -1.66 -0.47 0.00 0.53 2.47\n", - "PARA_MD 72.00 0.08 0.77 -1.64 -0.46 -0.00 0.54 1.91\n", - "FRAC_MD 72.00 0.24 1.06 -1.61 -0.36 -0.00 0.64 4.37\n", - "SQUARE_MN 72.00 -274508.73 1383959.91 -10398778.79 -0.50 0.00 0.50 3.22\n", - "SQUARE_MD 72.00 0.32 1.46 -2.85 -0.39 0.00 0.61 5.64\n", - "IJI 72.00 0.12 0.73 -1.19 -0.44 0.00 0.56 1.95\n", - "LSI 72.00 0.25 0.76 -0.86 -0.33 0.00 0.67 3.17\n", - "TE 72.00 0.45 1.35 -0.59 -0.30 -0.00 0.70 8.68\n", - "ED 72.00 0.05 0.80 -2.07 -0.40 0.00 0.60 2.12\n", - "RNMDP_2020 72.00 -0.05 0.82 -1.69 -0.65 0.00 0.35 2.97\n", - "PobT 72.00 0.84 3.75 -1.12 -0.47 0.00 0.53 27.52\n", - "PobH 72.00 
0.82 3.56 -1.07 -0.44 0.00 0.56 25.96\n", - "PobM 72.00 0.86 3.84 -1.11 -0.46 0.00 0.54 28.30\n", - "Vehiculos 72.00 0.68 2.97 -0.96 -0.46 0.00 0.54 21.60\n", - "T_Viviendas 72.00 0.74 3.11 -0.89 -0.42 0.00 0.58 22.41\n", - "T_Viv_Prin 72.00 0.83 3.70 -1.06 -0.44 0.00 0.56 26.91\n", - "T_Viv_Sec 72.00 0.48 1.45 -0.66 -0.36 0.00 0.64 7.68\n", - "Viv_vacias 72.00 0.54 1.99 -0.74 -0.37 0.00 0.63 13.37\n", - "COM 72.00 0.13 0.74 -0.98 -0.45 -0.00 0.55 1.77\n", - "ED_SING 72.00 0.29 1.62 -0.96 -0.52 0.00 0.48 11.57\n", - "EQUIP 72.00 0.12 0.83 -1.84 -0.43 0.00 0.57 2.41\n", - "IND 72.00 0.08 0.79 -1.38 -0.54 0.00 0.46 2.30\n", - "OCIO 72.00 0.28 1.26 -0.85 -0.49 -0.00 0.51 7.72\n", - "OFI 72.00 0.43 1.41 -0.69 -0.39 0.00 0.61 7.91\n", - "RES_PLU 72.00 0.08 0.84 -1.48 -0.52 0.00 0.48 2.40\n", - "RES_UNI 72.00 0.11 0.77 -1.09 -0.43 0.00 0.57 2.26" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_datos_Rscaler.describe().T" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([23., 20., 15., 7., 4., 1., 0., 0., 1., 1.]),\n", - " array([-0.87668764, -0.29685363, 0.28298038, 0.86281439, 1.44264841,\n", - " 2.02248242, 2.60231643, 3.18215044, 3.76198445, 4.34181847,\n", - " 4.92165248]),\n", - " )" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.title(\"Histograma Area Mn, datos escalados utilizando el Robust Scaler\")\n", - "plt.hist(df_datos_Rscaler.loc[:,\"AREA_MN\"])" - ] - }, - { - "attachments": { - "image-2.png": { - "image/png": "" - }, - "image.png": { - "image/png": "" - } - }, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Transformaciones no lineales\n", - "\n", - "Dos tipos de transformaciones estan disponibles: la transformaci贸n de cuantil y la transformaci贸n de potencia. Ambas transformaciones est谩n basadas en transformaciones monot贸nicas de las caracter铆sticas y por lo tanto preservan el ranking de los valore a trav茅s de cada caracter铆stica. Sin embargo, distorcionan las correlaciones y las distancias a trav茅s de las caracter铆sticas. \n", - "\n", - "Las transformaciones de potencia pertenecen a una familia de transformaciones que apuntan a representar datos de cualquier distribuci贸n a una tan cercana posible a la distribuci贸n Gaussiana. \n", - "\n", - "Esto es necesario en algunos casos, debido a que algunos modelos requieren que las caracter铆sticas en un set de datos cumplan estos requerimientos.\n", - "\n", - "Las transformaciones de potencia actualmente proveen dos de estas transformaciones. La Yeo-Johnson y the Box-Cox. Box-Cox solo puede ser aplicada cuando los datos son mayor a 0. \n", - "\n", - "En ambos m茅todos, las transformaciones son parametrizadas por alpha que es determinada a trav茅s de estimaci贸n de m谩xima probabilidad. \n", - "\n", - "In both methods, the transformation is parameterized by, which is determined through maximum likelihood estimation. \n", - "\n", - "![image.png](attachment:image.png)\n", - "\n", - "![image-2.png](attachment:image-2.png)\n", - "\n", - "\n", - "En resumen, el objetivo primario es transformar los datos para que estos presentan una distribuci贸n normal. 
Si los datos son mayores a 0, se puede utilizar una transformación de Box-Cox; si, por el contrario, los datos contienen valores negativos o ceros, o si se busca una transformación más flexible que pueda manejar un mayor rango de valores, la transformación de Yeo-Johnson es preferible. \n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [], - "source": [ - "PTrans = PowerTransformer(method='yeo-johnson', standardize=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "PTrans.fit(datos.iloc[:,1:50])\n", - "datos_PTrans= PTrans.transform(datos.iloc[:,1:50])\n", - "df_datos_PTrans= pd.DataFrame(datos_PTrans, columns = datos.iloc[:,1:50].columns)\n", - "df_datos_PTrans['Ciudades'] = datos['Ciudades']" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "#df_datos_PTrans.to_csv(r'C:\\Users\\crist\\Documents\\GitHub\\manifolds\\ciudades\\datos\\datos_completos\\df_datos_PTrans.csv', encoding = 'ISO-8859-1')" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
TA72.00-0.001.01-2.29-0.79-0.000.692.09
LPI72.00-0.001.01-2.87-0.530.010.522.67
AREA_MN72.000.001.01-1.73-0.73-0.030.682.30
AREA_AM72.000.001.01-2.09-0.55-0.000.532.60
AREA_MD72.00-0.001.01-2.23-0.730.060.702.45
GYRATE_MN72.00-0.001.01-2.44-0.670.090.782.91
GYRATE_AM72.00-0.001.01-2.08-0.450.050.652.87
GYRATE_MD72.000.001.01-2.64-0.690.090.602.89
PRD72.000.001.01-2.21-0.810.050.802.18
SHDI72.00-0.001.01-1.72-0.73-0.180.632.23
SIDI72.00-0.001.01-2.26-0.650.040.721.84
MSIDI72.000.001.01-2.29-0.660.010.701.93
SHEI72.000.001.01-2.35-0.770.060.762.14
SIEI72.000.001.01-2.26-0.650.040.721.84
MSIEI72.00-0.001.01-2.29-0.660.010.691.95
NP72.00-0.001.01-2.29-0.76-0.080.762.28
DIVISION72.00-0.001.01-4.07-0.180.220.691.03
SPLIT72.00-0.001.01-2.94-0.54-0.130.722.32
MESH72.000.001.01-2.30-0.510.030.672.85
PAFRAC72.000.001.01-3.15-0.71-0.030.542.32
SHAPE_MN72.000.001.01-3.80-0.53-0.140.602.59
SHAPE_MD72.00-0.001.01-3.72-0.58-0.050.422.46
PARA_MN72.000.001.01-2.85-0.660.010.652.30
PARA_MD72.00-0.001.01-2.47-0.69-0.060.622.20
FRAC_MD72.000.000.00-0.00-0.00-0.000.000.00
SQUARE_MN72.00-0.001.01-3.510.230.310.400.90
SQUARE_MD72.00-0.001.01-3.89-0.44-0.070.422.48
IJI72.000.001.01-2.43-0.75-0.030.712.04
LSI72.000.001.01-2.91-0.82-0.110.812.29
TE72.000.001.01-2.15-0.71-0.040.772.35
ED72.000.001.01-2.96-0.54-0.030.712.46
RNMDP_202072.00-0.001.01-2.33-0.710.130.553.04
PobT72.00-0.001.01-2.59-0.510.070.482.76
PobH72.000.001.01-2.57-0.520.050.492.77
PobM72.00-0.001.01-2.61-0.500.070.492.75
Vehiculos72.00-0.001.01-2.36-0.580.090.552.66
T_Viviendas72.00-0.001.01-2.54-0.630.060.582.60
T_Viv_Prin72.00-0.001.01-2.68-0.500.080.522.70
T_Viv_Sec72.000.001.01-2.40-0.600.030.612.41
Viv_vacias72.00-0.001.01-2.43-0.640.010.622.73
COM72.000.001.01-2.11-0.790.000.721.80
ED_SING72.000.001.01-2.46-0.730.160.642.83
EQUIP72.00-0.001.01-3.09-0.63-0.060.612.41
IND72.000.001.01-2.61-0.750.030.582.26
OCIO72.000.001.01-1.89-0.830.090.682.34
OFI72.00-0.001.01-1.76-0.86-0.040.712.17
RES_PLU72.000.001.01-2.47-0.680.030.572.29
RES_UNI72.00-0.001.01-2.29-0.620.030.702.17
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% 75% max\n", - "TA 72.00 -0.00 1.01 -2.29 -0.79 -0.00 0.69 2.09\n", - "LPI 72.00 -0.00 1.01 -2.87 -0.53 0.01 0.52 2.67\n", - "AREA_MN 72.00 0.00 1.01 -1.73 -0.73 -0.03 0.68 2.30\n", - "AREA_AM 72.00 0.00 1.01 -2.09 -0.55 -0.00 0.53 2.60\n", - "AREA_MD 72.00 -0.00 1.01 -2.23 -0.73 0.06 0.70 2.45\n", - "GYRATE_MN 72.00 -0.00 1.01 -2.44 -0.67 0.09 0.78 2.91\n", - "GYRATE_AM 72.00 -0.00 1.01 -2.08 -0.45 0.05 0.65 2.87\n", - "GYRATE_MD 72.00 0.00 1.01 -2.64 -0.69 0.09 0.60 2.89\n", - "PRD 72.00 0.00 1.01 -2.21 -0.81 0.05 0.80 2.18\n", - "SHDI 72.00 -0.00 1.01 -1.72 -0.73 -0.18 0.63 2.23\n", - "SIDI 72.00 -0.00 1.01 -2.26 -0.65 0.04 0.72 1.84\n", - "MSIDI 72.00 0.00 1.01 -2.29 -0.66 0.01 0.70 1.93\n", - "SHEI 72.00 0.00 1.01 -2.35 -0.77 0.06 0.76 2.14\n", - "SIEI 72.00 0.00 1.01 -2.26 -0.65 0.04 0.72 1.84\n", - "MSIEI 72.00 -0.00 1.01 -2.29 -0.66 0.01 0.69 1.95\n", - "NP 72.00 -0.00 1.01 -2.29 -0.76 -0.08 0.76 2.28\n", - "DIVISION 72.00 -0.00 1.01 -4.07 -0.18 0.22 0.69 1.03\n", - "SPLIT 72.00 -0.00 1.01 -2.94 -0.54 -0.13 0.72 2.32\n", - "MESH 72.00 0.00 1.01 -2.30 -0.51 0.03 0.67 2.85\n", - "PAFRAC 72.00 0.00 1.01 -3.15 -0.71 -0.03 0.54 2.32\n", - "SHAPE_MN 72.00 0.00 1.01 -3.80 -0.53 -0.14 0.60 2.59\n", - "SHAPE_MD 72.00 -0.00 1.01 -3.72 -0.58 -0.05 0.42 2.46\n", - "PARA_MN 72.00 0.00 1.01 -2.85 -0.66 0.01 0.65 2.30\n", - "PARA_MD 72.00 -0.00 1.01 -2.47 -0.69 -0.06 0.62 2.20\n", - "FRAC_MD 72.00 0.00 0.00 -0.00 -0.00 -0.00 0.00 0.00\n", - "SQUARE_MN 72.00 -0.00 1.01 -3.51 0.23 0.31 0.40 0.90\n", - "SQUARE_MD 72.00 -0.00 1.01 -3.89 -0.44 -0.07 0.42 2.48\n", - "IJI 72.00 0.00 1.01 -2.43 -0.75 -0.03 0.71 2.04\n", - "LSI 72.00 0.00 1.01 -2.91 -0.82 -0.11 0.81 2.29\n", - "TE 72.00 0.00 1.01 -2.15 -0.71 -0.04 0.77 2.35\n", - "ED 72.00 0.00 1.01 -2.96 -0.54 -0.03 0.71 2.46\n", - "RNMDP_2020 72.00 -0.00 1.01 -2.33 -0.71 0.13 0.55 3.04\n", - "PobT 72.00 -0.00 1.01 -2.59 -0.51 0.07 0.48 2.76\n", - "PobH 72.00 0.00 1.01 -2.57 
-0.52 0.05 0.49 2.77\n", - "PobM 72.00 -0.00 1.01 -2.61 -0.50 0.07 0.49 2.75\n", - "Vehiculos 72.00 -0.00 1.01 -2.36 -0.58 0.09 0.55 2.66\n", - "T_Viviendas 72.00 -0.00 1.01 -2.54 -0.63 0.06 0.58 2.60\n", - "T_Viv_Prin 72.00 -0.00 1.01 -2.68 -0.50 0.08 0.52 2.70\n", - "T_Viv_Sec 72.00 0.00 1.01 -2.40 -0.60 0.03 0.61 2.41\n", - "Viv_vacias 72.00 -0.00 1.01 -2.43 -0.64 0.01 0.62 2.73\n", - "COM 72.00 0.00 1.01 -2.11 -0.79 0.00 0.72 1.80\n", - "ED_SING 72.00 0.00 1.01 -2.46 -0.73 0.16 0.64 2.83\n", - "EQUIP 72.00 -0.00 1.01 -3.09 -0.63 -0.06 0.61 2.41\n", - "IND 72.00 0.00 1.01 -2.61 -0.75 0.03 0.58 2.26\n", - "OCIO 72.00 0.00 1.01 -1.89 -0.83 0.09 0.68 2.34\n", - "OFI 72.00 -0.00 1.01 -1.76 -0.86 -0.04 0.71 2.17\n", - "RES_PLU 72.00 0.00 1.01 -2.47 -0.68 0.03 0.57 2.29\n", - "RES_UNI 72.00 -0.00 1.01 -2.29 -0.62 0.03 0.70 2.17" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_datos_PTrans.describe().T" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 9., 4., 10., 11., 8., 13., 6., 6., 3., 2.]),\n", - " array([-1.72731958, -1.32416401, -0.92100844, -0.51785287, -0.1146973 ,\n", - " 0.28845827, 0.69161384, 1.09476941, 1.49792499, 1.90108056,\n", - " 2.30423613]),\n", - " )" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.title(\"Histograma Area mn, escalado aplicando Yeo-Johnson\")\n", - "plt.hist(df_datos_PTrans.loc[:,\"AREA_MN\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "datamecum", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/function_search_FA_parameters.R b/function_search_FA_parameters.R deleted file mode 100644 index 301fb87..0000000 --- a/function_search_FA_parameters.R +++ /dev/null @@ -1,155 +0,0 @@ -library(psych) -library(writexl) -library(tidyverse) -library(psych) -library(GPArotation) -library(writexl) -library(openxlsx2) - -### Funcion para exportar datos a excel - -export_list_to_excel <- function(my_list, output_file) { - # Create a new workbook - - wb <- wb_workbook() - names_list <- names(my_list) - - # Iterate over the list elements - for (i in seq_along(my_list)) { - # Extract the current element - current_element <- my_list[[i]] - - # Create a name sheet - - name_sheet = paste('fa',i,sep = "") - - fa_parameters = names_list[i] - - # Create a new sheet in the workbook - wb$add_worksheet(sheet = name_sheet) - - wb$add_data(name_sheet, fa_parameters, startCol = 1, startRow = 1) - - wb$add_data(name_sheet, current_element, startCol = 1, startRow = 3, rowNames = TRUE) - - } - - # Save the workbook as an Excel file - wb_save(wb, path = output_file) -} - - -# Funcion que a partir de la salida del objeto FA devuelve un data.frame - -df_factors <- function(fa_result){ - t = fa_result$Vaccounted[3,] - n = length(t) - column_names <- paste0("F", 1:n) # Generate column names - order_t <- t[order(names(t))] - df <- 
setNames(as.data.frame(matrix(order_t, nrow = 1, ncol = n)), column_names)
  return(df)
}


# Write one summary row of a factor-analysis run to the "resultados" sheet.
#
# Arguments:
#   wb  Workbook object exposing `$add_data()`; it must already contain a
#       sheet called "resultados".
#   df  Data frame; only its first row is written.
#   n   1-based index of the parameter combination within the current dataset.
#   it  1-based index of the dataset being processed.
#   pc  Number of parameter combinations evaluated per dataset.
#
# Returns:
#   The workbook `wb`.
#
# NOTE(review): the defaults (`i`, `iterations`, `parameter_combinations`) are
# only resolvable if objects with those names exist in the enclosing/global
# environment; the caller in this file always passes all three explicitly.
summarize_fa <- function(wb, df, n = i, it = iterations, pc = parameter_combinations) {
  # Each dataset owns a band of `pc + 2` rows and its runs start two rows into
  # that band. The same offset is applied to every dataset so that all bands
  # are separated by two blank rows (previously only the first dataset got the
  # offset, so the second band abutted the first).
  start_row <- (pc + 2) * (it - 1) + 2 + n

  wb$add_data("resultados", df[1, ], startCol = 2, startRow = start_row,
              colNames = FALSE, rowNames = FALSE)

  return(wb)
}


# Run fa() on every dataset for every combination of number of factors,
# factoring method and rotation, and export the results to Excel.
#
# Arguments:
#   data_list       List of datasets; each element is passed as `r` to fa().
#   data_names      Labels for the datasets, in the same order as `data_list`;
#                   used in the description of each run.
#   n_factors       Factor counts to try (coerced with as.integer()).
#   fm_methods      Values for the `fm` argument of fa().
#   rotate_methods  Values for the `rotate` argument of fa().
#   output_file     Path of the workbook written by export_list_to_excel(),
#                   one sheet per run (loadings, communality, complexity and
#                   uniqueness of each variable).
#   summary_file    Path of the workbook whose "resultados" sheet holds one
#                   row per run: the run description followed by the values
#                   returned by df_factors().
#
# Side effects:
#   Writes `output_file` and `summary_file`.
#
# Returns:
#   The value of the final wb_save() call.
factor_analysis_export <- function(data_list, data_names, n_factors, fm_methods,
                                   rotate_methods,
                                   output_file = "fa_results.xlsx",
                                   summary_file = "wb_test.xlsx") {
  n_datasets <- length(data_list)

  # A short `data_names` would silently label runs as "NA"; fall back to
  # generated names for the datasets that have none.
  if (length(data_names) < n_datasets) {
    warning("Fewer data_names than datasets; using generated names for the rest.",
            call. = FALSE)
    unnamed <- (length(data_names) + 1L):n_datasets
    data_names <- c(data_names, paste0("data", unnamed))
  }

  # All combinations of the requested parameters.
  parameter_combinations <- tidyr::expand_grid(
    n_factors = as.integer(n_factors),
    fm_methods = fm_methods,
    rotate_methods = rotate_methods
  )
  n_combinations <- nrow(parameter_combinations)
  n_runs <- n_datasets * n_combinations

  # Preallocate the per-run results instead of growing them inside the loop.
  loading_list <- vector("list", n_runs)
  run_labels <- character(n_runs)

  wb <- wb_workbook()
  wb$add_worksheet(sheet = "resultados")

  run <- 0L
  for (dataset_index in seq_along(data_list)) {
    dataset <- data_list[[dataset_index]]

    # seq_len() keeps the loop from running when there are no combinations.
    for (i in seq_len(n_combinations)) {
      current_params <- parameter_combinations[i, ]
      n <- current_params$n_factors
      fm <- current_params$fm_methods
      rotate <- current_params$rotate_methods

      fa_result <- fa(r = dataset, nfactors = n, fm = fm, rotate = rotate)

      # One-row data frame built by df_factors() from the fa() result.
      variance_result <- df_factors(fa_result)

      run_label <- paste("data = ", data_names[dataset_index], "nfactors =", n, "fm =", fm, "rotate =", rotate)

      # Per-variable table for this run: loadings plus communality,
      # complexity and uniqueness.
      loadings_values <- data.frame(fa_result$loadings[])
      item_stats <- data.frame(
        comunality = fa_result$communality,
        complexity = fa_result$complexity,
        uniqueness = fa_result$uniquenesses
      )

      run <- run + 1L
      loading_list[[run]] <- cbind(loadings_values, item_stats)
      run_labels[run] <- run_label

      # Summary row for this run, appended to the "resultados" sheet.
      res_var <- cbind(data.frame(Parameters = run_label), variance_result)
      wb <- summarize_fa(wb, res_var, i, dataset_index, n_combinations)
    }
  }

  # Name every per-run table after its parameters, once, after the loops.
  names(loading_list) <- run_labels

  export_list_to_excel(loading_list, output_file)

  wb_save(wb, path = summary_file)
}