-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean_data.R
33 lines (24 loc) · 1.56 KB
/
clean_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
library(here)
here::i_am("lib.R")
source(here::here("lib.R"))
source(here::here("env.R"))
library(elastic)
library(dplyr)
library(jsonlite)
# Connect to Elasticsearch over plain HTTP. The host, port, user and password
# variables are not defined in this script; presumably they come from the
# sourced lib.R / env.R files (TODO confirm).
con_elasticsearch <- connect(
  host = host_elasticsearch,
  port = port_elasticsearch,
  user = user_elasticsearch,
  pwd = password_elasticsearch,
  transport_schema = "http"
)

# Request up to 10000 documents from the "ssl" index as raw JSON, parse the
# response and keep only the "_source" part of the hits.
ssl_data <- fromJSON(
  Search(con_elasticsearch, index = "ssl", size = 10000, raw = TRUE)
)[["hits"]][["hits"]][["_source"]]

# Same extraction for the "cmdb" index, requesting up to 100000 documents.
# NOTE(review): Elasticsearch limits `size` through index.max_result_window
# (10000 by default) -- confirm the cmdb index is configured to accept 100000.
cmdb_data <- fromJSON(
  Search(con_elasticsearch, index = "cmdb", size = 100000, raw = TRUE)
)[["hits"]][["hits"]][["_source"]]
# Filter the SSL data in three successive steps.
# NOTE(review): str_detect() is presumably stringr's, but this script does not
# attach stringr itself -- confirm that lib.R (or env.R) does.
ssl_data <- ssl_data %>%
  # 1. Drop records whose ipv4 starts with one of the prefixes the original
  #    author associates with "LAMP".
  #    FIXME: is this list of prefixes correct and exhaustive?
  filter(
    !(
      str_detect(ipv4, "^127\\.178\\.226\\..*") |
        str_detect(ipv4, "^127\\.178\\.222\\..*") |
        str_detect(ipv4, "^127\\.178\\.32\\..*")
    )
  ) %>%
  # 2. Drop wildcard entries: a literal "*" in the subject CN or anywhere in
  #    the character representation of the san value.
  filter(
    !(
      str_detect(subject$CN, "\\*") |
        str_detect(as.character(san), "\\*")
    )
  ) %>%
  # 3. Keep only records whose ipv4 is present among the CMDB ip values.
  filter(ipv4 %in% cmdb_data$ip)

# Conversely, keep only CMDB records whose ip still appears in the filtered
# SSL data.
cmdb_data <- filter(cmdb_data, ip %in% ssl_data$ipv4)
# Normalise the column types and names of the SSL data:
#   - ipv4 is coerced to character;
#   - san, ciphers and technologies are flattened to a single string per row,
#     their values joined with ", ";
#   - validFrom and validTo are converted with as.Date();
#   - ipv4, validFrom and validTo are renamed to ip, date_debut and date_fin.
#
# vapply() with a character(1) template replaces sapply() so that each
# flattened column is always a plain character vector. sapply() is not
# type-stable: it returns an empty list when ssl_data has no rows, and it
# attaches names to the result when the column is a character vector.
# paste(..., collapse = ) always yields exactly one string, so the character(1)
# template holds for every row, including empty or NULL entries.
ssl_data <- ssl_data %>%
  mutate(
    ipv4 = as.character(ipv4),
    san = vapply(
      san, paste, character(1),
      collapse = ", ", USE.NAMES = FALSE
    ),
    ciphers = vapply(
      ciphers, paste, character(1),
      collapse = ", ", USE.NAMES = FALSE
    ),
    technologies = vapply(
      technologies, paste, character(1),
      collapse = ", ", USE.NAMES = FALSE
    ),
    validFrom = as.Date(validFrom),
    validTo = as.Date(validTo)
  ) %>%
  dplyr::rename(ip = ipv4, date_debut = validFrom, date_fin = validTo)