Skip to content

Commit

Permalink
Merge branch 'main' into km/aws_amazon_q
Browse files Browse the repository at this point in the history
  • Loading branch information
aquamatthias authored Sep 5, 2024
2 parents 054f5a5 + a0efd4f commit 205de26
Show file tree
Hide file tree
Showing 61 changed files with 5,204 additions and 456 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


## Overview
🔍 Search Infrastructure: Fix Inventory maps out your cloud infrastructure in a [graph](https://inventory.fix.security/docs/concepts/graph) and provides a simple [search syntax](https://inventory.fix.security/docs/concepts/search).
🔍 Search Infrastructure: Fix Inventory maps out your cloud infrastructure in a [graph](https://inventory.fix.security/concepts/asset-inventory-graph) and provides a simple [search syntax](https://inventory.fix.security/docs/concepts/search).

📊 Generate Reports: Fix Inventory keeps track of and reports infrastructure changes over time, making it easy to [audit resource usage and cleanup](https://inventory.fix.security/docs/concepts/cloud-data-sync).

Expand Down
27 changes: 20 additions & 7 deletions fixcore/fixcore/analytics/posthog.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

import asyncio
import json
import logging
from collections import deque
from datetime import timedelta, datetime
from typing import MutableSequence, Optional, List
from typing import MutableSequence, Optional, List, Set

from aiohttp import ClientSession
from posthog.client import Client
Expand Down Expand Up @@ -56,6 +57,7 @@ def __init__(
self.lock = asyncio.Lock()
self.last_fetched: Optional[datetime] = None
self.session: Optional[ClientSession] = None
self.white_listed_events: Set[str] = set()

async def capture(self, event: List[AnalyticsEvent]) -> None:
"""
Expand All @@ -64,24 +66,34 @@ async def capture(self, event: List[AnalyticsEvent]) -> None:
Only in the rare case when the queue size reached its maximum the queue will be flushed directly.
"""
async with self.lock:
self.queue.extend(event)
for e in event:
if e.kind not in self.white_listed_events:
log.debug(f"Event {e.kind} is not whitelisted and will be ignored.")
continue
self.queue.append(e)

if len(self.queue) >= self.flush_at:
await self.flush()

async def refresh_public_api_key(self) -> None:
async def refresh_from_cdn(self) -> None:
"""
The API key is public but not static, so we need to refresh it periodically.
"""
try:
if not self.session:
self.session = ClientSession()
async with self.session.get("https://cdn.some.engineering/posthog/public_api_key") as resp:
api_key = (await resp.text()).strip()
async with self.session.get("https://cdn.some.engineering/posthog/posthog.json") as resp:
ph = json.loads(await resp.text())
# update the api key
api_key = ph["api_key"]
self.client.api_key = api_key
for consumer in self.client.consumers:
consumer.api_key = api_key
# update the events to report
self.white_listed_events = set(ph["events"])
# update the last fetched time
self.last_fetched = utc()
log.debug("Fetched latest posthog data from CDN.")
except Exception as ex:
log.debug(f"Could not fetch latest api key. Will use the current one. {ex}")

Expand All @@ -91,11 +103,11 @@ async def flush(self) -> None:
"""
# check, if we need to fetch or refresh the public api key
if not self.last_fetched:
await self.refresh_public_api_key()
await self.refresh_from_cdn()
sd = self.system_data
self.client.identify(sd.system_id, {"run_id": self.run_id, "created_at": sd.created_at}) # type: ignore
elif (utc() - self.last_fetched) > timedelta(hours=1):
await self.refresh_public_api_key()
await self.refresh_from_cdn()

# acquire the lock, send all events to the client and clear the queue
async with self.lock:
Expand All @@ -114,6 +126,7 @@ async def flush(self) -> None:
self.queue.clear()

async def start(self) -> PostHogEventSender:
await self.flush() # flush will make sure to load initial data from CDN
await self.flusher.start()
return self

Expand Down
20 changes: 13 additions & 7 deletions fixcore/fixcore/cli/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4566,7 +4566,9 @@ async def stop_workflow(task_id: TaskId) -> AsyncIterator[str]:
elif arg and len(args) == 2 and args[0] == "run":
return CLISource.single(partial(run_workflow, args[1].strip()), required_permissions={Permission.admin})
elif arg and len(args) == 2 and args[0] == "stop":
return CLISource.single(partial(stop_workflow, args[1].strip()), required_permissions={Permission.admin})
return CLISource.single(
partial(stop_workflow, TaskId(args[1].strip())), required_permissions={Permission.admin}
)
elif arg and len(args) == 1 and args[0] == "running":
return CLISource.only_count(running_workflows, required_permissions={Permission.read})
elif arg and len(args) == 1 and args[0] == "list":
Expand Down Expand Up @@ -4749,24 +4751,28 @@ async def list_configs() -> Tuple[int, JsStream]:

args = re.split("\\s+", arg, maxsplit=2) if arg else []
if arg and len(args) == 2 and (args[0] == "show" or args[0] == "get"):
return CLISource.single(partial(show_config, args[1]), required_permissions={Permission.admin})
return CLISource.single(partial(show_config, ConfigId(args[1])), required_permissions={Permission.admin})
elif arg and len(args) == 2 and args[0] == "delete":
return CLISource.single(partial(delete_config, args[1]), required_permissions={Permission.admin})
return CLISource.single(partial(delete_config, ConfigId(args[1])), required_permissions={Permission.admin})
elif arg and len(args) == 3 and args[0] == "set":
update = path_values_parser.parse(args[2])
return CLISource.single(partial(set_config, args[1], update), required_permissions={Permission.admin})
return CLISource.single(
partial(set_config, ConfigId(args[1]), update), required_permissions={Permission.admin}
)
elif arg and len(args) == 2 and args[0] == "edit":
config_id = args[1]
config_id = ConfigId(args[1])
return CLISource.single(
partial(edit_config, config_id),
produces=MediaType.FilePath,
envelope={CLIEnvelope.action: "edit", CLIEnvelope.command: f"configs update {config_id}"},
required_permissions={Permission.admin},
)
elif arg and len(args) == 3 and args[0] == "copy":
return CLISource.single(partial(copy_config, args[1], args[2]), required_permissions={Permission.admin})
return CLISource.single(
partial(copy_config, ConfigId(args[1]), ConfigId(args[2])), required_permissions={Permission.admin}
)
elif arg and len(args) == 3 and args[0] == "update":
config_id = args[1]
config_id = ConfigId(args[1])
return CLISource.single(
partial(update_config, config_id),
produces=MediaType.FilePath,
Expand Down
9 changes: 8 additions & 1 deletion fixcore/fixcore/db/arango_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ def view_term(term: Term) -> Tuple[Optional[str], Term]:
return (None, term) if sp is None else (sp, evolve(term, pre_filter=pre))
elif isinstance(term, NotTerm):
sp, nt = view_term(term.term)
return (None, term) if sp is None else (f"NOT ({sp})", NotTerm(nt))
remaining = nt if nt.is_all else NotTerm(nt) # a remaining filter needs to be negated
return (None, term) if sp is None else (f"NOT ({sp})", remaining)
elif isinstance(term, ContextTerm):
# context terms cannot be handled by the view search exhaustively
# we filter the list down as much as possible, but leave the context term untouched
Expand Down Expand Up @@ -295,6 +296,12 @@ def view_term(term: Term) -> Tuple[Optional[str], Term]:
return None, term
return combine_optional(lsp, rsp, lambda ll, rr: f"({ll} {term.op} {rr})"), lt.combine(term.op, rt)
elif isinstance(term, Predicate):
# arangosearch view does not handle nested array searches correctly
# see: https://github.com/arangodb/arangodb/issues/21281
# once this is resolved we can delete the next 2 lines
if term.op in ["!=", "not in"] and bool(array_marker.search(term.name)):
return "true", term # true will not filter anything leaving the term for the filter

return predicate_term(term)
else:
return None, term
Expand Down
2 changes: 1 addition & 1 deletion fixcore/fixcore/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def service(self, name: str, clazz: Type[T]) -> T:
existing = self.get(name)
if existing is None:
raise KeyError(f"Service {name} not found")
elif clazz is Any or isinstance(existing, clazz):
elif clazz is Any or isinstance(existing, clazz): # type: ignore
return existing # type: ignore
else:
raise ValueError(f"Service {name} is not of type {clazz}")
Expand Down
1 change: 0 additions & 1 deletion fixcore/tests/fixcore/db/arango_query_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,6 @@ def assert_view(query: str, expected: str, **kwargs: Any) -> Tuple[str, Json]:
# asking for a specific element in an array can leverage the view
assert_view("g[*]==1", "SEARCH v0.g == @b0 RETURN v0")
assert_view("g[*] in [1,2,3]", "SEARCH v0.g in @b0 RETURN v0) FOR result in view0")
assert_view("g[*] not in [1,2,3]", "SEARCH v0.g not in @b0 RETURN v0) FOR result in view0")
# use like instead of regex
if TranslateRegexpToLike:
assert_view('name=~"^123"', "SEARCH v0.name LIKE @b0", b0="123%")
Expand Down
5 changes: 1 addition & 4 deletions fixcore/tests/fixcore/report/inspector_service_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,12 @@ async def test_predefined_benchmarks(inspector_service: InspectorService) -> Non
benchmarks = BenchmarkConfig.from_files()
assert len(benchmarks) > 0
for name, check in benchmarks.items():
# todo: fix the root cause and don't skip this benchmark
if name == "azure_cis_2_1":
continue
config = {BenchmarkConfigRoot: check}
cfg_id = ConfigId(name)
validation = await inspector_service.validate_benchmark_config(cfg_id, config)
assert validation is None, f"Benchmark: {name}" + str(validation)
benchmark = BenchmarkConfig.from_config(ConfigEntity(cfg_id, config))
assert benchmark.clouds == ["aws"]
any(cloud in (benchmark.clouds or []) for cloud in ["aws", "azure"])


async def test_list_failing(inspector_service: InspectorService) -> None:
Expand Down
1 change: 1 addition & 0 deletions fixlib/fixlib/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def parse_known_args( # type: ignore
new_default = os.environ.get(env_name)

if new_default is not None:
type_goal: Any = str
if isinstance(action.type, type):
type_goal = action.type
elif callable(action.type):
Expand Down
2 changes: 1 addition & 1 deletion fixlib/fixlib/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def override_config(running_config: RunningConfig) -> None:
log.error(f"Override key {config_key} is unknown - skipping")
break

target_type = str
target_type: Any = str
if target_type in (list, tuple, set):
config_value = target_type(config_value.split(","))
config_value = convert(config_value, target_type)
Expand Down
2 changes: 1 addition & 1 deletion fixlib/fixlib/threading.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class ExecutorQueue:

def submit_work(self, key: Any, fn: Callable[..., T], *args: Any, **kwargs: Any) -> Future[T]:
future = Future[T]()
task = ExecutorQueueTask(key=key, fn=fn, args=args, kwargs=kwargs, future=future)
task = ExecutorQueueTask(key=key, fn=fn, args=args, kwargs=kwargs, future=future) # type: ignore
self.__append_work(task)
return future

Expand Down
4 changes: 2 additions & 2 deletions fixworker/fixworker/tag.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Type
from typing import Any, Dict, List

from fixlib.baseplugin import BaseCollectorPlugin
from fixlib.baseresources import BaseResource
Expand All @@ -8,7 +8,7 @@
from fixlib.types import Json


def core_tag_tasks_processor(plugin: Type[BaseCollectorPlugin], config: Config, task_data: Dict[str, Any]) -> Json:
def core_tag_tasks_processor(plugin: BaseCollectorPlugin, config: Config, task_data: Dict[str, Any]) -> Json:
delete_tags: List[str] = task_data.get("delete", [])
update_tags: Dict[str, str] = task_data.get("update", {})
node_data: Dict[str, Any] = task_data.get("node", {})
Expand Down
40 changes: 40 additions & 0 deletions plugins/aws/docs/iam.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
@startuml

class IAMEntity {
- id: string
- inlinePolicy: PolicyStatement[]
}

class User
class Group
class Role
note bottom of Role
Role has a trust policy that defines what users,
groups or services can assume this role.
Can have a cross account trust.
end note
IAMEntity <|-- Group
IAMEntity <|-- Role
IAMEntity <|-- User
Group *-- User

class Policy {
- managedBy: AWS|Customer
}
class PolicyStatement {
- effect: Allow|Deny
- actions: string[]
- notActions: string[]
- resources: string[]
- conditions: string[]
- more...
}
class Resource {
- inlinePolicy: PolicyStatement[]
}
Policy -> PolicyStatement
IAMEntity -> Policy
PolicyStatement ..> Resource


@enduml
58 changes: 58 additions & 0 deletions plugins/aws/docs/sso.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
@startuml

hide empty members


class User
class Group
class PermissionSet {
- inlinePolicy: PolicyStatement[]
}

Group o--> User
User o--> PermissionSet
Group o--> PermissionSet

(Group, PermissionSet) .. Account
Account .. (User, PermissionSet)

package AwsAccount {
class Role
PermissionSet .> Role
}

note bottom of AwsAccount.Role
The Permissions of the PermissionSet is
replicated as Role into every Account.
end note

class PolicyStatement {
- effect: Allow|Deny
- actions: string[]
- notActions: string[]
- resources: string[]
- conditions: string[]
}
PermissionSet o--> PolicyStatement

note bottom of PermissionSet
The same PermissionSet can be assigned to multiple
Users and Groups in multiple accounts.
end note
note right of User
Has nothing to do with Iam User.
end note
note right of Group
Has nothing to do with Iam Group.
end note



note top of Group
AWS SSO User AWS IAM Identity Center (SSO)
is usually available in one account in the organization
and one region.

It is allowed to have more than one SSO instance in individual accounts.
end note
@enduml
2 changes: 1 addition & 1 deletion plugins/azure/fix_plugin_azure/azure_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def is_retryable_exception(e: Exception) -> bool:
return True
if isinstance(e, HttpResponseError):
error_code = getattr(e.error, "code", None)
status_code = getattr(e.response, "status_code", None)
status_code = getattr(e, "status_code", None)

if error_code == "TooManyRequests" or status_code == 429:
log.debug(f"Azure API request limit exceeded or throttling, retrying with exponential backoff: {e}")
Expand Down
9 changes: 8 additions & 1 deletion plugins/azure/fix_plugin_azure/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
)
from fix_plugin_azure.resource.storage import AzureStorageAccountUsage, AzureStorageSku, resources as storage_resources
from fix_plugin_azure.resource.web import resources as web_resources
from fix_plugin_azure.resource.machinelearning import (
AzureMachineLearningUsage,
AzureMachineLearningVirtualMachineSize,
resources as ml_resources,
)
from fixlib.baseresources import Cloud, GraphRoot, BaseAccount, BaseRegion
from fixlib.core.actions import CoreFeedback, ErrorAccumulator
from fixlib.graph import Graph
Expand Down Expand Up @@ -80,6 +85,7 @@ def resource_with_params(clazz: Type[MicrosoftResource], param: str) -> bool:
+ sql_resources
+ storage_resources
+ web_resources
+ ml_resources
)
all_resources = subscription_resources + graph_resources # defines all resource kinds. used in model check

Expand Down Expand Up @@ -241,7 +247,7 @@ def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = T

def remove_usage_zero_value() -> None:
for node in self.graph.nodes:
if not isinstance(node, (AzureNetworkUsage, AzureStorageAccountUsage)):
if not isinstance(node, (AzureNetworkUsage, AzureStorageAccountUsage, AzureMachineLearningUsage)):
continue
# Azure Usage just keep info about how many kind of resources on account exists
# Check if the current usage value of the Azure Usage node is 0
Expand All @@ -254,6 +260,7 @@ def remove_usage_zero_value() -> None:
rm_nodes(AzureExpressRoutePortsLocation, AzureSubscription)
rm_nodes(AzureNetworkVirtualApplianceSku, AzureSubscription)
rm_nodes(AzureDiskType, AzureSubscription)
rm_nodes(AzureMachineLearningVirtualMachineSize, AzureLocation)
rm_nodes(AzureStorageSku, AzureLocation)
rm_nodes(AzureMysqlServerType, AzureSubscription)
rm_nodes(AzurePostgresqlServerType, AzureSubscription)
Expand Down
Loading

0 comments on commit 205de26

Please sign in to comment.