# The name of the Storm DRPC function
topology.function: "tracer"
# The name of the Storm topology
topology.name: "bullet-topology"
# The number of topology workers to use. Applicable when the scheduler is not ras.
topology.workers: 92
# Enable Storm debug logging
topology.debug: false
# The scheduling strategy to use. Only "ras" is currently supported.
topology.scheduler: "ras"
# Enable metrics collection for the topology. This uses the LoggingMetricsConsumer
# and collects a CPU metric using org.apache.storm.metrics.sigar.CPUMetric
topology.metrics.enable: false
# The following CPU load and on/off heap memory settings control each component's CPU
# and memory configuration. These settings are only used when the scheduler is "ras".
# The parallelism setting controls the number of executors used for each component.
topology.drpc.spout.cpu.load: 20.0
topology.drpc.spout.memory.on.heap.load: 256.0
topology.drpc.spout.memory.off.heap.load: 160.0
topology.drpc.spout.parallelism: 20
topology.prepare.bolt.cpu.load: 20.0
topology.prepare.bolt.memory.on.heap.load: 128.0
topology.prepare.bolt.memory.off.heap.load: 160.0
topology.prepare.bolt.parallelism: 5
topology.return.bolt.cpu.load: 20.0
topology.return.bolt.memory.on.heap.load: 128.0
topology.return.bolt.memory.off.heap.load: 160.0
topology.return.bolt.parallelism: 10
topology.filter.bolt.cpu.load: 100.0
topology.filter.bolt.memory.on.heap.load: 256.0
topology.filter.bolt.memory.off.heap.load: 160.0
topology.filter.bolt.parallelism: 35
topology.join.bolt.cpu.load: 100.0
topology.join.bolt.memory.on.heap.load: 512.0
topology.join.bolt.memory.off.heap.load: 160.0
topology.join.bolt.parallelism: 20
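# As an illustrative aside (assuming the usual Storm RAS behavior of multiplying per-executor resources by the
# component's parallelism): with the defaults above, the Filter Bolts alone would request 35 * 100.0 = 3500.0 CPU
# and 35 * (256.0 + 160.0) = 14560.0 MB of memory from the scheduler.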
# Bullet uses tick tuples under the hood as a "clock" mechanism to do metadata and query updates (checking if rules
# have expired), etc. This setting controls how frequently a tick happens - the number of seconds between ticks.
topology.tick.interval.secs: 5
# This is the number of ticks for which an error caused by receiving a bad rule will be buffered, if the
# return information has not yet been received, before being thrown away.
topology.join.bolt.error.tick.timeout: 3
# This is the number of ticks for which a rule will be buffered past its expiry in order to wait for
# aggregations to trickle in from the Filter Bolts.
topology.join.bolt.rule.tick.timeout: 3
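# For example (illustrative, using the defaults above): with topology.tick.interval.secs at 5 and a tick timeout
# of 3, a rule lingers for up to 3 * 5 = 15 seconds past its expiry, and a buffered error is likewise kept for
# up to 15 seconds while waiting for its return information.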
# The default duration in milliseconds for a rule if one has not been specified.
rule.default.duration: 30000
# The maximum duration in milliseconds allowed for a rule. Anything greater will be clamped to this value.
rule.max.duration: 120000
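# For example (illustrative): with these defaults, a rule that specifies no duration runs for 30000 ms, and a
# rule that asks for 300000 ms is clamped down to 120000 ms.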
# The default number of records that can be aggregated for a rule if one has not been specified.
rule.aggregation.default.size: 1
# The maximum number of records that will be aggregated per rule. Anything greater will be clamped to this value.
rule.aggregation.max.size: 512
# This is the separator that is used when a set of fields has to be considered as a single String.
# This is relevant when hashing a set of fields (for example, in a GROUP operation) for uniqueness purposes, such
# as when inserting into a Sketch. Without it, if you were considering two fields together as a group, with values
# ab and cd, simply concatenating them would produce abcd. This is ambiguous when compared with another record that
# had values a and bcd for those two fields. Using this separator distinguishes them for this purpose.
# If the default separator occurs in your fields, you should change it to something else.
rule.aggregation.composite.field.separator: "|"
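# For example (illustrative): with this separator, the group with field values ab and cd is hashed as "ab|cd"
# and the group with values a and bcd as "a|bcd", so the two stay distinct, whereas plain concatenation would
# turn both into "abcd".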
# The maximum number of records that will be collected in the Filter Bolt before they are emitted - i.e. a micro-batch.
# Leaving this at 1 emits your raw aggregation records as soon as they are received in the Filter Bolt. This makes
# your raw aggregation query run snappier if the total number of matched records across the Filter Bolts exceeds
# the number of records your query is looking for, but each Filter Bolt individually does not find enough records to
# satisfy the query. Since the records are emitted immediately, the Join Bolt will terminate your query as soon
# as the total number of records is received instead of waiting for the micro-batch size to be reached.
# If you set this too high (for example, higher than the query size), you will wait the entire duration of the query,
# plus the number of ticks specified in topology.join.bolt.rule.tick.timeout.
rule.aggregation.raw.micro.batch.size: 1
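# For example (illustrative): suppose a RAW query asks for 30 records and each of the 35 Filter Bolts matches only
# one record. With a micro-batch size of 1, every match is emitted immediately and the Join Bolt finishes as soon
# as 30 records arrive. With a micro-batch size of 2, no Filter Bolt ever fills its batch, so the query waits out
# its full duration plus the rule tick timeout before the records are returned.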
# The maximum number of records that will be collected for Raw aggregations. This number should be <= the
# rule.aggregation.max.size value. If it is greater, rule.aggregation.max.size will be used instead.
rule.aggregation.raw.max.size: 30
# The maximum number of entries stored by a Sketch created for doing COUNT DISTINCTS. Decreasing this number
# (rounded to powers of 2) can decrease the accuracy for high cardinality dimensions while decreasing the total
# memory used by the Sketch. The error for a Theta Sketch is fixed at a maximum when this number is chosen - in other
# words, the error will be no worse than this maximum regardless of how many unique entries are inserted into the
# Sketch. Refer to: https://datasketches.github.io/docs/Theta/ThetaErrorTable.html
rule.aggregation.count.distinct.sketch.entries: 16384
# Controls how much sampling is done by the Sketch for COUNT DISTINCTS. A value of 1.0 means no sampling is done.
# A value of 0.5 means the Sketch will throw out half the data coming into it.
# You can leave this at 1.0 since it will really only matter once COUNT DISTINCTS are supported as GROUP operations.
# https://datasketches.github.io/docs/Theta/ThetaPSampling.html
rule.aggregation.count.distinct.sketch.sampling: 1.0
# This can either be QuickSelect or Alpha (CaSe SeNsiTive). You can leave this at the default.
# Alpha Sketches are 30% more accurate if their estimates are queried directly but since we union them, their accuracy
# reverts back to the QuickSelect accuracy. Alpha Sketches are also faster when updating.
# https://datasketches.github.io/docs/Theta/ThetaUpdateSpeed.html
rule.aggregation.count.distinct.sketch.family: "Alpha"
# A Sketch does not start at the maximum size specified by the sketch.entries setting. It grows toward it and can be
# at most 2x that size at the maximum. This factor controls by how much the size grows when the threshold is reached.
# Valid values are 1 (no resize, start at the maximum), 2 (double), 4 (quadruple) and 8 (octuple). Any other value defaults to 8.
# https://datasketches.github.io/docs/Theta/ThetaUpdateSpeed.html
rule.aggregation.count.distinct.sketch.resize.factor: 8
# The maximum number of entries stored by a Sketch created for doing GROUP BY. Sketches are used to do a uniform
# sample across your unique groups. So, this value should be set to a power of 2 approximately equal to your value for
# rule.aggregation.max.size. Anything greater will still work but the aggregation max size will limit your result
# anyway, so it's just a waste of resources to do so. If you have a count or sum as a metric for the group, summing them
# across the groups and dividing by your Sketch Theta (in the metadata) gives you an approximate estimate of the real
# sum/count across all your actual groups. The error is defined by the QuickSelect Sketch error. Refer to:
# https://datasketches.github.io/docs/Theta/ThetaErrorTable.html
rule.aggregation.group.sketch.entries: 512
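# For example (illustrative): if the Sketch sampled your groups with a theta of 0.25 and the counts summed across
# the sampled groups add up to 1000, then 1000 / 0.25 = 4000 is an approximate estimate of the total count across
# all your actual groups.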
# Controls how much sampling is done by the Sketch for GROUP BY. A value of 1.0 means no sampling is done.
# A value of 0.5 means the Sketch will throw out half the data coming into it.
# You can leave this at 1.0 since it will really only matter once COUNT DISTINCTS are supported as GROUP operations.
# https://datasketches.github.io/docs/Theta/ThetaPSampling.html
rule.aggregation.group.sketch.sampling: 1.0
# A Sketch does not start at the maximum size specified by the sketch.entries setting. It grows toward it and can be
# at most 2x that size at the maximum. This factor controls by how much the size grows when the threshold is reached.
# Valid values are 1 (no resize, start at the maximum), 2 (double), 4 (quadruple) and 8 (octuple). Any other value defaults to 8.
# https://datasketches.github.io/docs/Theta/ThetaUpdateSpeed.html
rule.aggregation.group.sketch.resize.factor: 8
# Enable logging meta information in the results. Configured metadata will be added to the meta section of the
# results: {"meta": {}, "records": []}
result.metadata.enable: true
# Each entry in this list indicates which metadata to collect (the name) and what key to add it as (the key) to the meta
# Rule Identifier adds the original DRPC ID that was generated for the rule.
# Rule Body adds the received rule definition. This is useful for diagnosing syntax exceptions when errors are received.
# Creation Time adds the timestamp in milliseconds when the AbstractRule was received by the Join Bolt
# Termination Time adds the timestamp in milliseconds when the Records were emitted by the Join Bolt
# Aggregation Metadata adds additional nested metadata about the aggregation if set. These are listed below.
# Estimated Result adds a boolean denoting whether the result was estimated. (COUNT DISTINCT, GROUP)
# Standard Deviations adds an object inside the Aggregation Metadata object where the keys are the standard deviations
# and the values are objects containing upper and lower bounds (COUNT DISTINCT, GROUP)
# Sketch Family adds the family of Sketches used to produce the result, if one was used. (COUNT DISTINCT)
# Sketch Size adds the size of the final Sketch used to produce the result, if one was used. (COUNT DISTINCT)
# Sketch Theta adds the theta value of the Sketch for Theta and Tuple Sketches, if one was used. (COUNT DISTINCT, GROUP)
# Uniques Estimate adds the approximate unique values seen for Tuple Sketches. (GROUP)
result.metadata.metrics:
- name: "Rule Identifier"
key: "rule_id"
- name: "Rule Body"
key: "rule_body"
- name: "Creation Time"
key: "rule_receive_time"
- name: "Termination Time"
key: "rule_finish_time"
- name: "Aggregation Metadata"
key: "aggregation"
- name: "Estimated Result"
key: "wasEstimated"
- name: "Standard Deviations"
key: "standardDeviations"
- name: "Sketch Family"
key: "sketchFamily"
- name: "Sketch Size"
key: "sketchSize"
- name: "Sketch Theta"
key: "sketchTheta"
- name: "Uniques Estimate"
key: "uniquesEstimate"
# Controls whether each record should have a new key added to it denoting when the Filter Bolt saw it
record.inject.timestamp.enable: true
# This is the key that is used to add the timestamp in milliseconds to the record, if record.inject.timestamp.enable is true
record.inject.timestamp.key: "__receive_timestamp"
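# For example (illustrative, timestamp value made up): a record that looked like {"field": "foo"} when it entered
# the Filter Bolt would be emitted as {"field": "foo", "__receive_timestamp": 1480723799257} with the defaults above.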