Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explicitly support vm.dirty_(bytes|ratio) and vm.dirty_background_(bytes|ratio) #688

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions profiles/accelerator-performance/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@ platform_profile=performance
[disk]
readahead=>4096

[sysctl]
[vm]
# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40
dirty_ratio = 40

# Start background writeback (via writeback threads) at this percentage (system
# default is 10%)
vm.dirty_background_ratio = 10
dirty_background_ratio = 10

[sysctl]
# PID allocation wrap value. When the kernel's next PID value
# reaches this value, it wraps back to a minimum PID value.
# PIDs of value pid_max or larger are not allocated.
Expand Down
7 changes: 4 additions & 3 deletions profiles/latency-performance/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@ min_perf_pct=100
[acpi]
platform_profile=performance

[sysctl]
[vm]
# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio=10
dirty_ratio=10

# Start background writeback (via writeback threads) at this percentage (system
# default is 10%)
vm.dirty_background_ratio=3
dirty_background_ratio=3

[sysctl]
# The swappiness parameter controls the tendency of the kernel to move
# processes out of physical memory and onto the swap disk.
# 0 tells the kernel to avoid swapping processes out of physical memory
Expand Down
4 changes: 2 additions & 2 deletions profiles/mssql/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ force_latency=5
[vm]
# For multi-instance SQL deployments use 'madvise' instead of 'always'
transparent_hugepages=always
dirty_background_ratio=3
dirty_ratio=80

[sysctl]
vm.swappiness=1
vm.dirty_background_ratio=3
vm.dirty_ratio=80
vm.dirty_expire_centisecs=500
vm.dirty_writeback_centisecs=100
vm.max_map_count=1600000
Expand Down
4 changes: 2 additions & 2 deletions profiles/oracle/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ include=throughput-performance

[sysctl]
vm.swappiness = 10
vm.dirty_background_ratio = 3
vm.dirty_ratio = 40
vm.dirty_expire_centisecs = 500
vm.dirty_writeback_centisecs = 100
kernel.shmmax = 4398046511104
Expand All @@ -27,4 +25,6 @@ kernel.panic_on_oops = 1
kernel.numa_balancing = 0

[vm]
dirty_background_ratio = 3
dirty_ratio = 40
transparent_hugepages=never
12 changes: 5 additions & 7 deletions profiles/postgresql/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,20 @@ force_latency=1
[vm]
transparent_hugepages=never

[sysctl]

# The dirty_background_ratio and dirty_ratio options control the percentage of
# memory that the file system cache has to fill with dirty data before the
# kernel starts to flush data to disks. The default values are 10% and 20%
# respectively. On systems with a large amount of memory these values can
# be tens of gigabytes and produce I/O spikes when the PostgreSQL server writes
# checkpoints.
# checkpoints. The values can alternatively be set in absolute number of bytes
# via dirty_background_bytes and dirty_bytes.
#
# Keep these values reasonably small - about the size of the RAID controller
# write-back cache (typically 512MB - 2GB).
vm.dirty_background_ratio = 0
vm.dirty_ratio = 0
vm.dirty_background_bytes = 67108864
vm.dirty_bytes = 536870912
dirty_background_bytes = 67108864
dirty_bytes = 536870912

[sysctl]
# The swappiness parameter controls the tendency of the kernel to move
# processes out of physical memory and onto the swap disk.
# 0 tells the kernel to avoid swapping processes out of physical memory
Expand Down
4 changes: 2 additions & 2 deletions profiles/sap-hana/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ min_perf_pct=100

[vm]
transparent_hugepages=never
dirty_ratio = 40
dirty_background_ratio = 10

[sysctl]
kernel.sem = 32000 1024000000 500 32000
kernel.numa_balancing = 0
vm.dirty_ratio = 40
vm.dirty_background_ratio = 10
vm.swappiness = 10
6 changes: 4 additions & 2 deletions profiles/spectrumscale-ece/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ governor=performance
energy_perf_bias=performance
min_perf_pct=100

[vm]
dirty_ratio = 40
dirty_background_ratio = 10

[sysctl]
kernel.numa_balancing = 1
vm.dirty_ratio = 40
vm.dirty_background_ratio = 10
vm.swappiness=10
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_timestamps = 1
Expand Down
4 changes: 3 additions & 1 deletion profiles/spindown-disk/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ spindown=6
[scsi_host]
alpm=medium_power

[vm]
dirty_ratio=60

[sysctl]
vm.dirty_writeback_centisecs=6000
vm.dirty_expire_centisecs=9000
vm.dirty_ratio=60
vm.laptop_mode=5
vm.swappiness=30

Expand Down
27 changes: 14 additions & 13 deletions profiles/throughput-performance/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@ energy_performance_preference=performance
[acpi]
platform_profile=performance

[vm]
# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
dirty_ratio = 40

# Start background writeback (via writeback threads) at this percentage (system
# default is 10%)
dirty_background_ratio = 10

# Marvell ThunderX
[vm.thunderx]
type=vm
Expand All @@ -31,19 +45,6 @@ transparent_hugepages=never
readahead=>4096

[sysctl]
# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 40

# Start background writeback (via writeback threads) at this percentage (system
# default is 10%)
vm.dirty_background_ratio = 10

# PID allocation wrap value. When the kernel's next PID value
# reaches this value, it wraps back to a minimum PID value.
# PIDs of value pid_max or larger are not allocated.
Expand Down
5 changes: 3 additions & 2 deletions profiles/virtual-guest/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,17 @@
summary=Optimize for running inside a virtual guest
include=throughput-performance

[sysctl]
[vm]
# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage (system default
# is 20%)
vm.dirty_ratio = 30
dirty_ratio = 30

[sysctl]
# Filesystem I/O is usually much more efficient than swapping, so try to keep
# swapping low. It's usually safe to go even lower than this on systems with
# server-grade storage.
Expand Down
4 changes: 2 additions & 2 deletions profiles/virtual-host/tuned.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
summary=Optimize for running KVM guests
include=throughput-performance

[sysctl]
[vm]
# Start background writeback (via writeback threads) at this percentage (system
# default is 10%)
vm.dirty_background_ratio = 5
dirty_background_ratio = 5

[cpu]
# Setting C3 state sleep mode/power savings
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[main]
summary=Post-loaded profile that uses variables from the regular active profile

[sysctl]
vm.dirty_ratio=${foo}
[vm]
dirty_ratio=${foo}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[main]
summary=Post-loaded profile

[sysctl]
vm.dirty_ratio=8
[vm]
dirty_ratio=8
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[main]
summary=Second version of the post-loaded profile

[sysctl]
vm.dirty_ratio=7
[vm]
dirty_ratio=7
108 changes: 105 additions & 3 deletions tuned/plugins/plugin_vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,15 @@

class VMPlugin(base.Plugin):
"""
Enables or disables transparent huge pages depending on value of the
[option]`transparent_hugepages` option. The option can have one of three
possible values `always`, `madvise` and `never`.
Tunes selected sysctl options in `/proc/sys/vm`, currently
[option]`dirty_ratio`, [option]`dirty_background_ratio`,
[option]`dirty_bytes`, and [option]`dirty_background_bytes`.
See https://docs.kernel.org/admin-guide/sysctl/vm.html for detailed
documentation of these options.

Additionally enables or disables transparent huge pages depending on
the value of the [option]`transparent_hugepages` option. The option
can have one of three possible values: `always`, `madvise` and `never`.

.Disable transparent hugepages
====
Expand All @@ -38,11 +44,26 @@ def _get_config_options(self):
"transparent_hugepages" : None,
"transparent_hugepage" : None,
"transparent_hugepage.defrag" : None,
"dirty_bytes" : None,
"dirty_ratio" : None,
"dirty_background_bytes" : None,
"dirty_background_ratio" : None
}

@staticmethod
def _check_conflicting_dirty_options(instance, first, second):
if instance.options[first] is not None and instance.options[second] is not None:
log.error("Conflicting options '%s' and '%s', this may cause undefined behavior." % (first, second))

@staticmethod
def _proc_sys_vm_option_path(option):
return os.path.join("/proc/sys/vm", option)

def _instance_init(self, instance):
instance._has_static_tuning = True
instance._has_dynamic_tuning = False
self._check_conflicting_dirty_options(instance, "dirty_bytes", "dirty_ratio")
self._check_conflicting_dirty_options(instance, "dirty_background_bytes", "dirty_background_ratio")

def _instance_cleanup(self, instance):
pass
Expand Down Expand Up @@ -117,3 +138,84 @@ def _get_transparent_hugepage_defrag(self):
return cmd.get_active_option(cmd.read_file(sys_file))
else:
return None

def _check_twice_pagesize(self, option, int_value):
    """Validate that `int_value` is at least two pages large.

    The page size is obtained from `cmd.getconf("PAGESIZE")`. Returns True
    when the value is large enough; otherwise logs an error naming
    `option` and the computed minimum, and returns False.
    """
    page_size = int(cmd.getconf("PAGESIZE"))
    min_bytes = 2 * page_size
    too_small = int_value < min_bytes
    if not too_small:
        return True
    log.error("The value of '%s' must be at least twice the page size (%s)." % (option, min_bytes))
    return False

def _check_positive(self, option, int_value):
if int_value <= 0:
log.error("The value of '%s' must be positive." % option)
return False
return True

def _check_ratio(self, option, int_value):
if not 0 <= int_value <= 100:
log.error("The value of '%s' must be between 0 and 100." % option)
return False
return True

@command_custom("dirty_bytes")
def _dirty_bytes(self, enabling, value, verify, ignore_missing):
    """Handle the `dirty_bytes` option; its counterpart is `dirty_ratio`.

    The value is validated with `_check_twice_pagesize`. `ignore_missing`
    is accepted but not used here.
    """
    return self._dirty_option(
        option="dirty_bytes",
        counterpart="dirty_ratio",
        check_fun=self._check_twice_pagesize,
        enabling=enabling,
        value=value,
        verify=verify,
    )

@command_custom("dirty_ratio")
def _dirty_ratio(self, enabling, value, verify, ignore_missing):
    """Handle the `dirty_ratio` option; its counterpart is `dirty_bytes`.

    The value is validated with `_check_ratio`. `ignore_missing` is
    accepted but not used here.
    """
    return self._dirty_option(
        option="dirty_ratio",
        counterpart="dirty_bytes",
        check_fun=self._check_ratio,
        enabling=enabling,
        value=value,
        verify=verify,
    )

@command_custom("dirty_background_bytes")
def _dirty_background_bytes(self, enabling, value, verify, ignore_missing):
    """Handle `dirty_background_bytes`; counterpart is `dirty_background_ratio`.

    The value is validated with `_check_positive`. `ignore_missing` is
    accepted but not used here.
    """
    return self._dirty_option(
        option="dirty_background_bytes",
        counterpart="dirty_background_ratio",
        check_fun=self._check_positive,
        enabling=enabling,
        value=value,
        verify=verify,
    )

@command_custom("dirty_background_ratio")
def _dirty_background_ratio(self, enabling, value, verify, ignore_missing):
    """Handle `dirty_background_ratio`; counterpart is `dirty_background_bytes`.

    The value is validated with `_check_ratio`. `ignore_missing` is
    accepted but not used here.
    """
    return self._dirty_option(
        option="dirty_background_ratio",
        counterpart="dirty_background_bytes",
        check_fun=self._check_ratio,
        enabling=enabling,
        value=value,
        verify=verify,
    )

def _dirty_option(self, option, counterpart, check_fun, enabling, value, verify):
    """Apply, verify or roll back one of the paired `/proc/sys/vm` dirty options.

    `option` and `counterpart` are the two members of a bytes/ratio pair
    (e.g. `dirty_bytes` and `dirty_ratio`). The backup logic below treats
    a current value of 0 for `option` as meaning the counterpart is the
    one in effect.

    Parameters:
        option: name of the file under `/proc/sys/vm` to tune.
        counterpart: name of the paired file under `/proc/sys/vm`.
        check_fun: callable `(option, int_value) -> bool` validating the
            requested value; it is expected to log its own error.
        enabling: True to apply `value`, False to restore the backup.
        value: requested value as read from the profile.
        verify: when True, only compare the current value with `value`.

    Returns:
        * verify mode: True/False depending on whether the current value
          equals `value`.
        * enabling: `value` when it was written or was already set.
        * None when the option file is missing, the value is invalid,
          the current value cannot be parsed, or on rollback.
    """
    option_path = self._proc_sys_vm_option_path(option)
    counterpart_path = self._proc_sys_vm_option_path(counterpart)
    option_key = self._storage_key(command_name=option)
    counterpart_key = self._storage_key(command_name=counterpart)
    if not os.path.isfile(option_path):
        log.warning("Option '%s' is not supported on the current hardware." % option)
        # Nothing can be read, verified, written or restored without the
        # file, so stop here instead of reading a non-existent path.
        return None
    current_value = cmd.read_file(option_path).strip()
    if verify:
        return current_value == value
    if enabling:
        try:
            int_value = int(value)
        except (TypeError, ValueError):
            log.error("The value of '%s' must be an integer." % option)
            # Bail out: int_value is undefined, so the validation below
            # must not run.
            return None
        if not check_fun(option, int_value):
            return None
        if current_value == value:
            log.info("Not setting option '%s' to '%s', already set." % (option, value))
            return value
        try:
            option_is_zero = int(current_value) == 0
        except ValueError:
            # Without a parsable current value we cannot tell which of
            # the two options to back up; do not change anything.
            log.error("Unable to parse the current value '%s' of option '%s'." % (current_value, option))
            return None
        # Backup: if the option (e.g. dirty_bytes) is currently 0,
        # its counterpart (dirty_ratio) is the active one, so we
        # back up that one instead.
        if option_is_zero:
            current_counterpart_value = cmd.read_file(counterpart_path).strip()
            self._storage.set(counterpart_key, current_counterpart_value)
        else:
            self._storage.set(option_key, current_value)
        log.info("Setting option '%s' to '%s'." % (option, value))
        cmd.write_to_file(option_path, value)
        return value
    # Rollback is analogous to the backup: if there is no backed up
    # value for this option, it means that its counterpart was active
    # and we have to restore that one.
    old_value = self._storage.get(option_key)
    old_counterpart_value = self._storage.get(counterpart_key)
    if old_value is not None:
        log.info("Setting option '%s' to '%s'" % (option, old_value))
        cmd.write_to_file(option_path, old_value)
    elif old_counterpart_value is not None:
        log.info("Setting option '%s' to '%s'" % (counterpart, old_counterpart_value))
        cmd.write_to_file(counterpart_path, old_counterpart_value)
    else:
        log.info("Not restoring '%s', previous value is the same or unknown." % option)
    return None
3 changes: 3 additions & 0 deletions tuned/utils/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,6 @@ def tr(self, text, source_chars, dest_chars):
# Checks if name contains only valid characters and has valid length or is empty string or None
def is_valid_name(self, name):
return not name or (all(c in consts.NAMES_ALLOWED_CHARS for c in name) and len(name) <= consts.NAMES_MAX_LENGTH)

def getconf(self, variable):
    """Run the external `getconf` program for `variable` and return its output.

    The output of `check_output` is decoded with the default codec and
    stripped of surrounding whitespace. Errors raised by `check_output`
    are not handled here and propagate to the caller.
    """
    raw_output = check_output(["getconf", variable])
    decoded = raw_output.decode()
    return decoded.strip()
Loading