
Merge branch 'master' into reduce_extent
jreadey committed Oct 10, 2023
2 parents 2f33dca + 8fb1bb6 commit 28bb586
Showing 15 changed files with 207 additions and 71 deletions.
2 changes: 1 addition & 1 deletion admin/config/config.yml
@@ -32,7 +32,7 @@ log_prefix: null # Prefix text to append to log entries
max_tcp_connections: 100 # max number of inflight tcp connections
head_sleep_time: 10 # max sleep time between health checks for head node
node_sleep_time: 10 # max sleep time between health checks for SN/DN nodes
async_sleep_time: 10 # max sleep time between async task runs
async_sleep_time: 1 # max sleep time between async task runs
s3_sync_interval: 1 # time to wait between s3_sync checks (in sec)
s3_age_time: 1 # time to wait since last update to write an object to S3
s3_sync_task_timeout: 10 # time to cancel write task if no response
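Lowering async_sleep_time from 10 to 1 second makes the datanode background tasks wake much more often; the reworked bucketScan loop in hsds/datanode.py below derives a short nap of async_sleep_time / 10 from the same setting, which is presumably what lets the new synchronous rescan in PUT_Domain return promptly while an idle node still backs off to the full one-second interval.
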
6 changes: 3 additions & 3 deletions admin/docker/docker-compose-internal-lb.yml
@@ -5,8 +5,8 @@ services:
restart: ${RESTART_POLICY}
mem_limit: ${HEAD_RAM}
environment:
- TARGET_SN_COUNT=${CORES}
- TARGET_DN_COUNT=${CORES}
- TARGET_SN_COUNT=${SN_CORES}
- TARGET_DN_COUNT=${DN_CORES}
- NODE_TYPE=head_node
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
@@ -49,7 +49,7 @@ services:
- HSDS_ENDPOINT=${HSDS_ENDPOINT}

ports:
- ${SN_PORT}:${SN_PORT}
- ${SN_PORT_RANGE}:${SN_PORT}
logging:
options:
max-size: "5k"
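Two deployment changes here: the head node now tracks the SN and DN pool sizes separately (SN_CORES and DN_CORES replace the single CORES value), and the sn service publishes a host port range rather than a single port. With, say, SN_PORT=5101 and SN_CORES=4, runall.sh below would export SN_PORT_RANGE=5101-5104, so the mapping reads 5101-5104:5101 and Docker can give each scaled sn replica its own host port from the range; the same ports substitution is applied to the other docker-compose files in this commit.
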
2 changes: 1 addition & 1 deletion admin/docker/docker-compose.aws.yml
@@ -55,7 +55,7 @@ services:
- LOG_LEVEL=${LOG_LEVEL}
- HSDS_ENDPOINT=${HSDS_ENDPOINT}
ports:
- ${SN_PORT}:${SN_PORT}
- ${SN_PORT_RANGE}:${SN_PORT}
depends_on:
- head
volumes:
2 changes: 1 addition & 1 deletion admin/docker/docker-compose.azure.yml
@@ -46,7 +46,7 @@ services:
- LOG_LEVEL=${LOG_LEVEL}
- HSDS_ENDPOINT=${HSDS_ENDPOINT}
ports:
- ${SN_PORT}:${SN_PORT}
- ${SN_PORT_RANGE}:${SN_PORT}
depends_on:
- head
volumes:
2 changes: 1 addition & 1 deletion admin/docker/docker-compose.posix.yml
@@ -44,7 +44,7 @@ services:
- BUCKET_NAME=${BUCKET_NAME}
- HSDS_ENDPOINT=${HSDS_ENDPOINT}
ports:
- ${SN_PORT}:${SN_PORT}
- ${SN_PORT_RANGE}:${SN_PORT}
depends_on:
- head
volumes:
2 changes: 1 addition & 1 deletion admin/docker/docker-compose.yml
@@ -55,7 +55,7 @@ services:
- LOG_LEVEL=${LOG_LEVEL}
- HSDS_ENDPOINT=${HSDS_ENDPOINT}
ports:
- ${SN_PORT}:${SN_PORT}
- ${SN_PORT_RANGE}:${SN_PORT}
depends_on:
- head
volumes:
2 changes: 1 addition & 1 deletion hsds/attr_sn.py
@@ -383,7 +383,7 @@ async def PUT_Attribute(request):

# ready to add attribute now
req = getDataNodeUrl(app, obj_id)
req += "/" + collection + "/" + obj_id + "/attributes/" + attr_name
req += f"/{collection}/{obj_id}/attributes/{attr_name}"
log.info("PUT Attribute: " + req)

attr_json = {}
18 changes: 14 additions & 4 deletions hsds/datanode.py
@@ -98,8 +98,10 @@ async def bucketScan(app):
log.info("bucketScan start")

async_sleep_time = int(config.get("async_sleep_time"))
scan_wait_time = async_sleep_time * 6 # default to ~1min
log.info("scan_wait_time: {}".format(scan_wait_time))
short_sleep_time = float(async_sleep_time) / 10.0
scan_wait_time = async_sleep_time # default to ~1min
log.info(f"scan_wait_time: {scan_wait_time}")
last_action = time.time() # keep track of the last time any work was done

# update/initialize root object before starting node updates

@@ -165,8 +167,16 @@
tb = traceback.format_exc()
print("traceback:", tb)

log.info(f"bucketScan - sleep: {async_sleep_time}")
await asyncio.sleep(async_sleep_time)
last_action = time.time()

now = time.time()
if (now - last_action) > async_sleep_time:
sleep_time = async_sleep_time # long nap
else:
sleep_time = short_sleep_time # short nap

log.info(f"bucketScan - sleep: {sleep_time}")
await asyncio.sleep(sleep_time)

# shouldn't ever get here
log.error("bucketScan terminating unexpectedly")
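The reworked loop above replaces the fixed await asyncio.sleep(async_sleep_time) with an adaptive nap: it sleeps only async_sleep_time / 10 while scan work has happened recently and falls back to the full interval once the node has been idle for longer than async_sleep_time. A minimal self-contained sketch of that pattern, with scan_loop and do_scan_pass as illustrative stand-ins for the real bucketScan body, and assuming last_action is refreshed only when a pass actually does work:

import asyncio
import random
import time

async def do_scan_pass():
    """Hypothetical stand-in for one bucketScan pass; returns True if work was done."""
    await asyncio.sleep(0)  # yield to the event loop, as the real scan would
    return random.random() < 0.5  # pretend work shows up about half the time

async def scan_loop(async_sleep_time=1.0):
    short_sleep_time = async_sleep_time / 10.0
    last_action = time.time()
    while True:
        if await do_scan_pass():
            last_action = time.time()
        # nap briefly while work keeps arriving, back off once the node goes idle
        if time.time() - last_action > async_sleep_time:
            sleep_time = async_sleep_time  # long nap
        else:
            sleep_time = short_sleep_time  # short nap
        await asyncio.sleep(sleep_time)

With the shipped config value (async_sleep_time: 1) the short nap works out to roughly 0.1 s, which is what allows the synchronous rescan path in PUT_Domain below to complete quickly without an idle node spinning.
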
2 changes: 1 addition & 1 deletion hsds/datanode_lib.py
@@ -77,7 +77,7 @@ def get_obj_id(request, body=None):


async def notify_root(app, root_id, bucket=None):
"""flag to write to S3"""
"""flag to indicate domain state has changed """

log.info(f"notify_root: {root_id}, bucket={bucket}")
if not isValidUuid(root_id) or not isSchema2Id(root_id):
59 changes: 53 additions & 6 deletions hsds/domain_sn.py
@@ -946,6 +946,21 @@ async def doFlush(app, root_id, bucket=None):
return dn_ids


async def getScanTime(app, root_id, bucket=None):
""" Return timestamp for the last scan of the given root id """
root_scan = 0
log.debug(f"getScanTime: {root_id}")
root_info = await getRootInfo(app, root_id, bucket=bucket)
if root_info:
log.debug(f"getScanTime root_info: {root_info}")
if "scan_complete" in root_info:
root_scan = root_info["scan_complete"] # timestamp last scan was finished
else:
log.warn("scan_complete key not found in root_info")

return root_scan


async def PUT_Domain(request):
"""HTTP method to create a new domain"""
log.request(request)
@@ -997,6 +1012,8 @@
raise HTTPBadRequest(reason=msg)
log.debug(f"PUT domain with body: {body}")

log.debug(f"params: {params}")

if "getdnids" in params and params["getdnids"]:
getdnids = True
elif body and "getdnids" in body and body["getdnids"]:
@@ -1010,6 +1027,16 @@
do_flush = True
else:
do_flush = False

if "rescan" in params and params["rescan"]:
do_rescan = True
elif body and "rescan" in body and body["rescan"]:
do_rescan = True
else:
do_rescan = False

log.debug(f"do_flush: {do_flush} do_rescan: {do_rescan}")

if do_flush:
# flush domain - update existing domain rather than create
# a new resource
@@ -1045,11 +1072,13 @@
else:
log.info("flush called on folder, ignoring")
status_code = 204
resp = await jsonResponse(request, rsp_json, status=status_code)
log.response(request, resp=resp)
return resp
if not do_rescan:
# send the response now
resp = await jsonResponse(request, rsp_json, status=status_code)
log.response(request, resp=resp)
return resp

if "rescan" in params and params["rescan"]:
if do_rescan:
# refresh scan info for the domain
log.info(f"rescan for domain: {domain}")
domain_json = await getDomainJson(app, domain, reload=True)
@@ -1066,15 +1095,33 @@
log.info(msg)
raise HTTPBadRequest(reason=msg)
aclCheck(app, domain_json, "update", username)
log.info(f"notify_root: {root_id}")
log.debug(f"notify_root: {root_id}")
notify_req = getDataNodeUrl(app, root_id) + "/roots/" + root_id
post_params = {}
post_params = {"timestamp": 0} # have scan run immediately
if bucket:
post_params["bucket"] = bucket
req_send_time = time.time()
await http_post(app, notify_req, data={}, params=post_params)

# Poll until the scan_complete time is greater than
# req_send_time or 3 minutes have elapsed
MAX_WAIT_TIME = 180
RESCAN_SLEEP_TIME = 0.1
while True:
scan_time = await getScanTime(app, root_id, bucket=bucket)
if scan_time > req_send_time:
log.info(f"scan complete for root: {root_id}")
break
if time.time() - req_send_time > MAX_WAIT_TIME:
log.warn(f"scan failed to complete in {MAX_WAIT_TIME} seconds for {root_id}")
raise HTTPServiceUnavailable()
log.debug(f"do_rescan sleeping for {RESCAN_SLEEP_TIME}s")
await asyncio.sleep(RESCAN_SLEEP_TIME) # avoid busy wait
resp = json_response(None, status=204) # No Content response
return resp

# from here we are just doing a normal new domain creation

is_folder = False
owner = username
linked_domain = None
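Taken together, getScanTime and the new do_rescan branch turn PUT /?rescan=1 into a synchronous operation: the SN posts timestamp=0 to the root's DN, then polls scan_complete every 0.1 s until it passes the request time, giving up with a 503 after 180 s. A rough client-side sketch of driving this, assuming the domain is addressed with the domain query parameter and HTTP Basic auth as with other SN requests (the endpoint, domain, and credentials below are placeholders):

import requests  # client-side dependency for this sketch; not part of HSDS

def rescan_domain(endpoint, domain, username, password):
    """Ask the service to refresh scan/summary info for a domain and wait for it."""
    params = {"domain": domain, "rescan": 1}
    # allow a little more than the server's 180 s internal wait
    rsp = requests.put(f"{endpoint}/", params=params,
                       auth=(username, password), timeout=200)
    if rsp.status_code == 204:
        return True   # scan_complete advanced past the request time
    if rsp.status_code == 503:
        return False  # scan did not finish within the server's wait window
    rsp.raise_for_status()

# e.g. rescan_domain("http://localhost:5101", "/home/test_user1/mydata.h5",
#                    "test_user1", "test")
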
15 changes: 12 additions & 3 deletions hsds/group_dn.py
@@ -282,16 +282,25 @@ async def POST_Root(request):
log.error(f"Expected root id but got: {root_id}")
raise HTTPInternalServerError()
params = request.rel_url.query
log.debug(f"POST_Root params: {params}")
if "bucket" in params:
bucket = params["bucket"]
else:
bucket = None
if "timestamp" in params:
try:
timestamp = int(params["timestamp"])
except ValueError:
log.error("unexpected value for timestamp: {params}")
raise HTTPInternalServerError()
else:
timestamp = time.time()

log.info(f"POST_Root: {root_id} bucket: {bucket}")
log.info(f"POST_Root: {root_id} bucket: {bucket} timestamp: {timestamp}")

# add id to be scanned by the s3sync task
# add id to be scanned by the bucket scan task
root_scan_ids = app["root_scan_ids"]
root_scan_ids[root_id] = (bucket, time.time())
root_scan_ids[root_id] = (bucket, timestamp)

resp_json = {}

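POST /roots/{id} on the DN now honors an optional timestamp query parameter. Without it the entry is stamped with the current time as before; with timestamp=0, which is what the new rescan path in domain_sn.py sends, the root looks already overdue so the bucket-scan task should pick it up on its next pass rather than after the usual wait. A small sketch of the difference in the root_scan_ids bookkeeping (root id and bucket name are illustrative):

import time

root_scan_ids = {}
root_id = "g-00000000-1234-5678-9abc-000000000001"  # illustrative root id
bucket = "mybucket"                                  # illustrative bucket

# no timestamp param: stamp the entry with "now", the pre-existing behavior
root_scan_ids[root_id] = (bucket, time.time())

# timestamp=0 (sent by PUT_Domain's rescan path): the entry is already "old",
# so the next bucketScan pass treats the root as due for an immediate scan
root_scan_ids[root_id] = (bucket, 0)
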
53 changes: 26 additions & 27 deletions runall.sh
@@ -29,7 +29,7 @@ config_value() {

# script to startup hsds service
if [[ $# -eq 1 ]] && ([[ $1 == "-h" ]] || [[ $1 == "--help" ]]); then
echo "Usage: runall.sh [--no-docker] [--no-docker-tcp] [--stop] [--config] [count] "
echo "Usage: runall.sh [--no-docker] [--no-docker-tcp] [--stop] [--config] [dn_count] [sn_count]"
echo " --no-docker: run server as set of processes rather than Docker containers (using unix sockets)"
echo " --no-docker-tcp: run server as set of processes rather than Docker containers (using tcp)"
echo " --stop: shutdown the server (Docker only)"
@@ -38,40 +38,28 @@ if [[ $# -eq 1 ]] && ([[ $1 == "-h" ]] || [[ $1 == "--help" ]]); then
exit 1
fi

if [[ $# -gt 0 ]]; then

DOCKER_CMD="up"

while [[ $# -gt 0 ]]; do
if [[ $1 == "--no-docker" ]]; then
export NO_DOCKER=1

if [[ $# -gt 1 ]] ; then
export CORES=$2
fi
elif [[ $1 == "--no-docker-tcp" ]]; then
export NO_DOCKER=1
export USE_TCP=1

if [[ $# -gt 1 ]] ; then
export CORES=$2
fi

elif [[ $1 == "--stop" ]]; then
echo "stopping"
export DOCKER_CMD="down"
elif [[ $1 == "--config" ]]; then
PRINT_CONFIG=1
elif [[ -z ${DN_CORES} ]]; then
export DN_CORES=$1
else
export CORES=$1
export SN_CORES=$1
fi
fi

if [[ ${CORES} ]]; then
export DN_CORES=${CORES}
else
export DN_CORES=4
fi
shift
done

if [[ -z $SN_CORES ]]; then
# Use 1 SN_CORE unless there's an environment variable set
export SN_CORES=1
fi

if [[ -z $CONFIG_DIR ]]; then
export CONFIG_DIR="admin/config"
@@ -105,8 +93,19 @@ config_value "DN_PORT" && export DN_PORT=$rv
config_value "DN_RAM" && export DN_RAM=$rv
config_value "SN_PORT" && export SN_PORT=$rv
config_value "SN_RAM" && export SN_RAM=$rv
config_value "RANGEGET_PORT" && export RANGEGET_PORT=$rv
config_value "RANGEGET_RAM" && export RANGEGET_RAM=$rv


if [[ -z ${DN_CORES} ]]; then
export DN_CORES=4
fi

if [[ -z $SN_CORES ]]; then
# Use 1 SN_CORE by default
export SN_CORES=1
export SN_PORT_RANGE=$SN_PORT
else
export SN_PORT_RANGE=$SN_PORT-$((SN_PORT + SN_CORES - 1))
fi


if [[ ${NO_DOCKER} ]]; then
@@ -201,13 +200,13 @@ if [[ $NO_DOCKER ]] ; then
fi
# this will run until server is killed by ^C
else
if [[ $# -eq 1 ]] && [[ $1 == "--stop" ]]; then
if [[ $DOCKER_CMD == "down" ]]; then
# use the compose file to shut down the service
echo "Running docker-compose -f ${COMPOSE_FILE} down"
docker-compose -f ${COMPOSE_FILE} down
exit 0 # can quit now
else
echo "Running docker-compose -f ${COMPOSE_FILE} up"
echo "Running docker-compose -f ${COMPOSE_FILE} up -d --scale sn=${SN_CORES} --scale dn=${DN_CORES}"
docker-compose -f ${COMPOSE_FILE} up -d --scale sn=${SN_CORES} --scale dn=${DN_CORES}
fi

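Usage-wise, the positional arguments now size the two pools separately: the first bare count sets DN_CORES and the second sets SN_CORES, so ./runall.sh 8 2 would start eight DN and two SN containers (the defaults are 4 and 1), while ./runall.sh --stop still shuts the stack down via docker-compose down. For the Docker path the counts are passed straight to docker-compose up -d --scale sn=${SN_CORES} --scale dn=${DN_CORES}, with SN_PORT_RANGE derived from SN_PORT and SN_CORES so each SN replica can claim its own host port.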