diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..f6b4617d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "thirdparty/ha-lib"] + path = thirdparty/ha-lib + url = git@github.com:CacheboxInc/ha-lib.git diff --git a/Makefile b/Makefile index cad07b96..0b950a5b 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,10 @@ export CEPH_RBD export GLFS_BD export SD_NOTIFY +#top-level dir path +TGT_DIR = $(PWD) +export TGT_DIR + .PHONY: all all: programs doc conf scripts diff --git a/basic_setup.sh b/basic_setup.sh new file mode 100755 index 00000000..878973d1 --- /dev/null +++ b/basic_setup.sh @@ -0,0 +1,138 @@ +#!/bin/bash -x + +function Usage() { + echo "1st arg disk file name for target" + echo "2nd arg fio config file" + exit 1 +} + +if [ $# -lt 2 ]; then + Usage +fi + +DISK1=$1 +FIO_CONFIG=$2 + +function Cmd_present() { + command -v $1 2>&1 > /dev/null + if [ ! $? -eq 0 ]; then + echo "Command $1 not present" + exit $? + fi +} + +# Check for all the to be used commands +Cmd_present tgtd +Cmd_present tgtadm +Cmd_present curl +Cmd_present iscsiadm +Cmd_present fio + +function cleanup() { + rm -rf $DISK1 + killall -9 tgtd +} + +if [ -f "$DISK1" ]; then + echo "File $DISK1 already present." + exit 1 +fi + +echo "Creating 1G file to be exported as LUN" +#fallocate -l 1G $DISK1 +dd if=/dev/zero of=$DISK1 bs=1G count=1 + +if [ ! $? -eq 0 ]; then + echo "File creation of $DISK1 failed" + cleanup + exit $? +fi + +# Start the tgtd +tgtd + +if [ ! $? -eq 0 ]; then + echo "Starting tgtd failed" + cleanup + exit $? +fi + +# Create new VM through REST API +curl -s -XPOST 'http://localhost:1984/new_vm/1' \ + -d '{"vmid":"1","TargetID":"1","TargetName":"disk1"}' \ + -H 'Content-Type: application/json' 2>&1 > /dev/null + +if [ ! $? -eq 0 ]; then + echo "Create new VM REST API failed" + cleanup + exit $? +else + echo "New VM with vmid 1, rest api successful" +fi + +# Create new VMDK through REST API +curl -s -XPOST 'http://localhost:1984/vm/1/new_vmdk/1' \ + -d '{"TargetID":"1","LunID":"1","DevPath":"/var/tmp/iscsi-disk1","VmID":"1","VmdkID":"1","BlockSize":"4096","Compression":{"Enabled":"false"},"Encryption":{"Enabled":"false"},"RamCache":{"Enabled":"true","MemoryInMB":"1024"},"FileCache":{"Enabled":"false"},"SuccessHandler":{"Enabled":"true"}}' \ + -H'Content-Type: application/json' 2>&1 > /dev/null + +if [ ! $? -eq 0 ]; then + echo "Create new VMDK REST API failed" + cleanup + exit $? +else + echo "New VMDK with vmdkid 1, rest api successful" +fi + +# Make the target discoverable +tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL + +if [ ! $? -eq 0 ]; then + echo "Target discoverable command failed" + cleanup + exit $? +else + echo "Target with tid 1 is now discoverable" +fi + +# Discover +iscsiadm --mode discovery --type sendtargets --portal 127.0.0.1 + +# Login through iscsi on this target +iscsiadm --mode node --targetname disk1 --portal 127.0.0.1:3260 --login + +if [ ! $? -eq 0 ]; then + echo "iscsi login to target tid 1 failed" + cleanup + exit $? +else + echo "iscsi login to target tid 1 successful" +fi + +# run fio +FIO_PATH=$HOME/fio_$$ + +mkdir $FIO_PATH + +if [ ! $? -eq 0 ]; then + echo "mkdir $FIO_PATH failed" + cleanup + exit $? +else + echo "FIO logs with be at $FIO_PATH" +fi + +cd $FIO_PATH + +fio $HOME/config.fio + +if [ ! $? -eq 0 ]; then + echo "fio run failed" +else + echo "fio run succeeded" +fi + +# logout +iscsiadm --mode node --targetname disk1 --portal 127.0.0.1:3260 --logout + +cleanup + diff --git a/config.fio b/config.fio new file mode 100644 index 00000000..f40e7fbe --- /dev/null +++ b/config.fio @@ -0,0 +1,20 @@ +[global] +group_reporting=1 +exitall +runtime=1m +time_based=1 + +# writers, will repeatedly randomly write and verify data +[writers] +rw=randwrite +bsrange=4k-16k +ioengine=libaio +iodepth=128 +filename=/dev/sdb +verify=crc32c +verify_backlog=1024 +verify_backlog_batch=512 +verify_interval=512 +numjobs=1 +create_serialize=0 + diff --git a/dockers/build/Dockerfile b/dockers/build/Dockerfile new file mode 100644 index 00000000..a4d3dec0 --- /dev/null +++ b/dockers/build/Dockerfile @@ -0,0 +1,31 @@ +# Pull base image +FROM ubuntu:16.04 + +# Install essential packages + +RUN rm -rf /var/lib/apt/lists/* +RUN apt-get -y update +RUN apt-get -y upgrade + +# for folly +RUN apt-get install -y cmake libboost-all-dev libevent-dev libdouble-conversion-dev libgoogle-glog-dev +RUN apt-get install -y libgflags-dev libiberty-dev liblz4-dev liblzma-dev libsnappy-dev make +RUN apt-get -y update +RUN apt-get -y upgrade +RUN apt-get install -y zlib1g-dev binutils-dev libjemalloc-dev libssl-dev pkg-config + +# for storage layer +RUN apt-get -y update +RUN apt-get -y upgrade +RUN apt-get install -y software-properties-common +RUN add-apt-repository -y ppa:jonathonf/gcc-7.2 +RUN apt-get -y update +RUN apt-get install -y gcc-7 g++-7 +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60 --slave /usr/bin/g++ g++ /usr/bin/g++-7 + +ADD run.sh /opt/run.sh + +VOLUME /shared +WORKDIR /shared +RUN chmod +x /opt/run.sh +CMD ["/opt/run.sh"] diff --git a/dockers/build/run.sh b/dockers/build/run.sh new file mode 100644 index 00000000..993d9cf4 --- /dev/null +++ b/dockers/build/run.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +echo "this is run.sh for tgt" + +# uncomment the below line and comment rest all to aid debug +#tail -F -n0 /etc/hosts + +cwd=$(pwd) + +echo "build folly start" +cd folly +rm -rf _build +mkdir _build +cd _build +cmake configure .. -DBUILD_SHARED_LIBS=ON +make +make install +echo "build folly complete" + +cd $cwd +echo "build CRoaring start" +cd hyc-storage-layer/thirdparty/CRoaring +rm -rf _build +mkdir _build +cd _build +cmake .. +make +make install +echo "build CRoaring complete" + +cd $cwd +echo "build restbed start" +cd hyc-storage-layer/thirdparty/restbed +rm -rf _build +mkdir _build +cd _build +cmake -DBUILD_SHARED=YES .. +make +make install +echo "build restbed complete" + +cd $cwd +echo "gtest build start" +cd googletest +rm -rf _build +mkdir _build +cd _build +cmake .. +make +make install +echo "gtest build complete" + +cd $cwd +echo "storage lib build start" +cd hyc-storage-layer +rm -rf _build +mkdir _build +cd _build +cmake .. +make +make test +make install +echo "storage lib build complete" + +cd $cwd +echo "tgt build start" +cd tgt +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/hyc +make +echo "tgt build complete" + +cd $cwd +echo "run.sh for tgt exiting" diff --git a/readme b/readme new file mode 100644 index 00000000..18268eaa --- /dev/null +++ b/readme @@ -0,0 +1,3 @@ +how to run tgt: +#./usr/tgtd -f -e "http://127.0.0.1:2379" -s "tgt_svc" -v "v1.0" -p 9001 -D "127.0.0.1" -P 9876 + diff --git a/setup_cmds.txt b/setup_cmds.txt new file mode 100644 index 00000000..86f1c79e --- /dev/null +++ b/setup_cmds.txt @@ -0,0 +1,35 @@ +Following are the list of commands in order that has to be run to configure +hyc tgt : +........................................................................... + +1. Start the tgtd. +2. Create new VM through Rest API + # curl -v -XPOST 'http://localhost:1984/new_vm/1' -d '{"vmid":"1"}' \ + -H'Content-Type: application/json' +3. Create new VMDK through Rest API + # curl -v -XPOST 'http://localhost:1984/vm/1/new_vmdk/1' \ + -d '{"VmID":"1","VmdkID":"1","BlockSize":"4096",\ + "Compression":{"Enabled":"false"},"Encryption":{"Enabled":"false"},\ + "RamCache":{"Enabled":"true","MemoryInMB":"1024"},\ + "FileCache":{"Enabled":"false"},"SuccessHandler":{"Enabled":"true"}}'\ + -H'Content-Type: application/json' + +4. Create new target. + # sudo ./tgtadm --lld iscsi --mode target --op new --tid=1 \ + --targetname disk1 + +5. Create a new LUN for this new target. + # sudo ./tgtadm --lld iscsi --mode logicalunit --op new --tid 1 \ + --lun 1 -b /var/tmp/iscsi-disk1 --bstype hyc \ + --bsopts vmid=1:vmdkid:1 + NOTE: bstype indicates, backing store type, we have our own named "hyc". + +6. Make this target discoverable + # sudo ./tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL + +7. Discover this target from iscsi client + # iscsiadm --mode discovery --type sendtargets --portal 127.0.0.1 + +8. Login to this target from iscsi client + # iscsiadm --mode node --targetname disk1 --portal 127.0.0.1:3260 --login + diff --git a/thirdparty/ha-lib b/thirdparty/ha-lib new file mode 160000 index 00000000..5dbb8b23 --- /dev/null +++ b/thirdparty/ha-lib @@ -0,0 +1 @@ +Subproject commit 5dbb8b235c4b80f60e2ef80f0389da35170edeb2 diff --git a/usr/Makefile b/usr/Makefile index decf13c7..7c2dde83 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -1,6 +1,8 @@ sbindir ?= $(PREFIX)/sbin libdir ?= $(PREFIX)/lib/tgt +CFLAGS += -fPIC + ifneq ($(shell test -e /usr/include/linux/signalfd.h && echo 1),) CFLAGS += -DUSE_SIGNALFD endif @@ -37,19 +39,46 @@ LIBS += -libverbs -lrdmacm endif INCLUDES += -I. +INCLUDES +=-I/usr/local/include/hyc/include/ +INCLUDES += -I/$(TGT_DIR)/thirdparty/ha-lib/src/ +INCLUDES += -I/$(TGT_DIR)/thirdparty/ha-lib/third-party/include/ CFLAGS += -D_GNU_SOURCE CFLAGS += $(INCLUDES) + ifneq ($(DEBUG),) CFLAGS += -g -O0 -ggdb -rdynamic +CFLAGS += -fsanitize=address else CFLAGS += -g -O2 -fno-strict-aliasing endif -CFLAGS += -Wall -Wstrict-prototypes -Werror -fPIC +CFLAGS += -Wall -Werror -fPIC CFLAGS += -DTGT_VERSION=\"$(VERSION)$(EXTRAVERSION)\" CFLAGS += -DBSDIR=\"$(DESTDIR)$(libdir)/backing-store\" -LIBS += -lpthread -ldl + +LIBS_GRP_START += -Wl,--start-group +LIBS_GRP_END += -Wl,--end-group +HYC_LIBS += -lHycClientLib -lStorRpcLib -lHycStorCommon +FOLLY_LIBS += -lfolly -lglog -lstdc++ -lm -levent -ldouble-conversion -liberty +C++_LIBS += -lstdc++ -lboost_system -lboost_context -lboost_thread +COMPRESSION_LIBS += -lz -lsnappy -llz4 -llzma -lzstd -lbz2 +MISC_LIBS += -lpthread -ldl -lgflags -lcrypto -lssl +THRIFT_LIBS += -lthrift-core -lprotocol -lconcurrency -ltransport -lasync +THRIFT_LIBS += -lsecurity -lserver -lthriftfrozen2 -lthriftprotocol +THRIFT_LIBS += -lthriftcpp2 -lwangle -latomic + +HA_DEP_LIBS += -lbase64 -lcurl -ljansson -lmicrohttpd -lorcania -lulfius -lyder +HA_LIB += -lha $(HA_DEP_LIBS) + +LIBS += $(LIBS_GRP_START) $(HYC_LIBS) $(FOLLY_LIBS) $(THRIFT_LIBS) $(C++_LIBS) +LIBS += $(COMPRESSION_LIBS) $(MISC_LIBS) $(HA_LIB) $(LIBS_GRP_END) + +ASAN_LIB := +ifneq ($(DEBUG), ) +ASAN_LIB = -lasan +LIBS += $(ASAN_LIB) +endif ifneq ($(SD_NOTIFY),) LIBS += -lsystemd @@ -59,11 +88,13 @@ PROGRAMS += tgtd tgtadm tgtimg TGTD_OBJS += tgtd.o mgmt.o target.o scsi.o log.o driver.o util.o work.o \ concat_buf.o parser.o spc.o sbc.o mmc.o osd.o scc.o smc.o \ ssc.o libssc.o bs_rdwr.o bs_ssc.o \ - bs_null.o bs_sg.o bs.o libcrc32c.o bs_sheepdog.o + bs_null.o bs_sg.o bs.o libcrc32c.o bs_sheepdog.o bs_hyc.o TGTD_DEP = $(TGTD_OBJS:.o=.d) LDFLAGS = -Wl,-E,-rpath=$(libdir) +LDFLAGS += -L/usr/lib/hyc -L/$(TGT_DIR)/thirdparty/ha-lib/build/src/ +LDFLAGS += -L$(TGT_DIR)/thirdparty/ha-lib/third-party/lib .PHONY:all all: $(PROGRAMS) $(MODULES) @@ -78,7 +109,7 @@ TGTADM_OBJS = tgtadm.o concat_buf.o TGTADM_DEP = $(TGTADM_OBJS:.o=.d) tgtadm: $(TGTADM_OBJS) - $(CC) $^ -o $@ + $(CC) $^ -o $@ $(ASAN_LIB) -include $(TGTADM_DEP) @@ -86,7 +117,7 @@ TGTIMG_OBJS = tgtimg.o libssc.o libcrc32c.o TGTIMG_DEP = $(TGTIMG_OBJS:.o=.d) tgtimg: $(TGTIMG_OBJS) - $(CC) $^ -o $@ + $(CC) $^ -o $@ $(ASAN_LIB) -include $(TGTIMG_DEP) diff --git a/usr/bs_hyc.c b/usr/bs_hyc.c new file mode 100644 index 00000000..d4efad9b --- /dev/null +++ b/usr/bs_hyc.c @@ -0,0 +1,440 @@ +/* + * hyc I/O backing store routine + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "list.h" +#include "tgtd.h" +#include "scsi.h" +#include "target.h" +#include "util.h" +#include "parser.h" +#include "iscsi/iscsid.h" + +#include "bs_hyc.h" + +#include "TgtTypes.h" +#include "TgtInterface.h" + +static inline struct bs_hyc_info *BS_HYC_I(struct scsi_lu *lu) +{ + return (struct bs_hyc_info *) ((char *)lu + sizeof (*lu)); +} + +io_type_t scsi_cmd_operation(struct scsi_cmd *cmdp) +{ + unsigned int scsi_op = (unsigned int) cmdp->scb[0]; + io_type_t op = UNKNOWN; + struct mgmt_req *mreq; + + mreq = cmdp->mreq; + if (mreq) { + switch(mreq->function) { + case ABORT_TASK: + op = ABORT_TASK_OP; + break; + case ABORT_TASK_SET: + op = ABORT_TASK_SET_OP; + break; + default: + op = UNKNOWN; + } + + eprintf("\n**** MTF: cmd: %p op: %x, function:%x\n", cmdp, scsi_op, op); + return op; + } + switch (scsi_op) { + case WRITE_6: + case WRITE_10: + case WRITE_12: + case WRITE_16: + op = WRITE; + break; + case READ_6: + case READ_10: + case READ_12: + case READ_16: + op = READ; + break; + case WRITE_SAME: + case WRITE_SAME_16: + /** WRITE_SAME used to punch a hole in file */ + if (cmdp->scb[1] & 0x08) { + eprintf("Unmap with WRITE_SAME for hyc backend is not" + " supported yet.\n"); + } + op = WRITE_SAME_OP; + break; + case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: + default: + eprintf("skipped cmd: %p op: %x\n", cmdp, scsi_op); + op = UNKNOWN; + } + return op; +} + +static uint64_t scsi_cmd_offset(struct scsi_cmd *cmdp) +{ + return cmdp->offset; +} + +static uint32_t scsi_cmd_length(struct scsi_cmd *cmdp) +{ + switch (scsi_cmd_operation(cmdp)) { + case READ: + return scsi_get_in_length(cmdp); + case WRITE_SAME_OP: + case WRITE: + return scsi_get_out_transfer_len(cmdp); + case ABORT_TASK_OP: + case ABORT_TASK_SET_OP: + return 0; + case UNKNOWN: + assert(0); + } + return 0; +} + +static char *scsi_cmd_buffer(struct scsi_cmd *cmdp) +{ + switch (scsi_cmd_operation(cmdp)) { + default: + assert(0); + return NULL; + case READ: + return scsi_get_in_buffer(cmdp); + case WRITE: + case WRITE_SAME_OP: + return scsi_get_out_buffer(cmdp); + } +} + +static int bs_hyc_cmd_submit(struct scsi_cmd *cmdp) +{ + struct scsi_lu *lup = NULL; + struct bs_hyc_info *infop = NULL; + io_type_t op; + size_t length = 0; + uint64_t offset = 0; + char *bufp = NULL; + RequestID reqid = kInvalidRequestID; + int rc = 0; + + lup = cmdp->dev; + infop = BS_HYC_I(lup); + + assert(infop->vmdk_handle != kInvalidVmdkHandle); + + op = scsi_cmd_operation(cmdp); + if (hyc_unlikely(op == WRITE_SAME_OP)) { + return -1; + } else if (hyc_unlikely(op == UNKNOWN)) { + return 0; + } + + if (op != ABORT_TASK_OP && op != ABORT_TASK_SET_OP) { + + offset = scsi_cmd_offset(cmdp); + length = scsi_cmd_length(cmdp); + + /* + * Simply returing from top for zero size IOs, we may need to handle + * it later for the barrier IOs + */ + + if (op == WRITE) { + if (!length) { + eprintf("Zero size write IO, returning from top :%lu\n", length); + return 0; + } + } else if (op == READ) { + if (!length) { + eprintf("Zero size read IO, returning from top :%lu\n", length); + return 0; + } + } + + bufp = scsi_cmd_buffer(cmdp); + set_cmd_async(cmdp); + } + + switch (op) { + case READ: + reqid = HycScheduleRead(infop->vmdk_handle, cmdp, bufp, length, offset); + break; + case WRITE: + reqid = HycScheduleWrite(infop->vmdk_handle, cmdp, bufp, length, offset); + break; + case ABORT_TASK_OP: + case ABORT_TASK_SET_OP: + rc = HycScheduleAbort(infop->vmdk_handle, cmdp); + eprintf("\n ABORT REQUEST SENT to THRIFT CLIENT got reply rc:%d\n", rc); + return rc; + case WRITE_SAME_OP: + case UNKNOWN: + default: + assert(0); + } + + /* If we got reqid, set it in hyc_cmd */ + if (hyc_unlikely(reqid == kInvalidRequestID)) { + eprintf("request submission got error invalid request" + " size: %lu offset : %"PRIu64" opcode :%u\n", + length, offset, (unsigned int) cmdp->scb[0]); + /* + * TODO: This change requires further investigation we have seen core dumps + * with this change. Keeping it as todo, investigation will be done later. + * Reverting to the original path. + */ + + //clear_cmd_async(cmdp); + target_cmd_io_done(cmdp, SAM_STAT_CHECK_CONDITION); + return -EINVAL; + } + + return rc; +} + +static inline struct iscsi_connection* +scsi_cmd_to_iscsi_connection(struct scsi_cmd* cmdp) +{ + struct iscsi_task* taskp = container_of(cmdp, struct iscsi_task, scmd); + if (hyc_unlikely(taskp->conn->state == STATE_CLOSE)) { + return NULL; + } + + return taskp->conn; +} + +static void post_scsi_completion(struct iscsi_connection* connp) +{ + if (connp == NULL) { + return; + } + + if (hyc_likely(connp->state == STATE_SCSI)) { + do { + int ret = iscsi_tx_handler(connp); + if (hyc_unlikely(ret)) { + break; + } + } while (connp->state == STATE_SCSI && !list_empty(&connp->tx_clist)); + } +} + +static void bs_hyc_handle_completion(int fd, int events, void *datap) +{ + struct bs_hyc_info *infop; + struct RequestResult *resultsp; + bool has_more; + struct iscsi_connection* connp; + + assert(datap); + connp = NULL; + infop = datap; + resultsp = infop->request_resultsp; + has_more = true; + + while (has_more == true) { + uint32_t nr_results = HycGetCompleteRequests(infop->vmdk_handle, + resultsp, infop->nr_results, &has_more); + + /* Process completed request commands */ + for (uint32_t i = 0; i < nr_results; ++i) { + struct scsi_cmd *cmdp = (struct scsi_cmd *) resultsp[i].privatep; + if (cmdp == NULL) { + continue; + } + struct iscsi_connection* cp = scsi_cmd_to_iscsi_connection(cmdp); + + if (connp == NULL) { + connp = cp; + } + assert(connp == cp); + + assert(resultsp[i].result == 0); + target_cmd_io_done(cmdp, SAM_STAT_GOOD); + } + memset(resultsp, 0, sizeof(*resultsp) * nr_results); + + if (has_more == false) { + eventfd_t c = 0; + int rc = eventfd_read(fd, &c); + if (hyc_unlikely(rc < 0)) { + assert(errno == EAGAIN || errno == EWOULDBLOCK); + } + has_more = c != 0; + } + } + + post_scsi_completion(connp); +} + +static int bs_hyc_open(struct scsi_lu *lup, char *pathp, + int *fdp, uint64_t *sizep) +{ + struct bs_hyc_info *infop = BS_HYC_I(lup); + int rc = 0; + int ffd = -1; + int efd = -1; + uint32_t blksize; + + ffd = backed_file_open(pathp, O_RDWR | O_LARGEFILE | O_DIRECT, sizep, + &blksize); + if (ffd < 0) { + eprintf("Failed to open %s, for tgt: %d, lun: %"PRId64 ", %m\n", + pathp, infop->lup->tgt->tid, infop->lup->lun); + rc = ffd; + goto error; + } + + /** TODO: Validate blksize, whether its what we need or not */ + if (!lup->attrs.no_auto_lbppbe) { + update_lbppbe(lup, blksize); + } + + efd = eventfd(0, O_NONBLOCK); + if (efd < 0) { + rc = errno; + goto error; + } + + rc = tgt_event_add(efd, EPOLLIN, bs_hyc_handle_completion, infop); + if (rc < 0) { + goto error; + } + + infop->done_eventfd = efd; + + rc = HycOpenVmdk(infop->vmid, infop->vmdkid, infop->done_eventfd, + &infop->vmdk_handle); + if (rc < 0) { + goto error; + } + + *fdp = ffd; + return 0; +error: + if (efd >= 0) { + close(efd); + efd = -1; + } + if (ffd >= 0) { + close(ffd); + ffd = -1; + } + return rc; +} + +static void bs_hyc_close(struct scsi_lu *lup) +{ + struct bs_hyc_info *infop = BS_HYC_I(lup); + + assert(infop); + assert(infop->done_eventfd >= 0); + + tgt_event_del(infop->done_eventfd); + HycCloseVmdk(infop->vmdk_handle); + close(infop->done_eventfd); + infop->done_eventfd = -1; + + close(lup->fd); +} + +enum { + Opt_vmid, Opt_vmdkid, Opt_err, +}; + +static match_table_t bs_hyc_opts = { + {Opt_vmid, "vmid=%s"}, + {Opt_vmdkid, "vmdkid=%s"}, + {Opt_err, NULL}, +}; + +static tgtadm_err bs_hyc_init(struct scsi_lu *lup, char *bsoptsp) +{ + struct bs_hyc_info *infop = BS_HYC_I(lup); + tgtadm_err e = TGTADM_SUCCESS; + char *p; + char *vmdkid = NULL; + char *vmid = NULL; + + assert(lup->tgt); + + eprintf("bsopts:%s\n", bsoptsp); + while((p = strsep(&bsoptsp, ":")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + if (!*p) + continue; + token = match_token(p, bs_hyc_opts, args); + switch (token) { + case Opt_vmid: + vmid = match_strdup(&args[0]); + break; + case Opt_vmdkid: + vmdkid = match_strdup(&args[0]); + break; + default: + break; + } + } + if (!vmid || !vmdkid) { + eprintf("hyc bst needs both vmid: %s & vmdkid: %s as bsopts\n", + vmid, vmdkid); + return TGTADM_INVALID_REQUEST; + } + + memset(infop, 0, sizeof(*infop)); + + infop->lup = lup; + infop->vmid = vmid; + infop->vmdkid = vmdkid; + infop->nr_results = 32; + infop->request_resultsp = calloc(infop->nr_results, + sizeof(*infop->request_resultsp)); + if (!infop->request_resultsp) { + eprintf("hyc bs init failed\n"); + e = TGTADM_NOMEM; + free(vmid); + free(vmdkid); + } + return e; +} + +static void bs_hyc_exit(struct scsi_lu *lup) +{ + struct bs_hyc_info *infop = BS_HYC_I(lup); + + assert(infop); + + free(infop->request_resultsp); + free(infop->vmid); + free(infop->vmdkid); +} + +static struct backingstore_template hyc_bst = { + .bs_name = "hyc", + .bs_datasize = sizeof(struct bs_hyc_info), + .bs_init = bs_hyc_init, + .bs_exit = bs_hyc_exit, + .bs_open = bs_hyc_open, + .bs_close = bs_hyc_close, + .bs_cmd_submit = bs_hyc_cmd_submit, +}; + +__attribute__((constructor)) static void bs_hyc_constructor(void) +{ + register_backingstore_template(&hyc_bst); +} diff --git a/usr/bs_hyc.h b/usr/bs_hyc.h new file mode 100644 index 00000000..064eac34 --- /dev/null +++ b/usr/bs_hyc.h @@ -0,0 +1,29 @@ +#ifndef __BS_HYC_H__ +#define __BS_HYC_H__ + +#include "TgtTypes.h" +#include "dll.h" + +typedef enum { + READ, + WRITE, + /* Appending with *_OP, since just WRITE_SAME conflicts with scsi.h */ + WRITE_SAME_OP, + ABORT_TASK_OP, + ABORT_TASK_SET_OP, + UNKNOWN, +} io_type_t; + +/** This structure is per LUN/VMDK */ +struct bs_hyc_info { + struct scsi_lu *lup; + char *vmid; + char *vmdkid; + VmdkHandle vmdk_handle; + int done_eventfd; + struct RequestResult *request_resultsp; + uint32_t nr_results; +}; + +#endif + diff --git a/usr/dll.h b/usr/dll.h new file mode 100644 index 00000000..08fffece --- /dev/null +++ b/usr/dll.h @@ -0,0 +1,75 @@ +/* + * Copyright(2013) Cachebox Inc. + * + * dll.h + */ + +#ifndef DDL_H +#define DDL_H + + +#ifndef offsetof +#ifndef __builtin_offsetof +#define offsetof(type, member) ((size_t)(&(((type *) 0)->member))) +#else +#define offsetof(type, member) __builtin_offsetof (type, member) +#endif +#endif + +#ifndef container_of +#define container_of(ptr, type, member) \ + (type *) (((char *) ptr) - offsetof(type, member)) +#endif + +struct dllist { + struct dllist *dll_next; + struct dllist *dll_prev; +}; +typedef struct dllist dll_t; + +#define DLL_INIT(dllp) { \ + (dllp)->dll_next = (dllp); \ + (dllp)->dll_prev = (dllp); \ +}; + +#define DLL_REM(dllp) { \ + (dllp)->dll_next->dll_prev = (dllp)->dll_prev; \ + (dllp)->dll_prev->dll_next = (dllp)->dll_next; \ + (dllp)->dll_next = (dllp); \ + (dllp)->dll_prev = (dllp); \ +}; + +#define DLL_ADD(dllp, newp) { \ + (newp)->dll_next = (dllp)->dll_next; \ + (newp)->dll_prev = (dllp); \ + (dllp)->dll_next->dll_prev = (newp); \ + (dllp)->dll_next = (newp); \ +}; + +#define DLL_REVADD(dllp, newp) { \ + (newp)->dll_prev = (dllp)->dll_prev; \ + (newp)->dll_next = (dllp); \ + (dllp)->dll_prev = (newp); \ + (newp)->dll_prev->dll_next = (newp); \ +}; + +#define DLL_MOVE(oldlist, newlist) do { \ + if (!DLL_ISEMPTY(oldlist)) { \ + dll_t *_np = (newlist)->dll_prev; \ + dll_t *_op = (oldlist)->dll_prev; \ + /* add items from oldlist to newlist */ \ + _np->dll_next = (oldlist)->dll_next; \ + (oldlist)->dll_next->dll_prev = _np; \ + /* oldlist end should now point to newlist head */ \ + _op->dll_next = (newlist); \ + (newlist)->dll_prev = _op; \ + DLL_INIT(oldlist); \ + } \ +} while(0) + +#define DLL_ISEMPTY(dllp) ((dllp)->dll_next == (dllp)) + +#define DLL_PREV(dllp) ((dllp)->dll_prev) +#define DLL_NEXT(dllp) ((dllp)->dll_next) + +#endif /* DDL_H */ diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c index e7d4e8c7..5c44451e 100644 --- a/usr/iscsi/conn.c +++ b/usr/iscsi/conn.c @@ -116,7 +116,6 @@ void conn_close(struct iscsi_connection *conn) c_list) { if (task->conn != conn) continue; - eprintf("Forcing release of pending task %p %" PRIx64 "\n", task, task->tag); list_del(&task->c_list); @@ -191,6 +190,7 @@ void conn_close(struct iscsi_connection *conn) * This task is in SCSI. We need to wait for I/O * completion. */ + if (task_in_scsi(task)) continue; iscsi_free_task(task); diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c index 536f22e5..56d43911 100644 --- a/usr/iscsi/iscsi_tcp.c +++ b/usr/iscsi/iscsi_tcp.c @@ -280,8 +280,18 @@ static void iscsi_tcp_event_handler(int fd, int events, void *data) if (conn->state == STATE_CLOSE) dprintf("connection closed\n"); - if (conn->state != STATE_CLOSE && events & EPOLLOUT) - iscsi_tx_handler(conn); + if (conn->state != STATE_CLOSE && events & EPOLLOUT) { + if (conn->state == STATE_SCSI) { + do { + int ret = iscsi_tx_handler(conn); + if (ret) { + break; + } + } while (conn->state == STATE_SCSI && !list_empty(&conn->tx_clist)); + } else { + iscsi_tx_handler(conn); + } + } if (conn->state == STATE_CLOSE) { dprintf("connection closed %p\n", conn); diff --git a/usr/target.c b/usr/target.c index 84324380..5ac1198a 100644 --- a/usr/target.c +++ b/usr/target.c @@ -1163,7 +1163,6 @@ int target_cmd_perform(int tid, struct scsi_cmd *cmd) enabled = cmd_enabled(q, cmd); dprintf("%p %x %" PRIx64 " %d\n", cmd, cmd->scb[0], cmd->dev_id, enabled); - if (enabled) { result = scsi_cmd_perform(cmd->it_nexus->host_no, cmd); @@ -1322,7 +1321,7 @@ static int abort_cmd(struct target *target, struct mgmt_req *mreq, { int err = 0; - eprintf("found %" PRIx64 " %lx\n", cmd->tag, cmd->state); + eprintf("\nfound %" PRIx64 " %lx\n", cmd->tag, cmd->state); if (cmd_processed(cmd)) { /* @@ -1330,12 +1329,12 @@ static int abort_cmd(struct target *target, struct mgmt_req *mreq, * We'll send the tsk mgmt response when we get the * completion of this command. */ + cmd->mreq = mreq; - err = -EBUSY; - } else { - cmd->dev->cmd_done(target, cmd); - target_cmd_io_done(cmd, TASK_ABORTED); + eprintf("\n%s: cmd:%p\n", __func__, cmd); + err = cmd->dev->cmd_perform(target->tid, cmd); } + return err; } @@ -1346,7 +1345,7 @@ static int abort_task_set(struct mgmt_req *mreq, struct target *target, struct it_nexus *itn; int err, count = 0; - eprintf("found %" PRIx64 " %d\n", tag, all); + eprintf("\nfound %" PRIx64 " %d\n", tag, all); list_for_each_entry(itn, &target->it_nexus_list, nexus_siblings) { list_for_each_entry_safe(cmd, tmp, &itn->cmd_list, c_hlist) { diff --git a/usr/tgtd.c b/usr/tgtd.c index ae2f4899..a4babe2d 100644 --- a/usr/tgtd.c +++ b/usr/tgtd.c @@ -31,18 +31,26 @@ #include #include #include +#include #include #include #include #include #include - #include "list.h" #include "tgtd.h" #include "driver.h" #include "work.h" #include "util.h" +#include "TgtInterface.h" + +#include "halib.h" + +#define RETRY 24 +#define DELAY 5 +#define MAX_REST_CALLS 40 + unsigned long pagesize, pageshift; int system_active = 1; @@ -51,6 +59,14 @@ static char program_name[] = "tgtd"; static LIST_HEAD(tgt_events_list); static LIST_HEAD(tgt_sched_events_list); +static pthread_t ha_hb_tid; +static struct _ha_instance *ha; +static bool ha_thread_init = false; +static pthread_mutex_t ha_mutex; +static pthread_mutex_t ha_rest_mutex; +int active_rest_calls = 0; +static pthread_mutex_t ha_active_call_cnt_mutex; + static struct option const long_options[] = { {"foreground", no_argument, 0, 'f'}, {"control-port", required_argument, 0, 'C'}, @@ -58,10 +74,16 @@ static struct option const long_options[] = { {"debug", required_argument, 0, 'd'}, {"version", no_argument, 0, 'V'}, {"help", no_argument, 0, 'h'}, + {"etcd_ip", required_argument, 0, 'e'}, + {"svc_label", required_argument, 0, 's'}, + {"version_for_ha", required_argument, 0, 'v'}, + {"ha_svc_port", required_argument, 0, 'p'}, + {"stord_ip", required_argument, 0, 'D'}, + {"stord_port", required_argument, 0, 'P'}, {0, 0, 0, 0}, }; -static char *short_options = "fC:d:t:Vh"; +static char *short_options = "fC:d:t:Vhe:s:v:p:D:P:"; static char *spare_args; static void usage(int status) @@ -79,6 +101,11 @@ static void usage(int status) "-t, --nr_iothreads NNNN specify the number of I/O threads\n" "-d, --debug debuglevel print debugging information\n" "-V, --version print version and exit\n" + "-p, --ha_svc_port HA service port number\n" + "-e, --etcd_ip give etcd_ip to configure ha-lib with\n" + "-s, --svc_label service label needed for ha-lib\n" + "-v, --version_for_ha tgt version used by ha-lib\n" + "-D, --stord_ip stord ip address to connect with\n" "-h, --help display this help and exit\n", TGT_VERSION, program_name); exit(0); @@ -246,9 +273,14 @@ int tgt_event_modify(int fd, int events) return -EINVAL; } + if (tev->events == events) { + return 0; + } + memset(&ev, 0, sizeof(ev)); ev.events = events; ev.data.ptr = tev; + tev->events = events; return epoll_ctl(ep_fd, EPOLL_CTL_MOD, fd, &ev); } @@ -518,6 +550,660 @@ static int parse_params(char *name, char *p) return -1; } +void *ha_heartbeat(void *arg) +{ + struct _ha_instance *hap = (struct _ha_instance *) arg; + + while (1) { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0); + ha_healthupdate(hap); + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0); + sleep(20); + } + pthread_mutex_lock(&ha_mutex); + ha_thread_init = false; + pthread_mutex_unlock(&ha_mutex); +} + +enum tgt_svc_err { + TGT_ERR_INVALID_PARAM = 1, + TGT_ERR_TARGET_CREATE, + TGT_ERR_NO_DATA, + TGT_ERR_INVALID_JSON, + TGT_ERR_INVALID_TARGET_NAME, + TGT_ERR_INVALID_DEV_NAME, + TGT_ERR_INVALID_VMID, + TGT_ERR_INVALID_VMDKID, + TGT_ERR_LUN_CREATE, + TGT_ERR_TOO_LONG, + TGT_ERR_TARGET_BIND, + TGT_ERR_SPARSE_FILE_DIR_CREATE, + TGT_ERR_INVALID_LUN_SIZE, + TGT_ERR_SPARSE_FILE_CREATE, + TGT_ERR_INVALID_STORD_IP, + TGT_ERR_INVALID_STORD_PORT, + TGT_ERR_INVALID_DELETE_FORCE, + TGT_ERR_TARGET_DELETE, + TGT_ERR_INVALID_LUNID, + TGT_ERR_LUN_DELETE, + TGT_ERR_STR_OUT_OF_RANGE, + TGT_ERR_HA_MAX_LIMIT, +}; + +static void set_err_msg(_ha_response *resp, enum tgt_svc_err err, + char *msg) +{ + char *err_msg = ha_get_error_message(ha, err, msg); + ha_set_response_body(resp, HTTP_STATUS_ERR, err_msg, + strlen(err_msg) + 1); + free(err_msg); +} + +static int exec(char *cmd) +{ + FILE *filp = NULL; + int ret = 0; + int status = 0; + + filp = popen(cmd, "r"); + if (filp == NULL) { + return -1; + } + + ret = pclose(filp); + status = WEXITSTATUS(ret); + + return status; +} + +static int disallow_rest_call() +{ + int rc = 0; + pthread_mutex_lock(&ha_active_call_cnt_mutex); + if (active_rest_calls > MAX_REST_CALLS) { + rc = 1; + eprintf("Rejecting request\n"); + pthread_mutex_unlock(&ha_active_call_cnt_mutex); + return rc; + } + + ++active_rest_calls; + pthread_mutex_unlock(&ha_active_call_cnt_mutex); + return rc; +} + +static void remove_rest_call() +{ + pthread_mutex_lock(&ha_active_call_cnt_mutex); + --active_rest_calls; + pthread_mutex_unlock(&ha_active_call_cnt_mutex); +} + + +static int target_create(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + char cmd[512]; + const char *tid = ha_parameter_get(reqp, "tid"); + int rc = 0; + char *data = NULL; + + if (tid == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_PARAM, + "tid param not given"); + return HA_CALLBACK_CONTINUE; + } + + data = ha_get_data(reqp); + if (data == NULL) { + set_err_msg(resp, TGT_ERR_NO_DATA, + "json config not given"); + return HA_CALLBACK_CONTINUE; + } + + json_error_t error; + json_auto_t *root = json_loads(data, 0, &error); + + free(data); + if (root == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_JSON, + "json config is incorrect"); + return HA_CALLBACK_CONTINUE; + } + + json_t *tname = json_object_get(root, "TargetName"); + if (!json_is_string(tname)) { + set_err_msg(resp, TGT_ERR_INVALID_TARGET_NAME, + "TargetName is not string"); + return HA_CALLBACK_CONTINUE; + } + + memset(cmd, 0, sizeof(cmd)); + + int len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode target --op new" + " --tid=%s --targetname=%s", tid, json_string_value(tname)); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + return HA_CALLBACK_CONTINUE; + } + + if (disallow_rest_call()) { + set_err_msg(resp, TGT_ERR_HA_MAX_LIMIT, + "Too many pending requests at TGT. Retry after some time"); + return HA_CALLBACK_CONTINUE; + } + + pthread_mutex_lock(&ha_rest_mutex); + rc = exec(cmd); + if (rc) { + set_err_msg(resp, TGT_ERR_TARGET_CREATE, + "target create failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + memset(cmd, 0, sizeof(cmd)); + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --op bind --mode target --tid %s -I ALL", tid); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + rc = exec(cmd); + if (rc) { + set_err_msg(resp, TGT_ERR_TARGET_BIND, + "target bind failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + ha_set_empty_response_body(resp, HTTP_STATUS_OK); + + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; +} + +static int lun_create(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + char cmd[512]; + const char *tid = ha_parameter_get(reqp, "tid"); + const char *lid = ha_parameter_get(reqp, "lid"); + int rc = 0; + char *data = NULL; + + if (tid == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_PARAM, + "tid param not given"); + return HA_CALLBACK_CONTINUE; + } + + if (lid == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_PARAM, + "lid param not given"); + return HA_CALLBACK_CONTINUE; + } + + data = ha_get_data(reqp); + if (data == NULL) { + set_err_msg(resp, TGT_ERR_NO_DATA, + "json config not given"); + return HA_CALLBACK_CONTINUE; + } + + json_error_t error; + json_auto_t *root = json_loads(data, 0, &error); + + free(data); + if (root == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_JSON, + "json config is incorrect"); + return HA_CALLBACK_CONTINUE; + } + + json_t *dev_name = json_object_get(root, "DevName"); + if (!json_is_string(dev_name)) { + set_err_msg(resp, TGT_ERR_INVALID_DEV_NAME, + "DevName is not string"); + return HA_CALLBACK_CONTINUE; + } + + json_t *lun_size = json_object_get(root, "LunSize"); + if (!json_is_string(lun_size)) { + set_err_msg(resp, TGT_ERR_INVALID_LUN_SIZE, + "Lun size is not json string"); + return HA_CALLBACK_CONTINUE; + } + + json_t *vmid = json_object_get(root, "VmID"); + if (!json_is_string(vmid)) { + set_err_msg(resp, TGT_ERR_INVALID_VMID, + "VmID is not string"); + return HA_CALLBACK_CONTINUE; + } + + json_t *vmdkid = json_object_get(root, "VmdkID"); + if (!json_is_string(vmdkid)) { + set_err_msg(resp, TGT_ERR_INVALID_VMDKID, + "VmdkID is not string"); + return HA_CALLBACK_CONTINUE; + } + + memset(cmd, 0, sizeof(cmd)); + + /* Create sparse file directory if not already created */ + const char *hyc_sparse_files_loc = "/var/hyc"; + + int len = snprintf(cmd, sizeof(cmd), + "mkdir -p %s", hyc_sparse_files_loc); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "mkdir cmd too long"); + return HA_CALLBACK_CONTINUE; + } + + if (disallow_rest_call()) { + eprintf("Request rejected for %s\n", lid); + set_err_msg(resp, TGT_ERR_HA_MAX_LIMIT, + "Too many pending requests at TGT. Retry after some time"); + return HA_CALLBACK_CONTINUE; + } + + pthread_mutex_lock(&ha_rest_mutex); + + rc = exec(cmd); + if (rc) { + set_err_msg(resp, TGT_ERR_SPARSE_FILE_DIR_CREATE, + "sparse files dir create failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + /* Reset cmd, for future use */ + memset(cmd, 0, sizeof(cmd)); + len = 0; + + /* Create sparse file for this LUN */ + len = snprintf(cmd, sizeof(cmd), + "dd if=/dev/zero of=%s/%s bs=1 count=0 seek=%sG", + hyc_sparse_files_loc, json_string_value(dev_name), + json_string_value(lun_size)); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "spare file create cmd too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + rc = exec(cmd); + if (rc) { + set_err_msg(resp, TGT_ERR_SPARSE_FILE_CREATE, + "sparse file create failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + /* Reset cmd, for future use */ + memset(cmd, 0, sizeof(cmd)); + len = 0; + + char dev_path[512]; + + memset(dev_path, 0, sizeof(dev_path)); + + len = snprintf(dev_path, sizeof(dev_path), "%s/%s", hyc_sparse_files_loc, + json_string_value(dev_name)); + if (len >= sizeof(dev_path)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "dev_path too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + len = 0; + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode logicalunit --op new" + " --tid=%s --lun=%s -b %s --bstype hyc --bsopts vmid=%s:vmdkid=%s", + tid, lid, dev_path, json_string_value(vmid), + json_string_value(vmdkid)); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + rc = exec(cmd); + + if (rc) { + set_err_msg(resp, TGT_ERR_LUN_CREATE, + "target create failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + ha_set_empty_response_body(resp, HTTP_STATUS_OK); + + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; +} + +static int new_stord(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + char *data = NULL; + + char *hyc_argv[1] = {"tgtd"}; + uint16_t stord_port = 0; + + data = ha_get_data(reqp); + if (data == NULL) { + set_err_msg(resp, TGT_ERR_NO_DATA, + "json config not given"); + return HA_CALLBACK_CONTINUE; + } + + json_error_t error; + json_auto_t *root = json_loads(data, 0, &error); + + free(data); + + if (root == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_JSON, + "json config is incorrect"); + return HA_CALLBACK_CONTINUE; + } + + json_t *sip = json_object_get(root, "StordIp"); + if (!json_is_string(sip)) { + set_err_msg(resp, TGT_ERR_INVALID_STORD_IP, + "StordIp is not string"); + return HA_CALLBACK_CONTINUE; + } + + json_t *sport = json_object_get(root, "StordPort"); + if (!json_is_string(sport)) { + set_err_msg(resp, TGT_ERR_INVALID_STORD_PORT, + "StordPort is not string"); + return HA_CALLBACK_CONTINUE; + } + + /* + * Linux allows user's to use ports 1 - 65535, but many + * linux kernels use ephemeral ports from range 32768 to 61000 + * so we are keeping our range 1 - 32767 + */ + int ret = str_to_int_range((char *)json_string_value(sport), + stord_port, 1, 32768); + if (ret) { + set_err_msg(resp, TGT_ERR_INVALID_STORD_PORT, + "StordPort out of range"); + return HA_CALLBACK_CONTINUE; + + } + + //TODO: Add error handling for Stord init + if (disallow_rest_call()) { + set_err_msg(resp, TGT_ERR_HA_MAX_LIMIT, + "Too many pending requests at TGT. Retry after some time"); + return HA_CALLBACK_CONTINUE; + } + + pthread_mutex_lock(&ha_rest_mutex); + HycStorInitialize(1, hyc_argv, (char *)json_string_value(sip), + stord_port); + + ha_set_empty_response_body(resp, HTTP_STATUS_OK); + pthread_mutex_unlock(&ha_rest_mutex); + ret = HycStorRpcServerConnect(); + assert(ret == 0); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; +} + +static int target_delete(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + char cmd[512]; + const char *tid = ha_parameter_get(reqp, "tid"); + const char *force_param = ha_parameter_get(reqp, "force"); + int rc = 0; + int len = 0; + int force = 0; + int retry; + + retry = RETRY; + + if (tid == NULL || force_param == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_PARAM, + "tid param not given"); + return HA_CALLBACK_CONTINUE; + } + + memset(cmd, 0, sizeof(cmd)); + + rc = str_to_int(force_param, force); + if (rc) { + set_err_msg(resp, TGT_ERR_INVALID_DELETE_FORCE, + "Invalid value of force param"); + return HA_CALLBACK_CONTINUE; + } + + /*Unbind before delete*/ + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode target --op unbind --tid=%s" + " -I ALL", tid); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_STR_OUT_OF_RANGE, + "tgt unbind cmd #characters out of range"); + return HA_CALLBACK_CONTINUE; + } + + if (disallow_rest_call()) { + set_err_msg(resp, TGT_ERR_HA_MAX_LIMIT, + "Too many pending requests at TGT. Retry after some time"); + return HA_CALLBACK_CONTINUE; + } + + pthread_mutex_lock(&ha_rest_mutex); + + //Ignoring error for now + rc = exec(cmd); + memset(cmd, 0, sizeof(cmd)); +#if 0 + /* + * Keeping the below code in place for now as it is needed later. + * Actual code must find proper connections, luns and delete them. + */ + + /*conn delete*/ + for (int i = 0; i < 20; i++) { + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode conn --op delete --tid=%s" + " --sid %d --cid 0", tid, i); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + //Ignoring error for now + rc = exec(cmd); + memset(cmd, 0, sizeof(cmd)); + } + + /*lun delete*/ + for (int i = 0; i < 20; i++) { + memset(cmd, 0, sizeof(cmd)); + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode logicalunit --op delete" + " --tid=%s --lun=%d", tid, i); + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + //Ignoring error for now + rc = exec(cmd); + } +#endif + /*actual target delete*/ + memset(cmd, 0, sizeof(cmd)); + if (force) { + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode target --op delete --force" + " --tid=%s", tid); + } else { + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode target --op delete" + " --tid=%s", tid); + } + + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + while (retry > 0) { + rc = exec(cmd); + if (rc == TGTADM_LUN_ACTIVE || + rc == TGTADM_TARGET_ACTIVE || + rc == TGTADM_DRIVER_ACTIVE || + rc == TGTADM_UNSUPPORTED_OPERATION) { + fprintf(stderr, "Retrying for errno: %d\n", rc); + sleep(DELAY); + retry--; + continue; + } + break; + } + if (rc) { + set_err_msg(resp, TGT_ERR_TARGET_DELETE, + "target delete failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + ha_set_empty_response_body(resp, HTTP_STATUS_OK); + + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; +} + +static int lun_delete(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + char cmd[512]; + int rc, len; + const char *tid, *lid; + + rc = 0; + + tid = ha_parameter_get(reqp, "tid"); + if (tid == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_PARAM, + "tid param not given"); + return HA_CALLBACK_CONTINUE; + } + lid = ha_parameter_get(reqp, "lid"); + if (tid == NULL) { + set_err_msg(resp, TGT_ERR_INVALID_PARAM, + "lid param not given"); + return HA_CALLBACK_CONTINUE; + } + + memset(cmd, 0, sizeof(cmd)); + + len = snprintf(cmd, sizeof(cmd), + "tgtadm --lld iscsi --mode logicalunit --op delete" + " --tid=%s --lun=%s", tid, lid); + + if (len >= sizeof(cmd)) { + set_err_msg(resp, TGT_ERR_TOO_LONG, + "tgt cmd too long"); + return HA_CALLBACK_CONTINUE; + } + + if (disallow_rest_call()) { + set_err_msg(resp, TGT_ERR_HA_MAX_LIMIT, + "Too many pending requests at TGT. Retry after some time"); + return HA_CALLBACK_CONTINUE; + } + + pthread_mutex_lock(&ha_rest_mutex); + rc = exec(cmd); + if (rc) { + set_err_msg(resp, TGT_ERR_LUN_DELETE, + "TGT lun delete failed"); + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; + } + + ha_set_empty_response_body(resp, HTTP_STATUS_OK); + + pthread_mutex_unlock(&ha_rest_mutex); + remove_rest_call(); + return HA_CALLBACK_CONTINUE; +} + + + +int tgt_ha_start_cb(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + struct _ha_instance *hap = (struct _ha_instance *) userp; + + pthread_mutex_lock(&ha_mutex); + if (!ha_thread_init) { + int rc = pthread_create(&ha_hb_tid, NULL, + &ha_heartbeat, (void *)hap); + if (rc) { + pthread_mutex_unlock(&ha_mutex); + return HA_CALLBACK_ERROR; + } + ha_thread_init = true; + } + pthread_mutex_unlock(&ha_mutex); + return HA_CALLBACK_CONTINUE; +} + +int tgt_ha_stop_cb(const _ha_request *reqp, + _ha_response *resp, void *userp) +{ + pthread_mutex_lock(&ha_mutex); + + if (ha_thread_init) { + pthread_cancel(ha_hb_tid); + pthread_join(ha_hb_tid, NULL); + } + pthread_mutex_unlock(&ha_mutex); + + return HA_CALLBACK_CONTINUE; +} + int main(int argc, char **argv) { struct sigaction sa_old; @@ -525,7 +1211,18 @@ int main(int argc, char **argv) int err, ch, longindex, nr_lld = 0; int is_daemon = 1, is_debug = 0; int ret; - + struct ha_handlers *ep_handlers = malloc(sizeof(struct ha_handlers) + + 5 * sizeof(struct ha_endpoint_handlers)); + char *etcd_ip = NULL; + char *svc_label = NULL; + char *tgt_version = NULL; + int ha_svc_port = 0; + char *stord_ip = NULL; + uint16_t stord_port = 0; + int *ha_handler_idx; + + if (ep_handlers == NULL) + exit(1); sa_new.sa_handler = signal_catch; sigemptyset(&sa_new.sa_mask); sa_new.sa_flags = 0; @@ -539,6 +1236,53 @@ int main(int argc, char **argv) opterr = 0; + if (pthread_mutex_init(&ha_mutex, NULL) != 0) + exit(1); + + if (pthread_mutex_init(&ha_rest_mutex, NULL) != 0) + exit(1); + + if (pthread_mutex_init(&ha_active_call_cnt_mutex, NULL) != 0) + exit(1); + + ep_handlers->ha_count = 0; + ha_handler_idx = &ep_handlers->ha_count; + + ep_handlers->ha_endpoints[*ha_handler_idx].ha_http_method = POST; + strncpy(ep_handlers->ha_endpoints[*ha_handler_idx].ha_url_endpoint, + "target_create", strlen("target_create") + 1); + ep_handlers->ha_endpoints[*ha_handler_idx].callback_function = target_create; + ep_handlers->ha_endpoints[*ha_handler_idx].ha_user_data = NULL; + ep_handlers->ha_count += 1; + + ep_handlers->ha_endpoints[*ha_handler_idx].ha_http_method = POST; + strncpy(ep_handlers->ha_endpoints[*ha_handler_idx].ha_url_endpoint, "lun_create", + strlen("lun_create") + 1); + ep_handlers->ha_endpoints[*ha_handler_idx].callback_function = lun_create; + ep_handlers->ha_endpoints[*ha_handler_idx].ha_user_data = NULL; + ep_handlers->ha_count += 1; + + ep_handlers->ha_endpoints[*ha_handler_idx].ha_http_method = POST; + strncpy(ep_handlers->ha_endpoints[*ha_handler_idx].ha_url_endpoint, "new_stord", + strlen("new_stord") + 1); + ep_handlers->ha_endpoints[*ha_handler_idx].callback_function = new_stord; + ep_handlers->ha_endpoints[*ha_handler_idx].ha_user_data = NULL; + ep_handlers->ha_count += 1; + + ep_handlers->ha_endpoints[*ha_handler_idx].ha_http_method = POST; + strncpy(ep_handlers->ha_endpoints[*ha_handler_idx].ha_url_endpoint, "target_delete", + strlen("target_delete") + 1); + ep_handlers->ha_endpoints[*ha_handler_idx].callback_function = target_delete; + ep_handlers->ha_endpoints[*ha_handler_idx].ha_user_data = NULL; + ep_handlers->ha_count += 1; + + ep_handlers->ha_endpoints[*ha_handler_idx].ha_http_method = POST; + strncpy(ep_handlers->ha_endpoints[*ha_handler_idx].ha_url_endpoint, "lun_delete", + strlen("lun_delete") + 1); + ep_handlers->ha_endpoints[*ha_handler_idx].callback_function = lun_delete; + ep_handlers->ha_endpoints[*ha_handler_idx].ha_user_data = NULL; + ep_handlers->ha_count += 1; + while ((ch = getopt_long(argc, argv, short_options, long_options, &longindex)) >= 0) { switch (ch) { @@ -566,6 +1310,28 @@ int main(int argc, char **argv) case 'h': usage(0); break; + case 'p': + ret = str_to_int_range(optarg, ha_svc_port, 1, 32768); + if (ret) + bad_optarg(ret, ch, optarg); + break; + case 'e': + etcd_ip = strdup(optarg); + break; + case 's': + svc_label = strdup(optarg); + break; + case 'v': + tgt_version = strdup(optarg); + break; + case 'D': + stord_ip = strdup(optarg); + break; + case 'P': + ret = str_to_int_range(optarg, stord_port, 1, 32768); + if (ret) + bad_optarg(ret, ch, optarg); + break; default: if (strncmp(argv[optind - 1], "--", 2)) usage(1); @@ -578,49 +1344,87 @@ int main(int argc, char **argv) } } + if ((etcd_ip == NULL) || (svc_label == NULL) || + (tgt_version == NULL) || (ha_svc_port == 0)) { + free(etcd_ip); + free(svc_label); + free(tgt_version); + free(ep_handlers); + free(stord_ip); + usage(0); + exit(1); + } + + ha = ha_initialize(ha_svc_port, etcd_ip, svc_label, tgt_version, 120, + ep_handlers, tgt_ha_start_cb, tgt_ha_stop_cb, 0 , NULL); + + if (ha == NULL) { + fprintf(stderr, "ha_initilize failed\n"); + free(etcd_ip); + free(svc_label); + free(tgt_version); + free(ep_handlers); + free(stord_ip); + exit(1); + } + + ep_fd = epoll_create(4096); if (ep_fd < 0) { fprintf(stderr, "can't create epoll fd, %m\n"); + ha_deinitialize(ha); exit(1); } spare_args = optind < argc ? argv[optind] : NULL; - if (is_daemon && daemon(0, 0)) + if (is_daemon && daemon(0, 0)) { + ha_deinitialize(ha); exit(1); + } err = ipc_init(); - if (err) + if (err) { + ha_deinitialize(ha); exit(1); + } err = log_init(program_name, LOG_SPACE_SIZE, is_daemon, is_debug); - if (err) + if (err) { + ha_deinitialize(ha); exit(1); + } nr_lld = lld_init(); if (!nr_lld) { + ha_deinitialize(ha); fprintf(stderr, "No available low level driver!\n"); exit(1); } err = oom_adjust(); - if (err && (errno != EACCES) && getuid() == 0) + if (err && (errno != EACCES) && getuid() == 0) { + ha_deinitialize(ha); exit(1); + } err = nr_file_adjust(); - if (err) + if (err) { + ha_deinitialize(ha); exit(1); + } err = work_timer_start(); - if (err) + if (err) { + ha_deinitialize(ha); exit(1); + } bs_init(); #ifdef USE_SYSTEMD sd_notify(0, "READY=1\nSTATUS=Starting event loop..."); #endif - event_loop(); lld_exit(); @@ -629,7 +1433,15 @@ int main(int argc, char **argv) ipc_exit(); + free(etcd_ip); + free(svc_label); + free(tgt_version); + free(ep_handlers); + free(stord_ip); + log_close(); + ha_deinitialize(ha); + return 0; } diff --git a/usr/tgtd.h b/usr/tgtd.h index d8b2ac13..d6b6bd1e 100644 --- a/usr/tgtd.h +++ b/usr/tgtd.h @@ -395,6 +395,7 @@ struct event_data { int fd; int scheduled; }; + int events; void *data; struct list_head e_list; };