debugging hadoop extension

dice-project · Aug 25, 2016 · e647495 · e647495
1 parent c0be74a
commit e647495
Show file tree

Hide file tree

Showing 6 changed files with 27 additions and 16 deletions.
diff --git a/src/conf/expconfig.yaml b/src/conf/expconfig.yaml
@@ -43,10 +43,12 @@ services:
     password: "PASSWORD"  
     storm_client: "/usr/local/hadoop/"     
 application:
+  cli_file: "commandLineTool.jar" 
   jar_file: "storm-starter-0.9.3.jar"
   jar_path: URL # URL from which the jar file will be submitted; e.g., put the prepared jar file on Dropbox and use the Dropbox link here
   class: "storm.starter.WordCountTopology"
   name: "wordcount"
+  args: "input,output"
   type: "storm" # hadoop spark cassandra
 # Information about the parameters. Note that for each parameter the specific technology
 # (e.g., storm) is also specified via "node".

diff --git a/src/integrated/f.m b/src/integrated/f.m
@@ -25,7 +25,7 @@
         response_1=latency;
         response_2=throughput;
     case 'hadoop'
-        [job_completion_time]=f_hadoop(x);
+        [job_completion_time]=f_hadoop(setting);
         response_1=job_completion_time;
         response_2=-1; % throughput is not applicable for batch applications
 end
diff --git a/src/integrated/hadoop/deploy_hadoop_mapreduce_job.m b/src/integrated/hadoop/deploy_hadoop_mapreduce_job.m
@@ -1,4 +1,4 @@
-function [status]=deploy_hadoop_mapreduce_job(setting)
+function deploy_hadoop_mapreduce_job(setting)
 % Submit a mapreduce job with a specific configuration setting.
 
 % Authors: Pooyan Jamshidi ([email protected])
@@ -22,9 +22,9 @@
 extrastr = ' ';
 
 % clean hdfs for the job
-if strcmp(status,'deployed')
-    undeploy_storm_topology(deployment_id);
-end
+% if strcmp(status,'deployed')
+%     undeploy_storm_topology(deployment_id);
+% end
 
 % prepare the connection
 ssh2_conn = ssh2_config(hadoop_.ip,hadoop_.username,hadoop_.password);
@@ -45,11 +45,12 @@
 % added to the path
 config_str='';
 for i=1:length(options_)
-    config_str=[config_str extrastr options_{1,i} '=' num2str(setting(i)) ';'];
+    config_str=[config_str options_{1,i} '=' num2str(setting(i)) ';'];
 end
+config_str=config_str(1:end-1); % taking out the last ;
 
-cli='java -jar CommandLineTool.jar';
-cmd=[cli extrastr '-jar' extrastr application_.jar_file extrastr '-params' extrastr config_str extrastr '-class' extrastr application_.class extrastr '-args' extrastr application_.args extrastr '-applicationReplication' extrastr replication_];
+cli=['java -jar' extrastr application_.cli_file];
+cmd=[cli extrastr '-jar' extrastr application_.jar_file extrastr '-params' extrastr '"' config_str '"' extrastr '-class' extrastr application_.class extrastr '-args' extrastr application_.args extrastr '-applicationReplication' extrastr int2str(replication_)];
 [ssh2_conn, response] = ssh2_command(ssh2_conn,cmd);
 
 ssh2_conn = ssh2_close(ssh2_conn); %will call ssh2.m and run command and then close connection

diff --git a/src/integrated/hadoop/f_hadoop.m b/src/integrated/hadoop/f_hadoop.m
@@ -17,12 +17,12 @@
     application_=getmcruserdata('application');
 end
 
-try
-    % deploy the job under a specific setting
-    [status]=deploy_hadoop_mapreduce_job(setting);
-catch ME
-    warning(ME.message);
-end
+% try
+%     % deploy the job under a specific setting
+%     deploy_hadoop_mapreduce_job(setting);
+% catch ME
+%     warning(ME.message);
+% end
 expdata_csv_name=strcat(application_.name,'_metrics_',num2str(datenum(datetime('now')),'%bu'),'.csv');
 
 if ~isempty(expdata_csv_name)

diff --git a/src/integrated/hadoop/summarize_expdata_hadoop.m b/src/integrated/hadoop/summarize_expdata_hadoop.m
@@ -24,9 +24,10 @@
 movefile([save_folder_ 'data.csv'],[save_folder_ expdata_csv_name]);
 
 summary=[];
-
 filename=[save_folder_ expdata_csv_name];
-thiscsv=csvread(filename,firstrow,0);
+firstrow = 1;
+firstcol = number_of_cols(filename)-1;
+thiscsv=csvread(filename,firstrow,firstcol);
 
 % if a percentile is required instead of the mean, replace it with prctile(X,p)
 if ~isempty(thiscsv)

diff --git a/src/integrated/number_of_cols.m b/src/integrated/number_of_cols.m
@@ -0,0 +1,27 @@
+function numberOfCols=number_of_cols(filename)
+% NUMBER_OF_COLS Count the comma-separated columns in the first line of a file.
+%
+%   numberOfCols = number_of_cols(filename) opens the text file FILENAME,
+%   reads only its first line and returns the number of commas in that
+%   line plus one.
+%
+%   Commas are counted literally; commas inside quoted fields are not
+%   treated specially.
+%
+%   An error is raised if the file cannot be opened or contains no lines.
+
+[fid, errmsg] = fopen(filename);
+if fid == -1
+    error('number_of_cols:cannotOpen', 'Cannot open file "%s": %s', filename, errmsg);
+end
+% make sure the handle is released even if reading fails
+closeFile = onCleanup(@() fclose(fid));
+
+% only the first line is needed, so do not load the whole file
+firstLine = fgetl(fid);
+if ~ischar(firstLine)
+    error('number_of_cols:emptyFile', 'File "%s" is empty.', filename);
+end
+
+numberOfCols = nnz(firstLine == ',') + 1;
+end