We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Describe the bug When execute the SQL, origin Spark and Blaze will get different value.
To Reproduce Steps to reproduce the behavior:
CREATE TABLE blaze_t.wp_history ( vender_id BIGINT , sage_item_no STRING , matnr STRING , purchase_category5_code STRING , purchase_category5_name STRING , purchase_category4_code STRING , purchase_category4_name STRING , purchase_category3_code STRING , purchase_category3_name STRING , purchase_category2_code STRING , purchase_category2_name STRING , purchase_category1_code STRING , purchase_category1_name STRING ) stored as parquet; CREATE TABLE blaze_t.dw ( vender_id BIGINT , matnr STRING , sku_id INT , item_num STRING , matkl STRING , ware_name STRING , offline_ware_name STRING ) stored as parquet; insert into blaze_t.dw values (9,'100101',101427920,'100101','100001','100101(auto-generated)t','100101(auto-generated)'); CREATE TABLE blaze_t.cat_class ( id INT, vender_id INT , code STRING , name STRING , parent_id INT , is_leaf INT , status INT , applier STRING , created STRING , reviser STRING , modified STRING ) stored as parquet; insert into blaze_t.cat_class values (4852779,9,'100001','0APPLES',0,1,1,'AB-MQ','2020-12-02 14:12:05','AB-MQ','2021-11-02 03:24:18'); with l1_cat as ( select vender_id ,id as l1_id ,code as l1_code ,name as l1_name ,classifylist ,modified from blaze_t.test_level where yn=1 and status=1 and level=1 and shop_type=0 ), l2_cat as ( select vender_id ,id as l2_id ,code as l2_code ,name as l2_name ,parent_id as l1_id ,classifylist ,modified from blaze_t.test_level where yn=1 and status=1 and level=2 and shop_type=0 ), l3_cat as ( select vender_id ,id as l3_id ,code as l3_code ,name as l3_name ,parent_id as l2_id ,classifylist ,modified from blaze_t.test_level where yn=1 and status=1 and level=3 and shop_type=0 ), l4_cat as ( select vender_id ,id as l4_id ,code as l4_code ,name as l4_name ,parent_id as l3_id ,classifylist ,modified from blaze_t.test_level where yn=1 and status=1 and level=4 and shop_type=0 ), l5_cat as ( select vender_id ,id as l5_id ,code as l5_code ,name as l5_name ,parent_id as l4_id ,classifylist ,modified from blaze_t.test_level where yn=1 and status=1 and level=5 and shop_type=0 ), l6_cat as ( select vender_id ,id as l6_id ,code as l6_code ,name as l6_name ,parent_id as l5_id ,classifylist ,modified from blaze_t.test_level where yn=1 and status=1 and level=6 and shop_type=0 ), all_cat as ( select l1.vender_id ,l1.l1_id ,l1.l1_code ,l1.l1_name ,l2.l2_id ,l2.l2_code ,l2.l2_name ,l3.l3_id ,l3.l3_code ,l3.l3_name ,l4.l4_id ,l4.l4_code ,l4.l4_name ,l5.l5_id ,l5.l5_code ,l5.l5_name ,l6.l6_id ,l6.l6_code ,l6.l6_name ,COALESCE(l6.classifylist,l5.classifylist,l4.classifylist,l3.classifylist,l2.classifylist,l1.classifylist,'') as classifylist ,sort_array(array(l6.modified,l5.modified,l4.modified,l3.modified,l2.modified,l1.modified))[4] as modified from l1_cat l1 left join l2_cat l2 on ( l1.l1_id = l2.l1_id and l1.vender_id = l2.vender_id ) left join l3_cat l3 on ( l2.l2_id = l3.l2_id and l2.vender_id = l3.vender_id ) left join l4_cat l4 on ( l3.l3_id = l4.l3_id and l3.vender_id = l4.vender_id ) left join l5_cat l5 on ( l4.l4_id = l5.l4_id and l4.vender_id = l5.vender_id ) left join l6_cat l6 on ( l5.l5_id = l6.l5_id and l5.vender_id = l6.vender_id ) ), class_table_tmp as ( select vender_id ,l1_id ,l1_code ,l1_name ,l2_id ,l2_code ,l2_name ,l3_id ,l3_code ,l3_name ,l4_id ,l4_code ,l4_name ,l5_id ,l5_code ,l5_name ,l6_id ,l6_code ,l6_name ,matkl_id ,modified from all_cat lateral view explode(split(classifylist,'#')) tmp_t as matkl_id ) , class_table as ( select vender_id ,l1_id ,l1_code ,l1_name ,l2_id ,l2_code ,l2_name ,l3_id ,l3_code ,l3_name ,l4_id ,l4_code ,l4_name ,l5_id ,l5_code ,l5_name ,l6_id ,l6_code ,l6_name ,matkl_id ,modified ,row_number() over (partition by vender_id,matkl_id order by modified desc) as rank from class_table_tmp ) ,new_data as ( select ct.vender_id ,dw.matnr ,case when ct.l5_id is not null then 5 when ct.l4_id is not null then 4 when ct.l3_id is not null then 3 when ct.l2_id is not null then 2 else 1 end as purchase_category_layers ,ct.l1_code as purchase_category1_code ,ct.l1_name as purchase_category1_name ,ct.l2_code as purchase_category2_code ,ct.l2_name as purchase_category2_name ,ct.l3_code as purchase_category3_code ,ct.l3_name as purchase_category3_name ,ct.l4_code as purchase_category4_code ,ct.l4_name as purchase_category4_name ,coalesce(ct.l5_code,ci.code) as purchase_category5_code ,coalesce(ct.l5_name,ci.name) as purchase_category5_name ,ci.code as matkl_code ,ci.name as matkl_name ,dw.offline_ware_name as ware_name ,ct.l6_code as purchase_category6_code ,ct.l6_name as purchase_category6_name ,ct.rank ,ct.matkl_id ,ct.modified from class_table ct inner join blaze_t.cat_class ci on ( ct.matkl_id = ci.id ) inner join blaze_t.dw dw on ( ci.code = dw.matkl and ci.vender_id = dw.vender_id ) where ct.rank=1) select coalesce(new.vender_id,his.vender_id) as vender_id ,coalesce(new.matnr,his.matnr) as matnr ,new.purchase_category_layers ,if(new.matnr is not null,new.purchase_category1_code,his.purchase_category1_code) as purchase_category1_code ,if(new.matnr is not null,new.purchase_category1_name,his.purchase_category1_name) as purchase_category1_name ,if(new.matnr is not null,new.purchase_category2_code,his.purchase_category2_code) as purchase_category2_code ,if(new.matnr is not null,new.purchase_category2_name,his.purchase_category2_name) as purchase_category2_name ,if(new.matnr is not null,new.purchase_category3_code,his.purchase_category3_code) as purchase_category3_code ,if(new.matnr is not null,new.purchase_category3_name,his.purchase_category3_name) as purchase_category3_name ,if(new.matnr is not null,new.purchase_category4_code,his.purchase_category4_code) as purchase_category4_code ,if(new.matnr is not null,new.purchase_category4_name,his.purchase_category4_name) as purchase_category4_name ,if(new.matnr is not null,new.purchase_category5_code,his.purchase_category5_code) as purchase_category5_code ,if(new.matnr is not null,new.purchase_category5_name,his.purchase_category5_name) as purchase_category5_name ,new.matkl_code ,new.matkl_name ,dw.offline_ware_name as ware_name ,new.purchase_category6_code as purchase_category6_code ,new.purchase_category6_name as purchase_category6_name from new_data new full join blaze_t.wp_history his on new.vender_id=his.vender_id and new.matnr=his.matnr left join blaze_t.dw dw on coalesce(new.vender_id,his.vender_id) = dw.vender_id and coalesce(new.matnr,his.matnr) = dw.matnr where coalesce(new.vender_id,his.vender_id) = 9 and coalesce(new.matnr,his.matnr) = '100101' DISTRIBUTE BY CAST(RAND() * 4 AS INT)
[TABLE] blaze_t.test_level's DATA is in output.csv file. output.csv
[ENV] Executor: 4executors1core, memory: 2g, overhead memory: 4g Driver: 2g use celeborn v0.5.1 to solve shuffle data Spark v3.3.4 in k8s
Expected behavior Hoping they can get the same result.
Screenshots origin Spark:
Blaze:
The text was updated successfully, but these errors were encountered:
can you also provide the schema and sample data of table blaze_t.test_level?
blaze_t.test_level
Sorry, something went wrong.
Sorry for forgetting the important information.
CREATE TABLE blaze_t.test_level ( id INT, name STRING, code STRING, parent_code STRING, vender_id INT, parent_id INT, level INT, status INT, classifylist STRING, type INT, shop_type INT, applier STRING, created STRING, reviser STRING, modified STRING, yn INT, log_info_json STRING, cm_code STRING, rb_code STRING, main_cm_code STRING, main_rb_code STRING) stored as parquet;
and the sample data is in this link:
No branches or pull requests
Describe the bug
When execute the SQL, origin Spark and Blaze will get different value.
To Reproduce
Steps to reproduce the behavior:
[TABLE] blaze_t.test_level's DATA is in output.csv file.
output.csv
[ENV]
Executor: 4executors1core, memory: 2g, overhead memory: 4g
Driver: 2g
use celeborn v0.5.1 to solve shuffle data
Spark v3.3.4 in k8s
Expected behavior
Hoping they can get the same result.
Screenshots
origin Spark:
Blaze:
The text was updated successfully, but these errors were encountered: