Skip to content

Commit

Permalink
get_evs
Browse files Browse the repository at this point in the history
  • Loading branch information
yffbit committed Jun 1, 2024
1 parent ba31156 commit 6742c56
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 80 deletions.
6 changes: 3 additions & 3 deletions include/solver/cuda_cfr.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ class CudaCFR : public SliceCFR {
size_t init_leaf_node();
void set_cfv_and_offset(DFSNode &node, int player, float *&cfv, int &offset);
size_t init_strength_table();
virtual void step(int iter, int player, bool best_cfv=false);
virtual void step(int iter, int player, int task);
virtual void leaf_cfv(int player);
int block_size(int size) {
    // Ceiling division: how many LANE_SIZE-wide groups are needed to cover `size` items.
    const int padded = size + LANE_SIZE - 1;
    return padded / LANE_SIZE;
}
void clear_prob_sum(int len);
virtual void _reach_prob(int player, bool best_cfv=false);
virtual void _rm(int player, bool best_cfv=false);
virtual void _reach_prob(int player, bool avg_strategy);
virtual void _rm(int player, bool avg_strategy);
virtual void clear_data(int player);
virtual void clear_root_cfv();
virtual void post_process();
Expand Down
2 changes: 1 addition & 1 deletion include/solver/cuda_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ extern __global__ void fold_cfv_kernel(int player, int size, CudaLeafNode *node,
extern __global__ void sd_cfv_kernel(int player, int size, CudaLeafNode *node, float *opp_prob_sum, int my_hand, int opp_hand, int *my_card, int *opp_card, int n_card);
extern __global__ void best_cfv_kernel(Node *node, int size, int n_hand);
extern __global__ void cfv_kernel(Node *node, int size, int n_hand);
extern __global__ void updata_data_kernel(Node *node, int size, int n_hand, float pos_coef, float neg_coef, float coef);
extern __global__ void discount_data_kernel(Node *node, int size, int n_hand, float pos_coef, float neg_coef, float coef);

#endif // _CUDA_FUNC_H_
26 changes: 20 additions & 6 deletions include/solver/slice_cfr.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ using std::mutex;
#define N_LEAF_TYPE 2

#define N_TYPE 5
#define N_TASK_SIZE 5

#define two_card_hash(card1, card2) ((1LL<<(card1)) | (1LL<<(card2)))
#define tril_idx(r, c) (((r)*((r)-1)>>1)+(c)) // r>c>=0

Expand All @@ -49,12 +49,18 @@ using std::mutex;
#define code_idx1(i) (((i)+1)<<16)
#define decode_idx1(x) ((((x)>>16)&0xff) - 1)

#define EXP_TASK 0
#define CFV_TASK 1
#define CFR_TASK 2

// Per-node record handed to the CFR kernels as Node* (see the kernel
// declarations in cuda_func.h). All members carry default initializers.
struct Node {
int n_act = 0;// number of actions
int parent_offset = -1;// offset of this node's reach_prob entry within the parent node's data
float *parent_cfv = nullptr;
// mutex *mtx = nullptr;
float *data = nullptr;// cfv,regret_sum,strategy_sum,reach_prob,sum
float *opp_prob = nullptr;// NOTE(review): presumably the opponent's reach probabilities - confirm against the code that fills it
size_t board = 0LL;// NOTE(review): presumably a card bitmask like init_board / two_card_hash - confirm
};
struct LeafNode {
float *reach_prob[N_PLAYER] = {nullptr,nullptr};
Expand Down Expand Up @@ -116,6 +122,8 @@ class SliceCFR : public Solver {
int steps = 0, interval = 0, n_card = N_CARD, min_card = 0;
int init_round = 0;
int dfs_idx = 0;// 先序遍历
unordered_map<ActionNode*, vector<int>> node_idx;
int combination_num[N_ROUND-1] {1,N_CARD,N_CARD*N_CARD};
size_t init_board = 0;
int hand_size[N_PLAYER];
float norm = 1;// 根节点概率归一化系数
Expand Down Expand Up @@ -162,9 +170,9 @@ class SliceCFR : public Solver {
size_t init_strength_table();
void dfs(shared_ptr<GameTreeNode> node, int parent_act=-1, int parent_dfs_idx=-1, int parent_p0_act=-1, int parent_p0_idx=-1, int parent_p1_act=-1, int parent_p1_idx=-1, int cnt0=0, int cnt1=0, int info=0);
void init_poss_card(Deck& deck, size_t board);
virtual void step(int iter, int player, bool best_cfv=false);
virtual void step(int iter, int player, int task);
virtual void leaf_cfv(int player);
void fold_cfv(int player, float *cfv, float *opp_reach, int my_hand, int opp_hand, float val, size_t board);
void fold_cfv(int player, float *cfv, float *opp_reach, int my_hand, float val, size_t board);
void sd_cfv(int player, float *cfv, float *opp_reach, int my_hand, int opp_hand, float val, int idx);
void append_node_idx(int p_idx, int act_idx, int player, int cpu_node_idx);
vector<vector<int>> pre_leaf_node_map;// [dfs_idx,act_idx]
Expand All @@ -179,13 +187,19 @@ class SliceCFR : public Solver {
// int mtx_idx = N_PLAYER;
vector<vector<StrengthData>> strength;
size_t _estimate_tree_size(shared_ptr<GameTreeNode> node);
virtual void _reach_prob(int player, bool best_cfv=false);
virtual void _rm(int player, bool best_cfv=false);
virtual void _reach_prob(int player, bool avg_strategy);
virtual void _rm(int player, bool avg_strategy);
virtual void clear_data(int player);
virtual void clear_root_cfv();
// Empty default body; virtual so that a derived solver can supply its own post-processing.
virtual void post_process() {}
json reConvertJson(const shared_ptr<GameTreeNode>& node, int depth, int max_depth, int &idx, int info);
virtual vector<vector<float>> get_avg_strategy(int idx);
virtual vector<vector<float>> get_avg_strategy(int idx);// [n_hand,n_act]
virtual vector<vector<float>> get_ev(int idx);// [n_hand,n_act]
bool print_exploitability(int iter, Timer &timer);
void cfv_to_ev();
void cfv_to_ev(Node *node, int player);
void get_prob_sum(vector<float> &prob_sum, float &sum, int player, float *reach_prob, size_t board);
void output_data(ActionNode *node, vector<Card> &cards, vector<vector<vector<float>>> &out, bool ev);
};

#endif // _SLICE_CFR_H_
2 changes: 1 addition & 1 deletion include/tools/CommandLineTool.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class CommandLineTool{
string board;
string res_file;
string algorithm = "discounted_cfr";
float accuracy;
float accuracy = 0.1;
int max_iteration=100;
bool use_isomorphism=0;
int use_halffloats=0;
Expand Down
2 changes: 2 additions & 0 deletions src/Card.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const string& Card::getCard() {
return this->card;
}

// rank * 4 + suit,[13,4]
int Card::getCardInt() {
return this->card_int;
}
Expand All @@ -39,6 +40,7 @@ int Card::card2int(Card card) {
return strCard2int(card.getCard());
}

// rank * 4 + suit,[13,4]
int Card::strCard2int(const string &card) {
char rank = card.at(0);
char suit = card.at(1);
Expand Down
18 changes: 9 additions & 9 deletions src/solver/cuda_cfr.cu
Original file line number Diff line number Diff line change
Expand Up @@ -250,23 +250,23 @@ size_t CudaCFR::estimate_tree_size() {
return size;
}

// Launches the reach-probability kernel for `player`, once per slice, where
// slice i covers nodes [offset[i], offset[i+1]) of slice_offset[player].
// The flag (shown here under both its old name best_cfv and its new name
// avg_strategy) selects reach_prob_avg_kernel; otherwise reach_prob_kernel runs.
void CudaCFR::_reach_prob(int player, bool best_cfv) {
void CudaCFR::_reach_prob(int player, bool avg_strategy) {
vector<int>& offset = slice_offset[player];
// n = number of slices (offset holds n+1 boundaries)
int n = offset.size() - 1, size = 0, block = 0, n_hand = hand_size[player];
for(int i = 0; i < n; i++) {
// number of nodes in slice i, and the block count needed to cover them
size = offset[i+1] - offset[i];
block = block_size(size);
if(best_cfv) reach_prob_avg_kernel<<<block, LANE_SIZE>>>(dev_nodes+offset[i], size, n_hand);
if(avg_strategy) reach_prob_avg_kernel<<<block, LANE_SIZE>>>(dev_nodes+offset[i], size, n_hand);
else reach_prob_kernel<<<block, LANE_SIZE>>>(dev_nodes+offset[i], size, n_hand);
// Block the host until this slice's kernel has finished before launching the next one.
// NOTE(review): presumably later slices read values written by earlier ones - confirm.
cudaDeviceSynchronize();
}
}

// Runs a single kernel launch over node_cnt[N_LEAF_TYPE + player] nodes,
// starting at dev_nodes + slice_offset[player][0].
// The flag (shown here under both its old name best_cfv and its new name
// avg_strategy) selects rm_avg_kernel; otherwise rm_kernel runs.
void CudaCFR::_rm(int player, bool best_cfv) {
void CudaCFR::_rm(int player, bool avg_strategy) {
int size = node_cnt[N_LEAF_TYPE + player];
int block = block_size(size);
Node *node = dev_nodes + slice_offset[player][0];
if(best_cfv) rm_avg_kernel<<<block, LANE_SIZE>>>(node, size, hand_size[player]);
if(avg_strategy) rm_avg_kernel<<<block, LANE_SIZE>>>(node, size, hand_size[player]);
else rm_kernel<<<block, LANE_SIZE>>>(node, size, hand_size[player]);
// Block the host until the kernel has completed.
cudaDeviceSynchronize();
}
Expand All @@ -288,27 +288,27 @@ void CudaCFR::clear_root_cfv() {
cudaDeviceSynchronize();
}

void CudaCFR::step(int iter, int player, bool best_cfv) {
void CudaCFR::step(int iter, int player, int task) {
Timer timer;
int opp = 1 - player, my_hand = hand_size[player], size = 0, block = 0;
_reach_prob(opp, best_cfv);
_reach_prob(opp, task != CFR_TASK);
size_t t1 = timer.ms(true);

leaf_cfv(player);
size_t t2 = timer.ms(true);

if(!best_cfv) {
if(task == CFR_TASK) {
size = n_player_node;
block = block_size(size);
updata_data_kernel<<<block, LANE_SIZE>>>(dev_nodes, size, my_hand, pos_coef, neg_coef, coef);
discount_data_kernel<<<block, LANE_SIZE>>>(dev_nodes, size, my_hand, pos_coef, neg_coef, coef);
cudaDeviceSynchronize();
}
size_t t3 = timer.ms(true);
vector<int>& offset = slice_offset[player];
for(int i = offset.size()-2; i >= 0; i--) {
size = offset[i+1] - offset[i];
block = block_size(size);
if(best_cfv) best_cfv_kernel<<<block, LANE_SIZE>>>(dev_nodes+offset[i], size, my_hand);
if(task == EXP_TASK) best_cfv_kernel<<<block, LANE_SIZE>>>(dev_nodes+offset[i], size, my_hand);
else cfv_kernel<<<block, LANE_SIZE>>>(dev_nodes+offset[i], size, my_hand);
cudaDeviceSynchronize();
}
Expand Down
2 changes: 1 addition & 1 deletion src/solver/cuda_func.cu
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ __global__ void cfv_kernel(Node *node, int size, int n_hand) {
for(i = 0; i < size; i++) cfv[i] = 0;// 清零cfv
}

__global__ void updata_data_kernel(Node *node, int size, int n_hand, float pos_coef, float neg_coef, float coef) {
__global__ void discount_data_kernel(Node *node, int size, int n_hand, float pos_coef, float neg_coef, float coef) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if(i >= size) return;
node += i;
Expand Down
Loading

0 comments on commit 6742c56

Please sign in to comment.