Skip to content

Commit

Permalink
chinese segmentation and gdb debugger
Browse files Browse the repository at this point in the history
  • Loading branch information
candlewill committed Mar 12, 2017
1 parent b9fe83a commit ad195df
Show file tree
Hide file tree
Showing 22 changed files with 420 additions and 128 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# DST and NLU
topic: ~error_correction keep repeat ( "想 看" )
topic: ~error_correct keep repeat ( "想 看" )

u: ( 看 _* )
_0 '_0
_0 \n
'_0 \n
if ( _0='_0 )
{
你想看 _0
Expand Down
11 changes: 0 additions & 11 deletions BOTDATA/TEST/simplecontrol.top
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ u: (< alarm test) \[ alarm=5000 \] Alarm requested.

u: ( ) # main per-sentence processing

if (%revisedinput)
{
# %topic 当前真实话题
$$currenttopic = %topic # get the current topic at start of volley

Expand Down Expand Up @@ -124,15 +122,6 @@ u: ( ) # main per-sentence processing
[不要欺负本宝宝了,人家还在长大]
}

}

else
{
$$tokenizedInput = ^cn_segment(%originalinput)
^input($$tokenizedInput)
^fail(SENTENCE)
}


topic: ~XPOSTPROCESS system () # gambits only - not allowed to write to output - generates speech on windows -- for Linux you'd want to install Festival (or equivalent) and call that instead.

Expand Down
2 changes: 1 addition & 1 deletion BOTDATA/TEST/分词.top
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ topic: ~segment keep repeat ( 分词 )

t: 请输入测试语句:

u: (看 _*)
u: ( 分词 _* )
使用撇号: \n
'_0 \n
没有撇号: \n
Expand Down
Binary file modified ChatScript-7.3/BINARIES/ChatScript
Binary file not shown.
4 changes: 2 additions & 2 deletions ChatScript-7.3/SRC/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ clean:
-rm -f *.o

binary: $(OBJECTS)
$(CC) $(OBJECTS) $(LDFLAGS) $(DEFINES) $(INCLUDEDIRS) -o $(EXECUTABLE)
$(CC) -g $(OBJECTS) $(LDFLAGS) $(DEFINES) $(INCLUDEDIRS) -o $(EXECUTABLE)

.cpp.o:
$(CC) $(CFLAGS) $(DEFINES) $(INCLUDEDIRS) $< -o $@
$(CC) -g -O0 $(CFLAGS) $(DEFINES) $(INCLUDEDIRS) $< -o $@
1 change: 1 addition & 0 deletions ChatScript-7.3/SRC/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ using namespace std;

#ifdef PRIVATE_CODE
#include "privatesrc.h"
#include "../privatecode/preprocess.h"
#endif

#ifdef WIN32
Expand Down
21 changes: 19 additions & 2 deletions ChatScript-7.3/SRC/mainSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,11 @@ int PerformChatGivenTopic(char* user, char* usee, char* incoming,char* ip,char*
// INTERNAL STRINGS AND STUFF never have control characters either (/r /t /n converted only on output to user)
// WE internally use /r/n in file stuff for the user topic file.

// 中文分词
#ifdef PRIVATE_CODE
#include "../privatecode/preprocess.cpp"
#endif

int PerformChat(char* user, char* usee, char* incoming,char* ip,char* output) // returns volleycount or 0 if command done or -1 PENDING_RESTART
{ // primary entrypoint for chatbot -- null incoming treated as conversation start.
pendingUserReset = false;
Expand All @@ -1415,6 +1420,12 @@ int PerformChat(char* user, char* usee, char* incoming,char* ip,char* output) //
#endif
ClearVolleyWordMaps();
ResetEncryptTags();

// 增加中文分词功能
#ifdef PRIVATE_CODE
incoming = CNPreprocess(incoming);
#endif

mainInputBuffer = incoming;
mainOutputBuffer = output;
size_t len = strlen(incoming);
Expand Down Expand Up @@ -2575,9 +2586,10 @@ void PrepareSentence(char* input,bool mark,bool user, bool analyze,bool oobstart
#ifndef NOMAIN
int main(int argc, char * argv[])
{
// Scan the command-line arguments for "root="; if present, switch the current directory to the path it specifies
for (int i = 1; i < argc; ++i)
{
if (!strnicmp(argv[i],"root=",5))
if (!strnicmp(argv[i],"root=",5)) // 比较字符串前n个字符是否相同
{
#ifdef WIN32
SetCurrentDirectory((char*)argv[i]+5);
Expand All @@ -2587,6 +2599,7 @@ int main(int argc, char * argv[])
}
}

// 依据头文件dictionarySystem.h所处路径,决定当前路径是否应该前往上一级
FILE* in = FopenStaticReadOnly((char*)"SRC/dictionarySystem.h"); // SRC/dictionarySystem.h
if (!in) // if we are not at top level, try going up a level
{
Expand All @@ -2598,10 +2611,14 @@ int main(int argc, char * argv[])
#endif
}
else FClose(in);

// 初始化系统
if (InitSystem(argc,argv)) myexit((char*)"failed to load memory\r\n");
if (!server)

if (!server)
{
quitting = false; // allow local bots to continue regardless
// 主循环
MainLoop();
}
else if (quitting) {;} // boot load requests quit
Expand Down
4 changes: 2 additions & 2 deletions ChatScript-7.3/SRC/os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -883,9 +883,9 @@ void InitFileSystem(char* untouchedPath,char* readablePath,char* writeablePath)
void StartFile(const char* name)
{
if (strnicmp(name,"TMP",3)) maxFileLine = currentFileLine = 0;
strcpy(currentFilename,name); // in case name is simple
strcpy(currentFilename,name); // in case name is simple (char*赋值给char[]的方式是使用strcpy)

char* at = strrchr((char*) name,'/'); // last end of path
char* at = strrchr((char*) name,'/'); // last end of path (用于查找某字符在字符串中最后一次出现的位置)
if (at) strcpy(currentFilename,at+1);
at = strrchr(currentFilename,'\\'); // windows last end of path
if (at) strcpy(currentFilename,at+1);
Expand Down
17 changes: 17 additions & 0 deletions ChatScript-7.3/gdb.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Breakpoints for stepping through one chat volley.
# Each location is listed once: the original file repeated the first eight
# commands, which made gdb create duplicate breakpoints at the same places.

# Entry points of the chat loop
break MainLoop()
break PerformChat
break PerformChatGivenTopic
break ProcessInputFile

# File:line breakpoints use the bare source file name instead of an absolute
# path from one developer's machine. The line numbers are tied to the
# mainSystem.cpp revision of this commit and must be re-checked after edits.
break mainSystem.cpp:1441
break mainSystem.cpp:1471

break FinishVolley

# Chinese word segmentation hook called from PerformChat
break CNPreprocess(char*)
38 changes: 38 additions & 0 deletions ChatScript-7.3/privatecode/preprocess.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "cppjieba/Jieba.hpp"

// Dictionary and model files handed to the cppjieba segmenter below.
// All paths are relative ("./privatecode/..."), so they are resolved against
// the process's current working directory at the time cpp_jieba is built.
const char* const DICT_PATH = "./privatecode/Jieba/DICT/jieba.dict.utf8";
const char* const HMM_PATH = "./privatecode/Jieba/DICT/hmm_model.utf8";
const char* const USER_DICT_PATH = "./privatecode/Jieba/DICT/user.dict.utf8";
const char* const IDF_PATH = "./privatecode/Jieba/DICT/idf.utf8";
const char* const STOP_WORD_PATH = "./privatecode/Jieba/DICT/stop_words.utf8";

// Single file-scope segmenter instance used by CNPreprocess().
// As a global object it is constructed during static initialisation, i.e.
// before main() runs.
// NOTE(review): main() may change the working directory (root= argument)
// only after this object exists - confirm the relative paths above still
// resolve when the binary is started from another directory.
cppjieba::Jieba cpp_jieba(DICT_PATH,
HMM_PATH,
USER_DICT_PATH,
IDF_PATH,
STOP_WORD_PATH);

/*
 * CNPreprocess - split a raw user utterance into space-separated words.
 *
 * incoming: NUL-terminated input text; may be NULL.
 *
 * Returns `incoming` itself, untouched, when it is NULL, empty, or starts
 * with ":" or " :" (those inputs are passed through without segmentation).
 * Otherwise returns a pointer to a NUL-terminated copy in which the words
 * produced by cpp_jieba.Cut() are joined with single spaces.
 *
 * Lifetime: the segmented copy lives in a function-local static buffer. It
 * stays valid after this function returns and is overwritten by the next
 * call, so the function is not reentrant. The buffer holds exactly the text
 * plus its terminator; callers must not write beyond that.
 *
 * Fixes over the previous version:
 *   - the result used to be placed in alloca() memory, which is released
 *     when the function returns, so the caller received a dangling pointer;
 *   - `new char(strlen(incoming) + 1)` allocated a single char that was
 *     never used or freed (one leak per call);
 *   - a NULL `incoming` reached strlen().
 */
char * CNPreprocess(char * incoming)
{
    // Nothing to segment: hand the caller's buffer straight back.
    if (!incoming || *incoming == '\0' ||
        !strncmp(incoming, " :", 2) || !strncmp(incoming, ":", 1))
        return incoming;

    std::vector<std::string> words;
    std::string sentence(incoming);

    // Third argument kept as in the original call (cppjieba's hmm switch).
    cpp_jieba.Cut(sentence, words, true);
    std::string joined = limonp::Join(words.begin(), words.end(), " ");

    // Static storage keeps the bytes alive for the caller after we return.
    static std::vector<char> segmented_buffer;
    segmented_buffer.assign(joined.begin(), joined.end());
    segmented_buffer.push_back('\0');

    return &segmented_buffer[0];
}
6 changes: 6 additions & 0 deletions ChatScript-7.3/privatecode/preprocess.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Public interface of the Chinese word-segmentation preprocessing step.
#ifndef PREPROCESSH
#define PREPROCESSH

// Segments `incoming` into space-separated words (defined in preprocess.cpp).
// Returns either `incoming` itself, when the input is passed through
// unchanged, or a pointer to a segmented copy of the text.
// NOTE(review): the lifetime of the returned copy is decided by the
// implementation in preprocess.cpp - check it before storing the pointer.
char* CNPreprocess(char * incoming);

#endif
2 changes: 0 additions & 2 deletions ChatScript-7.3/privatecode/privatesrc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ sprintf(buffer,(char*)"%d",count); 将格式化的数据写入字符串
1. 常用无符号整形 unsigned int i;
*/

#include "privatesrc.h"
#include "cppjieba/Jieba.hpp"

const char* const DICT_PATH = "./privatecode/Jieba/DICT/jieba.dict.utf8";
Expand Down
47 changes: 47 additions & 0 deletions LOGS/startlog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,50 @@ System startup Test reset Tue Mar 07 17:13:49 2017
System startup createsystem Tue Mar 07 17:13:49 2017
System startup Test Tue Mar 07 17:15:57 2017
System startup createsystem Tue Mar 07 17:15:57 2017
System startup createsystem Thu Mar 09 17:22:54 2017
System startup Test reset Thu Mar 09 17:30:23 2017
System startup createsystem Thu Mar 09 17:30:23 2017
System startup Test Thu Mar 09 17:31:19 2017
System startup createsystem Thu Mar 09 17:31:20 2017
System startup Test reset Thu Mar 09 17:31:59 2017
System startup createsystem Thu Mar 09 17:31:59 2017
System startup Test Thu Mar 09 17:35:34 2017
System startup createsystem Thu Mar 09 17:35:35 2017
System startup createsystem Fri Mar 10 11:44:08 2017
System startup createsystem Fri Mar 10 11:46:18 2017
System startup createsystem Fri Mar 10 11:50:33 2017
System startup createsystem Fri Mar 10 11:54:20 2017
System startup createsystem Fri Mar 10 14:46:18 2017
System startup createsystem Fri Mar 10 15:27:03 2017
System startup createsystem Fri Mar 10 17:53:13 2017
System startup createsystem Fri Mar 10 18:06:07 2017
System startup createsystem Fri Mar 10 18:24:58 2017
System startup createsystem Fri Mar 10 18:43:20 2017
System startup createsystem Fri Mar 10 18:45:06 2017
System startup createsystem Fri Mar 10 20:04:13 2017
System startup createsystem Fri Mar 10 20:22:02 2017
System startup createsystem Sun Mar 12 13:42:31 2017
System startup createsystem Sun Mar 12 15:09:37 2017
System startup createsystem Sun Mar 12 15:41:09 2017
System startup createsystem Sun Mar 12 15:49:24 2017
System startup createsystem Sun Mar 12 15:58:40 2017
System startup createsystem Sun Mar 12 16:00:55 2017
System startup createsystem Sun Mar 12 16:16:03 2017
System startup createsystem Sun Mar 12 16:24:16 2017
System startup createsystem Sun Mar 12 16:34:08 2017
System startup createsystem Sun Mar 12 16:34:23 2017
System startup createsystem Sun Mar 12 17:05:16 2017
System startup createsystem Sun Mar 12 17:06:25 2017
System startup createsystem Sun Mar 12 17:07:29 2017
System startup createsystem Sun Mar 12 17:08:15 2017
System startup createsystem Sun Mar 12 17:08:38 2017
System startup createsystem Sun Mar 12 17:09:59 2017
System startup createsystem Sun Mar 12 17:12:55 2017
System startup createsystem Sun Mar 12 17:31:25 2017
System startup createsystem Sun Mar 12 17:33:20 2017
System startup createsystem Sun Mar 12 17:35:39 2017
System startup createsystem Sun Mar 12 17:45:47 2017
System startup createsystem Sun Mar 12 17:46:47 2017
System startup createsystem Sun Mar 12 17:53:10 2017
System startup Test reset Sun Mar 12 17:53:16 2017
System startup createsystem Sun Mar 12 17:53:17 2017
2 changes: 1 addition & 1 deletion TOPIC/BUILD1/keywords1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ T~send_msg_policy ( )
T~send_msg_nlg ( )
T~control ( )
T~xpostprocess ( )
T~error_correction ( 想_看 )
T~error_correct ( 想_看 )
T~segment ( 分词 )
Loading

0 comments on commit ad195df

Please sign in to comment.