1 Star 0 Fork 0

匿名者/项目boost搜索引擎

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
tool.hpp 3.43 KB
一键复制 编辑 原始数据 按行查看 历史
匿名者 提交于 2024-09-12 20:05 . 功能模组;增加去掉暂停词
#pragma once
#include <algorithm>
#include <fstream>
#include <iostream>
#include <istream>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include <boost/algorithm/string.hpp> // boost::split
#include "cppjieba/Jieba.hpp"
#include "log.hpp"
using namespace std;
namespace project_tool
{
/// File-reading helper.
class Filetool
{
public:
    /// Appends the text of the file at `files_gather` to `*result`.
    ///
    /// The file is consumed with getline, so line terminators are dropped and
    /// consecutive lines are joined with no separator between them. Whatever
    /// `*result` already holds is kept in front of the appended text.
    ///
    /// @param files_gather path of the file to read.
    /// @param result       destination string; dereferenced unconditionally.
    /// @return true once the file has been read; false (after writing a
    ///         message to cerr) when the file cannot be opened.
    static bool divestfile(const std::string &files_gather, std::string *result)
    {
        std::ifstream input(files_gather, std::ios::in);
        if (input.is_open())
        {
            for (std::string row; std::getline(input, row);)
            {
                result->append(row);
            }
            input.close();
            return true;
        }

        std::cerr << "open file " << files_gather << " error" << std::endl;
        return false;
    }
};
/// String-splitting helper built on boost::split.
class stringtool
{
public:
    /// Splits `line` into tokens and stores them in `*out`.
    ///
    /// Every character contained in `sep` is treated as a delimiter (the
    /// original note on this call mentions "\3" as the separator in use), and
    /// token_compress_on makes a run of adjacent delimiters count as one.
    ///
    /// @param line text to split.
    /// @param out  receives the tokens; dereferenced unconditionally.
    /// @param sep  set of delimiter characters.
    /// @return always true.
    static bool Slice_strings(std::string &line, std::vector<std::string> *out, const std::string sep)
    {
        const auto is_delimiter = boost::is_any_of(sep);
        boost::split(*out, line, is_delimiter, boost::token_compress_on);
        return true;
    }
};
// Dictionary files handed to the cppjieba::Jieba constructor by jiebatool, in
// this order. All paths are relative to the process working directory.
constexpr const char* DICT_PATH = "./dict/jieba.dict.utf8";
constexpr const char* HMM_PATH = "./dict/hmm_model.utf8";
constexpr const char* USER_DICT_PATH = "./dict/user.dict.utf8";
constexpr const char* IDF_PATH = "./dict/idf.utf8";
// Also read line by line by jiebatool::Initjiebatoolstop to build its stop-word table.
constexpr const char* STOP_WORD_PATH = "./dict/stop_words.utf8";
/// Word segmenter that wraps one cppjieba::Jieba object and filters out the
/// stop words listed in STOP_WORD_PATH.
///
/// Exactly one instance exists per process; it is reached through
/// Get_instances() or, more conveniently, the static CutString().
class jiebatool
{
private:
    cppjieba::Jieba jieba;
    // Stop words read from STOP_WORD_PATH. Only the keys are ever consulted;
    // the mapped bool is always true.
    std::unordered_map<std::string, bool> stop_word_map;

    // Builds the segmenter from the dictionary files and loads the stop words,
    // so the instance is fully usable as soon as construction finishes.
    jiebatool()
        : jieba(DICT_PATH, HMM_PATH, USER_DICT_PATH, IDF_PATH, STOP_WORD_PATH)
    {
        Initjiebatoolstop();
    }
    jiebatool(const jiebatool &) = delete;
    jiebatool &operator=(const jiebatool &) = delete;

public:
    /// Returns the process-wide instance, creating it on the first call.
    ///
    /// The instance is a function-local static: since C++11 its initialization
    /// runs exactly once even when several threads make the first call at the
    /// same time, so no hand-written locking is needed. Because this function
    /// is defined inside the class it is implicitly inline, and every
    /// translation unit that includes this header shares the same object.
    /// The object is destroyed during normal static destruction at exit.
    ///
    /// @return pointer to the shared instance; never null.
    static jiebatool *Get_instances()
    {
        static jiebatool instance;
        return &instance;
    }

    /// Reads STOP_WORD_PATH line by line and records each line as a stop word.
    ///
    /// Called by the constructor. Calling it again only re-inserts the same
    /// keys. If the file cannot be opened the failure is logged and the
    /// stop-word table is left as it was.
    void Initjiebatoolstop()
    {
        std::ifstream in(STOP_WORD_PATH);
        if (!in.is_open())
        {
            LOG(Fatal,"STOP_WORD_PATH open error");
            return;
        }
        std::string word;
        while (std::getline(in, word))
        {
            stop_word_map.insert({word, true});
        }
    }

    /// Segments `src` with Jieba::CutForSearch into `*out`, then removes every
    /// token that is a stop word. The order of the remaining tokens is kept.
    ///
    /// @param src text to segment.
    /// @param out receives the filtered tokens; dereferenced unconditionally.
    void Curstringstop(const std::string &src, std::vector<std::string> *out)
    {
        jieba.CutForSearch(src, *out);
        // Single-pass removal: surviving tokens are compacted to the front and
        // the tail is erased once, instead of shifting the vector per stop word.
        out->erase(std::remove_if(out->begin(), out->end(),
                                  [this](const std::string &token) {
                                      return stop_word_map.count(token) != 0;
                                  }),
                   out->end());
    }

    /// Convenience entry point: segments `src` with the shared instance and
    /// stores the stop-word-filtered tokens in `*out`.
    static void CutString(const std::string &src, std::vector<std::string> *out)
    {
        Get_instances()->Curstringstop(src, out);
    }
};
//cppjieba::Jieba jiebatool::jieba(DICT_PATH,HMM_PATH,USER_DICT_PATH,IDF_PATH,STOP_WORD_PATH);//不用赋值初始化
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/adexiur/project-boost-search-engine.git
git@gitee.com:adexiur/project-boost-search-engine.git
adexiur
project-boost-search-engine
项目boost搜索引擎
master

搜索帮助