1 Star 0 Fork 0

刘梦林/Boost_search

Create your Gitee Account
Explore and code with more than 12 million developers,Free private repositories !:)
Sign up
文件
This repository doesn't specify license. Please pay attention to the specific project description and its upstream code dependency when using it.
Clone or Download
util.hpp 4.48 KB
Copy Edit Raw Blame History
刘梦林 authored 2023-02-26 09:07 . boost_search done
#pragma once
#include <algorithm>
#include <fstream>
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include <boost/algorithm/string.hpp>

#include "ccjieba/Jieba.hpp"
#include "log.hpp"
namespace ns_util{
class FileUtil{
public:
    /// Reads the text file at `file_path` and appends its content to `*out`.
    ///
    /// The file is consumed line by line with std::getline, so the line
    /// terminators are NOT copied: consecutive lines end up concatenated
    /// directly. Existing content of `*out` is kept and extended.
    ///
    /// @param file_path path of the file to read
    /// @param out       destination string; must not be null
    /// @return true when the file could be opened, false otherwise (an error
    ///         message is written to std::cerr and `*out` is left untouched)
    static bool ReadFile(const std::string &file_path,std::string *out)
    {
        std::ifstream input(file_path, std::ios::in);
        if(input.is_open()){
            // std::getline returns the stream, which converts to false once a
            // read fails (end of file), and that ends the loop.
            for(std::string text; std::getline(input, text); ){
                out->append(text);
            }
            input.close();
            return true;
        }
        std::cerr<<"open file "<<file_path<<" error"<<std::endl;
        return false;
    }
};
class StringUtil{
public:
    /// Splits `target` into tokens using Boost's string algorithms.
    ///
    /// @param target the string to split (data source)
    /// @param out    receives the resulting tokens; must not be null
    /// @param sep    delimiter characters; each character of `sep` acts as a
    ///               separator on its own (boost::is_any_of)
    static void Split(const std::string &target,std::vector<std::string> *out,const std::string &sep)
    {
        // Every single character contained in `sep` is a delimiter.
        const auto is_delimiter = boost::is_any_of(sep);
        // token_compress_on (the default is off) merges adjacent delimiters
        // into one. For input such as "aaa\3vv\3nn\3\3\3\3gggg\3" this avoids
        // producing many empty tokens between the repeated '\3' separators.
        boost::split(*out, target, is_delimiter, boost::token_compress_on);
    }
};
// Locations of the dictionary files handed to cppjieba::Jieba. All paths are
// relative to the working directory of the running process.

/// Main segmentation dictionary.
constexpr const char* DICT_PATH = "./dict/jieba.dict.utf8";
/// HMM model file.
constexpr const char* HMM_PATH = "./dict/hmm_model.utf8";
/// User-supplied dictionary.
constexpr const char* USER_DICT_PATH = "./dict/user.dict.utf8";
/// IDF (inverse document frequency) file.
constexpr const char* IDF_PATH = "./dict/idf.utf8";
/// Stop-word list; also read line by line by JiebaUtil::InitJiebaUtil.
constexpr const char* STOP_WORD_PATH = "./dict/stop_words.utf8";
/// Process-wide singleton wrapping a cppjieba::Jieba tokenizer together with
/// a stop-word table. Use the static CutString() entry point to tokenize.
class JiebaUtil{
private:
    cppjieba::Jieba jieba;
    // Stop-word lookup table. Only key presence matters; the mapped value is
    // always true.
    std::unordered_map<std::string,bool> stop_words;
private:
    // Construction is private: instances are only created by get_instance().
    JiebaUtil():jieba(DICT_PATH, HMM_PATH, USER_DICT_PATH, IDF_PATH, STOP_WORD_PATH)
    {}
    JiebaUtil(const JiebaUtil&) = delete;
    JiebaUtil& operator=(const JiebaUtil&) = delete;
public:
    /// Returns the single shared instance, creating it and loading the stop
    /// words on the first call.
    ///
    /// Initialization of a function-local static is guaranteed by C++11 to
    /// happen exactly once even when several threads call this concurrently,
    /// so no hand-written locking is required. Keeping the pointer local to
    /// this function also means the header defines no namespace-scope
    /// variable, so it can be included from several translation units.
    /// The instance is heap-allocated and intentionally never destroyed.
    static JiebaUtil* get_instance()
    {
        static JiebaUtil* const instance = []{
            JiebaUtil* created = new JiebaUtil();
            created->InitJiebaUtil();
            return created;
        }();
        return instance;
    }
    /// Loads the stop words from STOP_WORD_PATH, one word per line, into the
    /// lookup table. Logs a FATAL message and returns without loading
    /// anything when the file cannot be opened.
    void InitJiebaUtil()
    {
        std::ifstream in(STOP_WORD_PATH);
        if(!in.is_open()){
            LOG(FATAL, "load stop words file error");
            return;
        }
        std::string line;
        while(std::getline(in, line)){
            stop_words.insert({line, true});
        }
    }
    /// Tokenizes `src` with Jieba's search-mode cut and removes every token
    /// that is a stop word.
    ///
    /// @param src text to tokenize
    /// @param out receives the remaining tokens in their original order;
    ///            must not be null
    void CutStringHelper(const std::string &src, std::vector<std::string> *out)
    {
        jieba.CutForSearch(src, *out);
        // Erase-remove: drops all stop words in one linear pass instead of
        // shifting the vector's tail once per removed token.
        out->erase(std::remove_if(out->begin(), out->end(),
                                  [this](const std::string &word){
                                      return stop_words.count(word) != 0;
                                  }),
                   out->end());
    }
public:
    /// Convenience entry point: tokenizes `src` through the shared instance
    /// and stores the stop-word-filtered tokens in `*out`.
    static void CutString(const std::string &src,std::vector<std::string> *out)
    {
        get_instance()->CutStringHelper(src, out);
    }
};
//cppjieba::Jieba JiebaUtil::jieba(DICT_PATH, HMM_PATH, USER_DICT_PATH, IDF_PATH, STOP_WORD_PATH);//静态成员在类外定义
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/bianchengxiaoliu/boost_search.git
git@gitee.com:bianchengxiaoliu/boost_search.git
bianchengxiaoliu
boost_search
Boost_search
master

Search