1 Star 0 Fork 0

匿名者/项目boost搜索引擎

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
Search.hpp 3.53 KB
一键复制 编辑 原始数据 按行查看 历史
匿名者 提交于 2024-10-06 13:13 . 搜索模块
#pragma once
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include<iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include"jsoncpp/json/json.h"
#include "index.hpp"
#include"tool.hpp"
#include "log.hpp"
namespace project_search
{
/// Per-document accumulator used while merging inverted-list hits:
/// one instance collects everything recorded for a single document id.
struct more_Inverted_zipper
{
    uint64_t docid = 0;               // id of the document the hits belong to
    std::vector<std::string> words;   // keywords recorded for this document
    int weight = 0;                   // running sum of the per-keyword weights

    // A fresh accumulator starts at docid 0, weight 0 and no words.
    more_Inverted_zipper() = default;
};
class search
{
private:
project_index::index * Index;
public:
search(){}
~search(){}
void Initsearch(const string &input)
{
Index = project_index::index::GetIndex();
LOG(Info,"获取索引单例成功");
Index->Establish_index(input);
LOG(Info,"构建正排倒排索引成功");
}
void Search(string &keyword,string *json_word)
{
vector<string> result;
project_tool::jiebatool::CutString(keyword,&result);
//vector<project_index::Inverted_zipper> Inverted_listmax;
vector<more_Inverted_zipper> Inverted_listmax;
unordered_map<uint64_t,more_Inverted_zipper> part_map;
for(string s :result)
{
boost::to_lower(s);
vector<project_index::Inverted_zipper> *Inverted_list = Index->Getinverted_index(s);
if(nullptr == Inverted_list)
{
continue;
}
//Inverted_listmax.insert(Inverted_listmax.end(),Inverted_list->begin(),Inverted_list->end());//重复插入的问题
for(auto &it:*Inverted_list)
{
auto &temp = part_map[it.docid];
temp.docid = it.docid;
temp.weight += it.weight;
temp.words.push_back(move(it.keyword));
}
}
for(const auto &it : part_map){
Inverted_listmax.push_back(move(it.second));
}
sort(Inverted_listmax.begin(), Inverted_listmax.end(),
[](const more_Inverted_zipper &e1, const more_Inverted_zipper &e2){
return e1.weight > e2.weight;
});
Json::Value root;
for(auto &it : Inverted_listmax)
{
project_index::Format * doc = Index->GetFront_index(it.docid);
if(nullptr == doc)
{
continue;
}
Json::Value temp;
temp["title"] = doc->title;
temp["summary"] = Getsummary(doc->content,it.words[0]);//debug
temp["url"] = doc->url;
//
temp["weight"] = it.weight;
temp["docid"] = (int)it.docid;
root.append(temp);
}
//Json::StyledWriter writer;
Json::FastWriter writer;
*json_word = writer.write(root);
}
string Getsummary(const string &content,const string &keyword)//摘要
{
int Front_loaded = 30;
int Back_loaded = 70;
auto it = std::search(content.begin(),content.end(),
keyword.begin(),keyword.end(),[](int x,int y){
return (tolower(x) == tolower(y));
});
int pos = distance(content.begin(),it);
int begin = 0;
int end = content.size()-1;
if(pos-Front_loaded>begin)//size_t 负数和整形提升bug
begin = pos - Front_loaded;
if(pos+Back_loaded<end)
end = pos + Back_loaded;
string temp = content.substr(begin,end-begin);
temp += "...";
return temp;
}
};
};
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/adexiur/project-boost-search-engine.git
git@gitee.com:adexiur/project-boost-search-engine.git
adexiur
project-boost-search-engine
项目boost搜索引擎
master

搜索帮助