[Update] 使用搜索推荐接口优化搜索功能对搜索内容的相关性;

[Add] PixivURL 增加搜索推荐接口Url;
[Add] PixivSearchLinkBuilder 增加`getContent`方法以允许外部获取搜索原始内容;
[Add] CacheStoreCentral 增加`getSearchBody(PixivSearchLinkBuilder)`方法以允许直接提供builder进行请求;
[Change] CacheStoreCentral, PixivUtils 移除`getSearchBody(String, ...)`方法对PixivSearchLinkBuilder的构建, 将该部分转移至`PixivUtils.buildSearchLinkBuilder`方法;
[Update] BotCommandProcess 增加`addRecommendKeywords(PixivSearchLinkBuilder, int)`方法以针对PixivSearchLinkBuilder的搜索原始内容增加指定数量的搜索推荐关键词;
This commit is contained in:
LamGC 2020-07-15 09:00:00 +08:00
parent 575dc0c7fb
commit f976017a89
Signed by: LamGC
GPG Key ID: 6C5AE2A913941E1D
5 changed files with 181 additions and 65 deletions

View File

@ -11,12 +11,17 @@ import net.lamgc.cgj.bot.cache.JsonRedisCacheStore;
import net.lamgc.cgj.bot.event.BufferedMessageSender;
import net.lamgc.cgj.bot.sort.PreLoadDataAttribute;
import net.lamgc.cgj.bot.sort.PreLoadDataAttributeComparator;
import net.lamgc.cgj.bot.util.PixivUtils;
import net.lamgc.cgj.pixiv.PixivDownload;
import net.lamgc.cgj.pixiv.PixivDownload.PageQuality;
import net.lamgc.cgj.pixiv.PixivSearchLinkBuilder;
import net.lamgc.cgj.pixiv.PixivURL;
import net.lamgc.utils.base.runner.Argument;
import net.lamgc.utils.base.runner.Command;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -315,6 +320,16 @@ public class BotCommandProcess {
@Argument(name = "p", force = false, defaultValue = "1") int pagesIndex
) throws IOException, InterruptedException {
log.debug("正在执行搜索...");
PixivSearchLinkBuilder linkBuilder = PixivUtils.buildSearchLinkBuilder(content, type, area, includeKeywords,
excludeKeywords, contentOption, pagesIndex);
int recommendKeywordCount = 0;
try {
recommendKeywordCount = Integer.parseInt(SettingProperties.getProperties(fromGroup)
.getProperty("search.recommendKeywordCount", "0"));
} catch(NumberFormatException e) {
log.warn("配置项 search.recommendKeywordCount 的值无效");
}
addRecommendKeywords(linkBuilder, recommendKeywordCount);
JsonObject resultBody = CacheStoreCentral.getCentral()
.getSearchBody(content, type, area, includeKeywords, excludeKeywords, contentOption, pagesIndex);
@ -525,6 +540,43 @@ public class BotCommandProcess {
return "色图姬收到了你的报告,将屏蔽该作品并对作品违规情况进行核实,感谢你的反馈!";
}
/**
 * Adds keywords taken from Pixiv's search-recommendation list to the builder's include keywords.
 * <p>This may improve how relevant the search results are to the search content.</p>
 * <p>The builder's raw search content is URL-encoded and substituted into
 * {@link PixivURL#PIXIV_SEARCH_RECOMMENDS}. Entries of the {@code candidates} array in the
 * response are then added, in response order, through {@code addIncludeKeyword} until
 * {@code includeKeywordsCount} keywords have been added or the array is exhausted.
 * No request is sent when the count is not positive or the search content is blank.</p>
 * @param searchLinkBuilder the PixivSearchLinkBuilder that receives the recommended keywords
 * @param includeKeywordsCount maximum number of recommended keywords to add
 * @throws IOException if the request fails, the interface answers with a status other than 200,
 *                     or the response body is not valid JSON.
 */
private static void addRecommendKeywords(PixivSearchLinkBuilder searchLinkBuilder, int includeKeywordsCount)
        throws IOException {
    if (includeKeywordsCount <= 0) {
        return;
    }
    String searchContent = searchLinkBuilder.getContent();
    if (searchContent == null || searchContent.trim().isEmpty()) {
        // Nothing to ask recommendations for.
        return;
    }

    // The content is raw user input (spaces, non-ASCII text), so it must be encoded
    // before it becomes part of the query string.
    String requestUrl = PixivURL.PIXIV_SEARCH_RECOMMENDS
            .replace("{content}", java.net.URLEncoder.encode(searchContent, "UTF-8"));
    HttpGet request = BotGlobal.getGlobal().getPixivDownload().createHttpGetRequest(requestUrl);
    request.addHeader(HttpHeaders.REFERER, "https://www.pixiv.net/");

    HttpResponse response = BotGlobal.getGlobal().getPixivDownload().getHttpClient().execute(request);
    // UTF-8 is only the fallback used when the response does not declare a charset.
    String responseBody = response.getEntity() == null
            ? ""
            : EntityUtils.toString(response.getEntity(), "UTF-8");
    if (response.getStatusLine().getStatusCode() != 200) {
        throw new IOException("Interface request failure: " + response.getStatusLine() +
                ", response body: '" + responseBody + "'");
    }

    JsonObject resultObject;
    try {
        resultObject = BotGlobal.getGlobal().getGson().fromJson(responseBody, JsonObject.class);
    } catch (com.google.gson.JsonParseException e) {
        throw new IOException("Invalid recommend response body: '" + responseBody + "'", e);
    }
    // Gson returns null for an empty body.
    if (resultObject == null
            || !resultObject.has("candidates")
            || !resultObject.get("candidates").isJsonArray()) {
        return;
    }

    JsonArray recommendsArr = resultObject.getAsJsonArray("candidates");
    int addedCount = 0;
    for (int index = 0; index < recommendsArr.size() && addedCount < includeKeywordsCount; index++) {
        if (!recommendsArr.get(index).isJsonObject()) {
            continue;
        }
        JsonObject candidate = recommendsArr.get(index).getAsJsonObject();
        // Skip malformed entries instead of failing the whole search.
        if (!candidate.has("tag_name") || !candidate.get("tag_name").isJsonPrimitive()) {
            continue;
        }
        searchLinkBuilder.addIncludeKeyword(candidate.get("tag_name").getAsString());
        addedCount++;
    }
}
/**
* 检查某一作品是否被报告
* @param illustId 作品Id

View File

@ -8,6 +8,7 @@ import net.lamgc.cgj.bot.BotCode;
import net.lamgc.cgj.bot.BotCommandProcess;
import net.lamgc.cgj.bot.SettingProperties;
import net.lamgc.cgj.bot.boot.BotGlobal;
import net.lamgc.cgj.bot.util.PixivUtils;
import net.lamgc.cgj.exception.HttpRequestException;
import net.lamgc.cgj.pixiv.PixivDownload;
import net.lamgc.cgj.pixiv.PixivSearchLinkBuilder;
@ -405,73 +406,11 @@ public final class CacheStoreCentral {
return PixivDownload.getRanking(result, start - 1, range);
}
/**
* 获取搜索结果
* @param content 搜索内容
* @param type 类型
* @param area 范围
* @param includeKeywords 包含关键词
* @param excludeKeywords 排除关键词
* @param contentOption 内容类型
* @return 返回完整搜索结果
* @throws IOException 当请求发生异常, 或接口返回异常信息时抛出.
*/
public JsonObject getSearchBody(
String content,
String type,
String area,
String includeKeywords,
String excludeKeywords,
String contentOption,
int pageIndex
) throws IOException {
PixivSearchLinkBuilder searchBuilder = new PixivSearchLinkBuilder(Strings.isNullOrEmpty(content) ? "" : content);
if (type != null) {
try {
searchBuilder.setSearchType(PixivSearchLinkBuilder.SearchType.valueOf(type.toUpperCase()));
} catch (IllegalArgumentException e) {
log.warn("不支持的SearchType: {}", type);
}
}
if (area != null) {
try {
searchBuilder.setSearchArea(PixivSearchLinkBuilder.SearchArea.valueOf(area));
} catch (IllegalArgumentException e) {
log.warn("不支持的SearchArea: {}", area);
}
}
if (contentOption != null) {
try {
searchBuilder.setSearchContentOption(
PixivSearchLinkBuilder.SearchContentOption.valueOf(contentOption.trim().toUpperCase()));
} catch (IllegalArgumentException e) {
log.warn("不支持的SearchContentOption: {}", contentOption);
}
}
if (!Strings.isNullOrEmpty(includeKeywords)) {
for (String keyword : includeKeywords.split(";")) {
searchBuilder.removeExcludeKeyword(keyword.trim());
searchBuilder.addIncludeKeyword(keyword.trim());
log.trace("已添加关键字: {}", keyword);
}
}
if (!Strings.isNullOrEmpty(excludeKeywords)) {
for (String keyword : excludeKeywords.split(";")) {
searchBuilder.removeIncludeKeyword(keyword.trim());
searchBuilder.addExcludeKeyword(keyword.trim());
log.trace("已添加排除关键字: {}", keyword);
}
}
if(pageIndex > 0) {
searchBuilder.setPage(pageIndex);
}
log.debug("正在搜索作品, 条件: {}", searchBuilder.getSearchCondition());
public JsonObject getSearchBody(PixivSearchLinkBuilder searchLinkBuilder) throws IOException {
log.debug("正在搜索作品, 条件: {}", searchLinkBuilder.getSearchCondition());
Locker<String> locker
= buildSyncKey(searchBuilder.buildURL());
= buildSyncKey(searchLinkBuilder.buildURL());
String requestUrl = locker.getKey();
log.debug("RequestUrl: {}", requestUrl);
JsonObject resultBody = null;
@ -524,6 +463,31 @@ public final class CacheStoreCentral {
return resultBody;
}
/**
 * Gets the search result for the given raw search parameters.
 * <p>The parameters are converted into a PixivSearchLinkBuilder by
 * {@code PixivUtils.buildSearchLinkBuilder} and the request is then delegated to
 * {@code getSearchBody(PixivSearchLinkBuilder)}.</p>
 * @param content search content
 * @param type search type
 * @param area search area
 * @param includeKeywords keywords to include
 * @param excludeKeywords keywords to exclude
 * @param contentOption content type option
 * @param pageIndex page index, forwarded unchanged to the builder factory
 * @return the complete search result
 * @throws IOException if the request fails, or the interface returns an error message.
 */
public JsonObject getSearchBody(
        String content,
        String type,
        String area,
        String includeKeywords,
        String excludeKeywords,
        String contentOption,
        int pageIndex
) throws IOException {
    PixivSearchLinkBuilder searchLinkBuilder = PixivUtils.buildSearchLinkBuilder(
            content, type, area, includeKeywords, excludeKeywords, contentOption, pageIndex);
    return getSearchBody(searchLinkBuilder);
}
protected ImageChecksum getImageChecksum(int illustId, int pageIndex) {
String cacheKey = illustId + ":" + pageIndex;
if(!imageChecksumCache.exists(cacheKey)) {

View File

@ -0,0 +1,86 @@
package net.lamgc.cgj.bot.util;
import com.google.common.base.Strings;
import net.lamgc.cgj.pixiv.PixivSearchLinkBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Pixiv utility class.
 */
public final class PixivUtils {

    private static final Logger log = LoggerFactory.getLogger(PixivUtils.class);

    private PixivUtils() {}

    /**
     * Quickly builds a PixivSearchLinkBuilder from raw string parameters.
     * <p>{@code type}, {@code area} and {@code contentOption} are matched against the enum
     * constant names after trimming and without regard to case; a value that matches no
     * constant is logged as a warning and the corresponding builder setting is left untouched.
     * {@code includeKeywords} and {@code excludeKeywords} are split on {@code ';'}; each
     * segment is trimmed and blank segments are ignored. Exclude keywords are processed after
     * include keywords, so a keyword present in both lists ends up excluded.</p>
     * @param content search content; {@code null} is treated as an empty string
     * @param type search type, may be {@code null}
     * @param area search area, may be {@code null}
     * @param includeKeywords keywords to include, separated by {@code ';'}, may be {@code null}
     * @param excludeKeywords keywords to exclude, separated by {@code ';'}, may be {@code null}
     * @param contentOption content level option, may be {@code null}
     * @param pageIndex search page number; only applied when greater than 0
     * @return the configured PixivSearchLinkBuilder object
     * @see PixivSearchLinkBuilder
     */
    public static PixivSearchLinkBuilder buildSearchLinkBuilder(
            String content,
            String type,
            String area,
            String includeKeywords,
            String excludeKeywords,
            String contentOption,
            int pageIndex
    ) {
        PixivSearchLinkBuilder searchBuilder = new PixivSearchLinkBuilder(Strings.nullToEmpty(content));

        if (type != null) {
            PixivSearchLinkBuilder.SearchType searchType =
                    findEnumConstant(PixivSearchLinkBuilder.SearchType.class, type);
            if (searchType != null) {
                searchBuilder.setSearchType(searchType);
            } else {
                log.warn("不支持的SearchType: {}", type);
            }
        }

        if (area != null) {
            PixivSearchLinkBuilder.SearchArea searchArea =
                    findEnumConstant(PixivSearchLinkBuilder.SearchArea.class, area);
            if (searchArea != null) {
                searchBuilder.setSearchArea(searchArea);
            } else {
                log.warn("不支持的SearchArea: {}", area);
            }
        }

        if (contentOption != null) {
            PixivSearchLinkBuilder.SearchContentOption searchContentOption =
                    findEnumConstant(PixivSearchLinkBuilder.SearchContentOption.class, contentOption);
            if (searchContentOption != null) {
                searchBuilder.setSearchContentOption(searchContentOption);
            } else {
                log.warn("不支持的SearchContentOption: {}", contentOption);
            }
        }

        if (!Strings.isNullOrEmpty(includeKeywords)) {
            for (String rawKeyword : includeKeywords.split(";")) {
                String keyword = rawKeyword.trim();
                if (keyword.isEmpty()) {
                    // "a;;b" or a trailing " ; " must not produce an empty keyword.
                    continue;
                }
                searchBuilder.removeExcludeKeyword(keyword);
                searchBuilder.addIncludeKeyword(keyword);
                log.trace("已添加关键字: {}", keyword);
            }
        }

        if (!Strings.isNullOrEmpty(excludeKeywords)) {
            for (String rawKeyword : excludeKeywords.split(";")) {
                String keyword = rawKeyword.trim();
                if (keyword.isEmpty()) {
                    continue;
                }
                searchBuilder.removeIncludeKeyword(keyword);
                searchBuilder.addExcludeKeyword(keyword);
                log.trace("已添加排除关键字: {}", keyword);
            }
        }

        if (pageIndex > 0) {
            searchBuilder.setPage(pageIndex);
        }
        return searchBuilder;
    }

    /**
     * Looks up an enum constant by name, ignoring surrounding whitespace and letter case.
     * <p>{@link String#equalsIgnoreCase(String)} is used so the match does not depend on the
     * default locale.</p>
     * @param enumType enum class to search
     * @param rawName name supplied by the caller, must not be {@code null}
     * @param <E> enum type
     * @return the matching constant, or {@code null} if no constant has that name
     */
    private static <E extends Enum<E>> E findEnumConstant(Class<E> enumType, String rawName) {
        String name = rawName.trim();
        for (E constant : enumType.getEnumConstants()) {
            if (constant.name().equalsIgnoreCase(name)) {
                return constant;
            }
        }
        return null;
    }

}

View File

@ -161,6 +161,10 @@ public class PixivSearchLinkBuilder {
'}';
}
/**
 * Returns the value of this builder's {@code content} field.
 * <p>NOTE(review): presumably the raw search content this builder was created with;
 * confirm against the constructor.</p>
 * @return the content string held by this builder
 */
public String getContent() {
    return this.content;
}
public PixivSearchLinkBuilder setSearchArea(SearchArea searchArea) {
this.searchArea = Objects.requireNonNull(searchArea);
return this;

View File

@ -194,6 +194,16 @@ public final class PixivURL {
*/
public final static String PIXIV_USER_COLLECTION_PAGE = "https://www.pixiv.net/bookmark.php?rest=show&p={pageIndex}";
/**
 * Search recommendation interface.
 * <p>Can return tag information related to the search content, to improve search results.</p>
 * Text that needs to be replaced:
 * <ul>
 * <li>{content} - search content</li>
 * </ul>
 */
public static final String PIXIV_SEARCH_RECOMMENDS = "https://www.pixiv.net/rpc/cps.php?keyword={content}";
/**
* 获取排名榜
* @param mode 查询类型, 详细信息看{@link RankingMode}, 如本参数为null, 则为每天