提交 2a2daa29 authored 作者: kxjia's avatar kxjia

完善代码

上级 52ab6995
......@@ -79,9 +79,9 @@ public class CrawlController {
/**
 * Returns the list of all configured crawl data sources.
 * (The commit switched from enabled-only to all sources.)
 */
@GetMapping("/sources")
public ResponseEntity<ApiResponse<List<CrawlSource>>> getSources() {
    // FIX: removed diff residue — the dump contained a duplicated @GetMapping
    // annotation and two conflicting declarations of `sources` (old
    // getEnabledSources() and new getAllSources()); kept the new call.
    List<CrawlSource> sources = crawlTaskService.getAllSources();
    return ResponseEntity.ok(ApiResponse.success(sources));
}
......@@ -91,7 +91,6 @@ public class CrawlController {
@GetMapping("/task/{id}")
public ResponseEntity<ApiResponse<CrawlTaskDTO>> getTaskById(@PathVariable Long id) {
CrawlTaskDTO task = crawlTaskService.findById(id);
if (task == null) {
return ResponseEntity.ok(ApiResponse.error(404, "任务不存在"));
}
......@@ -136,6 +135,15 @@ public class CrawlController {
return ResponseEntity.ok(ApiResponse.success("删除成功", null));
}
/**
 * Clears the crawl history.
 * Delegates to CrawlTaskService#clearHistory and returns a success envelope
 * with a Chinese "cleared" message and a null payload.
 * NOTE(review): this is a destructive endpoint — confirm authentication /
 * authorization is enforced upstream (none is visible here).
 */
@DeleteMapping("/history")
public ResponseEntity<ApiResponse<Void>> clearHistory() {
    crawlTaskService.clearHistory();
    return ResponseEntity.ok(ApiResponse.success("清除成功", null));
}
/**
* 爬取单个数据源
*/
......
......@@ -45,11 +45,9 @@ public class PenaltyController {
/**
 * Returns the detail of a single penalty record by its id.
 * A missing record is reported as an application-level 404 inside an
 * HTTP 200 envelope (ApiResponse.error), matching the controller's
 * existing error convention.
 */
public ResponseEntity<ApiResponse<PenaltyRecordDTO>> findById(@PathVariable Long id) {
    log.debug("查询处罚记录详情: id={}", id);
    PenaltyRecordDTO record = penaltyRecordService.findById(id);
    if (record == null) {
        return ResponseEntity.ok(ApiResponse.error(404, "记录不存在"));
    }
    return ResponseEntity.ok(ApiResponse.success(record));
}
......@@ -146,4 +144,18 @@ public class PenaltyController {
String report = penaltyRecordService.getAnalysisReport(id);
return ResponseEntity.ok(ApiResponse.success(report));
}
/**
 * AI natural-language search over penalty records.
 *
 * @param request body carrying the free-text query
 * @param page    zero-based page index (default 0)
 * @param size    page size (default 10, clamped to [1, 100])
 * @return paged search result in the standard ApiResponse envelope
 */
@PostMapping("/ai-search")
public ResponseEntity<ApiResponse<PageResponse<PenaltyRecordDTO>>> aiSearch(
        @RequestBody AISearchRequest request,
        @RequestParam(defaultValue = "0") int page,
        @RequestParam(defaultValue = "10") int size) {
    // FIX: reject missing/blank queries up front instead of passing null
    // into the service layer.
    if (request == null || request.getQuery() == null || request.getQuery().isBlank()) {
        return ResponseEntity.ok(ApiResponse.error(400, "查询内容不能为空"));
    }
    // Defensive paging bounds: negative pages and oversized/zero sizes are
    // normalized rather than forwarded.
    int safePage = Math.max(page, 0);
    int safeSize = Math.min(Math.max(size, 1), 100);
    log.debug("AI搜索: query={}, page={}, size={}", request.getQuery(), safePage, safeSize);
    PageResponse<PenaltyRecordDTO> result =
            penaltyRecordService.aiSearch(request.getQuery(), safePage, safeSize);
    return ResponseEntity.ok(ApiResponse.success(result));
}
}
package com.fintech.penalty.crawler;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fintech.penalty.config.CrawlerConfig;
import com.fintech.penalty.dto.CrawlResult;
import com.fintech.penalty.dto.PenaltyRecordDTO;
import com.fintech.penalty.entity.CrawlProgress;
import com.fintech.penalty.entity.PenaltyRecord;
import com.fintech.penalty.repository.CrawlProgressRepository;
import com.fintech.penalty.repository.PenaltyRecordRepository;
import com.fintech.penalty.service.CrawlTaskService;
import lombok.RequiredArgsConstructor;
......@@ -12,22 +16,30 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 处罚信息爬虫 - 核心爬虫类
* 处罚信息爬虫 - 核心爬虫类(支持断点续传)
*/
@Component
@RequiredArgsConstructor
......@@ -35,325 +47,608 @@ import java.util.concurrent.CompletableFuture;
public class PenaltyCrawler {
private final PenaltyRecordRepository penaltyRecordRepository;
private final CrawlProgressRepository crawlProgressRepository;
private final CrawlTaskService crawlTaskService;
private final CrawlerConfig crawlerConfig;
@Value("${crawler.resume.enabled:true}")
private boolean resumeEnabled; // 是否启用断点续传,默认启用
@Value("${crawler.resume.force-restart:false}")
private boolean forceRestart; // 是否强制重新开始,默认false
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
private static final DateTimeFormatter DATE_FORMATTER_CN = DateTimeFormatter.ofPattern("yyyy[-/年]MM[-/月]dd[日]");
// 线程安全的罚单编号计数器(生产环境建议用数据库序列或Redis)
private final AtomicInteger penaltySeq = new AtomicInteger(0);
// 当前任务的唯一标识
private String currentTaskId;
@Async("crawlExecutor")
public CompletableFuture<CrawlResult> crawl(CrawlerConfig.SourceConfig source) {
log.info("开始爬取: {} - {}", source.getName(), source.getListUrl());
// 生成任务ID
currentTaskId = UUID.randomUUID().toString();
log.info("========== 开始爬取任务 ==========");
log.info("任务ID: {}", currentTaskId);
log.info("数据源: {} - {}", source.getName(), source.getListUrl());
log.info("断点续传: {}", resumeEnabled ? "启用" : "禁用");
log.info("强制重启: {}", forceRestart ? "是" : "否");
CrawlResult result = CrawlResult.builder()
.sourceName(source.getName())
.build();
List<PenaltyRecord> allRecords = new ArrayList<>();
try {
List<PenaltyRecord> records = switch (source.getCode()) {
case "pbc" -> crawlPBC(source);
case "cbirc" -> crawlCBIRC(source);
case "csrc" -> crawlCSRC(source);
case "safe" -> crawlSAFE(source);
case "nfraf" -> crawlNFRAF(source);
default -> crawlDefault(source);
};
allRecords.addAll(records);
List<PenaltyRecord> records = crawlNFRA(source);
int newCount = 0;
for (PenaltyRecord record : records) {
boolean isNew = false;
if (record.getSourceUrl() != null && !record.getSourceUrl().isEmpty()) {
if (!penaltyRecordRepository.existsBySourceUrl(record.getSourceUrl())) {
penaltyRecordRepository.save(record);
newCount++;
isNew = true;
}
} else {
String key = record.getIllegalFacts() + "_" + record.getRegulator();
if (!penaltyRecordRepository.existsByIllegalFactsAndRegulator(record.getIllegalFacts(), record.getRegulator())) {
penaltyRecordRepository.save(record);
newCount++;
isNew = true;
}
}
}
// 标记任务完成
markTaskCompleted(source.getName());
result.setSuccess(true);
result.setTotalFound(records.size());
result.setNewRecords(newCount);
result.setNewRecords(records.size());
result.setRecords(records.stream().map(this::toDTO).toList());
log.info("爬取完成: {} - 共发现{}条记录, 新增{}条", source.getName(), records.size(), newCount);
log.info("爬取完成: {} - 共发现{}条记录", source.getName(), records.size());
} catch (Exception e) {
log.error("爬取失败: {} - {}", source.getName(), e.getMessage(), e);
result.setSuccess(false);
result.setErrorMessage(e.getMessage());
// 标记任务失败
markTaskFailed(source.getName());
}
crawlTaskService.saveCrawlResult(result);
return CompletableFuture.completedFuture(result);
}
private List<PenaltyRecord> crawlPBC(CrawlerConfig.SourceConfig source) throws Exception {
// ==================== NFRA 核心爬取(支持断点续传) ====================
private List<PenaltyRecord> crawlNFRA(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
log.info("正在连接中国人民银行网站...");
List<PenaltyRecord> batchBuffer = new ArrayList<>();
ObjectMapper mapper = new ObjectMapper();
log.info("正在连接国家金融监督管理总局网站(nfra.gov.cn)...");
try {
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) {
log.warn("PBC: 无法获取页面内容");
return records;
String baseUrl = normalizeBaseUrl(source.getBaseUrl());
// ========== 获取上次爬取进度 ==========
int startPage = 1;
int pageSize = 18;
int totalPages = 4093;
int totalNewRecords = 0;
if (resumeEnabled && !forceRestart) {
Optional<CrawlProgress> progressOpt = getLastProgress(source.getName());
if (progressOpt.isPresent() && !isTaskCompleted(source.getName())) {
CrawlProgress progress = progressOpt.get();
startPage = progress.getLastPageIndex() + 1; // 从下一页开始
totalPages = progress.getTotalPages() != null ? progress.getTotalPages() : totalPages;
totalNewRecords = progress.getTotalRecordsCrawled() != null ? progress.getTotalRecordsCrawled() : 0;
log.info("========== 检测到上次未完成的任务 ==========");
log.info("上次任务ID: {}", progress.getTaskId());
log.info("上次完成页码: {}", progress.getLastPageIndex());
log.info("上次已爬取记录数: {}", totalNewRecords);
log.info("本次将从第 {} 页继续爬取", startPage);
} else if (progressOpt.isPresent() && isTaskCompleted(source.getName())) {
log.info("上次任务已完成,本次从头开始爬取");
// 清除旧的完成记录,开始新任务
clearProgress(source.getName());
} else {
log.info("未检测到未完成的任务,从头开始爬取");
}
} else {
log.info("断点续传已禁用或强制重启,从头开始爬取");
// 清除旧的进度记录
clearProgress(source.getName());
}
Elements items = doc.select("ul li, div.list-item, tr");
// 初始化进度记录
initProgress(source.getName(), startPage - 1, totalPages, totalNewRecords);
// 记录失败的页码,用于后续重试
List<Integer> failedPages = new ArrayList<>();
int pageIndex = startPage;
int maxPages = 1000000;
while (pageIndex <= totalPages && pageIndex <= maxPages) {
String listUrl = buildListApiUrl(source, pageIndex, pageSize);
log.info("正在抓取第{}页: {}", pageIndex, listUrl);
for (Element item : items) {
try {
Element link = item.selectFirst("a[href]");
if (link == null) continue;
String json = fetchJson(listUrl);
JsonNode rootNode = mapper.readTree(json);
String title = link.text().trim();
String href = link.attr("href");
if (rootNode.has("data") && rootNode.get("data").has("rows")) {
JsonNode rows = rootNode.get("data").get("rows");
JsonNode pageInfo = rootNode.get("data");
if (isPenaltyRelated(title)) {
String dateStr = extractDate(item.text());
if (pageInfo.has("totalPages")) {
totalPages = pageInfo.get("totalPages").asInt();
// 更新总页数到进度表
updateTotalPages(source.getName(), totalPages);
} else if (pageInfo.has("total")) {
int total = pageInfo.get("total").asInt();
totalPages = (int) Math.ceil((double) total / pageSize);
updateTotalPages(source.getName(), totalPages);
}
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.institutionName(extractInstitutionName(title))
.institutionType(guessInstitutionType(title))
.penaltyType(guessPenaltyType(title))
.penaltyDate(parseDate(dateStr))
.regulator("中国人民银行")
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
int pageRecordCount = 0;
int failedCount = 0;
records.add(record);
for (int i = 0; i < rows.size(); i++) {
JsonNode row = rows.get(i);
try {
String docId = row.has("guid") ? row.get("guid").asText() :
(row.has("docId") ? row.get("docId").asText() : null);
String title = row.has("docTitle") ? row.get("docTitle").asText() : "";
if (docId == null || title.isEmpty()) {
log.debug("跳过无效记录: docId={}, title={}", docId, title);
failedCount++;
continue;
}
} catch (Exception e) {
log.debug("解析PBC条目失败: {}", e.getMessage());
String detailUrl = baseUrl + "cn/static/data/DocInfo/SelectByDocId/data_docId=" + docId + ".json";
PenaltyRecord record = fetchWithRetry(detailUrl, title, source, 3);
if (record != null) {
// 检查是否已存在
boolean isNew = false;
if (record.getSourceUrl() != null && !record.getSourceUrl().isEmpty()) {
if (!penaltyRecordRepository.existsBySourceUrl(record.getSourceUrl())) {
isNew = true;
}
} else {
if (!penaltyRecordRepository.existsByIllegalFactsAndRegulator(
record.getIllegalFacts(), record.getRegulator())) {
isNew = true;
}
}
log.info("PBC: 找到 {} 条相关记录", records.size());
if (isNew) {
batchBuffer.add(record);
records.add(record);
pageRecordCount++;
totalNewRecords++;
}
} else {
failedCount++;
}
} catch (Exception e) {
log.error("爬取PBC失败: {}", e.getMessage());
failedCount++;
log.error("解析第{}页第{}条记录失败: {}", pageIndex, i, e.getMessage());
}
return records;
}
private List<PenaltyRecord> crawlCBIRC(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
log.info("正在连接中国银保监会网站...");
try {
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) {
log.warn("CBIRC: 无法获取页面内容");
return records;
// ========== 每抓取10页保存一次到数据库 ==========
if (pageIndex % 10 == 0 && !batchBuffer.isEmpty()) {
int savedCount = saveBatchToDatabase(batchBuffer);
log.info("===== 已抓取{}页,批量保存{}条新记录到数据库 =====", pageIndex, savedCount);
batchBuffer.clear();
}
Elements items = doc.select("li, div.item, tr");
// ========== 每抓取5页更新一次进度(断点续传) ==========
if (pageIndex % 5 == 0) {
updateProgress(source.getName(), pageIndex, totalNewRecords);
log.info("===== 已更新爬取进度: 第{}页,累计{}条记录 =====", pageIndex, totalNewRecords);
}
for (Element item : items) {
try {
Element link = item.selectFirst("a[href]");
if (link == null) continue;
// 输出页面进度
double progress = (double) pageIndex / totalPages * 100;
log.info("第{}页爬取完成,本页获取{}条新记录,失败{}条,累计{}条新记录,总进度:{:.2f}% ({}/{})",
pageIndex, pageRecordCount, failedCount, totalNewRecords, progress, pageIndex, totalPages);
String title = link.text().trim();
String href = link.attr("href");
} else if (rootNode.has("rptCode") && rootNode.get("rptCode").asText().equals("200")) {
log.warn("NFRA API返回结束标记(rptCode=200),停止爬取");
break;
} else {
log.warn("NFRA: API返回格式不正确,尝试HTML解析");
failedPages.add(pageIndex);
}
if (isPenaltyRelated(title)) {
String dateStr = extractDate(item.text());
pageIndex++;
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.institutionName(extractInstitutionName(title))
.institutionType("银行")
.penaltyType(guessPenaltyType(title))
.penaltyDate(parseDate(dateStr))
.regulator("中国银保监会")
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
} catch (Exception e) {
log.error("抓取NFRA第{}页失败: {}", pageIndex, e.getMessage(), e);
failedPages.add(pageIndex);
pageIndex++;
}
records.add(record);
// 每爬完一页停留5秒
if (pageIndex <= totalPages && pageIndex <= maxPages) {
log.info("等待5秒后继续爬取下一页...");
Thread.sleep(1000);
}
} catch (Exception e) {
log.debug("解析CBIRC条目失败: {}", e.getMessage());
}
// ========== 保存剩余不足10页的数据 ==========
if (!batchBuffer.isEmpty()) {
int savedCount = saveBatchToDatabase(batchBuffer);
log.info("===== 爬取完成,保存最后{}条新记录到数据库 =====", savedCount);
batchBuffer.clear();
}
log.info("CBIRC: 找到 {} 条相关记录", records.size());
// 最终更新进度
updateProgress(source.getName(), pageIndex - 1, totalNewRecords);
} catch (Exception e) {
log.error("爬取CBIRC失败: {}", e.getMessage());
// 重试失败的页面
if (!failedPages.isEmpty()) {
log.info("开始重试失败的页面: {}", failedPages);
retryFailedPagesWithBatchSave(source, failedPages, records, mapper, baseUrl, pageSize);
}
return records;
if (records.isEmpty()) {
log.info("NFRA: JSON API未返回数据,尝试HTML解析...");
records = crawlNFRADirect(source);
if (!records.isEmpty()) {
saveBatchToDatabase(records);
}
}
private List<PenaltyRecord> crawlCSRC(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
log.info("正在连接中国证监会网站...");
log.info("NFRA: 共解析{}条新记录,已全部保存到数据库", totalNewRecords);
} catch (Exception e) {
log.error("NFRA爬取失败: {}", e.getMessage(), e);
// 异常时也尝试保存已缓存的数据
if (!batchBuffer.isEmpty()) {
saveBatchToDatabase(batchBuffer);
}
// 更新失败状态
updateProgressStatus(source.getName(), "FAILED");
try {
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) {
log.warn("CSRC: 无法获取页面内容");
records = crawlNFRADirect(source);
if (!records.isEmpty()) {
saveBatchToDatabase(records);
}
} catch (Exception ex) {
log.error("NFRA HTML解析也失败: {}", ex.getMessage());
}
}
return records;
}
Elements items = doc.select("li, div.list-item, tr");
for (Element item : items) {
try {
Element link = item.selectFirst("a[href]");
if (link == null) continue;
// ==================== 断点续传相关方法 ====================
String title = link.text().trim();
String href = link.attr("href");
/**
 * Looks up the unfinished (status = "RUNNING") crawl progress record for a
 * source; used by the resume logic to continue from the last completed page.
 *
 * @param sourceName logical name of the crawl source
 * @return the RUNNING progress row, or empty when no interrupted task exists
 */
private Optional<CrawlProgress> getLastProgress(String sourceName) {
    return crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
}
if (isPenaltyRelated(title)) {
String dateStr = extractDate(item.text());
/**
 * Tells whether the last crawl task for the source finished successfully,
 * i.e. a progress row with status "COMPLETED" exists.
 */
private boolean isTaskCompleted(String sourceName) {
    return crawlProgressRepository
            .findBySourceNameAndStatus(sourceName, "COMPLETED")
            .isPresent();
}
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.institutionName(extractInstitutionName(title))
.institutionType(guessInstitutionType(title))
.penaltyType(guessPenaltyType(title))
.penaltyDate(parseDate(dateStr))
.regulator("中国证监会")
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
/**
* 初始化爬取进度
*/
private void initProgress(String sourceName, int lastPage, int totalPages, int totalRecords) {
try {
// 先删除旧的RUNNING状态记录
Optional<CrawlProgress> existing = crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
if (existing.isPresent()) {
crawlProgressRepository.delete(existing.get());
}
CrawlProgress progress = CrawlProgress.builder()
.sourceName(sourceName)
.taskId(currentTaskId)
.lastPageIndex(lastPage)
.totalPages(totalPages)
.totalRecordsCrawled(totalRecords)
.status("RUNNING")
.startTime(LocalDateTime.now())
.updateTime(LocalDateTime.now())
.build();
records.add(record);
crawlProgressRepository.save(progress);
log.info("初始化爬取进度: source={}, startPage={}, totalPages={}", sourceName, lastPage + 1, totalPages);
} catch (Exception e) {
log.error("初始化爬取进度失败: {}", e.getMessage());
}
}
/**
 * Persists the resume checkpoint: the last finished page index and the
 * cumulative number of records crawled in this task.
 * Failures are logged and swallowed on purpose — progress bookkeeping must
 * never abort the crawl itself.
 */
private void updateProgress(String sourceName, int pageIndex, int totalRecords) {
    try {
        crawlProgressRepository.updateProgress(sourceName, currentTaskId, pageIndex, LocalDateTime.now());
        crawlProgressRepository.updateTotalRecords(sourceName, currentTaskId, totalRecords, LocalDateTime.now());
    } catch (Exception e) {
        // FIX: dropped the stray `log.debug("解析CSRC条目失败"...)` line — diff
        // residue from the removed CSRC crawler that mislabeled these errors.
        log.error("更新爬取进度失败: {}", e.getMessage());
    }
}
log.info("CSRC: 找到 {} 条相关记录", records.size());
/**
 * Persists the (re)discovered total page count for a source so that resume
 * logic and progress-percentage output use the latest value reported by the
 * list API.
 * Failures are logged and swallowed: bookkeeping must never abort the crawl.
 */
private void updateTotalPages(String sourceName, int totalPages) {
    try {
        crawlProgressRepository.updateTotalPages(sourceName, currentTaskId, totalPages, LocalDateTime.now());
    } catch (Exception e) {
        log.error("更新总页数失败: {}", e.getMessage());
    }
}
/**
 * Updates the status column (RUNNING / COMPLETED / FAILED) of the current
 * task's progress row.
 * Failures are logged and swallowed: bookkeeping must never abort the crawl.
 */
private void updateProgressStatus(String sourceName, String status) {
    try {
        crawlProgressRepository.updateStatus(sourceName, currentTaskId, status, LocalDateTime.now());
    } catch (Exception e) {
        // FIX: dropped the stray `log.error("爬取CSRC失败"...)` line — diff
        // residue from the removed CSRC crawler that mislabeled these errors.
        log.error("更新进度状态失败: {}", e.getMessage());
    }
}
return records;
/**
 * Marks the current task as successfully finished (status COMPLETED).
 * Once a COMPLETED row exists, the next run starts from page 1 instead of
 * resuming (see isTaskCompleted / getLastProgress).
 */
private void markTaskCompleted(String sourceName) {
    updateProgressStatus(sourceName, "COMPLETED");
    log.info("任务完成: source={}, taskId={}", sourceName, currentTaskId);
}
private List<PenaltyRecord> crawlSAFE(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
log.info("正在连接国家外汇管理局网站...");
/**
 * Marks the current task as failed (status FAILED); a later run with resume
 * enabled will continue from the last recorded page.
 */
private void markTaskFailed(String sourceName) {
    updateProgressStatus(sourceName, "FAILED");
    // FIX: a task failure is not an informational event — log it at WARN.
    log.warn("任务失败: source={}, taskId={}", sourceName, currentTaskId);
}
/**
* 清除进度记录
*/
private void clearProgress(String sourceName) {
try {
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) {
log.warn("SAFE: 无法获取页面内容");
return records;
Optional<CrawlProgress> running = crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
running.ifPresent(crawlProgressRepository::delete);
Optional<CrawlProgress> completed = crawlProgressRepository.findBySourceNameAndStatus(sourceName, "COMPLETED");
completed.ifPresent(crawlProgressRepository::delete);
log.info("已清除 {} 的进度记录", sourceName);
} catch (Exception e) {
log.error("清除进度记录失败: {}", e.getMessage());
}
}
Elements items = doc.select("li, div.list-item");
/**
* 批量保存记录到数据库
*/
private int saveBatchToDatabase(List<PenaltyRecord> records) {
if (records == null || records.isEmpty()) {
return 0;
}
for (Element item : items) {
int savedCount = 0;
for (PenaltyRecord record : records) {
try {
Element link = item.selectFirst("a[href]");
if (link == null) continue;
penaltyRecordRepository.save(record);
savedCount++;
} catch (Exception e) {
log.error("保存记录失败: {} - {}", record.getPenaltyNumber(), e.getMessage());
}
}
String title = link.text().trim();
String href = link.attr("href");
log.debug("批量保存完成: 成功保存{}条/共{}条", savedCount, records.size());
return savedCount;
}
if (isPenaltyRelated(title)) {
String dateStr = extractDate(item.text());
/**
* 重试失败的页面(支持批量保存)
*/
private void retryFailedPagesWithBatchSave(CrawlerConfig.SourceConfig source, List<Integer> failedPages,
List<PenaltyRecord> records, ObjectMapper mapper,
String baseUrl, int pageSize) {
List<PenaltyRecord> retryBuffer = new ArrayList<>();
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.institutionName(extractInstitutionName(title))
.institutionType("其他")
.penaltyType(guessPenaltyType(title))
.penaltyDate(parseDate(dateStr))
.regulator("国家外汇管理局")
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
for (Integer pageIndex : failedPages) {
try {
log.info("重试第{}页", pageIndex);
String listUrl = buildListApiUrl(source, pageIndex, pageSize);
String json = fetchJson(listUrl);
JsonNode rootNode = mapper.readTree(json);
if (rootNode.has("data") && rootNode.get("data").has("rows")) {
JsonNode rows = rootNode.get("data").get("rows");
for (JsonNode row : rows) {
try {
String docId = row.has("guid") ? row.get("guid").asText() :
(row.has("docId") ? row.get("docId").asText() : null);
String title = row.has("docTitle") ? row.get("docTitle").asText() : "";
if (docId != null && !title.isEmpty()) {
String detailUrl = baseUrl + "cn/static/data/DocInfo/SelectByDocId/data_docId=" + docId + ".json";
PenaltyRecord record = fetchWithRetry(detailUrl, title, source, 2);
if (record != null && !records.contains(record)) {
boolean exists = false;
if (record.getSourceUrl() != null && !record.getSourceUrl().isEmpty()) {
exists = penaltyRecordRepository.existsBySourceUrl(record.getSourceUrl());
} else {
exists = penaltyRecordRepository.existsByIllegalFactsAndRegulator(
record.getIllegalFacts(), record.getRegulator());
}
if (!exists) {
retryBuffer.add(record);
records.add(record);
}
}
}
} catch (Exception e) {
log.debug("解析SAFE条目失败: {}", e.getMessage());
log.debug("重试时解析记录失败: {}", e.getMessage());
}
}
}
Thread.sleep(3000);
log.info("SAFE: 找到 {} 条相关记录", records.size());
if (retryBuffer.size() >= 50) {
saveBatchToDatabase(retryBuffer);
retryBuffer.clear();
}
} catch (Exception e) {
log.error("重试第{}页失败: {}", pageIndex, e.getMessage());
}
}
if (!retryBuffer.isEmpty()) {
saveBatchToDatabase(retryBuffer);
}
}
/**
 * Fetches and parses one NFRA detail page, retrying up to {@code maxRetries}
 * times with linear back-off (1s, 2s, ...).
 *
 * @return the parsed record, or null when every attempt failed or the thread
 *         was interrupted (the interrupt flag is restored)
 */
private PenaltyRecord fetchWithRetry(String detailUrl, String title,
                                     CrawlerConfig.SourceConfig source, int maxRetries) {
    for (int attempt = 1; attempt <= maxRetries; attempt++) {
        try {
            return fetchAndParseDetail(detailUrl, title, source);
        } catch (Exception e) {
            // FIX: removed the stray `log.error("爬取SAFE失败"...)` line — diff
            // residue from the deleted SAFE crawler that mislabeled NFRA errors.
            if (attempt == maxRetries) {
                log.debug("获取详情失败(已重试{}次): {} - {}", maxRetries, detailUrl, e.getMessage());
                return null;
            }
            try {
                Thread.sleep(1000L * attempt);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                return null;
            }
        }
    }
    return null;
}
return records;
/**
 * Builds the NFRA list-API URL for one page of documents.
 * Pages 1-3 are served as pre-rendered static JSON files; later pages go
 * through the dynamic cbircweb endpoint.
 * NOTE(review): the dynamic branch hardcodes pageSize=100 and ignores the
 * pageSize parameter, while the static branch honors it — confirm this
 * asymmetry is intentional (the caller passes pageSize=18).
 */
private String buildListApiUrl(CrawlerConfig.SourceConfig source, int pageIndex, int pageSize) {
    String listUrl = source.getListUrl();
    String itemId = extractItemId(listUrl);
    if (pageIndex < 4) {
        return "https://www.nfra.gov.cn/cn/static/data/DocInfo/getDocInfoListByItemId/data_itemId=" + itemId + ",pageIndex=" + pageIndex + ",pageSize=" + pageSize + ".json";
    } else {
        return "https://www.nfra.gov.cn/cbircweb/DocInfo/SelectDocByItemIdAndChild?itemId=" + itemId + "&pageSize=100&pageIndex="+pageIndex;
    }
}
private List<PenaltyRecord> crawlDefault(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
/**
 * Pulls the numeric itemId out of a configured list URL
 * (e.g. "...?itemId=4113&..."); falls back to "1855" when absent.
 */
private String extractItemId(String url) {
    Matcher matcher = Pattern.compile("itemId=(\\d+)").matcher(url);
    return matcher.find() ? matcher.group(1) : "1855";
}
/**
 * Normalizes the configured base URL so it always ends with a slash;
 * a null base URL falls back to the NFRA site root.
 */
private String normalizeBaseUrl(String baseUrl) {
    if (baseUrl == null) {
        return "https://www.nfra.gov.cn/";
    }
    return baseUrl.endsWith("/") ? baseUrl : baseUrl + "/";
}
/**
 * Performs a plain HTTP GET against the given URL and returns the response
 * body read as UTF-8, with line breaks dropped (lines are concatenated).
 * The connection is always disconnected in the finally block.
 *
 * @throws Exception on connect/read failure — callers treat any exception
 *                   as a retryable page failure
 */
private String fetchJson(String url) throws Exception {
    HttpURLConnection conn = null;
    try {
        // FIX: removed two diff-residue lines (`Document doc = fetchWithRetry(
        // source.getListUrl(), 3);` and the `records` null-check) that
        // referenced variables not in scope here.
        conn = (HttpURLConnection) new URL(url).openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("User-Agent", USER_AGENT);
        conn.setConnectTimeout(30000);
        conn.setReadTimeout(30000);
        conn.connect();
        // NOTE(review): the HTTP status code is never checked — a 404/500 body
        // would be returned as "JSON"; confirm the JSON parse failure path is
        // the intended handling for that case.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            StringBuilder response = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                response.append(line);
            }
            return response.toString();
        }
    } finally {
        if (conn != null) {
            conn.disconnect();
        }
    }
}
Elements links = doc.select("a[href]");
private PenaltyRecord fetchAndParseDetail(String detailUrl, String title, CrawlerConfig.SourceConfig source) {
try {
ObjectMapper mapper = new ObjectMapper();
String json = fetchJson(detailUrl);
JsonNode rootNode = mapper.readTree(json);
for (Element link : links) {
String href = link.attr("href");
String text = link.text().trim();
if (!rootNode.has("data")) return null;
JsonNode data = rootNode.get("data");
if (isPenaltyRelated(text) && href.matches(".*\\.(html?|jsp)$")) {
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.institutionName(extractInstitutionName(text))
.institutionType(guessInstitutionType(text))
.penaltyType(guessPenaltyType(text))
.penaltyDate(LocalDate.now())
.regulator(source.getName())
.illegalFacts(text)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
String docTitle = data.has("docTitle") ? data.get("docTitle").asText() : title;
String publishDate = data.has("publishDate") ? data.get("publishDate").asText() : "";
String docClob = data.has("docClob") ? data.get("docClob").asText() : "";
String penaltyNumber = extractPenaltyNumber(docTitle);
String institutionName = extractInstitutionNameFromContent(docClob);
String penaltyAmountStr = extractPenaltyAmount(docClob);
String illegalFacts = extractIllegalFacts(docClob);
LocalDate penaltyDate = parseDateSafely(publishDate);
return PenaltyRecord.builder()
.penaltyNumber(penaltyNumber)
.institutionName(institutionName.isEmpty() ? extractInstitutionName(docTitle) : institutionName)
.institutionType(guessInstitutionType(institutionName))
.penaltyType("行政处罚")
.penaltyAmount(penaltyAmountStr != null ? new BigDecimal(penaltyAmountStr) : null)
.penaltyDate(penaltyDate)
.regulator("国家金融监督管理总局")
.illegalFacts(illegalFacts)
.penaltyBasis(extractPenaltyBasis(docClob))
.sourceUrl(detailUrl)
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
records.add(record);
}
}
} catch (Exception e) {
log.error("默认爬取失败: {}", e.getMessage());
log.debug("解析NFRA详情失败: {}", e.getMessage());
return null;
}
}
return records;
/**
 * Parses a publish-date string into a LocalDate.
 * Accepts "yyyy-MM-dd", "yyyy/MM/dd", "yyyy年MM月dd日" and — unlike the strict
 * formatter alone — single-digit month/day variants such as "2024-1-5".
 * Longer timestamps are reduced to their leading date part.
 * Falls back to today's date when the input is missing or unparseable
 * (preserves the original fallback behavior).
 */
private LocalDate parseDateSafely(String dateStr) {
    if (dateStr == null || dateStr.isEmpty()) {
        return LocalDate.now();
    }
    try {
        String cleaned = dateStr.replace("年", "-").replace("月", "-").replace("日", "")
                .replace("/", "-").trim();
        // FIX: the strict yyyy-MM-dd formatter rejected single-digit months/
        // days and "/"-separated dates, silently substituting today's date.
        Matcher m = Pattern.compile("(\\d{4})-(\\d{1,2})-(\\d{1,2})").matcher(cleaned);
        if (m.find()) {
            return LocalDate.of(Integer.parseInt(m.group(1)),
                    Integer.parseInt(m.group(2)),
                    Integer.parseInt(m.group(3)));
        }
        if (cleaned.length() > 10) {
            cleaned = cleaned.substring(0, 10);
        }
        return LocalDate.parse(cleaned, DATE_FORMATTER);
    } catch (DateTimeException | NumberFormatException e) {
        log.debug("日期解析失败: {}", dateStr);
        return LocalDate.now();
    }
}
// ==================== 通用工具方法 ====================
private Document fetchWithRetry(String url, int retries) {
for (int i = 0; i < retries; i++) {
try {
Thread.sleep(1000 * (i + 1));
Thread.sleep(1000L * (i + 1));
Document doc = Jsoup.connect(url)
.userAgent(USER_AGENT)
.timeout(crawlerConfig.getTimeout())
......@@ -361,11 +656,9 @@ public class PenaltyCrawler {
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
.header("Accept-Encoding", "gzip, deflate")
.header("Connection", "keep-alive")
.header("Cache-Control", "max-age=0")
.ignoreHttpErrors(true)
.followRedirects(true)
.get();
if (doc != null && !doc.body().text().isEmpty()) {
return doc;
}
......@@ -385,7 +678,7 @@ public class PenaltyCrawler {
private String extractDate(String text) {
if (text == null) return "";
java.util.regex.Matcher matcher = java.util.regex.Pattern.compile("\\d{4}[-/年]\\d{1,2}[-/月]\\d{1,2}[日]?").matcher(text);
Matcher matcher = Pattern.compile("\\d{4}[-/年]\\d{1,2}[-/月]\\d{1,2}[日]?").matcher(text);
if (matcher.find()) {
return matcher.group();
}
......@@ -398,9 +691,11 @@ public class PenaltyCrawler {
case "中国银保监会" -> "银保监";
case "中国证监会" -> "证监";
case "国家外汇管理局" -> "外管";
case "国家金融监督管理总局" -> "金监";
default -> "文";
};
return prefix + "罚决字〔" + LocalDate.now().getYear() + "〕" + String.format("%04d", new Random().nextInt(10000));
int seq = penaltySeq.incrementAndGet() % 10000;
return prefix + "罚决字〔" + LocalDate.now().getYear() + "〕" + String.format("%04d", seq);
}
private String extractInstitutionName(String title) {
......@@ -410,27 +705,35 @@ public class PenaltyCrawler {
"(.+?)被处罚",
"(.+?)罚款"
};
for (String pattern : patterns) {
java.util.regex.Matcher matcher = java.util.regex.Pattern.compile(pattern).matcher(title);
Matcher matcher = Pattern.compile(pattern).matcher(title);
if (matcher.find()) {
return matcher.group(1).trim();
String name = matcher.group(1).trim();
if (name.length() > 0 && name.length() <= 100) {
return name;
}
}
}
if (title.length() > 5 && title.length() <= 50) {
return title;
}
return "未知机构";
}
private String guessInstitutionType(String title) {
if (title.contains("银行")) return "银行";
if (title.contains("保险")) return "保险";
if (title.contains("证券")) return "证券";
if (title.contains("基金")) return "基金";
if (title.contains("期货")) return "期货";
private String guessInstitutionType(String name) {
if (name == null) return "其他";
if (name.contains("银行")) return "银行";
if (name.contains("保险")) return "保险";
if (name.contains("证券")) return "证券";
if (name.contains("基金")) return "基金";
if (name.contains("期货")) return "期货";
if (name.contains("信托")) return "信托";
if (name.contains("消费金融")) return "消费金融";
return "其他";
}
private String guessPenaltyType(String title) {
if (title == null) return "罚款";
if (title.contains("罚款")) return "罚款";
if (title.contains("警告")) return "警告";
if (title.contains("没收")) return "没收违法所得";
......@@ -440,63 +743,87 @@ public class PenaltyCrawler {
return "罚款";
}
private LocalDate parseDate(String dateStr) {
if (dateStr == null || dateStr.isEmpty()) {
return LocalDate.now();
/**
 * Resolves a possibly-relative href against the source's base URL.
 * Absolute "http(s)" links are returned unchanged; null/empty hrefs yield
 * the base URL itself.
 */
private String buildFullUrl(String baseUrl, String relativeUrl) {
    if (relativeUrl == null || relativeUrl.isEmpty()) return baseUrl;
    if (relativeUrl.startsWith("http")) return relativeUrl;
    // FIX: normalizeBaseUrl guarantees a trailing slash, so joining with a
    // "/"-prefixed href produced a double slash ("...cn//path"); strip the
    // trailing slash before joining.
    String base = (baseUrl != null && baseUrl.endsWith("/"))
            ? baseUrl.substring(0, baseUrl.length() - 1)
            : baseUrl;
    if (relativeUrl.startsWith("/")) return base + relativeUrl;
    return base + "/" + relativeUrl;
}
dateStr = dateStr.replace("年", "-").replace("月", "-").replace("日", "").trim();
// ==================== NFRA 内容解析 ====================
try {
return LocalDate.parse(dateStr, DATE_FORMATTER);
} catch (DateTimeParseException e) {
try {
return LocalDate.parse(dateStr.replace("/", "-"), DATE_FORMATTER);
} catch (DateTimeParseException e2) {
return LocalDate.now();
}
/**
 * Extracts the official penalty-decision number (e.g. "XX罚决字〔2024〕12号")
 * from the document title. When no such pattern is present, returns the
 * title itself, truncated to 50 characters.
 */
private String extractPenaltyNumber(String text) {
    if (text == null) {
        return "";
    }
    Matcher matcher = Pattern
            .compile("[\\u4e00-\\u9fa5]+罚决字[〔(]\\d+[〕)]\\d+号?")
            .matcher(text);
    if (matcher.find()) {
        return matcher.group();
    }
    if (text.length() > 50) {
        return text.substring(0, 50);
    }
    return text;
}
private String buildFullUrl(String baseUrl, String relativeUrl) {
if (relativeUrl == null || relativeUrl.isEmpty()) return baseUrl;
if (relativeUrl.startsWith("http")) return relativeUrl;
if (relativeUrl.startsWith("/")) return baseUrl + relativeUrl;
return baseUrl + "/" + relativeUrl;
/**
 * Pulls the sanctioned party's name out of a decision body by matching the
 * "当事人…:" label. All whitespace (including full-width U+3000) is stripped
 * from the match; empty or implausibly long (>100 chars) candidates are
 * rejected and "" is returned.
 */
private String extractInstitutionNameFromContent(String content) {
    if (content == null) {
        return "";
    }
    Matcher m = Pattern
            .compile("当事人[名称]*[::]\\s*([^\\n\\r,,。]+)")
            .matcher(content);
    if (!m.find()) {
        return "";
    }
    String candidate = m.group(1).trim().replaceAll("[\\s\\u3000]+", "");
    boolean plausible = !candidate.isEmpty() && candidate.length() <= 100;
    return plausible ? candidate : "";
}
private PenaltyRecordDTO toDTO(PenaltyRecord entity) {
return PenaltyRecordDTO.builder()
.id(entity.getId())
.penaltyNumber(entity.getPenaltyNumber())
.institutionName(entity.getInstitutionName())
.institutionType(entity.getInstitutionType())
.penaltyType(entity.getPenaltyType())
.penaltyAmount(entity.getPenaltyAmount())
.penaltyDate(entity.getPenaltyDate())
.regulator(entity.getRegulator())
.illegalFacts(entity.getIllegalFacts())
.penaltyBasis(entity.getPenaltyBasis())
.sourceUrl(entity.getSourceUrl())
.createdAt(entity.getCreatedAt())
.updatedAt(entity.getUpdatedAt())
.isNew(entity.getIsNew())
.build();
/**
 * Extracts the fine amount (as the raw numeric string, unit not applied)
 * from decision text. Tries the "罚款…" phrasing first, then "处以…";
 * returns null when neither matches.
 */
private String extractPenaltyAmount(String content) {
    if (content == null) {
        return null;
    }
    String[] regexes = {
            "罚款[::]?\\s*(\\d+(?:\\.\\d+)?)\\s*(?:万元?|元)",
            "处以?\\s*(\\d+(?:\\.\\d+)?)\\s*(?:万元?|元)",
    };
    for (String regex : regexes) {
        Matcher m = Pattern.compile(regex).matcher(content);
        if (m.find()) {
            return m.group(1);
        }
    }
    return null;
}
private List<PenaltyRecord> crawlNFRAF(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
log.info("正在连接国家金融监督管理总局网站...");
/**
 * Extracts the "主要违法违规事实" section from decision text, capped at 500
 * characters. When the section header is absent, the first 200 characters of
 * the content (or the whole content, if shorter) are used as a fallback.
 */
private String extractIllegalFacts(String content) {
    if (content == null) {
        return "";
    }
    Matcher m = Pattern
            .compile("主要违法违规事实[::]?\\s*(.+?)(?=处罚内容|作出处罚决定|$)", Pattern.DOTALL)
            .matcher(content);
    if (m.find()) {
        String facts = m.group(1).trim();
        return facts.length() > 500 ? facts.substring(0, 500) : facts;
    }
    return content.length() > 200 ? content.substring(0, 200) : content;
}
try {
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) {
log.warn("NFRAF: 无法获取页面内容");
return records;
/**
 * Extracts the "处罚依据" (legal basis) section from decision text, capped at
 * 300 characters; returns "" when the section cannot be located.
 */
private String extractPenaltyBasis(String content) {
    if (content == null) {
        return "";
    }
    Matcher m = Pattern
            .compile("处罚依据[::]?\\s*(.+?)(?=\\。\\s*\\《|\\、|作出处罚决定|$)", Pattern.DOTALL)
            .matcher(content);
    if (!m.find()) {
        return "";
    }
    String basis = m.group(1).trim();
    return basis.length() > 300 ? basis.substring(0, 300) : basis;
}
// ==================== HTML 直接解析备用 ====================
Elements items = doc.select("ul li, div.list-item, tr, div.article-item, div.news-item");
private List<PenaltyRecord> crawlNFRADirect(CrawlerConfig.SourceConfig source) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) return records;
Elements items = doc.select(".list-item, ul li, div.article-item, tr[data-href]");
for (Element item : items) {
try {
Element link = item.selectFirst("a[href]");
......@@ -507,60 +834,103 @@ public class PenaltyCrawler {
if (isPenaltyRelated(title)) {
String dateStr = extractDate(item.text());
LocalDate penaltyDate = parseDateSafely(dateStr);
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.institutionName(extractInstitutionName(title))
.institutionType(guessInstitutionType(title))
.penaltyType("行政处罚")
.penaltyDate(LocalDate.parse(dateStr, DATE_FORMATTER))
.penaltyType(guessPenaltyType(title))
.penaltyDate(penaltyDate)
.regulator("国家金融监督管理总局")
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
records.add(record);
}
} catch (Exception e) {
log.debug("解析NFRAF列表项失败: {}", e.getMessage());
log.debug("解析NFRA HTML项失败: {}", e.getMessage());
}
}
return records;
}
if (records.isEmpty()) {
log.info("NFRAF: 未找到处罚列表,尝试其他选择器...");
Elements altItems = doc.select("a[href*='punish'], a[href*='penalty'], a[href*='cf']");
for (Element link : altItems) {
// ==================== 其他监管机构爬取 ====================
/** Crawls the 中国人民银行 source via the shared generic list-page parser. */
private List<PenaltyRecord> crawlPBC(CrawlerConfig.SourceConfig source) throws Exception {
    return crawlGeneric(source, "中国人民银行");
}
/** Crawls the 中国银保监会 source via the shared generic list-page parser. */
private List<PenaltyRecord> crawlCBIRC(CrawlerConfig.SourceConfig source) throws Exception {
    return crawlGeneric(source, "中国银保监会");
}
/** Crawls the 中国证监会 source via the shared generic list-page parser. */
private List<PenaltyRecord> crawlCSRC(CrawlerConfig.SourceConfig source) throws Exception {
    return crawlGeneric(source, "中国证监会");
}
/** Crawls the 国家外汇管理局 source via the shared generic list-page parser. */
private List<PenaltyRecord> crawlSAFE(CrawlerConfig.SourceConfig source) throws Exception {
    return crawlGeneric(source, "国家外汇管理局");
}
private List<PenaltyRecord> crawlGeneric(CrawlerConfig.SourceConfig source, String regulator) throws Exception {
List<PenaltyRecord> records = new ArrayList<>();
Document doc = fetchWithRetry(source.getListUrl(), 3);
if (doc == null) return records;
Elements items = doc.select("ul li, div.list-item, tr");
for (Element item : items) {
try {
Element link = item.selectFirst("a[href]");
if (link == null) continue;
String title = link.text().trim();
if (title.length() > 5 && isPenaltyRelated(title)) {
String href = link.attr("href");
if (isPenaltyRelated(title)) {
String dateStr = extractDate(item.text());
LocalDate penaltyDate = parseDateSafely(dateStr);
PenaltyRecord record = PenaltyRecord.builder()
.penaltyNumber(generatePenaltyNumber(source.getName()))
.penaltyNumber(generatePenaltyNumber(regulator))
.institutionName(extractInstitutionName(title))
.institutionType(guessInstitutionType(title))
.penaltyType("行政处罚")
.penaltyDate(LocalDate.now())
.regulator("国家金融监督管理总局")
.penaltyType(guessPenaltyType(title))
.penaltyDate(penaltyDate)
.regulator(regulator)
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
records.add(record);
}
} catch (Exception e) {
log.debug("解析NFRAF备选链接失败: {}", e.getMessage());
}
log.debug("解析{}条目失败: {}", regulator, e.getMessage());
}
}
log.info("NFRAF: 共解析{}条记录", records.size());
} catch (Exception e) {
log.error("NFRAF爬取失败: {}", e.getMessage(), e);
throw e;
log.info("{}: 找到 {} 条相关记录", regulator, records.size());
return records;
}
return records;
/**
 * Maps a PenaltyRecord JPA entity to its transport DTO. A straight
 * field-for-field copy — no formatting, filtering, or derivation is applied.
 */
private PenaltyRecordDTO toDTO(PenaltyRecord entity) {
    return PenaltyRecordDTO.builder()
            .id(entity.getId())
            .penaltyNumber(entity.getPenaltyNumber())
            .institutionName(entity.getInstitutionName())
            .institutionType(entity.getInstitutionType())
            .penaltyType(entity.getPenaltyType())
            .penaltyAmount(entity.getPenaltyAmount())
            .penaltyDate(entity.getPenaltyDate())
            .regulator(entity.getRegulator())
            .illegalFacts(entity.getIllegalFacts())
            .penaltyBasis(entity.getPenaltyBasis())
            .sourceUrl(entity.getSourceUrl())
            .createdAt(entity.getCreatedAt())
            .updatedAt(entity.getUpdatedAt())
            .isNew(entity.getIsNew())
            .build();
}
}
\ No newline at end of file
package com.fintech.penalty.dto;
import lombok.Data;
/**
 * Request body for the AI natural-language search endpoint
 * (POST /penalties/ai-search).
 */
@Data
public class AISearchRequest {
    // Free-text natural-language query supplied by the user.
    private String query;
}
\ No newline at end of file
......@@ -17,6 +17,7 @@ import java.time.LocalDateTime;
public class CrawlTaskDTO {
private Long id;
private Long sourceId;
private String taskName;
private String sourceName;
private String sourceUrl;
......
......@@ -5,6 +5,7 @@ import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.math.BigDecimal;
import java.time.LocalDate;
/**
......@@ -24,4 +25,6 @@ public class SearchCriteria {
private LocalDate endDate;
private Boolean isNew;
private String keyword;
private BigDecimal minAmount;
private BigDecimal maxAmount;
}
package com.fintech.penalty.entity;
import jakarta.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.LocalDateTime;
/**
* 爬取进度记录 - 用于断点续传
*/
@Entity
@Table(name = "crawl_progress",
        uniqueConstraints = {@UniqueConstraint(columnNames = {"source_name", "task_id"})})
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class CrawlProgress {

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    // Data-source name, e.g. "NFRA"; unique together with taskId (see @Table).
    @Column(name = "source_name", nullable = false, length = 100)
    private String sourceName;

    // Task identifier used to distinguish separate crawl runs.
    @Column(name = "task_id", length = 50)
    private String taskId;

    // Index of the last fully completed page — the resume point.
    @Column(name = "last_page_index", nullable = false)
    private Integer lastPageIndex;

    // Total page count, when known.
    @Column(name = "total_pages")
    private Integer totalPages;

    // Number of records crawled so far in this run.
    @Column(name = "total_records_crawled")
    private Integer totalRecordsCrawled;

    // One of: RUNNING, COMPLETED, FAILED.
    @Column(name = "status", length = 20)
    private String status;

    // When this crawl run started.
    @Column(name = "start_time")
    private LocalDateTime startTime;

    // When this progress row was last updated.
    @Column(name = "update_time")
    private LocalDateTime updateTime;
}
\ No newline at end of file
package com.fintech.penalty.repository;
import com.fintech.penalty.entity.CrawlProgress;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.Optional;
@Repository
public interface CrawlProgressRepository extends JpaRepository<CrawlProgress, Long> {

    /** Looks up the checkpoint for one (source, task) pair. */
    Optional<CrawlProgress> findBySourceNameAndTaskId(String sourceName, String taskId);

    /**
     * Finds a source's checkpoint in a given status (e.g. the RUNNING one).
     * NOTE(review): a non-unique result would make Spring Data throw — confirm
     * at most one row per (source, status) can exist.
     */
    Optional<CrawlProgress> findBySourceNameAndStatus(String sourceName, String status);

    /** Advances the resume point (last completed page index) for a run. */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.lastPageIndex = :pageIndex, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateProgress(@Param("sourceName") String sourceName,
                        @Param("taskId") String taskId,
                        @Param("pageIndex") Integer pageIndex,
                        @Param("updateTime") LocalDateTime updateTime);

    /** Records the total page count once it becomes known. */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.totalPages = :totalPages, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateTotalPages(@Param("sourceName") String sourceName,
                          @Param("taskId") String taskId,
                          @Param("totalPages") Integer totalPages,
                          @Param("updateTime") LocalDateTime updateTime);

    /** Updates the running total of crawled records. */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.totalRecordsCrawled = :totalRecords, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateTotalRecords(@Param("sourceName") String sourceName,
                            @Param("taskId") String taskId,
                            @Param("totalRecords") Integer totalRecords,
                            @Param("updateTime") LocalDateTime updateTime);

    /** Transitions a run's status (RUNNING / COMPLETED / FAILED). */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.status = :status, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateStatus(@Param("sourceName") String sourceName,
                      @Param("taskId") String taskId,
                      @Param("status") String status,
                      @Param("updateTime") LocalDateTime updateTime);
}
\ No newline at end of file
......@@ -21,4 +21,9 @@ public interface CrawlSourceRepository extends JpaRepository<CrawlSource, Long>
* 根据来源类型查询
*/
List<CrawlSource> findBySourceType(String sourceType);
/**
* 查询所有数据源(按排序)
*/
List<CrawlSource> findAllByOrderBySortOrder();
}
......@@ -25,6 +25,11 @@ public interface CrawlTaskRepository extends JpaRepository<CrawlTask, Long> {
*/
Optional<CrawlTask> findTopBySourceNameOrderByLastCrawlTimeDesc(String sourceName);
/**
* 根据来源名称查询所有任务(按时间倒序)
*/
List<CrawlTask> findBySourceNameOrderByLastCrawlTimeDesc(String sourceName);
/**
* 分页查询任务历史
*/
......@@ -34,4 +39,9 @@ public interface CrawlTaskRepository extends JpaRepository<CrawlTask, Long> {
* 查询正在运行的任务
*/
List<CrawlTask> findByStatusIn(List<String> statuses);
/**
* 删除所有任务记录
*/
void deleteAll();
}
......@@ -4,6 +4,7 @@ import com.fintech.penalty.config.CrawlerConfig;
import com.fintech.penalty.crawler.PenaltyCrawler;
import com.fintech.penalty.dto.CrawlResult;
import com.fintech.penalty.entity.CrawlSource;
import com.fintech.penalty.service.CrawlTaskService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
......@@ -23,6 +24,7 @@ public class CrawlScheduler {
private final PenaltyCrawler penaltyCrawler;
private final CrawlerConfig crawlerConfig;
private final CrawlTaskService crawlTaskService;
private volatile boolean isRunning = false;
......@@ -69,13 +71,11 @@ public class CrawlScheduler {
log.info("============ 开始爬取单个数据源: {} ============", source.getSourceName());
CrawlerConfig.SourceConfig config = new CrawlerConfig.SourceConfig();
config.setName(source.getSourceName());
config.setBaseUrl(source.getSourceUrl());
config.setEnabled(true);
CrawlerConfig.SourceConfig config = toSourceConfig(source);
try {
isRunning = true;
crawlTaskService.createTask(source.getSourceName(), config.getListUrl());
penaltyCrawler.crawl(config).get();
log.info("============ 单个数据源爬取完成: {} ============", source.getSourceName());
} catch (Exception e) {
......@@ -85,6 +85,19 @@ public class CrawlScheduler {
}
}
/**
 * Converts a database-configured CrawlSource into the crawler's runtime
 * SourceConfig. The list URL is the base URL plus the optional list-URL
 * pattern (empty when the pattern is null).
 */
private CrawlerConfig.SourceConfig toSourceConfig(CrawlSource source) {
    CrawlerConfig.SourceConfig config = new CrawlerConfig.SourceConfig();
    config.setName(source.getSourceName());
    config.setCode(source.getSourceType());
    config.setBaseUrl(source.getSourceUrl());
    String pattern = source.getListUrlPattern();
    config.setListUrl(source.getSourceUrl() + (pattern != null ? pattern : ""));
    // Fix: getIsEnabled() returns a nullable Boolean; direct unboxing would
    // NPE for rows with a NULL flag. Treat NULL as disabled.
    config.setEnabled(Boolean.TRUE.equals(source.getIsEnabled()));
    return config;
}
/**
* 执行爬取
*/
......@@ -93,12 +106,16 @@ public class CrawlScheduler {
List<CrawlResult> results = new ArrayList<>();
try {
List<CrawlerConfig.SourceConfig> sources = crawlerConfig.getSources();
if (sources == null || sources.isEmpty()) {
log.warn("没有配置爬取数据源");
List<CrawlSource> dbSources = crawlTaskService.getEnabledSources();
if (dbSources == null || dbSources.isEmpty()) {
log.warn("数据库中没有配置爬取数据源");
return results;
}
List<CrawlerConfig.SourceConfig> sources = dbSources.stream()
.map(this::toSourceConfig)
.toList();
// 并行爬取各数据源
List<CompletableFuture<CrawlResult>> futures = sources.stream()
.filter(CrawlerConfig.SourceConfig::isEnabled)
......
......@@ -11,6 +11,7 @@ import com.fintech.penalty.repository.CrawlSourceRepository;
import com.fintech.penalty.repository.CrawlTaskRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.http.HttpEntity;
......@@ -43,6 +44,15 @@ public class CrawlTaskService {
private final CrawlTaskRepository crawlTaskRepository;
private final CrawlSourceRepository crawlSourceRepository;
@Value("${deepseek.api.url:https://api.deepseek.com}")
private String deepseekApiUrl;
@Value("${deepseek.api.key:}")
private String deepseekApiKey;
@Value("${deepseek.model:deepseek-chat}")
private String deepseekModel;
/**
* 获取爬取任务列表
*/
......@@ -60,6 +70,13 @@ public class CrawlTaskService {
.build();
}
/**
 * Deletes ALL crawl-task history rows (backs DELETE /crawl/history).
 * Irreversible; no per-source filtering is applied.
 */
public void clearHistory() {
    crawlTaskRepository.deleteAll();
}
/**
* 根据ID查询数据源
*/
......@@ -83,6 +100,17 @@ public class CrawlTaskService {
return crawlSourceRepository.findByIsEnabledTrueOrderBySortOrder();
}
/**
 * Returns every configured data source (enabled and disabled alike),
 * ordered by the configured sort order. Backs GET /crawl/sources.
 *
 * Fix: the committed text declared getAllSources() twice (sorted and
 * unsorted variants), which does not compile; the sorted variant is kept
 * since the controller lists sources for display.
 */
public List<CrawlSource> getAllSources() {
    return crawlSourceRepository.findAllByOrderBySortOrder();
}
/**
* 获取最新任务状态
*/
......@@ -90,8 +118,9 @@ public class CrawlTaskService {
List<CrawlSource> sources = getEnabledSources();
return sources.stream()
.map(source -> crawlTaskRepository.findTopBySourceNameOrderByLastCrawlTimeDesc(source.getSourceName())
.map(this::toDTO)
.map(task -> toDTO(task, source.getId()))
.orElse(CrawlTaskDTO.builder()
.sourceId(source.getId())
.sourceName(source.getSourceName())
.status("pending")
.build()))
......@@ -132,19 +161,29 @@ public class CrawlTaskService {
*/
@Transactional
public void saveCrawlResult(CrawlResult result) {
List<CrawlTask> existingTasks = crawlTaskRepository.findBySourceNameOrderByLastCrawlTimeDesc(result.getSourceName());
Optional<CrawlTask> taskOpt = crawlTaskRepository.findTopBySourceNameOrderByLastCrawlTimeDesc(result.getSourceName())
.stream()
.findFirst();
taskOpt.ifPresent(task -> {
if (!existingTasks.isEmpty()) {
CrawlTask task = existingTasks.get(0);
task.setStatus(result.isSuccess() ? "success" : "failed");
task.setLastCrawlTime(LocalDateTime.now());
task.setRecordsFound(result.getTotalFound());
task.setRecordsNew(result.getNewRecords());
task.setErrorMessage(result.getErrorMessage());
crawlTaskRepository.save(task);
});
} else {
CrawlTask task = CrawlTask.builder()
.taskName(result.getSourceName() + " 爬取任务")
.sourceName(result.getSourceName())
.sourceUrl("")
.status(result.isSuccess() ? "success" : "failed")
.lastCrawlTime(LocalDateTime.now())
.recordsFound(result.getTotalFound())
.recordsNew(result.getNewRecords())
.errorMessage(result.getErrorMessage())
.build();
crawlTaskRepository.save(task);
}
}
/**
......@@ -160,6 +199,11 @@ public class CrawlTaskService {
public List<Map<String, String>> searchSourcesByKeyword(String keyword) {
List<Map<String, String>> results = new ArrayList<>();
if (deepseekApiKey == null || deepseekApiKey.isEmpty()) {
log.warn("DeepSeek API 密钥未配置,使用默认数据源");
return getDefaultSources();
}
String searchPrompt = String.format("请搜索关于%s的金融监管处罚信息数据源URL,只返回URL和简要描述,不要其他内容。返回格式:名称|URL|描述,每行一个", keyword);
try {
......@@ -169,22 +213,20 @@ public class CrawlTaskService {
message.put("content", searchPrompt);
Map<String, Object> requestBody = new HashMap<>();
requestBody.put("model", "deepseek-chat");
requestBody.put("model", deepseekModel);
requestBody.put("messages", new Object[]{message});
requestBody.put("temperature", 0.7);
requestBody.put("max_tokens", 1000);
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
String apiKey = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
headers.setBearerAuth(apiKey);
headers.setBearerAuth(deepseekApiKey);
HttpEntity<Map<String, Object>> request = new HttpEntity<>(requestBody, headers);
RestTemplate restTemplate = new RestTemplate();
ResponseEntity<String> response = restTemplate.exchange(
"https://api.deepseek.com/v1/chat/completions",
deepseekApiUrl + "/v1/chat/completions",
HttpMethod.POST,
request,
String.class
......@@ -216,14 +258,23 @@ public class CrawlTaskService {
}
if (results.isEmpty()) {
results.add(Map.of("name", "中国人民银行官网", "url", "http://www.pbc.gov.cn", "description", "中国人民银行官方网站"));
results.add(Map.of("name", "银保监会官网", "url", "http://www.cbirc.gov.cn", "description", "中国银行保险监督管理委员会"));
results.add(Map.of("name", "证监会官网", "url", "http://www.csrc.gov.cn", "description", "中国证券监督管理委员会"));
results = getDefaultSources();
}
return results;
}
/**
* 获取默认数据源列表
*/
/**
 * Built-in fallback data sources used when the AI search is unavailable or
 * returns nothing. Returned as a mutable list so callers may append to it.
 */
private List<Map<String, String>> getDefaultSources() {
    return new ArrayList<>(List.of(
            Map.of("name", "中国人民银行官网", "url", "http://www.pbc.gov.cn", "description", "中国人民银行官方网站"),
            Map.of("name", "银保监会官网", "url", "http://www.cbirc.gov.cn", "description", "中国银行保险监督管理委员会"),
            Map.of("name", "证监会官网", "url", "http://www.csrc.gov.cn", "description", "中国证券监督管理委员会")
    ));
}
/**
* 创建数据源
*/
......@@ -252,6 +303,27 @@ public class CrawlTaskService {
if (source.getSourceType() != null) {
existing.setSourceType(source.getSourceType());
}
if (source.getListUrlPattern() != null) {
existing.setListUrlPattern(source.getListUrlPattern());
}
if (source.getDetailUrlPattern() != null) {
existing.setDetailUrlPattern(source.getDetailUrlPattern());
}
if (source.getListSelector() != null) {
existing.setListSelector(source.getListSelector());
}
if (source.getTitleSelector() != null) {
existing.setTitleSelector(source.getTitleSelector());
}
if (source.getDateSelector() != null) {
existing.setDateSelector(source.getDateSelector());
}
if (source.getContentSelector() != null) {
existing.setContentSelector(source.getContentSelector());
}
if (source.getSortOrder() != null) {
existing.setSortOrder(source.getSortOrder());
}
if (source.getIsEnabled() != null) {
existing.setIsEnabled(source.getIsEnabled());
}
......@@ -271,8 +343,13 @@ public class CrawlTaskService {
* 转换为DTO
*/
// Convenience overload for callers that do not know the source id
// (delegates with a null sourceId).
private CrawlTaskDTO toDTO(CrawlTask entity) {
    return toDTO(entity, null);
}
private CrawlTaskDTO toDTO(CrawlTask entity, Long sourceId) {
return CrawlTaskDTO.builder()
.id(entity.getId())
.sourceId(sourceId)
.taskName(entity.getTaskName())
.sourceName(entity.getSourceName())
.sourceUrl(entity.getSourceUrl())
......
......@@ -97,6 +97,12 @@ public class PenaltyRecordService {
cb.like(root.get("penaltyBasis"), keyword)
));
}
if (criteria.getMinAmount() != null) {
predicates.add(cb.ge(root.get("penaltyAmount"), criteria.getMinAmount()));
}
if (criteria.getMaxAmount() != null) {
predicates.add(cb.le(root.get("penaltyAmount"), criteria.getMaxAmount()));
}
query.orderBy(cb.desc(root.get("penaltyDate")));
return cb.and(predicates.toArray(new Predicate[0]));
......@@ -293,4 +299,86 @@ public class PenaltyRecordService {
}
return record.getAnalysisReport();
}
/**
* AI自然语言搜索 - 将自然语言转换为搜索条件
*/
// Translates the free-text query into structured SearchCriteria, then reuses
// the standard paged search.
// NOTE(review): a null query would NPE inside parseNaturalLanguage
// (query.toLowerCase()) — confirm the controller guarantees non-null.
public PageResponse<PenaltyRecordDTO> aiSearch(String query, int page, int size) {
    SearchCriteria criteria = parseNaturalLanguage(query);
    log.info("AI搜索解析结果: {}", criteria);
    return search(criteria, page, size);
}
/**
 * Best-effort, rule-based translation of a natural-language query into
 * structured SearchCriteria: institution type, penalty type, regulator,
 * explicit year range (2023–2025), and a minimum amount. When nothing
 * structured is recognized, the raw query becomes a plain keyword search.
 */
private SearchCriteria parseNaturalLanguage(String query) {
    SearchCriteria criteria = new SearchCriteria();
    String lowerQuery = query.toLowerCase();

    // Institution type: first matching keyword wins.
    if (lowerQuery.contains("银行")) {
        criteria.setInstitutionType("银行");
    } else if (lowerQuery.contains("保险")) {
        criteria.setInstitutionType("保险");
    } else if (lowerQuery.contains("证券")) {
        criteria.setInstitutionType("证券");
    } else if (lowerQuery.contains("基金")) {
        criteria.setInstitutionType("基金");
    } else if (lowerQuery.contains("期货")) {
        criteria.setInstitutionType("期货");
    }

    // Penalty type.
    if (lowerQuery.contains("罚款")) {
        criteria.setPenaltyType("罚款");
    } else if (lowerQuery.contains("警告")) {
        criteria.setPenaltyType("警告");
    } else if (lowerQuery.contains("没收")) {
        criteria.setPenaltyType("没收违法所得");
    } else if (lowerQuery.contains("停业")) {
        criteria.setPenaltyType("停业");
    } else if (lowerQuery.contains("吊销")) {
        criteria.setPenaltyType("吊销许可证");
    } else if (lowerQuery.contains("市场禁入")) {
        criteria.setPenaltyType("市场禁入");
    }

    // Regulator ("证券会" is kept deliberately as a common user typo for 证监会).
    if (lowerQuery.contains("人行") || lowerQuery.contains("人民银行")) {
        criteria.setRegulator("中国人民银行");
    } else if (lowerQuery.contains("银保监会") || lowerQuery.contains("银保监")) {
        criteria.setRegulator("中国银保监会");
    } else if (lowerQuery.contains("证监会") || lowerQuery.contains("证券会")) {
        criteria.setRegulator("中国证监会");
    } else if (lowerQuery.contains("外汇")) {
        criteria.setRegulator("国家外汇管理局");
    }

    // Year filter: explicit years only.
    if (lowerQuery.contains("2024")) {
        criteria.setStartDate(LocalDate.of(2024, 1, 1));
        criteria.setEndDate(LocalDate.of(2024, 12, 31));
    } else if (lowerQuery.contains("2025")) {
        criteria.setStartDate(LocalDate.of(2025, 1, 1));
        criteria.setEndDate(LocalDate.of(2025, 12, 31));
    } else if (lowerQuery.contains("2023")) {
        criteria.setStartDate(LocalDate.of(2023, 1, 1));
        criteria.setEndDate(LocalDate.of(2023, 12, 31));
    }

    // Minimum amount. Fix: the unit is taken from the matched capture group
    // rather than query.contains("亿"), which mis-scaled e.g. "100万" whenever
    // "亿" also appeared anywhere in the query; decimal amounts now match too.
    java.util.regex.Pattern amountPattern =
            java.util.regex.Pattern.compile("(\\d+(?:\\.\\d+)?)\\s*([亿万])");
    java.util.regex.Matcher matcher = amountPattern.matcher(query);
    if (matcher.find()) {
        BigDecimal amount = new BigDecimal(matcher.group(1));
        BigDecimal scale = "亿".equals(matcher.group(2))
                ? BigDecimal.valueOf(100000000L)
                : BigDecimal.valueOf(10000L);
        criteria.setMinAmount(amount.multiply(scale));
    }

    // Nothing structured recognized -> plain keyword search. minAmount is
    // included in the check so an amount-only query is not also forced
    // through a LIKE match of the raw sentence (which would return nothing).
    if (criteria.getInstitutionType() == null && criteria.getPenaltyType() == null
            && criteria.getRegulator() == null && criteria.getStartDate() == null
            && criteria.getMinAmount() == null) {
        criteria.setKeyword(query);
    }
    return criteria;
}
}
......@@ -37,34 +37,16 @@ spring:
serialization:
write-dates-as-timestamps: false
# 爬虫配置
# 爬虫配置(数据源从数据库 crawl_sources 表读取)
crawler:
enabled: true
cron: "0 0 2 * * ?"
interval: 2000
max-retries: 3
timeout: 3000000  # NOTE(review): if this is milliseconds it is ~50 minutes — verify the unit and intended value
sources:
- name: 中国人民银行
code: pbc
base-url: https://www.pbc.gov.cn
list-url: https://www.pbc.gov.cn/zhenghuihuizhan/135153/index.html
enabled: true
- name: 中国银保监会
code: cbirc
base-url: https://www.cbirc.gov.cn
list-url: https://www.cbirc.gov.cn/cn/view/pages/Column.html?colIdId=11283&channelId=11283
enabled: true
- name: 中国证监会
code: csrc
base-url: https://www.csrc.gov.cn
list-url: https://www.csrc.gov.cn/csrc/c100103/index.html
enabled: true
- name: 国家外汇管理局
code: safe
base-url: https://www.safe.gov.cn
list-url: https://www.safe.gov.cn/safe/yjcf/index.html
enabled: true
resume:
enabled: true # 是否启用断点续传
force-restart: false # 是否强制重新开始(设为true会忽略之前的进度)
deepseek:
api:
......@@ -86,3 +68,8 @@ logging:
org.hibernate.SQL: DEBUG
pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file:
name: logs/penalty-system.log # 日志文件路径和名称
max-size: 10MB # 单个文件最大大小
max-history: 30 # 保留30天的历史日志
\ No newline at end of file
......@@ -37,34 +37,16 @@ spring:
serialization:
write-dates-as-timestamps: false
# 爬虫配置
# 爬虫配置(数据源从数据库 crawl_sources 表读取)
crawler:
enabled: true
cron: "0 0 2 * * ?"
interval: 2000
max-retries: 3
timeout: 3000000
sources:
- name: 中国人民银行
code: pbc
base-url: https://www.pbc.gov.cn
list-url: https://www.pbc.gov.cn/zhenghuihuizhan/135153/index.html
enabled: true
- name: 中国银保监会
code: cbirc
base-url: https://www.cbirc.gov.cn
list-url: https://www.cbirc.gov.cn/cn/view/pages/Column.html?colIdId=11283&channelId=11283
enabled: true
- name: 中国证监会
code: csrc
base-url: https://www.csrc.gov.cn
list-url: https://www.csrc.gov.cn/csrc/c100103/index.html
enabled: true
- name: 国家外汇管理局
code: safe
base-url: https://www.safe.gov.cn
list-url: https://www.safe.gov.cn/safe/yjcf/index.html
enabled: true
resume:
enabled: true # 是否启用断点续传
force-restart: false # 是否强制重新开始(设为true会忽略之前的进度)
deepseek:
api:
......@@ -86,3 +68,8 @@ logging:
org.hibernate.SQL: DEBUG
pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file:
name: logs/penalty-system.log # 日志文件路径和名称
max-size: 10MB # 单个文件最大大小
max-history: 30 # 保留30天的历史日志
\ No newline at end of file
......@@ -20,6 +20,7 @@ D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\CrawlTask.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\repository\PenaltyRecordRepository.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\SystemConfig.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\AISearchRequest.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\LoginResponse.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\controller\AnalysisKeywordController.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\crawler\PenaltyCrawler.java
......
.login-container[data-v-63b59978]{min-height:100vh;display:flex;flex-direction:column;justify-content:center;align-items:center;background:linear-gradient(135deg,#667eea,#764ba2)}.login-box[data-v-63b59978]{width:420px;padding:40px;background:#fff;border-radius:12px;box-shadow:0 20px 60px #0000004d}.login-header[data-v-63b59978]{text-align:center;margin-bottom:30px}.login-header h1[data-v-63b59978]{margin:16px 0 8px;font-size:24px;color:#333}.login-header p[data-v-63b59978]{margin:0;font-size:14px;color:#999}.login-form .login-button[data-v-63b59978]{width:100%}.login-footer[data-v-63b59978]{margin-top:24px;text-align:center}.login-footer p[data-v-63b59978]{color:#ffffffb3;font-size:14px}
......@@ -5,7 +5,7 @@
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>金融监管处罚监控系统</title>
<script type="module" crossorigin src="/assets/index-C_I9toNW.js"></script>
<script type="module" crossorigin src="/assets/index-BM812JBU.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-D_lRsYFa.css">
</head>
<body>
......
......@@ -4,8 +4,16 @@ import router from '../router'
const isDev = import.meta.env.DEV

// Resolve the API base URL once at module load:
// - dev: always the '/api' dev-server proxy
// - prod: VITE_API_BASE_URL when provided, normalized to end with '/api'
let baseURL = '/api'
if (!isDev) {
  const apiBase = import.meta.env.VITE_API_BASE_URL
  if (apiBase) {
    baseURL = apiBase.endsWith('/api') ? apiBase : apiBase + '/api'
  }
}

// Fix: removed merge residue — the options object previously contained a
// second, conflicting `baseURL:` entry left over from the old implementation.
const request = axios.create({
  baseURL,
  timeout: 300000
})
......@@ -153,6 +161,7 @@ export default {
triggerCrawl: () => request.post('/crawl/trigger'),
getCrawlStatus: () => request.get('/crawl/status'),
getCrawlHistory: (params) => request.get('/crawl/history', { params }),
clearCrawlHistory: () => request.delete('/crawl/history'),
getCrawlSources: () => request.get('/crawl/sources'),
createCrawlSource: (data) => request.post('/crawl/sources', data),
updateCrawlSource: (id, data) => request.put(`/crawl/sources/${id}`, data),
......@@ -164,5 +173,8 @@ export default {
health: () => request.get('/health'),
// 根据角色获取菜单
getMenusByRole: (roleCode) => request.get(`/menus/role/${roleCode || 'ADMIN'}`)
getMenusByRole: (roleCode) => request.get(`/menus/role/${roleCode || 'ADMIN'}`),
// AI搜索
aiSearch: (text) => request.post('/penalties/ai-search', { query: text })
}
......@@ -13,11 +13,16 @@
</div>
</div>
</template>
<el-table :data="sources" stripe>
<el-table-column prop="sourceName" label="数据源名称" width="180" />
<el-table-column prop="sourceUrl" label="URL" min-width="300" show-overflow-tooltip />
<el-table-column prop="sourceType" label="类型" width="120" />
<el-table-column prop="isEnabled" label="状态" width="120">
<el-table :data="sources" stripe :resizable="true" border="true">
<el-table-column prop="sourceName" label="数据源名称" width="250" />
<el-table-column prop="sourceUrl" label="基础URL" min-width="250" show-overflow-tooltip>
<template #default="{ row }">
<a :href="row.sourceUrl" target="_blank">{{ row.sourceUrl }}</a>
</template>
</el-table-column>
<el-table-column prop="sourceType" label="类型代码" width="100" />
<el-table-column prop="listUrlPattern" label="列表URL模板" min-width="400" show-overflow-tooltip />
<el-table-column prop="isEnabled" label="状态" width="100">
<template #default="{ row }">
<div class="status-cell">
<el-switch v-model="row.isEnabled" @change="handleToggleStatus(row)" />
......@@ -52,10 +57,10 @@
{{ getTaskStatus(row.sourceName)?.recordsNew || 0 }}
</template>
</el-table-column>
<el-table-column label="操作" width="300" fixed="right">
<el-table-column label="操作" width="200" fixed="right">
<template #default="{ row }" align="center">
<el-button type="primary" link @click="handleEditSource(row)">编辑</el-button>
<el-button type="success" link @click="handleCrawlSource(row)" :loading="crawlingSource === row.id">爬取</el-button>
<el-button type="success" link @click="openCrawlDrawer(row)" :loading="crawlingSource === row.id">爬取</el-button>
<el-button type="danger" link @click="handleDeleteSource(row)">删除</el-button>
</template>
</el-table-column>
......@@ -67,14 +72,17 @@
<template #header>
<div class="card-header">
<span>爬取历史</span>
<div>
<el-button type="danger" link @click="handleClearHistory">清除</el-button>
<el-button link @click="fetchHistory">
<el-icon><Refresh /></el-icon>
</el-button>
</div>
</div>
</template>
<el-table :data="historyData" v-loading="historyLoading" stripe>
<el-table-column prop="taskName" label="任务名称" width="200" />
<el-table-column prop="sourceName" label="数据源" width="150" />
<el-table :data="historyData" v-loading="historyLoading" border="true">
<el-table-column prop="taskName" label="任务名称" width="300" />
<el-table-column prop="sourceName" label="数据源" width="300" />
<el-table-column prop="status" label="状态" width="100">
<template #default="{ row }">
<el-tag :type="getStatusType(row.status)" size="small">
......@@ -122,62 +130,86 @@
</template>
</el-dialog>
<!-- 添加数据源抽屉 -->
<el-drawer v-model="drawerVisible" title="添加数据源" size="50%">
<!-- 添加/编辑数据源抽屉 -->
<el-drawer v-model="drawerVisible" :title="isEditMode ? '编辑数据源' : '添加数据源'" size="60%">
<div class="add-source-drawer">
<el-alert title="通过AI搜索金融监管处罚信息数据源,或手动添加URL" type="info" :closable="false" show-icon style="margin-bottom: 16px"/>
<el-tabs v-model="activeTab">
<el-tab-pane label="AI搜索" name="ai">
<div class="ai-search">
<el-input v-model="aiKeyword" placeholder="输入关键词搜索数据源,如:金融监管处罚、证监会处罚" @keyup.enter="handleAiSearch">
<template #append>
<el-button @click="handleAiSearch" :loading="aiLoading">搜索</el-button>
</template>
</el-input>
<div class="ai-results" v-if="aiResults.length > 0">
<div class="results-header">搜索结果:</div>
<el-card v-for="(item, index) in aiResults" :key="index" class="result-card">
<div class="result-name">{{ item.name }}</div>
<div class="result-url">{{ item.url }}</div>
<div class="result-actions">
<el-button type="primary" size="small" @click="addAiSource(item)">添加</el-button>
</div>
</el-card>
</div>
<el-empty v-else-if="aiSearched" description="未找到相关数据源" :image-size="60"/>
</div>
</el-tab-pane>
<el-tab-pane label="手动添加" name="manual">
<el-form ref="sourceFormRef" :model="sourceForm" :rules="sourceRules" label-width="100px">
<el-form ref="sourceFormRef" :model="sourceForm" :rules="sourceRules" label-width="120px">
<el-form-item label="数据源名称" prop="sourceName">
<el-input v-model="sourceForm.sourceName" placeholder="请输入数据源名称" />
</el-form-item>
<el-form-item label="URL" prop="sourceUrl">
<el-input v-model="sourceForm.sourceUrl" placeholder="请输入URL" />
<el-form-item label="基础URL" prop="sourceUrl">
<el-input v-model="sourceForm.sourceUrl" placeholder="https://www.example.com" />
</el-form-item>
<el-form-item label="来源类型" prop="sourceType">
<el-input v-model="sourceForm.sourceType" placeholder="如: pbc, cbirc, csrc, nfra" />
</el-form-item>
<el-form-item label="列表页URL模板">
<el-input v-model="sourceForm.listUrlPattern" placeholder="/path/list*.html" />
</el-form-item>
<el-form-item label="类型" prop="sourceType">
<el-select v-model="sourceForm.sourceType" placeholder="请选择类型" style="width: 100%">
<el-option label="官方网站" value="官网" />
<el-option label="新闻资讯" value="资讯" />
<el-option label="行业协会" value="协会" />
<el-option label="其他" value="其他" />
</el-select>
<el-form-item label="详情页URL模板">
<el-input v-model="sourceForm.detailUrlPattern" placeholder="/path/detail*.html" />
</el-form-item>
<el-form-item label="描述">
<el-input v-model="sourceForm.description" type="textarea" :rows="3" placeholder="请输入描述" />
<el-form-item label="列表选择器">
<el-input v-model="sourceForm.listSelector" placeholder="ul li, div.item" />
</el-form-item>
<el-form-item label="标题选择器">
<el-input v-model="sourceForm.titleSelector" placeholder="a[href]" />
</el-form-item>
<el-form-item label="日期选择器">
<el-input v-model="sourceForm.dateSelector" placeholder="日期选择器" />
</el-form-item>
<el-form-item label="内容选择器">
<el-input v-model="sourceForm.contentSelector" placeholder="内容选择器" />
</el-form-item>
<el-form-item label="排序">
<el-input-number v-model="sourceForm.sortOrder" :min="0" :max="999" />
</el-form-item>
<el-form-item label="启用">
<el-switch v-model="sourceForm.isEnabled" />
</el-form-item>
</el-form>
<div class="form-actions">
<el-button type="primary" @click="handleAddSource" :loading="addLoading">保存</el-button>
<el-button @click="drawerVisible = false">取消</el-button>
<el-button type="primary" @click="handleAddSource" :loading="addLoading">{{ isEditMode ? '保存修改' : '添加' }}</el-button>
</div>
</div>
</el-drawer>
<!-- 爬取过程抽屉 -->
<el-drawer v-model="crawlDrawerVisible" title="爬取过程" size="30%" :close-on-click-modal="false">
<div class="crawl-progress">
<div class="progress-header">
<el-tag :type="crawlProgress.status === 'success' ? 'success' : crawlProgress.status === 'failed' ? 'danger' : 'warning'">
{{ crawlProgress.statusText }}
</el-tag>
<span class="source-name">{{ crawlProgress.sourceName }}</span>
</div>
<el-timeline class="progress-timeline">
<el-timeline-item
v-for="(log, index) in crawlProgress.logs"
:key="index"
:timestamp="log.time"
:type="log.type === 'error' ? 'danger' : log.type === 'success' ? 'success' : 'primary'"
>
{{ log.message }}
</el-timeline-item>
</el-timeline>
<div class="progress-stats" v-if="crawlProgress.totalFound > 0">
<el-statistic title="发现记录" :value="crawlProgress.totalFound" />
<el-statistic title="新增记录" :value="crawlProgress.newRecords" />
<el-statistic title="跳过记录" :value="crawlProgress.skipped" />
</div>
<div class="progress-actions">
<el-button type="primary" @click="closeCrawlDrawer" v-if="crawlProgress.status !== 'running'">
关闭
</el-button>
<el-button @click="cancelCrawl" v-if="crawlProgress.status === 'running'">
取消
</el-button>
</div>
</el-tab-pane>
</el-tabs>
</div>
</el-drawer>
</div>
......@@ -198,24 +230,26 @@ const resultVisible = ref(false)
const crawlResult = ref(null)
const drawerVisible = ref(false)
const activeTab = ref('ai')
const aiKeyword = ref('')
const aiLoading = ref(false)
const aiSearched = ref(false)
const aiResults = ref([])
const isEditMode = ref(false)
const addLoading = ref(false)
const sourceFormRef = ref(null)
const sourceForm = reactive({
sourceName: '',
sourceUrl: '',
sourceType: '',
description: '',
listUrlPattern: '',
detailUrlPattern: '',
listSelector: '',
titleSelector: '',
dateSelector: '',
contentSelector: '',
sortOrder: 0,
isEnabled: true
})
const sourceRules = {
sourceName: [{ required: true, message: '请输入数据源名称', trigger: 'blur' }],
sourceUrl: [{ required: true, message: '请输入URL', trigger: 'blur' }],
sourceType: [{ required: true, message: '请选择类型', trigger: 'change' }]
sourceUrl: [{ required: true, message: '请输入基础URL', trigger: 'blur' }],
sourceType: [{ required: true, message: '请输入来源类型', trigger: 'blur' }]
}
const historyPage = ref(1)
......@@ -224,6 +258,19 @@ const historyTotal = ref(0)
const taskStatuses = ref({})
const crawlDrawerVisible = ref(false)
const crawlProgress = reactive({
sourceId: null,
sourceName: '',
status: 'pending',
statusText: '等待开始',
logs: [],
totalFound: 0,
newRecords: 0,
skipped: 0,
pollingInterval: null
})
const getStatusType = (status) => {
const map = { success: 'success', failed: 'danger', running: 'warning', pending: 'info' }
return map[status] || 'info'
......@@ -284,6 +331,19 @@ const fetchHistory = async () => {
}
}
// Clear the entire crawl history after an explicit user confirmation.
const handleClearHistory = async () => {
  const confirmText = '确定要清除所有爬取历史记录吗?'
  try {
    await ElMessageBox.confirm(confirmText, '警告', { type: 'warning' })
    await api.clearCrawlHistory()
    ElMessage.success('清除成功')
    fetchHistory()
  } catch (reason) {
    // ElMessageBox rejects with the string 'cancel' when dismissed — not an error.
    if (reason === 'cancel') return
    ElMessage.error('清除失败')
  }
}
const handleTrigger = async () => {
loading.value = true
try {
......@@ -302,38 +362,21 @@ const handleTrigger = async () => {
}
const openAddSource = () => {
activeTab.value = 'ai'
aiKeyword.value = ''
aiResults.value = []
aiSearched.value = false
Object.assign(sourceForm, { sourceName: '', sourceUrl: '', sourceType: '', description: '', isEnabled: true })
drawerVisible.value = true
}
const handleAiSearch = async () => {
const keyword = aiKeyword.value.trim() || '金融监管机构发布监管处罚的网站'
aiLoading.value = true
aiSearched.value = true
try {
const res = await api.searchSources(keyword)
aiResults.value = res.data.data || []
} catch (error) {
console.error('AI搜索失败:', error)
ElMessage.error('搜索失败,请重试')
} finally {
aiLoading.value = false
}
}
const addAiSource = (item) => {
isEditMode.value = false
Object.assign(sourceForm, {
sourceName: item.name,
sourceUrl: item.url,
sourceType: '官网',
description: item.description || '',
sourceName: '',
sourceUrl: '',
sourceType: '',
listUrlPattern: '',
detailUrlPattern: '',
listSelector: '',
titleSelector: '',
dateSelector: '',
contentSelector: '',
sortOrder: 0,
isEnabled: true
})
activeTab.value = 'manual'
drawerVisible.value = true
}
const handleAddSource = async () => {
......@@ -342,12 +385,17 @@ const handleAddSource = async () => {
if (!valid) return
addLoading.value = true
try {
if (isEditMode.value && sourceForm.id) {
await api.updateCrawlSource(sourceForm.id, sourceForm)
ElMessage.success('修改成功')
} else {
await api.createCrawlSource(sourceForm)
ElMessage.success('添加成功')
}
drawerVisible.value = false
fetchSources()
} catch (error) {
ElMessage.error('添加失败')
ElMessage.error('操作失败')
} finally {
addLoading.value = false
}
......@@ -366,31 +414,115 @@ const handleToggleStatus = async (row) => {
}
// Populate the source form from an existing table row and open the
// drawer on the manual tab in edit mode.
const handleEditSource = (row) => {
  isEditMode.value = true
  // Optional text fields default to '' so the inputs never show undefined.
  const optionalTextFields = [
    'description',
    'listUrlPattern',
    'detailUrlPattern',
    'listSelector',
    'titleSelector',
    'dateSelector',
    'contentSelector'
  ]
  const patch = {
    id: row.id,
    sourceName: row.sourceName,
    sourceUrl: row.sourceUrl,
    sourceType: row.sourceType,
    sortOrder: row.sortOrder || 0,
    isEnabled: row.isEnabled
  }
  for (const field of optionalTextFields) {
    patch[field] = row[field] || ''
  }
  Object.assign(sourceForm, patch)
  activeTab.value = 'manual'
  drawerVisible.value = true
}
const handleCrawlSource = async (row) => {
// Open the crawl-progress drawer for one source, trigger the crawl on the
// backend, and start polling for its status.
//
// Bug fixed: failure handling lived in a `finally` block, so every call —
// including successful starts — immediately marked the crawl as failed,
// cleared the loading flag, and referenced `error` outside the `catch`
// scope (a ReferenceError on the success path). Failure handling now runs
// only in `catch`.
const openCrawlDrawer = async (row) => {
  // Reset progress state for this source before showing the drawer.
  crawlProgress.sourceId = row.id
  crawlProgress.sourceName = row.sourceName
  crawlProgress.status = 'running'
  crawlProgress.statusText = '正在爬取...'
  crawlProgress.logs = [{ time: new Date().toLocaleTimeString(), message: `开始爬取数据源: ${row.sourceName}`, type: 'primary' }]
  crawlProgress.totalFound = 0
  crawlProgress.newRecords = 0
  crawlProgress.skipped = 0
  crawlDrawerVisible.value = true
  crawlingSource.value = row.id
  try {
    await api.crawlSingleSource(row.id)
    crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: '爬取任务已启动', type: 'primary' })
    startPolling(row.id)
  } catch (error) {
    // Starting the crawl failed — surface it and stop the loading state.
    ElMessage.error('爬取失败')
    crawlProgress.status = 'failed'
    crawlProgress.statusText = '爬取失败'
    crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: error.message || '启动爬取失败', type: 'error' })
    crawlingSource.value = null
  }
}
// Poll the crawl-status endpoint every 2 seconds until the task for the
// current source succeeds, fails, or times out after 150 polls (~5 minutes).
// Progress is written into the shared `crawlProgress` reactive object.
// NOTE(review): the `sourceId` parameter is unused — tasks are matched by
// sourceName, which assumes source names are unique; confirm with backend.
const startPolling = (sourceId) => {
  const sourceName = crawlProgress.sourceName
  // Only one poller may be active at a time — cancel any previous interval.
  if (crawlProgress.pollingInterval) {
    clearInterval(crawlProgress.pollingInterval)
  }
  let pollCount = 0
  const maxPolls = 150 // 150 polls * 2s = 5-minute timeout
  crawlProgress.pollingInterval = setInterval(async () => {
    pollCount++
    if (pollCount > maxPolls) {
      // Timed out: stop polling and mark the crawl as failed.
      clearInterval(crawlProgress.pollingInterval)
      crawlProgress.status = 'failed'
      crawlProgress.statusText = '爬取超时'
      crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: '爬取超时,已自动结束', type: 'error' })
      crawlingSource.value = null
      return
    }
    try {
      const res = await api.getCrawlStatus()
      const tasks = res.data.tasks || []
      const task = tasks.find(t => t.sourceName === sourceName)
      if (task) {
        if (task.status === 'running') {
          // Still running: append a progress log line.
          // NOTE(review): a line is pushed on every poll, so near-duplicate
          // messages accumulate in the timeline while the task runs.
          if (task.recordsFound && task.recordsFound > 0) {
            crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: task.message || `已发现 ${task.recordsFound} 条记录...`, type: 'primary' })
            crawlProgress.totalFound = task.recordsFound
          } else {
            crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: task.message || '正在爬取...', type: 'primary' })
          }
        } else if (task.status === 'success') {
          // Finished: record final stats, stop polling, refresh the tables.
          crawlProgress.status = 'success'
          crawlProgress.statusText = '爬取完成'
          crawlProgress.totalFound = task.recordsFound || 0
          crawlProgress.newRecords = task.recordsNew || 0
          // Skipped = found but not newly inserted (already-known records).
          crawlProgress.skipped = (task.recordsFound || 0) - (task.recordsNew || 0)
          crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: `爬取完成,发现${task.recordsFound}条记录,新增${task.recordsNew}条`, type: 'success' })
          clearInterval(crawlProgress.pollingInterval)
          crawlingSource.value = null
          fetchSources()
          fetchHistory()
        } else if (task.status === 'failed') {
          // Failed on the backend: surface the error message and stop polling.
          crawlProgress.status = 'failed'
          crawlProgress.statusText = '爬取失败'
          crawlProgress.logs.push({ time: new Date().toLocaleTimeString(), message: task.errorMessage || '爬取失败', type: 'error' })
          clearInterval(crawlProgress.pollingInterval)
          crawlingSource.value = null
        }
      }
    } catch (error) {
      // A failed status poll is non-fatal; keep trying on the next tick.
      console.error('轮询状态失败:', error)
    }
  }, 2000)
}
// Stop any active status polling, then hide the crawl-progress drawer.
const closeCrawlDrawer = () => {
  const timer = crawlProgress.pollingInterval
  if (timer) clearInterval(timer)
  crawlProgress.pollingInterval = null
  crawlDrawerVisible.value = false
}
// Cancel from the UI's perspective: stops client-side polling and closes
// the drawer via closeCrawlDrawer().
// NOTE(review): no backend call is made here, so the server-side crawl task
// keeps running after "cancel" — confirm whether a cancel API is needed.
const cancelCrawl = () => {
  closeCrawlDrawer()
  ElMessage.info('爬取已取消')
}
const handleDeleteSource = async (row) => {
try {
await ElMessageBox.confirm(`确定要删除数据源 ${row.sourceName} 吗?`, '警告', { type: 'warning' })
......@@ -436,4 +568,47 @@ onMounted(() => {
display: flex;
justify-content: flex-end;
}
/* Layout for the crawl-progress drawer: a fixed header, a scrollable
   timeline that takes the remaining height, a stats row, and a footer
   with the action buttons. */
.crawl-progress {
  height: 100%;
  display: flex;
  flex-direction: column;
  /* Source name + status tag, separated from the timeline by a rule. */
  .progress-header {
    display: flex;
    align-items: center;
    gap: 12px;
    margin-bottom: 20px;
    padding-bottom: 16px;
    border-bottom: 1px solid #eee;
    .source-name {
      font-size: 16px;
      font-weight: 500;
    }
  }
  /* Log timeline grows to fill the drawer but scrolls past 400px. */
  .progress-timeline {
    flex: 1;
    overflow-y: auto;
    max-height: 400px;
    padding: 0 8px;
  }
  /* Found / new / skipped statistics shown once the crawl reports counts. */
  .progress-stats {
    display: flex;
    justify-content: space-around;
    margin: 20px 0;
    padding: 16px;
    background: #f5f7fa;
    border-radius: 8px;
  }
  /* Close / cancel buttons pinned to the bottom-right of the drawer. */
  .progress-actions {
    display: flex;
    justify-content: flex-end;
    padding-top: 16px;
    border-top: 1px solid #eee;
  }
}
</style>
......@@ -49,7 +49,7 @@
</div>
<div class="login-footer">
<p>默认账号: admin / admin123</p>
<p>默认账号: admin / 1qaz@WSX</p>
</div>
</div>
</template>
......@@ -67,8 +67,8 @@ const formRef = ref(null)
const loading = ref(false)
const form = reactive({
username: '',
password: ''
username: 'admin',
password: '1qaz@WSX'
})
const rules = {
......
......@@ -34,8 +34,25 @@
<el-form-item>
<el-button type="primary" @click="handleSearch" :icon="Search">搜索</el-button>
<el-button @click="handleReset" :icon="Refresh">重置</el-button>
<el-button type="success" @click="showAISearch" :icon="Search">AI搜索</el-button>
</el-form-item>
</el-form>
<!-- AI搜索框 -->
<div v-if="showAISearchPanel" class="ai-search-panel">
<el-input
v-model="aiSearchText"
type="textarea"
:rows="3"
placeholder="请用自然语言描述搜索条件,例如:查找2024年银保监会对保险公司的罚款记录,金额超过50万的"
@keydown.enter.ctrl="handleAISearch"
/>
<div class="ai-search-actions">
<el-button type="primary" @click="handleAISearch" :loading="aiSearchLoading">开始搜索</el-button>
<el-button @click="closeAISearch">关闭</el-button>
<span class="ai-search-tip">支持 Ctrl+Enter 提交</span>
</div>
</div>
</el-card>
<!-- 操作栏 -->
......@@ -57,14 +74,9 @@
<el-table-column type="selection" width="55" />
<el-table-column prop="penaltyNumber" label="处罚编号" width="220" resizable>
<template #default="{ row }">
<el-link
v-if="row.analysisStatus === 'completed'"
type="primary"
@click="viewReport(row)"
>
<el-link type="primary" @click="viewDetail(row)">
{{ row.penaltyNumber }}
</el-link>
<span v-else>{{ row.penaltyNumber }}</span>
</template>
</el-table-column>
<el-table-column prop="institutionName" label="机构名称" min-width="200" show-overflow-tooltip resizable />
......@@ -100,7 +112,7 @@
<el-table-column label="操作" width="250" fixed="right">
<template #default="{ row }">
<el-button type="primary" link @click="viewDetail(row)">详情</el-button>
<el-button type="primary" link @click="viewReport(row)">报告</el-button>
<el-button type="primary" link @click="openReportDrawer(row)">报告</el-button>
<el-button
type="warning"
link
......@@ -167,20 +179,85 @@
</template>
</el-dialog>
<!-- 分析报告弹窗 -->
<el-dialog v-model="reportVisible" title="AI分析报告" width="55%" :fullscreen="reportFullscreen" show-fullscreen class="report-dialog">
<!-- 报告抽屉(80%宽度,含tabs) -->
<el-drawer v-model="reportDrawerVisible" :title="'报告 - ' + (currentRecord?.penaltyNumber || '')" size="80%">
<el-tabs v-model="reportActiveTab" class="report-tabs">
<!-- 报告内容Tab -->
<el-tab-pane label="报告内容" name="report">
<div v-loading="reportLoading" class="report-content">
<div
v-if="currentReport"
class="markdown-body"
v-html="renderedMarkdown"
<div v-if="currentReport" class="markdown-body" v-html="renderedMarkdown" />
<el-empty v-else description="暂无报告,请先点击'分析'按钮生成报告" />
</div>
</el-tab-pane>
<!-- AI搜索相关内容Tab -->
<el-tab-pane label="AI搜索相关处罚" name="ai-search">
<div class="ai-search-content">
<el-alert
:title="'根据当前记录「' + (currentRecord?.institutionName || '') + '」搜索相关信息'"
type="info"
:closable="false"
show-icon
style="margin-bottom: 16px"
/>
<el-empty v-else description="暂无报告" />
<!-- 搜索输入区 -->
<div class="search-input-area">
<el-input
v-model="aiSearchKeyword"
placeholder="输入关键词搜索相关处罚,如:保险诈骗、信贷违规"
@keyup.enter="handleRecordAiSearch"
style="margin-bottom: 12px"
>
<template #append>
<el-button @click="handleRecordAiSearch" :loading="aiSearchLoading">搜索</el-button>
</template>
</el-input>
<div class="search-actions">
<el-button
v-if="aiSearchLoading"
type="danger"
@click="stopAiSearch"
>
停止搜索
</el-button>
<span class="search-tip">可补充关键词后点击搜索</span>
</div>
<template #footer>
<el-button @click="reportVisible = false">关闭</el-button>
</div>
<!-- 搜索结果表格 -->
<div v-if="aiSearchResults.length > 0" class="search-results">
<div class="results-header">
<span>搜索结果(共 {{ aiSearchResults.length }} 条)</span>
</div>
<el-table :data="aiSearchResults" stripe border max-height="400">
<el-table-column prop="penaltyNumber" label="处罚编号" width="180" show-overflow-tooltip />
<el-table-column prop="institutionName" label="机构名称" min-width="150" show-overflow-tooltip />
<el-table-column prop="penaltyType" label="处罚类型" width="100" />
<el-table-column prop="penaltyAmount" label="处罚金额" width="120">
<template #default="{ row }">
{{ row.penaltyAmount ? formatAmount(row.penaltyAmount) : '-' }}
</template>
</el-dialog>
</el-table-column>
<el-table-column prop="penaltyDate" label="处罚日期" width="110" />
<el-table-column prop="regulator" label="监管机构" width="120" show-overflow-tooltip />
<el-table-column label="操作" width="120" fixed="right">
<template #default="{ row }">
<el-button type="primary" link @click="viewDetail(row)">查看详情</el-button>
</template>
</el-table-column>
</el-table>
</div>
<el-empty v-else-if="!aiSearchLoading && aiSearched" description="未找到相关处罚记录" :image-size="60" />
<div v-else-if="!aiSearchLoading && !aiSearched" class="search-hint">
<el-icon><InfoFilled /></el-icon>
<span>请在上方输入关键词,点击搜索按钮查找相关处罚记录</span>
</div>
</div>
</el-tab-pane>
</el-tabs>
</el-drawer>
<!-- 选择分析模版抽屉 -->
<el-drawer v-model="templateDialogVisible" title="选择分析模版" size="50%">
......@@ -253,7 +330,7 @@
<script setup>
import { ref, reactive, onMounted, computed } from 'vue'
import { ElMessage, ElMessageBox } from 'element-plus'
import { Search, Refresh, Download, Check, FullScreen } from '@element-plus/icons-vue'
import { Search, Refresh, Download, Check, FullScreen, InfoFilled } from '@element-plus/icons-vue'
import { marked } from 'marked'
import api from '../api'
......@@ -269,13 +346,20 @@ const loading = ref(false)
const tableData = ref([])
const detailVisible = ref(false)
const detailFullscreen = ref(false)
const reportVisible = ref(false)
const reportFullscreen = ref(false)
const reportDrawerVisible = ref(false)
const reportActiveTab = ref('report')
const reportLoading = ref(false)
const currentRecord = ref(null)
const currentReport = ref(null)
const dateRange = ref([])
// AI搜索相关
const aiSearchKeyword = ref('')
const aiSearchLoading = ref(false)
const aiSearched = ref(false)
const aiSearchResults = ref([])
const aiSearchController = ref(null)
const templateDialogVisible = ref(false)
const addTemplateDialogVisible = ref(false)
const templateList = ref([])
......@@ -309,6 +393,43 @@ const searchForm = reactive({
regulator: ''
})
// Visibility flag and input model for the natural-language AI search panel.
const showAISearchPanel = ref(false)
const aiSearchText = ref('')

// Open the AI search panel.
const showAISearch = () => {
  showAISearchPanel.value = true
}

// Close the AI search panel and discard the typed query.
const closeAISearch = () => {
  showAISearchPanel.value = false
  aiSearchText.value = ''
}
// Run a natural-language AI search and replace the table contents with the
// returned page of records.
// NOTE(review): this reads the page straight off `res.data` (res.data.content)
// rather than an ApiResponse envelope — confirm whether an axios interceptor
// unwraps the response.
const handleAISearch = async () => {
  const query = aiSearchText.value.trim()
  if (!query) {
    ElMessage.warning('请输入搜索内容')
    return
  }
  aiSearchLoading.value = true
  try {
    const res = await api.aiSearch(aiSearchText.value)
    const page = res.data && res.data.content ? res.data : null
    if (page) {
      tableData.value = page.content
      pagination.total = page.totalElements || page.content.length
      ElMessage.success(`找到 ${page.content.length} 条结果`)
    } else {
      // No page in the response: show an empty table.
      tableData.value = []
      pagination.total = 0
    }
    showAISearchPanel.value = false
  } catch (error) {
    console.error('AI搜索失败:', error)
    ElMessage.error('AI搜索失败,请重试')
  } finally {
    aiSearchLoading.value = false
  }
}
const pagination = reactive({
page: 1,
size: 10,
......@@ -443,15 +564,20 @@ const openAddTemplate = () => {
addTemplateDialogVisible.value = true
}
const viewReport = async (row) => {
const openReportDrawer = async (row) => {
currentRecord.value = row
reportDrawerVisible.value = true
reportActiveTab.value = 'report'
aiSearchKeyword.value = ''
aiSearchResults.value = []
aiSearched.value = false
// 获取分析报告
try {
reportLoading.value = true
reportVisible.value = true
// 获取分析报告
const res = await api.getAnalysisReport(row.id)
currentReport.value = res.data || row.analysisReport
} catch (error) {
ElMessage.error('获取报告失败')
console.error('获取报告失败:', error)
currentReport.value = null
} finally {
......@@ -459,6 +585,52 @@ const viewReport = async (row) => {
}
}
// AI-search penalties related to the currently-open record. The query is the
// record's institution name plus any extra keyword typed by the user; the
// record itself is filtered out of the results.
//
// Bug fixed: the original set `aiSearched`/`aiSearchLoading` and cleared the
// results BEFORE validating the query, so an empty query briefly flashed the
// "no results" empty state even though no search ever ran. Validation now
// happens first.
const handleRecordAiSearch = async () => {
  if (!currentRecord.value) return

  // Build the search text from the record plus the optional user keyword.
  const baseKeyword = currentRecord.value.institutionName || ''
  const keyword = aiSearchKeyword.value.trim()
  const searchText = keyword ? `${baseKeyword} ${keyword}` : baseKeyword

  // Validate before touching any UI state.
  if (!searchText.trim()) {
    ElMessage.warning('请输入搜索关键词')
    return
  }

  aiSearchLoading.value = true
  aiSearched.value = true
  aiSearchResults.value = []
  try {
    const res = await api.aiSearch(searchText)
    if (res.data && res.data.content) {
      // Drop the record we started from — only show other related records.
      aiSearchResults.value = res.data.content.filter(item => item.id !== currentRecord.value.id)
      ElMessage.success(`找到 ${aiSearchResults.value.length} 条相关记录`)
    } else {
      aiSearchResults.value = []
    }
  } catch (error) {
    console.error('AI搜索失败:', error)
    ElMessage.error('搜索失败,请重试')
  } finally {
    aiSearchLoading.value = false
  }
}
// Stop an in-flight AI search.
// NOTE(review): `aiSearchController` is never assigned anywhere in the visible
// code, so the abort() branch is currently dead — handleRecordAiSearch would
// need to create an AbortController and pass its signal to the request for
// this to actually cancel anything. Confirm and wire up if intended.
const stopAiSearch = () => {
  if (aiSearchController.value) {
    aiSearchController.value.abort()
    aiSearchController.value = null
  }
  aiSearchLoading.value = false
  ElMessage.info('已停止搜索')
}
const fetchData = async () => {
loading.value = true
try {
......@@ -516,6 +688,26 @@ const handleSubmitTemplate = async () => {
margin-bottom: 20px;
}
.ai-search-panel {
margin-top: 16px;
padding: 16px;
background: #f0f9ff;
border-radius: 4px;
border: 1px solid #bae7ff;
.ai-search-actions {
margin-top: 12px;
display: flex;
align-items: center;
gap: 12px;
}
.ai-search-tip {
font-size: 12px;
color: #909399;
}
}
.action-row {
margin-bottom: 20px;
}
......@@ -534,10 +726,63 @@ const handleSubmitTemplate = async () => {
}
.report-content {
max-height: 60vh;
max-height: calc(80vh - 120px);
overflow-y: auto;
}
.report-tabs {
height: 100%;
}
.ai-search-content {
display: flex;
flex-direction: column;
height: calc(80vh - 100px);
.search-input-area {
flex-shrink: 0;
padding: 12px;
background: #f5f7fa;
border-radius: 4px;
margin-bottom: 16px;
.search-actions {
display: flex;
align-items: center;
gap: 12px;
.search-tip {
font-size: 12px;
color: #909399;
}
}
}
.search-results {
flex: 1;
display: flex;
flex-direction: column;
overflow: hidden;
.results-header {
flex-shrink: 0;
margin-bottom: 12px;
font-weight: 500;
color: #303133;
}
}
.search-hint {
flex: 1;
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
color: #909399;
font-size: 14px;
}
}
.template-drawer-content {
display: flex;
flex-direction: column;
......
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'

// Backend base URL for the dev-server proxy; overridable via VITE_API_BASE_URL.
const apiUrl = process.env.VITE_API_BASE_URL || 'http://localhost:8080'

// Bug fixed: the proxy object contained a duplicate `target` key (a stale
// line left beside its replacement). Only the `apiUrl` form is kept.
export default defineConfig({
  plugins: [vue()],
  server: {
    port: 3000,
    proxy: {
      // Forward /api requests to the backend during development.
      '/api': {
        target: apiUrl,
        changeOrigin: true
      }
    }
  },
  css: {
    preprocessorOptions: {
      scss: {
        // Use the modern Sass compiler API (avoids legacy-API deprecation warnings).
        api: 'modern-compiler'
      }
    }
  }
})
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论