提交 11c33355 authored 作者: kxjia's avatar kxjia

优化代码

上级 36181925
......@@ -35,9 +35,9 @@ public class CrawlerConfig {
private int maxRetries = 3;
/**
* 超时时间(毫秒)
* 超时时间(毫秒),0表示不限制
*/
private int timeout = 30000;
private int timeout = 0;
/**
* 数据源列表
......@@ -51,5 +51,7 @@ public class CrawlerConfig {
private String baseUrl;
private String listUrl;
private boolean enabled = true;
private String detailUrl;
private String detailUrlPattern;
}
}
......@@ -48,6 +48,18 @@ public class CrawlController {
return ResponseEntity.ok(ApiResponse.success(response));
}
/**
 * Stops the crawl task that is currently running.
 *
 * <p>The HTTP status is 200 in both branches; when no crawl is running the failure is
 * reported only through the {@code ApiResponse} payload (code 400).
 *
 * @return the API response wrapped in an HTTP 200 entity
 */
@PostMapping("/stop")
public ResponseEntity<ApiResponse<Void>> stopCrawl() {
    final ApiResponse<Void> body;
    if (crawlScheduler.isRunning()) {
        crawlScheduler.stopCrawl();
        body = ApiResponse.success("爬取任务已停止", null);
    } else {
        body = ApiResponse.error(400, "没有正在执行的爬取任务");
    }
    return ResponseEntity.ok(body);
}
/**
* 获取爬取状态
*/
......
......@@ -16,6 +16,7 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.safety.Safelist;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
......@@ -29,14 +30,13 @@ import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* 处罚信息爬虫 - 核心爬虫类(支持断点续传)
......@@ -52,24 +52,35 @@ public class PenaltyCrawler {
private final CrawlerConfig crawlerConfig;
@Value("${crawler.resume.enabled:true}")
private boolean resumeEnabled; // 是否启用断点续传,默认启用
private boolean resumeEnabled;
@Value("${crawler.resume.force-restart:false}")
private boolean forceRestart; // 是否强制重新开始,默认false
private boolean forceRestart;
@Value("${crawler.nfra.page-size:18}")
private int defaultPageSize;
@Value("${crawler.nfra.max-pages:10000}")
private int maxPages;
@Value("${crawler.nfra.request-interval-ms:1000}")
private long requestIntervalMs;
@Value("${crawler.nfra.batch-save-size:50}")
private int batchSaveSize;
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
private static final DateTimeFormatter DATE_FORMATTER_CN = DateTimeFormatter.ofPattern("yyyy[-/年]MM[-/月]dd[日]");
// 线程安全的罚单编号计数器(生产环境建议用数据库序列或Redis)
private final AtomicInteger penaltySeq = new AtomicInteger(0);
// 请求频率控制器
private final Map<String, AtomicLong> lastRequestTime = new ConcurrentHashMap<>();
// 当前任务的唯一标识
private String currentTaskId;
@Async("crawlExecutor")
public CompletableFuture<CrawlResult> crawl(CrawlerConfig.SourceConfig source) {
// 生成任务ID
currentTaskId = UUID.randomUUID().toString();
log.info("========== 开始爬取任务 ==========");
......@@ -85,7 +96,6 @@ public class PenaltyCrawler {
try {
List<PenaltyRecord> records = crawlNFRA(source);
// 标记任务完成
markTaskCompleted(source.getName());
result.setSuccess(true);
......@@ -99,7 +109,6 @@ public class PenaltyCrawler {
log.error("爬取失败: {} - {}", source.getName(), e.getMessage(), e);
result.setSuccess(false);
result.setErrorMessage(e.getMessage());
// 标记任务失败
markTaskFailed(source.getName());
}
......@@ -118,9 +127,8 @@ public class PenaltyCrawler {
try {
String baseUrl = normalizeBaseUrl(source.getBaseUrl());
// ========== 获取上次爬取进度 ==========
int startPage = 1;
int pageSize = 18;
int pageSize = defaultPageSize;
int totalPages = 4093;
int totalNewRecords = 0;
......@@ -128,7 +136,7 @@ public class PenaltyCrawler {
Optional<CrawlProgress> progressOpt = getLastProgress(source.getName());
if (progressOpt.isPresent() && !isTaskCompleted(source.getName())) {
CrawlProgress progress = progressOpt.get();
startPage = progress.getLastPageIndex() + 1; // 从下一页开始
startPage = progress.getLastPageIndex() + 1;
totalPages = progress.getTotalPages() != null ? progress.getTotalPages() : totalPages;
totalNewRecords = progress.getTotalRecordsCrawled() != null ? progress.getTotalRecordsCrawled() : 0;
log.info("========== 检测到上次未完成的任务 ==========");
......@@ -138,31 +146,29 @@ public class PenaltyCrawler {
log.info("本次将从第 {} 页继续爬取", startPage);
} else if (progressOpt.isPresent() && isTaskCompleted(source.getName())) {
log.info("上次任务已完成,本次从头开始爬取");
// 清除旧的完成记录,开始新任务
clearProgress(source.getName());
} else {
log.info("未检测到未完成的任务,从头开始爬取");
}
} else {
log.info("断点续传已禁用或强制重启,从头开始爬取");
// 清除旧的进度记录
clearProgress(source.getName());
}
// 初始化进度记录
initProgress(source.getName(), startPage - 1, totalPages, totalNewRecords);
// 记录失败的页码,用于后续重试
List<Integer> failedPages = new ArrayList<>();
int pageIndex = startPage;
int maxPages = 1000000;
while (pageIndex <= totalPages && pageIndex <= maxPages) {
String listUrl = buildListApiUrl(source, pageIndex, pageSize);
log.info("正在抓取第{}页: {}", pageIndex, listUrl);
try {
// 请求频率控制
waitForRateLimit(source.getName());
String json = fetchJson(listUrl);
JsonNode rootNode = mapper.readTree(json);
......@@ -172,7 +178,6 @@ public class PenaltyCrawler {
if (pageInfo.has("totalPages")) {
totalPages = pageInfo.get("totalPages").asInt();
// 更新总页数到进度表
updateTotalPages(source.getName(), totalPages);
} else if (pageInfo.has("total")) {
int total = pageInfo.get("total").asInt();
......@@ -180,72 +185,35 @@ public class PenaltyCrawler {
updateTotalPages(source.getName(), totalPages);
}
int pageRecordCount = 0;
int failedCount = 0;
for (int i = 0; i < rows.size(); i++) {
JsonNode row = rows.get(i);
try {
String docId = row.has("guid") ? row.get("guid").asText() :
(row.has("docId") ? row.get("docId").asText() : null);
String title = row.has("docTitle") ? row.get("docTitle").asText() : "";
if (docId == null || title.isEmpty()) {
log.debug("跳过无效记录: docId={}, title={}", docId, title);
failedCount++;
continue;
}
String detailUrl = baseUrl + "cn/static/data/DocInfo/SelectByDocId/data_docId=" + docId + ".json";
// 解析当前页面的记录
List<PenaltyRecord> pageRecords = parseRecordsFromJson(rows, source, baseUrl);
PenaltyRecord record = fetchWithRetry(detailUrl, title, source, 3);
if (record != null) {
// 检查是否已存在
boolean isNew = false;
if (record.getSourceUrl() != null && !record.getSourceUrl().isEmpty()) {
if (!penaltyRecordRepository.existsBySourceUrl(record.getSourceUrl())) {
isNew = true;
}
} else {
if (!penaltyRecordRepository.existsByIllegalFactsAndRegulator(
record.getIllegalFacts(), record.getRegulator())) {
isNew = true;
}
}
if (isNew) {
int pageNewCount = 0;
for (PenaltyRecord record : pageRecords) {
if (!isRecordExists(record)) {
batchBuffer.add(record);
records.add(record);
pageRecordCount++;
pageNewCount++;
totalNewRecords++;
}
} else {
failedCount++;
}
} catch (Exception e) {
failedCount++;
log.error("解析第{}页第{}条记录失败: {}", pageIndex, i, e.getMessage());
}
}
// ========== 每抓取10页保存一次到数据库 ==========
if (pageIndex % 10 == 0 && !batchBuffer.isEmpty()) {
// 批量保存
if (batchBuffer.size() >= batchSaveSize) {
int savedCount = saveBatchToDatabase(batchBuffer);
log.info("===== 已抓取{}页,批量保存{}条新记录到数据库 =====", pageIndex, savedCount);
batchBuffer.clear();
}
// ========== 每抓取5页更新一次进度(断点续传) ==========
// 更新进度
if (pageIndex % 5 == 0) {
updateProgress(source.getName(), pageIndex, totalNewRecords);
log.info("===== 已更新爬取进度: 第{}页,累计{}条记录 =====", pageIndex, totalNewRecords);
}
// 输出页面进度
double progress = (double) pageIndex / totalPages * 100;
log.info("第{}页爬取完成,本页获取{}条新记录,失败{}条,累计{}条新记录,总进度:{:.2f}% ({}/{})",
pageIndex, pageRecordCount, failedCount, totalNewRecords, progress, pageIndex, totalPages);
log.info("第{}页爬取完成,本页获取{}条新记录,累计{}条新记录,总进度:{:.2f}% ({}/{})",
pageIndex, pageNewCount, totalNewRecords, progress, pageIndex, totalPages);
} else if (rootNode.has("rptCode") && rootNode.get("rptCode").asText().equals("200")) {
log.warn("NFRA API返回结束标记(rptCode=200),停止爬取");
......@@ -263,27 +231,25 @@ public class PenaltyCrawler {
pageIndex++;
}
// 每爬完一页停留5秒
// 页面间隔
if (pageIndex <= totalPages && pageIndex <= maxPages) {
log.info("等待5秒后继续爬取下一页...");
Thread.sleep(1000);
Thread.sleep(requestIntervalMs);
}
}
// ========== 保存剩余不足10页的数据 ==========
// 保存剩余数据
if (!batchBuffer.isEmpty()) {
int savedCount = saveBatchToDatabase(batchBuffer);
log.info("===== 爬取完成,保存最后{}条新记录到数据库 =====", savedCount);
batchBuffer.clear();
}
// 最终更新进度
updateProgress(source.getName(), pageIndex - 1, totalNewRecords);
// 重试失败的页面
if (!failedPages.isEmpty()) {
log.info("开始重试失败的页面: {}", failedPages);
retryFailedPagesWithBatchSave(source, failedPages, records, mapper, baseUrl, pageSize);
retryFailedPages(source, failedPages, records, mapper, baseUrl, pageSize);
}
if (records.isEmpty()) {
......@@ -298,16 +264,14 @@ public class PenaltyCrawler {
} catch (Exception e) {
log.error("NFRA爬取失败: {}", e.getMessage(), e);
// 异常时也尝试保存已缓存的数据
if (!batchBuffer.isEmpty()) {
saveBatchToDatabase(batchBuffer);
}
// 更新失败状态
updateProgressStatus(source.getName(), "FAILED");
try {
records = crawlNFRADirect(source);
if (!records.isEmpty()) {
saveBatchToDatabase(records);
List<PenaltyRecord> htmlRecords = crawlNFRADirect(source);
if (!htmlRecords.isEmpty()) {
saveBatchToDatabase(htmlRecords);
}
} catch (Exception ex) {
log.error("NFRA HTML解析也失败: {}", ex.getMessage());
......@@ -316,33 +280,83 @@ public class PenaltyCrawler {
return records;
}
// ==================== 断点续传相关方法 ====================
/**
 * Turns the rows of one list-API page into penalty records.
 *
 * <p>For every row the document id is read from {@code guid} (falling back to {@code docId})
 * and the title from {@code docTitle}; rows missing either are skipped. The detail JSON of
 * each remaining row is fetched through {@code fetchWithRetry} with up to 3 attempts.
 * A failure on one row is logged at debug level and does not stop the remaining rows.
 *
 * @param rows    JSON array node holding the list rows
 * @param source  source configuration handed on to the detail fetch
 * @param baseUrl base URL the detail path is appended to
 * @return the records that could be fetched and parsed; never null
 */
private List<PenaltyRecord> parseRecordsFromJson(JsonNode rows, CrawlerConfig.SourceConfig source, String baseUrl) {
    List<PenaltyRecord> parsed = new ArrayList<>();
    for (JsonNode row : rows) {
        try {
            String docId;
            if (row.has("guid")) {
                docId = row.get("guid").asText();
            } else if (row.has("docId")) {
                docId = row.get("docId").asText();
            } else {
                docId = null;
            }
            String title = row.has("docTitle") ? row.get("docTitle").asText() : "";

            boolean usable = docId != null && !title.isEmpty();
            if (!usable) {
                log.debug("跳过无效记录: docId={}, title={}", docId, title);
                continue;
            }

            String detailUrl = baseUrl + "cn/static/data/DocInfo/SelectByDocId/data_docId=" + docId + ".json";
            PenaltyRecord fetched = fetchWithRetry(detailUrl, title, source, 3);
            if (fetched != null) {
                parsed.add(fetched);
            }
        } catch (Exception e) {
            log.debug("解析记录失败: {}", e.getMessage());
        }
    }
    return parsed;
}
/**
* 获取上次的爬取进度
* 检查记录是否已存在
*/
private Optional<CrawlProgress> getLastProgress(String sourceName) {
return crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
/**
 * Checks whether an equivalent penalty record is already stored.
 *
 * <p>Records with a non-empty source URL are looked up by that URL; all others are looked up
 * by the combination of illegal facts and regulator.
 *
 * @param record the candidate record
 * @return {@code true} if the repository reports a match; {@code false} if there is none or
 *         the lookup itself failed (best effort: a failed lookup is logged and treated as
 *         "not present")
 */
private boolean isRecordExists(PenaltyRecord record) {
    try {
        String sourceUrl = record.getSourceUrl();
        if (sourceUrl != null && !sourceUrl.isEmpty()) {
            return penaltyRecordRepository.existsBySourceUrl(sourceUrl);
        }
        return penaltyRecordRepository.existsByIllegalFactsAndRegulator(
                record.getIllegalFacts(), record.getRegulator());
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is not lost.
        log.error("检查记录是否存在失败: {}", e.getMessage(), e);
        return false;
    }
}
/**
 * Request rate control: keeps at least {@code requestIntervalMs} milliseconds between two
 * calls made for the same source key.
 *
 * <p>If the previous call for this key was more recent than that, the current thread sleeps
 * for the remainder. An interrupt during the sleep restores the interrupt flag and the method
 * returns normally. The timestamp for the key is refreshed on every call.
 *
 * @param source key under which the time of the last request is tracked
 */
private void waitForRateLimit(String source) {
    AtomicLong lastTime = lastRequestTime.computeIfAbsent(source, key -> new AtomicLong(0));
    long remaining = requestIntervalMs - (System.currentTimeMillis() - lastTime.get());
    if (remaining > 0) {
        try {
            Thread.sleep(remaining);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
    lastTime.set(System.currentTimeMillis());
}
// ==================== 断点续传相关方法 ====================
/**
 * Looks up the progress row of the given source whose status is {@code "RUNNING"}.
 *
 * @param sourceName name of the crawl source
 * @return whatever the repository finds for that source and status
 */
private Optional<CrawlProgress> getLastProgress(String sourceName) {
return crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
}
/**
 * Tells whether a progress row with status {@code "COMPLETED"} exists for the source.
 *
 * @param sourceName name of the crawl source
 * @return {@code true} if such a row is found
 */
private boolean isTaskCompleted(String sourceName) {
    return crawlProgressRepository
            .findBySourceNameAndStatus(sourceName, "COMPLETED")
            .isPresent();
}
/**
* 初始化爬取进度
*/
private void initProgress(String sourceName, int lastPage, int totalPages, int totalRecords) {
try {
// 先删除旧的RUNNING状态记录
Optional<CrawlProgress> existing = crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
if (existing.isPresent()) {
crawlProgressRepository.delete(existing.get());
}
existing.ifPresent(crawlProgressRepository::delete);
CrawlProgress progress = CrawlProgress.builder()
.sourceName(sourceName)
......@@ -361,9 +375,6 @@ public class PenaltyCrawler {
}
}
/**
* 更新爬取进度
*/
private void updateProgress(String sourceName, int pageIndex, int totalRecords) {
try {
crawlProgressRepository.updateProgress(sourceName, currentTaskId, pageIndex, LocalDateTime.now());
......@@ -373,9 +384,6 @@ public class PenaltyCrawler {
}
}
/**
* 更新总页数
*/
private void updateTotalPages(String sourceName, int totalPages) {
try {
crawlProgressRepository.updateTotalPages(sourceName, currentTaskId, totalPages, LocalDateTime.now());
......@@ -384,9 +392,6 @@ public class PenaltyCrawler {
}
}
/**
* 更新进度状态
*/
private void updateProgressStatus(String sourceName, String status) {
try {
crawlProgressRepository.updateStatus(sourceName, currentTaskId, status, LocalDateTime.now());
......@@ -395,25 +400,16 @@ public class PenaltyCrawler {
}
}
/**
 * Marks the current task as completed: sets the progress status to {@code "COMPLETED"}
 * and logs the source name together with the current task id.
 *
 * @param sourceName name of the crawl source
 */
private void markTaskCompleted(String sourceName) {
updateProgressStatus(sourceName, "COMPLETED");
log.info("任务完成: source={}, taskId={}", sourceName, currentTaskId);
}
/**
 * Marks the current task as failed: sets the progress status to {@code "FAILED"}
 * and logs the source name together with the current task id (at info level).
 *
 * @param sourceName name of the crawl source
 */
private void markTaskFailed(String sourceName) {
updateProgressStatus(sourceName, "FAILED");
log.info("任务失败: source={}, taskId={}", sourceName, currentTaskId);
}
/**
* 清除进度记录
*/
private void clearProgress(String sourceName) {
try {
Optional<CrawlProgress> running = crawlProgressRepository.findBySourceNameAndStatus(sourceName, "RUNNING");
......@@ -429,15 +425,25 @@ public class PenaltyCrawler {
}
/**
* 批量保存记录到数据库
* 批量保存记录到数据库(带去重)
*/
private int saveBatchToDatabase(List<PenaltyRecord> records) {
if (records == null || records.isEmpty()) {
return 0;
}
// 去重
List<PenaltyRecord> distinctRecords = records.stream()
.filter(record -> !isRecordExists(record))
.collect(Collectors.toList());
if (distinctRecords.isEmpty()) {
log.debug("批量保存: 所有记录都已存在,跳过");
return 0;
}
int savedCount = 0;
for (PenaltyRecord record : records) {
for (PenaltyRecord record : distinctRecords) {
try {
penaltyRecordRepository.save(record);
savedCount++;
......@@ -446,14 +452,15 @@ public class PenaltyCrawler {
}
}
log.debug("批量保存完成: 成功保存{}条/共{}条", savedCount, records.size());
log.debug("批量保存完成: 成功保存{}条/共{}条(去重后{}条)",
savedCount, records.size(), distinctRecords.size());
return savedCount;
}
/**
* 重试失败的页面(支持批量保存)
* 重试失败的页面
*/
private void retryFailedPagesWithBatchSave(CrawlerConfig.SourceConfig source, List<Integer> failedPages,
private void retryFailedPages(CrawlerConfig.SourceConfig source, List<Integer> failedPages,
List<PenaltyRecord> records, ObjectMapper mapper,
String baseUrl, int pageSize) {
List<PenaltyRecord> retryBuffer = new ArrayList<>();
......@@ -461,44 +468,27 @@ public class PenaltyCrawler {
for (Integer pageIndex : failedPages) {
try {
log.info("重试第{}页", pageIndex);
waitForRateLimit(source.getName());
String listUrl = buildListApiUrl(source, pageIndex, pageSize);
String json = fetchJson(listUrl);
JsonNode rootNode = mapper.readTree(json);
if (rootNode.has("data") && rootNode.get("data").has("rows")) {
JsonNode rows = rootNode.get("data").get("rows");
for (JsonNode row : rows) {
try {
String docId = row.has("guid") ? row.get("guid").asText() :
(row.has("docId") ? row.get("docId").asText() : null);
String title = row.has("docTitle") ? row.get("docTitle").asText() : "";
if (docId != null && !title.isEmpty()) {
String detailUrl = baseUrl + "cn/static/data/DocInfo/SelectByDocId/data_docId=" + docId + ".json";
PenaltyRecord record = fetchWithRetry(detailUrl, title, source, 2);
if (record != null && !records.contains(record)) {
boolean exists = false;
if (record.getSourceUrl() != null && !record.getSourceUrl().isEmpty()) {
exists = penaltyRecordRepository.existsBySourceUrl(record.getSourceUrl());
} else {
exists = penaltyRecordRepository.existsByIllegalFactsAndRegulator(
record.getIllegalFacts(), record.getRegulator());
}
List<PenaltyRecord> pageRecords = parseRecordsFromJson(rows, source, baseUrl);
if (!exists) {
for (PenaltyRecord record : pageRecords) {
if (!isRecordExists(record) && !records.contains(record)) {
retryBuffer.add(record);
records.add(record);
}
}
}
} catch (Exception e) {
log.debug("重试时解析记录失败: {}", e.getMessage());
}
}
}
Thread.sleep(3000);
if (retryBuffer.size() >= 50) {
Thread.sleep(requestIntervalMs);
if (retryBuffer.size() >= batchSaveSize) {
saveBatchToDatabase(retryBuffer);
retryBuffer.clear();
}
......@@ -512,11 +502,11 @@ public class PenaltyCrawler {
}
}
// 添加重试方法
private PenaltyRecord fetchWithRetry(String detailUrl, String title,
CrawlerConfig.SourceConfig source, int maxRetries) {
for (int attempt = 1; attempt <= maxRetries; attempt++) {
try {
waitForRateLimit(source.getName());
return fetchAndParseDetail(detailUrl, title, source);
} catch (Exception e) {
if (attempt == maxRetries) {
......@@ -524,7 +514,7 @@ public class PenaltyCrawler {
return null;
}
try {
Thread.sleep(1000 * attempt);
Thread.sleep(1000L * attempt);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
return null;
......@@ -540,7 +530,7 @@ public class PenaltyCrawler {
if (pageIndex < 4) {
return "https://www.nfra.gov.cn/cn/static/data/DocInfo/getDocInfoListByItemId/data_itemId=" + itemId + ",pageIndex=" + pageIndex + ",pageSize=" + pageSize + ".json";
} else {
return "https://www.nfra.gov.cn/cbircweb/DocInfo/SelectDocByItemIdAndChild?itemId=" + itemId + "&pageSize=100&pageIndex="+pageIndex;
return "https://www.nfra.gov.cn/cbircweb/DocInfo/SelectDocByItemIdAndChild?itemId=" + itemId + "&pageSize=100&pageIndex=" + pageIndex;
}
}
......@@ -553,6 +543,13 @@ public class PenaltyCrawler {
return "1855";
}
/**
 * Builds the display URL of a document by substituting its id into a URL template.
 *
 * @param detailUrlPattern template in which every {@code *} is replaced by the id; may be
 *                         null or empty
 * @param docId            document id; may be null when it could not be extracted
 * @return the resulting URL, or an empty string when the template is null/empty or the id
 *         is null
 */
private String buildSourceUrl(String detailUrlPattern, String docId) {
    if (detailUrlPattern == null || detailUrlPattern.isEmpty()) {
        return "";
    }
    // String.replace throws NullPointerException for a null replacement.
    if (docId == null) {
        return "";
    }
    return detailUrlPattern.replace("*", docId);
}
private String normalizeBaseUrl(String baseUrl) {
if (baseUrl == null) return "https://www.nfra.gov.cn/";
if (!baseUrl.endsWith("/")) {
......@@ -563,12 +560,15 @@ public class PenaltyCrawler {
private String fetchJson(String url) throws Exception {
HttpURLConnection conn = null;
long timeout = crawlerConfig.getTimeout();
try {
conn = (HttpURLConnection) new URL(url).openConnection();
conn.setRequestMethod("GET");
conn.setRequestProperty("User-Agent", USER_AGENT);
conn.setConnectTimeout(30000);
conn.setReadTimeout(30000);
if (timeout > 0) {
conn.setConnectTimeout((int) timeout);
conn.setReadTimeout((int) timeout);
}
conn.connect();
try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
......@@ -586,6 +586,283 @@ public class PenaltyCrawler {
}
}
// ==================== NFRA 表格解析专用方法 ====================
/**
 * Parses the label/value table of an NFRA penalty decision.
 *
 * <p>Every table row with at least two {@code td} cells is read as "label in the first cell,
 * value in the second". Example layout:
 * <pre>
 * | 行政处罚决定书文号     | 金罚决字〔2024〕28号 |
 * | 被处罚当事人           | xxx |
 * | 主要违法违规事实       | xxx |
 * | 行政处罚依据           | xxx |
 * | 行政处罚决定           | xxx |
 * | 作出处罚决定的机关名称 | xxx |
 * | 作出处罚决定的日期     | xxx |
 * </pre>
 * Labels are matched by substring, in the order listed above, so the document-number label is
 * tested before the shorter decision label it contains. When no party was found in any table,
 * the institution name is looked up in the plain text of the document instead.
 *
 * @param docClob HTML fragment of the decision; may be null or empty
 * @return the extracted fields; fields that were not found keep their defaults. Parsing
 *         errors are logged as warnings and whatever was collected so far is returned.
 */
private NFRAViolationInfo extractViolationInfoFromTable(String docClob) {
    NFRAViolationInfo info = new NFRAViolationInfo();
    if (docClob == null || docClob.isEmpty()) {
        return info;
    }

    try {
        Document document = Jsoup.parse(docClob);

        for (Element table : document.select("table")) {
            for (Element row : table.select("tr")) {
                Elements cells = row.select("td");
                if (cells.size() < 2) {
                    continue;
                }

                String label = cleanText(cells.get(0).text());
                String value = cleanText(cells.get(1).text());

                if (label.contains("行政处罚决定书文号")) {
                    info.penaltyNumber = value;
                } else if (label.contains("被处罚当事人")) {
                    info.institutionName = value;
                    // The party cell may carry a person plus a parenthesised role.
                    info.personName = extractPersonNameFromParty(value);
                } else if (label.contains("主要违法违规事实")) {
                    info.illegalFacts = value;
                } else if (label.contains("行政处罚依据")) {
                    info.penaltyBasis = value;
                } else if (label.contains("行政处罚决定")) {
                    info.penaltyDecision = value;
                    // Penalty type and amount are derived from the decision text.
                    info.penaltyType = extractPenaltyTypeFromDecision(value);
                    info.penaltyAmount = extractAmountFromDecision(value);
                } else if (label.contains("作出处罚决定的机关名称")) {
                    info.regulator = value;
                } else if (label.contains("作出处罚决定的日期")) {
                    info.penaltyDateStr = value;
                }
            }
        }

        // Fall back to the plain text when the tables did not yield a party.
        if (info.institutionName.isEmpty()) {
            info.institutionName = extractInstitutionFromText(document.text());
        }

        log.debug("表格解析结果: 文号={}, 机构={}, 违法事实={}, 处罚决定={}, 处罚金额={}",
                info.penaltyNumber, info.institutionName, info.illegalFacts,
                info.penaltyDecision, info.penaltyAmount);
    } catch (Exception e) {
        log.warn("解析处罚决定书表格失败: {}", e.getMessage());
    }

    return info;
}
/**
 * Extracts the name that precedes the first opening parenthesis of a party string,
 * e.g. the name in front of a parenthesised role description.
 *
 * <p>Both the ASCII parenthesis and the full-width one (U+FF08) end the name. If no usable
 * name of 1 to 50 characters is found, the input itself is returned, cut to 50 characters.
 *
 * @param partyStr party cell text; may be null or empty
 * @return the extracted name, or an empty string for null/empty input
 */
private String extractPersonNameFromParty(String partyStr) {
    if (partyStr == null || partyStr.isEmpty()) {
        return "";
    }

    // Everything from the start up to the first "(" in either ASCII or full-width form.
    Matcher matcher = Pattern.compile("^([^(\\uFF08]+)").matcher(partyStr);
    if (matcher.find()) {
        String name = matcher.group(1).trim();
        if (name.length() > 0 && name.length() <= 50) {
            return name;
        }
    }

    return partyStr.length() > 50 ? partyStr.substring(0, 50) : partyStr;
}
/**
 * Derives the penalty type from the text of a penalty decision.
 *
 * <p>The keywords are tested in a fixed order and the first one contained in the text
 * decides the type, so a decision mentioning several measures is classified by the
 * earliest keyword in the list.
 *
 * @param decision decision text; may be null or empty
 * @return the matching type label, or the generic label when the text is null, empty or
 *         contains none of the keywords
 */
private String extractPenaltyTypeFromDecision(String decision) {
    final String fallback = "行政处罚";
    if (decision == null || decision.isEmpty()) {
        return fallback;
    }

    // {keyword, resulting type}; order is significant.
    final String[][] rules = {
            {"警告", "警告"},
            {"罚款", "罚款"},
            {"没收", "没收违法所得"},
            {"吊销", "吊销许可证"},
            {"停业", "停业整顿"},
            {"市场禁入", "市场禁入"},
    };
    for (String[] rule : rules) {
        if (decision.contains(rule[0])) {
            return rule[1];
        }
    }
    return fallback;
}
/**
 * Extracts the fine amount from the text of a penalty decision.
 *
 * <p>Only the first occurrence of the fine keyword followed by a number (thousands
 * separators allowed) and a unit is considered. If the matched text contains the
 * ten-thousand unit, the number is multiplied by 10000.
 *
 * @param decision decision text; may be null or empty
 * @return the amount, or {@code null} when nothing matches or the number cannot be parsed
 */
private BigDecimal extractAmountFromDecision(String decision) {
    if (decision == null || decision.isEmpty()) {
        return null;
    }

    Matcher matcher = Pattern
            .compile("罚款\\s*(\\d+(?:,\\d{3})*(?:\\.\\d+)?)\\s*(?:万元?|元)")
            .matcher(decision);
    if (!matcher.find()) {
        return null;
    }

    String amountStr = matcher.group(1).replace(",", "");
    try {
        BigDecimal amount = new BigDecimal(amountStr);
        boolean inTenThousands = matcher.group().contains("万");
        return inTenThousands ? amount.multiply(new BigDecimal("10000")) : amount;
    } catch (NumberFormatException e) {
        log.debug("金额解析失败: {}", amountStr);
        return null;
    }
}
/**
 * Extracts the penalised party from plain text of the form "label: name".
 *
 * <p>The label must be followed by a colon, either ASCII or full-width (U+FF1A). The name
 * runs up to the next line break, comma (ASCII or full-width U+FF0C) or ideographic full
 * stop. Names longer than 100 characters are rejected.
 *
 * @param text plain text of the document; may be null or empty
 * @return the trimmed name, or an empty string when nothing usable is found
 */
private String extractInstitutionFromText(String text) {
    if (text == null || text.isEmpty()) {
        return "";
    }

    Matcher matcher = Pattern
            .compile("被处罚当事人[:\\uFF1A]\\s*([^\\n\\r,\\uFF0C。]+)")
            .matcher(text);
    if (matcher.find()) {
        String name = matcher.group(1).trim();
        if (name.length() > 0 && name.length() <= 100) {
            return name;
        }
    }

    return "";
}
/**
 * Normalises whitespace in a text fragment.
 *
 * <p>Runs of whitespace are collapsed into a single space and leading/trailing whitespace is
 * removed. Besides the characters covered by the regex class {@code \s}, the no-break space
 * (U+00A0) and the ideographic space (U+3000) are treated as whitespace.
 *
 * @param text input text; may be null
 * @return the normalised text, or an empty string for null input
 */
private String cleanText(String text) {
    if (text == null) {
        return "";
    }
    // Collapse first, then trim: a leading or trailing run has become one plain space.
    return text.replaceAll("[\\s\\u00A0\\u3000]+", " ").trim();
}
/**
 * Reduces a {@code docClob} HTML fragment to readable text, with tables rendered first.
 *
 * <p>If the fragment contains tables, each one is rendered through
 * {@code extractTableContent} inside a marked table section, followed by a marked section
 * holding the plain text of the document (after script, style, link, meta, head, nav,
 * footer and aside elements were removed). Without tables only the plain text is returned.
 *
 * @param docClob HTML fragment; may be null or empty
 * @return the readable text; an empty string for null/empty input. If parsing fails, the
 *         input cleaned with jsoup's simple-text safelist is returned instead.
 */
private String extractCleanContentWithTables(String docClob) {
    if (docClob == null || docClob.isEmpty()) {
        return "";
    }

    try {
        Document document = Jsoup.parse(docClob);
        StringBuilder tableSection = new StringBuilder();

        Elements tables = document.select("table");
        if (!tables.isEmpty()) {
            tableSection.append("\n【表格内容】\n");
            for (Element table : tables) {
                tableSection.append(extractTableContent(table)).append("\n");
            }
            tableSection.append("【表格内容结束】\n");
        }

        // Drop non-content elements before taking the plain text.
        document.select("script, style, link, meta, head, nav, footer, aside").remove();
        String textContent = document.text();

        if (tableSection.length() == 0) {
            return textContent;
        }
        return tableSection.toString() + "\n【其他内容】\n" + textContent;
    } catch (Exception e) {
        log.debug("解析 docClob 失败: {}", e.getMessage());
        return Jsoup.clean(docClob, Safelist.simpleText());
    }
}
/**
 * Renders one HTML table as readable lines.
 *
 * <p>All {@code th} cells of the table go on a single header line. Every row that has no
 * {@code th} and at least one {@code td} becomes a numbered line whose cells are joined by
 * {@code " | "}; cell text longer than 200 characters is cut and suffixed with "...".
 * If no such row exists, or an exception occurs, the whole table text is appended as a
 * single line instead.
 *
 * @param table the table element
 * @return the rendered text
 */
private String extractTableContent(Element table) {
    StringBuilder out = new StringBuilder();

    try {
        Elements headerCells = table.select("th");
        if (!headerCells.isEmpty()) {
            out.append("表头: ");
            String separator = "";
            for (Element headerCell : headerCells) {
                out.append(separator).append(headerCell.text().trim());
                separator = " | ";
            }
            out.append("\n");
        }

        int dataRows = 0;
        for (Element row : table.select("tr")) {
            if (!row.select("th").isEmpty()) {
                continue; // header row
            }
            Elements cells = row.select("td");
            if (cells.isEmpty()) {
                continue;
            }

            dataRows++;
            out.append("第").append(dataRows).append("行: ");
            String separator = "";
            for (Element cell : cells) {
                out.append(separator);
                separator = " | ";
                String cellText = cell.text().trim();
                if (cellText.length() > 200) {
                    cellText = cellText.substring(0, 200) + "...";
                }
                out.append(cellText);
            }
            out.append("\n");
        }

        if (dataRows == 0) {
            out.append("表格数据: ").append(table.text()).append("\n");
        }
    } catch (Exception e) {
        log.debug("提取表格内容失败: {}", e.getMessage());
        out.append("表格数据: ").append(table.text()).append("\n");
    }

    return out.toString();
}
private PenaltyRecord fetchAndParseDetail(String detailUrl, String title, CrawlerConfig.SourceConfig source) {
try {
ObjectMapper mapper = new ObjectMapper();
......@@ -599,24 +876,75 @@ public class PenaltyCrawler {
String publishDate = data.has("publishDate") ? data.get("publishDate").asText() : "";
String docClob = data.has("docClob") ? data.get("docClob").asText() : "";
String penaltyNumber = extractPenaltyNumber(docTitle);
String institutionName = extractInstitutionNameFromContent(docClob);
String penaltyAmountStr = extractPenaltyAmount(docClob);
String illegalFacts = extractIllegalFacts(docClob);
// 从API URL中提取docId,构造正确的展示链接
String docId = null;
Pattern docIdPattern = Pattern.compile("data_docId=([^\\.]+)");
Matcher docIdMatcher = docIdPattern.matcher(detailUrl);
if (docIdMatcher.find()) {
docId = docIdMatcher.group(1);
}
// 优先使用表格解析
NFRAViolationInfo tableInfo = extractViolationInfoFromTable(docClob);
String penaltyNumber;
String institutionName;
String illegalFacts;
String penaltyBasis;
BigDecimal penaltyAmount;
String penaltyType;
String regulator;
LocalDate penaltyDate;
// 如果表格解析有结果,使用表格数据;否则使用原有的正则解析
if (tableInfo != null && (tableInfo.penaltyNumber != null && !tableInfo.penaltyNumber.isEmpty()
|| tableInfo.institutionName != null && !tableInfo.institutionName.isEmpty())) {
// 使用表格解析的数据
penaltyNumber = !tableInfo.penaltyNumber.isEmpty() ? tableInfo.penaltyNumber : extractPenaltyNumber(docTitle);
institutionName = !tableInfo.institutionName.isEmpty() ? tableInfo.institutionName : extractInstitutionNameFromContent(docClob);
if (institutionName.isEmpty()) {
institutionName = extractInstitutionName(docTitle);
}
illegalFacts = !tableInfo.illegalFacts.isEmpty() ? tableInfo.illegalFacts : extractIllegalFacts(docClob);
penaltyBasis = !tableInfo.penaltyBasis.isEmpty() ? tableInfo.penaltyBasis : extractPenaltyBasis(docClob);
penaltyAmount = tableInfo.penaltyAmount != null ? tableInfo.penaltyAmount : extractPenaltyAmountImproved(docClob);
penaltyType = !tableInfo.penaltyType.isEmpty() ? tableInfo.penaltyType : guessPenaltyType(docTitle);
regulator = !tableInfo.regulator.isEmpty() ? tableInfo.regulator : "国家金融监督管理总局";
penaltyDate = !tableInfo.penaltyDateStr.isEmpty() ? parseDateSafely(tableInfo.penaltyDateStr) : parseDateSafely(publishDate);
log.debug("使用表格解析结果: 文号={}, 机构={}, 违法事实={}, 处罚依据={}, 处罚类型={}",
penaltyNumber, institutionName, illegalFacts, penaltyBasis, penaltyType);
} else {
// 回退到原有的正则解析
penaltyNumber = extractPenaltyNumber(docTitle);
institutionName = extractInstitutionNameFromContent(docClob);
if (institutionName.isEmpty()) {
institutionName = extractInstitutionName(docTitle);
}
illegalFacts = extractIllegalFacts(docClob);
penaltyBasis = extractPenaltyBasis(docClob);
penaltyAmount = extractPenaltyAmountImproved(docClob);
penaltyType = guessPenaltyType(docTitle);
regulator = "国家金融监督管理总局";
penaltyDate = parseDateSafely(publishDate);
}
LocalDate penaltyDate = parseDateSafely(publishDate);
String sourceUrl = buildSourceUrl(source.getDetailUrlPattern(), docId);
String province = extractProvince(institutionName, docClob);
return PenaltyRecord.builder()
.penaltyNumber(penaltyNumber)
.institutionName(institutionName.isEmpty() ? extractInstitutionName(docTitle) : institutionName)
.institutionName(institutionName)
.institutionType(guessInstitutionType(institutionName))
.penaltyType("行政处罚")
.penaltyAmount(penaltyAmountStr != null ? new BigDecimal(penaltyAmountStr) : null)
.penaltyType(penaltyType)
.penaltyAmount(penaltyAmount)
.penaltyDate(penaltyDate)
.regulator("国家金融监督管理总局")
.regulator(regulator)
.province(province)
.illegalFacts(illegalFacts)
.penaltyBasis(extractPenaltyBasis(docClob))
.sourceUrl(detailUrl)
.penaltyBasis(penaltyBasis)
.sourceUrl(sourceUrl)
.createdAt(LocalDateTime.now())
.isNew(true)
.build();
......@@ -627,6 +955,39 @@ public class PenaltyCrawler {
}
}
/**
 * Extracts a penalty amount from free text, trying several phrasings in order.
 *
 * <p>Each pattern is a keyword followed by a number (thousands separators allowed) and a
 * unit. The first pattern that matches and parses wins; if the matched text contains the
 * ten-thousand unit, the number is multiplied by 10000. After the fine and amount keywords
 * an optional colon is accepted in ASCII or full-width (U+FF1A) form.
 *
 * @param content text to search; may be null
 * @return the amount, or {@code null} when no pattern yields a parsable number
 */
private BigDecimal extractPenaltyAmountImproved(String content) {
    if (content == null) return null;

    String[] patterns = {
            "罚款[:\\uFF1A]?\\s*(\\d+(?:,\\d{3})*(?:\\.\\d+)?)\\s*(?:万元?|元)",
            "没收[^\\d]*(\\d+(?:,\\d{3})*(?:\\.\\d+)?)\\s*(?:万元?|元)",
            "处以?\\s*(\\d+(?:,\\d{3})*(?:\\.\\d+)?)\\s*(?:万元?|元)",
            "合计[^\\d]*(\\d+(?:,\\d{3})*(?:\\.\\d+)?)\\s*(?:万元?|元)",
            "金额[:\\uFF1A]?\\s*(\\d+(?:,\\d{3})*(?:\\.\\d+)?)\\s*(?:万元?|元)"
    };

    for (String pattern : patterns) {
        Matcher matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).matcher(content);
        if (matcher.find()) {
            String amountStr = matcher.group(1).replace(",", "");
            try {
                BigDecimal amount = new BigDecimal(amountStr);
                String matchedText = matcher.group();
                if (matchedText.contains("万")) {
                    return amount.multiply(new BigDecimal("10000"));
                }
                return amount;
            } catch (NumberFormatException e) {
                // Try the next phrasing instead of giving up.
                log.debug("金额数字解析失败: {}", amountStr);
            }
        }
    }
    return null;
}
private LocalDate parseDateSafely(String dateStr) {
if (dateStr == null || dateStr.isEmpty()) {
return LocalDate.now();
......@@ -694,8 +1055,8 @@ public class PenaltyCrawler {
case "国家金融监督管理总局" -> "金监";
default -> "文";
};
int seq = penaltySeq.incrementAndGet() % 10000;
return prefix + "罚决字〔" + LocalDate.now().getYear() + "〕" + String.format("%04d", seq);
String seq = String.format("%04d", System.currentTimeMillis() % 10000);
return prefix + "罚决字〔" + LocalDate.now().getYear() + "〕" + seq;
}
private String extractInstitutionName(String title) {
......@@ -750,7 +1111,7 @@ public class PenaltyCrawler {
return baseUrl + "/" + relativeUrl;
}
// ==================== NFRA 内容解析 ====================
// ==================== NFRA 内容解析(正则备用) ====================
private String extractPenaltyNumber(String text) {
if (text == null) return "";
......@@ -776,35 +1137,27 @@ public class PenaltyCrawler {
return "";
}
/**
 * Finds the first number that follows a fine keyword (or, failing that, an "imposed"
 * keyword) and precedes a currency unit.
 *
 * <p>Only the digits are returned, exactly as matched: the unit is not applied, so a value
 * given in ten-thousands is returned unscaled, and numbers with thousands separators do
 * not match.
 *
 * @param content text to search; may be null
 * @return the matched number as text, or {@code null} if nothing matches
 */
private String extractPenaltyAmount(String content) {
if (content == null) return null;
Pattern pattern = Pattern.compile("罚款[::]?\\s*(\\d+(?:\\.\\d+)?)\\s*(?:万元?|元)");
Matcher matcher = pattern.matcher(content);
if (matcher.find()) {
return matcher.group(1);
}
// Second phrasing, tried only when the first one did not match.
pattern = Pattern.compile("处以?\\s*(\\d+(?:\\.\\d+)?)\\s*(?:万元?|元)");
matcher = pattern.matcher(content);
if (matcher.find()) {
return matcher.group(1);
}
return null;
}
/**
 * Extracts the "main illegal facts" section from the document content.
 *
 * <p>The section starts after its label, optionally followed by a colon in ASCII or
 * full-width (U+FF1A) form, and ends before the next known section label or the end of the
 * text. HTML tags are stripped and the result is cut to 500 characters. If the label is not
 * found, the first 200 characters of the tag-stripped content are returned instead.
 *
 * @param content document content, possibly HTML; may be null
 * @return the facts text, or an empty string for null input
 */
private String extractIllegalFacts(String content) {
    if (content == null) return "";
    Pattern pattern = Pattern.compile(
            "主要违法违规事实[:\\uFF1A]?\\s*(.+?)(?=处罚内容|作出处罚决定|$)", Pattern.DOTALL);
    Matcher matcher = pattern.matcher(content);
    if (matcher.find()) {
        String facts = matcher.group(1).trim();
        facts = stripHtmlTags(facts);
        return facts.length() > 500 ? facts.substring(0, 500) : facts;
    }
    // No labelled section: fall back to the beginning of the plain text.
    content = stripHtmlTags(content);
    if (content.length() > 200) {
        return content.substring(0, 200);
    }
    return content;
}
/**
 * Removes everything that looks like an HTML tag ({@code <...>}) from the text.
 * Character entities are left untouched.
 *
 * @param html input text; may be null or empty
 * @return the text without tags, or an empty string for null/empty input
 */
private String stripHtmlTags(String html) {
    boolean nothingToStrip = html == null || html.isEmpty();
    return nothingToStrip ? "" : html.replaceAll("<[^>]+>", "");
}
private String extractPenaltyBasis(String content) {
if (content == null) return "";
Pattern pattern = Pattern.compile("处罚依据[::]?\\s*(.+?)(?=\\。\\s*\\《|\\、|作出处罚决定|$)", Pattern.DOTALL);
......@@ -816,6 +1169,30 @@ public class PenaltyCrawler {
return "";
}
/**
 * Determines the province (or listed city) a penalty relates to.
 *
 * <p>The institution name is checked first against a fixed list of names; the first
 * list entry contained in the name wins. If none matches, the content is scanned for
 * place-like phrases ending in 省/市/自治区/特别行政区 and the first phrase that
 * contains a listed name decides the result.
 *
 * @param institutionName name of the penalised institution; {@code null} yields ""
 * @param content         penalty text used as a fallback; may be {@code null} or empty
 * @return the matched name from the list, or an empty string when nothing matches
 */
private String extractProvince(String institutionName, String content) {
    if (institutionName == null) return "";
    String[] provinces = {"北京", "天津", "河北", "山西", "内蒙古", "辽宁", "吉林", "黑龙江",
        "上海", "江苏", "浙江", "安徽", "福建", "江西", "山东", "河南", "湖北", "湖南",
        "广东", "广西", "海南", "重庆", "四川", "贵州", "云南", "西藏", "陕西", "甘肃",
        "青海", "宁夏", "新疆", "深圳", "宁波", "青岛", "大连", "厦门"};
    for (String province : provinces) {
        if (institutionName.contains(province)) {
            return province;
        }
    }
    // Fix: Pattern.matcher(null) throws NullPointerException, so a missing content
    // must end the lookup here instead of crashing the record being built.
    if (content == null || content.isEmpty()) {
        return "";
    }
    Pattern pattern = Pattern.compile("([\\u4e00-\\u9fa5]{2,6}(?:省|市|自治区|特别行政区))");
    Matcher matcher = pattern.matcher(content);
    while (matcher.find()) {
        String match = matcher.group(1);
        for (String province : provinces) {
            if (match.contains(province)) {
                return province;
            }
        }
    }
    return "";
}
// ==================== HTML 直接解析备用 ====================
private List<PenaltyRecord> crawlNFRADirect(CrawlerConfig.SourceConfig source) throws Exception {
......@@ -843,6 +1220,7 @@ public class PenaltyCrawler {
.penaltyType(guessPenaltyType(title))
.penaltyDate(penaltyDate)
.regulator("国家金融监督管理总局")
.province(extractProvince(extractInstitutionName(title), ""))
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
......@@ -900,6 +1278,7 @@ public class PenaltyCrawler {
.penaltyType(guessPenaltyType(title))
.penaltyDate(penaltyDate)
.regulator(regulator)
.province(extractProvince(extractInstitutionName(title), ""))
.illegalFacts(title)
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
......@@ -933,4 +1312,22 @@ public class PenaltyCrawler {
.isNew(entity.getIsNew())
.build();
}
// ==================== 内部类 ====================
/**
* Internal holder for the penalty information extracted from a table.
*
* <p>Every String field starts as the empty string, so a field that was never
* assigned reads as "" rather than null. {@code penaltyAmount} is the exception
* and starts as {@code null}.
*/
private static class NFRAViolationInfo {
String penaltyNumber = "";
String institutionName = "";
String personName = "";
String illegalFacts = "";
String penaltyBasis = "";
String penaltyDecision = "";
String penaltyType = "";
String regulator = "";
// Kept as a raw string; NOTE(review): presumably parsed into a date by the caller - confirm.
String penaltyDateStr = "";
// null until an amount has been assigned.
BigDecimal penaltyAmount = null;
}
}
\ No newline at end of file
......@@ -26,6 +26,7 @@ public class PenaltyRecordDTO {
private BigDecimal penaltyAmount;
private LocalDate penaltyDate;
private String regulator;
private String province;
private String illegalFacts;
private String penaltyBasis;
private String sourceUrl;
......
......@@ -20,6 +20,7 @@ import java.time.LocalDateTime;
@Index(name = "idx_penalty_type", columnList = "penaltyType"),
@Index(name = "idx_penalty_date", columnList = "penaltyDate"),
@Index(name = "idx_regulator", columnList = "regulator"),
@Index(name = "idx_province", columnList = "province"),
@Index(name = "idx_is_new", columnList = "isNew"),
@Index(name = "idx_created_at", columnList = "createdAt")
})
......@@ -75,6 +76,12 @@ public class PenaltyRecord {
@Column(name = "regulator", length = 100)
private String regulator;
/**
* 省份
*/
@Column(name = "province", length = 50)
private String province;
/**
* 违法事实
*/
......
......@@ -94,6 +94,7 @@ public class CrawlScheduler {
config.setCode(source.getSourceType());
config.setBaseUrl(source.getSourceUrl());
config.setListUrl(source.getSourceUrl() + (source.getListUrlPattern() != null ? source.getListUrlPattern() : ""));
config.setDetailUrlPattern(source.getDetailUrlPattern());
config.setEnabled(source.getIsEnabled());
return config;
}
......@@ -156,4 +157,16 @@ public class CrawlScheduler {
/**
 * Reports the current value of the {@code isRunning} flag.
 *
 * @return the value of the {@code isRunning} field at the time of the call
 */
public boolean isRunning() {
return isRunning;
}
/**
 * Requests that the current crawl stop by clearing the {@code isRunning} flag.
 *
 * <p>When the flag is already clear, only a warning is logged and nothing changes.
 * NOTE(review): this method only flips the flag; whether and when the running crawl
 * observes it is not visible here - confirm against the crawl loop.
 */
public void stopCrawl() {
    if (isRunning) {
        isRunning = false;
        log.info("爬取任务已请求停止");
    } else {
        log.warn("没有正在执行的爬取任务");
    }
}
}
......@@ -107,9 +107,6 @@ public class CrawlTaskService {
return crawlSourceRepository.findAllByOrderBySortOrder();
}
/**
 * Returns every crawl source by delegating to {@code crawlSourceRepository.findAll()}.
 *
 * @return the list produced by the repository call, in whatever order it yields
 */
public List<CrawlSource> getAllSources() {
return crawlSourceRepository.findAll();
}
/**
* 获取最新任务状态
......
......@@ -243,6 +243,7 @@ public class PenaltyRecordService {
.penaltyAmount(entity.getPenaltyAmount())
.penaltyDate(entity.getPenaltyDate())
.regulator(entity.getRegulator())
.province(entity.getProvince())
.illegalFacts(entity.getIllegalFacts())
.penaltyBasis(entity.getPenaltyBasis())
.sourceUrl(entity.getSourceUrl())
......
server:
port: 8080
port: 8082
servlet:
context-path: /api
......@@ -10,7 +10,7 @@ spring:
datasource:
url: jdbc:mysql://localhost:3306/penalty_monitor?useUnicode=true&characterEncoding=utf8&serverTimezone=Asia/Shanghai&useSSL=false
username: root
# Do not commit database credentials. The value is resolved from the DB_PASSWORD
# environment variable (or an equivalent external property) at startup.
password: ${DB_PASSWORD}
driver-class-name: com.mysql.cj.jdbc.Driver
hikari:
minimum-idle: 5
......@@ -38,6 +38,7 @@ spring:
write-dates-as-timestamps: false
# 爬虫配置(数据源从数据库 crawl_sources 表读取)
crawler:
enabled: true
cron: "0 0 2 * * ?"
......@@ -47,6 +48,10 @@ crawler:
resume:
enabled: true # 是否启用断点续传
force-restart: false # 是否强制重新开始(设为true会忽略之前的进度)
nfra:
page-size: 18
max-pages: 10000
request-interval-ms: 1000
deepseek:
api:
......
......@@ -38,6 +38,7 @@ spring:
write-dates-as-timestamps: false
# 爬虫配置(数据源从数据库 crawl_sources 表读取)
crawler:
enabled: true
cron: "0 0 2 * * ?"
......@@ -47,6 +48,10 @@ crawler:
resume:
enabled: true # 是否启用断点续传
force-restart: false # 是否强制重新开始(设为true会忽略之前的进度)
nfra:
page-size: 18
max-pages: 10000
request-interval-ms: 1000
deepseek:
api:
......
......@@ -46,6 +46,7 @@ D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\service\ReportTemplateService.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\AnalysisKeyword.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\CreateUserRequest.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\CrawlProgress.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\controller\ReportTemplateController.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\service\PermissionService.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\CreatePermissionRequest.java
......@@ -72,6 +73,7 @@ D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\PenaltyRecordDTO.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\controller\RoleController.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\StatisticsDTO.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\repository\CrawlProgressRepository.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\UpdatePermissionRequest.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\scheduler\CrawlScheduler.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\CreateMenuRequest.java
......
......@@ -5,7 +5,7 @@
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>金融监管处罚监控系统</title>
<script type="module" crossorigin src="/assets/index-BM812JBU.js"></script>
<script type="module" crossorigin src="/assets/index-D0VsMWb-.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-D_lRsYFa.css">
</head>
<body>
......
......@@ -5,7 +5,7 @@
"private": true,
"scripts": {
"dev": "vite",
"build": "vite build",
"build": "vite build --mode prod",
"preview": "vite preview"
},
"dependencies": {
......
......@@ -159,6 +159,7 @@ export default {
// 爬取任务
triggerCrawl: () => request.post('/crawl/trigger'),
stopCrawl: () => request.post('/crawl/stop'),
getCrawlStatus: () => request.get('/crawl/status'),
getCrawlHistory: (params) => request.get('/crawl/history', { params }),
clearCrawlHistory: () => request.delete('/crawl/history'),
......
......@@ -10,6 +10,9 @@
<el-button type="primary" @click="handleTrigger" :loading="loading" :icon="Refresh">
立即爬取
</el-button>
<el-button type="danger" @click="handleStop" :loading="stopLoading" :disabled="!isRunning">
停止爬取
</el-button>
</div>
</div>
</template>
......@@ -222,6 +225,8 @@ import api from '../api'
import dayjs from 'dayjs'
const loading = ref(false)
const stopLoading = ref(false)
const isRunning = ref(false)
const crawlingSource = ref(null)
const sources = ref([])
const historyData = ref([])
......@@ -305,6 +310,7 @@ const fetchSources = async () => {
const fetchStatus = async () => {
try {
const res = await api.getCrawlStatus()
isRunning.value = res.data.isRunning
const tasks = res.data.tasks || []
taskStatuses.value = {}
tasks.forEach(task => {
......@@ -358,6 +364,23 @@ const handleTrigger = async () => {
}
} finally {
loading.value = false
fetchStatus()
}
}
// Asks for confirmation, then sends the stop request and refreshes the status.
// A rejection equal to 'cancel' (the user dismissed the dialog) is ignored silently;
// any other failure shows an error message. The button's loading flag is always reset.
const handleStop = async () => {
  const confirmOptions = { type: 'warning' }
  try {
    await ElMessageBox.confirm('确定要停止当前爬取任务吗?', '警告', confirmOptions)
    stopLoading.value = true
    await api.stopCrawl()
    ElMessage.success('已发送停止请求')
    fetchStatus()
  } catch (reason) {
    const dismissed = reason === 'cancel'
    if (!dismissed) {
      ElMessage.error('停止失败')
    }
  } finally {
    stopLoading.value = false
  }
}
......@@ -540,6 +563,9 @@ onMounted(() => {
fetchSources()
fetchStatus()
fetchHistory()
setInterval(() => {
fetchStatus()
}, 3000)
})
</script>
......
......@@ -66,9 +66,9 @@
<el-table
:data="tableData"
v-loading="loading"
stripe
@selection-change="handleSelectionChange"
border
border="true"
resizable
>
<el-table-column type="selection" width="55" />
......@@ -79,7 +79,7 @@
</el-link>
</template>
</el-table-column>
<el-table-column prop="institutionName" label="机构名称" min-width="200" show-overflow-tooltip resizable />
<el-table-column prop="institutionName" label="机构名称" min-width="100" show-overflow-tooltip resizable />
<el-table-column prop="institutionType" label="机构类型" width="100" resizable>
<template #default="{ row }">
<el-tag size="small" :type="getTypeColor(row.institutionType)">{{ row.institutionType }}</el-tag>
......@@ -93,14 +93,15 @@
</template>
</el-table-column>
<el-table-column prop="penaltyDate" label="处罚日期" width="120" resizable />
<el-table-column prop="regulator" label="监管机构" width="120" resizable />
<el-table-column prop="regulator" label="监管机构" width="250" show-overflow-tooltip resizable />
<el-table-column prop="province" label="省份" width="130" resizable />
<el-table-column prop="isNew" label="状态" width="80" resizable>
<template #default="{ row }">
<el-tag v-if="row.isNew" type="danger" size="small"></el-tag>
<span v-else style="color: #909399">已读</span>
</template>
</el-table-column>
<el-table-column label="分析状态" width="100" resizable>
<el-table-column label="分析状态" width="120">
<template #default="{ row }">
<el-tag v-if="row.analysisStatus == 'pending'" size="small">待分析</el-tag>
<el-tag v-else-if="row.analysisStatus == 'analyzing'" type="warning" size="small">分析中</el-tag>
......@@ -109,7 +110,7 @@
<span v-else>-</span>
</template>
</el-table-column>
<el-table-column label="操作" width="250" fixed="right">
<el-table-column label="操作" width="230" fixed="right" align="center">
<template #default="{ row }">
<el-button type="primary" link @click="viewDetail(row)">详情</el-button>
<el-button type="primary" link @click="openReportDrawer(row)">报告</el-button>
......@@ -142,7 +143,7 @@
</el-card>
<!-- 详情弹窗 -->
<el-dialog v-model="detailVisible" title="处罚详情" width="825px" :fullscreen="detailFullscreen" show-fullscreen>
<el-dialog v-model="detailVisible" title="处罚详情" width="750px" :fullscreen="detailFullscreen" show-fullscreen>
<template #header>
<div class="detail-header">
<span>处罚详情</span>
......@@ -151,7 +152,7 @@
</el-button>
</div>
</template>
<el-descriptions v-if="currentRecord" :column="2" border>
<el-descriptions v-if="currentRecord" :column="2" border label-width="100px">
<el-descriptions-item label="处罚编号" :span="2">{{ currentRecord.penaltyNumber }}</el-descriptions-item>
<el-descriptions-item label="被处罚机构" :span="2">{{ currentRecord.institutionName }}</el-descriptions-item>
<el-descriptions-item label="机构类型">{{ currentRecord.institutionType }}</el-descriptions-item>
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论