提交 81927d5a 作者: kxjia

保存

上级 11c33355
......@@ -612,8 +612,11 @@ public class PenaltyCrawler {
// 解析HTML
Document document = Jsoup.parse(docClob);
// 查找所有表格
// 查找所有表格,保存第一个表格的HTML(通常是主要的处罚信息表)
Elements tables = document.select("table");
if (!tables.isEmpty()) {
info.originalTableHtml = tables.first().outerHtml();
}
for (Element table : tables) {
// 遍历表格行
......@@ -631,15 +634,20 @@ public class PenaltyCrawler {
info.institutionName = value;
// 提取当事人姓名/名称
info.personName = extractPersonNameFromParty(value);
} else if (label.contains("主要违法违规事实")) {
} else if (label.contains("主要违法违规")) {
info.illegalFacts = value;
} else if (label.contains("行政处罚依据")) {
info.penaltyBasis = value;
} else if (label.contains("行政处罚决定")) {
info.penaltyDecision = value;
info.penaltyContent = value;
// 从处罚决定中提取处罚类型和金额
info.penaltyType = extractPenaltyTypeFromDecision(value);
info.penaltyAmount = extractAmountFromDecision(value);
} else if (label.contains("行政处罚内容")) {
if (info.penaltyContent.isEmpty()) {
info.penaltyContent = value;
}
} else if (label.contains("作出处罚决定的机关名称")) {
info.regulator = value;
} else if (label.contains("作出处罚决定的日期")) {
......@@ -891,6 +899,7 @@ public class PenaltyCrawler {
String institutionName;
String illegalFacts;
String penaltyBasis;
String penaltyContent;
BigDecimal penaltyAmount;
String penaltyType;
String regulator;
......@@ -908,10 +917,12 @@ public class PenaltyCrawler {
}
illegalFacts = !tableInfo.illegalFacts.isEmpty() ? tableInfo.illegalFacts : extractIllegalFacts(docClob);
penaltyBasis = !tableInfo.penaltyBasis.isEmpty() ? tableInfo.penaltyBasis : extractPenaltyBasis(docClob);
penaltyContent = tableInfo.penaltyContent;
penaltyAmount = tableInfo.penaltyAmount != null ? tableInfo.penaltyAmount : extractPenaltyAmountImproved(docClob);
penaltyType = !tableInfo.penaltyType.isEmpty() ? tableInfo.penaltyType : guessPenaltyType(docTitle);
regulator = !tableInfo.regulator.isEmpty() ? tableInfo.regulator : "国家金融监督管理总局";
penaltyDate = !tableInfo.penaltyDateStr.isEmpty() ? parseDateSafely(tableInfo.penaltyDateStr) : parseDateSafely(publishDate);
penaltyDate = !tableInfo.penaltyDateStr.isEmpty() ? parseDateSafely(
tableInfo.penaltyDateStr) : parseDateSafely(publishDate);
log.debug("使用表格解析结果: 文号={}, 机构={}, 违法事实={}, 处罚依据={}, 处罚类型={}",
penaltyNumber, institutionName, illegalFacts, penaltyBasis, penaltyType);
......@@ -924,6 +935,7 @@ public class PenaltyCrawler {
}
illegalFacts = extractIllegalFacts(docClob);
penaltyBasis = extractPenaltyBasis(docClob);
penaltyContent = "";
penaltyAmount = extractPenaltyAmountImproved(docClob);
penaltyType = guessPenaltyType(docTitle);
regulator = "国家金融监督管理总局";
......@@ -944,6 +956,7 @@ public class PenaltyCrawler {
.province(province)
.illegalFacts(illegalFacts)
.penaltyBasis(penaltyBasis)
.penaltyContent(penaltyContent)
.sourceUrl(sourceUrl)
.createdAt(LocalDateTime.now())
.isNew(true)
......@@ -993,10 +1006,22 @@ public class PenaltyCrawler {
return LocalDate.now();
}
try {
String cleaned = dateStr.replace("年", "-").replace("月", "-").replace("日", "").trim();
String cleaned = dateStr.replace("年", "-").replace("月", "-").replace("日", "").replace("号", "").trim();
if (cleaned.length() > 10) {
cleaned = cleaned.substring(0, 10);
}
String[] parts = cleaned.split("-");
if (parts.length == 3) {
String month = parts[1];
String day = parts[2];
if (month.length() == 1) {
month = "0" + month;
}
if (day.length() == 1) {
day = "0" + day;
}
cleaned = parts[0] + "-" + month + "-" + day;
}
return LocalDate.parse(cleaned, DATE_FORMATTER);
} catch (DateTimeParseException e) {
log.debug("日期解析失败: {}", dateStr);
......@@ -1139,7 +1164,7 @@ public class PenaltyCrawler {
private String extractIllegalFacts(String content) {
if (content == null) return "";
Pattern pattern = Pattern.compile("主要违法违规事实[::]?\\s*(.+?)(?=处罚内容|作出处罚决定|$)", Pattern.DOTALL);
Pattern pattern = Pattern.compile("主要违法违规[事实行为][::]?\\s*(.+?)(?=处罚内容|作出处罚决定|$)", Pattern.DOTALL);
Matcher matcher = pattern.matcher(content);
if (matcher.find()) {
String facts = matcher.group(1).trim();
......@@ -1222,6 +1247,7 @@ public class PenaltyCrawler {
.regulator("国家金融监督管理总局")
.province(extractProvince(extractInstitutionName(title), ""))
.illegalFacts(title)
.penaltyContent("")
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
......@@ -1280,6 +1306,7 @@ public class PenaltyCrawler {
.regulator(regulator)
.province(extractProvince(extractInstitutionName(title), ""))
.illegalFacts(title)
.penaltyContent("")
.sourceUrl(buildFullUrl(source.getBaseUrl(), href))
.createdAt(LocalDateTime.now())
.isNew(true)
......@@ -1325,9 +1352,11 @@ public class PenaltyCrawler {
String illegalFacts = "";
String penaltyBasis = "";
String penaltyDecision = "";
String penaltyContent = "";
String penaltyType = "";
String regulator = "";
String penaltyDateStr = "";
BigDecimal penaltyAmount = null;
String originalTableHtml = "";
}
}
\ No newline at end of file
......@@ -29,6 +29,8 @@ public class PenaltyRecordDTO {
private String province;
private String illegalFacts;
private String penaltyBasis;
private String penaltyContent;
private String originalTableHtml;
private String sourceUrl;
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
......
......@@ -94,6 +94,18 @@ public class PenaltyRecord {
@Column(name = "penalty_basis", columnDefinition = "TEXT")
private String penaltyBasis;
/**
* 行政处罚内容
*/
@Column(name = "penalty_content", columnDefinition = "TEXT")
private String penaltyContent;
/**
* 原始表格HTML(用于保存完整的处罚信息表格结构)
*/
@Column(name = "original_table_html", columnDefinition = "TEXT")
private String originalTableHtml;
/**
* 来源URL
*/
......
server:
port: 8082
port: 8080
servlet:
context-path: /api
......@@ -10,7 +10,7 @@ spring:
datasource:
url: jdbc:mysql://localhost:3306/penalty_monitor?useUnicode=true&characterEncoding=utf8&serverTimezone=Asia/Shanghai&useSSL=false
username: root
password: ZhongRunChangHong/123
password: 123456
driver-class-name: com.mysql.cj.jdbc.Driver
hikari:
minimum-idle: 5
......
......@@ -32,6 +32,8 @@ CREATE TABLE IF NOT EXISTS penalty_records (
regulator VARCHAR(100) COMMENT '监管机构',
illegal_facts TEXT COMMENT '违法事实',
penalty_basis TEXT COMMENT '处罚依据',
penalty_content TEXT COMMENT '行政处罚内容',
original_table_html TEXT COMMENT '原始表格HTML',
source_url VARCHAR(500) COMMENT '来源URL',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
......
/* Scoped styles for the dashboard view; every rule is bound to the data-v-dd3ecc0b scope attribute. */

/* Outer page container. */
.dashboard[data-v-dd3ecc0b] {
  padding: 20px;
}

/* Uniform vertical gap below each dashboard row. */
.stat-row[data-v-dd3ecc0b],
.status-row[data-v-dd3ecc0b],
.chart-row[data-v-dd3ecc0b],
.recent-row[data-v-dd3ecc0b] {
  margin-bottom: 20px;
}

/* Statistic cards clip their overflowing content and act as a positioning context. */
.stat-card[data-v-dd3ecc0b] {
  position: relative;
  overflow: hidden;
}

/* Card header: children pushed to opposite ends, vertically centred. */
.card-header[data-v-dd3ecc0b] {
  display: flex;
  justify-content: space-between;
  align-items: center;
}

/* Data-source status tile. */
.source-status[data-v-dd3ecc0b] {
  text-align: center;
  padding: 10px;
}

.source-status .source-name[data-v-dd3ecc0b] {
  font-weight: 600;
  margin-bottom: 8px;
}

.source-status .source-time[data-v-dd3ecc0b] {
  font-size: 12px;
  color: #909399;
  margin-top: 8px;
}

/* Record detail panel: one item per field, bold label above the body text. */
.detail-content .detail-item[data-v-dd3ecc0b] {
  margin-bottom: 16px;
}

.detail-content .detail-item .detail-label[data-v-dd3ecc0b] {
  font-weight: 600;
  margin-bottom: 8px;
}

.detail-content .detail-item .detail-text[data-v-dd3ecc0b] {
  line-height: 1.8;
  color: #606266;
}
......@@ -5,7 +5,7 @@
<link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>金融监管处罚监控系统</title>
<script type="module" crossorigin src="/assets/index-D0VsMWb-.js"></script>
<script type="module" crossorigin src="/assets/index-CszAQfuQ.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-D_lRsYFa.css">
</head>
<body>
......
......@@ -138,6 +138,10 @@
<div class="detail-label">违法事实</div>
<div class="detail-text">{{ currentRecord.illegalFacts || '暂无' }}</div>
</div>
<div class="detail-item">
<div class="detail-label">行政处罚内容</div>
<div class="detail-text">{{ currentRecord.penaltyContent || '暂无' }}</div>
</div>
<div class="detail-item">
<div class="detail-label">处罚依据</div>
<div class="detail-text">{{ currentRecord.penaltyBasis || '暂无' }}</div>
......
......@@ -165,6 +165,9 @@
<el-descriptions-item label="违法事实" :span="2">
<div style="white-space: pre-wrap">{{ currentRecord.illegalFacts || '暂无' }}</div>
</el-descriptions-item>
<el-descriptions-item label="行政处罚内容" :span="2">
<div style="white-space: pre-wrap">{{ currentRecord.penaltyContent || '暂无' }}</div>
</el-descriptions-item>
<el-descriptions-item label="处罚依据" :span="2">
<div style="white-space: pre-wrap">{{ currentRecord.penaltyBasis || '暂无' }}</div>
</el-descriptions-item>
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论