提交 2a2daa29 authored 作者: kxjia's avatar kxjia

完善代码

上级 52ab6995
...@@ -79,9 +79,9 @@ public class CrawlController { ...@@ -79,9 +79,9 @@ public class CrawlController {
/** /**
* 获取数据源列表 * 获取数据源列表
*/ */
@GetMapping("/sources") @GetMapping("/sources")
public ResponseEntity<ApiResponse<List<CrawlSource>>> getSources() { public ResponseEntity<ApiResponse<List<CrawlSource>>> getSources() {
List<CrawlSource> sources = crawlTaskService.getEnabledSources(); List<CrawlSource> sources = crawlTaskService.getAllSources();
return ResponseEntity.ok(ApiResponse.success(sources)); return ResponseEntity.ok(ApiResponse.success(sources));
} }
...@@ -91,7 +91,6 @@ public class CrawlController { ...@@ -91,7 +91,6 @@ public class CrawlController {
@GetMapping("/task/{id}") @GetMapping("/task/{id}")
public ResponseEntity<ApiResponse<CrawlTaskDTO>> getTaskById(@PathVariable Long id) { public ResponseEntity<ApiResponse<CrawlTaskDTO>> getTaskById(@PathVariable Long id) {
CrawlTaskDTO task = crawlTaskService.findById(id); CrawlTaskDTO task = crawlTaskService.findById(id);
if (task == null) { if (task == null) {
return ResponseEntity.ok(ApiResponse.error(404, "任务不存在")); return ResponseEntity.ok(ApiResponse.error(404, "任务不存在"));
} }
...@@ -135,6 +134,15 @@ public class CrawlController { ...@@ -135,6 +134,15 @@ public class CrawlController {
crawlTaskService.deleteSource(id); crawlTaskService.deleteSource(id);
return ResponseEntity.ok(ApiResponse.success("删除成功", null)); return ResponseEntity.ok(ApiResponse.success("删除成功", null));
} }
/**
 * Clear the crawl history.
 * Deletes every crawl task record via {@link CrawlTaskService#clearHistory()};
 * this is irreversible.
 *
 * @return success envelope with a Chinese confirmation message and no payload
 */
@DeleteMapping("/history")
public ResponseEntity<ApiResponse<Void>> clearHistory() {
    crawlTaskService.clearHistory();
    return ResponseEntity.ok(ApiResponse.success("清除成功", null));
}
/** /**
* 爬取单个数据源 * 爬取单个数据源
......
...@@ -45,11 +45,9 @@ public class PenaltyController { ...@@ -45,11 +45,9 @@ public class PenaltyController {
public ResponseEntity<ApiResponse<PenaltyRecordDTO>> findById(@PathVariable Long id) { public ResponseEntity<ApiResponse<PenaltyRecordDTO>> findById(@PathVariable Long id) {
log.debug("查询处罚记录详情: id={}", id); log.debug("查询处罚记录详情: id={}", id);
PenaltyRecordDTO record = penaltyRecordService.findById(id); PenaltyRecordDTO record = penaltyRecordService.findById(id);
if (record == null) { if (record == null) {
return ResponseEntity.ok(ApiResponse.error(404, "记录不存在")); return ResponseEntity.ok(ApiResponse.error(404, "记录不存在"));
} }
return ResponseEntity.ok(ApiResponse.success(record)); return ResponseEntity.ok(ApiResponse.success(record));
} }
...@@ -146,4 +144,18 @@ public class PenaltyController { ...@@ -146,4 +144,18 @@ public class PenaltyController {
String report = penaltyRecordService.getAnalysisReport(id); String report = penaltyRecordService.getAnalysisReport(id);
return ResponseEntity.ok(ApiResponse.success(report)); return ResponseEntity.ok(ApiResponse.success(report));
} }
/**
 * AI natural-language search over penalty records.
 * The free-text query is parsed into structured criteria by the service layer.
 *
 * @param request body carrying the free-text query ({@code AISearchRequest.query})
 * @param page    zero-based page index, defaults to 0
 * @param size    page size, defaults to 10
 * @return paged penalty records matching the parsed criteria
 */
@PostMapping("/ai-search")
public ResponseEntity<ApiResponse<PageResponse<PenaltyRecordDTO>>> aiSearch(
        @RequestBody AISearchRequest request,
        @RequestParam(defaultValue = "0") int page,
        @RequestParam(defaultValue = "10") int size) {
    // NOTE(review): request.getQuery() is dereferenced without a null check —
    // a body like {} would NPE here; confirm upstream validation exists.
    log.debug("AI搜索: query={}, page={}, size={}", request.getQuery(), page, size);
    PageResponse<PenaltyRecordDTO> result = penaltyRecordService.aiSearch(request.getQuery(), page, size);
    return ResponseEntity.ok(ApiResponse.success(result));
}
} }
package com.fintech.penalty.dto;

import lombok.Data;

/**
 * Request body for the AI natural-language search endpoint
 * (POST /penalties/ai-search).
 */
@Data
public class AISearchRequest {
    // Free-text natural-language query, e.g. "2024年银行罚款".
    private String query;
}
\ No newline at end of file
...@@ -17,6 +17,7 @@ import java.time.LocalDateTime; ...@@ -17,6 +17,7 @@ import java.time.LocalDateTime;
public class CrawlTaskDTO { public class CrawlTaskDTO {
private Long id; private Long id;
private Long sourceId;
private String taskName; private String taskName;
private String sourceName; private String sourceName;
private String sourceUrl; private String sourceUrl;
......
...@@ -5,6 +5,7 @@ import lombok.Builder; ...@@ -5,6 +5,7 @@ import lombok.Builder;
import lombok.Data; import lombok.Data;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import java.math.BigDecimal;
import java.time.LocalDate; import java.time.LocalDate;
/** /**
...@@ -24,4 +25,6 @@ public class SearchCriteria { ...@@ -24,4 +25,6 @@ public class SearchCriteria {
private LocalDate endDate; private LocalDate endDate;
private Boolean isNew; private Boolean isNew;
private String keyword; private String keyword;
private BigDecimal minAmount;
private BigDecimal maxAmount;
} }
package com.fintech.penalty.entity;

import jakarta.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.time.LocalDateTime;

/**
 * Crawl progress record, used for checkpoint/resume ("断点续传"):
 * stores how far a crawl run got so an interrupted crawl can continue
 * from the last completed page instead of restarting from scratch.
 *
 * One row per (source_name, task_id) pair, enforced by the unique constraint.
 */
@Entity
@Table(name = "crawl_progress",
        uniqueConstraints = {@UniqueConstraint(columnNames = {"source_name", "task_id"})})
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class CrawlProgress {

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    // Data-source name, e.g. "NFRA".
    @Column(name = "source_name", nullable = false, length = 100)
    private String sourceName;

    // Task identifier, distinguishes separate crawl runs of the same source.
    @Column(name = "task_id", length = 50)
    private String taskId;

    // Index of the last fully completed page.
    @Column(name = "last_page_index", nullable = false)
    private Integer lastPageIndex;

    // Total number of pages, when known.
    @Column(name = "total_pages")
    private Integer totalPages;

    // Number of records crawled so far in this run.
    @Column(name = "total_records_crawled")
    private Integer totalRecordsCrawled;

    // Lifecycle state: RUNNING, COMPLETED, FAILED.
    @Column(name = "status", length = 20)
    private String status;

    // When this crawl run started.
    @Column(name = "start_time")
    private LocalDateTime startTime;

    // Last time this progress row was updated.
    @Column(name = "update_time")
    private LocalDateTime updateTime;
}
\ No newline at end of file
package com.fintech.penalty.repository;

import com.fintech.penalty.entity.CrawlProgress;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import org.springframework.transaction.annotation.Transactional;

import java.time.LocalDateTime;
import java.util.Optional;

/**
 * Data access for {@link CrawlProgress} checkpoint rows.
 *
 * All update methods are bulk JPQL updates ({@code @Modifying}) keyed on the
 * (sourceName, taskId) pair, each running in its own transaction. Note they
 * bypass the persistence context: already-loaded CrawlProgress entities are
 * not refreshed automatically.
 */
@Repository
public interface CrawlProgressRepository extends JpaRepository<CrawlProgress, Long> {

    /** Find the progress row for a specific crawl run of a source. */
    Optional<CrawlProgress> findBySourceNameAndTaskId(String sourceName, String taskId);

    /** Find a source's progress row in a given status (e.g. "RUNNING"). */
    Optional<CrawlProgress> findBySourceNameAndStatus(String sourceName, String status);

    /** Record the last completed page index for a run. */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.lastPageIndex = :pageIndex, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateProgress(@Param("sourceName") String sourceName,
                        @Param("taskId") String taskId,
                        @Param("pageIndex") Integer pageIndex,
                        @Param("updateTime") LocalDateTime updateTime);

    /** Record the total page count once it is known. */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.totalPages = :totalPages, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateTotalPages(@Param("sourceName") String sourceName,
                          @Param("taskId") String taskId,
                          @Param("totalPages") Integer totalPages,
                          @Param("updateTime") LocalDateTime updateTime);

    /** Record the cumulative number of records crawled. */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.totalRecordsCrawled = :totalRecords, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateTotalRecords(@Param("sourceName") String sourceName,
                            @Param("taskId") String taskId,
                            @Param("totalRecords") Integer totalRecords,
                            @Param("updateTime") LocalDateTime updateTime);

    /** Transition the run's lifecycle status (RUNNING / COMPLETED / FAILED). */
    @Modifying
    @Transactional
    @Query("UPDATE CrawlProgress cp SET cp.status = :status, cp.updateTime = :updateTime WHERE cp.sourceName = :sourceName AND cp.taskId = :taskId")
    void updateStatus(@Param("sourceName") String sourceName,
                      @Param("taskId") String taskId,
                      @Param("status") String status,
                      @Param("updateTime") LocalDateTime updateTime);
}
\ No newline at end of file
...@@ -21,4 +21,9 @@ public interface CrawlSourceRepository extends JpaRepository<CrawlSource, Long> ...@@ -21,4 +21,9 @@ public interface CrawlSourceRepository extends JpaRepository<CrawlSource, Long>
* 根据来源类型查询 * 根据来源类型查询
*/ */
List<CrawlSource> findBySourceType(String sourceType); List<CrawlSource> findBySourceType(String sourceType);
/**
* 查询所有数据源(按排序)
*/
List<CrawlSource> findAllByOrderBySortOrder();
} }
...@@ -25,6 +25,11 @@ public interface CrawlTaskRepository extends JpaRepository<CrawlTask, Long> { ...@@ -25,6 +25,11 @@ public interface CrawlTaskRepository extends JpaRepository<CrawlTask, Long> {
*/ */
Optional<CrawlTask> findTopBySourceNameOrderByLastCrawlTimeDesc(String sourceName); Optional<CrawlTask> findTopBySourceNameOrderByLastCrawlTimeDesc(String sourceName);
/**
* 根据来源名称查询所有任务(按时间倒序)
*/
List<CrawlTask> findBySourceNameOrderByLastCrawlTimeDesc(String sourceName);
/** /**
* 分页查询任务历史 * 分页查询任务历史
*/ */
...@@ -34,4 +39,9 @@ public interface CrawlTaskRepository extends JpaRepository<CrawlTask, Long> { ...@@ -34,4 +39,9 @@ public interface CrawlTaskRepository extends JpaRepository<CrawlTask, Long> {
* 查询正在运行的任务 * 查询正在运行的任务
*/ */
List<CrawlTask> findByStatusIn(List<String> statuses); List<CrawlTask> findByStatusIn(List<String> statuses);
/**
 * Delete all task records.
 * NOTE(review): {@code JpaRepository} already declares {@code deleteAll()};
 * this redeclaration is redundant and could be removed without changing
 * behavior.
 */
void deleteAll();
} }
...@@ -4,6 +4,7 @@ import com.fintech.penalty.config.CrawlerConfig; ...@@ -4,6 +4,7 @@ import com.fintech.penalty.config.CrawlerConfig;
import com.fintech.penalty.crawler.PenaltyCrawler; import com.fintech.penalty.crawler.PenaltyCrawler;
import com.fintech.penalty.dto.CrawlResult; import com.fintech.penalty.dto.CrawlResult;
import com.fintech.penalty.entity.CrawlSource; import com.fintech.penalty.entity.CrawlSource;
import com.fintech.penalty.service.CrawlTaskService;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled; import org.springframework.scheduling.annotation.Scheduled;
...@@ -23,6 +24,7 @@ public class CrawlScheduler { ...@@ -23,6 +24,7 @@ public class CrawlScheduler {
private final PenaltyCrawler penaltyCrawler; private final PenaltyCrawler penaltyCrawler;
private final CrawlerConfig crawlerConfig; private final CrawlerConfig crawlerConfig;
private final CrawlTaskService crawlTaskService;
private volatile boolean isRunning = false; private volatile boolean isRunning = false;
...@@ -69,13 +71,11 @@ public class CrawlScheduler { ...@@ -69,13 +71,11 @@ public class CrawlScheduler {
log.info("============ 开始爬取单个数据源: {} ============", source.getSourceName()); log.info("============ 开始爬取单个数据源: {} ============", source.getSourceName());
CrawlerConfig.SourceConfig config = new CrawlerConfig.SourceConfig(); CrawlerConfig.SourceConfig config = toSourceConfig(source);
config.setName(source.getSourceName());
config.setBaseUrl(source.getSourceUrl());
config.setEnabled(true);
try { try {
isRunning = true; isRunning = true;
crawlTaskService.createTask(source.getSourceName(), config.getListUrl());
penaltyCrawler.crawl(config).get(); penaltyCrawler.crawl(config).get();
log.info("============ 单个数据源爬取完成: {} ============", source.getSourceName()); log.info("============ 单个数据源爬取完成: {} ============", source.getSourceName());
} catch (Exception e) { } catch (Exception e) {
...@@ -85,6 +85,19 @@ public class CrawlScheduler { ...@@ -85,6 +85,19 @@ public class CrawlScheduler {
} }
} }
/**
 * Convert a database {@link CrawlSource} entity into the crawler's
 * {@code CrawlerConfig.SourceConfig}.
 *
 * @param source persisted data-source row (URL, selectors, enabled flag)
 * @return crawler configuration for this source
 */
private CrawlerConfig.SourceConfig toSourceConfig(CrawlSource source) {
    CrawlerConfig.SourceConfig config = new CrawlerConfig.SourceConfig();
    config.setName(source.getSourceName());
    config.setCode(source.getSourceType());
    config.setBaseUrl(source.getSourceUrl());
    // Guard both parts of the concatenation: a null sourceUrl would otherwise
    // produce a literal "null..." list URL (the original only guarded the pattern).
    String baseUrl = source.getSourceUrl() != null ? source.getSourceUrl() : "";
    String pattern = source.getListUrlPattern() != null ? source.getListUrlPattern() : "";
    config.setListUrl(baseUrl + pattern);
    // Boolean.TRUE.equals avoids an NPE from auto-unboxing when the DB
    // is_enabled column is NULL; a NULL flag is treated as disabled.
    config.setEnabled(Boolean.TRUE.equals(source.getIsEnabled()));
    return config;
}
/** /**
* 执行爬取 * 执行爬取
*/ */
...@@ -93,12 +106,16 @@ public class CrawlScheduler { ...@@ -93,12 +106,16 @@ public class CrawlScheduler {
List<CrawlResult> results = new ArrayList<>(); List<CrawlResult> results = new ArrayList<>();
try { try {
List<CrawlerConfig.SourceConfig> sources = crawlerConfig.getSources(); List<CrawlSource> dbSources = crawlTaskService.getEnabledSources();
if (sources == null || sources.isEmpty()) { if (dbSources == null || dbSources.isEmpty()) {
log.warn("没有配置爬取数据源"); log.warn("数据库中没有配置爬取数据源");
return results; return results;
} }
List<CrawlerConfig.SourceConfig> sources = dbSources.stream()
.map(this::toSourceConfig)
.toList();
// 并行爬取各数据源 // 并行爬取各数据源
List<CompletableFuture<CrawlResult>> futures = sources.stream() List<CompletableFuture<CrawlResult>> futures = sources.stream()
.filter(CrawlerConfig.SourceConfig::isEnabled) .filter(CrawlerConfig.SourceConfig::isEnabled)
......
...@@ -11,6 +11,7 @@ import com.fintech.penalty.repository.CrawlSourceRepository; ...@@ -11,6 +11,7 @@ import com.fintech.penalty.repository.CrawlSourceRepository;
import com.fintech.penalty.repository.CrawlTaskRepository; import com.fintech.penalty.repository.CrawlTaskRepository;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.domain.Page; import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.PageRequest;
import org.springframework.http.HttpEntity; import org.springframework.http.HttpEntity;
...@@ -43,6 +44,15 @@ public class CrawlTaskService { ...@@ -43,6 +44,15 @@ public class CrawlTaskService {
private final CrawlTaskRepository crawlTaskRepository; private final CrawlTaskRepository crawlTaskRepository;
private final CrawlSourceRepository crawlSourceRepository; private final CrawlSourceRepository crawlSourceRepository;
@Value("${deepseek.api.url:https://api.deepseek.com}")
private String deepseekApiUrl;
@Value("${deepseek.api.key:}")
private String deepseekApiKey;
@Value("${deepseek.model:deepseek-chat}")
private String deepseekModel;
/** /**
* 获取爬取任务列表 * 获取爬取任务列表
*/ */
...@@ -59,7 +69,14 @@ public class CrawlTaskService { ...@@ -59,7 +69,14 @@ public class CrawlTaskService {
.hasPrevious(pageResult.hasPrevious()) .hasPrevious(pageResult.hasPrevious())
.build(); .build();
} }
/**
 * Delete every crawl task history record. Irreversible.
 * NOTE(review): not annotated {@code @Transactional}; Spring Data's
 * {@code deleteAll()} deletes entities one by one — confirm this is
 * acceptable for large history tables.
 */
public void clearHistory() {
    crawlTaskRepository.deleteAll();
}
/** /**
* 根据ID查询数据源 * 根据ID查询数据源
*/ */
...@@ -82,6 +99,17 @@ public class CrawlTaskService { ...@@ -82,6 +99,17 @@ public class CrawlTaskService {
public List<CrawlSource> getEnabledSources() { public List<CrawlSource> getEnabledSources() {
return crawlSourceRepository.findByIsEnabledTrueOrderBySortOrder(); return crawlSourceRepository.findByIsEnabledTrueOrderBySortOrder();
} }
/**
 * Fetch all data sources (enabled or not), ordered by sort_order.
 *
 * Fix: the class previously declared {@code getAllSources()} TWICE — a
 * compile error. The duplicate (plain {@code findAll()}) variant is removed;
 * the sorted variant is kept since the UI lists sources in display order
 * and the repository's {@code findAllByOrderBySortOrder} was added for it.
 *
 * @return all configured crawl sources in sort order
 */
public List<CrawlSource> getAllSources() {
    return crawlSourceRepository.findAllByOrderBySortOrder();
}
/** /**
* 获取最新任务状态 * 获取最新任务状态
...@@ -90,8 +118,9 @@ public class CrawlTaskService { ...@@ -90,8 +118,9 @@ public class CrawlTaskService {
List<CrawlSource> sources = getEnabledSources(); List<CrawlSource> sources = getEnabledSources();
return sources.stream() return sources.stream()
.map(source -> crawlTaskRepository.findTopBySourceNameOrderByLastCrawlTimeDesc(source.getSourceName()) .map(source -> crawlTaskRepository.findTopBySourceNameOrderByLastCrawlTimeDesc(source.getSourceName())
.map(this::toDTO) .map(task -> toDTO(task, source.getId()))
.orElse(CrawlTaskDTO.builder() .orElse(CrawlTaskDTO.builder()
.sourceId(source.getId())
.sourceName(source.getSourceName()) .sourceName(source.getSourceName())
.status("pending") .status("pending")
.build())) .build()))
...@@ -132,19 +161,29 @@ public class CrawlTaskService { ...@@ -132,19 +161,29 @@ public class CrawlTaskService {
*/ */
@Transactional @Transactional
public void saveCrawlResult(CrawlResult result) { public void saveCrawlResult(CrawlResult result) {
List<CrawlTask> existingTasks = crawlTaskRepository.findBySourceNameOrderByLastCrawlTimeDesc(result.getSourceName());
Optional<CrawlTask> taskOpt = crawlTaskRepository.findTopBySourceNameOrderByLastCrawlTimeDesc(result.getSourceName())
.stream()
.findFirst();
taskOpt.ifPresent(task -> { if (!existingTasks.isEmpty()) {
CrawlTask task = existingTasks.get(0);
task.setStatus(result.isSuccess() ? "success" : "failed"); task.setStatus(result.isSuccess() ? "success" : "failed");
task.setLastCrawlTime(LocalDateTime.now()); task.setLastCrawlTime(LocalDateTime.now());
task.setRecordsFound(result.getTotalFound()); task.setRecordsFound(result.getTotalFound());
task.setRecordsNew(result.getNewRecords()); task.setRecordsNew(result.getNewRecords());
task.setErrorMessage(result.getErrorMessage()); task.setErrorMessage(result.getErrorMessage());
crawlTaskRepository.save(task); crawlTaskRepository.save(task);
}); } else {
CrawlTask task = CrawlTask.builder()
.taskName(result.getSourceName() + " 爬取任务")
.sourceName(result.getSourceName())
.sourceUrl("")
.status(result.isSuccess() ? "success" : "failed")
.lastCrawlTime(LocalDateTime.now())
.recordsFound(result.getTotalFound())
.recordsNew(result.getNewRecords())
.errorMessage(result.getErrorMessage())
.build();
crawlTaskRepository.save(task);
}
} }
/** /**
...@@ -160,6 +199,11 @@ public class CrawlTaskService { ...@@ -160,6 +199,11 @@ public class CrawlTaskService {
public List<Map<String, String>> searchSourcesByKeyword(String keyword) { public List<Map<String, String>> searchSourcesByKeyword(String keyword) {
List<Map<String, String>> results = new ArrayList<>(); List<Map<String, String>> results = new ArrayList<>();
if (deepseekApiKey == null || deepseekApiKey.isEmpty()) {
log.warn("DeepSeek API 密钥未配置,使用默认数据源");
return getDefaultSources();
}
String searchPrompt = String.format("请搜索关于%s的金融监管处罚信息数据源URL,只返回URL和简要描述,不要其他内容。返回格式:名称|URL|描述,每行一个", keyword); String searchPrompt = String.format("请搜索关于%s的金融监管处罚信息数据源URL,只返回URL和简要描述,不要其他内容。返回格式:名称|URL|描述,每行一个", keyword);
try { try {
...@@ -169,22 +213,20 @@ public class CrawlTaskService { ...@@ -169,22 +213,20 @@ public class CrawlTaskService {
message.put("content", searchPrompt); message.put("content", searchPrompt);
Map<String, Object> requestBody = new HashMap<>(); Map<String, Object> requestBody = new HashMap<>();
requestBody.put("model", "deepseek-chat"); requestBody.put("model", deepseekModel);
requestBody.put("messages", new Object[]{message}); requestBody.put("messages", new Object[]{message});
requestBody.put("temperature", 0.7); requestBody.put("temperature", 0.7);
requestBody.put("max_tokens", 1000); requestBody.put("max_tokens", 1000);
HttpHeaders headers = new HttpHeaders(); HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON); headers.setContentType(MediaType.APPLICATION_JSON);
headers.setBearerAuth(deepseekApiKey);
String apiKey = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
headers.setBearerAuth(apiKey);
HttpEntity<Map<String, Object>> request = new HttpEntity<>(requestBody, headers); HttpEntity<Map<String, Object>> request = new HttpEntity<>(requestBody, headers);
RestTemplate restTemplate = new RestTemplate(); RestTemplate restTemplate = new RestTemplate();
ResponseEntity<String> response = restTemplate.exchange( ResponseEntity<String> response = restTemplate.exchange(
"https://api.deepseek.com/v1/chat/completions", deepseekApiUrl + "/v1/chat/completions",
HttpMethod.POST, HttpMethod.POST,
request, request,
String.class String.class
...@@ -216,14 +258,23 @@ public class CrawlTaskService { ...@@ -216,14 +258,23 @@ public class CrawlTaskService {
} }
if (results.isEmpty()) { if (results.isEmpty()) {
results.add(Map.of("name", "中国人民银行官网", "url", "http://www.pbc.gov.cn", "description", "中国人民银行官方网站")); results = getDefaultSources();
results.add(Map.of("name", "银保监会官网", "url", "http://www.cbirc.gov.cn", "description", "中国银行保险监督管理委员会"));
results.add(Map.of("name", "证监会官网", "url", "http://www.csrc.gov.cn", "description", "中国证券监督管理委员会"));
} }
return results; return results;
} }
/**
 * Fallback list of well-known regulator sites, returned when the DeepSeek
 * API is not configured or produced no usable results.
 *
 * @return mutable list of {name, url, description} entries
 */
private List<Map<String, String>> getDefaultSources() {
    // Wrapped in a new ArrayList so callers may still append to the result.
    return new ArrayList<>(List.of(
            Map.of("name", "中国人民银行官网", "url", "http://www.pbc.gov.cn", "description", "中国人民银行官方网站"),
            Map.of("name", "银保监会官网", "url", "http://www.cbirc.gov.cn", "description", "中国银行保险监督管理委员会"),
            Map.of("name", "证监会官网", "url", "http://www.csrc.gov.cn", "description", "中国证券监督管理委员会")));
}
/** /**
* 创建数据源 * 创建数据源
*/ */
...@@ -252,6 +303,27 @@ public class CrawlTaskService { ...@@ -252,6 +303,27 @@ public class CrawlTaskService {
if (source.getSourceType() != null) { if (source.getSourceType() != null) {
existing.setSourceType(source.getSourceType()); existing.setSourceType(source.getSourceType());
} }
if (source.getListUrlPattern() != null) {
existing.setListUrlPattern(source.getListUrlPattern());
}
if (source.getDetailUrlPattern() != null) {
existing.setDetailUrlPattern(source.getDetailUrlPattern());
}
if (source.getListSelector() != null) {
existing.setListSelector(source.getListSelector());
}
if (source.getTitleSelector() != null) {
existing.setTitleSelector(source.getTitleSelector());
}
if (source.getDateSelector() != null) {
existing.setDateSelector(source.getDateSelector());
}
if (source.getContentSelector() != null) {
existing.setContentSelector(source.getContentSelector());
}
if (source.getSortOrder() != null) {
existing.setSortOrder(source.getSortOrder());
}
if (source.getIsEnabled() != null) { if (source.getIsEnabled() != null) {
existing.setIsEnabled(source.getIsEnabled()); existing.setIsEnabled(source.getIsEnabled());
} }
...@@ -271,8 +343,13 @@ public class CrawlTaskService { ...@@ -271,8 +343,13 @@ public class CrawlTaskService {
* 转换为DTO * 转换为DTO
*/ */
private CrawlTaskDTO toDTO(CrawlTask entity) { private CrawlTaskDTO toDTO(CrawlTask entity) {
return toDTO(entity, null);
}
private CrawlTaskDTO toDTO(CrawlTask entity, Long sourceId) {
return CrawlTaskDTO.builder() return CrawlTaskDTO.builder()
.id(entity.getId()) .id(entity.getId())
.sourceId(sourceId)
.taskName(entity.getTaskName()) .taskName(entity.getTaskName())
.sourceName(entity.getSourceName()) .sourceName(entity.getSourceName())
.sourceUrl(entity.getSourceUrl()) .sourceUrl(entity.getSourceUrl())
......
...@@ -97,6 +97,12 @@ public class PenaltyRecordService { ...@@ -97,6 +97,12 @@ public class PenaltyRecordService {
cb.like(root.get("penaltyBasis"), keyword) cb.like(root.get("penaltyBasis"), keyword)
)); ));
} }
if (criteria.getMinAmount() != null) {
predicates.add(cb.ge(root.get("penaltyAmount"), criteria.getMinAmount()));
}
if (criteria.getMaxAmount() != null) {
predicates.add(cb.le(root.get("penaltyAmount"), criteria.getMaxAmount()));
}
query.orderBy(cb.desc(root.get("penaltyDate"))); query.orderBy(cb.desc(root.get("penaltyDate")));
return cb.and(predicates.toArray(new Predicate[0])); return cb.and(predicates.toArray(new Predicate[0]));
...@@ -293,4 +299,86 @@ public class PenaltyRecordService { ...@@ -293,4 +299,86 @@ public class PenaltyRecordService {
} }
return record.getAnalysisReport(); return record.getAnalysisReport();
} }
/**
 * AI natural-language search: translate the free-text query into a
 * {@code SearchCriteria} via keyword heuristics, then delegate to the
 * regular paged search.
 *
 * @param query free-text query, e.g. "2024年银行罚款"
 * @param page  zero-based page index
 * @param size  page size
 * @return paged penalty records matching the parsed criteria
 */
public PageResponse<PenaltyRecordDTO> aiSearch(String query, int page, int size) {
    SearchCriteria criteria = parseNaturalLanguage(query);
    log.info("AI搜索解析结果: {}", criteria);
    return search(criteria, page, size);
}
/**
 * Heuristically parse a natural-language query into structured search criteria.
 *
 * Recognizes, in order: institution type, penalty type, regulator, a
 * four-digit year (any 20xx — generalizes the previous hard-coded
 * 2023/2024/2025 handling), and a minimum amount expressed in 万 or 亿.
 * Falls back to a plain keyword search only when nothing structured could
 * be extracted.
 *
 * @param query raw user query; must not be null
 * @return criteria object with any recognized fields populated
 */
private SearchCriteria parseNaturalLanguage(String query) {
    SearchCriteria criteria = new SearchCriteria();
    String lowerQuery = query.toLowerCase();

    // Institution type: first match in this chain wins.
    if (lowerQuery.contains("银行")) {
        criteria.setInstitutionType("银行");
    } else if (lowerQuery.contains("保险")) {
        criteria.setInstitutionType("保险");
    } else if (lowerQuery.contains("证券")) {
        criteria.setInstitutionType("证券");
    } else if (lowerQuery.contains("基金")) {
        criteria.setInstitutionType("基金");
    } else if (lowerQuery.contains("期货")) {
        criteria.setInstitutionType("期货");
    }

    // Penalty type: mapped onto the canonical values stored in the DB.
    if (lowerQuery.contains("罚款")) {
        criteria.setPenaltyType("罚款");
    } else if (lowerQuery.contains("警告")) {
        criteria.setPenaltyType("警告");
    } else if (lowerQuery.contains("没收")) {
        criteria.setPenaltyType("没收违法所得");
    } else if (lowerQuery.contains("停业")) {
        criteria.setPenaltyType("停业");
    } else if (lowerQuery.contains("吊销")) {
        criteria.setPenaltyType("吊销许可证");
    } else if (lowerQuery.contains("市场禁入")) {
        criteria.setPenaltyType("市场禁入");
    }

    // Regulator aliases mapped to canonical names.
    if (lowerQuery.contains("人行") || lowerQuery.contains("人民银行")) {
        criteria.setRegulator("中国人民银行");
    } else if (lowerQuery.contains("银保监会") || lowerQuery.contains("银保监")) {
        criteria.setRegulator("中国银保监会");
    } else if (lowerQuery.contains("证监会") || lowerQuery.contains("证券会")) {
        criteria.setRegulator("中国证监会");
    } else if (lowerQuery.contains("外汇")) {
        criteria.setRegulator("国家外汇管理局");
    }

    // Year: any 20xx maps to that full calendar year. First occurrence wins
    // (the old hard-coded chain preferred 2024 over 2025 over 2023).
    java.util.regex.Matcher yearMatcher =
            java.util.regex.Pattern.compile("(20\\d{2})").matcher(query);
    if (yearMatcher.find()) {
        int year = Integer.parseInt(yearMatcher.group(1));
        criteria.setStartDate(LocalDate.of(year, 1, 1));
        criteria.setEndDate(LocalDate.of(year, 12, 31));
    }

    // Amount: capture the unit TOGETHER with the number. The previous code
    // checked query.contains("亿") independently of the match, so "500万"
    // was misread as 500亿 whenever 亿 appeared anywhere else in the query.
    java.util.regex.Matcher amountMatcher =
            java.util.regex.Pattern.compile("(\\d+)\\s*([亿万])").matcher(query);
    if (amountMatcher.find()) {
        BigDecimal amount = new BigDecimal(amountMatcher.group(1));
        BigDecimal multiplier = "亿".equals(amountMatcher.group(2))
                ? BigDecimal.valueOf(100_000_000L)   // 亿 = 10^8
                : BigDecimal.valueOf(10_000L);       // 万 = 10^4
        criteria.setMinAmount(amount.multiply(multiplier));
    }

    // Keyword fallback only when nothing structured was extracted. The
    // original check ignored minAmount, so an amount-only query was also
    // keyword-filtered by its full text, over-restricting results.
    if (criteria.getInstitutionType() == null && criteria.getPenaltyType() == null
            && criteria.getRegulator() == null && criteria.getStartDate() == null
            && criteria.getMinAmount() == null) {
        criteria.setKeyword(query);
    }

    return criteria;
}
} }
...@@ -37,34 +37,16 @@ spring: ...@@ -37,34 +37,16 @@ spring:
serialization: serialization:
write-dates-as-timestamps: false write-dates-as-timestamps: false
# 爬虫配置 # 爬虫配置(数据源从数据库 crawl_sources 表读取)
crawler: crawler:
enabled: true enabled: true
cron: "0 0 2 * * ?" cron: "0 0 2 * * ?"
interval: 2000 interval: 2000
max-retries: 3 max-retries: 3
timeout: 3000000 timeout: 3000000
sources: resume:
- name: 中国人民银行 enabled: true # 是否启用断点续传
code: pbc force-restart: false # 是否强制重新开始(设为true会忽略之前的进度)
base-url: https://www.pbc.gov.cn
list-url: https://www.pbc.gov.cn/zhenghuihuizhan/135153/index.html
enabled: true
- name: 中国银保监会
code: cbirc
base-url: https://www.cbirc.gov.cn
list-url: https://www.cbirc.gov.cn/cn/view/pages/Column.html?colIdId=11283&channelId=11283
enabled: true
- name: 中国证监会
code: csrc
base-url: https://www.csrc.gov.cn
list-url: https://www.csrc.gov.cn/csrc/c100103/index.html
enabled: true
- name: 国家外汇管理局
code: safe
base-url: https://www.safe.gov.cn
list-url: https://www.safe.gov.cn/safe/yjcf/index.html
enabled: true
deepseek: deepseek:
api: api:
...@@ -86,3 +68,8 @@ logging: ...@@ -86,3 +68,8 @@ logging:
org.hibernate.SQL: DEBUG org.hibernate.SQL: DEBUG
pattern: pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n" console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file:
name: logs/penalty-system.log # 日志文件路径和名称
max-size: 10MB # 单个文件最大大小
max-history: 30 # 保留30天的历史日志
\ No newline at end of file
...@@ -37,34 +37,16 @@ spring: ...@@ -37,34 +37,16 @@ spring:
serialization: serialization:
write-dates-as-timestamps: false write-dates-as-timestamps: false
# 爬虫配置 # 爬虫配置(数据源从数据库 crawl_sources 表读取)
crawler: crawler:
enabled: true enabled: true
cron: "0 0 2 * * ?" cron: "0 0 2 * * ?"
interval: 2000 interval: 2000
max-retries: 3 max-retries: 3
timeout: 3000000 timeout: 3000000
sources: resume:
- name: 中国人民银行 enabled: true # 是否启用断点续传
code: pbc force-restart: false # 是否强制重新开始(设为true会忽略之前的进度)
base-url: https://www.pbc.gov.cn
list-url: https://www.pbc.gov.cn/zhenghuihuizhan/135153/index.html
enabled: true
- name: 中国银保监会
code: cbirc
base-url: https://www.cbirc.gov.cn
list-url: https://www.cbirc.gov.cn/cn/view/pages/Column.html?colIdId=11283&channelId=11283
enabled: true
- name: 中国证监会
code: csrc
base-url: https://www.csrc.gov.cn
list-url: https://www.csrc.gov.cn/csrc/c100103/index.html
enabled: true
- name: 国家外汇管理局
code: safe
base-url: https://www.safe.gov.cn
list-url: https://www.safe.gov.cn/safe/yjcf/index.html
enabled: true
deepseek: deepseek:
api: api:
...@@ -86,3 +68,8 @@ logging: ...@@ -86,3 +68,8 @@ logging:
org.hibernate.SQL: DEBUG org.hibernate.SQL: DEBUG
pattern: pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n" console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n"
file:
name: logs/penalty-system.log # 日志文件路径和名称
max-size: 10MB # 单个文件最大大小
max-history: 30 # 保留30天的历史日志
\ No newline at end of file
...@@ -20,6 +20,7 @@ D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\ ...@@ -20,6 +20,7 @@ D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\CrawlTask.java D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\CrawlTask.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\repository\PenaltyRecordRepository.java D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\repository\PenaltyRecordRepository.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\SystemConfig.java D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\entity\SystemConfig.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\AISearchRequest.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\LoginResponse.java D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\dto\LoginResponse.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\controller\AnalysisKeywordController.java D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\controller\AnalysisKeywordController.java
D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\crawler\PenaltyCrawler.java D:\new_workspace_06\aispace\financial-penalty-monitor\backend\src\main\java\com\fintech\penalty\crawler\PenaltyCrawler.java
......
.login-container[data-v-63b59978]{min-height:100vh;display:flex;flex-direction:column;justify-content:center;align-items:center;background:linear-gradient(135deg,#667eea,#764ba2)}.login-box[data-v-63b59978]{width:420px;padding:40px;background:#fff;border-radius:12px;box-shadow:0 20px 60px #0000004d}.login-header[data-v-63b59978]{text-align:center;margin-bottom:30px}.login-header h1[data-v-63b59978]{margin:16px 0 8px;font-size:24px;color:#333}.login-header p[data-v-63b59978]{margin:0;font-size:14px;color:#999}.login-form .login-button[data-v-63b59978]{width:100%}.login-footer[data-v-63b59978]{margin-top:24px;text-align:center}.login-footer p[data-v-63b59978]{color:#ffffffb3;font-size:14px}
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
<link rel="icon" href="/favicon.ico"> <link rel="icon" href="/favicon.ico">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>金融监管处罚监控系统</title> <title>金融监管处罚监控系统</title>
<script type="module" crossorigin src="/assets/index-C_I9toNW.js"></script> <script type="module" crossorigin src="/assets/index-BM812JBU.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-D_lRsYFa.css"> <link rel="stylesheet" crossorigin href="/assets/index-D_lRsYFa.css">
</head> </head>
<body> <body>
......
...@@ -4,8 +4,16 @@ import router from '../router' ...@@ -4,8 +4,16 @@ import router from '../router'
const isDev = import.meta.env.DEV const isDev = import.meta.env.DEV
// Resolve the axios base URL.
// Dev: always '/api' so the Vite dev-server proxy handles the backend.
// Prod: use VITE_API_BASE_URL when provided, normalized to end with '/api';
//       otherwise keep the relative '/api' default (same-origin deployment).
let baseURL = '/api'

if (!isDev) {
  const apiBase = import.meta.env.VITE_API_BASE_URL
  if (apiBase) {
    baseURL = apiBase.endsWith('/api') ? apiBase : apiBase + '/api'
  }
}
const request = axios.create({ const request = axios.create({
baseURL: isDev ? '/api' : (import.meta.env.VITE_API_BASE_URL || '/api'), baseURL,
timeout: 300000 timeout: 300000
}) })
...@@ -153,6 +161,7 @@ export default { ...@@ -153,6 +161,7 @@ export default {
triggerCrawl: () => request.post('/crawl/trigger'), triggerCrawl: () => request.post('/crawl/trigger'),
getCrawlStatus: () => request.get('/crawl/status'), getCrawlStatus: () => request.get('/crawl/status'),
getCrawlHistory: (params) => request.get('/crawl/history', { params }), getCrawlHistory: (params) => request.get('/crawl/history', { params }),
clearCrawlHistory: () => request.delete('/crawl/history'),
getCrawlSources: () => request.get('/crawl/sources'), getCrawlSources: () => request.get('/crawl/sources'),
createCrawlSource: (data) => request.post('/crawl/sources', data), createCrawlSource: (data) => request.post('/crawl/sources', data),
updateCrawlSource: (id, data) => request.put(`/crawl/sources/${id}`, data), updateCrawlSource: (id, data) => request.put(`/crawl/sources/${id}`, data),
...@@ -164,5 +173,8 @@ export default { ...@@ -164,5 +173,8 @@ export default {
health: () => request.get('/health'), health: () => request.get('/health'),
// 根据角色获取菜单 // 根据角色获取菜单
getMenusByRole: (roleCode) => request.get(`/menus/role/${roleCode || 'ADMIN'}`) getMenusByRole: (roleCode) => request.get(`/menus/role/${roleCode || 'ADMIN'}`),
// AI搜索
aiSearch: (text) => request.post('/penalties/ai-search', { query: text })
} }
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
</div> </div>
<div class="login-footer"> <div class="login-footer">
<p>默认账号: admin / admin123</p> <p>默认账号: admin / 1qaz@WSX</p>
</div> </div>
</div> </div>
</template> </template>
...@@ -67,8 +67,8 @@ const formRef = ref(null) ...@@ -67,8 +67,8 @@ const formRef = ref(null)
const loading = ref(false) const loading = ref(false)
const form = reactive({ const form = reactive({
username: '', username: 'admin',
password: '' password: '1qaz@WSX'
}) })
const rules = { const rules = {
......
import { defineConfig } from 'vite' import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue' import vue from '@vitejs/plugin-vue'
const apiUrl = process.env.VITE_API_BASE_URL || 'http://localhost:8080'
export default defineConfig({ export default defineConfig({
plugins: [vue()], plugins: [vue()],
server: { server: {
port: 3000, port: 3000,
proxy: { proxy: {
'/api': { '/api': {
target: process.env.VITE_API_BASE_URL || 'http://localhost:8080', target: apiUrl,
changeOrigin: true changeOrigin: true
} }
} }
},
css: {
preprocessorOptions: {
scss: {
api: 'modern-compiler'
}
}
} }
}) })
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论