From 3359aa04a5e927423bf55e614206b1b79f7c409e Mon Sep 17 00:00:00 2001 From: shinae1023 Date: Mon, 15 Jun 2026 13:31:12 +0900 Subject: [PATCH 1/5] =?UTF-8?q?[Feat]=20=EC=9E=90=EC=86=8C=EC=84=9C=20?= =?UTF-8?q?=EB=B6=84=EC=84=9D=20=EC=8B=9C=20=EC=9C=A0=EC=82=AC=ED=95=9C=20?= =?UTF-8?q?=EA=B3=B5=EA=B3=A0=EC=99=80=20=EC=A7=88=EB=AC=B8=20=ED=95=A8?= =?UTF-8?q?=EA=BB=98=20=ED=8F=AC=ED=95=A8=20(#24)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../analysis/service/AnalysisAiClient.java | 72 +++++- .../AnalysisReferenceRetrievalService.java | 227 ++++++++++++++++++ .../service/CohereCorpusEmbeddingClient.java | 7 +- .../corpus/service/CorpusEmbeddingClient.java | 29 ++- .../service/CorpusEmbeddingSyncService.java | 4 +- src/main/resources/application-dev.yaml | 4 + src/main/resources/application-prod.yaml | 4 + src/main/resources/schema.sql | 10 +- 8 files changed, 345 insertions(+), 12 deletions(-) create mode 100644 src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAiClient.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAiClient.java index 710b3bb..f5f7dc1 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAiClient.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAiClient.java @@ -3,6 +3,9 @@ import com.jobdri.jobdri_api.domain.analysis.dto.llm.AnalysisLlmResponse; import com.jobdri.jobdri_api.domain.analysis.entity.Question; import com.jobdri.jobdri_api.domain.jobposting.entity.JobPosting; +import com.jobdri.jobdri_api.domain.analysis.service.AnalysisReferenceRetrievalService.AnalysisReferenceContext; +import com.jobdri.jobdri_api.domain.analysis.service.AnalysisReferenceRetrievalService.RetrievedJobPostingReference; +import com.jobdri.jobdri_api.domain.analysis.service.AnalysisReferenceRetrievalService.RetrievedQuestionReference; import com.jobdri.jobdri_api.global.apiPayload.code.GeneralErrorCode; import com.jobdri.jobdri_api.global.apiPayload.exception.GeneralException; import com.openai.client.OpenAIClient; @@ -22,14 +25,16 @@ public class AnalysisAiClient { private final OpenAIClient openAIClient; + private final AnalysisReferenceRetrievalService analysisReferenceRetrievalService; @Value("${openai.model.cover-letter-analysis:gpt-4o-mini}") private String analysisModel; public AnalysisLlmResponse analyze(JobPosting jobPosting, List questions) { + AnalysisReferenceContext referenceContext = analysisReferenceRetrievalService.retrieve(jobPosting, questions); var params = ResponseCreateParams.builder() .model(analysisModel) - .input(buildPrompt(jobPosting, questions)) + .input(buildPrompt(jobPosting, questions, referenceContext)) .temperature(0.2) .text(AnalysisLlmResponse.class) .build(); @@ -48,7 +53,11 @@ public AnalysisLlmResponse analyze(JobPosting jobPosting, List questio } } - private String buildPrompt(JobPosting jobPosting, List questions) { + private String buildPrompt( + JobPosting jobPosting, + List questions, + AnalysisReferenceContext referenceContext + ) { String questionText = questions.stream() .map(question -> """ - questionId: %d @@ -61,6 +70,9 @@ private String buildPrompt(JobPosting jobPosting, List questions) { )) .reduce("", (left, right) -> left + "\n" + right); + String similarJobPostingText = formatJobPostingReferences(referenceContext.jobPostingReferences()); + String similarQuestionText = formatQuestionReferences(referenceContext.questionReferences()); + return """ [시스템 지시] 너는 한국 채용 담당자이자 자기소개서 평가 전문가다. @@ -126,6 +138,12 @@ private String buildPrompt(JobPosting jobPosting, List questions) { 우대 사항: %s + [유사 JD 검색 결과] + %s + + [유사 자소서 문항 검색 결과] + %s + [자소서 문항과 답변] %s @@ -148,10 +166,60 @@ private String buildPrompt(JobPosting jobPosting, List questions) { defaultString(jobPosting.getTask()), defaultString(jobPosting.getRequirement()), defaultString(jobPosting.getPreferred()), + similarJobPostingText, + similarQuestionText, questionText ); } + private String formatJobPostingReferences(List references) { + if (references == null || references.isEmpty()) { + return "없음"; + } + return references.stream() + .map(reference -> """ + - 회사명: %s + 직무명: %s + 주요 업무: %s + 자격 요건: %s + 우대 사항: %s + 거리: %.4f + """.formatted( + defaultString(reference.companyName()), + defaultString(reference.roleName()), + defaultString(reference.responsibilities()), + defaultString(reference.requirements()), + defaultString(reference.preferred()), + reference.distance() + )) + .reduce("", (left, right) -> left + "\n" + right) + .trim(); + } + + private String formatQuestionReferences(List references) { + if (references == null || references.isEmpty()) { + return "없음"; + } + return references.stream() + .map(reference -> """ + - 회사명: %s + 직무명: %s + 문항 유형: %s + 글자 수 제한: %s + 문항: %s + 거리: %.4f + """.formatted( + defaultString(reference.companyName()), + defaultString(reference.roleName()), + defaultString(reference.questionType()), + reference.charLimit() == null ? "" : reference.charLimit(), + defaultString(reference.questionText()), + reference.distance() + )) + .reduce("", (left, right) -> left + "\n" + right) + .trim(); + } + private AnalysisLlmResponse extractStructuredContent(StructuredResponse response) { return response.output().stream() .filter(item -> item.message().isPresent()) diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java new file mode 100644 index 0000000..9721516 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java @@ -0,0 +1,227 @@ +package com.jobdri.jobdri_api.domain.analysis.service; + +import com.jobdri.jobdri_api.domain.analysis.entity.Question; +import com.jobdri.jobdri_api.domain.corpus.service.CorpusEmbeddingClient; +import com.jobdri.jobdri_api.domain.jobposting.entity.JobPosting; +import com.pgvector.PGvector; +import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.StringUtils; + +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +@Service +@RequiredArgsConstructor +@Transactional(readOnly = true) +public class AnalysisReferenceRetrievalService { + + @Value("${app.analysis.retrieval.jd-limit:3}") + private int jdLimit; + + @Value("${app.analysis.retrieval.question-limit:5}") + private int questionLimit; + + private final CorpusEmbeddingClient corpusEmbeddingClient; + private final DataSource dataSource; + + public AnalysisReferenceContext retrieve(JobPosting jobPosting, List questions) { + String jdQuery = buildJobPostingQuery(jobPosting); + String questionQuery = buildQuestionQuery(jobPosting, questions); + + List jobPostingReferences = StringUtils.hasText(jdQuery) + ? findSimilarJobPostings(jobPosting, jdQuery, jdLimit) + : List.of(); + + List questionReferences = StringUtils.hasText(questionQuery) + ? findSimilarQuestions(jobPosting, questionQuery, questionLimit) + : List.of(); + + return new AnalysisReferenceContext(jobPostingReferences, questionReferences); + } + + private List findSimilarJobPostings(JobPosting jobPosting, String query, int limit) { + String sql = """ + SELECT + c.id, + c.company_name, + c.role_l3, + c.responsibilities, + c.requirements, + c.preferred, + e.embedding <=> ? AS distance + FROM mock_job_posting_embeddings e + JOIN mock_job_posting_corpus c ON e.corpus_id = c.id + WHERE c.is_valid_for_embedding = true + AND (c.detail_classification_id = ? OR c.job_group_l1 = ?) + ORDER BY e.embedding <=> ? + LIMIT ? + """; + + float[] vector = corpusEmbeddingClient.embedQuery(query); + List result = new ArrayList<>(); + + try (Connection connection = dataSource.getConnection()) { + PGvector.registerTypes(connection); + try (PreparedStatement statement = connection.prepareStatement(sql)) { + statement.setObject(1, new PGvector(vector)); + statement.setObject(2, jobPosting.getDetailClassification().getId()); + statement.setString(3, jobPosting.getDetailClassification().getMiddleClassification().getClassification().getBigName()); + statement.setObject(4, new PGvector(vector)); + statement.setInt(5, limit); + + try (ResultSet rs = statement.executeQuery()) { + while (rs.next()) { + result.add(new RetrievedJobPostingReference( + rs.getLong("id"), + rs.getString("company_name"), + rs.getString("role_l3"), + rs.getString("responsibilities"), + rs.getString("requirements"), + rs.getString("preferred"), + rs.getDouble("distance") + )); + } + } + } + } catch (SQLException e) { + throw new IllegalStateException("유사 JD 검색 중 오류가 발생했습니다.", e); + } + + return result; + } + + private List findSimilarQuestions(JobPosting jobPosting, String query, int limit) { + String sql = """ + SELECT + c.id, + c.company_name, + c.role_l3, + c.question_type, + c.char_limit, + c.question_text, + e.embedding <=> ? AS distance + FROM mock_question_embeddings e + JOIN mock_question_corpus c ON e.corpus_id = c.id + WHERE c.is_valid_for_embedding = true + AND (c.detail_classification_id = ? OR c.job_group_l1 = ?) + ORDER BY e.embedding <=> ? + LIMIT ? + """; + + float[] vector = corpusEmbeddingClient.embedQuery(query); + List result = new ArrayList<>(); + + try (Connection connection = dataSource.getConnection()) { + PGvector.registerTypes(connection); + try (PreparedStatement statement = connection.prepareStatement(sql)) { + statement.setObject(1, new PGvector(vector)); + statement.setObject(2, jobPosting.getDetailClassification().getId()); + statement.setString(3, jobPosting.getDetailClassification().getMiddleClassification().getClassification().getBigName()); + statement.setObject(4, new PGvector(vector)); + statement.setInt(5, limit); + + try (ResultSet rs = statement.executeQuery()) { + while (rs.next()) { + result.add(new RetrievedQuestionReference( + rs.getLong("id"), + rs.getString("company_name"), + rs.getString("role_l3"), + rs.getString("question_type"), + getNullableInt(rs, "char_limit"), + rs.getString("question_text"), + rs.getDouble("distance") + )); + } + } + } + } catch (SQLException e) { + throw new IllegalStateException("유사 문항 검색 중 오류가 발생했습니다.", e); + } + + return result; + } + + private Integer getNullableInt(ResultSet rs, String columnName) throws SQLException { + int value = rs.getInt(columnName); + return rs.wasNull() ? null : value; + } + + private String buildJobPostingQuery(JobPosting jobPosting) { + return """ + 회사명: %s + 직무명: %s + 주요 업무: + %s + 자격 요건: + %s + 우대 사항: + %s + """.formatted( + defaultString(jobPosting.getCompany().getName()), + defaultString(jobPosting.getDetailClassification().getDetailName()), + defaultString(jobPosting.getTask()), + defaultString(jobPosting.getRequirement()), + defaultString(jobPosting.getPreferred()) + ).trim(); + } + + private String buildQuestionQuery(JobPosting jobPosting, List questions) { + String questionText = questions.stream() + .map(Question::getContent) + .filter(StringUtils::hasText) + .map(text -> "- " + text) + .reduce("", (left, right) -> left + "\n" + right) + .trim(); + + return """ + 회사명: %s + 직무명: %s + 자소서 문항: + %s + """.formatted( + defaultString(jobPosting.getCompany().getName()), + defaultString(jobPosting.getDetailClassification().getDetailName()), + questionText + ).trim(); + } + + private String defaultString(String value) { + return value == null ? "" : value; + } + + public record AnalysisReferenceContext( + List jobPostingReferences, + List questionReferences + ) { + } + + public record RetrievedJobPostingReference( + Long corpusId, + String companyName, + String roleName, + String responsibilities, + String requirements, + String preferred, + double distance + ) { + } + + public record RetrievedQuestionReference( + Long corpusId, + String companyName, + String roleName, + String questionType, + Integer charLimit, + String questionText, + double distance + ) { + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java index f5d747a..84390f8 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java @@ -25,11 +25,8 @@ public class CohereCorpusEmbeddingClient implements CorpusEmbeddingClient { @Value("${app.corpus.embedding.output-dimension:1024}") private int outputDimension; - @Value("${app.corpus.embedding.document-input-type:search_document}") - private String documentInputType; - @Override - public List embed(List texts) { + public List embed(List texts, InputType inputType) { if (!StringUtils.hasText(cohereApiKey)) { throw new IllegalStateException("Cohere API 키가 설정되지 않았습니다."); } @@ -48,7 +45,7 @@ public List embed(List texts) { .body(new EmbedRequest( texts, embeddingModel, - documentInputType, + inputType.value(), outputDimension, List.of("float") )) diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java index 184791f..eaa00c8 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java @@ -3,5 +3,32 @@ import java.util.List; public interface CorpusEmbeddingClient { - List embed(List texts); + List embed(List texts, InputType inputType); + + default List embedDocuments(List texts) { + return embed(texts, InputType.SEARCH_DOCUMENT); + } + + default float[] embedQuery(String text) { + List embeddings = embed(List.of(text), InputType.SEARCH_QUERY); + if (embeddings.isEmpty()) { + throw new IllegalStateException("쿼리 임베딩 결과가 비어 있습니다."); + } + return embeddings.getFirst(); + } + + enum InputType { + SEARCH_DOCUMENT("search_document"), + SEARCH_QUERY("search_query"); + + private final String value; + + InputType(String value) { + this.value = value; + } + + public String value() { + return value; + } + } } diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java index da15910..82255a4 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java @@ -81,7 +81,7 @@ public int syncQuestionEmbeddings(Integer limit) { private int upsertJobPostingEmbeddings(List corpusList) { int processed = 0; for (List batch : partition(corpusList, batchSize)) { - List embeddings = corpusEmbeddingClient.embed( + List embeddings = corpusEmbeddingClient.embedDocuments( batch.stream().map(MockJobPostingCorpus::getEmbeddingText).toList() ); upsertVectors(UPSERT_JOB_POSTING_SQL, batch.stream().map(MockJobPostingCorpus::getId).toList(), embeddings); @@ -93,7 +93,7 @@ private int upsertJobPostingEmbeddings(List corpusList) { private int upsertQuestionEmbeddings(List corpusList) { int processed = 0; for (List batch : partition(corpusList, batchSize)) { - List embeddings = corpusEmbeddingClient.embed( + List embeddings = corpusEmbeddingClient.embedDocuments( batch.stream().map(MockQuestionCorpus::getEmbeddingText).toList() ); upsertVectors(UPSERT_QUESTION_SQL, batch.stream().map(MockQuestionCorpus::getId).toList(), embeddings); diff --git a/src/main/resources/application-dev.yaml b/src/main/resources/application-dev.yaml index 6fc91f0..f83055c 100644 --- a/src/main/resources/application-dev.yaml +++ b/src/main/resources/application-dev.yaml @@ -80,6 +80,10 @@ app: output-dimension: ${APP_CORPUS_EMBEDDING_OUTPUT_DIMENSION:1024} document-input-type: ${APP_CORPUS_EMBEDDING_DOCUMENT_INPUT_TYPE:search_document} batch-size: ${APP_CORPUS_EMBEDDING_BATCH_SIZE:32} + analysis: + retrieval: + jd-limit: ${APP_ANALYSIS_RETRIEVAL_JD_LIMIT:3} + question-limit: ${APP_ANALYSIS_RETRIEVAL_QUESTION_LIMIT:5} server: port: 8080 diff --git a/src/main/resources/application-prod.yaml b/src/main/resources/application-prod.yaml index 07b08e8..ecfc62e 100644 --- a/src/main/resources/application-prod.yaml +++ b/src/main/resources/application-prod.yaml @@ -80,6 +80,10 @@ app: output-dimension: ${APP_CORPUS_EMBEDDING_OUTPUT_DIMENSION:1024} document-input-type: ${APP_CORPUS_EMBEDDING_DOCUMENT_INPUT_TYPE:search_document} batch-size: ${APP_CORPUS_EMBEDDING_BATCH_SIZE:32} + analysis: + retrieval: + jd-limit: ${APP_ANALYSIS_RETRIEVAL_JD_LIMIT:3} + question-limit: ${APP_ANALYSIS_RETRIEVAL_QUESTION_LIMIT:5} server: port: 8080 diff --git a/src/main/resources/schema.sql b/src/main/resources/schema.sql index 4902207..27e5d3d 100644 --- a/src/main/resources/schema.sql +++ b/src/main/resources/schema.sql @@ -5,7 +5,7 @@ CREATE TABLE IF NOT EXISTS mock_job_posting_embeddings ( id BIGSERIAL PRIMARY KEY, corpus_id BIGINT NOT NULL UNIQUE REFERENCES mock_job_posting_corpus(id) ON DELETE CASCADE, embedding_model VARCHAR(100) NOT NULL, - embedding vector NOT NULL, + embedding vector(1024) NOT NULL, created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); @@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS mock_question_embeddings ( id BIGSERIAL PRIMARY KEY, corpus_id BIGINT NOT NULL UNIQUE REFERENCES mock_question_corpus(id) ON DELETE CASCADE, embedding_model VARCHAR(100) NOT NULL, - embedding vector NOT NULL, + embedding vector(1024) NOT NULL, created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ); @@ -33,3 +33,9 @@ CREATE INDEX IF NOT EXISTS idx_mock_job_posting_embeddings_corpus CREATE INDEX IF NOT EXISTS idx_mock_question_embeddings_corpus ON mock_question_embeddings (corpus_id); + +CREATE INDEX IF NOT EXISTS idx_mock_job_posting_embeddings_hnsw + ON mock_job_posting_embeddings USING hnsw (embedding vector_cosine_ops); + +CREATE INDEX IF NOT EXISTS idx_mock_question_embeddings_hnsw + ON mock_question_embeddings USING hnsw (embedding vector_cosine_ops); From 7a16249073b27a3c4464fb2b87203c75d401ee10 Mon Sep 17 00:00:00 2001 From: shinae1023 Date: Mon, 15 Jun 2026 13:33:50 +0900 Subject: [PATCH 2/5] =?UTF-8?q?[Feat]=20=EC=9C=A0=EC=82=AC=20jd=20?= =?UTF-8?q?=ED=99=95=EC=9D=B8=20admin=20API=20(#24)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../controller/AnalysisAdminController.java | 37 +++++++++ .../AnalysisRetrievalPreviewRequest.java | 11 +++ .../AnalysisRetrievalPreviewResponse.java | 50 ++++++++++++ .../service/AnalysisAdminDebugService.java | 80 +++++++++++++++++++ 4 files changed, 178 insertions(+) create mode 100644 src/main/java/com/jobdri/jobdri_api/domain/analysis/controller/AnalysisAdminController.java create mode 100644 src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/request/AnalysisRetrievalPreviewRequest.java create mode 100644 src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/response/AnalysisRetrievalPreviewResponse.java create mode 100644 src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAdminDebugService.java diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/controller/AnalysisAdminController.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/controller/AnalysisAdminController.java new file mode 100644 index 0000000..5b153b6 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/controller/AnalysisAdminController.java @@ -0,0 +1,37 @@ +package com.jobdri.jobdri_api.domain.analysis.controller; + +import com.jobdri.jobdri_api.domain.analysis.dto.request.AnalysisRetrievalPreviewRequest; +import com.jobdri.jobdri_api.domain.analysis.dto.response.AnalysisRetrievalPreviewResponse; +import com.jobdri.jobdri_api.domain.analysis.service.AnalysisAdminDebugService; +import com.jobdri.jobdri_api.global.apiPayload.ApiResponse; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.validation.Valid; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/api/admin/analysis") +@Tag(name = "AnalysisAdmin", description = "관리자용 자소서 분석 디버그 API") +public class AnalysisAdminController { + + private final AnalysisAdminDebugService analysisAdminDebugService; + + @Operation( + summary = "분석 retrieval 미리보기", + description = "mockApplyId를 기준으로 실제 분석 전에 조회되는 유사 JD/문항 검색 결과를 반환합니다." + ) + @PostMapping("/retrieval-preview") + public ApiResponse previewRetrieval( + @Valid @RequestBody AnalysisRetrievalPreviewRequest request + ) { + return ApiResponse.onSuccess( + "분석 retrieval 미리보기에 성공했습니다.", + analysisAdminDebugService.previewRetrieval(request.mockApplyId()) + ); + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/request/AnalysisRetrievalPreviewRequest.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/request/AnalysisRetrievalPreviewRequest.java new file mode 100644 index 0000000..7c05701 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/request/AnalysisRetrievalPreviewRequest.java @@ -0,0 +1,11 @@ +package com.jobdri.jobdri_api.domain.analysis.dto.request; + +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Positive; + +public record AnalysisRetrievalPreviewRequest( + @NotNull(message = "mockApplyId는 필수입니다.") + @Positive(message = "mockApplyId는 1 이상이어야 합니다.") + Long mockApplyId +) { +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/response/AnalysisRetrievalPreviewResponse.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/response/AnalysisRetrievalPreviewResponse.java new file mode 100644 index 0000000..24719d7 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/dto/response/AnalysisRetrievalPreviewResponse.java @@ -0,0 +1,50 @@ +package com.jobdri.jobdri_api.domain.analysis.dto.response; + +import java.util.List; + +public record AnalysisRetrievalPreviewResponse( + Long mockApplyId, + JobPostingSnapshot jobPosting, + List questions, + List similarJobPostings, + List similarQuestions +) { + public record JobPostingSnapshot( + Long jobPostingId, + String companyName, + String detailClassificationName, + String task, + String requirement, + String preferred + ) { + } + + public record QuestionSnapshot( + Long questionId, + String content, + String answer + ) { + } + + public record JobPostingReference( + Long corpusId, + String companyName, + String roleName, + String responsibilities, + String requirements, + String preferred, + double distance + ) { + } + + public record QuestionReference( + Long corpusId, + String companyName, + String roleName, + String questionType, + Integer charLimit, + String questionText, + double distance + ) { + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAdminDebugService.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAdminDebugService.java new file mode 100644 index 0000000..99287b8 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisAdminDebugService.java @@ -0,0 +1,80 @@ +package com.jobdri.jobdri_api.domain.analysis.service; + +import com.jobdri.jobdri_api.domain.analysis.dto.response.AnalysisRetrievalPreviewResponse; +import com.jobdri.jobdri_api.domain.analysis.entity.Question; +import com.jobdri.jobdri_api.domain.analysis.repository.QuestionRepository; +import com.jobdri.jobdri_api.domain.analysis.service.AnalysisReferenceRetrievalService.AnalysisReferenceContext; +import com.jobdri.jobdri_api.domain.jobposting.entity.JobPosting; +import com.jobdri.jobdri_api.domain.mockapply.entity.MockApply; +import com.jobdri.jobdri_api.domain.mockapply.repository.MockApplyRepository; +import com.jobdri.jobdri_api.global.apiPayload.code.GeneralErrorCode; +import com.jobdri.jobdri_api.global.apiPayload.exception.GeneralException; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.util.List; + +@Service +@RequiredArgsConstructor +@Transactional(readOnly = true) +public class AnalysisAdminDebugService { + + private final MockApplyRepository mockApplyRepository; + private final QuestionRepository questionRepository; + private final AnalysisReferenceRetrievalService analysisReferenceRetrievalService; + + public AnalysisRetrievalPreviewResponse previewRetrieval(Long mockApplyId) { + MockApply mockApply = mockApplyRepository.findByIdWithJobPosting(mockApplyId) + .orElseThrow(() -> new GeneralException( + GeneralErrorCode.MOCK_APPLY_NOT_FOUND, + "해당 모의 서류 지원을 찾을 수 없습니다. mockApplyId=" + mockApplyId + )); + List questions = questionRepository.findAllByMockApplyIdOrderByIdAsc(mockApplyId); + JobPosting jobPosting = mockApply.getJobPosting(); + + AnalysisReferenceContext referenceContext = + analysisReferenceRetrievalService.retrieve(jobPosting, questions); + + return new AnalysisRetrievalPreviewResponse( + mockApply.getId(), + new AnalysisRetrievalPreviewResponse.JobPostingSnapshot( + jobPosting.getId(), + jobPosting.getCompany().getName(), + jobPosting.getDetailClassification().getDetailName(), + jobPosting.getTask(), + jobPosting.getRequirement(), + jobPosting.getPreferred() + ), + questions.stream() + .map(question -> new AnalysisRetrievalPreviewResponse.QuestionSnapshot( + question.getId(), + question.getContent(), + question.getAnswer() + )) + .toList(), + referenceContext.jobPostingReferences().stream() + .map(reference -> new AnalysisRetrievalPreviewResponse.JobPostingReference( + reference.corpusId(), + reference.companyName(), + reference.roleName(), + reference.responsibilities(), + reference.requirements(), + reference.preferred(), + reference.distance() + )) + .toList(), + referenceContext.questionReferences().stream() + .map(reference -> new AnalysisRetrievalPreviewResponse.QuestionReference( + reference.corpusId(), + reference.companyName(), + reference.roleName(), + reference.questionType(), + reference.charLimit(), + reference.questionText(), + reference.distance() + )) + .toList() + ); + } +} From 4b72e3ed493eee1f9c56a10762eba0471da34e2a Mon Sep 17 00:00:00 2001 From: shinae1023 Date: Mon, 15 Jun 2026 13:40:53 +0900 Subject: [PATCH 3/5] =?UTF-8?q?[Feat]=20=EC=9C=A0=EC=82=AC=20jd=20?= =?UTF-8?q?=EA=B2=80=EC=83=89=20=EA=B8=B0=EC=A4=80=EC=9D=84=20=EC=B6=94?= =?UTF-8?q?=EC=B6=9C=ED=95=9C=20jobposting=EC=9C=BC=EB=A1=9C=20=EC=84=A4?= =?UTF-8?q?=EC=A0=95=20(#24)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AnalysisReferenceRetrievalService.java | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java index 9721516..eb071de 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java @@ -156,20 +156,33 @@ private Integer getNullableInt(ResultSet rs, String columnName) throws SQLExcept private String buildJobPostingQuery(JobPosting jobPosting) { return """ - 회사명: %s 직무명: %s - 주요 업무: - %s + 자격 요건: %s + 우대 사항: %s + + 주요 업무: + %s + + 핵심 요구 역량 요약: + %s + + 우대 역량 요약: + %s + + 참고 회사명: + %s """.formatted( - defaultString(jobPosting.getCompany().getName()), defaultString(jobPosting.getDetailClassification().getDetailName()), + defaultString(jobPosting.getRequirement()), + defaultString(jobPosting.getPreferred()), defaultString(jobPosting.getTask()), defaultString(jobPosting.getRequirement()), - defaultString(jobPosting.getPreferred()) + defaultString(jobPosting.getPreferred()), + defaultString(jobPosting.getCompany().getName()) ).trim(); } @@ -182,14 +195,18 @@ private String buildQuestionQuery(JobPosting jobPosting, List question .trim(); return """ - 회사명: %s 직무명: %s + 자격 요건: %s + 우대 사항: %s 자소서 문항: %s + 참고 회사명: %s """.formatted( - defaultString(jobPosting.getCompany().getName()), defaultString(jobPosting.getDetailClassification().getDetailName()), - questionText + defaultString(jobPosting.getRequirement()), + defaultString(jobPosting.getPreferred()), + questionText, + defaultString(jobPosting.getCompany().getName()) ).trim(); } From e1fe657f22121684b2456d582ec024762d52ef36 Mon Sep 17 00:00:00 2001 From: shinae1023 Date: Mon, 15 Jun 2026 13:42:48 +0900 Subject: [PATCH 4/5] =?UTF-8?q?[Feat]=20jd=20=EA=B2=80=EC=83=89=20fallback?= =?UTF-8?q?=20=EB=A1=9C=EC=A7=81=20=EC=B6=94=EA=B0=80=20(#24)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AnalysisReferenceRetrievalService.java | 255 ++++++++++++++---- 1 file changed, 207 insertions(+), 48 deletions(-) diff --git a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java index eb071de..f2b6dc9 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/analysis/service/AnalysisReferenceRetrievalService.java @@ -48,7 +48,7 @@ public AnalysisReferenceContext retrieve(JobPosting jobPosting, List q } private List findSimilarJobPostings(JobPosting jobPosting, String query, int limit) { - String sql = """ + String companyAndDetailSql = """ SELECT c.id, c.company_name, @@ -60,46 +60,113 @@ private List findSimilarJobPostings(JobPosting job FROM mock_job_posting_embeddings e JOIN mock_job_posting_corpus c ON e.corpus_id = c.id WHERE c.is_valid_for_embedding = true - AND (c.detail_classification_id = ? OR c.job_group_l1 = ?) + AND c.detail_classification_id = ? + AND lower(c.company_name) = lower(?) + ORDER BY e.embedding <=> ? + LIMIT ? + """; + String detailOnlySql = """ + SELECT + c.id, + c.company_name, + c.role_l3, + c.responsibilities, + c.requirements, + c.preferred, + e.embedding <=> ? AS distance + FROM mock_job_posting_embeddings e + JOIN mock_job_posting_corpus c ON e.corpus_id = c.id + WHERE c.is_valid_for_embedding = true + AND c.detail_classification_id = ? + ORDER BY e.embedding <=> ? + LIMIT ? + """; + String hierarchySql = """ + SELECT + c.id, + c.company_name, + c.role_l3, + c.responsibilities, + c.requirements, + c.preferred, + e.embedding <=> ? AS distance + FROM mock_job_posting_embeddings e + JOIN mock_job_posting_corpus c ON e.corpus_id = c.id + WHERE c.is_valid_for_embedding = true + AND c.job_group_l1 = ? + AND c.job_family_l2 = ? ORDER BY e.embedding <=> ? LIMIT ? """; float[] vector = corpusEmbeddingClient.embedQuery(query); - List result = new ArrayList<>(); - try (Connection connection = dataSource.getConnection()) { PGvector.registerTypes(connection); - try (PreparedStatement statement = connection.prepareStatement(sql)) { - statement.setObject(1, new PGvector(vector)); - statement.setObject(2, jobPosting.getDetailClassification().getId()); - statement.setString(3, jobPosting.getDetailClassification().getMiddleClassification().getClassification().getBigName()); - statement.setObject(4, new PGvector(vector)); - statement.setInt(5, limit); - - try (ResultSet rs = statement.executeQuery()) { - while (rs.next()) { - result.add(new RetrievedJobPostingReference( - rs.getLong("id"), - rs.getString("company_name"), - rs.getString("role_l3"), - rs.getString("responsibilities"), - rs.getString("requirements"), - rs.getString("preferred"), - rs.getDouble("distance") - )); + + List companyAndDetail = queryJobPostingReferences( + connection, + companyAndDetailSql, + vector, + statement -> { + statement.setObject(2, jobPosting.getDetailClassification().getId()); + statement.setString(3, jobPosting.getCompany().getName()); + statement.setObject(4, new PGvector(vector)); + statement.setInt(5, limit); } - } + ); + if (!companyAndDetail.isEmpty()) { + return companyAndDetail; + } + + List detailOnly = queryJobPostingReferences( + connection, + detailOnlySql, + vector, + statement -> { + statement.setObject(2, jobPosting.getDetailClassification().getId()); + statement.setObject(3, new PGvector(vector)); + statement.setInt(4, limit); + } + ); + if (!detailOnly.isEmpty()) { + return detailOnly; } + + return queryJobPostingReferences( + connection, + hierarchySql, + vector, + statement -> { + statement.setString(2, jobPosting.getDetailClassification().getMiddleClassification().getClassification().getBigName()); + statement.setString(3, jobPosting.getDetailClassification().getMiddleClassification().getMiddleName()); + statement.setObject(4, new PGvector(vector)); + statement.setInt(5, limit); + } + ); } catch (SQLException e) { throw new IllegalStateException("유사 JD 검색 중 오류가 발생했습니다.", e); } - - return result; } private List findSimilarQuestions(JobPosting jobPosting, String query, int limit) { - String sql = """ + String companyAndDetailSql = """ + SELECT + c.id, + c.company_name, + c.role_l3, + c.question_type, + c.char_limit, + c.question_text, + e.embedding <=> ? AS distance + FROM mock_question_embeddings e + JOIN mock_question_corpus c ON e.corpus_id = c.id + WHERE c.is_valid_for_embedding = true + AND c.detail_classification_id = ? + AND lower(c.company_name) = lower(?) + ORDER BY e.embedding <=> ? + LIMIT ? + """; + String detailOnlySql = """ SELECT c.id, c.company_name, @@ -111,41 +178,128 @@ private List findSimilarQuestions(JobPosting jobPost FROM mock_question_embeddings e JOIN mock_question_corpus c ON e.corpus_id = c.id WHERE c.is_valid_for_embedding = true - AND (c.detail_classification_id = ? OR c.job_group_l1 = ?) + AND c.detail_classification_id = ? + ORDER BY e.embedding <=> ? + LIMIT ? + """; + String hierarchySql = """ + SELECT + c.id, + c.company_name, + c.role_l3, + c.question_type, + c.char_limit, + c.question_text, + e.embedding <=> ? AS distance + FROM mock_question_embeddings e + JOIN mock_question_corpus c ON e.corpus_id = c.id + WHERE c.is_valid_for_embedding = true + AND c.job_group_l1 = ? + AND c.job_family_l2 = ? ORDER BY e.embedding <=> ? LIMIT ? """; float[] vector = corpusEmbeddingClient.embedQuery(query); - List result = new ArrayList<>(); - try (Connection connection = dataSource.getConnection()) { PGvector.registerTypes(connection); - try (PreparedStatement statement = connection.prepareStatement(sql)) { - statement.setObject(1, new PGvector(vector)); - statement.setObject(2, jobPosting.getDetailClassification().getId()); - statement.setString(3, jobPosting.getDetailClassification().getMiddleClassification().getClassification().getBigName()); - statement.setObject(4, new PGvector(vector)); - statement.setInt(5, limit); - - try (ResultSet rs = statement.executeQuery()) { - while (rs.next()) { - result.add(new RetrievedQuestionReference( - rs.getLong("id"), - rs.getString("company_name"), - rs.getString("role_l3"), - rs.getString("question_type"), - getNullableInt(rs, "char_limit"), - rs.getString("question_text"), - rs.getDouble("distance") - )); + + List companyAndDetail = queryQuestionReferences( + connection, + companyAndDetailSql, + vector, + statement -> { + statement.setObject(2, jobPosting.getDetailClassification().getId()); + statement.setString(3, jobPosting.getCompany().getName()); + statement.setObject(4, new PGvector(vector)); + statement.setInt(5, limit); } - } + ); + if (!companyAndDetail.isEmpty()) { + return companyAndDetail; + } + + List detailOnly = queryQuestionReferences( + connection, + detailOnlySql, + vector, + statement -> { + statement.setObject(2, jobPosting.getDetailClassification().getId()); + statement.setObject(3, new PGvector(vector)); + statement.setInt(4, limit); + } + ); + if (!detailOnly.isEmpty()) { + return detailOnly; } + + return queryQuestionReferences( + connection, + hierarchySql, + vector, + statement -> { + statement.setString(2, jobPosting.getDetailClassification().getMiddleClassification().getClassification().getBigName()); + statement.setString(3, jobPosting.getDetailClassification().getMiddleClassification().getMiddleName()); + statement.setObject(4, new PGvector(vector)); + statement.setInt(5, limit); + } + ); } catch (SQLException e) { throw new IllegalStateException("유사 문항 검색 중 오류가 발생했습니다.", e); } + } + + private List queryJobPostingReferences( + Connection connection, + String sql, + float[] vector, + StatementBinder binder + ) throws SQLException { + List result = new ArrayList<>(); + try (PreparedStatement statement = connection.prepareStatement(sql)) { + statement.setObject(1, new PGvector(vector)); + binder.bind(statement); + try (ResultSet rs = statement.executeQuery()) { + while (rs.next()) { + result.add(new RetrievedJobPostingReference( + rs.getLong("id"), + rs.getString("company_name"), + rs.getString("role_l3"), + rs.getString("responsibilities"), + rs.getString("requirements"), + rs.getString("preferred"), + rs.getDouble("distance") + )); + } + } + } + return result; + } + private List queryQuestionReferences( + Connection connection, + String sql, + float[] vector, + StatementBinder binder + ) throws SQLException { + List result = new ArrayList<>(); + try (PreparedStatement statement = connection.prepareStatement(sql)) { + statement.setObject(1, new PGvector(vector)); + binder.bind(statement); + try (ResultSet rs = statement.executeQuery()) { + while (rs.next()) { + result.add(new RetrievedQuestionReference( + rs.getLong("id"), + rs.getString("company_name"), + rs.getString("role_l3"), + rs.getString("question_type"), + getNullableInt(rs, "char_limit"), + rs.getString("question_text"), + rs.getDouble("distance") + )); + } + } + } return result; } @@ -241,4 +395,9 @@ public record RetrievedQuestionReference( double distance ) { } + + @FunctionalInterface + private interface StatementBinder { + void bind(PreparedStatement statement) throws SQLException; + } } From ab389af0b5fa40c0ee962d8d29514d351d9dbaf4 Mon Sep 17 00:00:00 2001 From: shinae1023 Date: Mon, 15 Jun 2026 13:50:50 +0900 Subject: [PATCH 5/5] =?UTF-8?q?[Fix]=20=EC=BD=94=EB=93=9C=EB=A6=AC?= =?UTF-8?q?=EB=B7=B0=20=EB=B0=98=EC=98=81=20(#26)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DetailClassificationRepository.java | 4 +- .../controller/CorpusAdminController.java | 36 ++++++++++- .../MockJobPostingCorpusRepository.java | 5 +- .../MockQuestionCorpusRepository.java | 5 +- .../corpus/service/BootstrapAdminService.java | 6 +- .../service/CohereCorpusEmbeddingClient.java | 7 +++ .../corpus/service/CorpusAdminRunner.java | 14 ++++- .../service/CorpusClassificationResolver.java | 15 ++++- .../service/CorpusEmbeddingSyncService.java | 32 +++++----- .../corpus/service/CorpusImportService.java | 61 ++++++++++++++++--- src/main/resources/application-dev.yaml | 1 + src/main/resources/application-prod.yaml | 1 + src/main/resources/application.yaml | 2 + 13 files changed, 152 insertions(+), 37 deletions(-) diff --git a/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java index bc65caf..4e3ec52 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java @@ -11,8 +11,8 @@ public interface DetailClassificationRepository extends JpaRepository { List findAllByMiddleClassificationId(Long middleClassificationId); - Optional findByDetailName(String detailName); - long countByDetailName(String detailName); + Optional findByDetailNameIgnoreCase(String detailName); + long countByDetailNameIgnoreCase(String detailName); @Query(""" SELECT dc diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java index 7fa4253..8ac7132 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java @@ -7,16 +7,20 @@ import com.jobdri.jobdri_api.domain.corpus.service.CorpusImportResult; import com.jobdri.jobdri_api.domain.corpus.service.CorpusImportService; import com.jobdri.jobdri_api.global.apiPayload.ApiResponse; +import com.jobdri.jobdri_api.global.apiPayload.code.GeneralErrorCode; +import com.jobdri.jobdri_api.global.apiPayload.exception.GeneralException; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import jakarta.validation.Valid; import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import java.io.IOException; +import java.nio.file.InvalidPathException; import java.nio.file.Path; @RestController @@ -28,14 +32,18 @@ public class CorpusAdminController { private final CorpusImportService corpusImportService; private final CorpusEmbeddingSyncService corpusEmbeddingSyncService; + @Value("${app.corpus.import.allowed-root:}") + private String allowedImportRoot; + @Operation(summary = "corpus 엑셀 적재", description = "관리자가 xlsx 파일 경로를 넘겨 corpus 원본 테이블에 적재합니다.") @PostMapping("/import") public ApiResponse importCorpus( @Valid @RequestBody CorpusImportRequest request ) throws IOException { + Path validatedPath = validateImportPath(request.filePath()); return ApiResponse.onSuccess( "corpus 엑셀 적재에 성공했습니다.", - corpusImportService.importFromXlsx(Path.of(request.filePath())) + corpusImportService.importFromXlsx(validatedPath) ); } @@ -49,4 +57,30 @@ public ApiResponse syncEmbeddings( corpusEmbeddingSyncService.syncAll(request.limit()) ); } + + private Path validateImportPath(String rawPath) { + if (allowedImportRoot == null || allowedImportRoot.isBlank()) { + throw new GeneralException( + GeneralErrorCode.SERVICE_UNAVAILABLE, + "corpus import 허용 경로가 설정되지 않았습니다." + ); + } + + try { + Path normalizedPath = Path.of(rawPath).toAbsolutePath().normalize(); + Path allowedRootPath = Path.of(allowedImportRoot).toAbsolutePath().normalize(); + if (!normalizedPath.startsWith(allowedRootPath)) { + throw new GeneralException( + GeneralErrorCode.INVALID_PARAMETER, + "허용된 import 경로 밖의 파일에는 접근할 수 없습니다." + ); + } + return normalizedPath; + } catch (InvalidPathException e) { + throw new GeneralException( + GeneralErrorCode.INVALID_PARAMETER, + "유효하지 않은 파일 경로입니다." + ); + } + } } diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java index 8b905bc..0f36868 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java @@ -2,6 +2,7 @@ import com.jobdri.jobdri_api.domain.corpus.entity.MockJobPostingCorpus; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.domain.Pageable; import java.util.List; import java.util.Optional; @@ -11,5 +12,7 @@ public interface MockJobPostingCorpusRepository extends JpaRepository findAllByCompanyId(Long companyId); - List findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); + List findAllByValidForEmbeddingTrueOrderByIdAsc(Pageable pageable); + + List findAllByValidForEmbeddingTrueOrderByIdAsc(); } diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java index 239af0a..6d3f7d2 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java @@ -2,6 +2,7 @@ import com.jobdri.jobdri_api.domain.corpus.entity.MockQuestionCorpus; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.domain.Pageable; import java.util.List; import java.util.Optional; @@ -13,5 +14,7 @@ public interface MockQuestionCorpusRepository extends JpaRepository findAllByCompanyId(Long companyId); - List findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); + List findAllByValidForEmbeddingTrueOrderByIdAsc(Pageable pageable); + + List findAllByValidForEmbeddingTrueOrderByIdAsc(); } diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java index 4d1b602..dfd6641 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java @@ -31,7 +31,11 @@ public void promoteConfiguredAdmins() { .toList(); for (String email : emails) { - userRepository.findByEmail(email).ifPresent(this::promoteIfNeeded); + userRepository.findByEmail(email) + .ifPresentOrElse( + this::promoteIfNeeded, + () -> log.warn("bootstrap admin 대상 사용자를 찾지 못했습니다. email={}", email) + ); } } diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java index 84390f8..7d4ffb4 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java @@ -4,10 +4,12 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; +import org.springframework.http.client.SimpleClientHttpRequestFactory; import org.springframework.stereotype.Component; import org.springframework.util.StringUtils; import org.springframework.web.client.RestClient; +import java.time.Duration; import java.util.List; @Component @@ -34,8 +36,13 @@ public List embed(List texts, InputType inputType) { return List.of(); } + SimpleClientHttpRequestFactory requestFactory = new SimpleClientHttpRequestFactory(); + requestFactory.setConnectTimeout(Duration.ofSeconds(5)); + requestFactory.setReadTimeout(Duration.ofSeconds(10)); + RestClient client = restClientBuilder .baseUrl("https://api.cohere.com") + .requestFactory(requestFactory) .defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + cohereApiKey) .defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE) .build(); diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java index 54d203a..3698b97 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java @@ -33,12 +33,20 @@ public void run(ApplicationArguments args) throws Exception { bootstrapAdminService.promoteConfiguredAdmins(); if (runImportOnStartup && StringUtils.hasText(importXlsxPath)) { - CorpusImportResult result = corpusImportService.importFromXlsx(Path.of(importXlsxPath)); - log.info("corpus import 완료: {}", result); + try { + CorpusImportResult result = corpusImportService.importFromXlsx(Path.of(importXlsxPath)); + log.info("corpus import 완료: {}", result); + } catch (Exception e) { + log.error("startup corpus import 실패. path={}", importXlsxPath, e); + } } if (syncEmbeddingsOnStartup) { - log.info("corpus embedding sync 완료: {}", corpusEmbeddingSyncService.syncAll(null)); + try { + log.info("corpus embedding sync 완료: {}", corpusEmbeddingSyncService.syncAll(null)); + } catch (Exception e) { + log.error("startup corpus embedding sync 실패", e); + } } } } diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java index 99842d6..bb5577f 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java @@ -4,6 +4,8 @@ import com.jobdri.jobdri_api.domain.classification.repository.DetailClassificationRepository; import com.jobdri.jobdri_api.domain.corpus.entity.CorpusClassificationMapping; import com.jobdri.jobdri_api.domain.corpus.repository.CorpusClassificationMappingRepository; +import com.jobdri.jobdri_api.global.apiPayload.code.GeneralErrorCode; +import com.jobdri.jobdri_api.global.apiPayload.exception.GeneralException; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; @@ -54,8 +56,8 @@ public Optional resolve( } } - if (detailClassificationRepository.countByDetailName(normalizedRole) == 1) { - return detailClassificationRepository.findByDetailName(normalizedRole); + if (detailClassificationRepository.countByDetailNameIgnoreCase(normalizedRole) == 1) { + return detailClassificationRepository.findByDetailNameIgnoreCase(normalizedRole); } return Optional.empty(); @@ -72,6 +74,15 @@ public CorpusClassificationMapping registerMapping( String normalizedJobFamily = normalize(jobFamilyL2); String normalizedRole = normalize(roleL3); + if (!StringUtils.hasText(normalizedJobGroup) + || !StringUtils.hasText(normalizedJobFamily) + || !StringUtils.hasText(normalizedRole)) { + throw new GeneralException( + GeneralErrorCode.INVALID_PARAMETER, + "분류 매핑을 등록하려면 대분류, 중분류, 소분류가 모두 필요합니다." + ); + } + return mappingRepository .findBySourceJobGroupL1AndSourceJobFamilyL2AndSourceRoleL3( normalizedJobGroup, diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java index 82255a4..7ffcb93 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java @@ -8,8 +8,10 @@ import com.pgvector.PGvector; import lombok.RequiredArgsConstructor; import org.springframework.beans.factory.annotation.Value; +import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import org.springframework.jdbc.datasource.DataSourceUtils; import javax.sql.DataSource; import java.sql.Connection; @@ -55,26 +57,26 @@ ON CONFLICT (corpus_id) private final CorpusEmbeddingClient corpusEmbeddingClient; private final DataSource dataSource; - @Transactional(readOnly = true) + @Transactional public CorpusEmbeddingSyncResponse syncAll(Integer limit) { int jobPostingCount = syncJobPostingEmbeddings(limit); int questionCount = syncQuestionEmbeddings(limit); return new CorpusEmbeddingSyncResponse(jobPostingCount, questionCount, embeddingModel); } - @Transactional(readOnly = true) + @Transactional public int syncJobPostingEmbeddings(Integer limit) { - List all = mockJobPostingCorpusRepository - .findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); - List corpusList = applyLimit(all, limit); + List corpusList = limit == null + ? mockJobPostingCorpusRepository.findAllByValidForEmbeddingTrueOrderByIdAsc() + : mockJobPostingCorpusRepository.findAllByValidForEmbeddingTrueOrderByIdAsc(PageRequest.of(0, limit)); return upsertJobPostingEmbeddings(corpusList); } - @Transactional(readOnly = true) + @Transactional public int syncQuestionEmbeddings(Integer limit) { - List all = mockQuestionCorpusRepository - .findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); - List corpusList = applyLimit(all, limit); + List corpusList = limit == null + ? mockQuestionCorpusRepository.findAllByValidForEmbeddingTrueOrderByIdAsc() + : mockQuestionCorpusRepository.findAllByValidForEmbeddingTrueOrderByIdAsc(PageRequest.of(0, limit)); return upsertQuestionEmbeddings(corpusList); } @@ -107,7 +109,8 @@ private void upsertVectors(String sql, List ids, List embeddings) throw new IllegalStateException("임베딩 결과 개수가 corpus 개수와 일치하지 않습니다."); } - try (Connection connection = dataSource.getConnection()) { + Connection connection = DataSourceUtils.getConnection(dataSource); + try { PGvector.registerTypes(connection); try (PreparedStatement statement = connection.prepareStatement(sql)) { Timestamp now = Timestamp.valueOf(LocalDateTime.now()); @@ -123,16 +126,11 @@ private void upsertVectors(String sql, List ids, List embeddings) } } catch (SQLException e) { throw new IllegalStateException("임베딩 벡터 저장 중 오류가 발생했습니다.", e); + } finally { + DataSourceUtils.releaseConnection(connection, dataSource); } } - private List applyLimit(List items, Integer limit) { - if (limit == null || limit >= items.size()) { - return items; - } - return items.subList(0, limit); - } - private List> partition(List items, int batchSize) { List> result = new ArrayList<>(); if (items.isEmpty()) { diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java index 0440cbd..a4303d0 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java @@ -18,6 +18,8 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.text.DecimalFormat; +import java.text.ParseException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -47,9 +49,10 @@ public CorpusImportResult importWorkbook(Workbook workbook) { DataFormatter formatter = new DataFormatter(); FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator(); ImportStats stats = new ImportStats(); + Map companyCache = new HashMap<>(); - importJobPostingSheet(workbook.getSheet(JD_SHEET_NAME), formatter, evaluator, stats); - importQuestionSheet(workbook.getSheet(QUESTION_SHEET_NAME), formatter, evaluator, stats); + importJobPostingSheet(workbook.getSheet(JD_SHEET_NAME), formatter, evaluator, stats, companyCache); + importQuestionSheet(workbook.getSheet(QUESTION_SHEET_NAME), formatter, evaluator, stats, companyCache); return stats.toResult(); } @@ -58,7 +61,8 @@ private void importJobPostingSheet( Sheet sheet, DataFormatter formatter, FormulaEvaluator evaluator, - ImportStats stats + ImportStats stats, + Map companyCache ) { if (sheet == null) { return; @@ -70,6 +74,11 @@ private void importJobPostingSheet( } Map headerMap = readHeaderMap(rows.next(), formatter, evaluator); + validateRequiredHeaders( + headerMap, + "analysis_id", "company_name", "job_group_l1", "job_family_l2", "role_l3", + "skills", "responsibilities", "requirements", "preferred", "embedding_text", "is_valid_for_embedding" + ); while (rows.hasNext()) { Row row = rows.next(); String sourceAnalysisId = getString(row, headerMap, "analysis_id", formatter, evaluator); @@ -78,7 +87,7 @@ private void importJobPostingSheet( } String companyName = getString(row, headerMap, "company_name", formatter, evaluator); - Company company = resolveCompany(companyName, stats); + Company company = resolveCompany(companyName, stats, companyCache); Optional detailClassification = resolveClassification(row, headerMap, formatter, evaluator, stats); MockJobPostingCorpus corpus = mockJobPostingCorpusRepository.findBySourceAnalysisId(sourceAnalysisId) @@ -130,7 +139,8 @@ private void importQuestionSheet( Sheet sheet, DataFormatter formatter, FormulaEvaluator evaluator, - ImportStats stats + ImportStats stats, + Map companyCache ) { if (sheet == null) { return; @@ -142,6 +152,11 @@ private void importQuestionSheet( } Map headerMap = readHeaderMap(rows.next(), formatter, evaluator); + validateRequiredHeaders( + headerMap, + "question_id", "analysis_id", "company_name", "job_group_l1", "job_family_l2", "role_l3", + "source", "question_text", "embedding_text", "is_valid_for_embedding" + ); while (rows.hasNext()) { Row row = rows.next(); String sourceQuestionId = getString(row, headerMap, "question_id", formatter, evaluator); @@ -150,7 +165,7 @@ private void importQuestionSheet( } String companyName = getString(row, headerMap, "company_name", formatter, evaluator); - Company company = resolveCompany(companyName, stats); + Company company = resolveCompany(companyName, stats, companyCache); Optional detailClassification = resolveClassification(row, headerMap, formatter, evaluator, stats); MockQuestionCorpus corpus = mockQuestionCorpusRepository.findBySourceQuestionId(sourceQuestionId) @@ -219,16 +234,22 @@ private Optional resolveClassification( return detailClassification; } - private Company resolveCompany(String companyName, ImportStats stats) { + private Company resolveCompany(String companyName, ImportStats stats, Map companyCache) { String normalizedCompanyName = normalize(companyName); if (!StringUtils.hasText(normalizedCompanyName)) { return null; } - return companyRepository.findByName(normalizedCompanyName) + Company cachedCompany = companyCache.get(normalizedCompanyName); + if (cachedCompany != null) { + return cachedCompany; + } + Company company = companyRepository.findByName(normalizedCompanyName) .orElseGet(() -> { stats.createdCompanies++; return companyRepository.save(Company.create(normalizedCompanyName, null)); }); + companyCache.put(normalizedCompanyName, company); + return company; } private Map readHeaderMap(Row headerRow, DataFormatter formatter, FormulaEvaluator evaluator) { @@ -244,6 +265,14 @@ private Map readHeaderMap(Row headerRow, DataFormatter formatte return headerMap; } + private void validateRequiredHeaders(Map headerMap, String... requiredColumns) { + for (String requiredColumn : requiredColumns) { + if (!headerMap.containsKey(requiredColumn)) { + throw new IllegalArgumentException("필수 헤더가 누락되었습니다. column=" + requiredColumn); + } + } + } + private String getString( Row row, Map headerMap, @@ -269,7 +298,21 @@ private Integer getInteger( if (!StringUtils.hasText(value)) { return null; } - return Integer.parseInt(value); + String normalized = value.replace(",", "").trim(); + try { + if (normalized.contains(".")) { + double decimalValue = Double.parseDouble(normalized); + return (int) Math.round(decimalValue); + } + return Integer.parseInt(normalized); + } catch (NumberFormatException e) { + try { + Number parsed = DecimalFormat.getInstance().parse(normalized); + return parsed == null ? null : parsed.intValue(); + } catch (ParseException ignored) { + return null; + } + } } private boolean getBoolean( diff --git a/src/main/resources/application-dev.yaml b/src/main/resources/application-dev.yaml index f83055c..b444e85 100644 --- a/src/main/resources/application-dev.yaml +++ b/src/main/resources/application-dev.yaml @@ -74,6 +74,7 @@ app: import: run-on-startup: ${APP_CORPUS_IMPORT_RUN_ON_STARTUP:false} xlsx-path: ${APP_CORPUS_IMPORT_XLSX_PATH:} + allowed-root: ${APP_CORPUS_IMPORT_ALLOWED_ROOT:} embedding: sync-on-startup: ${APP_CORPUS_EMBEDDING_SYNC_ON_STARTUP:false} model: ${APP_CORPUS_EMBEDDING_MODEL:embed-v4.0} diff --git a/src/main/resources/application-prod.yaml b/src/main/resources/application-prod.yaml index ecfc62e..a0d4eff 100644 --- a/src/main/resources/application-prod.yaml +++ b/src/main/resources/application-prod.yaml @@ -74,6 +74,7 @@ app: import: run-on-startup: ${APP_CORPUS_IMPORT_RUN_ON_STARTUP:false} xlsx-path: ${APP_CORPUS_IMPORT_XLSX_PATH:} + allowed-root: ${APP_CORPUS_IMPORT_ALLOWED_ROOT:} embedding: sync-on-startup: ${APP_CORPUS_EMBEDDING_SYNC_ON_STARTUP:false} model: ${APP_CORPUS_EMBEDDING_MODEL:embed-v4.0} diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml index d2c0e0a..50a37d6 100644 --- a/src/main/resources/application.yaml +++ b/src/main/resources/application.yaml @@ -1,3 +1,5 @@ spring: profiles: active: ${SPRING_PROFILES_ACTIVE:dev} + jpa: + defer-datasource-initialization: true