diff --git a/build.gradle b/build.gradle index 45f7bea..e436553 100644 --- a/build.gradle +++ b/build.gradle @@ -37,8 +37,11 @@ dependencies { //web implementation 'org.springframework.boot:spring-boot-starter-actuator' + implementation 'org.springframework.boot:spring-boot-starter-jdbc' implementation 'org.springframework.boot:spring-boot-starter-validation' implementation 'org.springframework.boot:spring-boot-starter-web' + implementation 'org.apache.poi:poi-ooxml:5.4.1' + implementation 'com.pgvector:pgvector:0.1.6' //jwt implementation 'io.jsonwebtoken:jjwt-api:0.12.7' diff --git a/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java index 674accf..bc65caf 100644 --- a/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java +++ b/src/main/java/com/jobdri/jobdri_api/domain/classification/repository/DetailClassificationRepository.java @@ -12,6 +12,22 @@ public interface DetailClassificationRepository extends JpaRepository { List findAllByMiddleClassificationId(Long middleClassificationId); Optional findByDetailName(String detailName); + long countByDetailName(String detailName); + + @Query(""" + SELECT dc + FROM DetailClassification dc + JOIN dc.middleClassification mc + JOIN mc.classification c + WHERE lower(c.bigName) = lower(:bigName) + AND lower(mc.middleName) = lower(:middleName) + AND lower(dc.detailName) = lower(:detailName) + """) + Optional findByHierarchyNames( + @Param("bigName") String bigName, + @Param("middleName") String middleName, + @Param("detailName") String detailName + ); @Query(value = """ SELECT diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java new file mode 100644 index 0000000..7fa4253 --- 
/dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/controller/CorpusAdminController.java @@ -0,0 +1,52 @@ +package com.jobdri.jobdri_api.domain.corpus.controller; + +import com.jobdri.jobdri_api.domain.corpus.dto.request.CorpusEmbeddingSyncRequest; +import com.jobdri.jobdri_api.domain.corpus.dto.request.CorpusImportRequest; +import com.jobdri.jobdri_api.domain.corpus.dto.response.CorpusEmbeddingSyncResponse; +import com.jobdri.jobdri_api.domain.corpus.service.CorpusEmbeddingSyncService; +import com.jobdri.jobdri_api.domain.corpus.service.CorpusImportResult; +import com.jobdri.jobdri_api.domain.corpus.service.CorpusImportService; +import com.jobdri.jobdri_api.global.apiPayload.ApiResponse; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.validation.Valid; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.io.IOException; +import java.nio.file.Path; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/api/admin/corpus") +@Tag(name = "CorpusAdmin", description = "관리자용 corpus 적재/임베딩 API") +public class CorpusAdminController { + + private final CorpusImportService corpusImportService; + private final CorpusEmbeddingSyncService corpusEmbeddingSyncService; + + @Operation(summary = "corpus 엑셀 적재", description = "관리자가 xlsx 파일 경로를 넘겨 corpus 원본 테이블에 적재합니다.") + @PostMapping("/import") + public ApiResponse importCorpus( + @Valid @RequestBody CorpusImportRequest request + ) throws IOException { + return ApiResponse.onSuccess( + "corpus 엑셀 적재에 성공했습니다.", + corpusImportService.importFromXlsx(Path.of(request.filePath())) + ); + } + + @Operation(summary = "corpus 임베딩 동기화", description = "유효한 corpus 데이터를 읽어 pgvector 테이블에 임베딩을 저장합니다.") + 
@PostMapping("/embeddings/sync") + public ApiResponse syncEmbeddings( + @Valid @RequestBody CorpusEmbeddingSyncRequest request + ) { + return ApiResponse.onSuccess( + "corpus 임베딩 동기화에 성공했습니다.", + corpusEmbeddingSyncService.syncAll(request.limit()) + ); + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/request/CorpusEmbeddingSyncRequest.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/request/CorpusEmbeddingSyncRequest.java new file mode 100644 index 0000000..ad93b0f --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/request/CorpusEmbeddingSyncRequest.java @@ -0,0 +1,9 @@ +package com.jobdri.jobdri_api.domain.corpus.dto.request; + +import jakarta.validation.constraints.Positive; + +public record CorpusEmbeddingSyncRequest( + @Positive(message = "limit는 1 이상이어야 합니다.") + Integer limit +) { +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/request/CorpusImportRequest.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/request/CorpusImportRequest.java new file mode 100644 index 0000000..2f0a87e --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/request/CorpusImportRequest.java @@ -0,0 +1,9 @@ +package com.jobdri.jobdri_api.domain.corpus.dto.request; + +import jakarta.validation.constraints.NotBlank; + +public record CorpusImportRequest( + @NotBlank(message = "엑셀 파일 경로는 필수입니다.") + String filePath +) { +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/response/CorpusEmbeddingSyncResponse.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/response/CorpusEmbeddingSyncResponse.java new file mode 100644 index 0000000..6868ccd --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/dto/response/CorpusEmbeddingSyncResponse.java @@ -0,0 +1,8 @@ +package com.jobdri.jobdri_api.domain.corpus.dto.response; + +public record CorpusEmbeddingSyncResponse( + int jobPostingEmbeddingsUpserted, + int questionEmbeddingsUpserted, + String 
embeddingModel +) { +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/CorpusClassificationMapping.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/CorpusClassificationMapping.java new file mode 100644 index 0000000..e392139 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/CorpusClassificationMapping.java @@ -0,0 +1,59 @@ +package com.jobdri.jobdri_api.domain.corpus.entity; + +import com.jobdri.jobdri_api.domain.classification.entity.DetailClassification; +import jakarta.persistence.*; +import lombok.*; + +import java.time.LocalDateTime; + +@Entity +@Getter +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PRIVATE) +@Builder(access = AccessLevel.PRIVATE) +@Table( + name = "corpus_classification_mappings", + uniqueConstraints = { + @UniqueConstraint( + name = "uk_corpus_classification_mapping_source_triplet", + columnNames = {"source_job_group_l1", "source_job_family_l2", "source_role_l3"} + ) + } +) +public class CorpusClassificationMapping { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(name = "source_job_group_l1", nullable = false, columnDefinition = "TEXT") + private String sourceJobGroupL1; + + @Column(name = "source_job_family_l2", nullable = false, columnDefinition = "TEXT") + private String sourceJobFamilyL2; + + @Column(name = "source_role_l3", nullable = false, columnDefinition = "TEXT") + private String sourceRoleL3; + + @ManyToOne(fetch = FetchType.LAZY, optional = false) + @JoinColumn(name = "detail_classification_id", nullable = false) + private DetailClassification detailClassification; + + @Column(nullable = false) + private LocalDateTime createdAt; + + public static CorpusClassificationMapping create( + String sourceJobGroupL1, + String sourceJobFamilyL2, + String sourceRoleL3, + DetailClassification detailClassification + ) { + return CorpusClassificationMapping.builder() + 
.sourceJobGroupL1(sourceJobGroupL1) + .sourceJobFamilyL2(sourceJobFamilyL2) + .sourceRoleL3(sourceRoleL3) + .detailClassification(detailClassification) + .createdAt(LocalDateTime.now()) + .build(); + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/MockJobPostingCorpus.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/MockJobPostingCorpus.java new file mode 100644 index 0000000..a5a9215 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/MockJobPostingCorpus.java @@ -0,0 +1,155 @@ +package com.jobdri.jobdri_api.domain.corpus.entity; + +import com.jobdri.jobdri_api.domain.company.entity.Company; +import com.jobdri.jobdri_api.domain.classification.entity.DetailClassification; +import jakarta.persistence.*; +import lombok.*; + +import java.time.LocalDateTime; + +@Entity +@Getter +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PRIVATE) +@Builder(access = AccessLevel.PRIVATE) +@Table( + name = "mock_job_posting_corpus", + indexes = { + @Index(name = "idx_mock_job_posting_corpus_source_analysis", columnList = "source_analysis_id"), + @Index(name = "idx_mock_job_posting_corpus_company", columnList = "company_id"), + @Index(name = "idx_mock_job_posting_corpus_classification", columnList = "job_group_l1, job_family_l2, role_l3") + } +) +public class MockJobPostingCorpus { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(name = "source_analysis_id", nullable = false, length = 100, unique = true) + private String sourceAnalysisId; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "company_id") + private Company company; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "detail_classification_id") + private DetailClassification detailClassification; + + @Column(name = "company_name", columnDefinition = "TEXT") + private String companyName; + + @Column(columnDefinition = "TEXT") + private String industry; 
+ + @Column(name = "job_group_l1", columnDefinition = "TEXT") + private String jobGroupL1; + + @Column(name = "job_family_l2", columnDefinition = "TEXT") + private String jobFamilyL2; + + @Column(name = "role_l3", columnDefinition = "TEXT") + private String roleL3; + + @Column(columnDefinition = "TEXT") + private String skills; + + @Column(name = "responsibilities", columnDefinition = "TEXT") + private String responsibilities; + + @Column(name = "requirements", columnDefinition = "TEXT") + private String requirements; + + @Column(name = "preferred", columnDefinition = "TEXT") + private String preferred; + + @Column(name = "embedding_text", nullable = false, columnDefinition = "TEXT") + private String embeddingText; + + @Column(name = "is_valid_for_embedding", nullable = false) + private boolean validForEmbedding; + + @Column(name = "invalid_reason", columnDefinition = "TEXT") + private String invalidReason; + + @Column(name = "created_at", nullable = false) + private LocalDateTime createdAt; + + public static MockJobPostingCorpus create( + String sourceAnalysisId, + Company company, + DetailClassification detailClassification, + String companyName, + String industry, + String jobGroupL1, + String jobFamilyL2, + String roleL3, + String skills, + String responsibilities, + String requirements, + String preferred, + String embeddingText, + boolean validForEmbedding, + String invalidReason + ) { + return MockJobPostingCorpus.builder() + .sourceAnalysisId(sourceAnalysisId) + .company(company) + .detailClassification(detailClassification) + .companyName(companyName) + .industry(industry) + .jobGroupL1(jobGroupL1) + .jobFamilyL2(jobFamilyL2) + .roleL3(roleL3) + .skills(skills) + .responsibilities(responsibilities) + .requirements(requirements) + .preferred(preferred) + .embeddingText(embeddingText) + .validForEmbedding(validForEmbedding) + .invalidReason(invalidReason) + .createdAt(LocalDateTime.now()) + .build(); + } + + public void assignCompany(Company company) { + 
this.company = company; + } + + public void assignDetailClassification(DetailClassification detailClassification) { + this.detailClassification = detailClassification; + } + + public void updateFromImport( + Company company, + DetailClassification detailClassification, + String companyName, + String industry, + String jobGroupL1, + String jobFamilyL2, + String roleL3, + String skills, + String responsibilities, + String requirements, + String preferred, + String embeddingText, + boolean validForEmbedding, + String invalidReason + ) { + this.company = company; + this.detailClassification = detailClassification; + this.companyName = companyName; + this.industry = industry; + this.jobGroupL1 = jobGroupL1; + this.jobFamilyL2 = jobFamilyL2; + this.roleL3 = roleL3; + this.skills = skills; + this.responsibilities = responsibilities; + this.requirements = requirements; + this.preferred = preferred; + this.embeddingText = embeddingText; + this.validForEmbedding = validForEmbedding; + this.invalidReason = invalidReason; + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/MockQuestionCorpus.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/MockQuestionCorpus.java new file mode 100644 index 0000000..8c203f4 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/entity/MockQuestionCorpus.java @@ -0,0 +1,147 @@ +package com.jobdri.jobdri_api.domain.corpus.entity; + +import com.jobdri.jobdri_api.domain.company.entity.Company; +import com.jobdri.jobdri_api.domain.classification.entity.DetailClassification; +import jakarta.persistence.*; +import lombok.*; + +import java.time.LocalDateTime; + +@Entity +@Getter +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PRIVATE) +@Builder(access = AccessLevel.PRIVATE) +@Table( + name = "mock_question_corpus", + indexes = { + @Index(name = "idx_mock_question_corpus_source_question", columnList = "source_question_id"), + @Index(name = 
"idx_mock_question_corpus_source_analysis", columnList = "source_analysis_id"), + @Index(name = "idx_mock_question_corpus_company", columnList = "company_id"), + @Index(name = "idx_mock_question_corpus_classification", columnList = "job_group_l1, job_family_l2, role_l3") + } +) +public class MockQuestionCorpus { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(name = "source_question_id", nullable = false, length = 120, unique = true) + private String sourceQuestionId; + + @Column(name = "source_analysis_id", nullable = false, length = 100) + private String sourceAnalysisId; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "company_id") + private Company company; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "detail_classification_id") + private DetailClassification detailClassification; + + @Column(name = "company_name", columnDefinition = "TEXT") + private String companyName; + + @Column(name = "job_group_l1", columnDefinition = "TEXT") + private String jobGroupL1; + + @Column(name = "job_family_l2", columnDefinition = "TEXT") + private String jobFamilyL2; + + @Column(name = "role_l3", columnDefinition = "TEXT") + private String roleL3; + + @Column(name = "source", length = 50) + private String source; + + @Column(name = "question_type", columnDefinition = "TEXT") + private String questionType; + + @Column(name = "char_limit") + private Integer charLimit; + + @Column(name = "question_text", nullable = false, columnDefinition = "TEXT") + private String questionText; + + @Column(name = "embedding_text", nullable = false, columnDefinition = "TEXT") + private String embeddingText; + + @Column(name = "is_valid_for_embedding", nullable = false) + private boolean validForEmbedding; + + @Column(name = "created_at", nullable = false) + private LocalDateTime createdAt; + + public static MockQuestionCorpus create( + String sourceQuestionId, + String sourceAnalysisId, + Company company, + 
DetailClassification detailClassification, + String companyName, + String jobGroupL1, + String jobFamilyL2, + String roleL3, + String source, + String questionType, + Integer charLimit, + String questionText, + String embeddingText, + boolean validForEmbedding + ) { + return MockQuestionCorpus.builder() + .sourceQuestionId(sourceQuestionId) + .sourceAnalysisId(sourceAnalysisId) + .company(company) + .detailClassification(detailClassification) + .companyName(companyName) + .jobGroupL1(jobGroupL1) + .jobFamilyL2(jobFamilyL2) + .roleL3(roleL3) + .source(source) + .questionType(questionType) + .charLimit(charLimit) + .questionText(questionText) + .embeddingText(embeddingText) + .validForEmbedding(validForEmbedding) + .createdAt(LocalDateTime.now()) + .build(); + } + + public void assignCompany(Company company) { + this.company = company; + } + + public void assignDetailClassification(DetailClassification detailClassification) { + this.detailClassification = detailClassification; + } + + public void updateFromImport( + Company company, + DetailClassification detailClassification, + String companyName, + String jobGroupL1, + String jobFamilyL2, + String roleL3, + String source, + String questionType, + Integer charLimit, + String questionText, + String embeddingText, + boolean validForEmbedding + ) { + this.company = company; + this.detailClassification = detailClassification; + this.companyName = companyName; + this.jobGroupL1 = jobGroupL1; + this.jobFamilyL2 = jobFamilyL2; + this.roleL3 = roleL3; + this.source = source; + this.questionType = questionType; + this.charLimit = charLimit; + this.questionText = questionText; + this.embeddingText = embeddingText; + this.validForEmbedding = validForEmbedding; + } +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/CorpusClassificationMappingRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/CorpusClassificationMappingRepository.java new file mode 100644 index 
0000000..52708f7 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/CorpusClassificationMappingRepository.java @@ -0,0 +1,14 @@ +package com.jobdri.jobdri_api.domain.corpus.repository; + +import com.jobdri.jobdri_api.domain.corpus.entity.CorpusClassificationMapping; +import org.springframework.data.jpa.repository.JpaRepository; + +import java.util.Optional; + +public interface CorpusClassificationMappingRepository extends JpaRepository { + Optional findBySourceJobGroupL1AndSourceJobFamilyL2AndSourceRoleL3( + String sourceJobGroupL1, + String sourceJobFamilyL2, + String sourceRoleL3 + ); +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java new file mode 100644 index 0000000..8b905bc --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockJobPostingCorpusRepository.java @@ -0,0 +1,15 @@ +package com.jobdri.jobdri_api.domain.corpus.repository; + +import com.jobdri.jobdri_api.domain.corpus.entity.MockJobPostingCorpus; +import org.springframework.data.jpa.repository.JpaRepository; + +import java.util.List; +import java.util.Optional; + +public interface MockJobPostingCorpusRepository extends JpaRepository { + Optional findBySourceAnalysisId(String sourceAnalysisId); + + List findAllByCompanyId(Long companyId); + + List findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java new file mode 100644 index 0000000..239af0a --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/repository/MockQuestionCorpusRepository.java @@ -0,0 +1,17 @@ +package com.jobdri.jobdri_api.domain.corpus.repository; + +import 
com.jobdri.jobdri_api.domain.corpus.entity.MockQuestionCorpus; +import org.springframework.data.jpa.repository.JpaRepository; + +import java.util.List; +import java.util.Optional; + +public interface MockQuestionCorpusRepository extends JpaRepository { + Optional findBySourceQuestionId(String sourceQuestionId); + + List findAllBySourceAnalysisId(String sourceAnalysisId); + + List findAllByCompanyId(Long companyId); + + List findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java new file mode 100644 index 0000000..4d1b602 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/BootstrapAdminService.java @@ -0,0 +1,45 @@ +package com.jobdri.jobdri_api.domain.corpus.service; + +import com.jobdri.jobdri_api.domain.user.entity.User; +import com.jobdri.jobdri_api.domain.user.entity.UserRole; +import com.jobdri.jobdri_api.domain.user.repository.UserRepository; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.StringUtils; + +import java.util.Arrays; +import java.util.List; + +@Slf4j +@Service +@RequiredArgsConstructor +public class BootstrapAdminService { + + @Value("${app.admin.bootstrap-emails:}") + private String bootstrapEmails; + + private final UserRepository userRepository; + + @Transactional + public void promoteConfiguredAdmins() { + List emails = Arrays.stream(bootstrapEmails.split(",")) + .map(String::trim) + .filter(StringUtils::hasText) + .toList(); + + for (String email : emails) { + userRepository.findByEmail(email).ifPresent(this::promoteIfNeeded); + } + } + + private void promoteIfNeeded(User user) { + if 
(user.getRole() == UserRole.ADMIN) {
+            return;
+        }
+        user.promoteToAdmin();
+        log.info("관리자 권한을 부여했습니다. email={}", user.getEmail());
+    }
+}
diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java
new file mode 100644
index 0000000..f5d747a
--- /dev/null
+++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CohereCorpusEmbeddingClient.java
@@ -0,0 +1,115 @@
+package com.jobdri.jobdri_api.domain.corpus.service;
+
+import lombok.RequiredArgsConstructor;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.http.HttpHeaders;
+import org.springframework.http.MediaType;
+import org.springframework.stereotype.Component;
+import org.springframework.util.StringUtils;
+import org.springframework.web.client.RestClient;
+
+import java.util.List;
+
+/**
+ * {@link CorpusEmbeddingClient} that calls the Cohere {@code /v2/embed} endpoint.
+ */
+@Component
+@RequiredArgsConstructor
+public class CohereCorpusEmbeddingClient implements CorpusEmbeddingClient {
+
+    private final RestClient.Builder restClientBuilder;
+
+    @Value("${cohere.api.key:}")
+    private String cohereApiKey;
+
+    @Value("${app.corpus.embedding.model:embed-v4.0}")
+    private String embeddingModel;
+
+    @Value("${app.corpus.embedding.output-dimension:1024}")
+    private int outputDimension;
+
+    @Value("${app.corpus.embedding.document-input-type:search_document}")
+    private String documentInputType;
+
+    /**
+     * Requests float embeddings for the given texts.
+     *
+     * @param texts texts to embed; {@code null} or empty returns an empty list without calling the API
+     * @return one vector per input text, in the order the API returned them
+     * @throws IllegalStateException if the API key is not configured, the response carries no
+     *                               float embeddings, or the vector count differs from {@code texts}
+     */
+    @Override
+    public List<float[]> embed(List<String> texts) {
+        if (!StringUtils.hasText(cohereApiKey)) {
+            throw new IllegalStateException("Cohere API 키가 설정되지 않았습니다.");
+        }
+        if (texts == null || texts.isEmpty()) {
+            return List.of();
+        }
+
+        // Configure a copy: the injected builder is shared by every call on this bean, so
+        // re-configuring it per request would mutate shared state from concurrent callers.
+        RestClient client = restClientBuilder.clone()
+                .baseUrl("https://api.cohere.com")
+                .defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + cohereApiKey)
+                .defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
+                .build();
+
+        EmbedResponse response = client.post()
+                .uri("/v2/embed")
+                .body(new EmbedRequest(
+                        texts,
+                        embeddingModel,
+                        documentInputType,
+                        outputDimension,
+                        List.of("float")
+                ))
+                .retrieve()
+                .body(EmbedResponse.class);
+
+        if (response == null || response.embeddings() == null || response.embeddings().floatEmbeddings() == null) {
+            throw new IllegalStateException("Cohere 임베딩 응답이 비어 있습니다.");
+        }
+
+        List<List<Double>> vectors = response.embeddings().floatEmbeddings();
+        // Callers pair vectors with inputs by position, so a short or long response is unusable.
+        if (vectors.size() != texts.size()) {
+            throw new IllegalStateException(
+                    "Cohere 임베딩 응답 개수가 요청 개수와 일치하지 않습니다. expected=%d, actual=%d"
+                            .formatted(texts.size(), vectors.size()));
+        }
+
+        return vectors.stream()
+                .map(this::toFloatArray)
+                .toList();
+    }
+
+    /** Converts one embedding from boxed doubles to a primitive {@code float[]}. */
+    private float[] toFloatArray(List<Double> values) {
+        float[] array = new float[values.size()];
+        for (int i = 0; i < values.size(); i++) {
+            array[i] = values.get(i).floatValue();
+        }
+        return array;
+    }
+
+    /** Request body of {@code POST /v2/embed}; component names double as the JSON field names. */
+    private record EmbedRequest(
+            List<String> texts,
+            String model,
+            String input_type,
+            Integer output_dimension,
+            List<String> embedding_types
+    ) {
+    }
+
+    private record EmbedResponse(Embeddings embeddings) {
+    }
+
+    private record Embeddings(
+            @com.fasterxml.jackson.annotation.JsonProperty("float")
+            List<List<Double>> floatEmbeddings
+    ) {
+    }
+}
diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java
new file mode 100644
index 0000000..54d203a
--- /dev/null
+++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusAdminRunner.java
@@ -0,0 +1,44 @@
+package com.jobdri.jobdri_api.domain.corpus.service;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.stereotype.Component;
+import org.springframework.util.StringUtils;
+
+import java.nio.file.Path;
+
+@Slf4j
+@Component
+@RequiredArgsConstructor
+public class CorpusAdminRunner implements ApplicationRunner {
+
+    @Value("${app.corpus.import.run-on-startup:false}")
+    private boolean runImportOnStartup;
+
+    @Value("${app.corpus.import.xlsx-path:}")
+    private String
importXlsxPath;
+
+    @Value("${app.corpus.embedding.sync-on-startup:false}")
+    private boolean syncEmbeddingsOnStartup;
+
+    private final BootstrapAdminService bootstrapAdminService;
+    private final CorpusImportService corpusImportService;
+    private final CorpusEmbeddingSyncService corpusEmbeddingSyncService;
+
+    @Override
+    public void run(ApplicationArguments args) throws Exception {
+        bootstrapAdminService.promoteConfiguredAdmins();
+
+        if (runImportOnStartup && StringUtils.hasText(importXlsxPath)) {
+            CorpusImportResult result = corpusImportService.importFromXlsx(Path.of(importXlsxPath));
+            log.info("corpus import 완료: {}", result);
+        }
+
+        if (syncEmbeddingsOnStartup) {
+            log.info("corpus embedding sync 완료: {}", corpusEmbeddingSyncService.syncAll(null));
+        }
+    }
+}
diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java
new file mode 100644
index 0000000..99842d6
--- /dev/null
+++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusClassificationResolver.java
@@ -0,0 +1,122 @@
+package com.jobdri.jobdri_api.domain.corpus.service;
+
+import com.jobdri.jobdri_api.domain.classification.entity.DetailClassification;
+import com.jobdri.jobdri_api.domain.classification.repository.DetailClassificationRepository;
+import com.jobdri.jobdri_api.domain.corpus.entity.CorpusClassificationMapping;
+import com.jobdri.jobdri_api.domain.corpus.repository.CorpusClassificationMappingRepository;
+import lombok.RequiredArgsConstructor;
+import org.springframework.stereotype.Component;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StringUtils;
+
+import java.util.Optional;
+
+/**
+ * Resolves the (L1, L2, L3) labels found in corpus rows to a {@link DetailClassification}.
+ */
+@Component
+@RequiredArgsConstructor
+@Transactional(readOnly = true)
+public class CorpusClassificationResolver {
+
+    private final CorpusClassificationMappingRepository mappingRepository;
+    private final DetailClassificationRepository detailClassificationRepository;
+
+    /**
+     * Looks up the classification for a source triplet. Inputs are trimmed; blank counts as absent.
+     *
+     * <p>Tried in order: a registered mapping for the exact triplet, a case-insensitive match on the
+     * full big/middle/detail hierarchy (only when L1 and L2 are present), and finally the detail
+     * name alone when exactly one detail classification carries that name.
+     *
+     * @return the match, or empty when {@code roleL3} is blank or nothing matches unambiguously
+     */
+    public Optional<DetailClassification> resolve(
+            String jobGroupL1,
+            String jobFamilyL2,
+            String roleL3
+    ) {
+        String normalizedJobGroup = normalize(jobGroupL1);
+        String normalizedJobFamily = normalize(jobFamilyL2);
+        String normalizedRole = normalize(roleL3);
+
+        if (!StringUtils.hasText(normalizedRole)) {
+            return Optional.empty();
+        }
+
+        Optional<DetailClassification> mapped = mappingRepository
+                .findBySourceJobGroupL1AndSourceJobFamilyL2AndSourceRoleL3(
+                        normalizedJobGroup,
+                        normalizedJobFamily,
+                        normalizedRole
+                )
+                .map(CorpusClassificationMapping::getDetailClassification);
+        if (mapped.isPresent()) {
+            return mapped;
+        }
+
+        if (StringUtils.hasText(normalizedJobGroup) && StringUtils.hasText(normalizedJobFamily)) {
+            Optional<DetailClassification> exactHierarchy = detailClassificationRepository.findByHierarchyNames(
+                    normalizedJobGroup,
+                    normalizedJobFamily,
+                    normalizedRole
+            );
+            if (exactHierarchy.isPresent()) {
+                return exactHierarchy;
+            }
+        }
+
+        // The bare detail name is only trusted when exactly one classification carries it.
+        if (detailClassificationRepository.countByDetailName(normalizedRole) == 1) {
+            return detailClassificationRepository.findByDetailName(normalizedRole);
+        }
+
+        return Optional.empty();
+    }
+
+    /**
+     * Returns the mapping registered for the trimmed triplet, creating it when none exists.
+     *
+     * @throws IllegalArgumentException if any label is blank; all three mapping columns are NOT NULL
+     */
+    @Transactional
+    public CorpusClassificationMapping registerMapping(
+            String jobGroupL1,
+            String jobFamilyL2,
+            String roleL3,
+            DetailClassification detailClassification
+    ) {
+        String normalizedJobGroup = normalize(jobGroupL1);
+        String normalizedJobFamily = normalize(jobFamilyL2);
+        String normalizedRole = normalize(roleL3);
+
+        // Fail before the INSERT: a blank label normalizes to null and would otherwise only
+        // surface as a constraint violation from the database.
+        if (normalizedJobGroup == null || normalizedJobFamily == null || normalizedRole == null) {
+            throw new IllegalArgumentException(
+                    "매핑 등록에는 L1/L2/L3 값이 모두 필요합니다. l1=%s, l2=%s, l3=%s"
+                            .formatted(jobGroupL1, jobFamilyL2, roleL3));
+        }
+
+        return mappingRepository
+                .findBySourceJobGroupL1AndSourceJobFamilyL2AndSourceRoleL3(
+                        normalizedJobGroup,
+                        normalizedJobFamily,
+                        normalizedRole
+                )
+                .orElseGet(() -> mappingRepository.save(CorpusClassificationMapping.create(
+                        normalizedJobGroup,
+                        normalizedJobFamily,
+                        normalizedRole,
+                        detailClassification
+                )));
+    }
+
+    /** Trims the value; blank or {@code null} becomes {@code null}. */
+    private String normalize(String value) {
+        if (!StringUtils.hasText(value)) {
+            return null;
+        }
+        return value.trim();
+    }
+}
diff --git
a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java new file mode 100644 index 0000000..184791f --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingClient.java @@ -0,0 +1,7 @@ +package com.jobdri.jobdri_api.domain.corpus.service; + +import java.util.List; + +public interface CorpusEmbeddingClient { + List embed(List texts); +} diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java new file mode 100644 index 0000000..da15910 --- /dev/null +++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusEmbeddingSyncService.java @@ -0,0 +1,147 @@ +package com.jobdri.jobdri_api.domain.corpus.service; + +import com.jobdri.jobdri_api.domain.corpus.dto.response.CorpusEmbeddingSyncResponse; +import com.jobdri.jobdri_api.domain.corpus.entity.MockJobPostingCorpus; +import com.jobdri.jobdri_api.domain.corpus.entity.MockQuestionCorpus; +import com.jobdri.jobdri_api.domain.corpus.repository.MockJobPostingCorpusRepository; +import com.jobdri.jobdri_api.domain.corpus.repository.MockQuestionCorpusRepository; +import com.pgvector.PGvector; +import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; + +@Service +@RequiredArgsConstructor +public class CorpusEmbeddingSyncService { + + private static final String UPSERT_JOB_POSTING_SQL = """ + INSERT INTO mock_job_posting_embeddings (corpus_id, 
embedding_model, embedding, created_at, updated_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (corpus_id) + DO UPDATE SET + embedding_model = EXCLUDED.embedding_model, + embedding = EXCLUDED.embedding, + updated_at = EXCLUDED.updated_at + """; + + private static final String UPSERT_QUESTION_SQL = """ + INSERT INTO mock_question_embeddings (corpus_id, embedding_model, embedding, created_at, updated_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (corpus_id) + DO UPDATE SET + embedding_model = EXCLUDED.embedding_model, + embedding = EXCLUDED.embedding, + updated_at = EXCLUDED.updated_at + """; + + @Value("${app.corpus.embedding.model:embed-v4.0}") + private String embeddingModel; + + @Value("${app.corpus.embedding.batch-size:32}") + private int batchSize; + + private final MockJobPostingCorpusRepository mockJobPostingCorpusRepository; + private final MockQuestionCorpusRepository mockQuestionCorpusRepository; + private final CorpusEmbeddingClient corpusEmbeddingClient; + private final DataSource dataSource; + + @Transactional(readOnly = true) + public CorpusEmbeddingSyncResponse syncAll(Integer limit) { + int jobPostingCount = syncJobPostingEmbeddings(limit); + int questionCount = syncQuestionEmbeddings(limit); + return new CorpusEmbeddingSyncResponse(jobPostingCount, questionCount, embeddingModel); + } + + @Transactional(readOnly = true) + public int syncJobPostingEmbeddings(Integer limit) { + List all = mockJobPostingCorpusRepository + .findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); + List corpusList = applyLimit(all, limit); + return upsertJobPostingEmbeddings(corpusList); + } + + @Transactional(readOnly = true) + public int syncQuestionEmbeddings(Integer limit) { + List all = mockQuestionCorpusRepository + .findAllByValidForEmbeddingTrueAndEmbeddingTextIsNotNullOrderByIdAsc(); + List corpusList = applyLimit(all, limit); + return upsertQuestionEmbeddings(corpusList); + } + + private int upsertJobPostingEmbeddings(List corpusList) { + int processed = 
0;
+        // NOTE(review): generic type arguments appear to have been stripped from this listing. They are
+        // restored below only where the hunk itself determines them (applyLimit, partition).
+        for (List batch : partition(corpusList, batchSize)) {
+            // One embedding request per batch; the client is expected to return one vector per input text.
+            List embeddings = corpusEmbeddingClient.embed(
+                    batch.stream().map(MockJobPostingCorpus::getEmbeddingText).toList()
+            );
+            upsertVectors(UPSERT_JOB_POSTING_SQL, batch.stream().map(MockJobPostingCorpus::getId).toList(), embeddings);
+            processed += batch.size();
+        }
+        return processed;
+    }
+
+    /**
+     * Embeds the given question rows batch by batch and upserts their vectors.
+     *
+     * @param corpusList rows to embed, already limited by the caller
+     * @return the number of rows processed
+     */
+    private int upsertQuestionEmbeddings(List corpusList) {
+        int processed = 0;
+        for (List batch : partition(corpusList, batchSize)) {
+            List embeddings = corpusEmbeddingClient.embed(
+                    batch.stream().map(MockQuestionCorpus::getEmbeddingText).toList()
+            );
+            upsertVectors(UPSERT_QUESTION_SQL, batch.stream().map(MockQuestionCorpus::getId).toList(), embeddings);
+            processed += batch.size();
+        }
+        return processed;
+    }
+
+    /**
+     * Writes one batch of vectors with a single JDBC batch statement.
+     *
+     * @param sql        upsert statement with five placeholders: corpus id, model, vector, created_at, updated_at
+     * @param ids        corpus row ids, positionally aligned with {@code embeddings}
+     * @param embeddings one vector per id (element type not visible in this listing)
+     * @throws IllegalStateException if the two lists differ in size or the database write fails
+     */
+    private void upsertVectors(String sql, List ids, List embeddings) {
+        if (ids.size() != embeddings.size()) {
+            throw new IllegalStateException("임베딩 결과 개수가 corpus 개수와 일치하지 않습니다.");
+        }
+
+        try (Connection connection = dataSource.getConnection()) {
+            // Registers the pgvector type on this connection before vectors are bound as parameters.
+            PGvector.registerTypes(connection);
+            try (PreparedStatement statement = connection.prepareStatement(sql)) {
+                // The same timestamp is bound as created_at and updated_at for the whole batch.
+                Timestamp now = Timestamp.valueOf(LocalDateTime.now());
+                for (int i = 0; i < ids.size(); i++) {
+                    statement.setLong(1, ids.get(i));
+                    statement.setString(2, embeddingModel);
+                    statement.setObject(3, new PGvector(embeddings.get(i)));
+                    statement.setTimestamp(4, now);
+                    statement.setTimestamp(5, now);
+                    statement.addBatch();
+                }
+                statement.executeBatch();
+            }
+        } catch (SQLException e) {
+            throw new IllegalStateException("임베딩 벡터 저장 중 오류가 발생했습니다.", e);
+        }
+    }
+
+    /**
+     * Returns at most {@code limit} leading items.
+     *
+     * @param items source list; it is never copied
+     * @param limit maximum number of items; {@code null} means no limit, values below zero are treated as zero
+     * @return {@code items} itself when no truncation is needed, otherwise a {@code subList} view of it
+     */
+    private <T> List<T> applyLimit(List<T> items, Integer limit) {
+        if (limit == null || limit >= items.size()) {
+            return items;
+        }
+        // A negative limit used to reach subList(0, limit) and fail with IllegalArgumentException.
+        return items.subList(0, Math.max(0, limit));
+    }
+
+    /**
+     * Splits {@code items} into consecutive batches of at most {@code batchSize} elements.
+     *
+     * @param items     list to split
+     * @param batchSize requested batch size; values below 1 are treated as 1
+     * @return the batches in order; empty when {@code items} is empty
+     */
+    private <T> List<List<T>> partition(List<T> items, int batchSize) {
+        List<List<T>> result = new ArrayList<>();
+        if (items.isEmpty()) {
+            return result;
+        }
+        int actualBatchSize = Math.max(1, batchSize);
+        for (int i = 0; i < items.size(); i += 
actualBatchSize) {
+            result.add(items.subList(i, Math.min(items.size(), i + actualBatchSize))); // subList view; the last batch may be shorter
+        }
+        return result;
+    }
+}
diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportResult.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportResult.java
new file mode 100644
index 0000000..3f4fc76
--- /dev/null
+++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportResult.java
@@ -0,0 +1,12 @@
+package com.jobdri.jobdri_api.domain.corpus.service;
+/** Counters returned by a corpus import: rows created/updated per kind, plus classification matches and misses. */
+public record CorpusImportResult(
+        int createdCompanies,
+        int createdJobPostings,
+        int updatedJobPostings,
+        int createdQuestions,
+        int updatedQuestions,
+        int matchedClassifications,
+        int unmatchedClassifications
+) {
+}
diff --git a/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java
new file mode 100644
index 0000000..0440cbd
--- /dev/null
+++ b/src/main/java/com/jobdri/jobdri_api/domain/corpus/service/CorpusImportService.java
@@ -0,0 +1,321 @@
+package com.jobdri.jobdri_api.domain.corpus.service;
+
+import com.jobdri.jobdri_api.domain.classification.entity.DetailClassification;
+import com.jobdri.jobdri_api.domain.company.entity.Company;
+import com.jobdri.jobdri_api.domain.company.repository.CompanyRepository;
+import com.jobdri.jobdri_api.domain.corpus.entity.MockJobPostingCorpus;
+import com.jobdri.jobdri_api.domain.corpus.entity.MockQuestionCorpus;
+import com.jobdri.jobdri_api.domain.corpus.repository.MockJobPostingCorpusRepository;
+import com.jobdri.jobdri_api.domain.corpus.repository.MockQuestionCorpusRepository;
+import lombok.RequiredArgsConstructor;
+import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StringUtils;
+
+import 
java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Optional;
+/**
+ * Imports the corpus workbook: the {@code jd_embed_corpus} sheet into job-posting corpus rows and the
+ * {@code question_embed_corpus} sheet into question corpus rows, inserting or updating by source id.
+ */
+@Service
+@RequiredArgsConstructor
+@Transactional
+public class CorpusImportService {
+    // Sheet names looked up in the workbook; a sheet that is missing is skipped without error.
+    private static final String JD_SHEET_NAME = "jd_embed_corpus";
+    private static final String QUESTION_SHEET_NAME = "question_embed_corpus";
+    // Collaborators, injected through the constructor generated by @RequiredArgsConstructor.
+    private final CompanyRepository companyRepository;
+    private final MockJobPostingCorpusRepository mockJobPostingCorpusRepository;
+    private final MockQuestionCorpusRepository mockQuestionCorpusRepository;
+    private final CorpusClassificationResolver corpusClassificationResolver;
+    /**
+     * Opens the file at {@code xlsxPath} as an XSSF workbook and imports it; the stream and the workbook
+     * are closed when the method returns.
+     *
+     * @param xlsxPath path of the .xlsx file; it is opened as given, no path restriction is applied here
+     * @return counters describing what the import created, updated and matched
+     * @throws IOException if the file cannot be opened or read
+     */
+    public CorpusImportResult importFromXlsx(Path xlsxPath) throws IOException {
+        try (InputStream inputStream = Files.newInputStream(xlsxPath);
+             Workbook workbook = new XSSFWorkbook(inputStream)) {
+            return importWorkbook(workbook);
+        }
+    }
+    /**
+     * Imports the job-posting sheet and then the question sheet of an already opened workbook.
+     *
+     * @param workbook workbook to read; it is not closed by this method
+     * @return counters accumulated over both sheets
+     */
+    public CorpusImportResult importWorkbook(Workbook workbook) {
+        DataFormatter formatter = new DataFormatter();
+        FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();
+        ImportStats stats = new ImportStats();
+
+        importJobPostingSheet(workbook.getSheet(JD_SHEET_NAME), formatter, evaluator, stats);
+        importQuestionSheet(workbook.getSheet(QUESTION_SHEET_NAME), formatter, evaluator, stats);
+
+        return stats.toResult();
+    }
+    // Inserts or updates one job-posting corpus row per data row of the sheet; a null or empty sheet is a no-op.
+    private void importJobPostingSheet(
+            Sheet sheet,
+            DataFormatter formatter,
+            FormulaEvaluator evaluator,
+            ImportStats stats
+    ) {
+        if (sheet == null) {
+            return;
+        }
+
+        Iterator rows = sheet.rowIterator();
+        if (!rows.hasNext()) {
+            return;
+        }
+        // The first row is the header; it maps column names to cell indexes.
+        Map headerMap = readHeaderMap(rows.next(), formatter, evaluator);
+        while (rows.hasNext()) {
+            Row row = rows.next();
+            String sourceAnalysisId = getString(row, headerMap, "analysis_id", formatter, evaluator);
+            if (!StringUtils.hasText(sourceAnalysisId)) { // rows without an analysis_id are skipped
+                continue;
+            }
+
+            String companyName = 
getString(row, headerMap, "company_name", formatter, evaluator);
+            Company company = resolveCompany(companyName, stats);
+            Optional detailClassification = resolveClassification(row, headerMap, formatter, evaluator, stats);
+            // Upsert key: the sheet's analysis_id.
+            MockJobPostingCorpus corpus = mockJobPostingCorpusRepository.findBySourceAnalysisId(sourceAnalysisId)
+                    .orElse(null);
+            // No row with this analysis_id yet: insert one and move on to the next sheet row.
+            if (corpus == null) {
+                mockJobPostingCorpusRepository.save(MockJobPostingCorpus.create(
+                        sourceAnalysisId,
+                        company,
+                        detailClassification.orElse(null),
+                        companyName,
+                        getString(row, headerMap, "industry", formatter, evaluator),
+                        getString(row, headerMap, "job_group_l1", formatter, evaluator),
+                        getString(row, headerMap, "job_family_l2", formatter, evaluator),
+                        getString(row, headerMap, "role_l3", formatter, evaluator),
+                        getString(row, headerMap, "skills", formatter, evaluator),
+                        getString(row, headerMap, "responsibilities", formatter, evaluator),
+                        getString(row, headerMap, "requirements", formatter, evaluator),
+                        getString(row, headerMap, "preferred", formatter, evaluator),
+                        getString(row, headerMap, "embedding_text", formatter, evaluator),
+                        getBoolean(row, headerMap, "is_valid_for_embedding", formatter, evaluator),
+                        null // NOTE(review): the meaning of this last argument is defined by MockJobPostingCorpus.create, not visible here
+                ));
+                stats.createdJobPostings++;
+                continue;
+            }
+            // Existing row: overwrite its imported fields. No explicit save - presumably flushed by JPA dirty checking under the class-level @Transactional; confirm.
+            corpus.updateFromImport(
+                    company,
+                    detailClassification.orElse(null),
+                    companyName,
+                    getString(row, headerMap, "industry", formatter, evaluator),
+                    getString(row, headerMap, "job_group_l1", formatter, evaluator),
+                    getString(row, headerMap, "job_family_l2", formatter, evaluator),
+                    getString(row, headerMap, "role_l3", formatter, evaluator),
+                    getString(row, headerMap, "skills", formatter, evaluator),
+                    getString(row, headerMap, "responsibilities", formatter, evaluator),
+                    getString(row, headerMap, "requirements", formatter, evaluator),
+                    getString(row, headerMap, "preferred", formatter, evaluator),
+                    getString(row, headerMap, "embedding_text", formatter, evaluator),
+                    getBoolean(row, headerMap, "is_valid_for_embedding", 
formatter, evaluator),
+                    null
+            );
+            stats.updatedJobPostings++;
+        }
+    }
+    // Inserts or updates one question corpus row per data row of the sheet; a null or empty sheet is a no-op.
+    private void importQuestionSheet(
+            Sheet sheet,
+            DataFormatter formatter,
+            FormulaEvaluator evaluator,
+            ImportStats stats
+    ) {
+        if (sheet == null) {
+            return;
+        }
+
+        Iterator rows = sheet.rowIterator();
+        if (!rows.hasNext()) {
+            return;
+        }
+        // The first row is the header; it maps column names to cell indexes.
+        Map headerMap = readHeaderMap(rows.next(), formatter, evaluator);
+        while (rows.hasNext()) {
+            Row row = rows.next();
+            String sourceQuestionId = getString(row, headerMap, "question_id", formatter, evaluator);
+            if (!StringUtils.hasText(sourceQuestionId)) { // rows without a question_id are skipped
+                continue;
+            }
+
+            String companyName = getString(row, headerMap, "company_name", formatter, evaluator);
+            Company company = resolveCompany(companyName, stats);
+            Optional detailClassification = resolveClassification(row, headerMap, formatter, evaluator, stats);
+            // Upsert key: the sheet's question_id.
+            MockQuestionCorpus corpus = mockQuestionCorpusRepository.findBySourceQuestionId(sourceQuestionId)
+                    .orElse(null);
+            // No row with this question_id yet: insert one and move on to the next sheet row.
+            if (corpus == null) {
+                mockQuestionCorpusRepository.save(MockQuestionCorpus.create(
+                        sourceQuestionId,
+                        getString(row, headerMap, "analysis_id", formatter, evaluator),
+                        company,
+                        detailClassification.orElse(null),
+                        companyName,
+                        getString(row, headerMap, "job_group_l1", formatter, evaluator),
+                        getString(row, headerMap, "job_family_l2", formatter, evaluator),
+                        getString(row, headerMap, "role_l3", formatter, evaluator),
+                        getString(row, headerMap, "source", formatter, evaluator),
+                        getString(row, headerMap, "question_type", formatter, evaluator),
+                        getInteger(row, headerMap, "char_limit", formatter, evaluator),
+                        getString(row, headerMap, "question_text", formatter, evaluator),
+                        getString(row, headerMap, "embedding_text", formatter, evaluator),
+                        getBoolean(row, headerMap, "is_valid_for_embedding", formatter, evaluator)
+                ));
+                stats.createdQuestions++;
+                continue;
+            }
+            // Existing row: overwrite its imported fields (analysis_id is not passed on update, only on create).
+            corpus.updateFromImport(
+                    company,
+                    detailClassification.orElse(null),
+                    companyName,
+                    getString(row, headerMap, "job_group_l1", formatter, evaluator),
+                    getString(row, headerMap, "job_family_l2", formatter, evaluator),
+                    getString(row, headerMap, "role_l3", formatter, evaluator),
+                    getString(row, headerMap, "source", formatter, evaluator),
+                    getString(row, headerMap, "question_type", formatter, evaluator),
+                    getInteger(row, headerMap, "char_limit", formatter, evaluator),
+                    getString(row, headerMap, "question_text", formatter, evaluator),
+                    getString(row, headerMap, "embedding_text", formatter, evaluator),
+                    getBoolean(row, headerMap, "is_valid_for_embedding", formatter, evaluator)
+            );
+            stats.updatedQuestions++;
+        }
+    }
+    /**
+     * Resolves the row's three classification columns through {@code corpusClassificationResolver} and
+     * records the outcome: a hit counts as matched; a miss counts as unmatched only when
+     * {@code role_l3} has text.
+     */
+    private Optional resolveClassification(
+            Row row,
+            Map headerMap,
+            DataFormatter formatter,
+            FormulaEvaluator evaluator,
+            ImportStats stats
+    ) {
+        String jobGroupL1 = getString(row, headerMap, "job_group_l1", formatter, evaluator);
+        String jobFamilyL2 = getString(row, headerMap, "job_family_l2", formatter, evaluator);
+        String roleL3 = getString(row, headerMap, "role_l3", formatter, evaluator);
+
+        Optional detailClassification = corpusClassificationResolver.resolve(
+                jobGroupL1,
+                jobFamilyL2,
+                roleL3
+        );
+        if (detailClassification.isPresent()) {
+            stats.matchedClassifications++;
+        } else if (StringUtils.hasText(roleL3)) {
+            stats.unmatchedClassifications++;
+        }
+        return detailClassification;
+    }
+    /**
+     * Finds the company by its trimmed name, creating and counting it when absent.
+     *
+     * @return the existing or newly saved company, or {@code null} when the name is blank
+     */
+    private Company resolveCompany(String companyName, ImportStats stats) {
+        String normalizedCompanyName = normalize(companyName);
+        if (!StringUtils.hasText(normalizedCompanyName)) {
+            return null;
+        }
+        return companyRepository.findByName(normalizedCompanyName)
+                .orElseGet(() -> {
+                    stats.createdCompanies++;
+                    return companyRepository.save(Company.create(normalizedCompanyName, null));
+                });
+    }
+    // Maps each non-blank, trimmed header cell text to its column index; a repeated header keeps the last index.
+    private Map readHeaderMap(Row headerRow, DataFormatter formatter, FormulaEvaluator evaluator) {
+        Map headerMap = new HashMap<>();
+        short lastCellNum = headerRow.getLastCellNum();
+        for (int i = 0; i < lastCellNum; i++) {
+            Cell cell = 
headerRow.getCell(i);
+            String value = normalize(getCellString(cell, formatter, evaluator));
+            if (value != null) {
+                headerMap.put(value, i);
+            }
+        }
+        return headerMap;
+    }
+
+    /**
+     * Reads a cell as trimmed display text.
+     *
+     * @param columnName header name of the column to read
+     * @return the trimmed text, or {@code null} when the column is not in the header, the cell is
+     *         missing, or the text is blank
+     */
+    private String getString(
+            Row row,
+            Map headerMap,
+            String columnName,
+            DataFormatter formatter,
+            FormulaEvaluator evaluator
+    ) {
+        // NOTE(review): the Map type arguments appear to have been stripped from this listing.
+        Integer index = headerMap.get(columnName);
+        if (index == null) {
+            return null;
+        }
+        return normalize(getCellString(row.getCell(index), formatter, evaluator));
+    }
+
+    /** Matches integers written with thousands separators, e.g. {@code 1,000} or {@code -12,345.0}. */
+    private static final java.util.regex.Pattern GROUPED_INTEGER =
+            java.util.regex.Pattern.compile("[+-]?\\d{1,3}(,\\d{3})+(\\.0*)?");
+
+    /**
+     * Reads a cell as an integer.
+     *
+     * <p>The cell is read as display text, so besides plain digits this accepts values shown with
+     * thousands separators ({@code 1,000}) or a zero fraction ({@code 500.0}), which a bare
+     * {@code Integer.parseInt} would reject.
+     *
+     * @param columnName header name of the column to read
+     * @return the parsed value, or {@code null} when the cell is absent or blank
+     * @throws NumberFormatException if the text is not an integral number that fits in an {@code int};
+     *                               the message names the column, the 1-based row and the offending text
+     */
+    private Integer getInteger(
+            Row row,
+            Map headerMap,
+            String columnName,
+            DataFormatter formatter,
+            FormulaEvaluator evaluator
+    ) {
+        String value = getString(row, headerMap, columnName, formatter, evaluator);
+        if (!StringUtils.hasText(value)) {
+            return null;
+        }
+        // Strip commas only when they are clearly thousands separators, so "1,5" is still rejected.
+        String digits = GROUPED_INTEGER.matcher(value).matches() ? value.replace(",", "") : value;
+        try {
+            return new java.math.BigDecimal(digits).intValueExact();
+        } catch (NumberFormatException | ArithmeticException e) {
+            NumberFormatException failure = new NumberFormatException(
+                    "Cannot read column '" + columnName + "' as an integer (row " + (row.getRowNum() + 1) + "): " + value
+            );
+            failure.initCause(e);
+            throw failure;
+        }
+    }
+
+    /**
+     * Reads a cell as a boolean.
+     *
+     * @param columnName header name of the column to read
+     * @return {@code true} only when the cell text equals "true" ignoring case; anything else,
+     *         including a missing or blank cell, is {@code false}
+     */
+    private boolean getBoolean(
+            Row row,
+            Map headerMap,
+            String columnName,
+            DataFormatter formatter,
+            FormulaEvaluator evaluator
+    ) {
+        String value = getString(row, headerMap, columnName, formatter, evaluator);
+        return Boolean.parseBoolean(value);
+    }
+
+    /** Returns the cell's formatted display text (formulas are evaluated first), or {@code null} for a missing cell. */
+    private String getCellString(Cell cell, DataFormatter formatter, FormulaEvaluator evaluator) {
+        if (cell == null) {
+            return null;
+        }
+        return formatter.formatCellValue(cell, evaluator);
+    }
+
+    /** Trims the value; {@code null}, empty and whitespace-only input all become {@code null}. */
+    private String normalize(String value) {
+        if (!StringUtils.hasText(value)) {
+            return null;
+        }
+        return value.trim();
+    }
+
+    /** Mutable counters for one import run; converted to the immutable result record at the end. */
+    private static final class ImportStats {
+        private int createdCompanies;
+        private int createdJobPostings;
+        private int updatedJobPostings;
+        private int createdQuestions;
+        private int updatedQuestions;
+        private int matchedClassifications;
+        private int unmatchedClassifications;
+
+        /** Copies the current counter values into a {@link CorpusImportResult}. */
+        private CorpusImportResult toResult() {
+            return new CorpusImportResult(
+                    createdCompanies,
+                    createdJobPostings,
+                    updatedJobPostings,
+                    createdQuestions,
+                    updatedQuestions,
+                    matchedClassifications,
+                    unmatchedClassifications
+            );
+        }
+    }
+}
diff --git 
a/src/main/java/com/jobdri/jobdri_api/domain/user/entity/User.java b/src/main/java/com/jobdri/jobdri_api/domain/user/entity/User.java
index bed103e..7c9babd 100644
--- a/src/main/java/com/jobdri/jobdri_api/domain/user/entity/User.java
+++ b/src/main/java/com/jobdri/jobdri_api/domain/user/entity/User.java
@@ -106,4 +106,8 @@ public void decreaseCredit(int amount) {
         }
         this.credit -= amount;
     }
+    /** Sets this user's role to {@code UserRole.ADMIN}. */
+    public void promoteToAdmin() {
+        this.role = UserRole.ADMIN;
+    }
 }
diff --git a/src/main/java/com/jobdri/jobdri_api/global/config/SecurityConfig.java b/src/main/java/com/jobdri/jobdri_api/global/config/SecurityConfig.java
index a189b9d..a50794b 100644
--- a/src/main/java/com/jobdri/jobdri_api/global/config/SecurityConfig.java
+++ b/src/main/java/com/jobdri/jobdri_api/global/config/SecurityConfig.java
@@ -61,6 +61,7 @@ public SecurityFilterChain securityFilterChain(HttpSecurity http) throws Excepti
                         .requestMatchers("/").permitAll()
                         .requestMatchers("/oauth2/**", "/login/oauth2/**").permitAll()
                         .requestMatchers("/api/auth/**").permitAll()
+                        .requestMatchers("/api/admin/**").hasRole("ADMIN") // admin endpoints require the ADMIN role; declared before the anyRequest() catch-all
                         .anyRequest().authenticated()
                 );
 
diff --git a/src/main/resources/application-dev.yaml b/src/main/resources/application-dev.yaml
index 6f38965..6fc91f0 100644
--- a/src/main/resources/application-dev.yaml
+++ b/src/main/resources/application-dev.yaml
@@ -12,6 +12,7 @@ spring:
   jpa:
     hibernate:
       ddl-auto: update
+    defer-datasource-initialization: true  # presumably so schema.sql runs after Hibernate has created the tables it references
     properties:
       hibernate:
         format_sql: true
@@ -67,6 +68,18 @@ mail:
 app:
   oauth2:
     redirect-uri: ${APP_OAUTH2_REDIRECT_URI:http://localhost:3000/oauth2/redirect}
+  admin:
+    bootstrap-emails: ${APP_ADMIN_BOOTSTRAP_EMAILS:}  # empty by default
+  corpus:
+    import:
+      run-on-startup: ${APP_CORPUS_IMPORT_RUN_ON_STARTUP:false}
+      xlsx-path: ${APP_CORPUS_IMPORT_XLSX_PATH:}
+    embedding:
+      sync-on-startup: ${APP_CORPUS_EMBEDDING_SYNC_ON_STARTUP:false}
+      model: ${APP_CORPUS_EMBEDDING_MODEL:embed-v4.0}  # same default as the @Value fallback in CorpusEmbeddingSyncService
+      output-dimension: ${APP_CORPUS_EMBEDDING_OUTPUT_DIMENSION:1024}
+      
document-input-type: ${APP_CORPUS_EMBEDDING_DOCUMENT_INPUT_TYPE:search_document}
+      batch-size: ${APP_CORPUS_EMBEDDING_BATCH_SIZE:32}  # same default as the @Value fallback in CorpusEmbeddingSyncService
 
 server:
   port: 8080
@@ -84,6 +97,10 @@ openai:
   model:
     job-posting-extractor: ${OPENAI_JOB_POSTING_MODEL:gpt-4o-mini}
 
+cohere:
+  api:
+    key: ${COHERE_API_KEY:}  # empty when the variable is unset
+
 payment:
   toss:
     client-key: ${TOSS_CLIENT_KEY:}
diff --git a/src/main/resources/application-prod.yaml b/src/main/resources/application-prod.yaml
index cd43361..07b08e8 100644
--- a/src/main/resources/application-prod.yaml
+++ b/src/main/resources/application-prod.yaml
@@ -12,6 +12,7 @@ spring:
   jpa:
     hibernate:
       ddl-auto: ${JPA_DDL_AUTO:update}
+    defer-datasource-initialization: true  # presumably so schema.sql runs after Hibernate has created the tables it references
     properties:
       hibernate:
         format_sql: true
@@ -67,6 +68,18 @@ mail:
 app:
   oauth2:
     redirect-uri: ${APP_OAUTH2_REDIRECT_URI}
+  admin:
+    bootstrap-emails: ${APP_ADMIN_BOOTSTRAP_EMAILS:}  # empty by default
+  corpus:
+    import:
+      run-on-startup: ${APP_CORPUS_IMPORT_RUN_ON_STARTUP:false}
+      xlsx-path: ${APP_CORPUS_IMPORT_XLSX_PATH:}
+    embedding:
+      sync-on-startup: ${APP_CORPUS_EMBEDDING_SYNC_ON_STARTUP:false}
+      model: ${APP_CORPUS_EMBEDDING_MODEL:embed-v4.0}
+      output-dimension: ${APP_CORPUS_EMBEDDING_OUTPUT_DIMENSION:1024}
+      document-input-type: ${APP_CORPUS_EMBEDDING_DOCUMENT_INPUT_TYPE:search_document}
+      batch-size: ${APP_CORPUS_EMBEDDING_BATCH_SIZE:32}
 
 server:
   port: 8080
@@ -96,6 +109,10 @@ openai:
   model:
     job-posting-extractor: ${OPENAI_JOB_POSTING_MODEL:gpt-4o-mini}
 
+cohere:
+  api:
+    key: ${COHERE_API_KEY:}  # NOTE(review): unlike TOSS_CLIENT_KEY below, this has an empty default - confirm that is intended for prod
+
 payment:
   toss:
     client-key: ${TOSS_CLIENT_KEY}
diff --git a/src/main/resources/schema.sql b/src/main/resources/schema.sql
index 9d821bb..4902207 100644
--- a/src/main/resources/schema.sql
+++ b/src/main/resources/schema.sql
@@ -1,4 +1,23 @@
 CREATE EXTENSION IF NOT EXISTS pg_trgm;
+CREATE EXTENSION IF NOT EXISTS vector; -- pgvector; provides the vector column type used below
+
+CREATE TABLE IF NOT EXISTS mock_job_posting_embeddings (
+    id BIGSERIAL PRIMARY KEY,
+    corpus_id BIGINT NOT NULL UNIQUE REFERENCES mock_job_posting_corpus(id) ON DELETE CASCADE, -- one embedding per corpus row; also the ON CONFLICT target of the upsert
+    embedding_model 
VARCHAR(100) NOT NULL,
+    embedding vector NOT NULL, -- NOTE(review): no dimension declared; pgvector needs a fixed dimension before this column can be indexed
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE TABLE IF NOT EXISTS mock_question_embeddings (
+    id BIGSERIAL PRIMARY KEY,
+    corpus_id BIGINT NOT NULL UNIQUE REFERENCES mock_question_corpus(id) ON DELETE CASCADE, -- one embedding per corpus row; also the ON CONFLICT target of the upsert
+    embedding_model VARCHAR(100) NOT NULL,
+    embedding vector NOT NULL, -- NOTE(review): no dimension declared; pgvector needs a fixed dimension before this column can be indexed
+    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
 
 CREATE INDEX IF NOT EXISTS idx_job_postings_company_detail
     ON job_postings (company_id, detail_classification_id);
@@ -8,3 +27,9 @@ CREATE INDEX IF NOT EXISTS idx_job_postings_detail
 
 CREATE INDEX IF NOT EXISTS idx_job_postings_company
     ON job_postings (company_id);
+
+-- No separate index on corpus_id is created for the two embedding tables:
+-- the UNIQUE constraint on corpus_id already makes PostgreSQL build a unique
+-- B-tree index on that column, so an additional plain index would only add
+-- write and storage overhead. A vector index (HNSW / IVFFlat) belongs here
+-- once the embedding column has a fixed dimension, e.g. vector(1024).