From d92612fd9fd0f2a5f22e976d27680a6a7122abef Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 24 Mar 2026 18:49:53 +0300 Subject: [PATCH 01/26] refactor: new sealing order --- cmd/index_analyzer/main.go | 109 +++-- consts/consts.go | 22 +- frac/active_sealing_source.go | 141 ++---- frac/fraction_concurrency_test.go | 6 +- frac/fraction_test.go | 22 +- frac/index_cache.go | 15 +- frac/remote.go | 121 +++-- frac/sealed.go | 246 +++++----- frac/sealed/sealing/blocks_builder.go | 374 ++++++++------- frac/sealed/sealing/blocks_builder_test.go | 256 ++++------- frac/sealed/sealing/index.go | 506 +++++++++------------ frac/sealed/sealing/sealer.go | 162 ++++--- frac/sealed/token/provider.go | 7 +- frac/sealed/token/table_entry.go | 2 +- frac/sealed/token/table_loader.go | 10 +- frac/sealed_loader.go | 140 +++--- fracmanager/cache_maintainer.go | 7 +- fracmanager/frac_manifest.go | 129 ++++-- 18 files changed, 1210 insertions(+), 1065 deletions(-) diff --git a/cmd/index_analyzer/main.go b/cmd/index_analyzer/main.go index b1b22323..4ea8dd44 100644 --- a/cmd/index_analyzer/main.go +++ b/cmd/index_analyzer/main.go @@ -5,12 +5,14 @@ import ( "fmt" "hash/fnv" "os" + "strings" "sync" "time" "github.com/alecthomas/units" "go.uber.org/zap" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/token" @@ -21,7 +23,7 @@ import ( // Launch as: // -// > go run ./cmd/index_analyzer/... ./data/*.index | tee ~/report.txt +// > go run ./cmd/index_analyzer/... ./data/*.info | tee ~/report.txt func main() { if len(os.Args) < 2 { fmt.Println("No args") @@ -73,45 +75,80 @@ func getCacheMaintainer() (*fracmanager.CacheMaintainer, func()) { } } +// basePath strips any known index suffix to return the fraction base path. +func basePath(path string) string { + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + if strings.HasSuffix(path, suffix) { + return path[:len(path)-len(suffix)] + } + } + return path +} + +func openFile(path string) *os.File { + f, err := os.Open(path) + if err != nil { + panic(err) + } + return f +} + func analyzeIndex( path string, cm *fracmanager.CacheMaintainer, - reader *storage.ReadLimiter, + rl *storage.ReadLimiter, mergedTokensUniq map[string]map[string]int, allTokensValuesUniq map[string]int, ) Stats { + base := basePath(path) + indexCache := cm.CreateIndexCache() + + // Open per-section files. + infoFile := openFile(base + consts.InfoFileSuffix) + tokenFile := openFile(base + consts.TokenFileSuffix) + lidFile := openFile(base + consts.LIDFileSuffix) + defer infoFile.Close() + defer tokenFile.Close() + defer lidFile.Close() + + infoReader := storage.NewIndexReader(rl, infoFile.Name(), infoFile, indexCache.InfoRegistry) + tokenReader := storage.NewIndexReader(rl, tokenFile.Name(), tokenFile, indexCache.TokenRegistry) + lidReader := storage.NewIndexReader(rl, lidFile.Name(), lidFile, indexCache.LIDRegistry) + + // --- Info --- var blockIndex uint32 - cache := cm.CreateIndexCache() - - f, err := os.Open(path) + infoData, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { - panic(err) + logger.Fatal("error reading info block", zap.String("file", infoFile.Name()), zap.Error(err)) } + var b sealed.BlockInfo + if err := b.Unpack(infoData); err != nil { + logger.Fatal("error unpacking block info", zap.Error(err)) + } + docsCount := int(b.Info.DocsTotal) - indexReader := storage.NewIndexReader(reader, f.Name(), f, cache.Registry) - - readBlock := func() []byte { - data, _, err := indexReader.ReadIndexBlock(blockIndex, nil) + // --- Tokens (.token file) --- + // Token blocks start at index 0, followed by an empty separator, then token table blocks. + blockIndex = 0 + readTokenBlock := func() []byte { + data, _, err := tokenReader.ReadIndexBlock(blockIndex, nil) blockIndex++ if err != nil { - logger.Fatal("error reading block", zap.String("file", f.Name()), zap.Error(err)) + logger.Fatal("error reading token block", zap.String("file", tokenFile.Name()), zap.Error(err)) } return data } - // load info - var b sealed.BlockInfo - if err := b.Unpack(readBlock()); err != nil { - logger.Fatal("error unpacking block info", zap.Error(err)) - } - - docsCount := int(b.Info.DocsTotal) - - // load tokens tokens := [][]byte{} for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator + data := readTokenBlock() + if len(data) == 0 { // empty block - section separator break } block := token.Block{} @@ -123,11 +160,10 @@ func analyzeIndex( } } - // load tokens table tokenTableBlocks := []token.TableBlock{} for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator + data := readTokenBlock() + if len(data) == 0 { // empty block - section separator break } block := token.TableBlock{} @@ -136,28 +172,25 @@ func analyzeIndex( } tokenTable := token.TableFromBlocks(tokenTableBlocks) - // skip position - blockIndex++ - - // skip IDS - for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator - break + // --- LIDs (.lid file) --- + blockIndex = 0 + readLIDBlock := func() []byte { + data, _, err := lidReader.ReadIndexBlock(blockIndex, nil) + blockIndex++ + if err != nil { + logger.Fatal("error reading lid block", zap.String("file", lidFile.Name()), zap.Error(err)) } - blockIndex++ // skip RID - blockIndex++ // skip Param + return data } - // load LIDs tid := 0 lidsTotal := 0 lidsUniq := map[[16]byte]int{} lidsLens := make([]int, len(tokens)) tokenLIDs := []uint32{} for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator + data := readLIDBlock() + if len(data) == 0 { // empty block - section separator break } diff --git a/consts/consts.go b/consts/consts.go index ef84fd4a..7a8eb9a4 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -59,9 +59,25 @@ const ( SdocsTmpFileSuffix = "._sdocs" SdocsDelFileSuffix = ".sdocs.del" - IndexFileSuffix = ".index" - IndexTmpFileSuffix = "._index" - IndexDelFileSuffix = ".index.del" + InfoFileSuffix = ".info" + InfoTmpFileSuffix = "._info" + InfoDelFileSuffix = ".info.del" + + TokenFileSuffix = ".token" + TokenTmpFileSuffix = "._token" + TokenDelFileSuffix = ".token.del" + + OffsetsFileSuffix = ".offsets" + OffsetsTmpFileSuffix = "._offsets" + OffsetsDelFileSuffix = ".offsets.del" + + IDFileSuffix = ".id" + IDTmpFileSuffix = "._id" + IDDelFileSuffix = ".id.del" + + LIDFileSuffix = ".lid" + LIDTmpFileSuffix = "._lid" + LIDDelFileSuffix = ".lid.del" RemoteFractionSuffix = ".remote" diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 43ca0239..44aaa850 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -3,14 +3,12 @@ package frac import ( "bytes" "encoding/binary" - "errors" "io" "iter" "os" "path/filepath" "slices" "time" - "unsafe" "github.com/alecthomas/units" "go.uber.org/zap" @@ -160,45 +158,30 @@ func (src *ActiveSealingSource) Info() *common.Info { return src.info } -// TokenBlocks returns an iterator for token blocks for disk writing. -// Tokens are pre-sorted: first by fields, then lexicographically within each field. -// Each block contains up to blockSize bytes of data for efficient writing. -func (src *ActiveSealingSource) TokenBlocks(blockSize int) iter.Seq[[][]byte] { - const tokenLengthSize = int(unsafe.Sizeof(uint32(0))) - return func(yield func([][]byte) bool) { - if len(src.tids) == 0 { - return - } - if blockSize <= 0 { - src.lastErr = errors.New("sealing: token block size must be > 0") - return - } - - actualSize := 0 - block := make([][]byte, 0, blockSize) +// TokenAndLIDs returns an iterator that yields one (token, lids) pair at a time, in TID order. +// Tokens are pre-sorted: first by field, then lexicographically within each field. +// The lids slice is reused between yields and must not be retained by the caller. +func (src *ActiveSealingSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { + return func(yield func([]byte, []uint32) bool) { + var lidBuf []uint32 - // Iterate through all sorted TIDs for _, tid := range src.tids { - if actualSize >= blockSize { - if !yield(block) { - return - } - actualSize = 0 - block = block[:0] + oldLIDs := src.lids[tid].GetLIDs(src.mids, src.rids) + + lidBuf = slices.Grow(lidBuf[:0], len(oldLIDs)) + for _, lid := range oldLIDs { + lidBuf = append(lidBuf, src.oldToNewLIDs[lid]) + } + + if !yield(src.tokens[tid], lidBuf) { + return } - token := src.tokens[tid] - actualSize += tokenLengthSize // Add the size of the token length field - actualSize += len(token) // Add the size of the token itself - block = append(block, token) } - yield(block) } } -// Fields returns an iterator for sorted fields and their maximum TIDs. -// Fields are sorted lexicographically, ensuring predictable order -// when building disk index structures. -func (src *ActiveSealingSource) Fields() iter.Seq2[string, uint32] { +// Field returns an iterator for sorted fields and their maximum TIDs. +func (src *ActiveSealingSource) Field() iter.Seq2[string, uint32] { return func(yield func(string, uint32) bool) { for i, field := range src.fields { if !yield(field, src.fieldsMaxTIDs[i]) { @@ -208,75 +191,37 @@ func (src *ActiveSealingSource) Fields() iter.Seq2[string, uint32] { } } -// IDsBlocks returns an iterator for document ID blocks and corresponding positions. -// IDs are sorted. Block size is controlled by blockSize parameter for balance between -// performance and memory usage. -func (src *ActiveSealingSource) IDsBlocks(blockSize int) iter.Seq2[[]seq.ID, []seq.DocPos] { - return func(yield func([]seq.ID, []seq.DocPos) bool) { +// ID returns an iterator for document IDs and their positions, one pair at a time. +func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { mids := src.mids.vals rids := src.rids.vals - ids := make([]seq.ID, 0, blockSize) - pos := make([]seq.DocPos, 0, blockSize) - - // First reserved ID (system). This position is not used because Local IDs (LIDs) use 1-based indexing. - ids = append(ids, seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}) - pos = append(pos, 0) + // First reserved ID (system). Position unused; LIDs use 1-based indexing. + if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { + return + } - // Iterate through sorted LIDs for i, lid := range src.sortedLIDs { - if len(ids) == blockSize { - if !yield(ids, pos) { - return - } - ids = ids[:0] - pos = pos[:0] - } id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} - ids = append(ids, id) - - // Use sorted or original positions + var pos seq.DocPos if len(src.docPosSorted) == 0 { - pos = append(pos, src.docPosMap[id]) + pos = src.docPosMap[id] } else { - pos = append(pos, src.docPosSorted[i+1]) // +1 for system document + pos = src.docPosSorted[i+1] // +1 for system document + } + if !yield(id, pos) { + return } } - yield(ids, pos) } } -// BlocksOffsets returns document block offsets. -func (src *ActiveSealingSource) BlocksOffsets() []uint64 { +// BlockOffsets returns document block offsets. +func (src *ActiveSealingSource) BlockOffsets() []uint64 { return src.blocksOffsets } -// TokenLIDs returns an iterator for LID lists for each token. -// LIDs are converted to new numbering after document sorting. -// Each iterator call returns a list of documents containing a specific token, -// in sorted order. -func (src *ActiveSealingSource) TokenLIDs() iter.Seq[[]uint32] { - return func(yield func([]uint32) bool) { - newLIDs := []uint32{} - - // For each sorted TID - for _, tid := range src.tids { - // Get original LIDs for this token - oldLIDs := src.lids[tid].GetLIDs(src.mids, src.rids) - newLIDs = slices.Grow(newLIDs[:0], len(oldLIDs)) - - // Convert old LIDs to new through mapping - for _, lid := range oldLIDs { - newLIDs = append(newLIDs, src.oldToNewLIDs[lid]) - } - - if !yield(newLIDs) { - return - } - } - } -} - // makeInverser creates an array for converting old LIDs to new ones. // sortedLIDs[i] = oldLID -> inverser[oldLID] = i+1 func makeInverser(sortedLIDs []uint32) []uint32 { @@ -297,22 +242,18 @@ func (src *ActiveSealingSource) Docs() iter.Seq2[seq.ID, []byte] { curDoc []byte ) - // Iterate through ID and position blocks - for ids, pos := range src.IDsBlocks(consts.IDsPerBlock) { - for i, id := range ids { - if id == systemSeqID { - curDoc = nil // reserved system document (no payload) - } else if id != prev { - // If ID changed, read new document - if curDoc, src.lastErr = src.doc(pos[i]); src.lastErr != nil { - return - } - } - prev = id - if !yield(id, curDoc) { + for id, pos := range src.ID() { + if id == systemSeqID { + curDoc = nil // reserved system document (no payload) + } else if id != prev { + if curDoc, src.lastErr = src.doc(pos); src.lastErr != nil { return } } + prev = id + if !yield(id, curDoc) { + return + } } } } diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index a5c19b22..95e96637 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -360,7 +360,11 @@ func seal(active *Active) (*Sealed, error) { LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } sealed := NewSealedPreloaded( active.BaseFileName, diff --git a/frac/fraction_test.go b/frac/fraction_test.go index ec5f3d85..7326ce54 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1841,8 +1841,8 @@ func (s *FractionTestSuite) TestFractionInfo() { "index on disk doesn't match. actual value: %d", info.IndexOnDisk) case *Remote: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") - s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1500), - "index on disk doesn't match. actual value: %d", info.MetaOnDisk) + s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1600), + "index on disk doesn't match. actual value: %d", info.IndexOnDisk) default: s.Require().Fail("unsupported fraction type") } @@ -2100,7 +2100,11 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } sealed := NewSealedPreloaded( @@ -2291,7 +2295,11 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } sealed = NewSealed( @@ -2359,7 +2367,11 @@ func (s *RemoteFractionTestSuite) SetupTest() { LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } remoteFrac := NewRemote( diff --git a/frac/index_cache.go b/frac/index_cache.go index 4536fa22..852fe51f 100644 --- a/frac/index_cache.go +++ b/frac/index_cache.go @@ -8,7 +8,14 @@ import ( ) type IndexCache struct { - Registry *cache.Cache[[]byte] + // Per-file registry caches (each IndexReader needs its own). + InfoRegistry *cache.Cache[[]byte] + TokenRegistry *cache.Cache[[]byte] + OffsetsRegistry *cache.Cache[[]byte] + IDRegistry *cache.Cache[[]byte] + LIDRegistry *cache.Cache[[]byte] + + // Block-level data caches shared across all readers. MIDs *cache.Cache[[]byte] RIDs *cache.Cache[seqids.BlockRIDs] Params *cache.Cache[seqids.BlockParams] @@ -18,11 +25,15 @@ type IndexCache struct { } func (s *IndexCache) Release() { + s.InfoRegistry.Release() + s.TokenRegistry.Release() + s.OffsetsRegistry.Release() + s.IDRegistry.Release() + s.LIDRegistry.Release() s.LIDs.Release() s.MIDs.Release() s.RIDs.Release() s.Params.Release() - s.Registry.Release() s.Tokens.Release() s.TokenTable.Release() } diff --git a/frac/remote.go b/frac/remote.go index 7658e80e..7da03205 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -45,9 +45,20 @@ type Remote struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - indexFile storage.ImmutableFile - indexCache *IndexCache - indexReader storage.IndexReader + // Per-section index files and their readers. + infoFile storage.ImmutableFile + tokenFile storage.ImmutableFile + offsetsFile storage.ImmutableFile + idFile storage.ImmutableFile + lidFile storage.ImmutableFile + + infoReader storage.IndexReader + tokenReader storage.IndexReader + offsetsReader storage.IndexReader + idReader storage.IndexReader + lidReader storage.IndexReader + + indexCache *IndexCache loadMu *sync.RWMutex isLoaded bool @@ -100,15 +111,15 @@ func NewRemote( // I wrote a small proposal on how we can reduce impact of such events. // https://github.com/ozontech/seq-db/issues/92 - if err := f.openIndex(); err != nil { + if err := f.openInfoFile(); err != nil { logger.Error( - "cannot open index file: any subsequent operation will fail", + "cannot open info file: any subsequent operation will fail", zap.String("fraction", filepath.Base(f.BaseFileName)), zap.Error(err), ) } - f.info = loadHeader(f.indexFile, f.indexReader) + f.info = loadHeader(f.infoReader) return f } @@ -156,19 +167,21 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e return nil, err } return &sealedDataProvider{ - ctx: ctx, + ctx: ctx, + fractionTypeLabel: "remote", + info: f.info, config: f.Config, docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.indexReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.indexReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.indexReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.indexReader, + &f.idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -196,7 +209,11 @@ func (f *Remote) Suicide() { files := []string{ filepath.Base(f.BaseFileName) + consts.DocsFileSuffix, filepath.Base(f.BaseFileName) + consts.SdocsFileSuffix, - filepath.Base(f.BaseFileName) + consts.IndexFileSuffix, + filepath.Base(f.BaseFileName) + consts.InfoFileSuffix, + filepath.Base(f.BaseFileName) + consts.TokenFileSuffix, + filepath.Base(f.BaseFileName) + consts.OffsetsFileSuffix, + filepath.Base(f.BaseFileName) + consts.IDFileSuffix, + filepath.Base(f.BaseFileName) + consts.LIDFileSuffix, } err := f.s3cli.Remove(f.ctx, files...) @@ -227,38 +244,88 @@ func (f *Remote) load() error { return err } - if err := f.openIndex(); err != nil { + if err := f.openIndexFiles(); err != nil { return err } - (&Loader{}).Load(&f.blocksData, f.info, &f.indexReader) + readers := IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true return nil } -func (f *Remote) openIndex() error { - if f.indexFile != nil { +func (f *Remote) openInfoFile() error { + if f.infoFile != nil { return nil } + return f.openRemoteFile( + consts.InfoFileSuffix, + func(file storage.ImmutableFile) { + f.infoFile = file + f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + }, + ) +} + +func (f *Remote) openIndexFiles() error { + if err := f.openInfoFile(); err != nil { + return err + } + if f.tokenFile == nil { + if err := f.openRemoteFile(consts.TokenFileSuffix, func(file storage.ImmutableFile) { + f.tokenFile = file + f.tokenReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.TokenRegistry) + }); err != nil { + return err + } + } + if f.offsetsFile == nil { + if err := f.openRemoteFile(consts.OffsetsFileSuffix, func(file storage.ImmutableFile) { + f.offsetsFile = file + f.offsetsReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.OffsetsRegistry) + }); err != nil { + return err + } + } + if f.idFile == nil { + if err := f.openRemoteFile(consts.IDFileSuffix, func(file storage.ImmutableFile) { + f.idFile = file + f.idReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.IDRegistry) + }); err != nil { + return err + } + } + if f.lidFile == nil { + if err := f.openRemoteFile(consts.LIDFileSuffix, func(file storage.ImmutableFile) { + f.lidFile = file + f.lidReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.LIDRegistry) + }); err != nil { + return err + } + } + return nil +} - name := filepath.Base(f.BaseFileName) + consts.IndexFileSuffix +func (f *Remote) openRemoteFile(suffix string, assign func(storage.ImmutableFile)) error { + name := filepath.Base(f.BaseFileName) + suffix ok, err := f.s3cli.Exists(f.ctx, name) if err != nil { - return fmt.Errorf( - "cannot check existence of %q file: %w", - consts.IndexFileSuffix, err, - ) + return fmt.Errorf("cannot check existence of %q file: %w", suffix, err) } - - if ok { - f.indexFile = s3.NewReader(f.ctx, f.s3cli, name) - f.indexReader = storage.NewIndexReader(f.readLimiter, f.indexFile.Name(), f.indexFile, f.indexCache.Registry) - return nil + if !ok { + return fmt.Errorf("missing %q file", suffix) } - return fmt.Errorf("missing %q file", consts.IndexFileSuffix) + assign(s3.NewReader(f.ctx, f.s3cli, name)) + return nil } func (f *Remote) openDocs() error { diff --git a/frac/sealed.go b/frac/sealed.go index bda4fc72..7c419120 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -39,9 +39,20 @@ type Sealed struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - indexFile *os.File - indexCache *IndexCache - indexReader storage.IndexReader + // Per-section index files and their readers. + infoFile *os.File + tokenFile *os.File + offsetsFile *os.File + idFile *os.File + lidFile *os.File + + infoReader storage.IndexReader + tokenReader storage.IndexReader + offsetsReader storage.IndexReader + idReader storage.IndexReader + lidReader storage.IndexReader + + indexCache *IndexCache loadMu *sync.RWMutex isLoaded bool @@ -88,38 +99,83 @@ func NewSealed( skipMaskProvider: skipMaskProvider, } - // fast path if fraction-info cache exists AND it has valid index size + // Fast path: if info cache has valid index size, skip opening the info file now. if info != nil && info.IndexOnDisk > 0 { return f } - f.openIndex() - f.info = loadHeader(f.indexFile, f.indexReader) + f.openInfoFile() + f.info = loadHeader(f.infoReader) + f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName) return f } -func (f *Sealed) openIndex() { - if f.indexFile == nil { - var err error - name := f.BaseFileName + consts.IndexFileSuffix - f.indexFile, err = os.Open(name) +func (f *Sealed) openInfoFile() { + if f.infoFile == nil { + name := f.BaseFileName + consts.InfoFileSuffix + file, err := os.Open(name) if err != nil { - logger.Fatal("can't open index file", zap.String("file", name), zap.Error(err)) + logger.Fatal("can't open info file", zap.String("file", name), zap.Error(err)) } - f.indexReader = storage.NewIndexReader(f.readLimiter, f.indexFile.Name(), f.indexFile, f.indexCache.Registry) + f.infoFile = file + f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + } +} + +func (f *Sealed) openIndexFiles() { + f.openInfoFile() + + if f.tokenFile == nil { + name := f.BaseFileName + consts.TokenFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open token file", zap.String("file", name), zap.Error(err)) + } + f.tokenFile = file + f.tokenReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.TokenRegistry) + } + + if f.offsetsFile == nil { + name := f.BaseFileName + consts.OffsetsFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open offsets file", zap.String("file", name), zap.Error(err)) + } + f.offsetsFile = file + f.offsetsReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.OffsetsRegistry) + } + + if f.idFile == nil { + name := f.BaseFileName + consts.IDFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open id file", zap.String("file", name), zap.Error(err)) + } + f.idFile = file + f.idReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.IDRegistry) + } + + if f.lidFile == nil { + name := f.BaseFileName + consts.LIDFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open lid file", zap.String("file", name), zap.Error(err)) + } + f.lidFile = file + f.lidReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.LIDRegistry) } } func (f *Sealed) openDocs() { if f.docsFile == nil { var err error - f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) // try first open *.sdocs file + f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Fatal("can't open sdocs file", zap.String("frac", f.BaseFileName), zap.Error(err)) } - f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) // fallback to *.docs file + f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) if err != nil { logger.Fatal("can't open docs file", zap.String("frac", f.BaseFileName), zap.Error(err)) } @@ -154,13 +210,13 @@ func NewSealedPreloaded( skipMaskProvider: skipMaskProvider, } - // put the token table built during sealing into the cache of the sealed fraction + // Put token table built during sealing into the cache. indexCache.TokenTable.Get(token.CacheKeyTable, func() (token.Table, int) { return preloaded.TokenTable, preloaded.TokenTable.Size() }) f.openDocs() - f.openIndex() + f.openIndexFiles() docsCountK := float64(f.info.DocsTotal) / 1000 logger.Info("sealed fraction created from active", @@ -181,33 +237,41 @@ func (f *Sealed) load() { defer f.loadMu.Unlock() if !f.isLoaded { - f.openDocs() - f.openIndex() - - (&Loader{}).Load(&f.blocksData, f.info, &f.indexReader) + f.openIndexFiles() + + readers := IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true } } -// Offload saves `.docs` (or `.sdocs`) and `.index` files into remote storage. -// It does not free any of the occupied memory (nor on disk nor in memory). +// Offload saves all index files and docs to remote storage. func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) { f.loadMu.Lock() f.openDocs() - f.openIndex() + f.openIndexFiles() f.loadMu.Unlock() g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) - g.Go(func() error { return u.Upload(gctx, f.indexFile) }) + g.Go(func() error { return u.Upload(gctx, f.infoFile) }) + g.Go(func() error { return u.Upload(gctx, f.tokenFile) }) + g.Go(func() error { return u.Upload(gctx, f.offsetsFile) }) + g.Go(func() error { return u.Upload(gctx, f.idFile) }) + g.Go(func() error { return u.Upload(gctx, f.lidFile) }) if err := g.Wait(); err != nil { return true, err } remoteFracName := f.BaseFileName + consts.RemoteFractionSuffix - file, err := os.Create(remoteFracName) if err != nil { return true, err @@ -219,15 +283,11 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { - if f.docsFile != nil { - if err := f.docsFile.Close(); err != nil { - logger.Error("can't close docs file", zap.String("frac", f.BaseFileName), zap.Error(err)) - } - } - - if f.indexFile != nil { - if err := f.indexFile.Close(); err != nil { - logger.Error("can't close index file", zap.String("frac", f.BaseFileName), zap.Error(err)) + for _, file := range []*os.File{f.docsFile, f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} { + if file != nil { + if err := file.Close(); err != nil { + logger.Error("can't close file", zap.String("file", file.Name()), zap.Error(err)) + } } } @@ -238,67 +298,46 @@ func (f *Sealed) Release() { func (f *Sealed) Suicide() { f.Release() - // make some atomic magic, to be more stable on removing fractions + // Rename docs atomically first — this commits the intent to delete. oldPath := f.BaseFileName + consts.DocsFileSuffix newPath := f.BaseFileName + consts.DocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename docs file", - zap.String("old_path", oldPath), - zap.String("new_path", newPath), - zap.Error(err), - ) + logger.Error("can't rename docs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) } oldPath = f.BaseFileName + consts.SdocsFileSuffix newPath = f.BaseFileName + consts.SdocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename sdocs file", - zap.String("old_path", oldPath), - zap.String("new_path", newPath), - zap.Error(err), - ) + logger.Error("can't rename sdocs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) } if f.PartialSuicideMode == HalfRename { return } - oldPath = f.BaseFileName + consts.IndexFileSuffix - newPath = f.BaseFileName + consts.IndexDelFileSuffix - if err := os.Rename(oldPath, newPath); err != nil { - logger.Error("can't rename index file", - zap.String("old_path", oldPath), - zap.String("new_path", newPath), - zap.Error(err), - ) - } - - rmPath := f.BaseFileName + consts.DocsDelFileSuffix - if err := os.Remove(rmPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove docs file", - zap.String("file", rmPath), - zap.Error(err), - ) + // Delete all index files directly (they are regenerable; no atomic rename needed). + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + if err := os.Remove(f.BaseFileName + suffix); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Error("can't remove index file", zap.String("file", f.BaseFileName+suffix), zap.Error(err)) + } } - rmPath = f.BaseFileName + consts.SdocsDelFileSuffix - if err := os.Remove(rmPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove sdocs file", - zap.String("file", rmPath), - zap.Error(err), - ) + if err := os.Remove(f.BaseFileName + consts.DocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Error("can't remove docs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) } if f.PartialSuicideMode == HalfRemove { return } - rmPath = f.BaseFileName + consts.IndexDelFileSuffix - if err := os.Remove(rmPath); err != nil { - logger.Error("can't remove index file", - zap.String("file", rmPath), - zap.Error(err), - ) + if err := os.Remove(f.BaseFileName + consts.SdocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Error("can't remove sdocs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) } f.skipMaskProvider.RemoveFrac(f.info.Name()) @@ -311,14 +350,12 @@ func (f *Sealed) String() string { func (f *Sealed) Fetch(ctx context.Context, ids []seq.ID) ([][]byte, error) { dp := f.createDataProvider(ctx) defer dp.release() - return dp.Fetch(ids) } func (f *Sealed) Search(ctx context.Context, params processor.SearchParams) (*seq.QPR, error) { dp := f.createDataProvider(ctx) defer dp.release() - return dp.Search(params) } @@ -340,13 +377,13 @@ func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.indexReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.indexReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.indexReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.indexReader, + &f.idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -370,39 +407,34 @@ func (f *Sealed) IsIntersecting(from, to seq.MID) bool { return f.info.IsIntersecting(from, to) } -func loadHeader( - indexFile storage.ImmutableFile, - indexReader storage.IndexReader, -) *common.Info { - block, _, err := indexReader.ReadIndexBlock(0, nil) +func loadHeader(infoReader storage.IndexReader) *common.Info { + block, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { - logger.Fatal( - "error reading info block from index", - zap.String("file", indexFile.Name()), - zap.Error(err), - ) + logger.Fatal("error reading info block", zap.Error(err)) } var bi sealed.BlockInfo if err := bi.Unpack(block); err != nil { - logger.Fatal( - "error unpacking info block", - zap.String("file", indexFile.Name()), - zap.Error(err), - ) + logger.Fatal("error unpacking info block", zap.Error(err)) } - info := bi.Info + return bi.Info +} - // set index size - stat, err := indexFile.Stat() - if err != nil { - logger.Fatal( - "can't stat index file", - zap.String("file", indexFile.Name()), - zap.Error(err), - ) +// computeIndexOnDisk returns the total on-disk size of all 5 index files for a local fraction. +func computeIndexOnDisk(basePath string) uint64 { + var total int64 + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + st, err := os.Stat(basePath + suffix) + if err != nil { + logger.Fatal("can't stat index file", zap.String("file", basePath+suffix), zap.Error(err)) + } + total += st.Size() } - - info.IndexOnDisk = uint64(stat.Size()) - return info + return uint64(total) } diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 14a5cac7..9090db18 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "errors" "iter" + "unsafe" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/seqids" @@ -54,9 +55,9 @@ func (bb *blocksBuilder) LastError() error { return bb.lastErr } -// BuildTokenBlocks converts token batches into token blocks with field tables. The function creates an iterator -// that returns token blocks and corresponding field tables describing which fields are covered by which tokens -// in the block. +// BuildTokenBlocks converts scalar (token, lids) pairs into token blocks with field tables. +// onLIDs is called for each token's LIDs immediately during iteration — the caller must not +// retain the slice after onLIDs returns. Errors from onLIDs are stored in bb.lastErr. // // Visualization of relationships between fields, tokens, and table entries: // @@ -64,72 +65,85 @@ func (bb *blocksBuilder) LastError() error { // Token Blocks: [.t1.t2.t3.t4.][.t5.t6.t7.t8.][.t9....etc...][.............][.............][.............] // Field Entries: {-----f1------}{-f1-}{---f2--}{--f2--}{-f3--}{------f3-----}{-f3-}{----f4-}{-----f4------} // -// So we split field ranges into field entries - sub-ranges of fields aligned to block boundaries. -// Each field table (token.FieldTable) links a field to a blocks and token ranges inside the blocks. -// // Parameters: -// - tokenBatches: Iterator of token batches, where each batch becomes a separate block +// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs // - fields: Iterator of [fieldName, maxTID] pairs for all fields in ascending TID order -// -// Returns: Iterator of [token block, field table for block] pairs, where field table contains -// information about which fields and their ranges are represented in this block. +// - blockSize: Maximum payload size in bytes per token block +// - onLIDs: Called for each token's LIDs before the source advances to the next token func (bb *blocksBuilder) BuildTokenBlocks( - tokenBatches iter.Seq[[][]byte], + tokens iter.Seq2[[]byte, []uint32], fields iter.Seq2[string, uint32], + accumulate func([]uint32) error, + blockSize int, ) iter.Seq2[tokensSealBlock, []token.FieldTable] { return func(yield func(tokensSealBlock, []token.FieldTable) bool) { - // Create pull iterator for fields - convert Seq2 to a function that can be called on demand - getNextField, stop := iter.Pull2(fields) + nextField, stop := iter.Pull2(fields) defer stop() var ( hasMore bool - currentTID uint32 = 1 // Current TID to process - fieldMaxTID uint32 = 0 // Maximum TID of current field (0 = field not yet selected) - fieldName string // Current field name + currentTID uint32 = 1 + fieldMaxTID uint32 = 0 + fieldName string ) - // Iterate through all token blocks created from batches - for idx, block := range createTokensSealBlocks(tokenBatches) { - table := []token.FieldTable{} - // Process all TIDs in current block (from currentTID to block.ext.maxTID) + // Just wrap `accumulate` function to be able + // to track returned errors. + accumulate := func(lids []uint32) error { + if err := accumulate(lids); err != nil { + bb.lastErr = err + return err + } + return nil + } + + for blockIdx, block := range seqBlockToken(tokens, blockSize, accumulate) { + if bb.lastErr != nil { + return + } + + // A block may span multiple fields, and a field may span multiple blocks. + // We emit one TableEntry per (field, block) intersection so that lookups + // can find the exact position of any token given its field and TID. + var table []token.FieldTable for currentTID <= block.ext.maxTID { - // If current field doesn't cover currentTID, get next field - // This happens when: 1) field not yet selected, 2) current field has ended if fieldMaxTID < currentTID { - if fieldName, fieldMaxTID, hasMore = getNextField(); !hasMore { + if fieldName, fieldMaxTID, hasMore = nextField(); !hasMore { bb.lastErr = errors.New("not enough fields to cover all TIDs") return } } - // Entry covers TIDs from currentTID to min(fieldMaxTID, block.ext.maxTID) - entry := createTokenTableEntry(currentTID, fieldMaxTID, idx, block) - table = append(table, token.FieldTable{Field: fieldName, Entries: []*token.TableEntry{entry}}) + + entry := newTokenTableEntry(currentTID, fieldMaxTID, blockIdx, block) currentTID += entry.ValCount + + table = append(table, token.FieldTable{ + Field: fieldName, + Entries: []*token.TableEntry{entry}}, + ) } if !yield(block, table) { - return // Consumer requested stop + return } } - // Verify consistency + if bb.lastErr != nil { + return + } + if currentTID-1 != fieldMaxTID { bb.lastErr = errors.New("fields and tokens not consistent") - } else if _, _, hasMore = getNextField(); hasMore { + } else if _, _, hasMore = nextField(); hasMore { bb.lastErr = errors.New("excess field after processing all blocks") } } } -// createTokenTableEntry creates a token table entry for a field-block span. -// Calculates the range of tokens belonging to a field within a specific block. -// Parameters: -// - entryStartTID: Starting token ID for this entry -// - fieldMaxTID: Maximum token ID for the field -// - blockIndex: Index of the current token block -// - block: Current token block data -func createTokenTableEntry(entryStartTID, fieldMaxTID, blockIndex uint32, block tokensSealBlock) *token.TableEntry { +func newTokenTableEntry( + entryStartTID, fieldMaxTID, + blockIndex uint32, block tokensSealBlock, +) *token.TableEntry { // Convert global TIDs to block-local indices firstIndex := entryStartTID - block.ext.minTID lastIndex := min(fieldMaxTID, block.ext.maxTID) - block.ext.minTID @@ -148,159 +162,197 @@ func createTokenTableEntry(entryStartTID, fieldMaxTID, blockIndex uint32, block } } -// BuildLIDsBlocks constructs LID blocks from Token LID sequences. -// Processes LIDs grouped by TID and creates optimally sized blocks: -// - Splits large LID sequences across multiple blocks -// - Tracks continuation status between blocks +// seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. +// A new block is yielded every `blockSize` IDs. +func seqBlockID( + ids iter.Seq2[seq.ID, seq.DocPos], + blockSize int, +) iter.Seq[idsSealBlock] { + return func(yield func(idsSealBlock) bool) { + var block idsSealBlock + + for id, pos := range ids { + block.mids.Values = append(block.mids.Values, uint64(id.MID)) + block.rids.Values = append(block.rids.Values, uint64(id.RID)) + block.params.Values = append(block.params.Values, uint64(pos)) + + if len(block.mids.Values) == blockSize { + if !yield(block) { + return + } + + block.mids.Values = block.mids.Values[:0] + block.rids.Values = block.rids.Values[:0] + block.params.Values = block.params.Values[:0] + } + } + + if len(block.mids.Values) > 0 { + yield(block) + } + } +} + +// seqBlockToken accumulates scalar (token, lids) pairs into sealed token blocks. +// A new block is started whenever the accumulated payload would exceed blockSize bytes. +// onLIDs is called for each token's LIDs immediately during iteration — the caller must not +// retain the slice after onLIDs returns. If onLIDs returns a non-nil error, iteration stops. // // Parameters: -// - tokenLIDs: Sequence of LID arrays, one per TokenID, in TID order -// - blockCapacity: Maximum number of LIDs per block +// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs +// - blockSize: Maximum payload size in bytes before starting a new block +// - onLIDs: Called for each token's LIDs before the source advances to the next token // // Returns: -// - iter.Seq[lidsSealBlock]: Sequence of sealed LID blocks -func (bb *blocksBuilder) BuildLIDsBlocks(tokenLIDs iter.Seq[[]uint32], blockCapacity int) iter.Seq[lidsSealBlock] { - return func(yield func(lidsSealBlock) bool) { - if blockCapacity <= 0 { - bb.lastErr = errors.New("sealing: LID block size must be > 0") - return - } +// - iter.Seq2[uint32, tokensSealBlock]: Sequence of (block index, sealed token block) pairs +func seqBlockToken( + tokens iter.Seq2[[]byte, []uint32], + blockSize int, accumulate func([]uint32) error, +) iter.Seq2[uint32, tokensSealBlock] { + return func(yield func(uint32, tokensSealBlock) bool) { var ( - currentTID uint32 // Current TID being processed - currentBlock lidsSealBlock // Current block under construction - isEndOfToken bool // Flag for end of current token's LIDs - isContinued bool // Flag for block continuation + idx uint32 // 0-based block index + currentTID uint32 // monotonically increasing TID + block tokensSealBlock // block under construction + actualSize int // accumulated payload bytes ) - // Initialize first block - currentBlock.ext.minTID = 1 - currentBlock.payload = lids.Block{ - LIDs: make([]uint32, 0, blockCapacity), // Pre-allocate with capacity - Offsets: []uint32{0}, // Start with initial offset - } + block.ext.minTID = 1 + flush := func() bool { + block.ext.maxTID = currentTID - // finalizeBlock prepares and yields the current block - finalizeBlock := func() bool { - if !isEndOfToken { - // Add final offset for current token if not already done - currentBlock.payload.Offsets = append(currentBlock.payload.Offsets, uint32(len(currentBlock.payload.LIDs))) + if !yield(idx, block) { + return false } - currentBlock.payload.IsLastLID = isEndOfToken // TODO(eguguchkin): Remove legacy field - currentBlock.ext.isContinued = isContinued // TODO(eguguchkin): Remove legacy field - isContinued = !isEndOfToken - return yield(currentBlock) + + idx++ + + // We yielded complete token block several lines earlier. + // And now we prepare token block for the next batch. + block.payload.Payload = block.payload.Payload[:0] + block.payload.Offsets = block.payload.Offsets[:0] + + // Here we increment currentTID by one because + // it points to TID at the end of the *currently* yielded block. + block.ext.minTID = currentTID + 1 + + actualSize = 0 + return true } - // Process LIDs for each TID - for lidsBatch := range tokenLIDs { - currentTID++ + for token, lids := range tokens { + // We encode token as [size](4B)[token](?B). + tokenSize := int(unsafe.Sizeof(uint32(0))) + len(token) - for _, lid := range lidsBatch { - // Check if block reached capacity - if len(currentBlock.payload.LIDs) == blockCapacity { - if !finalizeBlock() { - return - } - // Initialize new block - currentBlock.ext.minTID = currentTID - currentBlock.payload.LIDs = currentBlock.payload.LIDs[:0] - currentBlock.payload.Offsets = currentBlock.payload.Offsets[:1] // Reset to initial offset + needsFlushing := actualSize > 0 && + actualSize+tokenSize > blockSize + + if needsFlushing { + if !flush() { + return } + } + + block.payload.Offsets = append( + block.payload.Offsets, + uint32(len(block.payload.Payload)), + ) - isEndOfToken = false - currentBlock.ext.maxTID = currentTID - currentBlock.payload.LIDs = append(currentBlock.payload.LIDs, lid) // Add each LID to the block + block.payload.Payload = binary.LittleEndian.AppendUint32( + block.payload.Payload, + uint32(len(token)), + ) + + block.payload.Payload = append( + block.payload.Payload, + token..., + ) + + if err := accumulate(lids); err != nil { + return } - // Store offset and mark end of current token - currentBlock.payload.Offsets = append(currentBlock.payload.Offsets, uint32(len(currentBlock.payload.LIDs))) - isEndOfToken = true + currentTID += 1 + actualSize += tokenSize } - // Yield the final block - finalizeBlock() + if actualSize > 0 { + flush() + } } } -// createIDsSealBlocks converts sequences of IDs and positions into sealed ID blocks. -// Transforms raw ID sequences into optimized block format for storage: -// - Processes IDs in batches for efficiency -// - Maintains correlation between IDs and their positions -// - Creates separate slices for MIDs, RIDs, and positions -// -// Parameters: -// - idsBatches: Sequence of ID batches with corresponding document positions -// -// Returns: -// - iter.Seq[idsSealBlock]: Sequence of sealed ID blocks -func createIDsSealBlocks(idsBatches iter.Seq2[[]seq.ID, []seq.DocPos]) iter.Seq[idsSealBlock] { - return func(yield func(idsSealBlock) bool) { - block := idsSealBlock{} - - // Process each batch of IDs and positions - for ids, positions := range idsBatches { - // Reset block arrays for new batch - block.mids.Values = block.mids.Values[:0] - block.rids.Values = block.rids.Values[:0] - block.params.Values = block.params.Values[:0] - - // Convert each ID and position to storage format - for i, id := range ids { - block.mids.Values = append(block.mids.Values, uint64(id.MID)) - block.rids.Values = append(block.rids.Values, uint64(id.RID)) - block.params.Values = append(block.params.Values, uint64(positions[i])) - } +// lidBlocksAcc incrementally builds LID blocks from per-token LID lists. +// Call Add for each token's LIDs in TID order, passing a callback that is invoked +// for each completed block before its backing arrays are reused. +// Call Flush once after all Add calls to handle the final (possibly partial) block. +type lidBlocksAcc struct { + blockCap int + currentTID uint32 + currentBlock lidsSealBlock + isEndOfToken bool + isContinued bool +} - // Yield completed block - if !yield(block) { - return - } - } +func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { + a := &lidBlocksAcc{blockCap: blockCap} + a.currentBlock.ext.minTID = 1 + a.currentBlock.payload = lids.Block{ + LIDs: make([]uint32, 0, blockCap), + Offsets: []uint32{0}, } + return a } -// createTokensSealBlocks converts raw token sequences into sealed token blocks. -// Transforms batches of tokens into optimized storage format: -// - Merges a set of byte slices into a contiguous slice Payload and a slice of Offsets -// - Tracks token ID ranges for indexing [MinTID, MaxTID] +// Add processes LIDs of one token (must be called in TID order). // -// Parameters: -// - tokenBatches: Sequence of token batches to process -// -// Returns: -// - iter.Seq[uint32, tokensSealBlock]: Sequence of sealed token blocks with their indexes -func createTokensSealBlocks(tokenBatches iter.Seq[[][]byte]) iter.Seq2[uint32, tokensSealBlock] { - return func(yield func(uint32, tokensSealBlock) bool) { - var ( - idx uint32 // 1-based block index - currentTID uint32 // Current token ID counter - block tokensSealBlock // Current block under construction - ) +// For each block that fills up, `onBlock` is called immediately +// before the backing arrays are reset, so `onBlock` may read the +// block data but must not retain references to it. +func (a *lidBlocksAcc) Add(lids []uint32, onBlock func(lidsSealBlock) error) error { + a.currentTID++ + + for _, lid := range lids { + if len(a.currentBlock.payload.LIDs) == a.blockCap { + if err := onBlock(a.finalizeBlock()); err != nil { + return err + } - // Process each batch of tokens - for tokens := range tokenBatches { - idx++ - // Initialize new block - block.ext.minTID = currentTID + 1 - block.payload.Payload = block.payload.Payload[:0] - block.payload.Offsets = block.payload.Offsets[:0] + a.currentBlock.ext.minTID = a.currentTID + a.currentBlock.payload.LIDs = a.currentBlock.payload.LIDs[:0] + a.currentBlock.payload.Offsets = a.currentBlock.payload.Offsets[:1] + } - // Process each token in current batch - for _, tokenData := range tokens { - currentTID++ - // Store offset to current token - block.payload.Offsets = append(block.payload.Offsets, uint32(len(block.payload.Payload))) - // Store token length (little-endian) followed by token bytes - block.payload.Payload = binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tokenData))) - block.payload.Payload = append(block.payload.Payload, tokenData...) - } + a.isEndOfToken = false + a.currentBlock.ext.maxTID = a.currentTID + a.currentBlock.payload.LIDs = append(a.currentBlock.payload.LIDs, lid) + } - block.ext.maxTID = currentTID + a.isEndOfToken = true + a.currentBlock.payload.Offsets = append( + a.currentBlock.payload.Offsets, + uint32(len(a.currentBlock.payload.LIDs)), + ) - // Yield completed block - if !yield(idx, block) { - return - } - } + return nil +} + +func (a *lidBlocksAcc) Flush() lidsSealBlock { + return a.finalizeBlock() +} + +func (a *lidBlocksAcc) finalizeBlock() lidsSealBlock { + if !a.isEndOfToken { + a.currentBlock.payload.Offsets = append( + a.currentBlock.payload.Offsets, + uint32(len(a.currentBlock.payload.LIDs)), + ) } + + result := a.currentBlock + result.payload.IsLastLID = a.isEndOfToken + result.ext.isContinued = a.isContinued + a.isContinued = !a.isEndOfToken + + return result } diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index 80892ca2..d5637dc8 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -27,7 +27,7 @@ type mockSource struct { func (m *mockSource) Info() common.Info { return m.info } -func (m *mockSource) Fields() iter.Seq2[string, uint32] { +func (m *mockSource) Field() iter.Seq2[string, uint32] { return func(yield func(string, uint32) bool) { for i := range len(m.fields) { if !yield(m.fields[i], m.fieldMaxTIDs[i]) { @@ -37,56 +37,32 @@ func (m *mockSource) Fields() iter.Seq2[string, uint32] { } } -func (m *mockSource) IDsBlocks(size int) iter.Seq2[[]seq.ID, []seq.DocPos] { - return func(yield func([]seq.ID, []seq.DocPos) bool) { - ids := make([]seq.ID, 0, size) - pos := make([]seq.DocPos, 0, size) +func (m *mockSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { for i, id := range m.ids { - if len(ids) == size { - if !yield(ids, pos) { - return - } - ids = ids[:0] - pos = pos[:0] + if !yield(id, m.pos[i]) { + return } - ids = append(ids, id) - pos = append(pos, m.pos[i]) } - yield(ids, pos) } } -func (m *mockSource) TokenBlocks(size int) iter.Seq[[][]byte] { - return func(yield func([][]byte) bool) { - block := [][]byte{} - blockSize := 0 - for _, token := range m.tokens { - if blockSize >= size { - if !yield(block) { - return - } - blockSize = 0 - block = block[:0] +func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { + return func(yield func([]byte, []uint32) bool) { + for i, token := range m.tokens { + var lids []uint32 + if i < len(m.tokenLIDs) { + lids = m.tokenLIDs[i] } - block = append(block, token) - blockSize += len(token) + 4 - } - yield(block) - } -} - -func (m *mockSource) TokenLIDs() iter.Seq[[]uint32] { - return func(yield func([]uint32) bool) { - for _, lids := range m.tokenLIDs { - if !yield(lids) { + if !yield(token, lids) { return } } } } -func (m *mockSource) BlocksOffsets() []uint64 { return m.blocksOffsets } -func (m *mockSource) LastError() error { return m.lastError } +func (m *mockSource) BlockOffsets() []uint64 { return m.blocksOffsets } +func (m *mockSource) LastError() error { return m.lastError } func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { src := mockSource{ @@ -112,13 +88,43 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { }, fields: []string{"f1", "f2", "f3", "f4", "f5", "f6"}, fieldMaxTIDs: []uint32{2, 7, 9, 12, 13, 14}, + tokenLIDs: [][]uint32{ + {10, 20, 30, 40}, // 1 + {2}, // 2 + {3}, // 3 + {4}, // 4 + {5}, // 5 + {6}, // 6 + {7}, // 7 + {8}, // 8 + {9}, // 9 + {10}, // 10 + {11}, // 11 + {12}, // 12 + {13}, // 13 + {14}, // 14 + }, } // Block size in bytes. const blockSize = 24 - - bb := blocksBuilder{} - tokenBlocks := bb.BuildTokenBlocks(src.TokenBlocks(blockSize), src.Fields()) + const lidBlockCap = 3 + + var bb blocksBuilder + lidAccum := newLIDBlocksAccumulator(lidBlockCap) + var lidBlocks []lidsSealBlock + tokenBlocks := bb.BuildTokenBlocks( + src.TokenAndLIDs(), src.Field(), + func(lids []uint32) error { + return lidAccum.Add(lids, func(block lidsSealBlock) error { + block.payload.LIDs = slices.Clone(block.payload.LIDs) + block.payload.Offsets = slices.Clone(block.payload.Offsets) + lidBlocks = append(lidBlocks, block) + return nil + }) + }, + blockSize, + ) // In our test case, each token is 4 bytes long. Also for each token we use uint32 to encode the length. // So 3 tokens take up exactly 24 bytes. And we expect all token blocks to contain 3 tokens except the last one. @@ -128,11 +134,11 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { blockIndex := 0 allFieldsTables := []token.FieldTable{} - for block, fieldsTables := range tokenBlocks { - assert.Equal(t, expectedSizes[blockIndex], block.payload.Len()) - for i := range block.payload.Len() { + for result, fieldsTables := range tokenBlocks { + assert.Equal(t, expectedSizes[blockIndex], result.payload.Len()) + for i := range result.payload.Len() { tid++ - assert.Equal(t, src.tokens[tid-1], block.payload.GetToken(i)) + assert.Equal(t, src.tokens[tid-1], result.payload.GetToken(i)) } allFieldsTables = append(allFieldsTables, fieldsTables...) blockIndex++ @@ -149,7 +155,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 0, StartTID: 1, - BlockIndex: 1, + BlockIndex: 0, ValCount: 2, MinVal: "f1v1", MaxVal: "f1v2", @@ -161,21 +167,21 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 2, StartTID: 3, - BlockIndex: 1, + BlockIndex: 0, ValCount: 1, MinVal: "f2v1", MaxVal: "f2v1", }, { StartIndex: 0, StartTID: 4, - BlockIndex: 2, + BlockIndex: 1, ValCount: 3, MinVal: "f2v2", MaxVal: "f2v4", }, { StartIndex: 0, StartTID: 7, - BlockIndex: 3, + BlockIndex: 2, ValCount: 1, MinVal: "f2v5", MaxVal: "f2v5", @@ -187,7 +193,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 1, StartTID: 8, - BlockIndex: 3, + BlockIndex: 2, ValCount: 2, MinVal: "f3v1", MaxVal: "f3v2", @@ -199,7 +205,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 0, StartTID: 10, - BlockIndex: 4, + BlockIndex: 3, ValCount: 3, MinVal: "f4v1", MaxVal: "f4v3", @@ -211,7 +217,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 0, StartTID: 13, - BlockIndex: 5, + BlockIndex: 4, ValCount: 1, MinVal: "f5v1", MaxVal: "f5v1", @@ -223,7 +229,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 1, StartTID: 14, - BlockIndex: 5, + BlockIndex: 4, ValCount: 1, MinVal: "f6v1", MaxVal: "f6v1", @@ -233,6 +239,39 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { }, } assert.Equal(t, actualTokenTable.FieldsTables, expectedTokenTable.FieldsTables) + + finalBlock := lidAccum.Flush() + finalBlock.payload.LIDs = slices.Clone(finalBlock.payload.LIDs) + finalBlock.payload.Offsets = slices.Clone(finalBlock.payload.Offsets) + lidBlocks = append(lidBlocks, finalBlock) + + expectedLIDBlocks := []lidsSealBlock{ + { + ext: lidsExt{minTID: 1, maxTID: 1, isContinued: false}, + payload: lids.Block{LIDs: []uint32{10, 20, 30}, Offsets: []uint32{0, 3}, IsLastLID: false}, + }, + { + ext: lidsExt{minTID: 1, maxTID: 3, isContinued: true}, + payload: lids.Block{LIDs: []uint32{40, 2, 3}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 4, maxTID: 6, isContinued: false}, + payload: lids.Block{LIDs: []uint32{4, 5, 6}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 7, maxTID: 9, isContinued: false}, + payload: lids.Block{LIDs: []uint32{7, 8, 9}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 10, maxTID: 12, isContinued: false}, + payload: lids.Block{LIDs: []uint32{10, 11, 12}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 13, maxTID: 14, isContinued: false}, + payload: lids.Block{LIDs: []uint32{13, 14}, Offsets: []uint32{0, 1, 2}, IsLastLID: true}, + }, + } + assert.Equal(t, expectedLIDBlocks, lidBlocks) } func TestBlocksBuilder_IDsBlocks(t *testing.T) { @@ -268,7 +307,7 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { i := 0 ids := []seq.ID{} pos := []seq.DocPos{} - for block := range createIDsSealBlocks(src.IDsBlocks(3)) { + for block := range seqBlockID(src.ID(), 3) { assert.Equal(t, expectedSizes[i], len(block.mids.Values)) assert.Equal(t, expectedSizes[i], len(block.rids.Values)) assert.Equal(t, expectedSizes[i], len(block.params.Values)) @@ -284,112 +323,3 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { assert.Equal(t, src.ids, ids) assert.Equal(t, src.pos, pos) } - -func TestBlocksBuilder_BuildLIDsBlocks(t *testing.T) { - src := mockSource{ - tokenLIDs: [][]uint32{ - { - 10, // block 1, tid 1 - 20, // block 1, tid 1 - 30, // block 1, tid 1 - - 40, // block 2, tid 1 - }, { - 11, // block 2, tid 2 - 21, // block 2, tid 2 - - 31, // block 3, tid 2 - 41, // block 3, tid 2 - }, { - 10, // block 3, tid 3 - - 11, // block 4, tid 3 - 20, // block 4, tid 3 - 21, // block 4, tid 3 - - }, { - 30, // block 5, tid 4 - 40, // block 5, tid 4 - 50, // block 5, tid 4 - - 60, // block 6, tid 4 - }, - }, - } - - expected := []lidsSealBlock{{ - ext: lidsExt{ - minTID: 1, - maxTID: 1, - isContinued: false, - }, - payload: lids.Block{ - LIDs: []uint32{10, 20, 30}, - Offsets: []uint32{0, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 1, - maxTID: 2, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{40, 11, 21}, - Offsets: []uint32{0, 1, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 2, - maxTID: 3, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{31, 41, 10}, - Offsets: []uint32{0, 2, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 3, - maxTID: 3, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{11, 20, 21}, - Offsets: []uint32{0, 3}, - IsLastLID: true, - }, - }, { - ext: lidsExt{ - minTID: 4, - maxTID: 4, - isContinued: false, - }, - payload: lids.Block{ - LIDs: []uint32{30, 40, 50}, - Offsets: []uint32{0, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 4, - maxTID: 4, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{60}, - Offsets: []uint32{0, 1}, - IsLastLID: true, - }}, - } - bb := blocksBuilder{} - blocks := []lidsSealBlock{} - for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), 3) { - block.payload.LIDs = slices.Clone(block.payload.LIDs) // copy lids - block.payload.Offsets = slices.Clone(block.payload.Offsets) // copy offsets - blocks = append(blocks, block) - } - assert.Equal(t, expected, blocks) -} diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 491c7233..57a3b3ad 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -5,7 +5,6 @@ import ( "encoding/binary" "io" "iter" - "time" "github.com/alecthomas/units" @@ -22,25 +21,27 @@ import ( "github.com/ozontech/seq-db/zstd" ) -// IndexSealer is responsible for creating and writing the index structure for sealed fractions. -// It organizes data into blocks, compresses them, and builds the complete index file with: -// - Multiple data sections (info, tokens, token table, offsets, IDs, LIDs) -// - Compression using ZSTD with configurable levels -// - Registry for quick access to block locations -// - PreloadedData structures for fast initialization instance of sealed fraction +// IndexSealer writes sealed fraction index data across multiple files. +// Each Write*File method writes one section to an independent file using the +// standard [prefix][blocks][registry] format so each file has its own IndexReader. +// +// Call order matters for PreloadedData: +// +// WriteTokenAndLIDFiles → WriteOffsetsFile → WriteIDFile +// +// (TokenTable is populated by WriteTokenAndLIDFiles; IDsTable by WriteOffsetsFile+WriteIDFile; +// LIDsTable by WriteTokenAndLIDFiles.) type IndexSealer struct { - lastErr error // Last error encountered during processing - buf1 []byte // Reusable buffer for packing raw data before compression - buf2 []byte // Reusable buffer for compressed data - params common.SealParams // Configuration parameters for sealing process - - // PreloadedData structures built during sealing for fast initialization of sealed fraction - idsTable seqids.Table // Table mapping document IDs to blocks - lidsTable lids.Table // Table mapping token IDs to LID blocks - tokenTable token.Table // Table mapping fields to token blocks + lastErr error + buf1 []byte + buf2 []byte + params common.SealParams + + idsTable seqids.Table + lidsTable lids.Table + tokenTable token.Table } -// NewIndexSealer creates a new IndexSealer instance with the given parameters. func NewIndexSealer(params common.SealParams) *IndexSealer { return &IndexSealer{ params: params, @@ -49,75 +50,58 @@ func NewIndexSealer(params common.SealParams) *IndexSealer { } } -// indexBlock represents a single block of data in the index file. -// Each block can be compressed and contains metadata for efficient retrieval. +// indexBlock is one compressed (or not) block with its registry metadata. type indexBlock struct { - codec storage.Codec // Compression codec used (No compression or ZSTD) - payload []byte // The actual block data (may be compressed) - rawLen uint32 // Original uncompressed data length - ext1 uint64 // Extended metadata field 1 (block-specific usage) - ext2 uint64 // Extended metadata field 2 (block-specific usage) + codec storage.Codec + payload []byte + rawLen uint32 + ext1 uint64 + ext2 uint64 } -// Bin converts the indexBlock to its binary representation for storage. -// It creates a header with metadata and returns the header + payload. -// Parameters: -// - pos: The file position where this block will be written -// -// Returns: -// - storage.IndexBlockHeader: The block header with metadata -// - []byte: The payload data to write func (i indexBlock) Bin(pos int64) (storage.IndexBlockHeader, []byte) { - header := storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec) - return header, i.payload + return storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec), i.payload } -// WriteIndex writes the complete index structure to the provided writer. -// The index file structure: -// +----------------+----------------+----------------+ -// | Prefix | Data Blocks | Registry | -// | (16 bytes) | (multiple) | (block headers)| -// +----------------+----------------+----------------+ -// -// Prefix contains: -// - 8 bytes: Position of registry start -// - 8 bytes: Size of registry -// -// Parameters: -// - ws: WriteSeeker to write the index data to -// - src: Source interface providing the data to be sealed -// -// Returns: -// - error: Any error encountered during writing -func (s *IndexSealer) WriteIndex(ws io.WriteSeeker, src Source) error { - const prefixSize = 16 // Size of prefix that will hold registry position and size +const filePrefixSize = 16 - // Skip prefix area initially - we'll write it at the end - if _, err := ws.Seek(prefixSize, io.SeekStart); err != nil { +// write writes blocks to ws using [16-byte prefix][blocks][registry]. +// The prefix is written last (via seek-back) and stores registry position + size. +func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { return err } - // Create buffers for headers and payload writing - hw := bytes.NewBuffer(nil) // Headers writer - collects all block headers - bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) // Buffered writer for payload + hw := bytes.NewBuffer(nil) + bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) defer bytespool.ReleaseWriter(bw) - // Write all index blocks and collect headers - if err := s.writeBlocks(prefixSize, bw, hw, src); err != nil { - return err + pos := filePrefixSize + for block := range blocks { + if s.lastErr != nil { + return s.lastErr + } + header, payload := block.Bin(int64(pos)) + if _, err := bw.Write(payload); err != nil { + return err + } + if _, err := hw.Write(header); err != nil { + return err + } + pos += len(payload) + } + if s.lastErr != nil { + return s.lastErr } if err := bw.Flush(); err != nil { return err } - // Calculate registry position and size - size := hw.Len() // Registry size (all headers) - pos, err := ws.Seek(0, io.SeekEnd) // Current end position = registry start + size := hw.Len() + regPos, err := ws.Seek(0, io.SeekEnd) if err != nil { return err } - - // Write registry (all block headers) at the end of file if _, err := bw.Write(hw.Bytes()); err != nil { return err } @@ -125,325 +109,289 @@ func (s *IndexSealer) WriteIndex(ws io.WriteSeeker, src Source) error { return err } - // Write prefix at beginning of file with registry metadata - prefix := make([]byte, 0, prefixSize) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(pos)) // Registry position - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) // Registry size + prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) if _, err := ws.Seek(0, io.SeekStart); err != nil { return err } - if _, err = ws.Write(prefix); err != nil { - return err + _, err = ws.Write(prefix) + return err +} + +// fileStreamWriter writes blocks incrementally to a single file using the +// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. +type fileStreamWriter struct { + ws io.WriteSeeker + bw *bytespool.Writer + hw bytes.Buffer + pos int +} + +func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { + return nil, err } + return &fileStreamWriter{ + ws: ws, + bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), + pos: filePrefixSize, + }, nil +} +func (fw *fileStreamWriter) writeBlock(block indexBlock) error { + header, payload := block.Bin(int64(fw.pos)) + if _, err := fw.bw.Write(payload); err != nil { + return err + } + fw.hw.Write(header) // bytes.Buffer.Write never fails + fw.pos += len(payload) return nil } -// writeBlocks processes all index blocks from the source and writes them to the output. -// It simultaneously writes payload data to one writer and headers to another. -// Parameters: -// - pos: Starting position for the first block -// - payloadWriter: Writer for block payload data -// - headersWriter: Writer for block headers (registry) -// - src: Data source -// -// Returns: -// - error: Any error encountered during processing -func (s *IndexSealer) writeBlocks(pos int, payloadWriter, headersWriter io.Writer, src Source) error { - // Process each index block from the source - for block := range s.indexBlocks(src) { - header, payload := block.Bin(int64(pos)) - // Write payload to main data section - if _, err := payloadWriter.Write(payload); err != nil { - return err - } - // Write header to registry - if _, err := headersWriter.Write(header); err != nil { - return err - } - pos += len(payload) // Advance position for next block +func (fw *fileStreamWriter) finalize() (err error) { + defer fw.release() + if err = fw.bw.Flush(); err != nil { + return } - if s.lastErr != nil { - return s.lastErr + var regPos int64 + if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { + return } - return nil + if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { + return + } + if err = fw.bw.Flush(); err != nil { + return + } + prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) + if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { + return + } + _, err = fw.ws.Write(prefix) + return +} + +func (fw *fileStreamWriter) release() { + if fw.bw != nil { + bytespool.ReleaseWriter(fw.bw) + fw.bw = nil + } +} + +// WriteInfoFile writes the .info file containing a single BlockInfo block. +func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { + yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) + }) } -// indexBlocks generates a sequence of index blocks from the source data. -// The blocks are organized in specific sections: -// 1. Info Section - Basic fraction metadata -// 2. Tokens Section - Token data blocks -// 3. Token Table Section - Field-to-token mapping table -// 4. Offsets Section - Document block offsets -// 5. IDs Section - Document ID blocks (MIDs, RIDs, Positions) -// 6. LIDs Section - Token ID to LID mapping blocks +// WriteTokenAndLIDFiles writes the .token and .lid files in a single pass over the source data. // -// Returns: -// - iter.Seq[indexBlock]: Sequence of index blocks to write -func (s *IndexSealer) indexBlocks(src Source) iter.Seq[indexBlock] { - return func(yield func(indexBlock) bool) { - bb := blocksBuilder{} - blocksCounter := uint32(0) // Global block counter for indexing - statsOverall := startStats() // Overall statistics collector - - // Helper to push a block and update statistics - push := func(b indexBlock, statsSection *blocksStats) bool { - blocksCounter++ - statsOverall.takeStock(b) - statsSection.takeStock(b) - return yield(b) - } +// .token file: [token blocks...] [separator] [token-table block] [separator] +// .lid file: [LID blocks...] [separator] +// +// LID blocks are written interleaved with token block processing so that both files +// are produced from one sequential scan of the (token, LID) data. +func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src Source) error { + tokenFW, err := newFileStreamWriter(tokenWS) + if err != nil { + return err + } + defer tokenFW.release() - // Helper to write section separator (empty block) - sectionSeparator := func() bool { - blocksCounter++ - return yield(indexBlock{}) // empty block as separator - } + lidFW, err := newFileStreamWriter(lidWS) + if err != nil { + return err + } + defer lidFW.release() + + var ( + bb blocksBuilder + allFieldsTables []token.FieldTable + lidAccum = newLIDBlocksAccumulator(consts.LIDBlockCap) + ) + + accumulate := func(lids []uint32) error { + return lidAccum.Add(lids, func(block lidsSealBlock) error { + return lidFW.writeBlock(s.packLIDsBlock(block)) + }) + } - // SECTION 1: Info Section - statsInfo := startStats() - info := src.Info() - if !push(s.packInfoBlock(sealed.BlockInfo{Info: info}), &statsInfo) { - return - } + blocks := bb.BuildTokenBlocks( + src.TokenAndLIDs(), src.Field(), + accumulate, consts.RegularBlockSize, + ) - // SECTION 2: Tokens Section - statsTokens := startStats() - allFieldsTables := []token.FieldTable{} - tokensBlocks := bb.BuildTokenBlocks(src.TokenBlocks(consts.RegularBlockSize), src.Fields()) - for block, fieldsTables := range tokensBlocks { - if !push(s.packTokenBlock(block), &statsTokens) { - return - } - allFieldsTables = append(allFieldsTables, fieldsTables...) - } - if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { - return + for block, fieldsTables := range blocks { + if err := tokenFW.writeBlock(s.packTokenBlock(block)); err != nil { + return err } + allFieldsTables = append(allFieldsTables, fieldsTables...) + } - if !sectionSeparator() { - return - } + if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { + return s.lastErr + } - // SECTION 3: Token Table Section - statsTokenTable := startStats() - tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if !push(s.packTokenTableBlock(tokenTableBlock), &statsTokenTable) { - return - } + // Write the final (possibly partial) LID block and trailing separator. + if err := lidFW.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { + return err + } - if !sectionSeparator() { - return - } + if err := lidFW.writeBlock(indexBlock{}); err != nil { // trailing separator + return err + } + + if err := lidFW.finalize(); err != nil { + return err + } - // SECTION 4: Offsets Section - statsOffsets := startStats() + // Write token section separator, token table, trailing separator. + if err := tokenFW.writeBlock(indexBlock{}); err != nil { // section separator + return err + } + tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} + if err := tokenFW.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { + return err + } + if err := tokenFW.writeBlock(indexBlock{}); err != nil { // trailing separator + return err + } + return tokenFW.finalize() +} + +// WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. +func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { offsets := sealed.BlockOffsets{ - IDsTotal: info.DocsTotal + 1, // +1 for system ID at position zero - Offsets: src.BlocksOffsets(), - } - if !push(s.packBlocksOffsetsBlock(offsets), &statsOffsets) { - return + IDsTotal: src.Info().DocsTotal + 1, + Offsets: src.BlockOffsets(), } + yield(s.packBlocksOffsetsBlock(offsets)) + }) +} - // SECTION 5: IDs Section - s.idsTable.StartBlockIndex = blocksCounter // Record starting position for IDs blocks - statsMIDs, statsRIDs, statsParams := startStats(), startStats(), startStats() - for block := range createIDsSealBlocks(src.IDsBlocks(consts.IDsPerBlock)) { - if !push(s.packMIDsBlock(block), &statsMIDs) { - return - } - if !push(s.packRIDsBlock(block), &statsRIDs) { +func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { + for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if !yield(s.packMIDsBlock(block)) { return } - if !push(s.packPosBlock(block), &statsParams) { + + if !yield(s.packRIDsBlock(block)) { return } - } - if s.lastErr = src.LastError(); s.lastErr != nil { - return - } - - if !sectionSeparator() { - return - } - // SECTION 6: LIDs Section - statsLIDs := startStats() - s.lidsTable.StartBlockIndex = blocksCounter - for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), consts.LIDBlockCap) { - if !push(s.packLIDsBlock(block), &statsLIDs) { + if !yield(s.packPosBlock(block)) { return } } - if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { - return - } - if !sectionSeparator() { + if s.lastErr = src.LastError(); s.lastErr != nil { return } - // Log statistics for all sections - endTime := time.Now() - statsInfo.log("info", statsTokens.start) - statsTokens.log("tokens", statsTokenTable.start) - statsTokenTable.log("tokenTable", statsOffsets.start) - statsOffsets.log("offsets", statsMIDs.start) - statsMIDs.log("mids", statsLIDs.start) - statsRIDs.log("rids", statsLIDs.start) - statsParams.log("pos", statsLIDs.start) - statsLIDs.log("lids", endTime) - statsOverall.log("overall", endTime) - } + yield(indexBlock{}) // trailing separator + }) } -// collapseOrderedFieldsTables merges field tables with identical field names -// Assumes the input array is already sorted by the Field property +// collapseOrderedFieldsTables merges FieldTables with the same field name. +// Assumes input is sorted by Field. func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { if len(src) == 0 { return nil } + current := src[0] - dst := []token.FieldTable{} + var dst []token.FieldTable for _, ft := range src[1:] { if current.Field == ft.Field { current.Entries = append(current.Entries, ft.Entries...) continue } + dst = append(dst, current) current = ft } - dst = append(dst, current) - return dst + + return append(dst, current) } -// newIndexBlock creates an uncompressed index block. func newIndexBlock(raw []byte) indexBlock { - return indexBlock{ - codec: storage.CodecNo, - rawLen: uint32(len(raw)), - payload: raw, - } + return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } -// newIndexBlockZSTD creates a compressed index block using ZSTD compression. -// Falls back to uncompressed if compression doesn't provide benefits. func (s *IndexSealer) newIndexBlockZSTD(raw []byte, level int) indexBlock { s.buf2 = zstd.CompressLevel(raw, s.buf2[:0], level) - // Only use compression if it actually reduces size if len(s.buf2) < len(raw) { - return indexBlock{ - codec: storage.CodecZSTD, - rawLen: uint32(len(raw)), - payload: s.buf2, - } + return indexBlock{codec: storage.CodecZSTD, rawLen: uint32(len(raw)), payload: s.buf2} } return newIndexBlock(raw) } -// packInfoBlock packs fraction information into an index block. func (s *IndexSealer) packInfoBlock(block sealed.BlockInfo) indexBlock { s.buf1 = block.Pack(s.buf1[:0]) - return newIndexBlock(s.buf1) // Info block is typically small, no compression + return newIndexBlock(s.buf1) } -// packTokenBlock packs token data into a compressed index block. func (s *IndexSealer) packTokenBlock(block tokensSealBlock) indexBlock { - s.buf1 = block.payload.Pack(s.buf1[:0]) // Pack token data + s.buf1 = block.payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.TokenListZstdLevel) - // Store TID range in extended metadata b.ext1 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) return b } -// packTokenTableBlock packs the token table into a compressed index block. -func (s *IndexSealer) packTokenTableBlock(tokenTableBlock token.TableBlock) indexBlock { - s.tokenTable = token.TableFromBlocks([]token.TableBlock{tokenTableBlock}) // Store for PreloadedData - - // Packing block - s.buf1 = tokenTableBlock.Pack(s.buf1[:0]) +func (s *IndexSealer) packTokenTableBlock(tb token.TableBlock) indexBlock { + s.tokenTable = token.TableFromBlocks([]token.TableBlock{tb}) + s.buf1 = tb.Pack(s.buf1[:0]) return s.newIndexBlockZSTD(s.buf1, s.params.TokenTableZstdLevel) } -// packBlocksOffsetsBlock packs document block offsets into a compressed index block. func (s *IndexSealer) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { - // Update IDs table for PreloadedData - s.idsTable.IDsTotal = block.IDsTotal // Total number of IDs - s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) // Number of ID blocks - - // Packing block + s.idsTable.IDsTotal = block.IDsTotal + s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) s.buf1 = block.Pack(s.buf1[:0]) - b := s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) - return b + return s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) } -// packMIDsBlock packs MIDs into a compressed index block. func (s *IndexSealer) packMIDsBlock(block idsSealBlock) indexBlock { - // Get the last ID in the block (smallest due to descending order) last := len(block.mids.Values) - 1 - minID := seq.ID{ - MID: seq.MID(block.mids.Values[last]), - RID: seq.RID(block.rids.Values[last]), - } - s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) // Store for PreloadedData - - // Packing block + minID := seq.ID{MID: seq.MID(block.mids.Values[last]), RID: seq.RID(block.rids.Values[last])} + s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) s.buf1 = block.mids.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) - // Store min MID and RID in extended metadata b.ext1 = uint64(minID.MID) b.ext2 = uint64(minID.RID) return b } -// packRIDsBlock packs RIDs into a compressed index block. func (s *IndexSealer) packRIDsBlock(block idsSealBlock) indexBlock { s.buf1 = block.rids.Pack(s.buf1[:0]) - b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) - return b + return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) } -// packPosBlock packs document positions into a compressed index block. func (s *IndexSealer) packPosBlock(block idsSealBlock) indexBlock { s.buf1 = block.params.Pack(s.buf1[:0]) - b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) - return b + return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) } -// packLIDsBlock packs Local IDs (LIDs) into a compressed index block. -// Also updates LIDs table for preloaded data access. func (s *IndexSealer) packLIDsBlock(block lidsSealBlock) indexBlock { var ext1 uint64 - if block.ext.isContinued { // todo: Legacy continuation flag + if block.ext.isContinued { ext1 = 1 - block.ext.minTID++ // Adjust for legacy format + block.ext.minTID++ } - - // Update LIDs table for PreloadedData s.lidsTable.MinTIDs = append(s.lidsTable.MinTIDs, block.ext.minTID) s.lidsTable.MaxTIDs = append(s.lidsTable.MaxTIDs, block.ext.maxTID) s.lidsTable.IsContinued = append(s.lidsTable.IsContinued, block.ext.isContinued) - - // Packing block s.buf1 = block.payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.LIDsZstdLevel) - b.ext1 = ext1 // Legacy continuation flag - b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) // TID range + b.ext1 = ext1 + b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) return b } -// LIDsTable returns the built LIDs table for fast initialization of sealed fraction. -func (s *IndexSealer) LIDsTable() lids.Table { - return s.lidsTable -} - -// TokenTable returns the built token table for fast initialization of sealed fraction. -func (s *IndexSealer) TokenTable() token.Table { - return s.tokenTable -} - -// IDsTable returns the built IDs table for fast initialization of sealed fraction. -func (s *IndexSealer) IDsTable() seqids.Table { - return s.idsTable -} +func (s *IndexSealer) LIDsTable() lids.Table { return s.lidsTable } +func (s *IndexSealer) TokenTable() token.Table { return s.tokenTable } +func (s *IndexSealer) IDsTable() seqids.Table { return s.idsTable } diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 3eb00761..233f0aa2 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -16,85 +16,145 @@ import ( // Source interface defines the contract for data sources that can be sealed. // Provides access to all necessary data components for index creation. type Source interface { - Info() *common.Info // Fraction metadata information - IDsBlocks(size int) iter.Seq2[[]seq.ID, []seq.DocPos] // Ordered sequence of document IDs and their positions, divided into blocks - TokenBlocks(size int) iter.Seq[[][]byte] // Ordered sequence of tokens divided into blocks - Fields() iter.Seq2[string, uint32] // Ordered sequence of fields with their max field's TID value - TokenLIDs() iter.Seq[[]uint32] // Sequence of Token LIDs ordered by TID and LID - BlocksOffsets() []uint64 // Offsets of DocBlock's in the doc file - LastError() error // Last error encountered during data retrieval + Info() *common.Info // Fraction metadata information + ID() iter.Seq2[seq.ID, seq.DocPos] // Ordered sequence of document IDs and their positions + TokenAndLIDs() iter.Seq2[[]byte, []uint32] // Ordered sequence of tokens paired with their LID list + Field() iter.Seq2[string, uint32] // Ordered sequence of fields with their max TID value + BlockOffsets() []uint64 // Offsets of DocBlocks in the doc file + LastError() error // Last error encountered during data retrieval } -// Seal is the main entry point for sealing a fraction. -// It performs the complete sealing process: -// 1. Creates the index file structure -// 2. Writes all index blocks with compression -// 3. Builds PreloadedData structures for fast initialization of sealed fraction -// 4. Handles file system operations and error recovery -// -// Parameters: -// - src: Data source providing all fraction data -// - params: Sealing parameters including compression levels -// -// Returns: -// - *sealed.PreloadedData: Preloaded data structures for initialization of sealed fraction -// - error: Any error encountered during the sealing process +// createAndWrite creates a tmp file, calls write, syncs, closes, then renames to finalPath. +func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { + f, err := os.Create(tmpPath) + if err != nil { + return err + } + if err := write(f); err != nil { + f.Close() + return err + } + if err := f.Sync(); err != nil { + f.Close() + return err + } + if err := f.Close(); err != nil { + return err + } + return os.Rename(tmpPath, finalPath) +} + +// createAndWriteBoth creates two tmp files, calls write with both, syncs and closes them, +// then renames both to their final paths. +func createAndWriteBoth(tmpPath1, finalPath1, tmpPath2, finalPath2 string, write func(*os.File, *os.File) error) error { + f1, err := os.Create(tmpPath1) + if err != nil { + return err + } + f2, err := os.Create(tmpPath2) + if err != nil { + f1.Close() + return err + } + if err := write(f1, f2); err != nil { + f1.Close() + f2.Close() + return err + } + if err := f1.Sync(); err != nil { + f1.Close() + f2.Close() + return err + } + if err := f1.Close(); err != nil { + f2.Close() + return err + } + if err := f2.Sync(); err != nil { + f2.Close() + return err + } + if err := f2.Close(); err != nil { + return err + } + if err := os.Rename(tmpPath1, finalPath1); err != nil { + return err + } + return os.Rename(tmpPath2, finalPath2) +} + +// Seal writes five index files (.info, .token, .offsets, .id, .lid) for the fraction +// and returns PreloadedData for fast initialization of the sealed fraction. func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { info := src.Info() - // Validate that we're not sealing an empty fraction if info.To == 0 { return nil, errors.New("sealing of an empty active fraction is not supported") } - // Create temporary index file (will be renamed on success) - indexFile, err := os.Create(info.Path + consts.IndexTmpFileSuffix) - if err != nil { - return nil, err - } - - // Create index sealer and write the index structure - indexSealer := NewIndexSealer(params) - if err := indexSealer.WriteIndex(indexFile, src); err != nil { - return nil, err - } + sealer := NewIndexSealer(params) - // Ensure data is flushed to disk - if err := indexFile.Sync(); err != nil { + if err := createAndWrite( + info.Path+consts.InfoTmpFileSuffix, + info.Path+consts.InfoFileSuffix, + func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, + ); err != nil { return nil, err } - // Get final file size for metadata - stat, err := indexFile.Stat() - if err != nil { + if err := createAndWrite( + info.Path+consts.OffsetsTmpFileSuffix, + info.Path+consts.OffsetsFileSuffix, + func(f *os.File) error { return sealer.WriteOffsetsFile(f, src) }, + ); err != nil { return nil, err } - info.IndexOnDisk = uint64(stat.Size()) - // Close file before renaming - if err := indexFile.Close(); err != nil { + if err := createAndWrite( + info.Path+consts.IDTmpFileSuffix, + info.Path+consts.IDFileSuffix, + func(f *os.File) error { return sealer.WriteIDFile(f, src) }, + ); err != nil { return nil, err } - // Atomically rename temporary file to final name - if err := os.Rename(indexFile.Name(), info.Path+consts.IndexFileSuffix); err != nil { + if err := createAndWriteBoth( + info.Path+consts.TokenTmpFileSuffix, info.Path+consts.TokenFileSuffix, + info.Path+consts.LIDTmpFileSuffix, info.Path+consts.LIDFileSuffix, + func(tokenF, lidF *os.File) error { return sealer.WriteTokenAndLIDFiles(tokenF, lidF, src) }, + ); err != nil { return nil, err } - // Ensure directory metadata is synced to disk util.MustSyncPath(filepath.Dir(info.Path)) - // Build preloaded data structure for fast query access - lidsTable := indexSealer.LIDsTable() - preloaded := sealed.PreloadedData{ + // Compute total index size as sum of all 5 files. + var totalSize uint64 + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + st, err := os.Stat(info.Path + suffix) + if err != nil { + return nil, err + } + totalSize += uint64(st.Size()) + } + info.IndexOnDisk = totalSize + + lidsTable := sealer.LIDsTable() + preloaded := &sealed.PreloadedData{ Info: info, - TokenTable: indexSealer.TokenTable(), + TokenTable: sealer.TokenTable(), BlocksData: sealed.BlocksData{ - IDsTable: indexSealer.IDsTable(), + IDsTable: sealer.IDsTable(), LIDsTable: &lidsTable, - BlocksOffsets: src.BlocksOffsets(), + BlocksOffsets: src.BlockOffsets(), }, } - return &preloaded, nil + return preloaded, nil } diff --git a/frac/sealed/token/provider.go b/frac/sealed/token/provider.go index 6d18ff68..a650c266 100644 --- a/frac/sealed/token/provider.go +++ b/frac/sealed/token/provider.go @@ -1,6 +1,7 @@ package token import ( + "math" "sort" ) @@ -15,9 +16,9 @@ type Provider struct { func NewProvider(loader *BlockLoader, entries []*TableEntry) *Provider { return &Provider{ - loader: loader, - entries: entries, - curEntry: nil, + loader: loader, + entries: entries, + curBlockIndex: math.MaxUint32, // sentinel: no block loaded yet } } diff --git a/frac/sealed/token/table_entry.go b/frac/sealed/token/table_entry.go index a16b9a55..6e1df9c9 100644 --- a/frac/sealed/token/table_entry.go +++ b/frac/sealed/token/table_entry.go @@ -12,7 +12,7 @@ type TableEntry struct { } func (t *TableEntry) GetIndexInTokensBlock(tid uint32) int { - return int(t.StartIndex + tid - t.StartTID) + return int(t.StartIndex + (tid - t.StartTID)) } func (t *TableEntry) getLastTID() uint32 { diff --git a/frac/sealed/token/table_loader.go b/frac/sealed/token/table_loader.go index 6c3a5936..a0bf87be 100644 --- a/frac/sealed/token/table_loader.go +++ b/frac/sealed/token/table_loader.go @@ -50,6 +50,7 @@ func (l *TableLoader) Load() Table { func TableFromBlocks(blocks []TableBlock) Table { table := make(Table) + for _, block := range blocks { for _, ft := range block.FieldsTables { fd, ok := table[ft.Field] @@ -62,13 +63,16 @@ func TableFromBlocks(blocks []TableBlock) Table { } else if minVal < fd.MinVal { fd.MinVal = minVal } + for _, e := range ft.Entries { e.MinVal = "" fd.Entries = append(fd.Entries, e) } + table[ft.Field] = fd } } + return table } @@ -89,10 +93,8 @@ func (l *TableLoader) readBlock() ([]byte, error) { } func (l *TableLoader) loadBlocks() ([]TableBlock, error) { - // todo: scan all headers in sealed_loader and remember startIndex for each sections - // todo: than use this startIndex to load sections on demand (do not scan every time) - l.i = 1 - for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { // skip actual token blocks, go for token table + l.i = 0 + for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { // skip token blocks, go for token table } blocks := make([]TableBlock, 0) diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index ae639862..c20272ee 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -16,32 +16,43 @@ import ( "github.com/ozontech/seq-db/util" ) +// IndexReaders holds one IndexReader per split index file. +type IndexReaders struct { + Info storage.IndexReader + Token storage.IndexReader + Offsets storage.IndexReader + ID storage.IndexReader + LID storage.IndexReader +} + +// Loader reads the per-section index files to populate BlocksData. +// Token data is loaded lazily (BlockLoader / TableLoader use the Token reader directly). +// Info is loaded separately via loadHeader before Load is called. type Loader struct { - reader *storage.IndexReader - blockIndex uint32 - blockBuf []byte + buf []byte } -func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, indexReader *storage.IndexReader) { +// Load populates blocksData from the .offsets, .id, and .lid files. +func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers IndexReaders) { t := time.Now() - l.reader = indexReader - l.blockIndex = 1 // skipping info block that's already read - - l.skipTokens() - var err error - if blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info.BinaryDataVer); err != nil { - logger.Fatal("load ids error", zap.Error(err)) + var blockOffsets sealed.BlockOffsets + blockOffsets, err = l.loadBlocksOffsets(readers.Offsets) + if err != nil { + logger.Fatal("load offsets error", zap.Error(err)) } + blocksData.BlocksOffsets = blockOffsets.Offsets - if blocksData.LIDsTable, err = l.loadLIDsBlocksTable(); err != nil { + blocksData.IDsTable = l.loadIDsTable(readers.ID, blockOffsets.IDsTotal, info.BinaryDataVer) + + blocksData.LIDsTable, err = l.loadLIDsTable(readers.LID) + if err != nil { logger.Fatal("load lids error", zap.Error(err)) } took := time.Since(t) - docsTotalK := float64(info.DocsTotal) / 1000 indexOnDiskMb := util.SizeToUnit(info.IndexOnDisk, "mb") throughput := indexOnDiskMb / util.DurationToUnit(took, "s") @@ -56,43 +67,34 @@ func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, indexRea ) } -func (l *Loader) nextIndexBlock() ([]byte, error) { - data, _, err := l.reader.ReadIndexBlock(l.blockIndex, l.blockBuf) - l.blockBuf = data - l.blockIndex++ - return data, err -} - -func (l *Loader) skipBlock() storage.IndexBlockHeader { - header, err := l.reader.GetBlockHeader(l.blockIndex) +// loadBlocksOffsets reads block 0 from the .offsets file. +func (l *Loader) loadBlocksOffsets(r storage.IndexReader) (sealed.BlockOffsets, error) { + data, _, err := r.ReadIndexBlock(0, l.buf) + l.buf = data if err != nil { - logger.Panic("error reading block header", zap.Error(err)) - } - l.blockIndex++ - return header -} - -func (l *Loader) loadIDs(fracVersion config.BinaryDataVersion) (idsTable seqids.Table, blocksOffsets []uint64, err error) { - var result []byte - - if result, err = l.nextIndexBlock(); err != nil { - return idsTable, nil, err + return sealed.BlockOffsets{}, err } - b := sealed.BlockOffsets{} - if err := b.Unpack(result); err != nil { - return idsTable, nil, err + if err := b.Unpack(data); err != nil { + return sealed.BlockOffsets{}, err } + return b, nil +} - blocksOffsets = b.Offsets - idsTable.IDsTotal = b.IDsTotal - idsTable.IDBlocksTotal = uint32(len(b.Offsets)) - idsTable.StartBlockIndex = l.blockIndex +// loadIDsTable scans block headers in the .id file to build seqids.Table. +// Blocks are stored as (MIDs, RIDs, Pos) triplets; we only need MIDs headers. +func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersion config.BinaryDataVersion) seqids.Table { + table := seqids.Table{ + StartBlockIndex: 0, + IDsTotal: idsTotal, + } - for { - // get MIDs block header - header := l.skipBlock() - if header.Len() == 0 { + for blockIdx := uint32(0); ; { + header, err := r.GetBlockHeader(blockIdx) + if err != nil { + logger.Fatal("error reading id block header", zap.Error(err)) + } + if header.Len() == 0 { // separator break } @@ -102,58 +104,36 @@ func (l *Loader) loadIDs(fracVersion config.BinaryDataVersion) (idsTable seqids. } else { mid = seq.MID(header.GetExt1()) } - - idsTable.MinBlockIDs = append(idsTable.MinBlockIDs, seq.ID{ + table.MinBlockIDs = append(table.MinBlockIDs, seq.ID{ MID: mid, RID: seq.RID(header.GetExt2()), }) + table.IDBlocksTotal++ - // skipping RIDs and Pos blocks - l.skipBlock() - l.skipBlock() + blockIdx += 3 // skip RIDs and Pos blocks } - return idsTable, blocksOffsets, nil + return table } -func (l *Loader) skipTokens() { - for { - // skip actual token blocks - header := l.skipBlock() - if header.Len() == 0 { - break - } - } +// loadLIDsTable scans block headers in the .lid file to build lids.Table. +func (l *Loader) loadLIDsTable(r storage.IndexReader) (*lids.Table, error) { + var maxTIDs, minTIDs []uint32 + var isContinued []bool - for { - // skip token table - header := l.skipBlock() - if header.Len() == 0 { - break + for blockIdx := uint32(0); ; blockIdx++ { + header, err := r.GetBlockHeader(blockIdx) + if err != nil { + return nil, err } - } -} - -func (l *Loader) loadLIDsBlocksTable() (*lids.Table, error) { - maxTIDs := make([]uint32, 0) - minTIDs := make([]uint32, 0) - isContinued := make([]bool, 0) - - startIndex := l.blockIndex - for { - header := l.skipBlock() if header.Len() == 0 { break } - - ext1 := header.GetExt1() ext2 := header.GetExt2() - maxTIDs = append(maxTIDs, uint32(ext2>>32)) minTIDs = append(minTIDs, uint32(ext2&0xFFFFFFFF)) - - isContinued = append(isContinued, ext1 == 1) + isContinued = append(isContinued, header.GetExt1() == 1) } - return lids.NewTable(startIndex, minTIDs, maxTIDs, isContinued), nil + return lids.NewTable(0, minTIDs, maxTIDs, isContinued), nil } diff --git a/fracmanager/cache_maintainer.go b/fracmanager/cache_maintainer.go index 70e5f956..2a6ac6dd 100644 --- a/fracmanager/cache_maintainer.go +++ b/fracmanager/cache_maintainer.go @@ -149,7 +149,12 @@ func (cm *CacheMaintainer) CreateIndexCache() *frac.IndexCache { LIDs: newCache[*lids.Block](cm, lidsName), Tokens: newCache[*token.Block](cm, tokensName), TokenTable: newCache[token.Table](cm, tokenTableName), - Registry: newCache[[]byte](cm, indexName), + // Each index file gets its own registry cache (they all use key=1 internally). + InfoRegistry: newCache[[]byte](cm, indexName), + TokenRegistry: newCache[[]byte](cm, indexName), + OffsetsRegistry: newCache[[]byte](cm, indexName), + IDRegistry: newCache[[]byte](cm, indexName), + LIDRegistry: newCache[[]byte](cm, indexName), } } diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 2a258bda..9fc15fe9 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -19,20 +19,35 @@ import ( type fracManifest struct { basePath string // base path to fraction files (without extension) hasDocs bool // presence of main documents file - hasIndex bool // presence of index file hasMeta bool // presence of meta-information (legacy WAL format) hasWal bool // presence of WAL with meta (new WAL format) + hasIndex bool // presence of index file hasSdocs bool // presence of sorted documents hasRemote bool // presence of remote fraction + // Split index file flags + hasInfo bool + hasToken bool + hasOffsets bool + hasID bool + hasLID bool + // Deletion marker file flags hasDocsDel bool // documents deletion marker hasSdocsDel bool // sorted documents deletion marker - hasIndexDel bool // index deletion marker // Temporary file flags - hasIndexTmp bool // temporary index file - hasSdocsTmp bool // temporary sorted documents file + hasInfoTmp bool + hasTokenTmp bool + hasOffsetsTmp bool + hasIDTmp bool + hasLIDTmp bool + hasSdocsTmp bool // temporary sorted documents file +} + +// hasAllIndexFiles reports whether all 5 split index files are present. +func (m *fracManifest) hasAllIndexFiles() bool { + return m.hasInfo && m.hasToken && m.hasOffsets && m.hasID && m.hasLID } // AddExtension adds information about a file with the specified extension @@ -47,20 +62,35 @@ func (m *fracManifest) AddExtension(ext string) error { m.hasWal = true case consts.SdocsFileSuffix: m.hasSdocs = true - case consts.IndexFileSuffix: - m.hasIndex = true case consts.RemoteFractionSuffix: m.hasRemote = true + case consts.InfoFileSuffix: + m.hasInfo = true + case consts.TokenFileSuffix: + m.hasToken = true + case consts.OffsetsFileSuffix: + m.hasOffsets = true + case consts.IDFileSuffix: + m.hasID = true + case consts.LIDFileSuffix: + m.hasLID = true + case consts.DocsDelFileSuffix: m.hasDocsDel = true case consts.SdocsDelFileSuffix: m.hasSdocsDel = true - case consts.IndexDelFileSuffix: - m.hasIndexDel = true - case consts.IndexTmpFileSuffix: - m.hasIndexTmp = true + case consts.InfoTmpFileSuffix: + m.hasInfoTmp = true + case consts.TokenTmpFileSuffix: + m.hasTokenTmp = true + case consts.OffsetsTmpFileSuffix: + m.hasOffsetsTmp = true + case consts.IDTmpFileSuffix: + m.hasIDTmp = true + case consts.LIDTmpFileSuffix: + m.hasLIDTmp = true case consts.SdocsTmpFileSuffix: m.hasSdocsTmp = true @@ -88,13 +118,13 @@ func (m *fracManifest) Stage() fracStage { if m.hasRemote { return fracStageRemote } - if m.hasIndex && (m.hasSdocs || m.hasDocs) { + if m.hasAllIndexFiles() && (m.hasSdocs || m.hasDocs) { return fracStageSealed } if (m.hasMeta || m.hasWal) && m.hasDocs { return fracStageActive } - if m.hasDocsDel || m.hasIndexDel || m.hasSdocsDel { + if m.hasDocsDel || m.hasSdocsDel { return fracStageZombie } return fracStageUnknown @@ -125,18 +155,21 @@ func removeMeta(m *fracManifest) { } } -func removeIndex(m *fracManifest) { - if m.hasIndex { - util.RemoveFile(m.basePath + consts.IndexFileSuffix) - m.hasIndex = false - } -} - -func removeIndexDel(m *fracManifest) { - if m.hasIndexDel { - util.RemoveFile(m.basePath + consts.IndexDelFileSuffix) - m.hasIndexDel = false +func removeIndexFiles(m *fracManifest) { + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + util.RemoveFile(m.basePath + suffix) } + m.hasInfo = false + m.hasToken = false + m.hasOffsets = false + m.hasID = false + m.hasLID = false } func removeSdocsDel(m *fracManifest) { @@ -154,10 +187,20 @@ func removeDocsDel(m *fracManifest) { } func removeIndexTmp(m *fracManifest) { - if m.hasIndexTmp { - util.RemoveFile(m.basePath + consts.IndexTmpFileSuffix) - m.hasIndexTmp = false + for _, suffix := range []string{ + consts.InfoTmpFileSuffix, + consts.TokenTmpFileSuffix, + consts.OffsetsTmpFileSuffix, + consts.IDTmpFileSuffix, + consts.LIDTmpFileSuffix, + } { + util.RemoveFile(m.basePath + suffix) } + m.hasInfoTmp = false + m.hasTokenTmp = false + m.hasOffsetsTmp = false + m.hasIDTmp = false + m.hasLIDTmp = false } func removeSdocsTmp(m *fracManifest) { @@ -240,8 +283,7 @@ func cleanupRemoteFrac(m *fracManifest) { removeMeta(m) removeDocs(m) removeSdocs(m) - removeIndex(m) - removeIndexDel(m) + removeIndexFiles(m) } // cleanupSealedFrac cleans files for sealed fractions @@ -265,17 +307,26 @@ func cleanupTemporary(m *fracManifest) { // removeAllFiles completely removes all fraction files // Used for cleaning up partially deleted or corrupted fractions func removeAllFiles(basePath string) { - // Remove main files first, then deletion markers to preserve deletion intent - util.RemoveFile(basePath + consts.IndexFileSuffix) - util.RemoveFile(basePath + consts.DocsFileSuffix) - util.RemoveFile(basePath + consts.SdocsFileSuffix) - util.RemoveFile(basePath + consts.MetaFileSuffix) - - util.RemoveFile(basePath + consts.IndexDelFileSuffix) - util.RemoveFile(basePath + consts.DocsDelFileSuffix) - util.RemoveFile(basePath + consts.SdocsDelFileSuffix) - util.RemoveFile(basePath + consts.SdocsTmpFileSuffix) - util.RemoveFile(basePath + consts.IndexTmpFileSuffix) + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + consts.DocsFileSuffix, + consts.SdocsFileSuffix, + consts.MetaFileSuffix, + consts.DocsDelFileSuffix, + consts.SdocsDelFileSuffix, + consts.SdocsTmpFileSuffix, + consts.InfoTmpFileSuffix, + consts.TokenTmpFileSuffix, + consts.OffsetsTmpFileSuffix, + consts.IDTmpFileSuffix, + consts.LIDTmpFileSuffix, + } { + util.RemoveFile(basePath + suffix) + } } // parseFilePath extracts components from a fraction file path From 714dd78ec954df7161e17c92d08806be6c701eb4 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Fri, 27 Mar 2026 15:59:06 +0300 Subject: [PATCH 02/26] refactor: change `sealing.Source` interface --- frac/active_sealing_source.go | 228 +++++++++------------ frac/fraction_concurrency_test.go | 18 +- frac/sealed/sealing/blocks_builder.go | 224 +++++++------------- frac/sealed/sealing/blocks_builder_test.go | 24 ++- frac/sealed/sealing/index.go | 2 +- frac/sealed/sealing/sealer.go | 24 ++- 6 files changed, 213 insertions(+), 307 deletions(-) diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 44aaa850..8d56bdd4 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -22,66 +22,55 @@ import ( "github.com/ozontech/seq-db/util" ) -// ActiveSealingSource transforms data from in-memory (frac.Active) storage -// into a format suitable for disk writing during index creation. -// -// The main purpose of this type is to provide access to sorted data -// through a set of iterators that allow sequential processing of -// data in sized blocks for disk writing: -// -// - TokenBlocks() - iterator for token blocks, sorted by fields and values -// - Fields() - iterator for sorted fields with maximum TIDs -// - IDsBlocks() - iterator for document ID blocks and their positions -// - TokenLIDs() - iterator for LID lists for each token -// - Docs() - iterator for documents themselves with duplicate handling -// -// All iterators work with pre-sorted data and return information -// in an order optimal for creating disk index structures. type ActiveSealingSource struct { - params common.SealParams // Sealing parameters - info *common.Info // fraction Info - created time.Time // Creation time of the source - sortedLIDs []uint32 // Sorted LIDs (Local ID) - oldToNewLIDs []uint32 // Mapping from old LIDs to new ones (after sorting) - mids *UInt64s // MIDs - rids *UInt64s // RIDs - fields []string // Sorted field names - fieldsMaxTIDs []uint32 // Maximum TIDs for each field - tids []uint32 // Sorted TIDs (Token ID) - tokens [][]byte // Tokens (values) by TID - lids []*TokenLIDs // LID lists for each token - docPosMap map[seq.ID]seq.DocPos // Original document positions - docPosSorted []seq.DocPos // Document positions after sorting - blocksOffsets []uint64 // Document block offsets - docsReader *storage.DocsReader // Document storage reader - lastErr error // Last error + params common.SealParams // Sealing parameters + + info *common.Info // fraction Info + created time.Time // Creation time of the source + + blocksOffsets []uint64 // Document block offsets + + sortedLIDs []uint32 // Sorted LIDs (Local ID) + oldToNewLIDs []uint32 // Mapping from old LIDs to new ones (after sorting) + + mids *UInt64s // MIDs + rids *UInt64s // RIDs + + fields []string // Sorted field names + fieldTid map[string][]uint32 // Each field contains sorted TIDs based on token value + tokens [][]byte // Tokens (values) by TID + lids []*TokenLIDs // LID lists for each token + + docPosMap map[seq.ID]seq.DocPos // Original document positions + docPosSorted []seq.DocPos // Document positions after sorting + docsReader *storage.DocsReader // Document storage reader + + lastErr error // Last error } -// NewActiveSealingSource creates a new data source for sealing -// based on an active in-memory index. func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSealingSource, error) { info := *active.info // copy + sortedLIDs := active.GetAllDocuments() + fields, fieldTid := sortFields(active.TokenList) - // Sort fields and get maximum TIDs for each field - sortedFields, fieldsMaxTIDs := sortFields(active.TokenList) + src := ActiveSealingSource{ + params: params, - // Sort tokens within each field - sortedTIDs := sortTokens(sortedFields, active.TokenList) + info: &info, + created: time.Now(), + + sortedLIDs: sortedLIDs, + oldToNewLIDs: makeInverser(sortedLIDs), // Create LID mapping + + mids: active.MIDs, + rids: active.RIDs, + + fields: fields, + fieldTid: fieldTid, + tokens: active.TokenList.tidToVal, + lids: active.TokenList.tidToLIDs, - src := ActiveSealingSource{ - params: params, - info: &info, - created: time.Now(), - sortedLIDs: sortedLIDs, - oldToNewLIDs: makeInverser(sortedLIDs), // Create LID mapping - mids: active.MIDs, - rids: active.RIDs, - fields: sortedFields, - tids: sortedTIDs, - fieldsMaxTIDs: fieldsMaxTIDs, - tokens: active.TokenList.tidToVal, - lids: active.TokenList.tidToLIDs, docPosMap: active.DocsPositions.idToPos, blocksOffsets: active.DocBlocks.vals, docsReader: &active.sortReader, @@ -99,49 +88,61 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe return &src, nil } -// sortFields sorts field names and calculates maximum TIDs for each field. -// Returns sorted field list and array of maximum TIDs. -func sortFields(tl *TokenList) ([]string, []uint32) { +func sortFields(tl *TokenList) ([]string, map[string][]uint32) { fields := make([]string, 0, len(tl.FieldTIDs)) - for field := range tl.FieldTIDs { + fieldTid := make(map[string][]uint32, len(tl.FieldTIDs)) + + for field, tids := range tl.FieldTIDs { fields = append(fields, field) - } - slices.Sort(fields) - pos := 0 - maxTIDs := make([]uint32, 0, len(fields)) - for _, field := range fields { - pos += len(tl.FieldTIDs[field]) - maxTIDs = append(maxTIDs, uint32(pos)) + // Make a copy because this memory is shared + // with concurrent readers (user search queries). + cp := slices.Clone(tids) + + slices.SortFunc(cp, func(i, j uint32) int { + return bytes.Compare(tl.tidToVal[i], tl.tidToVal[j]) + }) + + fieldTid[field] = cp } - return fields, maxTIDs + slices.Sort(fields) + return fields, fieldTid } -// sortTokens sorts tokens lexicographically within each field. -// Returns sorted list of TIDs. -func sortTokens(sortedFields []string, tl *TokenList) []uint32 { - pos := 0 - tids := make([]uint32, 0, len(tl.tidToVal)) - for _, field := range sortedFields { - tids = append(tids, tl.FieldTIDs[field]...) - chunk := tids[pos:] - slices.SortFunc(chunk, func(i, j uint32) int { - a := tl.tidToVal[i] - b := tl.tidToVal[j] - return bytes.Compare(a, b) // Sort by token value - }) - pos = len(tids) +func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { + mids := src.mids.vals + rids := src.rids.vals + + // First reserved ID (system). Position unused; LIDs use 1-based indexing. + if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { + return + } + + for i, lid := range src.sortedLIDs { + id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} + var pos seq.DocPos + if len(src.docPosSorted) == 0 { + pos = src.docPosMap[id] + } else { + pos = src.docPosSorted[i+1] // +1 for system document + } + if !yield(id, pos) { + return + } + } } - return tids } -// LastError returns the last error that occurred during processing. +func (src *ActiveSealingSource) BlockOffsets() []uint64 { + return src.blocksOffsets +} + func (src *ActiveSealingSource) LastError() error { return src.lastErr } -// prepareInfo prepares metadata for disk writing. func (src *ActiveSealingSource) prepareInfo() { src.info.MetaOnDisk = 0 src.info.SealingTime = uint64(src.created.UnixMilli()) @@ -153,77 +154,40 @@ func (src *ActiveSealingSource) prepareInfo() { src.info.BuildDistribution(mids) } -// Info returns index metadata information. func (src *ActiveSealingSource) Info() *common.Info { return src.info } -// TokenAndLIDs returns an iterator that yields one (token, lids) pair at a time, in TID order. -// Tokens are pre-sorted: first by field, then lexicographically within each field. -// The lids slice is reused between yields and must not be retained by the caller. -func (src *ActiveSealingSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { - return func(yield func([]byte, []uint32) bool) { - var lidBuf []uint32 - - for _, tid := range src.tids { - oldLIDs := src.lids[tid].GetLIDs(src.mids, src.rids) - - lidBuf = slices.Grow(lidBuf[:0], len(oldLIDs)) - for _, lid := range oldLIDs { - lidBuf = append(lidBuf, src.oldToNewLIDs[lid]) - } - - if !yield(src.tokens[tid], lidBuf) { - return - } - } - } -} - -// Field returns an iterator for sorted fields and their maximum TIDs. -func (src *ActiveSealingSource) Field() iter.Seq2[string, uint32] { - return func(yield func(string, uint32) bool) { - for i, field := range src.fields { - if !yield(field, src.fieldsMaxTIDs[i]) { +func (src *ActiveSealingSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + for _, field := range src.fields { + if !yield(field, src.tokensForField(field)) { return } } } } -// ID returns an iterator for document IDs and their positions, one pair at a time. -func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { - mids := src.mids.vals - rids := src.rids.vals +func (src *ActiveSealingSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] { + var lidsbuf []uint32 + return func(yield func([]byte, []uint32) bool) { + for _, tid := range src.fieldTid[field] { + token := src.tokens[tid] - // First reserved ID (system). Position unused; LIDs use 1-based indexing. - if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { - return - } + lids := src.lids[tid].GetLIDs(src.mids, src.rids) + lidsbuf = slices.Grow(lidsbuf[:0], len(lids)) - for i, lid := range src.sortedLIDs { - id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} - var pos seq.DocPos - if len(src.docPosSorted) == 0 { - pos = src.docPosMap[id] - } else { - pos = src.docPosSorted[i+1] // +1 for system document + for _, lid := range lids { + lidsbuf = append(lidsbuf, src.oldToNewLIDs[lid]) } - if !yield(id, pos) { + + if !yield(token, lidsbuf) { return } } } } -// BlockOffsets returns document block offsets. -func (src *ActiveSealingSource) BlockOffsets() []uint64 { - return src.blocksOffsets -} - -// makeInverser creates an array for converting old LIDs to new ones. -// sortedLIDs[i] = oldLID -> inverser[oldLID] = i+1 func makeInverser(sortedLIDs []uint32) []uint32 { inverser := make([]uint32, len(sortedLIDs)+1) for i, lid := range sortedLIDs { diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index 95e96637..138586fd 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -24,7 +24,7 @@ import ( "github.com/ozontech/seq-db/parser" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" - test_common "github.com/ozontech/seq-db/tests/common" + testcommon "github.com/ozontech/seq-db/tests/common" "github.com/ozontech/seq-db/tokenizer" ) @@ -38,9 +38,9 @@ func TestConcurrentAppendAndQuery(t *testing.T) { docs, bulks, fromTime, toTime := generatesMessages(numWriters*numMessagesPerWriter, bulkSize) - tmpDir := test_common.CreateTempDir() + tmpDir := testcommon.CreateTempDir() fracPath := filepath.Join(tmpDir, "test_fraction") - defer test_common.RemoveDir(fracPath) + defer testcommon.RemoveDir(fracPath) activeIndexer, stop := NewActiveIndexer(numIndexWorkers, 1000) defer stop() @@ -354,12 +354,12 @@ func seal(active *Active) (*Sealed, error) { return nil, err } indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 9090db18..4183ae46 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -2,7 +2,6 @@ package sealing import ( "encoding/binary" - "errors" "iter" "unsafe" @@ -55,40 +54,11 @@ func (bb *blocksBuilder) LastError() error { return bb.lastErr } -// BuildTokenBlocks converts scalar (token, lids) pairs into token blocks with field tables. -// onLIDs is called for each token's LIDs immediately during iteration — the caller must not -// retain the slice after onLIDs returns. Errors from onLIDs are stored in bb.lastErr. -// -// Visualization of relationships between fields, tokens, and table entries: -// -// Field Ranges: <-------f1----------><------f2-------><------------f3------------><----------f4----------> -// Token Blocks: [.t1.t2.t3.t4.][.t5.t6.t7.t8.][.t9....etc...][.............][.............][.............] -// Field Entries: {-----f1------}{-f1-}{---f2--}{--f2--}{-f3--}{------f3-----}{-f3-}{----f4-}{-----f4------} -// -// Parameters: -// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs -// - fields: Iterator of [fieldName, maxTID] pairs for all fields in ascending TID order -// - blockSize: Maximum payload size in bytes per token block -// - onLIDs: Called for each token's LIDs before the source advances to the next token func (bb *blocksBuilder) BuildTokenBlocks( - tokens iter.Seq2[[]byte, []uint32], - fields iter.Seq2[string, uint32], - accumulate func([]uint32) error, - blockSize int, + it iter.Seq2[string, iter.Seq2[[]byte, []uint32]], + accumulate func([]uint32) error, blockCapacity int, ) iter.Seq2[tokensSealBlock, []token.FieldTable] { return func(yield func(tokensSealBlock, []token.FieldTable) bool) { - nextField, stop := iter.Pull2(fields) - defer stop() - - var ( - hasMore bool - currentTID uint32 = 1 - fieldMaxTID uint32 = 0 - fieldName string - ) - - // Just wrap `accumulate` function to be able - // to track returned errors. accumulate := func(lids []uint32) error { if err := accumulate(lids); err != nil { bb.lastErr = err @@ -97,56 +67,95 @@ func (bb *blocksBuilder) BuildTokenBlocks( return nil } - for blockIdx, block := range seqBlockToken(tokens, blockSize, accumulate) { - if bb.lastErr != nil { + var ( + block tokensSealBlock + blockIdx uint32 + blockSize int + ) + + var ( + currentTID uint32 + pendingTable []token.FieldTable + fieldName string + fieldEntryStartTID uint32 + ) + + emitFieldEntry := func() { + if fieldName == "" || fieldEntryStartTID > currentTID { return } - // A block may span multiple fields, and a field may span multiple blocks. - // We emit one TableEntry per (field, block) intersection so that lookups - // can find the exact position of any token given its field and TID. - var table []token.FieldTable - for currentTID <= block.ext.maxTID { - if fieldMaxTID < currentTID { - if fieldName, fieldMaxTID, hasMore = nextField(); !hasMore { - bb.lastErr = errors.New("not enough fields to cover all TIDs") + entry := newTokenTableEntry(fieldEntryStartTID, currentTID, blockIdx, block) + pendingTable = append(pendingTable, token.FieldTable{ + Field: fieldName, + Entries: []*token.TableEntry{entry}, + }) + } + + flushBlock := func() bool { + emitFieldEntry() + block.ext.maxTID = currentTID + + if !yield(block, pendingTable) { + return false + } + + block.payload.Payload = block.payload.Payload[:0] + block.payload.Offsets = block.payload.Offsets[:0] + block.ext.minTID = currentTID + 1 + + blockIdx++ + blockSize = 0 + + pendingTable = pendingTable[:0] + fieldEntryStartTID = currentTID + 1 + + return true + } + + block.ext.minTID = 1 + for field, tokIt := range it { + emitFieldEntry() + + fieldName = field + fieldEntryStartTID = currentTID + 1 + + for tok, lids := range tokIt { + tokenSize := int(unsafe.Sizeof(uint32(0))) + len(tok) + + if blockSize > 0 && blockSize+tokenSize > blockCapacity { + if !flushBlock() { return } } - entry := newTokenTableEntry(currentTID, fieldMaxTID, blockIdx, block) - currentTID += entry.ValCount + block.payload.Offsets = append(block.payload.Offsets, uint32(len(block.payload.Payload))) + block.payload.Payload = binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tok))) + block.payload.Payload = append(block.payload.Payload, tok...) - table = append(table, token.FieldTable{ - Field: fieldName, - Entries: []*token.TableEntry{entry}}, - ) - } + if err := accumulate(lids); err != nil { + bb.lastErr = err + return + } - if !yield(block, table) { - return + currentTID++ + blockSize += tokenSize } } - if bb.lastErr != nil { - return - } - - if currentTID-1 != fieldMaxTID { - bb.lastErr = errors.New("fields and tokens not consistent") - } else if _, _, hasMore = nextField(); hasMore { - bb.lastErr = errors.New("excess field after processing all blocks") + if blockSize > 0 { + flushBlock() } } } func newTokenTableEntry( - entryStartTID, fieldMaxTID, + entryStartTID, entryEndTID uint32, blockIndex uint32, block tokensSealBlock, ) *token.TableEntry { // Convert global TIDs to block-local indices firstIndex := entryStartTID - block.ext.minTID - lastIndex := min(fieldMaxTID, block.ext.maxTID) - block.ext.minTID + lastIndex := entryEndTID - block.ext.minTID // Extract min and max token values for the entry range minVal := string(block.payload.GetToken(int(firstIndex))) @@ -193,99 +202,6 @@ func seqBlockID( } } -// seqBlockToken accumulates scalar (token, lids) pairs into sealed token blocks. -// A new block is started whenever the accumulated payload would exceed blockSize bytes. -// onLIDs is called for each token's LIDs immediately during iteration — the caller must not -// retain the slice after onLIDs returns. If onLIDs returns a non-nil error, iteration stops. -// -// Parameters: -// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs -// - blockSize: Maximum payload size in bytes before starting a new block -// - onLIDs: Called for each token's LIDs before the source advances to the next token -// -// Returns: -// - iter.Seq2[uint32, tokensSealBlock]: Sequence of (block index, sealed token block) pairs -func seqBlockToken( - tokens iter.Seq2[[]byte, []uint32], - blockSize int, accumulate func([]uint32) error, -) iter.Seq2[uint32, tokensSealBlock] { - return func(yield func(uint32, tokensSealBlock) bool) { - var ( - idx uint32 // 0-based block index - currentTID uint32 // monotonically increasing TID - block tokensSealBlock // block under construction - actualSize int // accumulated payload bytes - ) - - block.ext.minTID = 1 - flush := func() bool { - block.ext.maxTID = currentTID - - if !yield(idx, block) { - return false - } - - idx++ - - // We yielded complete token block several lines earlier. - // And now we prepare token block for the next batch. - block.payload.Payload = block.payload.Payload[:0] - block.payload.Offsets = block.payload.Offsets[:0] - - // Here we increment currentTID by one because - // it points to TID at the end of the *currently* yielded block. - block.ext.minTID = currentTID + 1 - - actualSize = 0 - return true - } - - for token, lids := range tokens { - // We encode token as [size](4B)[token](?B). - tokenSize := int(unsafe.Sizeof(uint32(0))) + len(token) - - needsFlushing := actualSize > 0 && - actualSize+tokenSize > blockSize - - if needsFlushing { - if !flush() { - return - } - } - - block.payload.Offsets = append( - block.payload.Offsets, - uint32(len(block.payload.Payload)), - ) - - block.payload.Payload = binary.LittleEndian.AppendUint32( - block.payload.Payload, - uint32(len(token)), - ) - - block.payload.Payload = append( - block.payload.Payload, - token..., - ) - - if err := accumulate(lids); err != nil { - return - } - - currentTID += 1 - actualSize += tokenSize - } - - if actualSize > 0 { - flush() - } - } -} - -// lidBlocksAcc incrementally builds LID blocks from per-token LID lists. -// Call Add for each token's LIDs in TID order, passing a callback that is invoked -// for each completed block before its backing arrays are reused. -// Call Flush once after all Add calls to handle the final (possibly partial) block. type lidBlocksAcc struct { blockCap int currentTID uint32 diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index d5637dc8..95ae545d 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -27,12 +27,26 @@ type mockSource struct { func (m *mockSource) Info() common.Info { return m.info } -func (m *mockSource) Field() iter.Seq2[string, uint32] { - return func(yield func(string, uint32) bool) { - for i := range len(m.fields) { - if !yield(m.fields[i], m.fieldMaxTIDs[i]) { +func (m *mockSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + start := 0 + for i, field := range m.fields { + end := int(m.fieldMaxTIDs[i]) + tokenStart, tokenEnd := start, end + if !yield(field, func(yield func([]byte, []uint32) bool) { + for j := tokenStart; j < tokenEnd; j++ { + var lids []uint32 + if j < len(m.tokenLIDs) { + lids = m.tokenLIDs[j] + } + if !yield(m.tokens[j], lids) { + return + } + } + }) { return } + start = end } } } @@ -114,7 +128,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { lidAccum := newLIDBlocksAccumulator(lidBlockCap) var lidBlocks []lidsSealBlock tokenBlocks := bb.BuildTokenBlocks( - src.TokenAndLIDs(), src.Field(), + src.Iterator(), func(lids []uint32) error { return lidAccum.Add(lids, func(block lidsSealBlock) error { block.payload.LIDs = slices.Clone(block.payload.LIDs) diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 57a3b3ad..27eb2823 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -219,7 +219,7 @@ func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src S } blocks := bb.BuildTokenBlocks( - src.TokenAndLIDs(), src.Field(), + src.Iterator(), accumulate, consts.RegularBlockSize, ) diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 233f0aa2..484270b1 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -16,12 +16,24 @@ import ( // Source interface defines the contract for data sources that can be sealed. // Provides access to all necessary data components for index creation. type Source interface { - Info() *common.Info // Fraction metadata information - ID() iter.Seq2[seq.ID, seq.DocPos] // Ordered sequence of document IDs and their positions - TokenAndLIDs() iter.Seq2[[]byte, []uint32] // Ordered sequence of tokens paired with their LID list - Field() iter.Seq2[string, uint32] // Ordered sequence of fields with their max TID value - BlockOffsets() []uint64 // Offsets of DocBlocks in the doc file - LastError() error // Last error encountered during data retrieval + // Info returns information about [sealing.Source]. + // For example, in one case it returns information about [frac.Active]. + Info() *common.Info + + // ID returns a view into [sealing.Source] stored ids. + // Identificators are returned in sorted order starting with the biggest seq.ID. + ID() iter.Seq2[seq.ID, seq.DocPos] + + // BlockOffsets returns all offsets to [storage.DocBlock] + // stored nside `.docs` file that is owned by [sealing.Source]. + BlockOffsets() []uint64 + + Iterator() iter.Seq2[ + string, // Field name + iter.Seq2[[]byte, []uint32], // Token value and lids for this token + ] + + LastError() error // Last error encountered during data retrieval } // createAndWrite creates a tmp file, calls write, syncs, closes, then renames to finalPath. From 2060d1118c8db20ec6f8764e4966cbcc8307acd0 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 12:13:07 +0300 Subject: [PATCH 03/26] refactor: comments, split files --- consts/consts.go | 18 +- frac/active_sealing_source.go | 27 +- frac/sealed/sealing/blocks_builder.go | 9 +- frac/sealed/sealing/index.go | 357 ++++++++++++-------------- frac/sealed/sealing/sealer.go | 56 ++-- frac/sealed/sealing/writer.go | 74 ++++++ seq/seq.go | 14 +- 7 files changed, 314 insertions(+), 241 deletions(-) create mode 100644 frac/sealed/sealing/writer.go diff --git a/consts/consts.go b/consts/consts.go index 7a8eb9a4..40abbdab 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -63,21 +63,21 @@ const ( InfoTmpFileSuffix = "._info" InfoDelFileSuffix = ".info.del" - TokenFileSuffix = ".token" - TokenTmpFileSuffix = "._token" - TokenDelFileSuffix = ".token.del" + TokenFileSuffix = ".tokens" + TokenTmpFileSuffix = "._tokens" + TokenDelFileSuffix = ".tokens.del" OffsetsFileSuffix = ".offsets" OffsetsTmpFileSuffix = "._offsets" OffsetsDelFileSuffix = ".offsets.del" - IDFileSuffix = ".id" - IDTmpFileSuffix = "._id" - IDDelFileSuffix = ".id.del" + IDFileSuffix = ".ids" + IDTmpFileSuffix = "._ids" + IDDelFileSuffix = ".ids.del" - LIDFileSuffix = ".lid" - LIDTmpFileSuffix = "._lid" - LIDDelFileSuffix = ".lid.del" + LIDFileSuffix = ".lids" + LIDTmpFileSuffix = "._lids" + LIDDelFileSuffix = ".lids.del" RemoteFractionSuffix = ".remote" diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 8d56bdd4..b90c0297 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -115,20 +115,29 @@ func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { mids := src.mids.vals rids := src.rids.vals - // First reserved ID (system). Position unused; LIDs use 1-based indexing. - if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { + // System ID and DocPos are not stored in `src.sortedLIDs`. + // However we do have to yield them to preserve 1-baseed indexing for ids. + if !yield(seq.SystemID, seq.SystemDocPos) { return } for i, lid := range src.sortedLIDs { - id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} - var pos seq.DocPos + id := seq.ID{ + MID: seq.MID(mids[lid]), + RID: seq.RID(rids[lid]), + } + + // Documents were not sorted previously. if len(src.docPosSorted) == 0 { - pos = src.docPosMap[id] - } else { - pos = src.docPosSorted[i+1] // +1 for system document + if !yield(id, src.docPosMap[id]) { + return + } + continue } - if !yield(id, pos) { + + // `i` in range [0; len(src.sortedLIDs)) + // but lids indexes are 1-based. + if !yield(id, src.docPosSorted[i+1]) { return } } @@ -158,7 +167,7 @@ func (src *ActiveSealingSource) Info() *common.Info { return src.info } -func (src *ActiveSealingSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { for _, field := range src.fields { if !yield(field, src.tokensForField(field)) { diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 4183ae46..ea506402 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -81,6 +81,7 @@ func (bb *blocksBuilder) BuildTokenBlocks( ) emitFieldEntry := func() { + // Handle case when field does not have tokens. if fieldName == "" || fieldEntryStartTID > currentTID { return } @@ -203,20 +204,24 @@ func seqBlockID( } type lidBlocksAcc struct { - blockCap int + blockCap int + currentTID uint32 currentBlock lidsSealBlock + isEndOfToken bool isContinued bool } func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { a := &lidBlocksAcc{blockCap: blockCap} + a.currentBlock.ext.minTID = 1 a.currentBlock.payload = lids.Block{ LIDs: make([]uint32, 0, blockCap), Offsets: []uint32{0}, } + return a } @@ -268,7 +273,7 @@ func (a *lidBlocksAcc) finalizeBlock() lidsSealBlock { result := a.currentBlock result.payload.IsLastLID = a.isEndOfToken result.ext.isContinued = a.isContinued - a.isContinued = !a.isEndOfToken + a.isContinued = !a.isEndOfToken return result } diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 27eb2823..2ac8d885 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -21,35 +21,6 @@ import ( "github.com/ozontech/seq-db/zstd" ) -// IndexSealer writes sealed fraction index data across multiple files. -// Each Write*File method writes one section to an independent file using the -// standard [prefix][blocks][registry] format so each file has its own IndexReader. -// -// Call order matters for PreloadedData: -// -// WriteTokenAndLIDFiles → WriteOffsetsFile → WriteIDFile -// -// (TokenTable is populated by WriteTokenAndLIDFiles; IDsTable by WriteOffsetsFile+WriteIDFile; -// LIDsTable by WriteTokenAndLIDFiles.) -type IndexSealer struct { - lastErr error - buf1 []byte - buf2 []byte - params common.SealParams - - idsTable seqids.Table - lidsTable lids.Table - tokenTable token.Table -} - -func NewIndexSealer(params common.SealParams) *IndexSealer { - return &IndexSealer{ - params: params, - buf1: make([]byte, 0, consts.RegularBlockSize), - buf2: make([]byte, 0, consts.RegularBlockSize), - } -} - // indexBlock is one compressed (or not) block with its registry metadata. type indexBlock struct { codec storage.Codec @@ -63,137 +34,75 @@ func (i indexBlock) Bin(pos int64) (storage.IndexBlockHeader, []byte) { return storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec), i.payload } -const filePrefixSize = 16 - -// write writes blocks to ws using [16-byte prefix][blocks][registry]. -// The prefix is written last (via seek-back) and stores registry position + size. -func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { - return err - } +type IndexSealer struct { + params common.SealParams - hw := bytes.NewBuffer(nil) - bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) - defer bytespool.ReleaseWriter(bw) + buf1 []byte + buf2 []byte - pos := filePrefixSize - for block := range blocks { - if s.lastErr != nil { - return s.lastErr - } - header, payload := block.Bin(int64(pos)) - if _, err := bw.Write(payload); err != nil { - return err - } - if _, err := hw.Write(header); err != nil { - return err - } - pos += len(payload) - } - if s.lastErr != nil { - return s.lastErr - } - if err := bw.Flush(); err != nil { - return err - } - - size := hw.Len() - regPos, err := ws.Seek(0, io.SeekEnd) - if err != nil { - return err - } - if _, err := bw.Write(hw.Bytes()); err != nil { - return err - } - if err := bw.Flush(); err != nil { - return err - } + idsTable seqids.Table + lidsTable lids.Table + tokenTable token.Table - prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) - if _, err := ws.Seek(0, io.SeekStart); err != nil { - return err - } - _, err = ws.Write(prefix) - return err + lastErr error } -// fileStreamWriter writes blocks incrementally to a single file using the -// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. -type fileStreamWriter struct { - ws io.WriteSeeker - bw *bytespool.Writer - hw bytes.Buffer - pos int +func NewIndexSealer(params common.SealParams) *IndexSealer { + return &IndexSealer{ + params: params, + buf1: make([]byte, 0, consts.RegularBlockSize), + buf2: make([]byte, 0, consts.RegularBlockSize), + } } -func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { - return nil, err - } - return &fileStreamWriter{ - ws: ws, - bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), - pos: filePrefixSize, - }, nil +func (s *IndexSealer) LIDsTable() lids.Table { + return s.lidsTable } -func (fw *fileStreamWriter) writeBlock(block indexBlock) error { - header, payload := block.Bin(int64(fw.pos)) - if _, err := fw.bw.Write(payload); err != nil { - return err - } - fw.hw.Write(header) // bytes.Buffer.Write never fails - fw.pos += len(payload) - return nil +func (s *IndexSealer) TokenTable() token.Table { + return s.tokenTable } -func (fw *fileStreamWriter) finalize() (err error) { - defer fw.release() - if err = fw.bw.Flush(); err != nil { - return - } - var regPos int64 - if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { - return - } - if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { - return - } - if err = fw.bw.Flush(); err != nil { - return - } - prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) - if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { - return - } - _, err = fw.ws.Write(prefix) - return +func (s *IndexSealer) IDsTable() seqids.Table { + return s.idsTable } -func (fw *fileStreamWriter) release() { - if fw.bw != nil { - bytespool.ReleaseWriter(fw.bw) - fw.bw = nil - } +// WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. +func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { + offsets := sealed.BlockOffsets{ + IDsTotal: src.Info().DocsTotal + 1, + Offsets: src.BlockOffsets(), + } + yield(s.packBlocksOffsetsBlock(offsets)) + }) } -// WriteInfoFile writes the .info file containing a single BlockInfo block. -func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { +func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { return s.write(ws, func(yield func(indexBlock) bool) { - yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) + for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if !yield(s.packMIDsBlock(block)) { + return + } + + if !yield(s.packRIDsBlock(block)) { + return + } + + if !yield(s.packPosBlock(block)) { + return + } + } + + if s.lastErr = src.LastError(); s.lastErr != nil { + return + } + + yield(indexBlock{}) // trailing separator }) } -// WriteTokenAndLIDFiles writes the .token and .lid files in a single pass over the source data. -// -// .token file: [token blocks...] [separator] [token-table block] [separator] -// .lid file: [LID blocks...] [separator] -// -// LID blocks are written interleaved with token block processing so that both files -// are produced from one sequential scan of the (token, LID) data. -func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src Source) error { +func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Source) error { tokenFW, err := newFileStreamWriter(tokenWS) if err != nil { return err @@ -219,7 +128,7 @@ func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src S } blocks := bb.BuildTokenBlocks( - src.Iterator(), + src.TokenTriplet(), accumulate, consts.RegularBlockSize, ) @@ -261,38 +170,9 @@ func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src S return tokenFW.finalize() } -// WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. -func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - offsets := sealed.BlockOffsets{ - IDsTotal: src.Info().DocsTotal + 1, - Offsets: src.BlockOffsets(), - } - yield(s.packBlocksOffsetsBlock(offsets)) - }) -} - -func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { +func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { return s.write(ws, func(yield func(indexBlock) bool) { - for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { - if !yield(s.packMIDsBlock(block)) { - return - } - - if !yield(s.packRIDsBlock(block)) { - return - } - - if !yield(s.packPosBlock(block)) { - return - } - } - - if s.lastErr = src.LastError(); s.lastErr != nil { - return - } - - yield(indexBlock{}) // trailing separator + yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) }) } @@ -318,6 +198,67 @@ func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { return append(dst, current) } +// write writes blocks to ws using [16-byte prefix][blocks][registry]. +// The prefix is written last (via seek-back) and stores registry position + size. +func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { + return err + } + + hw := bytes.NewBuffer(nil) + bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) + defer bytespool.ReleaseWriter(bw) + + pos := filePrefixSize + for block := range blocks { + if s.lastErr != nil { + return s.lastErr + } + + header, payload := block.Bin(int64(pos)) + if _, err := bw.Write(payload); err != nil { + return err + } + + if _, err := hw.Write(header); err != nil { + return err + } + + pos += len(payload) + } + + if s.lastErr != nil { + return s.lastErr + } + + if err := bw.Flush(); err != nil { + return err + } + + size := hw.Len() + regPos, err := ws.Seek(0, io.SeekEnd) + if err != nil { + return err + } + + if _, err := bw.Write(hw.Bytes()); err != nil { + return err + } + + if err := bw.Flush(); err != nil { + return err + } + + prefix := binary.LittleEndian.AppendUint64(nil, uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) + if _, err := ws.Seek(0, io.SeekStart); err != nil { + return err + } + + _, err = ws.Write(prefix) + return err +} + func newIndexBlock(raw []byte) indexBlock { return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } @@ -330,68 +271,98 @@ func (s *IndexSealer) newIndexBlockZSTD(raw []byte, level int) indexBlock { return newIndexBlock(raw) } +// packInfoBlock packs fraction information into an index block. func (s *IndexSealer) packInfoBlock(block sealed.BlockInfo) indexBlock { s.buf1 = block.Pack(s.buf1[:0]) - return newIndexBlock(s.buf1) + return newIndexBlock(s.buf1) // Info block is typically small, no compression } +// packTokenBlock packs token data into a compressed index block. func (s *IndexSealer) packTokenBlock(block tokensSealBlock) indexBlock { - s.buf1 = block.payload.Pack(s.buf1[:0]) + s.buf1 = block.payload.Pack(s.buf1[:0]) // Pack token data b := s.newIndexBlockZSTD(s.buf1, s.params.TokenListZstdLevel) + // Store TID range in extended metadata b.ext1 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) return b } -func (s *IndexSealer) packTokenTableBlock(tb token.TableBlock) indexBlock { - s.tokenTable = token.TableFromBlocks([]token.TableBlock{tb}) - s.buf1 = tb.Pack(s.buf1[:0]) +// packTokenTableBlock packs the token table into a compressed index block. +func (s *IndexSealer) packTokenTableBlock(tokenTableBlock token.TableBlock) indexBlock { + s.tokenTable = token.TableFromBlocks([]token.TableBlock{tokenTableBlock}) // Store for PreloadedData + + // Packing block + s.buf1 = tokenTableBlock.Pack(s.buf1[:0]) return s.newIndexBlockZSTD(s.buf1, s.params.TokenTableZstdLevel) } +// packBlocksOffsetsBlock packs document block offsets into a compressed index block. func (s *IndexSealer) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { - s.idsTable.IDsTotal = block.IDsTotal - s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) + // Update IDs table for PreloadedData + s.idsTable.IDsTotal = block.IDsTotal // Total number of IDs + s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) // Number of ID blocks + + // Packing block s.buf1 = block.Pack(s.buf1[:0]) - return s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) + b := s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) + return b } +// packMIDsBlock packs MIDs into a compressed index block. func (s *IndexSealer) packMIDsBlock(block idsSealBlock) indexBlock { + // Get the last ID in the block (smallest due to descending order) last := len(block.mids.Values) - 1 - minID := seq.ID{MID: seq.MID(block.mids.Values[last]), RID: seq.RID(block.rids.Values[last])} - s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) + + minID := seq.ID{ + MID: seq.MID(block.mids.Values[last]), + RID: seq.RID(block.rids.Values[last]), + } + + s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) // Store for PreloadedData + + // Packing block s.buf1 = block.mids.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + + // Store min MID and RID in extended metadata b.ext1 = uint64(minID.MID) b.ext2 = uint64(minID.RID) + return b } +// packRIDsBlock packs RIDs into a compressed index block. func (s *IndexSealer) packRIDsBlock(block idsSealBlock) indexBlock { s.buf1 = block.rids.Pack(s.buf1[:0]) - return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + return b } +// packPosBlock packs document positions into a compressed index block. func (s *IndexSealer) packPosBlock(block idsSealBlock) indexBlock { s.buf1 = block.params.Pack(s.buf1[:0]) - return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + return b } +// packLIDsBlock packs Local IDs (LIDs) into a compressed index block. +// Also updates LIDs table for preloaded data access. func (s *IndexSealer) packLIDsBlock(block lidsSealBlock) indexBlock { var ext1 uint64 - if block.ext.isContinued { + if block.ext.isContinued { // todo: Legacy continuation flag ext1 = 1 - block.ext.minTID++ + block.ext.minTID++ // Adjust for legacy format } + + // Update LIDs table for PreloadedData s.lidsTable.MinTIDs = append(s.lidsTable.MinTIDs, block.ext.minTID) s.lidsTable.MaxTIDs = append(s.lidsTable.MaxTIDs, block.ext.maxTID) s.lidsTable.IsContinued = append(s.lidsTable.IsContinued, block.ext.isContinued) + + // Packing block s.buf1 = block.payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.LIDsZstdLevel) - b.ext1 = ext1 - b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) + b.ext1 = ext1 // Legacy continuation flag + b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) // TID range + return b } - -func (s *IndexSealer) LIDsTable() lids.Table { return s.lidsTable } -func (s *IndexSealer) TokenTable() token.Table { return s.tokenTable } -func (s *IndexSealer) IDsTable() seqids.Table { return s.idsTable } diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 484270b1..ab97091f 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -14,45 +14,52 @@ import ( ) // Source interface defines the contract for data sources that can be sealed. -// Provides access to all necessary data components for index creation. +// Provides access to all necessary data components for index creation type Source interface { - // Info returns information about [sealing.Source]. - // For example, in one case it returns information about [frac.Active]. + // Info returns metadata describing this source. Info() *common.Info - // ID returns a view into [sealing.Source] stored ids. - // Identificators are returned in sorted order starting with the biggest seq.ID. + // ID returns an iterator over stored document identifiers paired with + // their positions, in descending [seq.ID] order. ID() iter.Seq2[seq.ID, seq.DocPos] - // BlockOffsets returns all offsets to [storage.DocBlock] - // stored nside `.docs` file that is owned by [sealing.Source]. + // BlockOffsets returns byte offsets to each document block + // within this source's `.docs` file. BlockOffsets() []uint64 - Iterator() iter.Seq2[ - string, // Field name - iter.Seq2[[]byte, []uint32], // Token value and lids for this token - ] + // TokenTriplet iterates over fields in lexicographic order. + // For each field, it yields tokens (lexicographically sorted) + // paired with the local document ID list for that token. + TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] - LastError() error // Last error encountered during data retrieval + // LastError returns the last error encountered during iteration, + // or nil if no error occurred. + LastError() error } -// createAndWrite creates a tmp file, calls write, syncs, closes, then renames to finalPath. -func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { +func createAndWrite( + tmpPath, finalPath string, + write func(*os.File) error, +) error { f, err := os.Create(tmpPath) if err != nil { return err } + if err := write(f); err != nil { f.Close() return err } + if err := f.Sync(); err != nil { f.Close() return err } + if err := f.Close(); err != nil { return err } + return os.Rename(tmpPath, finalPath) } @@ -106,14 +113,6 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { sealer := NewIndexSealer(params) - if err := createAndWrite( - info.Path+consts.InfoTmpFileSuffix, - info.Path+consts.InfoFileSuffix, - func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, - ); err != nil { - return nil, err - } - if err := createAndWrite( info.Path+consts.OffsetsTmpFileSuffix, info.Path+consts.OffsetsFileSuffix, @@ -133,7 +132,15 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { if err := createAndWriteBoth( info.Path+consts.TokenTmpFileSuffix, info.Path+consts.TokenFileSuffix, info.Path+consts.LIDTmpFileSuffix, info.Path+consts.LIDFileSuffix, - func(tokenF, lidF *os.File) error { return sealer.WriteTokenAndLIDFiles(tokenF, lidF, src) }, + func(tokenF, lidF *os.File) error { return sealer.WriteTokenTriplet(tokenF, lidF, src) }, + ); err != nil { + return nil, err + } + + if err := createAndWrite( + info.Path+consts.InfoTmpFileSuffix, + info.Path+consts.InfoFileSuffix, + func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, ); err != nil { return nil, err } @@ -155,9 +162,10 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { } totalSize += uint64(st.Size()) } - info.IndexOnDisk = totalSize + info.IndexOnDisk = totalSize lidsTable := sealer.LIDsTable() + preloaded := &sealed.PreloadedData{ Info: info, TokenTable: sealer.TokenTable(), diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go new file mode 100644 index 00000000..9c003fb9 --- /dev/null +++ b/frac/sealed/sealing/writer.go @@ -0,0 +1,74 @@ +package sealing + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/alecthomas/units" + "github.com/ozontech/seq-db/bytespool" +) + +const filePrefixSize = 16 + +// fileStreamWriter writes blocks incrementally to a single file using the +// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. +type fileStreamWriter struct { + ws io.WriteSeeker + bw *bytespool.Writer + hw bytes.Buffer + pos int +} + +func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { + return nil, err + } + + return &fileStreamWriter{ + ws: ws, + bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), + pos: filePrefixSize, + }, nil +} + +func (fw *fileStreamWriter) writeBlock(block indexBlock) error { + header, payload := block.Bin(int64(fw.pos)) + if _, err := fw.bw.Write(payload); err != nil { + return err + } + fw.hw.Write(header) // bytes.Buffer.Write never fails + fw.pos += len(payload) + return nil +} + +func (fw *fileStreamWriter) finalize() (err error) { + defer fw.release() + if err = fw.bw.Flush(); err != nil { + return + } + var regPos int64 + if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { + return + } + if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { + return + } + if err = fw.bw.Flush(); err != nil { + return + } + prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) + if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { + return + } + _, err = fw.ws.Write(prefix) + return +} + +func (fw *fileStreamWriter) release() { + if fw.bw != nil { + bytespool.ReleaseWriter(fw.bw) + fw.bw = nil + } +} diff --git a/seq/seq.go b/seq/seq.go index 6a5a0039..64168d16 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -15,9 +15,16 @@ type ID struct { RID RID } -type MID uint64 // nanoseconds part of ID -type RID uint64 // random part of ID -type LID uint32 // local id for a fraction +var ( + SystemID = ID{math.MaxUint64, math.MaxUint64} + SystemDocPos = DocPos(0) +) + +type ( + MID uint64 // nanoseconds part of ID + RID uint64 // random part of ID + LID uint32 // local id for a fraction +) func (m MID) Time() time.Time { nanosPerSecond := uint64(time.Second) @@ -100,7 +107,6 @@ func FromString(x string) (ID, error) { } rid, err := hex.DecodeString(x[17:]) - if err != nil { return id, err } From 59226f99e0d54d9dcc091da1f6b699061b5d5b68 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 13:08:57 +0300 Subject: [PATCH 04/26] refactor: reuse writer for all blocks --- frac/sealed/sealing/index.go | 158 +++++++++++++--------------------- frac/sealed/sealing/writer.go | 91 +++++++++++--------- 2 files changed, 112 insertions(+), 137 deletions(-) diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 2ac8d885..558cd4a5 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -1,14 +1,8 @@ package sealing import ( - "bytes" - "encoding/binary" "io" - "iter" - "github.com/alecthomas/units" - - "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" @@ -69,47 +63,66 @@ func (s *IndexSealer) IDsTable() seqids.Table { // WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - offsets := sealed.BlockOffsets{ - IDsTotal: src.Info().DocsTotal + 1, - Offsets: src.BlockOffsets(), - } - yield(s.packBlocksOffsetsBlock(offsets)) - }) + w, err := newWriter(ws) + if err != nil { + return err + } + defer w.release() + + offsets := sealed.BlockOffsets{ + IDsTotal: src.Info().DocsTotal + 1, + Offsets: src.BlockOffsets(), + } + + if err := w.writeBlock(s.packBlocksOffsetsBlock(offsets)); err != nil { + return err + } + + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { + return err + } + + return w.finalize() } func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { - if !yield(s.packMIDsBlock(block)) { - return - } - - if !yield(s.packRIDsBlock(block)) { - return - } - - if !yield(s.packPosBlock(block)) { - return - } + w, err := newWriter(ws) + if err != nil { + return err + } + defer w.release() + + for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if err := w.writeBlock(s.packMIDsBlock(block)); err != nil { + return err } - if s.lastErr = src.LastError(); s.lastErr != nil { - return + if err := w.writeBlock(s.packRIDsBlock(block)); err != nil { + return err + } + + if err := w.writeBlock(s.packPosBlock(block)); err != nil { + return err } + } + + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { + return err + } - yield(indexBlock{}) // trailing separator - }) + return w.finalize() } func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Source) error { - tokenFW, err := newFileStreamWriter(tokenWS) + tokenFW, err := newWriter(tokenWS) if err != nil { return err } defer tokenFW.release() - lidFW, err := newFileStreamWriter(lidWS) + lidFW, err := newWriter(lidWS) if err != nil { return err } @@ -171,9 +184,23 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc } func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) - }) + w, err := newWriter(ws) + if err != nil { + return err + } + defer w.release() + + block := sealed.BlockInfo{Info: src.Info()} + if err := w.writeBlock(s.packInfoBlock(block)); err != nil { + return err + } + + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { + return err + } + + return w.finalize() } // collapseOrderedFieldsTables merges FieldTables with the same field name. @@ -198,67 +225,6 @@ func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { return append(dst, current) } -// write writes blocks to ws using [16-byte prefix][blocks][registry]. -// The prefix is written last (via seek-back) and stores registry position + size. -func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { - return err - } - - hw := bytes.NewBuffer(nil) - bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) - defer bytespool.ReleaseWriter(bw) - - pos := filePrefixSize - for block := range blocks { - if s.lastErr != nil { - return s.lastErr - } - - header, payload := block.Bin(int64(pos)) - if _, err := bw.Write(payload); err != nil { - return err - } - - if _, err := hw.Write(header); err != nil { - return err - } - - pos += len(payload) - } - - if s.lastErr != nil { - return s.lastErr - } - - if err := bw.Flush(); err != nil { - return err - } - - size := hw.Len() - regPos, err := ws.Seek(0, io.SeekEnd) - if err != nil { - return err - } - - if _, err := bw.Write(hw.Bytes()); err != nil { - return err - } - - if err := bw.Flush(); err != nil { - return err - } - - prefix := binary.LittleEndian.AppendUint64(nil, uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) - if _, err := ws.Seek(0, io.SeekStart); err != nil { - return err - } - - _, err = ws.Write(prefix) - return err -} - func newIndexBlock(raw []byte) indexBlock { return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index 9c003fb9..e67b8123 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -9,66 +9,75 @@ import ( "github.com/ozontech/seq-db/bytespool" ) -const filePrefixSize = 16 - -// fileStreamWriter writes blocks incrementally to a single file using the -// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. -type fileStreamWriter struct { - ws io.WriteSeeker - bw *bytespool.Writer - hw bytes.Buffer +const prefixSize = 16 + +// writer writes blocks incrementally to a single file using the +// [prefix][blocks][registry] format. +type writer struct { + ws io.WriteSeeker + + wpayload *bytespool.Writer + wheader bytes.Buffer + pos int } -func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { +func newWriter(ws io.WriteSeeker) (*writer, error) { + if _, err := ws.Seek(prefixSize, io.SeekStart); err != nil { return nil, err } - return &fileStreamWriter{ - ws: ws, - bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), - pos: filePrefixSize, + return &writer{ + ws: ws, + wpayload: bytespool.AcquireWriterSize(ws, int(units.MiB)), + pos: prefixSize, }, nil } -func (fw *fileStreamWriter) writeBlock(block indexBlock) error { - header, payload := block.Bin(int64(fw.pos)) - if _, err := fw.bw.Write(payload); err != nil { +func (w *writer) writeBlock(block indexBlock) error { + header, payload := block.Bin(int64(w.pos)) + + if _, err := w.wpayload.Write(payload); err != nil { return err } - fw.hw.Write(header) // bytes.Buffer.Write never fails - fw.pos += len(payload) + + w.wheader.Write(header) + w.pos += len(payload) + return nil } -func (fw *fileStreamWriter) finalize() (err error) { - defer fw.release() - if err = fw.bw.Flush(); err != nil { - return +func (w *writer) finalize() error { + if err := w.wpayload.Flush(); err != nil { + return err } - var regPos int64 - if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { - return + + regpos, err := w.ws.Seek(0, io.SeekEnd) + if err != nil { + return err } - if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { - return + + if _, err := w.wpayload.Write(w.wheader.Bytes()); err != nil { + return err } - if err = fw.bw.Flush(); err != nil { - return + + if err := w.wpayload.Flush(); err != nil { + return err } - prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) - if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { - return + + prefix := make([]byte, 0, prefixSize) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(regpos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(w.wheader.Len())) + + if _, err := w.ws.Seek(0, io.SeekStart); err != nil { + return err } - _, err = fw.ws.Write(prefix) - return + + _, err = w.ws.Write(prefix) + return err } -func (fw *fileStreamWriter) release() { - if fw.bw != nil { - bytespool.ReleaseWriter(fw.bw) - fw.bw = nil - } +func (w *writer) release() { + bytespool.ReleaseWriter(w.wpayload) + w.wpayload = nil } From 71e639e2c7372341354d1bc436e8c3940e624fad Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 13:29:47 +0300 Subject: [PATCH 05/26] refactor: split token triple writing --- frac/sealed/sealing/index.go | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 558cd4a5..a983d9de 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -134,18 +134,14 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc lidAccum = newLIDBlocksAccumulator(consts.LIDBlockCap) ) + // NOTE(dkharms): This is so ugly but I cannot come up with other solution here. accumulate := func(lids []uint32) error { return lidAccum.Add(lids, func(block lidsSealBlock) error { return lidFW.writeBlock(s.packLIDsBlock(block)) }) } - blocks := bb.BuildTokenBlocks( - src.TokenTriplet(), - accumulate, consts.RegularBlockSize, - ) - - for block, fieldsTables := range blocks { + for block, fieldsTables := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { if err := tokenFW.writeBlock(s.packTokenBlock(block)); err != nil { return err } @@ -156,31 +152,43 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc return s.lastErr } - // Write the final (possibly partial) LID block and trailing separator. - if err := lidFW.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { + if err := s.finalizeLIDFile(lidFW, lidAccum); err != nil { return err } - if err := lidFW.writeBlock(indexBlock{}); err != nil { // trailing separator + return s.finalizeTokenFile(tokenFW, allFieldsTables) +} + +func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { + if err := w.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { return err } - if err := lidFW.finalize(); err != nil { + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { return err } - // Write token section separator, token table, trailing separator. - if err := tokenFW.writeBlock(indexBlock{}); err != nil { // section separator + return w.finalize() +} + +func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { + // Emit section separator. + if err := w.writeBlock(indexBlock{}); err != nil { return err } + tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if err := tokenFW.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { + if err := w.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { return err } - if err := tokenFW.writeBlock(indexBlock{}); err != nil { // trailing separator + + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { return err } - return tokenFW.finalize() + + return w.finalize() } func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { From d90db8c52cd7edfadca05b25dd3338d9b5e55b30 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 13:54:43 +0300 Subject: [PATCH 06/26] refactor: one more refactoring --- frac/remote.go | 5 ++-- frac/sealed.go | 5 ++-- frac/sealed/sealing/sealer.go | 54 ++++++++++++----------------------- frac/sealed_loader.go | 26 ++++++++++++----- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/frac/remote.go b/frac/remote.go index 7da03205..dc4e7118 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -23,9 +23,7 @@ import ( "github.com/ozontech/seq-db/util" ) -var ( - _ Fraction = (*Remote)(nil) -) +var _ Fraction = (*Remote)(nil) // Remote fraction is a fraction that is backed by remote storage. // @@ -255,6 +253,7 @@ func (f *Remote) load() error { ID: f.idReader, LID: f.lidReader, } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true diff --git a/frac/sealed.go b/frac/sealed.go index 7c419120..1c152735 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -24,9 +24,7 @@ import ( "github.com/ozontech/seq-db/util" ) -var ( - _ Fraction = (*Sealed)(nil) -) +var _ Fraction = (*Sealed)(nil) type Sealed struct { Config *Config @@ -247,6 +245,7 @@ func (f *Sealed) load() { ID: f.idReader, LID: f.lidReader, } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true } diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index ab97091f..888f7973 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -37,68 +37,52 @@ type Source interface { LastError() error } -func createAndWrite( - tmpPath, finalPath string, - write func(*os.File) error, -) error { - f, err := os.Create(tmpPath) - if err != nil { - return err - } - - if err := write(f); err != nil { +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { f.Close() return err } + return f.Close() +} - if err := f.Sync(); err != nil { - f.Close() +func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { + f, err := os.Create(tmpPath) + if err != nil { return err } - if err := f.Close(); err != nil { + if err := errors.Join(write(f), syncAndClose(f)); err != nil { return err } return os.Rename(tmpPath, finalPath) } -// createAndWriteBoth creates two tmp files, calls write with both, syncs and closes them, -// then renames both to their final paths. -func createAndWriteBoth(tmpPath1, finalPath1, tmpPath2, finalPath2 string, write func(*os.File, *os.File) error) error { +func createAndWriteBoth( + tmpPath1, finalPath1, + tmpPath2, finalPath2 string, + write func(*os.File, *os.File) error, +) error { f1, err := os.Create(tmpPath1) if err != nil { return err } + f2, err := os.Create(tmpPath2) if err != nil { f1.Close() return err } - if err := write(f1, f2); err != nil { - f1.Close() - f2.Close() - return err - } - if err := f1.Sync(); err != nil { - f1.Close() - f2.Close() - return err - } - if err := f1.Close(); err != nil { - f2.Close() - return err - } - if err := f2.Sync(); err != nil { - f2.Close() - return err - } - if err := f2.Close(); err != nil { + + writeErr := write(f1, f2) + if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { return err } + if err := os.Rename(tmpPath1, finalPath1); err != nil { return err } + return os.Rename(tmpPath2, finalPath2) } diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index c20272ee..6f74f155 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -36,15 +36,17 @@ type Loader struct { func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers IndexReaders) { t := time.Now() - var err error + var ( + err error + blockOffsets sealed.BlockOffsets + ) - var blockOffsets sealed.BlockOffsets blockOffsets, err = l.loadBlocksOffsets(readers.Offsets) if err != nil { logger.Fatal("load offsets error", zap.Error(err)) } - blocksData.BlocksOffsets = blockOffsets.Offsets + blocksData.BlocksOffsets = blockOffsets.Offsets blocksData.IDsTable = l.loadIDsTable(readers.ID, blockOffsets.IDsTotal, info.BinaryDataVer) blocksData.LIDsTable, err = l.loadLIDsTable(readers.LID) @@ -71,13 +73,16 @@ func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers func (l *Loader) loadBlocksOffsets(r storage.IndexReader) (sealed.BlockOffsets, error) { data, _, err := r.ReadIndexBlock(0, l.buf) l.buf = data + if err != nil { return sealed.BlockOffsets{}, err } - b := sealed.BlockOffsets{} + + var b sealed.BlockOffsets if err := b.Unpack(data); err != nil { return sealed.BlockOffsets{}, err } + return b, nil } @@ -104,12 +109,13 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio } else { mid = seq.MID(header.GetExt1()) } + table.MinBlockIDs = append(table.MinBlockIDs, seq.ID{ MID: mid, RID: seq.RID(header.GetExt2()), }) - table.IDBlocksTotal++ + table.IDBlocksTotal++ blockIdx += 3 // skip RIDs and Pos blocks } @@ -118,20 +124,26 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio // loadLIDsTable scans block headers in the .lid file to build lids.Table. func (l *Loader) loadLIDsTable(r storage.IndexReader) (*lids.Table, error) { - var maxTIDs, minTIDs []uint32 - var isContinued []bool + var ( + maxTIDs []uint32 + minTIDs []uint32 + isContinued []bool + ) for blockIdx := uint32(0); ; blockIdx++ { header, err := r.GetBlockHeader(blockIdx) if err != nil { return nil, err } + if header.Len() == 0 { break } + ext2 := header.GetExt2() maxTIDs = append(maxTIDs, uint32(ext2>>32)) minTIDs = append(minTIDs, uint32(ext2&0xFFFFFFFF)) + isContinued = append(isContinued, header.GetExt1() == 1) } From 352b2a7e163ba982b7daa345c928f5f1f06e0d18 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 14:36:09 +0300 Subject: [PATCH 07/26] chore: add backwards compatibility --- consts/consts.go | 3 + frac/fraction_test.go | 42 ++++++---- frac/sealed.go | 110 +++++++++++++++++++------ frac/sealed_loader.go | 137 +++++++++++++++++++++++++++++++ fracmanager/frac_manifest.go | 7 +- fracmanager/fraction_provider.go | 3 +- fracmanager/loader.go | 8 +- 7 files changed, 260 insertions(+), 50 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index 40abbdab..421f44c5 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -79,6 +79,9 @@ const ( LIDTmpFileSuffix = "._lids" LIDDelFileSuffix = ".lids.del" + // IndexFileSuffix is the legacy single-file index format (pre-split). + IndexFileSuffix = ".index" + RemoteFractionSuffix = ".remote" FracCacheFileSuffix = ".frac-cache" diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 7326ce54..9d4f7422 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2094,12 +2094,12 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { s.Require().NoError(err, "Sealing failed") indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), @@ -2116,6 +2116,7 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { s.config, testSkipMaskProvider{}, ) + active.Release() return sealed } @@ -2289,12 +2290,12 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal sealed.Release() indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), @@ -2311,6 +2312,11 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal s.config, testSkipMaskProvider{}, ) + s.config) + s.config, + false, + ) + s.fraction = sealed return sealed } @@ -2361,12 +2367,12 @@ func (s *RemoteFractionTestSuite) SetupTest() { s.Require().True(offloaded, "didn't offload frac") indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), diff --git a/frac/sealed.go b/frac/sealed.go index 1c152735..b1ec2eb0 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -37,7 +37,12 @@ type Sealed struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - // Per-section index files and their readers. + // isLegacy is true for fractions that use the old single .index file format. + isLegacy bool + legacyFile *os.File + legacyReader storage.IndexReader + + // Per-section index files and their readers (new split format only). infoFile *os.File tokenFile *os.File offsetsFile *os.File @@ -80,6 +85,7 @@ func NewSealed( info *common.Info, config *Config, skipMaskProvider skipMaskProvider, + isLegacy bool, ) *Sealed { f := &Sealed{ loadMu: &sync.RWMutex{}, @@ -88,6 +94,7 @@ func NewSealed( docsCache: docsCache, indexCache: indexCache, + isLegacy: isLegacy, info: info, BaseFileName: baseFile, Config: config, @@ -104,12 +111,26 @@ func NewSealed( f.openInfoFile() f.info = loadHeader(f.infoReader) - f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName) + f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.isLegacy) return f } func (f *Sealed) openInfoFile() { + if f.isLegacy { + if f.legacyFile == nil { + name := f.BaseFileName + consts.IndexFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open legacy index file", zap.String("file", name), zap.Error(err)) + } + f.legacyFile = file + f.legacyReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + } + f.infoReader = f.legacyReader // loadHeader uses infoReader + return + } + if f.infoFile == nil { name := f.BaseFileName + consts.InfoFileSuffix file, err := os.Open(name) @@ -122,6 +143,11 @@ func (f *Sealed) openInfoFile() { } func (f *Sealed) openIndexFiles() { + if f.isLegacy { + f.openInfoFile() // opens legacyFile if not already open + return + } + f.openInfoFile() if f.tokenFile == nil { @@ -238,15 +264,19 @@ func (f *Sealed) load() { f.openDocs() f.openIndexFiles() - readers := IndexReaders{ - Info: f.infoReader, - Token: f.tokenReader, - Offsets: f.offsetsReader, - ID: f.idReader, - LID: f.lidReader, + if f.isLegacy { + (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) + } else { + readers := IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + } + (&Loader{}).Load(&f.blocksData, f.info, readers) } - (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true } } @@ -260,11 +290,15 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) - g.Go(func() error { return u.Upload(gctx, f.infoFile) }) - g.Go(func() error { return u.Upload(gctx, f.tokenFile) }) - g.Go(func() error { return u.Upload(gctx, f.offsetsFile) }) - g.Go(func() error { return u.Upload(gctx, f.idFile) }) - g.Go(func() error { return u.Upload(gctx, f.lidFile) }) + if f.isLegacy { + g.Go(func() error { return u.Upload(gctx, f.legacyFile) }) + } else { + g.Go(func() error { return u.Upload(gctx, f.infoFile) }) + g.Go(func() error { return u.Upload(gctx, f.tokenFile) }) + g.Go(func() error { return u.Upload(gctx, f.offsetsFile) }) + g.Go(func() error { return u.Upload(gctx, f.idFile) }) + g.Go(func() error { return u.Upload(gctx, f.lidFile) }) + } if err := g.Wait(); err != nil { return true, err @@ -282,7 +316,11 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { - for _, file := range []*os.File{f.docsFile, f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} { + indexFiles := []*os.File{f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} + if f.isLegacy { + indexFiles = []*os.File{f.legacyFile} + } + for _, file := range append([]*os.File{f.docsFile}, indexFiles...) { if file != nil { if err := file.Close(); err != nil { logger.Error("can't close file", zap.String("file", file.Name()), zap.Error(err)) @@ -315,13 +353,17 @@ func (f *Sealed) Suicide() { } // Delete all index files directly (they are regenerable; no atomic rename needed). - for _, suffix := range []string{ + indexSuffixes := []string{ consts.InfoFileSuffix, consts.TokenFileSuffix, consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, - } { + } + if f.isLegacy { + indexSuffixes = []string{consts.IndexFileSuffix} + } + for _, suffix := range indexSuffixes { if err := os.Remove(f.BaseFileName + suffix); err != nil && !errors.Is(err, os.ErrNotExist) { logger.Error("can't remove index file", zap.String("file", f.BaseFileName+suffix), zap.Error(err)) } @@ -367,6 +409,17 @@ func (f *Sealed) FindLIDs(ctx context.Context, ids []seq.ID) ([]seq.LID, error) func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { f.load() + + tokenReader := &f.tokenReader + lidReader := &f.lidReader + idReader := &f.idReader + + if f.isLegacy { + tokenReader = &f.legacyReader + lidReader = &f.legacyReader + idReader = &f.legacyReader + } + return &sealedDataProvider{ ctx: ctx, fractionTypeLabel: "sealed", @@ -376,13 +429,13 @@ func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.idReader, + idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -419,16 +472,21 @@ func loadHeader(infoReader storage.IndexReader) *common.Info { return bi.Info } -// computeIndexOnDisk returns the total on-disk size of all 5 index files for a local fraction. -func computeIndexOnDisk(basePath string) uint64 { - var total int64 - for _, suffix := range []string{ +// computeIndexOnDisk returns the total on-disk size of index files for a local fraction. +func computeIndexOnDisk(basePath string, isLegacy bool) uint64 { + suffixes := []string{ consts.InfoFileSuffix, consts.TokenFileSuffix, consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, - } { + } + if isLegacy { + suffixes = []string{consts.IndexFileSuffix} + } + + var total int64 + for _, suffix := range suffixes { st, err := os.Stat(basePath + suffix) if err != nil { logger.Fatal("can't stat index file", zap.String("file", basePath+suffix), zap.Error(err)) diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 6f74f155..588c5fee 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -16,6 +16,143 @@ import ( "github.com/ozontech/seq-db/util" ) +// LegacyLoader reads the old single .index file format by scanning blocks sequentially. +// Block indices stored in lids.Table and seqids.Table are absolute within the .index file, +// so the same IndexReader can be passed to all sub-loaders unchanged. +type LegacyLoader struct { + reader storage.IndexReader + blockIndex uint32 +} + +// Load populates blocksData from a single legacy .index file. +// It starts at block 1 (block 0 is the Info block, already read by loadHeader). +func (l *LegacyLoader) Load(blocksData *sealed.BlocksData, info *common.Info, reader storage.IndexReader) { + t := time.Now() + + l.reader = reader + l.blockIndex = 1 // skip Info block at index 0 + + l.skipSection() // skip token blocks + l.skipSection() // skip token table blocks + + var err error + blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info.BinaryDataVer) + if err != nil { + logger.Fatal("legacy load ids error", zap.Error(err)) + } + + blocksData.LIDsTable, err = l.loadLIDs() + if err != nil { + logger.Fatal("legacy load lids error", zap.Error(err)) + } + + took := time.Since(t) + docsTotalK := float64(info.DocsTotal) / 1000 + indexOnDiskMb := util.SizeToUnit(info.IndexOnDisk, "mb") + throughput := indexOnDiskMb / util.DurationToUnit(took, "s") + logger.Info("sealed fraction loaded (legacy format)", + zap.String("fraction", info.Path), + util.ZapMsTsAsESTimeStr("creation_time", info.CreationTime), + zap.String("from", info.From.String()), + zap.String("to", info.To.String()), + util.ZapFloat64WithPrec("docs_k", docsTotalK, 1), + util.ZapDurationWithPrec("took_ms", took, "ms", 1), + util.ZapFloat64WithPrec("throughput_mb_sec", throughput, 1), + ) +} + +// skipSection advances past one separator-delimited section (reads headers until Len() == 0). +func (l *LegacyLoader) skipSection() { + for { + h, err := l.reader.GetBlockHeader(l.blockIndex) + if err != nil { + logger.Panic("error reading block header", zap.Error(err)) + } + + l.blockIndex++ + if h.Len() == 0 { + return + } + } +} + +// loadIDs reads the BlockOffsets block and then scans MID/RID/Pos triplets. +func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Table, []uint64, error) { + var buf []byte + data, _, err := l.reader.ReadIndexBlock(l.blockIndex, buf) + l.blockIndex++ + if err != nil { + return seqids.Table{}, nil, err + } + + var offsets sealed.BlockOffsets + if err := offsets.Unpack(data); err != nil { + return seqids.Table{}, nil, err + } + + table := seqids.Table{ + StartBlockIndex: l.blockIndex, // absolute index of first MID block in .index + IDsTotal: offsets.IDsTotal, + IDBlocksTotal: uint32(len(offsets.Offsets)), + } + + for { + h, err := l.reader.GetBlockHeader(l.blockIndex) + if err != nil { + logger.Fatal("error reading id block header", zap.Error(err)) + } + + l.blockIndex++ + if h.Len() == 0 { + break + } + + mid := seq.MID(h.GetExt1()) + if fracVersion < config.BinaryDataV2 { + mid = seq.MillisToMID(h.GetExt1()) + } + + table.MinBlockIDs = append(table.MinBlockIDs, seq.ID{ + MID: mid, + RID: seq.RID(h.GetExt2()), + }) + + l.blockIndex += 2 // skip RIDs and Pos blocks + } + + return table, offsets.Offsets, nil +} + +// loadLIDs scans LID block headers, recording the absolute start index for lids.Table. +func (l *LegacyLoader) loadLIDs() (*lids.Table, error) { + startIndex := l.blockIndex // absolute index of first LID block in .index + + var ( + maxTIDs []uint32 + minTIDs []uint32 + isContinued []bool + ) + + for { + h, err := l.reader.GetBlockHeader(l.blockIndex) + if err != nil { + return nil, err + } + + l.blockIndex++ + if h.Len() == 0 { + break + } + + maxTIDs = append(maxTIDs, uint32(h.GetExt2()>>32)) + minTIDs = append(minTIDs, uint32(h.GetExt2()&0xFFFFFFFF)) + + isContinued = append(isContinued, h.GetExt1() == 1) + } + + return lids.NewTable(startIndex, minTIDs, maxTIDs, isContinued), nil +} + // IndexReaders holds one IndexReader per split index file. type IndexReaders struct { Info storage.IndexReader diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 9fc15fe9..5b07b8dc 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -62,6 +62,8 @@ func (m *fracManifest) AddExtension(ext string) error { m.hasWal = true case consts.SdocsFileSuffix: m.hasSdocs = true + case consts.IndexFileSuffix: + m.hasIndex = true case consts.RemoteFractionSuffix: m.hasRemote = true @@ -118,7 +120,7 @@ func (m *fracManifest) Stage() fracStage { if m.hasRemote { return fracStageRemote } - if m.hasAllIndexFiles() && (m.hasSdocs || m.hasDocs) { + if (m.hasAllIndexFiles() || m.hasIndex) && (m.hasSdocs || m.hasDocs) { return fracStageSealed } if (m.hasMeta || m.hasWal) && m.hasDocs { @@ -162,6 +164,7 @@ func removeIndexFiles(m *fracManifest) { consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, + consts.IndexFileSuffix, } { util.RemoveFile(m.basePath + suffix) } @@ -170,6 +173,7 @@ func removeIndexFiles(m *fracManifest) { m.hasOffsets = false m.hasID = false m.hasLID = false + m.hasIndex = false } func removeSdocsDel(m *fracManifest) { @@ -313,6 +317,7 @@ func removeAllFiles(basePath string) { consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, + consts.IndexFileSuffix, consts.DocsFileSuffix, consts.SdocsFileSuffix, consts.MetaFileSuffix, diff --git a/fracmanager/fraction_provider.go b/fracmanager/fraction_provider.go index 73deb907..1e9eafec 100644 --- a/fracmanager/fraction_provider.go +++ b/fracmanager/fraction_provider.go @@ -66,7 +66,7 @@ func (fp *fractionProvider) NewActive(name string) *frac.Active { ) } -func (fp *fractionProvider) NewSealed(name string, cachedInfo *common.Info) *frac.Sealed { +func (fp *fractionProvider) NewSealed(name string, cachedInfo *common.Info, isLegacy bool) *frac.Sealed { return frac.NewSealed( name, fp.readLimiter, @@ -75,6 +75,7 @@ func (fp *fractionProvider) NewSealed(name string, cachedInfo *common.Info) *fra cachedInfo, // Preloaded meta information &fp.config.Fraction, fp.skipMaskProvider, + isLegacy, ) } diff --git a/fracmanager/loader.go b/fracmanager/loader.go index 6eb788ee..143b6d64 100644 --- a/fracmanager/loader.go +++ b/fracmanager/loader.go @@ -136,7 +136,7 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, case fracStageActive: actives = append(actives, l.provider.NewActive(manifest.basePath)) case fracStageSealed: - locals = append(locals, l.loadSealed(manifest.basePath, loadedInfoCache)) + locals = append(locals, l.loadSealed(manifest, loadedInfoCache)) case fracStageRemote: remotes = append(remotes, l.loadRemote(ctx, manifest.basePath, loadedInfoCache)) default: @@ -153,11 +153,11 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, } // loadSealed loads a sealed fraction using cache -func (l *Loader) loadSealed(basePath string, loadedInfoCache *fracInfoCache) *frac.Sealed { - info, found := loadedInfoCache.Get(filepath.Base(basePath)) +func (l *Loader) loadSealed(manifest *fracManifest, loadedInfoCache *fracInfoCache) *frac.Sealed { + info, found := loadedInfoCache.Get(filepath.Base(manifest.basePath)) l.updateStats(found) - f := l.provider.NewSealed(basePath, info) + f := l.provider.NewSealed(manifest.basePath, info, manifest.hasIndex) l.infoCache.Add(f.Info()) return f } From c6d75b5b993b8078b423be99bd9b0c69d471d3db Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 16:32:22 +0300 Subject: [PATCH 08/26] perf: sort lids on creation --- frac/active_sealing_source.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index b90c0297..e9d814ca 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -77,6 +77,7 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe } src.prepareInfo() + src.prepareLids() // Sort documents if not skipped in configuration if !active.Config.SkipSortDocs { @@ -163,6 +164,12 @@ func (src *ActiveSealingSource) prepareInfo() { src.info.BuildDistribution(mids) } +func (src *ActiveSealingSource) prepareLids() { + for _, tl := range src.lids[1:] { + tl.GetLIDs(src.mids, src.rids) + } +} + func (src *ActiveSealingSource) Info() *common.Info { return src.info } From 2fd86dfb65bf23c871075e2293be7e252330738b Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 20:24:00 +0300 Subject: [PATCH 09/26] chore: add statistics reporting on sealing --- frac/sealed/sealing/blocks_builder.go | 4 +- frac/sealed/sealing/blocks_builder_test.go | 12 ++--- frac/sealed/sealing/index.go | 48 ++++++++--------- frac/sealed/sealing/stats.go | 42 --------------- frac/sealed/sealing/writer.go | 62 ++++++++++++++++++++-- indexer/processor.go | 1 - 6 files changed, 89 insertions(+), 80 deletions(-) delete mode 100644 frac/sealed/sealing/stats.go diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index ea506402..a4c7b074 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -230,10 +230,10 @@ func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { // For each block that fills up, `onBlock` is called immediately // before the backing arrays are reset, so `onBlock` may read the // block data but must not retain references to it. -func (a *lidBlocksAcc) Add(lids []uint32, onBlock func(lidsSealBlock) error) error { +func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) error { a.currentTID++ - for _, lid := range lids { + for _, lid := range lidsbuf { if len(a.currentBlock.payload.LIDs) == a.blockCap { if err := onBlock(a.finalizeBlock()); err != nil { return err diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index 95ae545d..e2d3770e 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -35,11 +35,11 @@ func (m *mockSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { tokenStart, tokenEnd := start, end if !yield(field, func(yield func([]byte, []uint32) bool) { for j := tokenStart; j < tokenEnd; j++ { - var lids []uint32 + var lidsbuf []uint32 if j < len(m.tokenLIDs) { - lids = m.tokenLIDs[j] + lidsbuf = m.tokenLIDs[j] } - if !yield(m.tokens[j], lids) { + if !yield(m.tokens[j], lidsbuf) { return } } @@ -64,11 +64,11 @@ func (m *mockSource) ID() iter.Seq2[seq.ID, seq.DocPos] { func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { return func(yield func([]byte, []uint32) bool) { for i, token := range m.tokens { - var lids []uint32 + var lidsbuf []uint32 if i < len(m.tokenLIDs) { - lids = m.tokenLIDs[i] + lidsbuf = m.tokenLIDs[i] } - if !yield(token, lids) { + if !yield(token, lidsbuf) { return } } diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index a983d9de..6c6d57eb 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -74,12 +74,12 @@ func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { Offsets: src.BlockOffsets(), } - if err := w.writeBlock(s.packBlocksOffsetsBlock(offsets)); err != nil { + if err := w.writeBlock(btypeOffset, s.packBlocksOffsetsBlock(offsets)); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } @@ -94,55 +94,55 @@ func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { defer w.release() for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { - if err := w.writeBlock(s.packMIDsBlock(block)); err != nil { + if err := w.writeBlock(btypeMid, s.packMIDsBlock(block)); err != nil { return err } - if err := w.writeBlock(s.packRIDsBlock(block)); err != nil { + if err := w.writeBlock(btypeRid, s.packRIDsBlock(block)); err != nil { return err } - if err := w.writeBlock(s.packPosBlock(block)); err != nil { + if err := w.writeBlock(btypeDocPos, s.packPosBlock(block)); err != nil { return err } } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } return w.finalize() } -func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Source) error { - tokenFW, err := newWriter(tokenWS) +func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) error { + tw, err := newWriter(tws) if err != nil { return err } - defer tokenFW.release() + defer tw.release() - lidFW, err := newWriter(lidWS) + lw, err := newWriter(lws) if err != nil { return err } - defer lidFW.release() + defer lw.release() var ( bb blocksBuilder allFieldsTables []token.FieldTable - lidAccum = newLIDBlocksAccumulator(consts.LIDBlockCap) + lidacc = newLIDBlocksAccumulator(consts.LIDBlockCap) ) // NOTE(dkharms): This is so ugly but I cannot come up with other solution here. accumulate := func(lids []uint32) error { - return lidAccum.Add(lids, func(block lidsSealBlock) error { - return lidFW.writeBlock(s.packLIDsBlock(block)) + return lidacc.Add(lids, func(block lidsSealBlock) error { + return lw.writeBlock(btypeLid, s.packLIDsBlock(block)) }) } for block, fieldsTables := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { - if err := tokenFW.writeBlock(s.packTokenBlock(block)); err != nil { + if err := tw.writeBlock(btypeToken, s.packTokenBlock(block)); err != nil { return err } allFieldsTables = append(allFieldsTables, fieldsTables...) @@ -152,20 +152,20 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc return s.lastErr } - if err := s.finalizeLIDFile(lidFW, lidAccum); err != nil { + if err := s.finalizeLIDFile(lw, lidacc); err != nil { return err } - return s.finalizeTokenFile(tokenFW, allFieldsTables) + return s.finalizeTokenFile(tw, allFieldsTables) } func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { - if err := w.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { + if err := w.writeBlock(btypeLid, s.packLIDsBlock(lidAccum.Flush())); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } @@ -174,17 +174,17 @@ func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { // Emit section separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeToken, indexBlock{}); err != nil { return err } tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if err := w.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { + if err := w.writeBlock(btypeTokenTable, s.packTokenTableBlock(tokenTableBlock)); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } @@ -199,12 +199,12 @@ func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { defer w.release() block := sealed.BlockInfo{Info: src.Info()} - if err := w.writeBlock(s.packInfoBlock(block)); err != nil { + if err := w.writeBlock(btypeInfo, s.packInfoBlock(block)); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } diff --git a/frac/sealed/sealing/stats.go b/frac/sealed/sealing/stats.go deleted file mode 100644 index 5b119d60..00000000 --- a/frac/sealed/sealing/stats.go +++ /dev/null @@ -1,42 +0,0 @@ -package sealing - -import ( - "time" - - "go.uber.org/zap" - - "github.com/ozontech/seq-db/logger" - "github.com/ozontech/seq-db/util" -) - -type blocksStats struct { - start time.Time - len int - rawLen int - blocksCount int -} - -func startStats() blocksStats { - return blocksStats{start: time.Now()} -} - -func (s *blocksStats) takeStock(block indexBlock) { - s.blocksCount++ - s.len += len(block.payload) - s.rawLen += int(block.rawLen) -} - -func (s *blocksStats) log(name string, endTime time.Time) { - var ratio float64 - if s.len > 0 { - ratio = float64(s.rawLen) / float64(s.len) - } - logger.Info("seal block stats", - zap.String("type", name), - util.ZapUint64AsSizeStr("raw", uint64(s.rawLen)), - util.ZapUint64AsSizeStr("compressed", uint64(s.len)), - util.ZapFloat64WithPrec("ratio", ratio, 2), - zap.Uint64("blocks_count", uint64(s.blocksCount)), - util.ZapDurationWithPrec("write_duration_ms", endTime.Sub(s.start), "ms", 0), - ) -} diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index e67b8123..5bcfe0f3 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -5,12 +5,29 @@ import ( "encoding/binary" "io" + "go.uber.org/zap" + "github.com/alecthomas/units" + "github.com/ozontech/seq-db/bytespool" + "github.com/ozontech/seq-db/logger" + "github.com/ozontech/seq-db/util" ) const prefixSize = 16 +const ( + btypeInfo = "info" + btypeOffset = "offset" + btypeToken = "token" + btypeTokenTable = "token-table" + btypeMid = "mid" + btypeRid = "rid" + btypeDocPos = "doc-pos" + btypeLid = "lid" + btypeBlackhole = "blackhole" +) + // writer writes blocks incrementally to a single file using the // [prefix][blocks][registry] format. type writer struct { @@ -19,7 +36,26 @@ type writer struct { wpayload *bytespool.Writer wheader bytes.Buffer - pos int + pos int + stats map[string]blockstat +} + +type blockstat struct { + count int + raw int + compressed int + header int +} + +func (b blockstat) log(btype string) { + logger.Info( + "seal block stats", + zap.String("type", btype), + util.ZapUint64AsSizeStr("raw", uint64(b.raw)), + util.ZapUint64AsSizeStr("compressed", uint64(b.compressed)), + util.ZapUint64AsSizeStr("header", uint64(b.header)), + zap.Uint64("blocks_count", uint64(b.count)), + ) } func newWriter(ws io.WriteSeeker) (*writer, error) { @@ -31,16 +67,25 @@ func newWriter(ws io.WriteSeeker) (*writer, error) { ws: ws, wpayload: bytespool.AcquireWriterSize(ws, int(units.MiB)), pos: prefixSize, + stats: make(map[string]blockstat), }, nil } -func (w *writer) writeBlock(block indexBlock) error { +func (w *writer) writeBlock(btype string, block indexBlock) error { header, payload := block.Bin(int64(w.pos)) - if _, err := w.wpayload.Write(payload); err != nil { return err } + if btype != btypeBlackhole { + w.stats[btype] = blockstat{ + count: w.stats[btype].count + 1, + raw: w.stats[btype].raw + int(block.rawLen), + compressed: w.stats[btype].compressed + len(block.payload), + header: w.stats[btype].header + len(header), + } + } + w.wheader.Write(header) w.pos += len(payload) @@ -73,8 +118,15 @@ func (w *writer) finalize() error { return err } - _, err = w.ws.Write(prefix) - return err + if _, err := w.ws.Write(prefix); err != nil { + return err + } + + for btype, stats := range w.stats { + stats.log(btype) + } + + return nil } func (w *writer) release() { diff --git a/indexer/processor.go b/indexer/processor.go index dbf7c106..9ca83938 100644 --- a/indexer/processor.go +++ b/indexer/processor.go @@ -13,7 +13,6 @@ import ( "go.uber.org/zap" "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tokenizer" From 9a3dfd47b58e95baf84d6940e4fd325490320725 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 2 Apr 2026 10:36:50 +0300 Subject: [PATCH 10/26] refactor: add remote backwards compatibility --- consts/consts.go | 8 +- frac/fraction_test.go | 5 + frac/remote.go | 125 +++++++++++--- frac/sealed.go | 237 ++++++++++++++++++-------- frac/sealed/sealing/blocks_builder.go | 19 +-- frac/sealed/token/table_loader.go | 1 + frac/sealed_loader.go | 5 +- fracmanager/frac_manifest.go | 37 +--- fracmanager/fraction_provider.go | 7 +- fracmanager/loader.go | 23 ++- 10 files changed, 312 insertions(+), 155 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index 421f44c5..fc027f24 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -61,26 +61,22 @@ const ( InfoFileSuffix = ".info" InfoTmpFileSuffix = "._info" - InfoDelFileSuffix = ".info.del" TokenFileSuffix = ".tokens" TokenTmpFileSuffix = "._tokens" - TokenDelFileSuffix = ".tokens.del" OffsetsFileSuffix = ".offsets" OffsetsTmpFileSuffix = "._offsets" - OffsetsDelFileSuffix = ".offsets.del" IDFileSuffix = ".ids" IDTmpFileSuffix = "._ids" - IDDelFileSuffix = ".ids.del" LIDFileSuffix = ".lids" LIDTmpFileSuffix = "._lids" - LIDDelFileSuffix = ".lids.del" // IndexFileSuffix is the legacy single-file index format (pre-split). - IndexFileSuffix = ".index" + IndexFileSuffix = ".index" + IndexTmpFileSuffix = "._index" RemoteFractionSuffix = ".remote" diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 9d4f7422..8113251a 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2391,6 +2391,11 @@ func (s *RemoteFractionTestSuite) SetupTest() { s3cli, testSkipMaskProvider{}, ) + s3cli) + s3cli, + false, + ) + s.fraction = remoteFrac } } diff --git a/frac/remote.go b/frac/remote.go index dc4e7118..c5afa37b 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -43,7 +43,12 @@ type Remote struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - // Per-section index files and their readers. + // IsLegacy is true for fractions that use the old single .index file format. + IsLegacy bool + legacyFile storage.ImmutableFile + legacyReader storage.IndexReader + + // Per-section index files and their readers (new split format only). infoFile storage.ImmutableFile tokenFile storage.ImmutableFile offsetsFile storage.ImmutableFile @@ -78,6 +83,7 @@ func NewRemote( config *Config, s3cli *s3.Client, skipMaskProvider skipMaskProvider, + isLegacy bool, ) *Remote { f := &Remote{ ctx: ctx, @@ -95,6 +101,9 @@ func NewRemote( s3cli: s3cli, skipMaskProvider: skipMaskProvider, + s3cli: s3cli, + s3cli: s3cli, + IsLegacy: isLegacy, } // Fast path if fraction-info cache exists AND it has valid index size. @@ -109,7 +118,7 @@ func NewRemote( // I wrote a small proposal on how we can reduce impact of such events. // https://github.com/ozontech/seq-db/issues/92 - if err := f.openInfoFile(); err != nil { + if err := f.openInfo(); err != nil { logger.Error( "cannot open info file: any subsequent operation will fail", zap.String("fraction", filepath.Base(f.BaseFileName)), @@ -117,7 +126,7 @@ func NewRemote( ) } - f.info = loadHeader(f.infoReader) + f.info = loadInfo(f.infoReader) return f } @@ -164,6 +173,17 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e ) return nil, err } + + tokenReader := &f.tokenReader + lidReader := &f.lidReader + idReader := &f.idReader + + if f.IsLegacy { + tokenReader = &f.legacyReader + lidReader = &f.legacyReader + idReader = &f.legacyReader + } + return &sealedDataProvider{ ctx: ctx, fractionTypeLabel: "remote", @@ -173,13 +193,13 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.idReader, + idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -207,6 +227,9 @@ func (f *Remote) Suicide() { files := []string{ filepath.Base(f.BaseFileName) + consts.DocsFileSuffix, filepath.Base(f.BaseFileName) + consts.SdocsFileSuffix, + // Legacy single-file format. + filepath.Base(f.BaseFileName) + consts.IndexFileSuffix, + // New split format. filepath.Base(f.BaseFileName) + consts.InfoFileSuffix, filepath.Base(f.BaseFileName) + consts.TokenFileSuffix, filepath.Base(f.BaseFileName) + consts.OffsetsFileSuffix, @@ -242,73 +265,117 @@ func (f *Remote) load() error { return err } - if err := f.openIndexFiles(); err != nil { + if err := f.openIndex(); err != nil { return err } - readers := IndexReaders{ + if f.IsLegacy { + (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) + f.isLoaded = true + return nil + } + + (&Loader{}).Load(&f.blocksData, f.info, IndexReaders{ Info: f.infoReader, Token: f.tokenReader, Offsets: f.offsetsReader, ID: f.idReader, LID: f.lidReader, - } + }) - (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true - return nil } -func (f *Remote) openInfoFile() error { +func (f *Remote) openInfo() error { + if f.IsLegacy { + if f.legacyFile != nil { + return nil + } + + indexName := filepath.Base(f.BaseFileName) + consts.IndexFileSuffix + f.legacyFile = s3.NewReader(f.ctx, f.s3cli, indexName) + + f.legacyReader = storage.NewIndexReader( + f.readLimiter, indexName, + f.legacyFile, f.indexCache.InfoRegistry, + ) + + // infoReader is used by [loadInfo] + f.infoReader = f.legacyReader + return nil + } + if f.infoFile != nil { return nil } - return f.openRemoteFile( - consts.InfoFileSuffix, - func(file storage.ImmutableFile) { - f.infoFile = file - f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) - }, - ) + + return f.openRemoteFile(consts.InfoFileSuffix, func(file storage.ImmutableFile) { + f.infoFile = file + f.infoReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.InfoRegistry, + ) + }) } -func (f *Remote) openIndexFiles() error { - if err := f.openInfoFile(); err != nil { +func (f *Remote) openIndex() error { + if err := f.openInfo(); err != nil { return err } + + if f.IsLegacy { + return nil + } + if f.tokenFile == nil { if err := f.openRemoteFile(consts.TokenFileSuffix, func(file storage.ImmutableFile) { f.tokenFile = file - f.tokenReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.TokenRegistry) + f.tokenReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.TokenRegistry, + ) }); err != nil { return err } } + if f.offsetsFile == nil { if err := f.openRemoteFile(consts.OffsetsFileSuffix, func(file storage.ImmutableFile) { f.offsetsFile = file - f.offsetsReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.OffsetsRegistry) + f.offsetsReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.OffsetsRegistry, + ) }); err != nil { return err } } + if f.idFile == nil { if err := f.openRemoteFile(consts.IDFileSuffix, func(file storage.ImmutableFile) { f.idFile = file - f.idReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.IDRegistry) + f.idReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.IDRegistry, + ) }); err != nil { return err } } + if f.lidFile == nil { if err := f.openRemoteFile(consts.LIDFileSuffix, func(file storage.ImmutableFile) { f.lidFile = file - f.lidReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.LIDRegistry) + f.lidReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.LIDRegistry, + ) }); err != nil { return err } } + return nil } @@ -317,8 +384,12 @@ func (f *Remote) openRemoteFile(suffix string, assign func(storage.ImmutableFile ok, err := f.s3cli.Exists(f.ctx, name) if err != nil { - return fmt.Errorf("cannot check existence of %q file: %w", suffix, err) + return fmt.Errorf( + "cannot check existence of %q file: %w", + suffix, err, + ) } + if !ok { return fmt.Errorf("missing %q file", suffix) } diff --git a/frac/sealed.go b/frac/sealed.go index b1ec2eb0..c7c92023 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -37,8 +37,8 @@ type Sealed struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - // isLegacy is true for fractions that use the old single .index file format. - isLegacy bool + // IsLegacy is true for fractions that use the old single .index file format. + IsLegacy bool legacyFile *os.File legacyReader storage.IndexReader @@ -94,7 +94,7 @@ func NewSealed( docsCache: docsCache, indexCache: indexCache, - isLegacy: isLegacy, + IsLegacy: isLegacy, info: info, BaseFileName: baseFile, Config: config, @@ -109,46 +109,66 @@ func NewSealed( return f } - f.openInfoFile() - f.info = loadHeader(f.infoReader) - f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.isLegacy) + f.openInfo() + f.info = loadInfo(f.infoReader) + f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.IsLegacy) return f } -func (f *Sealed) openInfoFile() { - if f.isLegacy { - if f.legacyFile == nil { - name := f.BaseFileName + consts.IndexFileSuffix - file, err := os.Open(name) - if err != nil { - logger.Fatal("can't open legacy index file", zap.String("file", name), zap.Error(err)) - } - f.legacyFile = file - f.legacyReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) +func (f *Sealed) openInfo() { + if f.IsLegacy { + if f.legacyFile != nil { + return } - f.infoReader = f.legacyReader // loadHeader uses infoReader - return - } - if f.infoFile == nil { - name := f.BaseFileName + consts.InfoFileSuffix + name := f.BaseFileName + consts.IndexFileSuffix file, err := os.Open(name) if err != nil { - logger.Fatal("can't open info file", zap.String("file", name), zap.Error(err)) + logger.Fatal( + "can't open legacy index file", + zap.String("file", name), + zap.Error(err), + ) } - f.infoFile = file - f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + + f.legacyFile = file + f.legacyReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.InfoRegistry, + ) + + // infoReader is used by [loadInfo] + f.infoReader = f.legacyReader + return } -} -func (f *Sealed) openIndexFiles() { - if f.isLegacy { - f.openInfoFile() // opens legacyFile if not already open + if f.infoFile != nil { return } - f.openInfoFile() + name := f.BaseFileName + consts.InfoFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal( + "can't open info file", + zap.String("file", name), + zap.Error(err), + ) + } + + f.infoFile = file + f.infoReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.InfoRegistry, + ) +} + +func (f *Sealed) openIndex() { + f.openInfo() + if f.IsLegacy { + return + } if f.tokenFile == nil { name := f.BaseFileName + consts.TokenFileSuffix @@ -192,20 +212,32 @@ func (f *Sealed) openIndexFiles() { } func (f *Sealed) openDocs() { - if f.docsFile == nil { - var err error - f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) + if f.docsFile != nil { + return + } + + var err error + f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + logger.Fatal( + "can't open sdocs file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) + } + + f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) if err != nil { - if !errors.Is(err, os.ErrNotExist) { - logger.Fatal("can't open sdocs file", zap.String("frac", f.BaseFileName), zap.Error(err)) - } - f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) - if err != nil { - logger.Fatal("can't open docs file", zap.String("frac", f.BaseFileName), zap.Error(err)) - } + logger.Fatal( + "can't open docs file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) } - f.docsReader = storage.NewDocsReader(f.readLimiter, f.docsFile, f.docsCache) } + + f.docsReader = storage.NewDocsReader(f.readLimiter, f.docsFile, f.docsCache) } func NewSealedPreloaded( @@ -240,7 +272,7 @@ func NewSealedPreloaded( }) f.openDocs() - f.openIndexFiles() + f.openIndex() docsCountK := float64(f.info.DocsTotal) / 1000 logger.Info("sealed fraction created from active", @@ -260,37 +292,41 @@ func (f *Sealed) load() { f.loadMu.Lock() defer f.loadMu.Unlock() - if !f.isLoaded { - f.openDocs() - f.openIndexFiles() - - if f.isLegacy { - (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) - } else { - readers := IndexReaders{ - Info: f.infoReader, - Token: f.tokenReader, - Offsets: f.offsetsReader, - ID: f.idReader, - LID: f.lidReader, - } - (&Loader{}).Load(&f.blocksData, f.info, readers) - } + if f.isLoaded { + return + } + f.openDocs() + f.openIndex() + + if f.IsLegacy { + (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) f.isLoaded = true + return } + + (&Loader{}).Load(&f.blocksData, f.info, IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + }) + + f.isLoaded = true } // Offload saves all index files and docs to remote storage. func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) { f.loadMu.Lock() f.openDocs() - f.openIndexFiles() + f.openIndex() f.loadMu.Unlock() g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) - if f.isLegacy { + + if f.IsLegacy { g.Go(func() error { return u.Upload(gctx, f.legacyFile) }) } else { g.Go(func() error { return u.Upload(gctx, f.infoFile) }) @@ -316,14 +352,30 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { - indexFiles := []*os.File{f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} - if f.isLegacy { - indexFiles = []*os.File{f.legacyFile} + indexFiles := []*os.File{ + f.docsFile, + f.infoFile, + f.tokenFile, + f.offsetsFile, + f.idFile, + f.lidFile, + } + + if f.IsLegacy { + indexFiles = []*os.File{ + f.docsFile, + f.legacyFile, + } } - for _, file := range append([]*os.File{f.docsFile}, indexFiles...) { + + for _, file := range indexFiles { if file != nil { if err := file.Close(); err != nil { - logger.Error("can't close file", zap.String("file", file.Name()), zap.Error(err)) + logger.Error( + "can't close file", + zap.String("file", file.Name()), + zap.Error(err), + ) } } } @@ -339,13 +391,23 @@ func (f *Sealed) Suicide() { oldPath := f.BaseFileName + consts.DocsFileSuffix newPath := f.BaseFileName + consts.DocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename docs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) + logger.Error( + "can't rename docs file", + zap.String("old", oldPath), + zap.String("new", newPath), + zap.Error(err), + ) } oldPath = f.BaseFileName + consts.SdocsFileSuffix newPath = f.BaseFileName + consts.SdocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename sdocs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) + logger.Error( + "can't rename sdocs file", + zap.String("old", oldPath), + zap.String("new", newPath), + zap.Error(err), + ) } if f.PartialSuicideMode == HalfRename { @@ -360,17 +422,29 @@ func (f *Sealed) Suicide() { consts.IDFileSuffix, consts.LIDFileSuffix, } - if f.isLegacy { - indexSuffixes = []string{consts.IndexFileSuffix} + + if f.IsLegacy { + indexSuffixes = []string{ + consts.IndexFileSuffix, + } } + for _, suffix := range indexSuffixes { if err := os.Remove(f.BaseFileName + suffix); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove index file", zap.String("file", f.BaseFileName+suffix), zap.Error(err)) + logger.Error( + "can't remove index file", + zap.String("file", f.BaseFileName+suffix), + zap.Error(err), + ) } } if err := os.Remove(f.BaseFileName + consts.DocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove docs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) + logger.Error( + "can't remove docs del file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) } if f.PartialSuicideMode == HalfRemove { @@ -378,7 +452,11 @@ func (f *Sealed) Suicide() { } if err := os.Remove(f.BaseFileName + consts.SdocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove sdocs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) + logger.Error( + "can't remove sdocs del file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) } f.skipMaskProvider.RemoveFrac(f.info.Name()) @@ -414,7 +492,7 @@ func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { lidReader := &f.lidReader idReader := &f.idReader - if f.isLegacy { + if f.IsLegacy { tokenReader = &f.legacyReader lidReader = &f.legacyReader idReader = &f.legacyReader @@ -459,7 +537,7 @@ func (f *Sealed) IsIntersecting(from, to seq.MID) bool { return f.info.IsIntersecting(from, to) } -func loadHeader(infoReader storage.IndexReader) *common.Info { +func loadInfo(infoReader storage.IndexReader) *common.Info { block, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { logger.Fatal("error reading info block", zap.Error(err)) @@ -469,6 +547,7 @@ func loadHeader(infoReader storage.IndexReader) *common.Info { if err := bi.Unpack(block); err != nil { logger.Fatal("error unpacking info block", zap.Error(err)) } + return bi.Info } @@ -481,17 +560,25 @@ func computeIndexOnDisk(basePath string, isLegacy bool) uint64 { consts.IDFileSuffix, consts.LIDFileSuffix, } + if isLegacy { - suffixes = []string{consts.IndexFileSuffix} + suffixes = []string{ + consts.IndexFileSuffix, + } } var total int64 for _, suffix := range suffixes { st, err := os.Stat(basePath + suffix) if err != nil { - logger.Fatal("can't stat index file", zap.String("file", basePath+suffix), zap.Error(err)) + logger.Fatal( + "can't stat index file", + zap.String("file", basePath+suffix), + zap.Error(err), + ) } total += st.Size() } + return uint64(total) } diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index a4c7b074..f91a4f9a 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -173,11 +173,8 @@ func newTokenTableEntry( } // seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. -// A new block is yielded every `blockSize` IDs. -func seqBlockID( - ids iter.Seq2[seq.ID, seq.DocPos], - blockSize int, -) iter.Seq[idsSealBlock] { +// A new block is yielded every `blockCapacity` IDs. +func seqBlockID(ids iter.Seq2[seq.ID, seq.DocPos], blockCapacity int) iter.Seq[idsSealBlock] { return func(yield func(idsSealBlock) bool) { var block idsSealBlock @@ -186,7 +183,7 @@ func seqBlockID( block.rids.Values = append(block.rids.Values, uint64(id.RID)) block.params.Values = append(block.params.Values, uint64(pos)) - if len(block.mids.Values) == blockSize { + if len(block.mids.Values) == blockCapacity { if !yield(block) { return } @@ -204,7 +201,7 @@ func seqBlockID( } type lidBlocksAcc struct { - blockCap int + blockCapacity int currentTID uint32 currentBlock lidsSealBlock @@ -213,12 +210,12 @@ type lidBlocksAcc struct { isContinued bool } -func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { - a := &lidBlocksAcc{blockCap: blockCap} +func newLIDBlocksAccumulator(blockCapacity int) *lidBlocksAcc { + a := &lidBlocksAcc{blockCapacity: blockCapacity} a.currentBlock.ext.minTID = 1 a.currentBlock.payload = lids.Block{ - LIDs: make([]uint32, 0, blockCap), + LIDs: make([]uint32, 0, blockCapacity), Offsets: []uint32{0}, } @@ -234,7 +231,7 @@ func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) a.currentTID++ for _, lid := range lidsbuf { - if len(a.currentBlock.payload.LIDs) == a.blockCap { + if len(a.currentBlock.payload.LIDs) == a.blockCapacity { if err := onBlock(a.finalizeBlock()); err != nil { return err } diff --git a/frac/sealed/token/table_loader.go b/frac/sealed/token/table_loader.go index a0bf87be..0750de62 100644 --- a/frac/sealed/token/table_loader.go +++ b/frac/sealed/token/table_loader.go @@ -106,6 +106,7 @@ func (l *TableLoader) loadBlocks() ([]TableBlock, error) { tb.Unpack(blockData) blocks = append(blocks, tb) } + return blocks, nil } diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 588c5fee..28b9ef9f 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -79,8 +79,8 @@ func (l *LegacyLoader) skipSection() { // loadIDs reads the BlockOffsets block and then scans MID/RID/Pos triplets. func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Table, []uint64, error) { var buf []byte + data, _, err := l.reader.ReadIndexBlock(l.blockIndex, buf) - l.blockIndex++ if err != nil { return seqids.Table{}, nil, err } @@ -90,6 +90,9 @@ func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Tab return seqids.Table{}, nil, err } + // Move to the first block of ID section. + l.blockIndex++ + table := seqids.Table{ StartBlockIndex: l.blockIndex, // absolute index of first MID block in .index IDsTotal: offsets.IDsTotal, diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 5b07b8dc..8dc6dc72 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -35,14 +35,6 @@ type fracManifest struct { // Deletion marker file flags hasDocsDel bool // documents deletion marker hasSdocsDel bool // sorted documents deletion marker - - // Temporary file flags - hasInfoTmp bool - hasTokenTmp bool - hasOffsetsTmp bool - hasIDTmp bool - hasLIDTmp bool - hasSdocsTmp bool // temporary sorted documents file } // hasAllIndexFiles reports whether all 5 split index files are present. @@ -83,18 +75,12 @@ func (m *fracManifest) AddExtension(ext string) error { case consts.SdocsDelFileSuffix: m.hasSdocsDel = true - case consts.InfoTmpFileSuffix: - m.hasInfoTmp = true - case consts.TokenTmpFileSuffix: - m.hasTokenTmp = true - case consts.OffsetsTmpFileSuffix: - m.hasOffsetsTmp = true - case consts.IDTmpFileSuffix: - m.hasIDTmp = true - case consts.LIDTmpFileSuffix: - m.hasLIDTmp = true - case consts.SdocsTmpFileSuffix: - m.hasSdocsTmp = true + case consts.IndexTmpFileSuffix, + consts.InfoTmpFileSuffix, consts.TokenTmpFileSuffix, + consts.OffsetsTmpFileSuffix, consts.IDTmpFileSuffix, + consts.LIDTmpFileSuffix, consts.SdocsTmpFileSuffix: + + // Just handle temporary files (which were not commited). default: return fmt.Errorf("unknown fraction file type %s", ext) @@ -192,6 +178,7 @@ func removeDocsDel(m *fracManifest) { func removeIndexTmp(m *fracManifest) { for _, suffix := range []string{ + consts.IndexTmpFileSuffix, consts.InfoTmpFileSuffix, consts.TokenTmpFileSuffix, consts.OffsetsTmpFileSuffix, @@ -200,18 +187,10 @@ func removeIndexTmp(m *fracManifest) { } { util.RemoveFile(m.basePath + suffix) } - m.hasInfoTmp = false - m.hasTokenTmp = false - m.hasOffsetsTmp = false - m.hasIDTmp = false - m.hasLIDTmp = false } func removeSdocsTmp(m *fracManifest) { - if m.hasSdocsTmp { - util.RemoveFile(m.basePath + consts.SdocsTmpFileSuffix) - m.hasSdocsTmp = false - } + util.RemoveFile(m.basePath + consts.SdocsTmpFileSuffix) } // analyzeFiles analyzes fraction files and groups them by fraction ID diff --git a/fracmanager/fraction_provider.go b/fracmanager/fraction_provider.go index 1e9eafec..66e6477b 100644 --- a/fracmanager/fraction_provider.go +++ b/fracmanager/fraction_provider.go @@ -91,7 +91,7 @@ func (fp *fractionProvider) NewSealedPreloaded(name string, preloadedData *seale ) } -func (fp *fractionProvider) NewRemote(ctx context.Context, name string, cachedInfo *common.Info) *frac.Remote { +func (fp *fractionProvider) NewRemote(ctx context.Context, name string, cachedInfo *common.Info, isLegacy bool) *frac.Remote { return frac.NewRemote( ctx, name, @@ -102,6 +102,7 @@ func (fp *fractionProvider) NewRemote(ctx context.Context, name string, cachedIn &fp.config.Fraction, fp.s3cli, fp.skipMaskProvider, + isLegacy, ) } @@ -144,9 +145,11 @@ func (fp *fractionProvider) Offload(ctx context.Context, f *frac.Sealed) (*frac. if err != nil { return nil, err } + if !mustBeOffloaded { return nil, nil } + info := f.Info() - return fp.NewRemote(ctx, info.Path, info), nil + return fp.NewRemote(ctx, info.Path, info, f.IsLegacy), nil } diff --git a/fracmanager/loader.go b/fracmanager/loader.go index 143b6d64..69ff7c02 100644 --- a/fracmanager/loader.go +++ b/fracmanager/loader.go @@ -9,6 +9,7 @@ import ( "go.uber.org/zap" "golang.org/x/sync/errgroup" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/logger" ) @@ -138,7 +139,21 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, case fracStageSealed: locals = append(locals, l.loadSealed(manifest, loadedInfoCache)) case fracStageRemote: - remotes = append(remotes, l.loadRemote(ctx, manifest.basePath, loadedInfoCache)) + // TODO(dkharms): Drop this compatibility check. + + indexName := filepath.Base(manifest.basePath) + consts.IndexFileSuffix + hasIndex, err := l.provider.s3cli.Exists(ctx, indexName) + if err != nil { + logger.Error( + "will skip fraction: cannot check existence of .index file", + zap.String("fraction", filepath.Base(manifest.basePath)), + zap.Error(err), + ) + continue + } + + manifest.hasIndex = hasIndex + remotes = append(remotes, l.loadRemote(ctx, manifest, loadedInfoCache)) default: logger.Error("unexpected fraction stage", zap.Any("manifest", manifest)) } @@ -163,11 +178,11 @@ func (l *Loader) loadSealed(manifest *fracManifest, loadedInfoCache *fracInfoCac } // loadRemote loads a remote fraction -func (l *Loader) loadRemote(ctx context.Context, basePath string, loadedInfoCache *fracInfoCache) *frac.Remote { - info, found := loadedInfoCache.Get(filepath.Base(basePath)) +func (l *Loader) loadRemote(ctx context.Context, manifest *fracManifest, loadedInfoCache *fracInfoCache) *frac.Remote { + info, found := loadedInfoCache.Get(filepath.Base(manifest.basePath)) l.updateStats(found) - f := l.provider.NewRemote(ctx, basePath, info) + f := l.provider.NewRemote(ctx, manifest.basePath, info, manifest.hasIndex) l.infoCache.Add(f.Info()) return f } From 3b8571410cb300419646d8d15948beee8cb32af2 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 2 Apr 2026 11:42:12 +0300 Subject: [PATCH 11/26] fix: fix deletion logic in fractions loader --- consts/consts.go | 3 ++ frac/active.go | 22 ++++---------- frac/active_sealing_source.go | 2 +- frac/sealed/sealing/blocks_builder_test.go | 8 +++-- frac/sealed/sealing/writer.go | 3 +- fracmanager/frac_manifest.go | 35 +++++++++++----------- seq/seq.go | 12 ++++---- 7 files changed, 39 insertions(+), 46 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index fc027f24..8cc1ee75 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -77,6 +77,9 @@ const ( // IndexFileSuffix is the legacy single-file index format (pre-split). IndexFileSuffix = ".index" IndexTmpFileSuffix = "._index" + // TODO(dkharms): [IndexDelFileSuffix] is actually not necessary. + // We can remove it in the future releases. + IndexDelFileSuffix = ".index.del" RemoteFractionSuffix = ".remote" diff --git a/frac/active.go b/frac/active.go index 7c3691c1..91e25c22 100644 --- a/frac/active.go +++ b/frac/active.go @@ -3,7 +3,6 @@ package frac import ( "context" "io" - "math" "os" "path/filepath" "sync" @@ -26,9 +25,7 @@ import ( "github.com/ozontech/seq-db/util" ) -var ( - _ Fraction = (*Active)(nil) -) +var _ Fraction = (*Active)(nil) type Active struct { Config *Config @@ -64,16 +61,6 @@ type Active struct { skipMaskProvider skipMaskProvider } -const ( - systemMID = math.MaxUint64 - systemRID = math.MaxUint64 -) - -var systemSeqID = seq.ID{ - MID: systemMID, - RID: systemRID, -} - func NewActive( baseFileName string, activeIndexer *ActiveIndexer, @@ -116,8 +103,8 @@ func NewActive( } // use of 0 as keys in maps is prohibited – it's system key, so add first element - f.MIDs.Append(systemMID) - f.RIDs.Append(systemRID) + f.MIDs.Append(uint64(seq.SystemMID)) + f.RIDs.Append(uint64(seq.SystemRID)) logger.Info("active fraction created", zap.String("fraction", baseFileName)) @@ -128,7 +115,8 @@ func mustOpenMetaWriter( baseFileName string, readLimiter *storage.ReadLimiter, docsFile *os.File, - docsStats os.FileInfo) (*os.File, *ActiveWriter, *storage.DocBlocksReader, *storage.WalReader, uint64) { + docsStats os.FileInfo, +) (*os.File, *ActiveWriter, *storage.DocBlocksReader, *storage.WalReader, uint64) { legacyMetaFileName := baseFileName + consts.MetaFileSuffix if _, err := os.Stat(legacyMetaFileName); err == nil { diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index e9d814ca..8c960b41 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -223,7 +223,7 @@ func (src *ActiveSealingSource) Docs() iter.Seq2[seq.ID, []byte] { ) for id, pos := range src.ID() { - if id == systemSeqID { + if id == seq.SystemID { curDoc = nil // reserved system document (no payload) } else if id != prev { if curDoc, src.lastErr = src.doc(pos); src.lastErr != nil { diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index e2d3770e..4d32ad2a 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -13,6 +13,8 @@ import ( "github.com/ozontech/seq-db/seq" ) +var _ Source = (*mockSource)(nil) + type mockSource struct { info common.Info tokens [][]byte @@ -25,9 +27,9 @@ type mockSource struct { lastError error } -func (m *mockSource) Info() common.Info { return m.info } +func (m *mockSource) Info() *common.Info { return &m.info } -func (m *mockSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { start := 0 for i, field := range m.fields { @@ -128,7 +130,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { lidAccum := newLIDBlocksAccumulator(lidBlockCap) var lidBlocks []lidsSealBlock tokenBlocks := bb.BuildTokenBlocks( - src.Iterator(), + src.TokenTriplet(), func(lids []uint32) error { return lidAccum.Add(lids, func(block lidsSealBlock) error { block.payload.LIDs = slices.Clone(block.payload.LIDs) diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index 5bcfe0f3..c0e9e645 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -5,9 +5,8 @@ import ( "encoding/binary" "io" - "go.uber.org/zap" - "github.com/alecthomas/units" + "go.uber.org/zap" "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/logger" diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 8dc6dc72..a7f8b81c 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -35,6 +35,7 @@ type fracManifest struct { // Deletion marker file flags hasDocsDel bool // documents deletion marker hasSdocsDel bool // sorted documents deletion marker + hasIndexDel bool // index deletion marker } // hasAllIndexFiles reports whether all 5 split index files are present. @@ -74,6 +75,8 @@ func (m *fracManifest) AddExtension(ext string) error { m.hasDocsDel = true case consts.SdocsDelFileSuffix: m.hasSdocsDel = true + case consts.IndexDelFileSuffix: + m.hasIndexDel = true case consts.IndexTmpFileSuffix, consts.InfoTmpFileSuffix, consts.TokenTmpFileSuffix, @@ -81,6 +84,7 @@ func (m *fracManifest) AddExtension(ext string) error { consts.LIDTmpFileSuffix, consts.SdocsTmpFileSuffix: // Just handle temporary files (which were not commited). + // We will just drop them in all possible cases. default: return fmt.Errorf("unknown fraction file type %s", ext) @@ -112,7 +116,7 @@ func (m *fracManifest) Stage() fracStage { if (m.hasMeta || m.hasWal) && m.hasDocs { return fracStageActive } - if m.hasDocsDel || m.hasSdocsDel { + if m.hasDocsDel || m.hasSdocsDel || m.hasIndexDel { return fracStageZombie } return fracStageUnknown @@ -126,7 +130,7 @@ func removeDocs(m *fracManifest) { } func removeSdocs(m *fracManifest) { - if m.hasDocs { + if m.hasSdocs { util.RemoveFile(m.basePath + consts.SdocsFileSuffix) m.hasSdocs = false } @@ -291,23 +295,18 @@ func cleanupTemporary(m *fracManifest) { // Used for cleaning up partially deleted or corrupted fractions func removeAllFiles(basePath string) { for _, suffix := range []string{ - consts.InfoFileSuffix, - consts.TokenFileSuffix, - consts.OffsetsFileSuffix, - consts.IDFileSuffix, - consts.LIDFileSuffix, - consts.IndexFileSuffix, - consts.DocsFileSuffix, - consts.SdocsFileSuffix, + consts.DocsFileSuffix, consts.DocsDelFileSuffix, + consts.SdocsFileSuffix, consts.SdocsDelFileSuffix, consts.SdocsTmpFileSuffix, + consts.IndexFileSuffix, consts.IndexDelFileSuffix, consts.IndexTmpFileSuffix, + + consts.InfoFileSuffix, consts.InfoTmpFileSuffix, + consts.TokenFileSuffix, consts.TokenTmpFileSuffix, + consts.OffsetsFileSuffix, consts.OffsetsTmpFileSuffix, + consts.IDFileSuffix, consts.IDTmpFileSuffix, + consts.LIDFileSuffix, consts.LIDTmpFileSuffix, + consts.MetaFileSuffix, - consts.DocsDelFileSuffix, - consts.SdocsDelFileSuffix, - consts.SdocsTmpFileSuffix, - consts.InfoTmpFileSuffix, - consts.TokenTmpFileSuffix, - consts.OffsetsTmpFileSuffix, - consts.IDTmpFileSuffix, - consts.LIDTmpFileSuffix, + consts.WalFileSuffix, } { util.RemoveFile(basePath + suffix) } diff --git a/seq/seq.go b/seq/seq.go index 64168d16..adae4265 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -10,16 +10,18 @@ import ( "github.com/ozontech/seq-db/util" ) +var ( + SystemMID MID = math.MaxUint64 + SystemRID RID = math.MaxUint64 + SystemID ID = ID{SystemMID, SystemRID} + SystemDocPos DocPos = DocPos(0) +) + type ID struct { MID MID RID RID } -var ( - SystemID = ID{math.MaxUint64, math.MaxUint64} - SystemDocPos = DocPos(0) -) - type ( MID uint64 // nanoseconds part of ID RID uint64 // random part of ID From 3349041699790c93f0e044bc1ccc33235a867454 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Wed, 15 Apr 2026 13:20:13 +0300 Subject: [PATCH 12/26] chore: fix merge conflicts --- frac/fraction_test.go | 6 ------ frac/remote.go | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 8113251a..26488e94 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2311,9 +2311,6 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal nil, s.config, testSkipMaskProvider{}, - ) - s.config) - s.config, false, ) @@ -2390,9 +2387,6 @@ func (s *RemoteFractionTestSuite) SetupTest() { s.config, s3cli, testSkipMaskProvider{}, - ) - s3cli) - s3cli, false, ) diff --git a/frac/remote.go b/frac/remote.go index c5afa37b..2d8506af 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -98,11 +98,9 @@ func NewRemote( BaseFileName: baseFile, Config: config, - s3cli: s3cli, - + s3cli: s3cli, skipMaskProvider: skipMaskProvider, - s3cli: s3cli, - s3cli: s3cli, + IsLegacy: isLegacy, } From f0f2fbecc9d989718ed770737309184c0a434d3c Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 16 Apr 2026 16:17:10 +0300 Subject: [PATCH 13/26] refactor: return error in iterators --- frac/active_sealing_source.go | 135 ++++++++++++++------- frac/sealed/sealing/blocks_builder.go | 60 ++++----- frac/sealed/sealing/blocks_builder_test.go | 59 ++++----- frac/sealed/sealing/index.go | 22 ++-- frac/sealed/sealing/sealer.go | 114 ++++++++--------- util/pair.go | 6 + 6 files changed, 223 insertions(+), 173 deletions(-) create mode 100644 util/pair.go diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 8c960b41..ad7db7a8 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -22,6 +22,13 @@ import ( "github.com/ozontech/seq-db/util" ) +type ( + Document = util.Pair[seq.ID, []byte] + TokenPosting = util.Pair[[]byte, []uint32] + DocLocation = util.Pair[seq.ID, seq.DocPos] + IndexedDocBlock = util.Pair[[]byte, []seq.DocPos] +) + type ActiveSealingSource struct { params common.SealParams // Sealing parameters @@ -44,8 +51,6 @@ type ActiveSealingSource struct { docPosMap map[seq.ID]seq.DocPos // Original document positions docPosSorted []seq.DocPos // Document positions after sorting docsReader *storage.DocsReader // Document storage reader - - lastErr error // Last error } func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSealingSource, error) { @@ -111,26 +116,30 @@ func sortFields(tl *TokenList) ([]string, map[string][]uint32) { return fields, fieldTid } -func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { +func (src *ActiveSealingSource) ID() iter.Seq2[DocLocation, error] { + return func(yield func(DocLocation, error) bool) { mids := src.mids.vals rids := src.rids.vals // System ID and DocPos are not stored in `src.sortedLIDs`. // However we do have to yield them to preserve 1-baseed indexing for ids. - if !yield(seq.SystemID, seq.SystemDocPos) { + dloc := DocLocation{First: seq.SystemID, Second: seq.SystemDocPos} + if !yield(dloc, nil) { return } for i, lid := range src.sortedLIDs { - id := seq.ID{ - MID: seq.MID(mids[lid]), - RID: seq.RID(rids[lid]), + dloc := DocLocation{ + First: seq.ID{ + MID: seq.MID(mids[lid]), + RID: seq.RID(rids[lid]), + }, } // Documents were not sorted previously. if len(src.docPosSorted) == 0 { - if !yield(id, src.docPosMap[id]) { + dloc.Second = src.docPosMap[dloc.First] + if !yield(dloc, nil) { return } continue @@ -138,7 +147,8 @@ func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { // `i` in range [0; len(src.sortedLIDs)) // but lids indexes are 1-based. - if !yield(id, src.docPosSorted[i+1]) { + dloc.Second = src.docPosSorted[i+1] + if !yield(dloc, nil) { return } } @@ -149,10 +159,6 @@ func (src *ActiveSealingSource) BlockOffsets() []uint64 { return src.blocksOffsets } -func (src *ActiveSealingSource) LastError() error { - return src.lastErr -} - func (src *ActiveSealingSource) prepareInfo() { src.info.MetaOnDisk = 0 src.info.SealingTime = uint64(src.created.UnixMilli()) @@ -174,19 +180,19 @@ func (src *ActiveSealingSource) Info() *common.Info { return src.info } -func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { +func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { + return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { for _, field := range src.fields { - if !yield(field, src.tokensForField(field)) { + if !yield(field, src.postingsForField(field)) { return } } } } -func (src *ActiveSealingSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] { +func (src *ActiveSealingSource) postingsForField(field string) iter.Seq2[TokenPosting, error] { var lidsbuf []uint32 - return func(yield func([]byte, []uint32) bool) { + return func(yield func(TokenPosting, error) bool) { for _, tid := range src.fieldTid[field] { token := src.tokens[tid] @@ -197,7 +203,8 @@ func (src *ActiveSealingSource) tokensForField(field string) iter.Seq2[[]byte, [ lidsbuf = append(lidsbuf, src.oldToNewLIDs[lid]) } - if !yield(token, lidsbuf) { + tpost := TokenPosting{First: token, Second: lidsbuf} + if !yield(tpost, nil) { return } } @@ -214,24 +221,34 @@ func makeInverser(sortedLIDs []uint32) []uint32 { // Docs returns an iterator for documents with their IDs. // Handles duplicate IDs (for nested indexes). -func (src *ActiveSealingSource) Docs() iter.Seq2[seq.ID, []byte] { - src.lastErr = nil - return func(yield func(seq.ID, []byte) bool) { +func (src *ActiveSealingSource) Docs() iter.Seq2[Document, error] { + return func(yield func(Document, error) bool) { var ( - prev seq.ID - curDoc []byte + curdoc []byte + prev seq.ID = seq.SystemID ) - for id, pos := range src.ID() { - if id == seq.SystemID { - curDoc = nil // reserved system document (no payload) - } else if id != prev { - if curDoc, src.lastErr = src.doc(pos); src.lastErr != nil { + for dloc, err := range src.ID() { + if err != nil { + yield(Document{}, err) + return + } + + id, pos := dloc.First, dloc.Second + + if id != prev { + xcurdoc, xerr := src.doc(pos) + if xerr != nil { + yield(Document{}, xerr) return } + curdoc = xcurdoc } + prev = id - if !yield(id, curDoc) { + doc := Document{First: id, Second: curdoc} + + if !yield(doc, nil) { return } } @@ -244,13 +261,17 @@ func (src *ActiveSealingSource) doc(pos seq.DocPos) ([]byte, error) { blockOffset := src.blocksOffsets[blockIndex] var doc []byte - err := src.docsReader.ReadDocsFunc(blockOffset, []uint64{docOffset}, func(b []byte) error { - doc = b - return nil - }) + err := src.docsReader.ReadDocsFunc( + blockOffset, []uint64{docOffset}, + func(b []byte) error { + doc = b + return nil + }, + ) if err != nil { return nil, err } + return doc, nil } @@ -274,10 +295,10 @@ func (src *ActiveSealingSource) SortDocs() error { // Write blocks and get new offsets and positions blocksOffsets, positions, err := src.writeDocs(blocks, bw) - - if err := util.CollapseErrors([]error{src.lastErr, err}); err != nil { + if err != nil { return err } + if err := bw.Flush(); err != nil { return err } @@ -296,12 +317,15 @@ func (src *ActiveSealingSource) SortDocs() error { if err := sdocsFile.Sync(); err != nil { return err } + if err := sdocsFile.Close(); err != nil { return err } + if err := os.Rename(sdocsFile.Name(), src.info.Path+consts.SdocsFileSuffix); err != nil { return err } + if err := util.SyncPath(filepath.Dir(src.info.Path)); err != nil { return err } @@ -322,32 +346,39 @@ func (src *ActiveSealingSource) SortDocs() error { // writeDocs compresses and writes document blocks, calculating new offsets // and collecting document positions. -func (src *ActiveSealingSource) writeDocs(blocks iter.Seq2[[]byte, []seq.DocPos], w io.Writer) ([]uint64, []seq.DocPos, error) { +func (src *ActiveSealingSource) writeDocs(blocks iter.Seq2[IndexedDocBlock, error], w io.Writer) ([]uint64, []seq.DocPos, error) { offset := 0 buf := make([]byte, 0) blocksOffsets := make([]uint64, 0) allPositions := make([]seq.DocPos, 0, len(src.mids.vals)) // Process each document block - for block, positions := range blocks { - allPositions = append(allPositions, positions...) + for docBlock, err := range blocks { + if err != nil { + return nil, nil, err + } + + allPositions = append(allPositions, docBlock.Second...) blocksOffsets = append(blocksOffsets, uint64(offset)) // Compress document block - buf = storage.CompressDocBlock(block, buf[:0], src.params.DocBlocksZstdLevel) + buf = storage.CompressDocBlock(docBlock.First, buf[:0], src.params.DocBlocksZstdLevel) if _, err := w.Write(buf); err != nil { return nil, nil, err } + offset += len(buf) } + return blocksOffsets, allPositions, nil } // docBlocks groups documents into fixed-size blocks. // Returns an iterator for blocks and corresponding document positions. -func docBlocks(docs iter.Seq2[seq.ID, []byte], blockSize int) iter.Seq2[[]byte, []seq.DocPos] { - return func(yield func([]byte, []seq.DocPos) bool) { +func docBlocks(docs iter.Seq2[Document, error], blockSize int) iter.Seq2[IndexedDocBlock, error] { + return func(yield func(IndexedDocBlock, error) bool) { const defaultBlockSize = 128 * units.KiB + if blockSize <= 0 { blockSize = int(defaultBlockSize) logger.Warn("document block size not specified", zap.Int("default_size", blockSize)) @@ -357,24 +388,34 @@ func docBlocks(docs iter.Seq2[seq.ID, []byte], blockSize int) iter.Seq2[[]byte, prev seq.ID index uint32 // Current block index ) + pos := make([]seq.DocPos, 0) buf := make([]byte, 0, blockSize) // Iterate through documents - for id, doc := range docs { + for doc, err := range docs { + if err != nil { + yield(IndexedDocBlock{}, err) + return + } + + id, doc := doc.First, doc.Second if id == prev { // Duplicate IDs (for nested indexes) - store document once, // but create positions for each LID pos = append(pos, seq.PackDocPos(index, uint64(len(buf)))) continue } + prev = id // If block is full, yield it if len(buf) >= blockSize { - if !yield(buf, pos) { + docBlock := IndexedDocBlock{First: buf, Second: pos} + if !yield(docBlock, nil) { return } + index++ buf = buf[:0] pos = pos[:0] @@ -387,6 +428,8 @@ func docBlocks(docs iter.Seq2[seq.ID, []byte], blockSize int) iter.Seq2[[]byte, buf = binary.LittleEndian.AppendUint32(buf, uint32(len(doc))) buf = append(buf, doc...) } - yield(buf, pos) + + docBlock := IndexedDocBlock{First: buf, Second: pos} + yield(docBlock, nil) } } diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index f91a4f9a..6c295903 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -8,7 +8,11 @@ import ( "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" - "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/util" +) + +type ( + TokenBlock = util.Pair[tokensSealBlock, []token.FieldTable] ) // tokensExt represents the token ID range contained in a block. @@ -45,28 +49,13 @@ type idsSealBlock struct { // blocksBuilder constructs sealed blocks from various data sources. // Provides error tracking and consistency validation during block construction. -type blocksBuilder struct { - lastErr error // Last error encountered during processing -} - -// LastError returns the last error encountered during block processing. -func (bb *blocksBuilder) LastError() error { - return bb.lastErr -} +type blocksBuilder struct{} func (bb *blocksBuilder) BuildTokenBlocks( - it iter.Seq2[string, iter.Seq2[[]byte, []uint32]], + it iter.Seq2[string, iter.Seq2[TokenPosting, error]], accumulate func([]uint32) error, blockCapacity int, -) iter.Seq2[tokensSealBlock, []token.FieldTable] { - return func(yield func(tokensSealBlock, []token.FieldTable) bool) { - accumulate := func(lids []uint32) error { - if err := accumulate(lids); err != nil { - bb.lastErr = err - return err - } - return nil - } - +) iter.Seq2[TokenBlock, error] { + return func(yield func(TokenBlock, error) bool) { var ( block tokensSealBlock blockIdx uint32 @@ -97,7 +86,8 @@ func (bb *blocksBuilder) BuildTokenBlocks( emitFieldEntry() block.ext.maxTID = currentTID - if !yield(block, pendingTable) { + pair := TokenBlock{First: block, Second: pendingTable} + if !yield(pair, nil) { return false } @@ -121,7 +111,13 @@ func (bb *blocksBuilder) BuildTokenBlocks( fieldName = field fieldEntryStartTID = currentTID + 1 - for tok, lids := range tokIt { + for pair, err := range tokIt { + if err != nil { + yield(TokenBlock{}, err) + return + } + + tok, tlids := pair.First, pair.Second tokenSize := int(unsafe.Sizeof(uint32(0))) + len(tok) if blockSize > 0 && blockSize+tokenSize > blockCapacity { @@ -134,8 +130,8 @@ func (bb *blocksBuilder) BuildTokenBlocks( block.payload.Payload = binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tok))) block.payload.Payload = append(block.payload.Payload, tok...) - if err := accumulate(lids); err != nil { - bb.lastErr = err + if err := accumulate(tlids); err != nil { + yield(TokenBlock{}, err) return } @@ -174,17 +170,23 @@ func newTokenTableEntry( // seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. // A new block is yielded every `blockCapacity` IDs. -func seqBlockID(ids iter.Seq2[seq.ID, seq.DocPos], blockCapacity int) iter.Seq[idsSealBlock] { - return func(yield func(idsSealBlock) bool) { +func seqBlockID(ids iter.Seq2[DocLocation, error], blockCapacity int) iter.Seq2[idsSealBlock, error] { + return func(yield func(idsSealBlock, error) bool) { var block idsSealBlock - for id, pos := range ids { + for pair, err := range ids { + if err != nil { + yield(idsSealBlock{}, err) + return + } + + id, pos := pair.First, pair.Second block.mids.Values = append(block.mids.Values, uint64(id.MID)) block.rids.Values = append(block.rids.Values, uint64(id.RID)) block.params.Values = append(block.params.Values, uint64(pos)) if len(block.mids.Values) == blockCapacity { - if !yield(block) { + if !yield(block, nil) { return } @@ -195,7 +197,7 @@ func seqBlockID(ids iter.Seq2[seq.ID, seq.DocPos], blockCapacity int) iter.Seq[i } if len(block.mids.Values) > 0 { - yield(block) + yield(block, nil) } } } diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index 4d32ad2a..a0d1ff2b 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -24,28 +24,16 @@ type mockSource struct { pos []seq.DocPos tokenLIDs [][]uint32 blocksOffsets []uint64 - lastError error } func (m *mockSource) Info() *common.Info { return &m.info } -func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { +func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { + return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { start := 0 for i, field := range m.fields { end := int(m.fieldMaxTIDs[i]) - tokenStart, tokenEnd := start, end - if !yield(field, func(yield func([]byte, []uint32) bool) { - for j := tokenStart; j < tokenEnd; j++ { - var lidsbuf []uint32 - if j < len(m.tokenLIDs) { - lidsbuf = m.tokenLIDs[j] - } - if !yield(m.tokens[j], lidsbuf) { - return - } - } - }) { + if !yield(field, m.tokensForField(start, end)) { return } start = end @@ -53,24 +41,25 @@ func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32 } } -func (m *mockSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { - for i, id := range m.ids { - if !yield(id, m.pos[i]) { +func (m *mockSource) tokensForField(start, end int) iter.Seq2[TokenPosting, error] { + return func(yield func(TokenPosting, error) bool) { + for j := start; j < end; j++ { + var lidsbuf []uint32 + if j < len(m.tokenLIDs) { + lidsbuf = m.tokenLIDs[j] + } + pair := TokenPosting{First: m.tokens[j], Second: lidsbuf} + if !yield(pair, nil) { return } } } } -func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { - return func(yield func([]byte, []uint32) bool) { - for i, token := range m.tokens { - var lidsbuf []uint32 - if i < len(m.tokenLIDs) { - lidsbuf = m.tokenLIDs[i] - } - if !yield(token, lidsbuf) { +func (m *mockSource) ID() iter.Seq2[DocLocation, error] { + return func(yield func(DocLocation, error) bool) { + for i, id := range m.ids { + if !yield(DocLocation{First: id, Second: m.pos[i]}, nil) { return } } @@ -78,7 +67,6 @@ func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { } func (m *mockSource) BlockOffsets() []uint64 { return m.blocksOffsets } -func (m *mockSource) LastError() error { return m.lastError } func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { src := mockSource{ @@ -150,11 +138,13 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { blockIndex := 0 allFieldsTables := []token.FieldTable{} - for result, fieldsTables := range tokenBlocks { - assert.Equal(t, expectedSizes[blockIndex], result.payload.Len()) - for i := range result.payload.Len() { + for pair, err := range tokenBlocks { + assert.NoError(t, err) + block, fieldsTables := pair.First, pair.Second + assert.Equal(t, expectedSizes[blockIndex], block.payload.Len()) + for i := range block.payload.Len() { tid++ - assert.Equal(t, src.tokens[tid-1], result.payload.GetToken(i)) + assert.Equal(t, src.tokens[tid-1], block.payload.GetToken(i)) } allFieldsTables = append(allFieldsTables, fieldsTables...) blockIndex++ @@ -323,10 +313,13 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { i := 0 ids := []seq.ID{} pos := []seq.DocPos{} - for block := range seqBlockID(src.ID(), 3) { + for block, err := range seqBlockID(src.ID(), 3) { + assert.NoError(t, err) + assert.Equal(t, expectedSizes[i], len(block.mids.Values)) assert.Equal(t, expectedSizes[i], len(block.rids.Values)) assert.Equal(t, expectedSizes[i], len(block.params.Values)) + i++ j := 0 for _, mid := range block.mids.Values { diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 6c6d57eb..52c38308 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -11,7 +11,6 @@ import ( "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" - "github.com/ozontech/seq-db/util" "github.com/ozontech/seq-db/zstd" ) @@ -37,8 +36,6 @@ type IndexSealer struct { idsTable seqids.Table lidsTable lids.Table tokenTable token.Table - - lastErr error } func NewIndexSealer(params common.SealParams) *IndexSealer { @@ -93,7 +90,11 @@ func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { } defer w.release() - for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + for block, err := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if err != nil { + return err + } + if err := w.writeBlock(btypeMid, s.packMIDsBlock(block)); err != nil { return err } @@ -141,15 +142,16 @@ func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err }) } - for block, fieldsTables := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { - if err := tw.writeBlock(btypeToken, s.packTokenBlock(block)); err != nil { + for pair, err := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { + if err != nil { + return err + } + + if err := tw.writeBlock(btypeToken, s.packTokenBlock(pair.First)); err != nil { return err } - allFieldsTables = append(allFieldsTables, fieldsTables...) - } - if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { - return s.lastErr + allFieldsTables = append(allFieldsTables, pair.Second...) } if err := s.finalizeLIDFile(lw, lidacc); err != nil { diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 888f7973..57863d82 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -13,6 +13,11 @@ import ( "github.com/ozontech/seq-db/util" ) +type ( + DocLocation = util.Pair[seq.ID, seq.DocPos] + TokenPosting = util.Pair[[]byte, []uint32] +) + // Source interface defines the contract for data sources that can be sealed. // Provides access to all necessary data components for index creation type Source interface { @@ -21,7 +26,7 @@ type Source interface { // ID returns an iterator over stored document identifiers paired with // their positions, in descending [seq.ID] order. - ID() iter.Seq2[seq.ID, seq.DocPos] + ID() iter.Seq2[DocLocation, error] // BlockOffsets returns byte offsets to each document block // within this source's `.docs` file. @@ -30,60 +35,7 @@ type Source interface { // TokenTriplet iterates over fields in lexicographic order. // For each field, it yields tokens (lexicographically sorted) // paired with the local document ID list for that token. - TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] - - // LastError returns the last error encountered during iteration, - // or nil if no error occurred. - LastError() error -} - -func syncAndClose(f *os.File) error { - if err := f.Sync(); err != nil { - f.Close() - return err - } - return f.Close() -} - -func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { - f, err := os.Create(tmpPath) - if err != nil { - return err - } - - if err := errors.Join(write(f), syncAndClose(f)); err != nil { - return err - } - - return os.Rename(tmpPath, finalPath) -} - -func createAndWriteBoth( - tmpPath1, finalPath1, - tmpPath2, finalPath2 string, - write func(*os.File, *os.File) error, -) error { - f1, err := os.Create(tmpPath1) - if err != nil { - return err - } - - f2, err := os.Create(tmpPath2) - if err != nil { - f1.Close() - return err - } - - writeErr := write(f1, f2) - if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { - return err - } - - if err := os.Rename(tmpPath1, finalPath1); err != nil { - return err - } - - return os.Rename(tmpPath2, finalPath2) + TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] } // Seal writes five index files (.info, .token, .offsets, .id, .lid) for the fraction @@ -162,3 +114,55 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { return preloaded, nil } + +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { + f.Close() + return err + } + return f.Close() +} + +func createAndWrite( + tmp, final string, + write func(*os.File) error, +) error { + f, err := os.Create(tmp) + if err != nil { + return err + } + + if err := errors.Join(write(f), syncAndClose(f)); err != nil { + return err + } + + return os.Rename(tmp, final) +} + +func createAndWriteBoth( + tmpa, finala, + tmpb, finalb string, + write func(*os.File, *os.File) error, +) error { + a, err := os.Create(tmpa) + if err != nil { + return err + } + + b, err := os.Create(tmpb) + if err != nil { + a.Close() + return err + } + + writeErr := write(a, b) + if err := errors.Join(writeErr, syncAndClose(a), syncAndClose(b)); err != nil { + return err + } + + if err := os.Rename(tmpa, finala); err != nil { + return err + } + + return os.Rename(tmpb, finalb) +} diff --git a/util/pair.go b/util/pair.go new file mode 100644 index 00000000..2930fee9 --- /dev/null +++ b/util/pair.go @@ -0,0 +1,6 @@ +package util + +type Pair[F, S any] struct { + First F + Second S +} From 365782b0612d9ca7895ea31c4b38708d37983072 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 16 Apr 2026 19:05:56 +0300 Subject: [PATCH 14/26] perf: unsafe way to receive lids --- frac/active_lids.go | 7 ++++++- frac/active_sealing_source.go | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/frac/active_lids.go b/frac/active_lids.go index 4875deb8..236136ef 100644 --- a/frac/active_lids.go +++ b/frac/active_lids.go @@ -41,13 +41,18 @@ func (tl *TokenLIDs) GetLIDs(mids, rids *UInt64s) []uint32 { return tl.sorted } +// SortedLIDs returns pre-merged LIDs. +// Only safe to call after the fraction is frozen and lids queue was drained. +func (tl *TokenLIDs) SortedLIDsUnsafe() []uint32 { + return tl.sorted +} + type SeqIDCmp struct { mid []uint64 rid []uint64 } func (c *SeqIDCmp) compare(a, b uint32) int { - midA, midB := c.mid[a], c.mid[b] if midA > midB { diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index ad7db7a8..147fd08a 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -196,7 +196,7 @@ func (src *ActiveSealingSource) postingsForField(field string) iter.Seq2[TokenPo for _, tid := range src.fieldTid[field] { token := src.tokens[tid] - lids := src.lids[tid].GetLIDs(src.mids, src.rids) + lids := src.lids[tid].SortedLIDsUnsafe() lidsbuf = slices.Grow(lidsbuf[:0], len(lids)) for _, lid := range lids { From 4acc3e93e8cc5200321ba5199fbccb96ba92ced9 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Fri, 17 Apr 2026 13:08:29 +0300 Subject: [PATCH 15/26] perf: use linear array for token ids --- frac/active_sealing_source.go | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 147fd08a..a625da42 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "io" "iter" + "maps" "os" "path/filepath" "slices" @@ -43,10 +44,11 @@ type ActiveSealingSource struct { mids *UInt64s // MIDs rids *UInt64s // RIDs - fields []string // Sorted field names - fieldTid map[string][]uint32 // Each field contains sorted TIDs based on token value - tokens [][]byte // Tokens (values) by TID - lids []*TokenLIDs // LID lists for each token + fields []string // Sorted field names + fieldTids [][]uint32 // Each field contains sorted TIDs based on token value + + tokens [][]byte // Tokens (values) by TID + lids []*TokenLIDs // LID lists for each token docPosMap map[seq.ID]seq.DocPos // Original document positions docPosSorted []seq.DocPos // Document positions after sorting @@ -57,7 +59,7 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe info := *active.info // copy sortedLIDs := active.GetAllDocuments() - fields, fieldTid := sortFields(active.TokenList) + fields, fieldTids := sortFields(active.TokenList) src := ActiveSealingSource{ params: params, @@ -71,10 +73,10 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe mids: active.MIDs, rids: active.RIDs, - fields: fields, - fieldTid: fieldTid, - tokens: active.TokenList.tidToVal, - lids: active.TokenList.tidToLIDs, + fields: fields, + fieldTids: fieldTids, + tokens: active.TokenList.tidToVal, + lids: active.TokenList.tidToLIDs, docPosMap: active.DocsPositions.idToPos, blocksOffsets: active.DocBlocks.vals, @@ -94,26 +96,24 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe return &src, nil } -func sortFields(tl *TokenList) ([]string, map[string][]uint32) { - fields := make([]string, 0, len(tl.FieldTIDs)) - fieldTid := make(map[string][]uint32, len(tl.FieldTIDs)) - - for field, tids := range tl.FieldTIDs { - fields = append(fields, field) +func sortFields(tl *TokenList) ([]string, [][]uint32) { + fields := slices.Collect(maps.Keys(tl.FieldTIDs)) + slices.Sort(fields) + fieldTids := make([][]uint32, len(tl.FieldTIDs)) + for i, field := range fields { // Make a copy because this memory is shared // with concurrent readers (user search queries). - cp := slices.Clone(tids) + cp := slices.Clone(tl.FieldTIDs[field]) slices.SortFunc(cp, func(i, j uint32) int { return bytes.Compare(tl.tidToVal[i], tl.tidToVal[j]) }) - fieldTid[field] = cp + fieldTids[i] = cp } - slices.Sort(fields) - return fields, fieldTid + return fields, fieldTids } func (src *ActiveSealingSource) ID() iter.Seq2[DocLocation, error] { @@ -182,18 +182,18 @@ func (src *ActiveSealingSource) Info() *common.Info { func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { - for _, field := range src.fields { - if !yield(field, src.postingsForField(field)) { + for idx, field := range src.fields { + if !yield(field, src.postingsForField(field, idx)) { return } } } } -func (src *ActiveSealingSource) postingsForField(field string) iter.Seq2[TokenPosting, error] { +func (src *ActiveSealingSource) postingsForField(field string, idx int) iter.Seq2[TokenPosting, error] { var lidsbuf []uint32 return func(yield func(TokenPosting, error) bool) { - for _, tid := range src.fieldTid[field] { + for _, tid := range src.fieldTids[idx] { token := src.tokens[tid] lids := src.lids[tid].SortedLIDsUnsafe() From c221ba0d2e184b3acae79a98890d7cf581f87d1e Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 10:25:09 +0300 Subject: [PATCH 16/26] refactor: introduce `blockbuilder` and `indexwriter` package --- blockbuilder/blocks_builder.go | 303 ++++++++++++++++++ .../blocks_builder_test.go | 90 +++--- frac/fraction_concurrency_test.go | 2 +- frac/fraction_test.go | 2 +- frac/sealed/sealing/blocks_builder.go | 278 ---------------- fracmanager/fraction_provider.go | 2 +- fracmanager/sealer_test.go | 2 +- {frac/sealed/sealing => indexwriter}/index.go | 127 ++++---- .../sealed/sealing => indexwriter}/writer.go | 2 +- {frac/sealed/sealing => sealing}/sealer.go | 137 ++++---- 10 files changed, 467 insertions(+), 478 deletions(-) create mode 100644 blockbuilder/blocks_builder.go rename {frac/sealed/sealing => blockbuilder}/blocks_builder_test.go (71%) delete mode 100644 frac/sealed/sealing/blocks_builder.go rename {frac/sealed/sealing => indexwriter}/index.go (67%) rename {frac/sealed/sealing => indexwriter}/writer.go (99%) rename {frac/sealed/sealing => sealing}/sealer.go (58%) diff --git a/blockbuilder/blocks_builder.go b/blockbuilder/blocks_builder.go new file mode 100644 index 00000000..193b061e --- /dev/null +++ b/blockbuilder/blocks_builder.go @@ -0,0 +1,303 @@ +package blockbuilder + +import ( + "encoding/binary" + "iter" + "unsafe" + + "github.com/ozontech/seq-db/frac/sealed/lids" + "github.com/ozontech/seq-db/frac/sealed/seqids" + "github.com/ozontech/seq-db/frac/sealed/token" + "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/util" +) + +type ( + DocLocation = util.Pair[seq.ID, seq.DocPos] + TokenPosting = util.Pair[[]byte, []uint32] + TokenBlock = util.Pair[TokensSealBlock, []token.FieldTable] +) + +// TokensExt represents the token ID range contained in a block. +type TokensExt struct { + MinTID uint32 // First token ID in the block + MaxTID uint32 // Last token ID in the block +} + +// TokensSealBlock represents a sealed block containing token data with metadata. +type TokensSealBlock struct { + Ext TokensExt // Tokens block metadata for registry marking + Payload token.Block // Actual token data payload +} + +// LidsExt represents the range and continuation status of LID blocks. +type LidsExt struct { + MinTID uint32 // First token ID in the LID block + MaxTID uint32 // Last token ID in the LID block + IsContinued bool // Whether LID sequence continues in next block +} + +// LidsSealBlock represents a sealed block containing LID (Local ID) data. +type LidsSealBlock struct { + Ext LidsExt // LIDs block metadata for registry marking + Payload lids.Block // LID data payload +} + +// IdsSealBlock represents a sealed block containing various identifier types. +type IdsSealBlock struct { + MIDs seqids.BlockMIDs + RIDs seqids.BlockRIDs + Params seqids.BlockParams +} + +// BlocksBuilder constructs sealed blocks from various data sources. +type BlocksBuilder struct{} + +func (bb *BlocksBuilder) BuildTokenBlocks( + it iter.Seq2[string, iter.Seq2[TokenPosting, error]], + accumulate func([]uint32) error, blockCapacity int, +) iter.Seq2[TokenBlock, error] { + return func(yield func(TokenBlock, error) bool) { + var ( + block TokensSealBlock + blockIdx uint32 + blockSize int + ) + + var ( + currentTID uint32 + pendingTable []token.FieldTable + fieldName string + fieldEntryStartTID uint32 + ) + + emitFieldEntry := func() { + // Handle case when field does not have tokens. + if fieldName == "" || fieldEntryStartTID > currentTID { + return + } + + entry := newTokenTableEntry(fieldEntryStartTID, currentTID, blockIdx, block) + pendingTable = append(pendingTable, token.FieldTable{ + Field: fieldName, + Entries: []*token.TableEntry{entry}, + }) + } + + flushBlock := func() bool { + emitFieldEntry() + block.Ext.MaxTID = currentTID + + pair := TokenBlock{First: block, Second: pendingTable} + if !yield(pair, nil) { + return false + } + + block.Payload.Payload = block.Payload.Payload[:0] + block.Payload.Offsets = block.Payload.Offsets[:0] + block.Ext.MinTID = currentTID + 1 + + blockIdx++ + blockSize = 0 + + pendingTable = pendingTable[:0] + fieldEntryStartTID = currentTID + 1 + + return true + } + + block.Ext.MinTID = 1 + for field, tokIt := range it { + emitFieldEntry() + + fieldName = field + fieldEntryStartTID = currentTID + 1 + + for pair, err := range tokIt { + if err != nil { + yield(TokenBlock{}, err) + return + } + + tok, tlids := pair.First, pair.Second + tokenSize := int(unsafe.Sizeof(uint32(0))) + len(tok) + + if blockSize > 0 && blockSize+tokenSize > blockCapacity { + if !flushBlock() { + return + } + } + + block.Payload.Offsets = append(block.Payload.Offsets, uint32(len(block.Payload.Payload))) + block.Payload.Payload = binary.LittleEndian.AppendUint32(block.Payload.Payload, uint32(len(tok))) + block.Payload.Payload = append(block.Payload.Payload, tok...) + + if err := accumulate(tlids); err != nil { + yield(TokenBlock{}, err) + return + } + + currentTID++ + blockSize += tokenSize + } + } + + if blockSize > 0 { + flushBlock() + } + } +} + +func newTokenTableEntry( + entryStartTID, entryEndTID uint32, + blockIndex uint32, block TokensSealBlock, +) *token.TableEntry { + // Convert global TIDs to block-local indices + firstIndex := entryStartTID - block.Ext.MinTID + lastIndex := entryEndTID - block.Ext.MinTID + + // Extract min and max token values for the entry range + minVal := string(block.Payload.GetToken(int(firstIndex))) + maxVal := string(block.Payload.GetToken(int(lastIndex))) + + return &token.TableEntry{ + StartIndex: firstIndex, // Starting index within the block + StartTID: entryStartTID, // Starting token ID (global) + BlockIndex: blockIndex, // Reference to containing block + ValCount: lastIndex - firstIndex + 1, // Number of tokens in this entry + MinVal: minVal, // Smallest token value in range + MaxVal: maxVal, // Largest token value in range + } +} + +// SeqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. +// A new block is yielded every `blockCapacity` IDs. +func SeqBlockID(ids iter.Seq2[DocLocation, error], blockCapacity int) iter.Seq2[IdsSealBlock, error] { + return func(yield func(IdsSealBlock, error) bool) { + var block IdsSealBlock + + for pair, err := range ids { + if err != nil { + yield(IdsSealBlock{}, err) + return + } + + id, pos := pair.First, pair.Second + block.MIDs.Values = append(block.MIDs.Values, uint64(id.MID)) + block.RIDs.Values = append(block.RIDs.Values, uint64(id.RID)) + block.Params.Values = append(block.Params.Values, uint64(pos)) + + if len(block.MIDs.Values) == blockCapacity { + if !yield(block, nil) { + return + } + + block.MIDs.Values = block.MIDs.Values[:0] + block.RIDs.Values = block.RIDs.Values[:0] + block.Params.Values = block.Params.Values[:0] + } + } + + if len(block.MIDs.Values) > 0 { + yield(block, nil) + } + } +} + +// LidBlocksAcc accumulates LIDs into sealed LID blocks. +type LidBlocksAcc struct { + blockCapacity int + + currentTID uint32 + currentBlock LidsSealBlock + + isEndOfToken bool + isContinued bool +} + +func NewLIDBlocksAccumulator(blockCapacity int) *LidBlocksAcc { + a := &LidBlocksAcc{blockCapacity: blockCapacity} + + a.currentBlock.Ext.MinTID = 1 + a.currentBlock.Payload = lids.Block{ + LIDs: make([]uint32, 0, blockCapacity), + Offsets: []uint32{0}, + } + + return a +} + +// Add processes LIDs of one token (must be called in TID order). +// +// For each block that fills up, `onBlock` is called immediately +// before the backing arrays are reset, so `onBlock` may read the +// block data but must not retain references to it. +func (a *LidBlocksAcc) Add(lidsbuf []uint32, onBlock func(LidsSealBlock) error) error { + a.currentTID++ + + for _, lid := range lidsbuf { + if len(a.currentBlock.Payload.LIDs) == a.blockCapacity { + if err := onBlock(a.finalizeBlock()); err != nil { + return err + } + + a.currentBlock.Ext.MinTID = a.currentTID + a.currentBlock.Payload.LIDs = a.currentBlock.Payload.LIDs[:0] + a.currentBlock.Payload.Offsets = a.currentBlock.Payload.Offsets[:1] + } + + a.isEndOfToken = false + a.currentBlock.Ext.MaxTID = a.currentTID + a.currentBlock.Payload.LIDs = append(a.currentBlock.Payload.LIDs, lid) + } + + a.isEndOfToken = true + a.currentBlock.Payload.Offsets = append( + a.currentBlock.Payload.Offsets, + uint32(len(a.currentBlock.Payload.LIDs)), + ) + + return nil +} + +func (a *LidBlocksAcc) Flush() LidsSealBlock { + return a.finalizeBlock() +} + +func (a *LidBlocksAcc) finalizeBlock() LidsSealBlock { + if !a.isEndOfToken { + a.currentBlock.Payload.Offsets = append( + a.currentBlock.Payload.Offsets, + uint32(len(a.currentBlock.Payload.LIDs)), + ) + } + + result := a.currentBlock + result.Payload.IsLastLID = a.isEndOfToken + result.Ext.IsContinued = a.isContinued + + a.isContinued = !a.isEndOfToken + return result +} + +// CollapseOrderedFieldsTables merges FieldTables with the same field name. +// Assumes input is sorted by Field. +func CollapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { + if len(src) == 0 { + return nil + } + + current := src[0] + var dst []token.FieldTable + for _, ft := range src[1:] { + if current.Field == ft.Field { + current.Entries = append(current.Entries, ft.Entries...) + continue + } + + dst = append(dst, current) + current = ft + } + + return append(dst, current) +} diff --git a/frac/sealed/sealing/blocks_builder_test.go b/blockbuilder/blocks_builder_test.go similarity index 71% rename from frac/sealed/sealing/blocks_builder_test.go rename to blockbuilder/blocks_builder_test.go index a0d1ff2b..4cee4ced 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/blockbuilder/blocks_builder_test.go @@ -1,4 +1,4 @@ -package sealing +package blockbuilder import ( "iter" @@ -7,27 +7,20 @@ import ( "github.com/stretchr/testify/assert" - "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/seq" ) -var _ Source = (*mockSource)(nil) - type mockSource struct { - info common.Info - tokens [][]byte - fields []string - fieldMaxTIDs []uint32 - ids []seq.ID - pos []seq.DocPos - tokenLIDs [][]uint32 - blocksOffsets []uint64 + tokens [][]byte + fields []string + fieldMaxTIDs []uint32 + ids []seq.ID + pos []seq.DocPos + tokenLIDs [][]uint32 } -func (m *mockSource) Info() *common.Info { return &m.info } - func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { start := 0 @@ -48,8 +41,7 @@ func (m *mockSource) tokensForField(start, end int) iter.Seq2[TokenPosting, erro if j < len(m.tokenLIDs) { lidsbuf = m.tokenLIDs[j] } - pair := TokenPosting{First: m.tokens[j], Second: lidsbuf} - if !yield(pair, nil) { + if !yield(TokenPosting{First: m.tokens[j], Second: lidsbuf}, nil) { return } } @@ -66,8 +58,6 @@ func (m *mockSource) ID() iter.Seq2[DocLocation, error] { } } -func (m *mockSource) BlockOffsets() []uint64 { return m.blocksOffsets } - func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { src := mockSource{ tokens: [][]byte{ @@ -114,15 +104,15 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { const blockSize = 24 const lidBlockCap = 3 - var bb blocksBuilder - lidAccum := newLIDBlocksAccumulator(lidBlockCap) - var lidBlocks []lidsSealBlock + var bb BlocksBuilder + lidAccum := NewLIDBlocksAccumulator(lidBlockCap) + var lidBlocks []LidsSealBlock tokenBlocks := bb.BuildTokenBlocks( src.TokenTriplet(), func(lids []uint32) error { - return lidAccum.Add(lids, func(block lidsSealBlock) error { - block.payload.LIDs = slices.Clone(block.payload.LIDs) - block.payload.Offsets = slices.Clone(block.payload.Offsets) + return lidAccum.Add(lids, func(block LidsSealBlock) error { + block.Payload.LIDs = slices.Clone(block.Payload.LIDs) + block.Payload.Offsets = slices.Clone(block.Payload.Offsets) lidBlocks = append(lidBlocks, block) return nil }) @@ -141,16 +131,16 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { for pair, err := range tokenBlocks { assert.NoError(t, err) block, fieldsTables := pair.First, pair.Second - assert.Equal(t, expectedSizes[blockIndex], block.payload.Len()) - for i := range block.payload.Len() { + assert.Equal(t, expectedSizes[blockIndex], block.Payload.Len()) + for i := range block.Payload.Len() { tid++ - assert.Equal(t, src.tokens[tid-1], block.payload.GetToken(i)) + assert.Equal(t, src.tokens[tid-1], block.Payload.GetToken(i)) } allFieldsTables = append(allFieldsTables, fieldsTables...) blockIndex++ } - actualTokenTable := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} + actualTokenTable := token.TableBlock{FieldsTables: CollapseOrderedFieldsTables(allFieldsTables)} assert.Equal(t, tid, len(src.tokens)) expectedTokenTable := token.TableBlock{ @@ -247,34 +237,34 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { assert.Equal(t, actualTokenTable.FieldsTables, expectedTokenTable.FieldsTables) finalBlock := lidAccum.Flush() - finalBlock.payload.LIDs = slices.Clone(finalBlock.payload.LIDs) - finalBlock.payload.Offsets = slices.Clone(finalBlock.payload.Offsets) + finalBlock.Payload.LIDs = slices.Clone(finalBlock.Payload.LIDs) + finalBlock.Payload.Offsets = slices.Clone(finalBlock.Payload.Offsets) lidBlocks = append(lidBlocks, finalBlock) - expectedLIDBlocks := []lidsSealBlock{ + expectedLIDBlocks := []LidsSealBlock{ { - ext: lidsExt{minTID: 1, maxTID: 1, isContinued: false}, - payload: lids.Block{LIDs: []uint32{10, 20, 30}, Offsets: []uint32{0, 3}, IsLastLID: false}, + Ext: LidsExt{MinTID: 1, MaxTID: 1, IsContinued: false}, + Payload: lids.Block{LIDs: []uint32{10, 20, 30}, Offsets: []uint32{0, 3}, IsLastLID: false}, }, { - ext: lidsExt{minTID: 1, maxTID: 3, isContinued: true}, - payload: lids.Block{LIDs: []uint32{40, 2, 3}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + Ext: LidsExt{MinTID: 1, MaxTID: 3, IsContinued: true}, + Payload: lids.Block{LIDs: []uint32{40, 2, 3}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, }, { - ext: lidsExt{minTID: 4, maxTID: 6, isContinued: false}, - payload: lids.Block{LIDs: []uint32{4, 5, 6}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + Ext: LidsExt{MinTID: 4, MaxTID: 6, IsContinued: false}, + Payload: lids.Block{LIDs: []uint32{4, 5, 6}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, }, { - ext: lidsExt{minTID: 7, maxTID: 9, isContinued: false}, - payload: lids.Block{LIDs: []uint32{7, 8, 9}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + Ext: LidsExt{MinTID: 7, MaxTID: 9, IsContinued: false}, + Payload: lids.Block{LIDs: []uint32{7, 8, 9}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, }, { - ext: lidsExt{minTID: 10, maxTID: 12, isContinued: false}, - payload: lids.Block{LIDs: []uint32{10, 11, 12}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + Ext: LidsExt{MinTID: 10, MaxTID: 12, IsContinued: false}, + Payload: lids.Block{LIDs: []uint32{10, 11, 12}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, }, { - ext: lidsExt{minTID: 13, maxTID: 14, isContinued: false}, - payload: lids.Block{LIDs: []uint32{13, 14}, Offsets: []uint32{0, 1, 2}, IsLastLID: true}, + Ext: LidsExt{MinTID: 13, MaxTID: 14, IsContinued: false}, + Payload: lids.Block{LIDs: []uint32{13, 14}, Offsets: []uint32{0, 1, 2}, IsLastLID: true}, }, } assert.Equal(t, expectedLIDBlocks, lidBlocks) @@ -313,18 +303,18 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { i := 0 ids := []seq.ID{} pos := []seq.DocPos{} - for block, err := range seqBlockID(src.ID(), 3) { + for block, err := range SeqBlockID(src.ID(), 3) { assert.NoError(t, err) - assert.Equal(t, expectedSizes[i], len(block.mids.Values)) - assert.Equal(t, expectedSizes[i], len(block.rids.Values)) - assert.Equal(t, expectedSizes[i], len(block.params.Values)) + assert.Equal(t, expectedSizes[i], len(block.MIDs.Values)) + assert.Equal(t, expectedSizes[i], len(block.RIDs.Values)) + assert.Equal(t, expectedSizes[i], len(block.Params.Values)) i++ j := 0 - for _, mid := range block.mids.Values { - ids = append(ids, seq.ID{MID: seq.MID(mid), RID: seq.RID(block.rids.Values[j])}) - pos = append(pos, seq.DocPos(block.params.Values[j])) + for _, mid := range block.MIDs.Values { + ids = append(ids, seq.ID{MID: seq.MID(mid), RID: seq.RID(block.RIDs.Values[j])}) + pos = append(pos, seq.DocPos(block.Params.Values[j])) j++ } } diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index 138586fd..7f9bdc85 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -17,11 +17,11 @@ import ( "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/processor" "github.com/ozontech/seq-db/frac/sealed/lids" - "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/parser" + "github.com/ozontech/seq-db/sealing" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" testcommon "github.com/ozontech/seq-db/tests/common" diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 26488e94..873dcda1 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -23,12 +23,12 @@ import ( "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/processor" "github.com/ozontech/seq-db/frac/sealed/lids" - "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/node" "github.com/ozontech/seq-db/parser" + "github.com/ozontech/seq-db/sealing" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" "github.com/ozontech/seq-db/storage/s3" diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go deleted file mode 100644 index 6c295903..00000000 --- a/frac/sealed/sealing/blocks_builder.go +++ /dev/null @@ -1,278 +0,0 @@ -package sealing - -import ( - "encoding/binary" - "iter" - "unsafe" - - "github.com/ozontech/seq-db/frac/sealed/lids" - "github.com/ozontech/seq-db/frac/sealed/seqids" - "github.com/ozontech/seq-db/frac/sealed/token" - "github.com/ozontech/seq-db/util" -) - -type ( - TokenBlock = util.Pair[tokensSealBlock, []token.FieldTable] -) - -// tokensExt represents the token ID range contained in a block. -type tokensExt struct { - minTID uint32 // First token ID in the block - maxTID uint32 // Last token ID in the block -} - -// tokensSealBlock represents a sealed block containing token data with metadata. -type tokensSealBlock struct { - ext tokensExt // Tokens block metadata for registry marking - payload token.Block // Actual token data payload -} - -// lidsExt represents the range and continuation status of LID blocks. -type lidsExt struct { - minTID uint32 // First token ID in the LID block - maxTID uint32 // Last token ID in the LID block - isContinued bool // Whether LID sequence continues in next block -} - -// lidsSealBlock represents a sealed block containing LID (Local ID) data. -type lidsSealBlock struct { - ext lidsExt // LIDs block metadata for registry marking - payload lids.Block // LID data payload -} - -// idsSealBlock represents a sealed block containing various identifier types. -type idsSealBlock struct { - mids seqids.BlockMIDs - rids seqids.BlockRIDs - params seqids.BlockParams -} - -// blocksBuilder constructs sealed blocks from various data sources. -// Provides error tracking and consistency validation during block construction. -type blocksBuilder struct{} - -func (bb *blocksBuilder) BuildTokenBlocks( - it iter.Seq2[string, iter.Seq2[TokenPosting, error]], - accumulate func([]uint32) error, blockCapacity int, -) iter.Seq2[TokenBlock, error] { - return func(yield func(TokenBlock, error) bool) { - var ( - block tokensSealBlock - blockIdx uint32 - blockSize int - ) - - var ( - currentTID uint32 - pendingTable []token.FieldTable - fieldName string - fieldEntryStartTID uint32 - ) - - emitFieldEntry := func() { - // Handle case when field does not have tokens. - if fieldName == "" || fieldEntryStartTID > currentTID { - return - } - - entry := newTokenTableEntry(fieldEntryStartTID, currentTID, blockIdx, block) - pendingTable = append(pendingTable, token.FieldTable{ - Field: fieldName, - Entries: []*token.TableEntry{entry}, - }) - } - - flushBlock := func() bool { - emitFieldEntry() - block.ext.maxTID = currentTID - - pair := TokenBlock{First: block, Second: pendingTable} - if !yield(pair, nil) { - return false - } - - block.payload.Payload = block.payload.Payload[:0] - block.payload.Offsets = block.payload.Offsets[:0] - block.ext.minTID = currentTID + 1 - - blockIdx++ - blockSize = 0 - - pendingTable = pendingTable[:0] - fieldEntryStartTID = currentTID + 1 - - return true - } - - block.ext.minTID = 1 - for field, tokIt := range it { - emitFieldEntry() - - fieldName = field - fieldEntryStartTID = currentTID + 1 - - for pair, err := range tokIt { - if err != nil { - yield(TokenBlock{}, err) - return - } - - tok, tlids := pair.First, pair.Second - tokenSize := int(unsafe.Sizeof(uint32(0))) + len(tok) - - if blockSize > 0 && blockSize+tokenSize > blockCapacity { - if !flushBlock() { - return - } - } - - block.payload.Offsets = append(block.payload.Offsets, uint32(len(block.payload.Payload))) - block.payload.Payload = binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tok))) - block.payload.Payload = append(block.payload.Payload, tok...) - - if err := accumulate(tlids); err != nil { - yield(TokenBlock{}, err) - return - } - - currentTID++ - blockSize += tokenSize - } - } - - if blockSize > 0 { - flushBlock() - } - } -} - -func newTokenTableEntry( - entryStartTID, entryEndTID uint32, - blockIndex uint32, block tokensSealBlock, -) *token.TableEntry { - // Convert global TIDs to block-local indices - firstIndex := entryStartTID - block.ext.minTID - lastIndex := entryEndTID - block.ext.minTID - - // Extract min and max token values for the entry range - minVal := string(block.payload.GetToken(int(firstIndex))) - maxVal := string(block.payload.GetToken(int(lastIndex))) - - return &token.TableEntry{ - StartIndex: firstIndex, // Starting index within the block - StartTID: entryStartTID, // Starting token ID (global) - BlockIndex: blockIndex, // Reference to containing block - ValCount: lastIndex - firstIndex + 1, // Number of tokens in this entry - MinVal: minVal, // Smallest token value in range - MaxVal: maxVal, // Largest token value in range - } -} - -// seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. -// A new block is yielded every `blockCapacity` IDs. -func seqBlockID(ids iter.Seq2[DocLocation, error], blockCapacity int) iter.Seq2[idsSealBlock, error] { - return func(yield func(idsSealBlock, error) bool) { - var block idsSealBlock - - for pair, err := range ids { - if err != nil { - yield(idsSealBlock{}, err) - return - } - - id, pos := pair.First, pair.Second - block.mids.Values = append(block.mids.Values, uint64(id.MID)) - block.rids.Values = append(block.rids.Values, uint64(id.RID)) - block.params.Values = append(block.params.Values, uint64(pos)) - - if len(block.mids.Values) == blockCapacity { - if !yield(block, nil) { - return - } - - block.mids.Values = block.mids.Values[:0] - block.rids.Values = block.rids.Values[:0] - block.params.Values = block.params.Values[:0] - } - } - - if len(block.mids.Values) > 0 { - yield(block, nil) - } - } -} - -type lidBlocksAcc struct { - blockCapacity int - - currentTID uint32 - currentBlock lidsSealBlock - - isEndOfToken bool - isContinued bool -} - -func newLIDBlocksAccumulator(blockCapacity int) *lidBlocksAcc { - a := &lidBlocksAcc{blockCapacity: blockCapacity} - - a.currentBlock.ext.minTID = 1 - a.currentBlock.payload = lids.Block{ - LIDs: make([]uint32, 0, blockCapacity), - Offsets: []uint32{0}, - } - - return a -} - -// Add processes LIDs of one token (must be called in TID order). -// -// For each block that fills up, `onBlock` is called immediately -// before the backing arrays are reset, so `onBlock` may read the -// block data but must not retain references to it. -func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) error { - a.currentTID++ - - for _, lid := range lidsbuf { - if len(a.currentBlock.payload.LIDs) == a.blockCapacity { - if err := onBlock(a.finalizeBlock()); err != nil { - return err - } - - a.currentBlock.ext.minTID = a.currentTID - a.currentBlock.payload.LIDs = a.currentBlock.payload.LIDs[:0] - a.currentBlock.payload.Offsets = a.currentBlock.payload.Offsets[:1] - } - - a.isEndOfToken = false - a.currentBlock.ext.maxTID = a.currentTID - a.currentBlock.payload.LIDs = append(a.currentBlock.payload.LIDs, lid) - } - - a.isEndOfToken = true - a.currentBlock.payload.Offsets = append( - a.currentBlock.payload.Offsets, - uint32(len(a.currentBlock.payload.LIDs)), - ) - - return nil -} - -func (a *lidBlocksAcc) Flush() lidsSealBlock { - return a.finalizeBlock() -} - -func (a *lidBlocksAcc) finalizeBlock() lidsSealBlock { - if !a.isEndOfToken { - a.currentBlock.payload.Offsets = append( - a.currentBlock.payload.Offsets, - uint32(len(a.currentBlock.payload.LIDs)), - ) - } - - result := a.currentBlock - result.payload.IsLastLID = a.isEndOfToken - result.ext.isContinued = a.isContinued - - a.isContinued = !a.isEndOfToken - return result -} diff --git a/fracmanager/fraction_provider.go b/fracmanager/fraction_provider.go index 66e6477b..db5feb33 100644 --- a/fracmanager/fraction_provider.go +++ b/fracmanager/fraction_provider.go @@ -12,8 +12,8 @@ import ( "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" - "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/node" + "github.com/ozontech/seq-db/sealing" "github.com/ozontech/seq-db/storage" "github.com/ozontech/seq-db/storage/s3" ) diff --git a/fracmanager/sealer_test.go b/fracmanager/sealer_test.go index f85c3f8f..51c16b6b 100644 --- a/fracmanager/sealer_test.go +++ b/fracmanager/sealer_test.go @@ -19,8 +19,8 @@ import ( "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" - "github.com/ozontech/seq-db/frac/sealed/sealing" "github.com/ozontech/seq-db/indexer" + "github.com/ozontech/seq-db/sealing" "github.com/ozontech/seq-db/seq" testscommon "github.com/ozontech/seq-db/tests/common" ) diff --git a/frac/sealed/sealing/index.go b/indexwriter/index.go similarity index 67% rename from frac/sealed/sealing/index.go rename to indexwriter/index.go index 52c38308..a4c5117a 100644 --- a/frac/sealed/sealing/index.go +++ b/indexwriter/index.go @@ -1,8 +1,10 @@ -package sealing +package indexwriter import ( "io" + "iter" + "github.com/ozontech/seq-db/blockbuilder" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" @@ -14,6 +16,25 @@ import ( "github.com/ozontech/seq-db/zstd" ) +// Source defines the data required to write all index files for a fraction. +type Source interface { + // Info returns metadata describing this source. + Info() *common.Info + + // ID returns an iterator over stored document identifiers paired with + // their positions, in descending [seq.ID] order. + ID() iter.Seq2[blockbuilder.DocLocation, error] + + // BlockOffsets returns byte offsets to each document block + // within this source's `.docs` file. + BlockOffsets() []uint64 + + // TokenTriplet iterates over fields in lexicographic order. + // For each field, it yields tokens (lexicographically sorted) + // paired with the local document ID list for that token. + TokenTriplet() iter.Seq2[string, iter.Seq2[blockbuilder.TokenPosting, error]] +} + // indexBlock is one compressed (or not) block with its registry metadata. type indexBlock struct { codec storage.Codec @@ -27,7 +48,7 @@ func (i indexBlock) Bin(pos int64) (storage.IndexBlockHeader, []byte) { return storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec), i.payload } -type IndexSealer struct { +type IndexWriter struct { params common.SealParams buf1 []byte @@ -38,28 +59,28 @@ type IndexSealer struct { tokenTable token.Table } -func NewIndexSealer(params common.SealParams) *IndexSealer { - return &IndexSealer{ +func New(params common.SealParams) *IndexWriter { + return &IndexWriter{ params: params, buf1: make([]byte, 0, consts.RegularBlockSize), buf2: make([]byte, 0, consts.RegularBlockSize), } } -func (s *IndexSealer) LIDsTable() lids.Table { +func (s *IndexWriter) LIDsTable() lids.Table { return s.lidsTable } -func (s *IndexSealer) TokenTable() token.Table { +func (s *IndexWriter) TokenTable() token.Table { return s.tokenTable } -func (s *IndexSealer) IDsTable() seqids.Table { +func (s *IndexWriter) IDsTable() seqids.Table { return s.idsTable } // WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. -func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { +func (s *IndexWriter) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { w, err := newWriter(ws) if err != nil { return err @@ -83,14 +104,14 @@ func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { return w.finalize() } -func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { +func (s *IndexWriter) WriteIDFile(ws io.WriteSeeker, src Source) error { w, err := newWriter(ws) if err != nil { return err } defer w.release() - for block, err := range seqBlockID(src.ID(), consts.IDsPerBlock) { + for block, err := range blockbuilder.SeqBlockID(src.ID(), consts.IDsPerBlock) { if err != nil { return err } @@ -116,7 +137,7 @@ func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { return w.finalize() } -func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) error { +func (s *IndexWriter) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) error { tw, err := newWriter(tws) if err != nil { return err @@ -130,14 +151,14 @@ func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err defer lw.release() var ( - bb blocksBuilder + bb blockbuilder.BlocksBuilder allFieldsTables []token.FieldTable - lidacc = newLIDBlocksAccumulator(consts.LIDBlockCap) + lidacc = blockbuilder.NewLIDBlocksAccumulator(consts.LIDBlockCap) ) // NOTE(dkharms): This is so ugly but I cannot come up with other solution here. accumulate := func(lids []uint32) error { - return lidacc.Add(lids, func(block lidsSealBlock) error { + return lidacc.Add(lids, func(block blockbuilder.LidsSealBlock) error { return lw.writeBlock(btypeLid, s.packLIDsBlock(block)) }) } @@ -161,7 +182,7 @@ func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err return s.finalizeTokenFile(tw, allFieldsTables) } -func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { +func (s *IndexWriter) finalizeLIDFile(w *writer, lidAccum *blockbuilder.LidBlocksAcc) error { if err := w.writeBlock(btypeLid, s.packLIDsBlock(lidAccum.Flush())); err != nil { return err } @@ -174,13 +195,13 @@ func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { return w.finalize() } -func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { +func (s *IndexWriter) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { // Emit section separator. if err := w.writeBlock(btypeToken, indexBlock{}); err != nil { return err } - tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} + tokenTableBlock := token.TableBlock{FieldsTables: blockbuilder.CollapseOrderedFieldsTables(allFieldsTables)} if err := w.writeBlock(btypeTokenTable, s.packTokenTableBlock(tokenTableBlock)); err != nil { return err } @@ -193,7 +214,7 @@ func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.Field return w.finalize() } -func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { +func (s *IndexWriter) WriteInfoFile(ws io.WriteSeeker, src Source) error { w, err := newWriter(ws) if err != nil { return err @@ -213,33 +234,11 @@ func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { return w.finalize() } -// collapseOrderedFieldsTables merges FieldTables with the same field name. -// Assumes input is sorted by Field. -func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { - if len(src) == 0 { - return nil - } - - current := src[0] - var dst []token.FieldTable - for _, ft := range src[1:] { - if current.Field == ft.Field { - current.Entries = append(current.Entries, ft.Entries...) - continue - } - - dst = append(dst, current) - current = ft - } - - return append(dst, current) -} - func newIndexBlock(raw []byte) indexBlock { return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } -func (s *IndexSealer) newIndexBlockZSTD(raw []byte, level int) indexBlock { +func (s *IndexWriter) newIndexBlockZSTD(raw []byte, level int) indexBlock { s.buf2 = zstd.CompressLevel(raw, s.buf2[:0], level) if len(s.buf2) < len(raw) { return indexBlock{codec: storage.CodecZSTD, rawLen: uint32(len(raw)), payload: s.buf2} @@ -248,22 +247,22 @@ func (s *IndexSealer) newIndexBlockZSTD(raw []byte, level int) indexBlock { } // packInfoBlock packs fraction information into an index block. -func (s *IndexSealer) packInfoBlock(block sealed.BlockInfo) indexBlock { +func (s *IndexWriter) packInfoBlock(block sealed.BlockInfo) indexBlock { s.buf1 = block.Pack(s.buf1[:0]) return newIndexBlock(s.buf1) // Info block is typically small, no compression } // packTokenBlock packs token data into a compressed index block. -func (s *IndexSealer) packTokenBlock(block tokensSealBlock) indexBlock { - s.buf1 = block.payload.Pack(s.buf1[:0]) // Pack token data +func (s *IndexWriter) packTokenBlock(block blockbuilder.TokensSealBlock) indexBlock { + s.buf1 = block.Payload.Pack(s.buf1[:0]) // Pack token data b := s.newIndexBlockZSTD(s.buf1, s.params.TokenListZstdLevel) // Store TID range in extended metadata - b.ext1 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) + b.ext1 = uint64(block.Ext.MaxTID)<<32 | uint64(block.Ext.MinTID) return b } // packTokenTableBlock packs the token table into a compressed index block. -func (s *IndexSealer) packTokenTableBlock(tokenTableBlock token.TableBlock) indexBlock { +func (s *IndexWriter) packTokenTableBlock(tokenTableBlock token.TableBlock) indexBlock { s.tokenTable = token.TableFromBlocks([]token.TableBlock{tokenTableBlock}) // Store for PreloadedData // Packing block @@ -272,7 +271,7 @@ func (s *IndexSealer) packTokenTableBlock(tokenTableBlock token.TableBlock) inde } // packBlocksOffsetsBlock packs document block offsets into a compressed index block. -func (s *IndexSealer) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { +func (s *IndexWriter) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { // Update IDs table for PreloadedData s.idsTable.IDsTotal = block.IDsTotal // Total number of IDs s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) // Number of ID blocks @@ -284,19 +283,19 @@ func (s *IndexSealer) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlo } // packMIDsBlock packs MIDs into a compressed index block. -func (s *IndexSealer) packMIDsBlock(block idsSealBlock) indexBlock { +func (s *IndexWriter) packMIDsBlock(block blockbuilder.IdsSealBlock) indexBlock { // Get the last ID in the block (smallest due to descending order) - last := len(block.mids.Values) - 1 + last := len(block.MIDs.Values) - 1 minID := seq.ID{ - MID: seq.MID(block.mids.Values[last]), - RID: seq.RID(block.rids.Values[last]), + MID: seq.MID(block.MIDs.Values[last]), + RID: seq.RID(block.RIDs.Values[last]), } s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) // Store for PreloadedData // Packing block - s.buf1 = block.mids.Pack(s.buf1[:0]) + s.buf1 = block.MIDs.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) // Store min MID and RID in extended metadata @@ -307,38 +306,38 @@ func (s *IndexSealer) packMIDsBlock(block idsSealBlock) indexBlock { } // packRIDsBlock packs RIDs into a compressed index block. -func (s *IndexSealer) packRIDsBlock(block idsSealBlock) indexBlock { - s.buf1 = block.rids.Pack(s.buf1[:0]) +func (s *IndexWriter) packRIDsBlock(block blockbuilder.IdsSealBlock) indexBlock { + s.buf1 = block.RIDs.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) return b } // packPosBlock packs document positions into a compressed index block. -func (s *IndexSealer) packPosBlock(block idsSealBlock) indexBlock { - s.buf1 = block.params.Pack(s.buf1[:0]) +func (s *IndexWriter) packPosBlock(block blockbuilder.IdsSealBlock) indexBlock { + s.buf1 = block.Params.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) return b } // packLIDsBlock packs Local IDs (LIDs) into a compressed index block. // Also updates LIDs table for preloaded data access. -func (s *IndexSealer) packLIDsBlock(block lidsSealBlock) indexBlock { +func (s *IndexWriter) packLIDsBlock(block blockbuilder.LidsSealBlock) indexBlock { var ext1 uint64 - if block.ext.isContinued { // todo: Legacy continuation flag + if block.Ext.IsContinued { // todo: Legacy continuation flag ext1 = 1 - block.ext.minTID++ // Adjust for legacy format + block.Ext.MinTID++ // Adjust for legacy format } // Update LIDs table for PreloadedData - s.lidsTable.MinTIDs = append(s.lidsTable.MinTIDs, block.ext.minTID) - s.lidsTable.MaxTIDs = append(s.lidsTable.MaxTIDs, block.ext.maxTID) - s.lidsTable.IsContinued = append(s.lidsTable.IsContinued, block.ext.isContinued) + s.lidsTable.MinTIDs = append(s.lidsTable.MinTIDs, block.Ext.MinTID) + s.lidsTable.MaxTIDs = append(s.lidsTable.MaxTIDs, block.Ext.MaxTID) + s.lidsTable.IsContinued = append(s.lidsTable.IsContinued, block.Ext.IsContinued) // Packing block - s.buf1 = block.payload.Pack(s.buf1[:0]) + s.buf1 = block.Payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.LIDsZstdLevel) b.ext1 = ext1 // Legacy continuation flag - b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) // TID range + b.ext2 = uint64(block.Ext.MaxTID)<<32 | uint64(block.Ext.MinTID) // TID range return b } diff --git a/frac/sealed/sealing/writer.go b/indexwriter/writer.go similarity index 99% rename from frac/sealed/sealing/writer.go rename to indexwriter/writer.go index c0e9e645..6544e4d4 100644 --- a/frac/sealed/sealing/writer.go +++ b/indexwriter/writer.go @@ -1,4 +1,4 @@ -package sealing +package indexwriter import ( "bytes" diff --git a/frac/sealed/sealing/sealer.go b/sealing/sealer.go similarity index 58% rename from frac/sealed/sealing/sealer.go rename to sealing/sealer.go index 57863d82..d3af4baf 100644 --- a/frac/sealed/sealing/sealer.go +++ b/sealing/sealer.go @@ -2,40 +2,67 @@ package sealing import ( "errors" - "iter" "os" "path/filepath" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" - "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/indexwriter" "github.com/ozontech/seq-db/util" ) -type ( - DocLocation = util.Pair[seq.ID, seq.DocPos] - TokenPosting = util.Pair[[]byte, []uint32] -) +// Source defines the contract for data sources that can be sealed. +// Provides access to all necessary data components for index creation. +type Source = indexwriter.Source + +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { + f.Close() + return err + } + return f.Close() +} + +func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { + f, err := os.Create(tmpPath) + if err != nil { + return err + } + + if err := errors.Join(write(f), syncAndClose(f)); err != nil { + return err + } + + return os.Rename(tmpPath, finalPath) +} -// Source interface defines the contract for data sources that can be sealed. -// Provides access to all necessary data components for index creation -type Source interface { - // Info returns metadata describing this source. - Info() *common.Info +func createAndWriteBoth( + tmpPath1, finalPath1, + tmpPath2, finalPath2 string, + write func(*os.File, *os.File) error, +) error { + f1, err := os.Create(tmpPath1) + if err != nil { + return err + } + + f2, err := os.Create(tmpPath2) + if err != nil { + f1.Close() + return err + } - // ID returns an iterator over stored document identifiers paired with - // their positions, in descending [seq.ID] order. - ID() iter.Seq2[DocLocation, error] + writeErr := write(f1, f2) + if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { + return err + } - // BlockOffsets returns byte offsets to each document block - // within this source's `.docs` file. - BlockOffsets() []uint64 + if err := os.Rename(tmpPath1, finalPath1); err != nil { + return err + } - // TokenTriplet iterates over fields in lexicographic order. - // For each field, it yields tokens (lexicographically sorted) - // paired with the local document ID list for that token. - TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] + return os.Rename(tmpPath2, finalPath2) } // Seal writes five index files (.info, .token, .offsets, .id, .lid) for the fraction @@ -47,12 +74,12 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { return nil, errors.New("sealing of an empty active fraction is not supported") } - sealer := NewIndexSealer(params) + writer := indexwriter.New(params) if err := createAndWrite( info.Path+consts.OffsetsTmpFileSuffix, info.Path+consts.OffsetsFileSuffix, - func(f *os.File) error { return sealer.WriteOffsetsFile(f, src) }, + func(f *os.File) error { return writer.WriteOffsetsFile(f, src) }, ); err != nil { return nil, err } @@ -60,7 +87,7 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { if err := createAndWrite( info.Path+consts.IDTmpFileSuffix, info.Path+consts.IDFileSuffix, - func(f *os.File) error { return sealer.WriteIDFile(f, src) }, + func(f *os.File) error { return writer.WriteIDFile(f, src) }, ); err != nil { return nil, err } @@ -68,7 +95,7 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { if err := createAndWriteBoth( info.Path+consts.TokenTmpFileSuffix, info.Path+consts.TokenFileSuffix, info.Path+consts.LIDTmpFileSuffix, info.Path+consts.LIDFileSuffix, - func(tokenF, lidF *os.File) error { return sealer.WriteTokenTriplet(tokenF, lidF, src) }, + func(tokenF, lidF *os.File) error { return writer.WriteTokenTriplet(tokenF, lidF, src) }, ); err != nil { return nil, err } @@ -76,7 +103,7 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { if err := createAndWrite( info.Path+consts.InfoTmpFileSuffix, info.Path+consts.InfoFileSuffix, - func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, + func(f *os.File) error { return writer.WriteInfoFile(f, src) }, ); err != nil { return nil, err } @@ -100,13 +127,13 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { } info.IndexOnDisk = totalSize - lidsTable := sealer.LIDsTable() + lidsTable := writer.LIDsTable() preloaded := &sealed.PreloadedData{ Info: info, - TokenTable: sealer.TokenTable(), + TokenTable: writer.TokenTable(), BlocksData: sealed.BlocksData{ - IDsTable: sealer.IDsTable(), + IDsTable: writer.IDsTable(), LIDsTable: &lidsTable, BlocksOffsets: src.BlockOffsets(), }, @@ -114,55 +141,3 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { return preloaded, nil } - -func syncAndClose(f *os.File) error { - if err := f.Sync(); err != nil { - f.Close() - return err - } - return f.Close() -} - -func createAndWrite( - tmp, final string, - write func(*os.File) error, -) error { - f, err := os.Create(tmp) - if err != nil { - return err - } - - if err := errors.Join(write(f), syncAndClose(f)); err != nil { - return err - } - - return os.Rename(tmp, final) -} - -func createAndWriteBoth( - tmpa, finala, - tmpb, finalb string, - write func(*os.File, *os.File) error, -) error { - a, err := os.Create(tmpa) - if err != nil { - return err - } - - b, err := os.Create(tmpb) - if err != nil { - a.Close() - return err - } - - writeErr := write(a, b) - if err := errors.Join(writeErr, syncAndClose(a), syncAndClose(b)); err != nil { - return err - } - - if err := os.Rename(tmpa, finala); err != nil { - return err - } - - return os.Rename(tmpb, finalb) -} From 40e3a10904f4fc6168ebcb5f8115a1f70e6dbf6c Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 10:26:45 +0300 Subject: [PATCH 17/26] refactor: filename similar to package name --- blockbuilder/{blocks_builder.go => block_builder.go} | 0 blockbuilder/{blocks_builder_test.go => block_builder_test.go} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename blockbuilder/{blocks_builder.go => block_builder.go} (100%) rename blockbuilder/{blocks_builder_test.go => block_builder_test.go} (100%) diff --git a/blockbuilder/blocks_builder.go b/blockbuilder/block_builder.go similarity index 100% rename from blockbuilder/blocks_builder.go rename to blockbuilder/block_builder.go diff --git a/blockbuilder/blocks_builder_test.go b/blockbuilder/block_builder_test.go similarity index 100% rename from blockbuilder/blocks_builder_test.go rename to blockbuilder/block_builder_test.go From b157354b9a97e158a4c00ade85a2b6f98fe52bf0 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 10:30:58 +0300 Subject: [PATCH 18/26] refactor: remove `BlockBuilder` type --- blockbuilder/block_builder.go | 5 +---- blockbuilder/block_builder_test.go | 4 ++-- indexwriter/index.go | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/blockbuilder/block_builder.go b/blockbuilder/block_builder.go index 193b061e..262a2c77 100644 --- a/blockbuilder/block_builder.go +++ b/blockbuilder/block_builder.go @@ -50,10 +50,7 @@ type IdsSealBlock struct { Params seqids.BlockParams } -// BlocksBuilder constructs sealed blocks from various data sources. -type BlocksBuilder struct{} - -func (bb *BlocksBuilder) BuildTokenBlocks( +func BuildTokenBlocks( it iter.Seq2[string, iter.Seq2[TokenPosting, error]], accumulate func([]uint32) error, blockCapacity int, ) iter.Seq2[TokenBlock, error] { diff --git a/blockbuilder/block_builder_test.go b/blockbuilder/block_builder_test.go index 4cee4ced..d8f5f2b5 100644 --- a/blockbuilder/block_builder_test.go +++ b/blockbuilder/block_builder_test.go @@ -104,10 +104,10 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { const blockSize = 24 const lidBlockCap = 3 - var bb BlocksBuilder lidAccum := NewLIDBlocksAccumulator(lidBlockCap) + var lidBlocks []LidsSealBlock - tokenBlocks := bb.BuildTokenBlocks( + tokenBlocks := BuildTokenBlocks( src.TokenTriplet(), func(lids []uint32) error { return lidAccum.Add(lids, func(block LidsSealBlock) error { diff --git a/indexwriter/index.go b/indexwriter/index.go index a4c5117a..19ba5cac 100644 --- a/indexwriter/index.go +++ b/indexwriter/index.go @@ -151,7 +151,6 @@ func (s *IndexWriter) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err defer lw.release() var ( - bb blockbuilder.BlocksBuilder allFieldsTables []token.FieldTable lidacc = blockbuilder.NewLIDBlocksAccumulator(consts.LIDBlockCap) ) @@ -163,7 +162,7 @@ func (s *IndexWriter) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err }) } - for pair, err := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { + for pair, err := range blockbuilder.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { if err != nil { return err } From a1eae0f7459d521857bdb5e176d3eaa4af603212 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 10:33:20 +0300 Subject: [PATCH 19/26] refactor: move unexported functions --- sealing/sealer.go | 98 +++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/sealing/sealer.go b/sealing/sealer.go index d3af4baf..edd263fe 100644 --- a/sealing/sealer.go +++ b/sealing/sealer.go @@ -16,55 +16,6 @@ import ( // Provides access to all necessary data components for index creation. type Source = indexwriter.Source -func syncAndClose(f *os.File) error { - if err := f.Sync(); err != nil { - f.Close() - return err - } - return f.Close() -} - -func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { - f, err := os.Create(tmpPath) - if err != nil { - return err - } - - if err := errors.Join(write(f), syncAndClose(f)); err != nil { - return err - } - - return os.Rename(tmpPath, finalPath) -} - -func createAndWriteBoth( - tmpPath1, finalPath1, - tmpPath2, finalPath2 string, - write func(*os.File, *os.File) error, -) error { - f1, err := os.Create(tmpPath1) - if err != nil { - return err - } - - f2, err := os.Create(tmpPath2) - if err != nil { - f1.Close() - return err - } - - writeErr := write(f1, f2) - if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { - return err - } - - if err := os.Rename(tmpPath1, finalPath1); err != nil { - return err - } - - return os.Rename(tmpPath2, finalPath2) -} - // Seal writes five index files (.info, .token, .offsets, .id, .lid) for the fraction // and returns PreloadedData for fast initialization of the sealed fraction. func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { @@ -141,3 +92,52 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { return preloaded, nil } + +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { + f.Close() + return err + } + return f.Close() +} + +func createAndWrite(tmp, final string, write func(*os.File) error) error { + f, err := os.Create(tmp) + if err != nil { + return err + } + + if err := errors.Join(write(f), syncAndClose(f)); err != nil { + return err + } + + return os.Rename(tmp, final) +} + +func createAndWriteBoth( + atmp, afinal, + btmp, bfinal string, + write func(*os.File, *os.File) error, +) error { + a, err := os.Create(atmp) + if err != nil { + return err + } + + b, err := os.Create(btmp) + if err != nil { + a.Close() + return err + } + + writeErr := write(a, b) + if err := errors.Join(writeErr, syncAndClose(a), syncAndClose(b)); err != nil { + return err + } + + if err := os.Rename(atmp, afinal); err != nil { + return err + } + + return os.Rename(btmp, bfinal) +} From f1a5a12ae1507f2a02171395735e473f8a256ad2 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 12:14:03 +0300 Subject: [PATCH 20/26] refactor: do not store ids count in offsets --- frac/sealed/block_offsets.go | 20 ++++++++++++++++---- frac/sealed/seqids/loader.go | 1 - frac/sealed_loader.go | 21 +++++++++------------ indexwriter/index.go | 11 ++--------- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/frac/sealed/block_offsets.go b/frac/sealed/block_offsets.go index 2be59942..d644a0f7 100644 --- a/frac/sealed/block_offsets.go +++ b/frac/sealed/block_offsets.go @@ -6,13 +6,17 @@ import ( ) type BlockOffsets struct { - IDsTotal uint32 // todo: the best place for this field is Info block - Offsets []uint64 + Offsets []uint64 } func (b *BlockOffsets) Pack(buf []byte) []byte { buf = binary.LittleEndian.AppendUint32(buf, uint32(len(b.Offsets))) - buf = binary.LittleEndian.AppendUint32(buf, b.IDsTotal) + + // NOTE(dkharms): Previously we stored here amount of documents ids. + // + // I've created a task which will require fraction binary version bumping + // to get rid of this: https://github.com/ozontech/seq-db/issues/409 + buf = binary.LittleEndian.AppendUint32(buf, 0) var prev uint64 for _, pos := range b.Offsets { @@ -26,13 +30,16 @@ func (b *BlockOffsets) Unpack(data []byte) error { if len(data) < 4 { return errors.New("blocks offset decoding error: truncated header (missing offsets count)") } + idsBlocksCount := binary.LittleEndian.Uint32(data) data = data[4:] if len(data) < 4 { return errors.New("blocks offset decoding error: truncated header (missing IDsTotal)") } - b.IDsTotal = binary.LittleEndian.Uint32(data) + + // NOTE(dkharms): Previously we stored here amount of documents ids. + _ = binary.LittleEndian.Uint32(data) data = data[4:] offset := uint64(0) @@ -42,15 +49,20 @@ func (b *BlockOffsets) Unpack(data []byte) error { if n == 0 { return errors.New("blocks offset decoding error: varint returned 0") } + if n < 0 { return errors.New("blocks offset decoding error: varint overflow") } + data = data[n:] offset += uint64(delta) + b.Offsets = append(b.Offsets, offset) } + if uint32(len(b.Offsets)) != idsBlocksCount { return errors.New("blocks offset decoding error: offset count mismatch") } + return nil } diff --git a/frac/sealed/seqids/loader.go b/frac/sealed/seqids/loader.go index a4c9ecdb..1f0c05de 100644 --- a/frac/sealed/seqids/loader.go +++ b/frac/sealed/seqids/loader.go @@ -13,7 +13,6 @@ import ( type Table struct { MinBlockIDs []seq.ID // from max to min - IDBlocksTotal uint32 IDsTotal uint32 StartBlockIndex uint32 } diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 28b9ef9f..2b32865d 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -36,7 +36,7 @@ func (l *LegacyLoader) Load(blocksData *sealed.BlocksData, info *common.Info, re l.skipSection() // skip token table blocks var err error - blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info.BinaryDataVer) + blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info) if err != nil { logger.Fatal("legacy load ids error", zap.Error(err)) } @@ -77,7 +77,7 @@ func (l *LegacyLoader) skipSection() { } // loadIDs reads the BlockOffsets block and then scans MID/RID/Pos triplets. -func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Table, []uint64, error) { +func (l *LegacyLoader) loadIDs(info *common.Info) (seqids.Table, []uint64, error) { var buf []byte data, _, err := l.reader.ReadIndexBlock(l.blockIndex, buf) @@ -94,9 +94,8 @@ func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Tab l.blockIndex++ table := seqids.Table{ - StartBlockIndex: l.blockIndex, // absolute index of first MID block in .index - IDsTotal: offsets.IDsTotal, - IDBlocksTotal: uint32(len(offsets.Offsets)), + StartBlockIndex: l.blockIndex, // absolute index of first MID block in .index + IDsTotal: info.DocsTotal + 1, // Increment by one for [seq.SystemID] } for { @@ -111,7 +110,7 @@ func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Tab } mid := seq.MID(h.GetExt1()) - if fracVersion < config.BinaryDataV2 { + if info.BinaryDataVer < config.BinaryDataV2 { mid = seq.MillisToMID(h.GetExt1()) } @@ -185,10 +184,9 @@ func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers if err != nil { logger.Fatal("load offsets error", zap.Error(err)) } - blocksData.BlocksOffsets = blockOffsets.Offsets - blocksData.IDsTable = l.loadIDsTable(readers.ID, blockOffsets.IDsTotal, info.BinaryDataVer) + blocksData.IDsTable = l.loadIDsTable(readers.ID, info) blocksData.LIDsTable, err = l.loadLIDsTable(readers.LID) if err != nil { logger.Fatal("load lids error", zap.Error(err)) @@ -228,10 +226,10 @@ func (l *Loader) loadBlocksOffsets(r storage.IndexReader) (sealed.BlockOffsets, // loadIDsTable scans block headers in the .id file to build seqids.Table. // Blocks are stored as (MIDs, RIDs, Pos) triplets; we only need MIDs headers. -func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersion config.BinaryDataVersion) seqids.Table { +func (l *Loader) loadIDsTable(r storage.IndexReader, info *common.Info) seqids.Table { table := seqids.Table{ StartBlockIndex: 0, - IDsTotal: idsTotal, + IDsTotal: info.DocsTotal + 1, // Increment by one for [seq.SystemID] } for blockIdx := uint32(0); ; { @@ -244,7 +242,7 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio } var mid seq.MID - if fracVersion < config.BinaryDataV2 { + if info.BinaryDataVer < config.BinaryDataV2 { mid = seq.MillisToMID(header.GetExt1()) } else { mid = seq.MID(header.GetExt1()) @@ -255,7 +253,6 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio RID: seq.RID(header.GetExt2()), }) - table.IDBlocksTotal++ blockIdx += 3 // skip RIDs and Pos blocks } diff --git a/indexwriter/index.go b/indexwriter/index.go index 19ba5cac..d0a7c348 100644 --- a/indexwriter/index.go +++ b/indexwriter/index.go @@ -87,11 +87,7 @@ func (s *IndexWriter) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { } defer w.release() - offsets := sealed.BlockOffsets{ - IDsTotal: src.Info().DocsTotal + 1, - Offsets: src.BlockOffsets(), - } - + offsets := sealed.BlockOffsets{Offsets: src.BlockOffsets()} if err := w.writeBlock(btypeOffset, s.packBlocksOffsetsBlock(offsets)); err != nil { return err } @@ -247,6 +243,7 @@ func (s *IndexWriter) newIndexBlockZSTD(raw []byte, level int) indexBlock { // packInfoBlock packs fraction information into an index block. func (s *IndexWriter) packInfoBlock(block sealed.BlockInfo) indexBlock { + s.idsTable.IDsTotal = block.Info.DocsTotal + 1 // Increment by one for [seq.SystemID] s.buf1 = block.Pack(s.buf1[:0]) return newIndexBlock(s.buf1) // Info block is typically small, no compression } @@ -271,10 +268,6 @@ func (s *IndexWriter) packTokenTableBlock(tokenTableBlock token.TableBlock) inde // packBlocksOffsetsBlock packs document block offsets into a compressed index block. func (s *IndexWriter) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { - // Update IDs table for PreloadedData - s.idsTable.IDsTotal = block.IDsTotal // Total number of IDs - s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) // Number of ID blocks - // Packing block s.buf1 = block.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) From 1fe41b5640e4354162ea3ecf5276c72ce8812f78 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 9 Apr 2026 13:58:25 +0300 Subject: [PATCH 21/26] feat: k-way fraction merge --- compaction/heap.go | 1 + compaction/merge.go | 131 ++++++++++++++++++ compaction/merge_source.go | 223 +++++++++++++++++++++++++++++++ compaction/merge_source_test.go | 230 ++++++++++++++++++++++++++++++++ consts/consts.go | 1 + frac/sealed_source.go | 160 ++++++++++++++++++++++ 6 files changed, 746 insertions(+) create mode 100644 compaction/heap.go create mode 100644 compaction/merge.go create mode 100644 compaction/merge_source.go create mode 100644 compaction/merge_source_test.go create mode 100644 frac/sealed_source.go diff --git a/compaction/heap.go b/compaction/heap.go new file mode 100644 index 00000000..d1d3cde1 --- /dev/null +++ b/compaction/heap.go @@ -0,0 +1 @@ +package compaction diff --git a/compaction/merge.go b/compaction/merge.go new file mode 100644 index 00000000..23d60002 --- /dev/null +++ b/compaction/merge.go @@ -0,0 +1,131 @@ +package compaction + +import ( + "errors" + "fmt" + "os" + + "github.com/ozontech/seq-db/consts" + "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/indexwriter" +) + +func Merge(filename string, srcs ...Source) error { + mergeDocs(filename, srcs...) + + src := NewMergeSource(filename, srcs) + + // FIXME(dkharms): [common.SealParams] must be passed into [Merge] function. + writer := indexwriter.New(common.SealParams{ + IDsZstdLevel: 3, + LIDsZstdLevel: 3, + TokenListZstdLevel: 3, + DocsPositionsZstdLevel: 3, + TokenTableZstdLevel: 3, + DocBlocksZstdLevel: 3, + DocBlockSize: 3, + }) + + if err := createAndWrite( + filename+consts.OffsetsTmpFileSuffix, + filename+consts.OffsetsFileSuffix, + func(f *os.File) error { return writer.WriteOffsetsFile(f, src) }, + ); err != nil { + return err + } + + if err := createAndWrite( + filename+consts.IDTmpFileSuffix, + filename+consts.IDFileSuffix, + func(f *os.File) error { return writer.WriteIDFile(f, src) }, + ); err != nil { + return err + } + + if err := createAndWriteBoth( + filename+consts.TokenTmpFileSuffix, + filename+consts.TokenFileSuffix, + filename+consts.LIDTmpFileSuffix, + filename+consts.LIDFileSuffix, + func(tf, lf *os.File) error { return writer.WriteTokenTriplet(tf, lf, src) }, + ); err != nil { + return err + } + + if err := createAndWrite( + filename+consts.InfoTmpFileSuffix, + filename+consts.InfoFileSuffix, + func(f *os.File) error { return writer.WriteInfoFile(f, src) }, + ); err != nil { + return err + } + + return nil +} + +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { + f.Close() + return err + } + return f.Close() +} + +func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { + f, err := os.Create(tmpPath) + if err != nil { + return err + } + + if err := errors.Join(write(f), syncAndClose(f)); err != nil { + return err + } + + return os.Rename(tmpPath, finalPath) +} + +func createAndWriteBoth( + tmpPath1, finalPath1, + tmpPath2, finalPath2 string, + write func(*os.File, *os.File) error, +) error { + f1, err := os.Create(tmpPath1) + if err != nil { + return err + } + + f2, err := os.Create(tmpPath2) + if err != nil { + f1.Close() + return err + } + + writeErr := write(f1, f2) + if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { + return err + } + + if err := os.Rename(tmpPath1, finalPath1); err != nil { + return err + } + + return os.Rename(tmpPath2, finalPath2) +} + +// FIXME(dkharms): Create buffered writer for file. +func mergeDocs(filename string, srcs ...Source) error { + return createAndWrite( + filename+consts.DocsTmpFileSuffix, + filename+consts.DocsFileSuffix, + func(f *os.File) error { + for _, src := range srcs { + for block := range src.DocBlock() { + if _, err := f.Write(block); err != nil { + return err + } + } + } + return nil + }, + ) +} diff --git a/compaction/merge_source.go b/compaction/merge_source.go new file mode 100644 index 00000000..663aac3a --- /dev/null +++ b/compaction/merge_source.go @@ -0,0 +1,223 @@ +package compaction + +import ( + "cmp" + "iter" + "slices" + "strings" + + "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/indexwriter" + "github.com/ozontech/seq-db/seq" +) + +type Source interface { + indexwriter.Source + DocBlock() iter.Seq[[]byte] +} + +type MergeSource struct { + filename string + + // sources is a slice of [sealing.Source] + // which provide view into underlying fractions. + sources []Source + + // docblockcount is populated during [MergeSource.BlockOffsets] call. + // This slice is used for changing block indexes in [seq.DocPos]. + docblockcount []int + + // lidmapping describes the transformation of lids + // after k-merge of several fractions. + // + // i-th index of lidmapping correponds to i-th fraction. + // j-th index of i-th lidmapping corresponds to rename of i-th lid. + lidmapping [][]uint32 +} + +func NewMergeSource(filename string, sources []Source) *MergeSource { + lidmapping := make([][]uint32, len(sources)) + for i, src := range sources { + lidmapping[i] = make([]uint32, src.Info().DocsTotal+1) + } + return &MergeSource{sources: sources, lidmapping: lidmapping} +} + +// FIXME(dkharms): now this is just a placeholder. +// And info can be caculated after all merges. +func (s *MergeSource) Info() *common.Info { + var ( + docsOnDisk uint64 + indexOnDisk uint64 + ) + + for i := range s.sources { + docsOnDisk += s.sources[i].Info().DocsOnDisk + indexOnDisk += s.sources[i].Info().IndexOnDisk + } + + return common.NewInfo(s.filename, docsOnDisk, 0) +} + +func (s *MergeSource) BlockOffsets() []uint64 { + var ( + docsSize uint64 + offsets []uint64 + ) + + s.docblockcount = append(s.docblockcount, 0) + for i := 0; i < len(s.sources); i++ { + for _, offset := range s.sources[i].BlockOffsets() { + offsets = append(offsets, uint64(offset)+docsSize) + } + docsSize += s.sources[i].Info().DocsOnDisk + s.docblockcount = append(s.docblockcount, len(offsets)) + } + + return offsets +} + +func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + // FIXME(dkharms): For now, I will use stupid-simple linear scan for k-way merge. + // + // Its time complexity O(k*n) so it's not efficient enough if we compare it + // against time complexity of min-heap (which is O(n*log(k))) + // or another great data structure -- tournament tree -- which is O(n * log(k)) as well. + // + // However, tournament tree performs less comparisons than min-heap + // and it is around log(k) vs 2*log(k). + + type entry struct { + id seq.ID + docpos seq.DocPos + + sourceIdx int + oldlid uint32 + } + + var ids []entry + for i := 0; i < len(s.sources); i++ { + var lid uint32 + for id, docpos := range s.sources[i].ID() { + // Skip system [seq.ID]. + if id == seq.SystemID { + lid += 1 + continue + } + + blockIdx, offset := docpos.Unpack() + docpos = seq.PackDocPos(uint32(s.docblockcount[i]+int(blockIdx)), offset) + ids = append(ids, entry{id, docpos, i, lid}) + + lid += 1 + } + } + + slices.SortFunc(ids, func(x, y entry) int { + if x.id.MID == y.id.MID { + return -cmp.Compare(x.id.RID, y.id.RID) + } + return -cmp.Compare(x.id.MID, y.id.MID) + }) + + for i, entry := range ids { + s.lidmapping[entry.sourceIdx][entry.oldlid] = uint32(i + 1) + } + + return func(yield func(seq.ID, seq.DocPos) bool) { + // Emit system id since we skipped all such ids previously. + if !yield(seq.SystemID, seq.SystemDocPos) { + return + } + + for _, v := range ids { + if !yield(v.id, v.docpos) { + return + } + } + } +} + +type key struct { + field string + token string +} + +type value struct { + idx int + lids []uint32 +} + +func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + // TODO(dkharms): Use heap or other more efficient data structure. + // For now, I'll just dump everything into one array. + + values := make(map[key][]value) + for i := 0; i < len(s.sources); i++ { + for field, tokIter := range s.sources[i].TokenTriplet() { + for tok, lids := range tokIter { + k := key{field, string(tok)} + values[k] = append(values[k], value{i, slices.Clone(lids)}) + } + } + } + + var keys []key + for k := range values { + keys = append(keys, k) + } + + slices.SortFunc(keys, func(x, y key) int { + if x.field != y.field { + return strings.Compare(x.field, y.field) + } + return strings.Compare(x.token, y.token) + }) + + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + var previous string + for _, k := range keys { + if k.field == previous { + continue + } + + if !yield(k.field, s.tokensForField(k.field, keys, values)) { + return + } + + previous = k.field + } + } +} + +func (s *MergeSource) tokensForField( + field string, keys []key, values map[key][]value, +) iter.Seq2[[]byte, []uint32] { + var filtered []key + for _, k := range keys { + if k.field == field { + filtered = append(filtered, k) + } + } + + return func(yield func([]byte, []uint32) bool) { + for _, k := range filtered { + var buf []uint32 + + for _, v := range values[k] { + for _, lid := range v.lids { + buf = append(buf, s.lidmapping[v.idx][lid]) + } + } + + slices.Sort(buf) + if !yield([]byte(k.token), buf) { + return + } + } + } +} + +func (s *MergeSource) LastError() error { + return nil +} diff --git a/compaction/merge_source_test.go b/compaction/merge_source_test.go new file mode 100644 index 00000000..c01b15a2 --- /dev/null +++ b/compaction/merge_source_test.go @@ -0,0 +1,230 @@ +package compaction + +import ( + "iter" + "slices" + "testing" + + "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/seq" + "github.com/stretchr/testify/require" +) + +// mockSealingSource is a test implementation of sealing.Source. +// +// IDs must be provided in descending order (MID DESC, RID DESC); the mock +// automatically prepends the system ID when iterating, matching the contract +// expected by MergeSource.ID(). +// +// Fields maps field name → token value → list of 1-based LIDs. +// Fields and tokens are yielded in sorted order. +type mockSealingSource struct { + ids []seq.ID + pos []seq.DocPos + blocks []uint64 + // docsOnDisk is the total compressed size of the .docs file, + // used by MergeSource to adjust block offsets across sources. + docsOnDisk uint64 + // fields maps field → token → lids (1-based). + fields map[string]map[string][]uint32 +} + +func (m *mockSealingSource) Info() *common.Info { + return &common.Info{ + DocsTotal: uint32(len(m.ids)), + DocsOnDisk: m.docsOnDisk, + } +} + +func (m *mockSealingSource) BlockOffsets() []uint64 { + return m.blocks +} + +func (m *mockSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { + if !yield(seq.SystemID, seq.SystemDocPos) { + return + } + for i, id := range m.ids { + if !yield(id, m.pos[i]) { + return + } + } + } +} + +func (m *mockSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + fieldNames := make([]string, 0, len(m.fields)) + for f := range m.fields { + fieldNames = append(fieldNames, f) + } + slices.Sort(fieldNames) + + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + for _, field := range fieldNames { + tokens := make([]string, 0, len(m.fields[field])) + for t := range m.fields[field] { + tokens = append(tokens, t) + } + slices.Sort(tokens) + + if !yield(field, func(yield func([]byte, []uint32) bool) { + for _, tok := range tokens { + if !yield([]byte(tok), m.fields[field][tok]) { + return + } + } + }) { + return + } + } + } +} + +func (m *mockSealingSource) DocBlock() iter.Seq[[]byte] { + return func(yield func([]byte) bool) { + if !yield(nil) { + return + } + } +} + +func (m *mockSealingSource) LastError() error { + return nil +} + +func TestMergeSource(t *testing.T) { + first := &mockSealingSource{ + ids: []seq.ID{ + {MID: 3}, + {MID: 2}, + {MID: 1}, + }, + + pos: []seq.DocPos{ + seq.PackDocPos(0, 0), + seq.PackDocPos(0, 1024), + seq.PackDocPos(0, 2048), + }, + + fields: map[string]map[string][]uint32{ + "level": { + "error": {1, 3}, + "info": {2, 3}, + }, + }, + + blocks: []uint64{0}, + docsOnDisk: 1024, + } + + second := &mockSealingSource{ + ids: []seq.ID{ + {MID: 6}, + {MID: 5}, + }, + + pos: []seq.DocPos{ + seq.PackDocPos(0, 0), + seq.PackDocPos(0, 2048), + }, + + fields: map[string]map[string][]uint32{ + "level": { + "debug": {1}, + "info": {2}, + }, + }, + + blocks: []uint64{0}, + docsOnDisk: 2048, + } + + source := NewMergeSource("inmemory", []Source{first, second}) + + { + // Validate correctness of [storage.DocBlock] calculation. + offsets := source.BlockOffsets() + require.Equal(t, []uint64{0, 1024}, offsets) + } + + { + var ( + ids []seq.ID + docpos []seq.DocPos + ) + + for id, dp := range source.ID() { + ids = append(ids, id) + docpos = append(docpos, dp) + } + + require.Equal(t, + []seq.ID{ + seq.SystemID, + // seq.ID from the second source + {MID: 6}, + {MID: 5}, + // seq.ID from the first source + {MID: 3}, + {MID: 2}, + {MID: 1}, + }, + ids, + ) + + require.Equal(t, + []seq.DocPos{ + seq.SystemDocPos, + // seq.DocPos from the second source + seq.PackDocPos(1, 0), seq.PackDocPos(1, 2048), + // seq.DocPos from the first source + seq.PackDocPos(0, 0), seq.PackDocPos(0, 1024), seq.PackDocPos(0, 2048), + }, + docpos, + ) + } + + { + var ( + fields []string + tokens [][]byte + lids [][]uint32 + ) + + for field, fieldIt := range source.TokenTriplet() { + fields = append(fields, field) + + for token, lidsbuf := range fieldIt { + tokens = append(tokens, token) + lids = append(lids, slices.Clone(lidsbuf)) + } + } + + // Both sources have the same and the only field + require.Equal(t, []string{"level"}, fields) + + // Ensure tokens are sorted in ascending order + require.Equal(t, + [][]byte{[]byte("debug"), []byte("error"), []byte("info")}, + tokens, + ) + + // Ensure correctness of lids remapping + // ----------------- + // seq.MID 6 5 3 2 1 + // seq.LID 1 2 3 4 5 + // ----------------- + require.Equal(t, + [][]uint32{ + // Sequence of [seq.LID] for token `debug` + {1}, + // Sequence of [seq.LID] for token `error` + {3, 5}, + // Sequence of [seq.LID] for token `info` + {2, 4, 5}, + }, + lids, + ) + } +} diff --git a/consts/consts.go b/consts/consts.go index 8cc1ee75..4c481c4e 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -53,6 +53,7 @@ const ( WalFileSuffix = ".wal" DocsFileSuffix = ".docs" + DocsTmpFileSuffix = "._docs" DocsDelFileSuffix = ".docs.del" SdocsFileSuffix = ".sdocs" diff --git a/frac/sealed_source.go b/frac/sealed_source.go new file mode 100644 index 00000000..633855a4 --- /dev/null +++ b/frac/sealed_source.go @@ -0,0 +1,160 @@ +package frac + +import ( + "iter" + "slices" + + "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/frac/sealed/lids" + "github.com/ozontech/seq-db/frac/sealed/seqids" + "github.com/ozontech/seq-db/frac/sealed/token" + "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/storage" +) + +// SealedSource implements [indexwriter.Source] for a sealed fraction. +// Used as input to [compaction.MergeSource] when compacting multiple fractions. +type SealedSource struct { + f *Sealed + + idsProvider *seqids.Provider + lidsLoader *lids.Loader + + tokenBlockLoader *token.BlockLoader + tokenTableLoader *token.TableLoader + + lastErr error +} + +func NewSealedSource(f *Sealed) *SealedSource { + f.load() + return &SealedSource{ + f: f, + idsProvider: seqids.NewProvider( + &f.idReader, + f.indexCache.MIDs, + f.indexCache.RIDs, + f.indexCache.Params, + &f.blocksData.IDsTable, + f.info.BinaryDataVer, + ), + lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), + } +} + +func (s *SealedSource) Info() *common.Info { + return s.f.info +} + +func (s *SealedSource) BlockOffsets() []uint64 { + return s.f.blocksData.BlocksOffsets +} + +func (s *SealedSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { + for lid := uint32(0); lid < s.f.blocksData.IDsTable.IDsTotal; lid++ { + mid, err := s.idsProvider.MID(seq.LID(lid)) + if err != nil { + s.lastErr = err + return + } + + rid, err := s.idsProvider.RID(seq.LID(lid)) + if err != nil { + s.lastErr = err + return + } + + pos, err := s.idsProvider.DocPos(seq.LID(lid)) + if err != nil { + s.lastErr = err + return + } + + if !yield(seq.ID{MID: mid, RID: rid}, pos) { + return + } + } + } +} + +func (s *SealedSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + tokenTable := s.tokenTableLoader.Load() + + fields := make([]string, 0, len(tokenTable)) + for field := range tokenTable { + fields = append(fields, field) + } + + slices.Sort(fields) + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + for _, field := range fields { + if !yield(field, s.tokensForField(field)) { + return + } + } + } +} + +func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] { + lidsTable := s.f.blocksData.LIDsTable + tokenTable := s.tokenTableLoader.Load() + + var lidsbuf []uint32 + return func(yield func([]byte, []uint32) bool) { + for _, entry := range tokenTable[field].Entries { + block := s.tokenBlockLoader.Load(entry.BlockIndex) + + for tid := entry.StartTID; tid < entry.StartTID+entry.ValCount; tid++ { + lidsbuf = lidsbuf[:0] + + tokenVal := block.GetToken(entry.GetIndexInTokensBlock(tid)) + firstBlock := lidsTable.GetFirstBlockIndexForTID(tid) + lastBlock := lidsTable.GetLastBlockIndexForTID(tid) + + for bi := firstBlock; bi <= lastBlock; bi++ { + lidBlock, err := s.lidsLoader.GetLIDsBlock(bi) + if err != nil { + s.lastErr = err + return + } + + chunkIdx := lidsTable.GetChunkIndex(bi, tid) + lidsbuf = append(lidsbuf, lidBlock.LIDs[lidBlock.Offsets[chunkIdx]:lidBlock.Offsets[chunkIdx+1]]...) + } + + if !yield(tokenVal, lidsbuf) { + return + } + } + } + } +} + +func (s *SealedSource) DocBlock() iter.Seq[[]byte] { + return func(yield func([]byte) bool) { + // We do not want to cache payload of DocBlock because + // it will just pollute cache and cause unnecessary evictions. + r := storage.NewDocBlocksReader(s.f.readLimiter, s.f.docsFile) + + for _, offset := range s.f.blocksData.BlocksOffsets { + // Read DocBlock payload (including its header) but do not decompress it. + // Caller of [SealedSource.DocBlock] will decide whether it requires decompressed data. + payload, _, err := r.ReadDocBlock(int64(offset)) + if err != nil { + s.lastErr = err + return + } + + if !yield(payload) { + return + } + } + } +} + +func (s *SealedSource) LastError() error { + return s.lastErr +} From 007c90f62d4264f3ab1591e9894b57ba23757bc4 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Fri, 10 Apr 2026 13:43:43 +0300 Subject: [PATCH 22/26] feat: calculate information correctly --- compaction/merge.go | 48 ++++++++++++++++++++++++++------- compaction/merge_source.go | 44 +++++++++++++++++++++--------- compaction/merge_source_test.go | 36 ++++++++++++++++++++----- frac/common/info.go | 8 +++--- 4 files changed, 106 insertions(+), 30 deletions(-) diff --git a/compaction/merge.go b/compaction/merge.go index 23d60002..600e929f 100644 --- a/compaction/merge.go +++ b/compaction/merge.go @@ -2,17 +2,15 @@ package compaction import ( "errors" - "fmt" "os" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" + "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/indexwriter" ) -func Merge(filename string, srcs ...Source) error { - mergeDocs(filename, srcs...) - +func Merge(filename string, srcs ...Source) (*sealed.PreloadedData, error) { src := NewMergeSource(filename, srcs) // FIXME(dkharms): [common.SealParams] must be passed into [Merge] function. @@ -31,7 +29,7 @@ func Merge(filename string, srcs ...Source) error { filename+consts.OffsetsFileSuffix, func(f *os.File) error { return writer.WriteOffsetsFile(f, src) }, ); err != nil { - return err + return nil, err } if err := createAndWrite( @@ -39,7 +37,7 @@ func Merge(filename string, srcs ...Source) error { filename+consts.IDFileSuffix, func(f *os.File) error { return writer.WriteIDFile(f, src) }, ); err != nil { - return err + return nil, err } if err := createAndWriteBoth( @@ -49,7 +47,7 @@ func Merge(filename string, srcs ...Source) error { filename+consts.LIDFileSuffix, func(tf, lf *os.File) error { return writer.WriteTokenTriplet(tf, lf, src) }, ); err != nil { - return err + return nil, err } if err := createAndWrite( @@ -57,10 +55,42 @@ func Merge(filename string, srcs ...Source) error { filename+consts.InfoFileSuffix, func(f *os.File) error { return writer.WriteInfoFile(f, src) }, ); err != nil { - return err + return nil, err + } + + if err := mergeDocs(filename, srcs...); err != nil { + return nil, err + } + + info := src.Info() + info.IndexOnDisk = 0 + + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + st, err := os.Stat(info.Path + suffix) + if err != nil { + return nil, err + } + info.IndexOnDisk += uint64(st.Size()) + } + + lidsTable := writer.LIDsTable() + preloaded := &sealed.PreloadedData{ + Info: info, + TokenTable: writer.TokenTable(), + BlocksData: sealed.BlocksData{ + LIDsTable: &lidsTable, + IDsTable: writer.IDsTable(), + BlocksOffsets: src.BlockOffsets(), + }, } - return nil + return preloaded, nil } func syncAndClose(f *os.File) error { diff --git a/compaction/merge_source.go b/compaction/merge_source.go index 663aac3a..f8ed6796 100644 --- a/compaction/merge_source.go +++ b/compaction/merge_source.go @@ -3,6 +3,7 @@ package compaction import ( "cmp" "iter" + "math" "slices" "strings" @@ -18,6 +19,7 @@ type Source interface { type MergeSource struct { filename string + info *common.Info // sources is a slice of [sealing.Source] // which provide view into underlying fractions. @@ -33,6 +35,8 @@ type MergeSource struct { // i-th index of lidmapping correponds to i-th fraction. // j-th index of i-th lidmapping corresponds to rename of i-th lid. lidmapping [][]uint32 + + from, to seq.MID } func NewMergeSource(filename string, sources []Source) *MergeSource { @@ -40,23 +44,36 @@ func NewMergeSource(filename string, sources []Source) *MergeSource { for i, src := range sources { lidmapping[i] = make([]uint32, src.Info().DocsTotal+1) } - return &MergeSource{sources: sources, lidmapping: lidmapping} + + info := common.NewInfo(filename, 0, 0) + info.SealingTime = info.CreationTime + + return &MergeSource{ + info: info, + filename: filename, + + sources: sources, + lidmapping: lidmapping, + + from: math.MaxUint64, to: 0, + } } -// FIXME(dkharms): now this is just a placeholder. -// And info can be caculated after all merges. func (s *MergeSource) Info() *common.Info { - var ( - docsOnDisk uint64 - indexOnDisk uint64 - ) - for i := range s.sources { - docsOnDisk += s.sources[i].Info().DocsOnDisk - indexOnDisk += s.sources[i].Info().IndexOnDisk + sinfo := s.sources[i].Info() + + s.info.DocsRaw += sinfo.DocsRaw + s.info.DocsTotal += sinfo.DocsTotal + s.info.DocsOnDisk += sinfo.DocsOnDisk + + // NOTE(dkharms): [IndexOnDisk] is calculated later. } - return common.NewInfo(s.filename, docsOnDisk, 0) + s.info.From = s.from + s.info.To = s.to + + return s.info } func (s *MergeSource) BlockOffsets() []uint64 { @@ -82,7 +99,7 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { // // Its time complexity O(k*n) so it's not efficient enough if we compare it // against time complexity of min-heap (which is O(n*log(k))) - // or another great data structure -- tournament tree -- which is O(n * log(k)) as well. + // or another great data structure -- tournament tree -- which is O(n*log(k)) as well. // // However, tournament tree performs less comparisons than min-heap // and it is around log(k) vs 2*log(k). @@ -110,6 +127,9 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { ids = append(ids, entry{id, docpos, i, lid}) lid += 1 + + s.from = min(s.from, id.MID) + s.to = max(s.to, id.MID) } } diff --git a/compaction/merge_source_test.go b/compaction/merge_source_test.go index c01b15a2..df471c8f 100644 --- a/compaction/merge_source_test.go +++ b/compaction/merge_source_test.go @@ -1,6 +1,7 @@ package compaction import ( + "cmp" "iter" "slices" "testing" @@ -31,8 +32,17 @@ type mockSealingSource struct { func (m *mockSealingSource) Info() *common.Info { return &common.Info{ + DocsRaw: m.docsOnDisk, DocsTotal: uint32(len(m.ids)), DocsOnDisk: m.docsOnDisk, + + From: slices.MinFunc(m.ids, func(x, y seq.ID) int { + return cmp.Compare(x.MID, y.MID) + }).MID, + + To: slices.MaxFunc(m.ids, func(x, y seq.ID) int { + return cmp.Compare(x.MID, y.MID) + }).MID, } } @@ -142,13 +152,13 @@ func TestMergeSource(t *testing.T) { source := NewMergeSource("inmemory", []Source{first, second}) - { + t.Run("offsets", func(t *testing.T) { // Validate correctness of [storage.DocBlock] calculation. offsets := source.BlockOffsets() require.Equal(t, []uint64{0, 1024}, offsets) - } + }) - { + t.Run("ids", func(t *testing.T) { var ( ids []seq.ID docpos []seq.DocPos @@ -183,9 +193,9 @@ func TestMergeSource(t *testing.T) { }, docpos, ) - } + }) - { + t.Run("tokens-lids", func(t *testing.T) { var ( fields []string tokens [][]byte @@ -226,5 +236,19 @@ func TestMergeSource(t *testing.T) { }, lids, ) - } + }) + + t.Run("info", func(t *testing.T) { + merged := source.Info() + finfo, sinfo := first.Info(), second.Info() + + // Validate correctness of fraction time-range. + require.Equal(t, merged.From, min(finfo.From, sinfo.From)) + require.Equal(t, merged.To, max(finfo.To, sinfo.To)) + + // Validate correctness of total documents of merged fractions. + require.Equal(t, merged.DocsTotal, finfo.DocsTotal+sinfo.DocsTotal) + require.Equal(t, merged.DocsOnDisk, finfo.DocsOnDisk+sinfo.DocsOnDisk) + require.Equal(t, merged.DocsRaw, finfo.DocsRaw+sinfo.DocsRaw) + }) } diff --git a/frac/common/info.go b/frac/common/info.go index 69121408..20e7f7c2 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -15,9 +15,11 @@ import ( "github.com/ozontech/seq-db/seq" ) -const DistributionMaxInterval = 24 * time.Hour -const DistributionBucket = time.Minute -const DistributionSpreadThreshold = 10 * time.Minute +const ( + DistributionMaxInterval = 24 * time.Hour + DistributionBucket = time.Minute + DistributionSpreadThreshold = 10 * time.Minute +) type Info struct { Path string `json:"name"` From d551a39561c897b0a2b42df6975cda17fdac30ea Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 13 Apr 2026 17:45:47 +0300 Subject: [PATCH 23/26] feat: use linear scan for k-way merge --- compaction/heap.go | 1 - compaction/merge.go | 36 ++-- compaction/merge_source.go | 331 +++++++++++++++++++++++--------- compaction/merge_source_test.go | 114 +++++++++-- frac/sealed_source.go | 8 +- seq/seq.go | 10 +- 6 files changed, 369 insertions(+), 131 deletions(-) delete mode 100644 compaction/heap.go diff --git a/compaction/heap.go b/compaction/heap.go deleted file mode 100644 index d1d3cde1..00000000 --- a/compaction/heap.go +++ /dev/null @@ -1 +0,0 @@ -package compaction diff --git a/compaction/merge.go b/compaction/merge.go index 600e929f..1ff9b5a6 100644 --- a/compaction/merge.go +++ b/compaction/merge.go @@ -4,26 +4,21 @@ import ( "errors" "os" + "github.com/alecthomas/units" + "go.uber.org/zap" + + "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/indexwriter" + "github.com/ozontech/seq-db/logger" ) -func Merge(filename string, srcs ...Source) (*sealed.PreloadedData, error) { +func Merge(filename string, params common.SealParams, srcs ...Source) (*sealed.PreloadedData, error) { + writer := indexwriter.New(params) src := NewMergeSource(filename, srcs) - // FIXME(dkharms): [common.SealParams] must be passed into [Merge] function. - writer := indexwriter.New(common.SealParams{ - IDsZstdLevel: 3, - LIDsZstdLevel: 3, - TokenListZstdLevel: 3, - DocsPositionsZstdLevel: 3, - TokenTableZstdLevel: 3, - DocBlocksZstdLevel: 3, - DocBlockSize: 3, - }) - if err := createAndWrite( filename+consts.OffsetsTmpFileSuffix, filename+consts.OffsetsFileSuffix, @@ -142,19 +137,32 @@ func createAndWriteBoth( return os.Rename(tmpPath2, finalPath2) } -// FIXME(dkharms): Create buffered writer for file. func mergeDocs(filename string, srcs ...Source) error { return createAndWrite( filename+consts.DocsTmpFileSuffix, filename+consts.DocsFileSuffix, func(f *os.File) error { + w := bytespool.AcquireWriterSize(f, int(units.MiB)) + + defer func() { + if err := w.Flush(); err != nil { + logger.Error( + "cannot flush compacted .docs file", + zap.Error(err), + zap.String("fraction", filename), + ) + } + bytespool.ReleaseWriter(w) + }() + for _, src := range srcs { for block := range src.DocBlock() { - if _, err := f.Write(block); err != nil { + if _, err := w.Write(block); err != nil { return err } } } + return nil }, ) diff --git a/compaction/merge_source.go b/compaction/merge_source.go index f8ed6796..19768c93 100644 --- a/compaction/merge_source.go +++ b/compaction/merge_source.go @@ -1,11 +1,10 @@ package compaction import ( - "cmp" + "bytes" "iter" "math" "slices" - "strings" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/indexwriter" @@ -25,22 +24,23 @@ type MergeSource struct { // which provide view into underlying fractions. sources []Source - // docblockcount is populated during [MergeSource.BlockOffsets] call. + // docBlockCount is populated during [MergeSource.BlockOffsets] call. // This slice is used for changing block indexes in [seq.DocPos]. - docblockcount []int + docBlockCount []int - // lidmapping describes the transformation of lids + // lidMapping describes the transformation of lids // after k-merge of several fractions. // - // i-th index of lidmapping correponds to i-th fraction. - // j-th index of i-th lidmapping corresponds to rename of i-th lid. - lidmapping [][]uint32 + // i-th index of [lidMapping] correponds to i-th fraction. + // j-th index of i-th [lidMapping] corresponds to rename of j-th lid. + lidMapping [][]uint32 from, to seq.MID } func NewMergeSource(filename string, sources []Source) *MergeSource { lidmapping := make([][]uint32, len(sources)) + for i, src := range sources { lidmapping[i] = make([]uint32, src.Info().DocsTotal+1) } @@ -53,7 +53,7 @@ func NewMergeSource(filename string, sources []Source) *MergeSource { filename: filename, sources: sources, - lidmapping: lidmapping, + lidMapping: lidmapping, from: math.MaxUint64, to: 0, } @@ -82,20 +82,21 @@ func (s *MergeSource) BlockOffsets() []uint64 { offsets []uint64 ) - s.docblockcount = append(s.docblockcount, 0) + // Initially s.docBlockCount + s.docBlockCount = append(s.docBlockCount, 0) for i := 0; i < len(s.sources); i++ { for _, offset := range s.sources[i].BlockOffsets() { offsets = append(offsets, uint64(offset)+docsSize) } docsSize += s.sources[i].Info().DocsOnDisk - s.docblockcount = append(s.docblockcount, len(offsets)) + s.docBlockCount = append(s.docBlockCount, len(offsets)) } return offsets } func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - // FIXME(dkharms): For now, I will use stupid-simple linear scan for k-way merge. + // TODO(dkharms): For now, I will use stupid-simple linear scan for k-way merge. // // Its time complexity O(k*n) so it's not efficient enough if we compare it // against time complexity of min-heap (which is O(n*log(k))) @@ -104,136 +105,284 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { // However, tournament tree performs less comparisons than min-heap // and it is around log(k) vs 2*log(k). - type entry struct { + type cursor struct { + next func() (seq.ID, seq.DocPos, bool) + stop func() + id seq.ID - docpos seq.DocPos + docPos seq.DocPos + lidOld uint32 - sourceIdx int - oldlid uint32 + ok bool } - var ids []entry - for i := 0; i < len(s.sources); i++ { - var lid uint32 - for id, docpos := range s.sources[i].ID() { - // Skip system [seq.ID]. - if id == seq.SystemID { - lid += 1 - continue - } - - blockIdx, offset := docpos.Unpack() - docpos = seq.PackDocPos(uint32(s.docblockcount[i]+int(blockIdx)), offset) - ids = append(ids, entry{id, docpos, i, lid}) + return func(yield func(seq.ID, seq.DocPos) bool) { + var cursors []cursor - lid += 1 + for i := range s.sources { + src := s.sources[i] + next, stop := iter.Pull2(src.ID()) - s.from = min(s.from, id.MID) - s.to = max(s.to, id.MID) - } - } + // Skip [seq.SystemID] and [seq.SystemDocPos]. + _, _, _ = next() - slices.SortFunc(ids, func(x, y entry) int { - if x.id.MID == y.id.MID { - return -cmp.Compare(x.id.RID, y.id.RID) + id, docpos, ok := next() + cursors = append(cursors, cursor{ + next: next, stop: stop, + id: id, docPos: docpos, lidOld: 1, + ok: ok, + }) } - return -cmp.Compare(x.id.MID, y.id.MID) - }) - for i, entry := range ids { - s.lidmapping[entry.sourceIdx][entry.oldlid] = uint32(i + 1) - } + defer func() { + for _, c := range cursors { + c.stop() + } + }() - return func(yield func(seq.ID, seq.DocPos) bool) { - // Emit system id since we skipped all such ids previously. + lid := uint32(1) + // We've previosly dropped [seq.SystemID] from + // iterators however we do have to emit one such id. if !yield(seq.SystemID, seq.SystemDocPos) { return } - for _, v := range ids { - if !yield(v.id, v.docpos) { + for { + var ( + id seq.ID = seq.MinID + idx int = -1 + ) + + for i, c := range cursors { + // We exhausted i-th cursor so there is nothing pull. + if !c.ok { + continue + } + + if seq.Less(id, c.id) { + id = c.id + idx = i + } + } + + // All pull-iterators are exhausted. + // Close all iterators and return. + if idx == -1 { + break + } + + c := cursors[idx] + minid, mindocpos, oldlid := c.id, c.docPos, c.lidOld + + blockIdx, offset := mindocpos.Unpack() + mindocpos = seq.PackDocPos(uint32(s.docBlockCount[idx]+int(blockIdx)), offset) + + if !yield(minid, mindocpos) { return } + + // Rename lid from picked cursor to the new value. + s.lidMapping[idx][oldlid] = lid + + c.id, c.docPos, c.ok = c.next() + c.lidOld += 1 + + s.from = min(s.from, minid.MID) + s.to = max(s.to, minid.MID) + + lid += 1 + cursors[idx] = c } } } -type key struct { - field string - token string -} +func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + // TODO(dkharms): For now, I will use stupid-simple linear scan for k-way merge. + // + // Its time complexity O(k*n) so it's not efficient enough if we compare it + // against time complexity of min-heap (which is O(n*log(k))) + // or another great data structure -- tournament tree -- which is O(n*log(k)) as well. + // + // However, tournament tree performs less comparisons than min-heap + // and it is around log(k) vs 2*log(k). -type value struct { - idx int - lids []uint32 -} + type cursor struct { + next func() (string, iter.Seq2[[]byte, []uint32], bool) + stop func() -func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { - // TODO(dkharms): Use heap or other more efficient data structure. - // For now, I'll just dump everything into one array. + field string + tokIt iter.Seq2[[]byte, []uint32] - values := make(map[key][]value) - for i := 0; i < len(s.sources); i++ { - for field, tokIter := range s.sources[i].TokenTriplet() { - for tok, lids := range tokIter { - k := key{field, string(tok)} - values[k] = append(values[k], value{i, slices.Clone(lids)}) + ok bool + } + + minimal := func(cursors []cursor) (string, bool) { + var ( + set bool + field string + ) + + for _, c := range cursors { + if !c.ok { + continue + } + + if !set { + field = c.field + set = true + continue } + + field = min(field, c.field) } - } - var keys []key - for k := range values { - keys = append(keys, k) + return field, set } - slices.SortFunc(keys, func(x, y key) int { - if x.field != y.field { - return strings.Compare(x.field, y.field) + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + var cursors []cursor + + for i := range s.sources { + src := s.sources[i] + + next, stop := iter.Pull2(src.TokenTriplet()) + field, tokIt, has := next() + + cursors = append(cursors, cursor{ + next: next, stop: stop, + field: field, tokIt: tokIt, + ok: has, + }) } - return strings.Compare(x.token, y.token) - }) - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { - var previous string - for _, k := range keys { - if k.field == previous { - continue + defer func() { + for _, c := range cursors { + c.stop() + } + }() + + for { + field, ok := minimal(cursors) + if !ok { + break } - if !yield(k.field, s.tokensForField(k.field, keys, values)) { + var ( + idxs []int + iters []iter.Seq2[[]byte, []uint32] + ) + + for i, c := range cursors { + if !c.ok || c.field != field { + continue + } + + idxs = append(idxs, i) + iters = append(iters, c.tokIt) + } + + if !yield(field, s.tokensForField(idxs, iters)) { return } - previous = k.field + // Advance all cursors that were on this field. + for _, idx := range idxs { + c := cursors[idx] + c.field, c.tokIt, c.ok = c.next() + cursors[idx] = c + } } } } func (s *MergeSource) tokensForField( - field string, keys []key, values map[key][]value, + idxs []int, iters []iter.Seq2[[]byte, []uint32], ) iter.Seq2[[]byte, []uint32] { - var filtered []key - for _, k := range keys { - if k.field == field { - filtered = append(filtered, k) + type cursor struct { + next func() ([]byte, []uint32, bool) + stop func() + + idx int + token []byte + lids []uint32 + + ok bool + } + + minimal := func(cursors []cursor) ([]byte, bool) { + var ( + set bool + token []byte + ) + + for _, c := range cursors { + if !c.ok { + continue + } + + if !set { + token = c.token + set = true + continue + } + + if bytes.Compare(c.token, token) < 0 { + token = c.token + } } + + return token, set } + // NB: This buffer will be reused across + // all calls within current field. + var lidRenamed []uint32 + return func(yield func([]byte, []uint32) bool) { - for _, k := range filtered { - var buf []uint32 + var cursors []cursor + + for i := range iters { + next, stop := iter.Pull2(iters[i]) + token, lids, ok := next() + cursors = append(cursors, cursor{ + next: next, stop: stop, + idx: idxs[i], token: token, lids: lids, + ok: ok, + }) + } + + defer func() { + for _, c := range cursors { + c.stop() + } + }() + + for { + token, ok := minimal(cursors) + if !ok { + break + } + + // Collect and remap lids from all cursors at this token, then advance them. + for i, c := range cursors { + if !c.ok || !bytes.Equal(c.token, token) { + continue + } - for _, v := range values[k] { - for _, lid := range v.lids { - buf = append(buf, s.lidmapping[v.idx][lid]) + for _, lid := range c.lids { + lidRenamed = append(lidRenamed, s.lidMapping[c.idx][lid]) } + + c.token, c.lids, c.ok = c.next() + cursors[i] = c } - slices.Sort(buf) - if !yield([]byte(k.token), buf) { + slices.Sort(lidRenamed) + if !yield(token, lidRenamed) { return } + + lidRenamed = lidRenamed[:0] } } } diff --git a/compaction/merge_source_test.go b/compaction/merge_source_test.go index df471c8f..5a045503 100644 --- a/compaction/merge_source_test.go +++ b/compaction/merge_source_test.go @@ -2,32 +2,24 @@ package compaction import ( "cmp" + "fmt" "iter" + "math/rand" "slices" "testing" + "github.com/stretchr/testify/require" + "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/seq" - "github.com/stretchr/testify/require" ) -// mockSealingSource is a test implementation of sealing.Source. -// -// IDs must be provided in descending order (MID DESC, RID DESC); the mock -// automatically prepends the system ID when iterating, matching the contract -// expected by MergeSource.ID(). -// -// Fields maps field name → token value → list of 1-based LIDs. -// Fields and tokens are yielded in sorted order. type mockSealingSource struct { - ids []seq.ID - pos []seq.DocPos - blocks []uint64 - // docsOnDisk is the total compressed size of the .docs file, - // used by MergeSource to adjust block offsets across sources. + ids []seq.ID + pos []seq.DocPos + blocks []uint64 docsOnDisk uint64 - // fields maps field → token → lids (1-based). - fields map[string]map[string][]uint32 + fields map[string]map[string][]uint32 } func (m *mockSealingSource) Info() *common.Info { @@ -222,8 +214,9 @@ func TestMergeSource(t *testing.T) { // Ensure correctness of lids remapping // ----------------- - // seq.MID 6 5 3 2 1 - // seq.LID 1 2 3 4 5 + // seq.MID 6 5 | 3 2 1 + // seq.LID (old) 1 2 | 1 2 3 + // seq.LID (new) 1 2 | 3 4 5 // ----------------- require.Equal(t, [][]uint32{ @@ -252,3 +245,88 @@ func TestMergeSource(t *testing.T) { require.Equal(t, merged.DocsRaw, finfo.DocsRaw+sinfo.DocsRaw) }) } + +func BenchmarkMergeSource(b *testing.B) { + const ( + numSources = 4 + docsPerSource = 512_000 + + // Total pairs of (field, token) will be + // [numFields] * [numTokens]. + numFields = 512 + numTokens = 16384 + ) + + rng := rand.New(rand.NewSource(42)) + + fieldNames := make([]string, numFields) + for i := range fieldNames { + fieldNames[i] = fmt.Sprintf("field-%d", i) + } + + tokenNames := make([]string, numTokens) + for i := range tokenNames { + tokenNames[i] = fmt.Sprintf("token-%d", i) + } + + makeSource := func(midOffset seq.MID) Source { + ids := make([]seq.ID, docsPerSource) + pos := make([]seq.DocPos, docsPerSource) + + for j := range ids { + // IDs must be in descending MID order within each source. + ids[j] = seq.ID{MID: midOffset + seq.MID(docsPerSource-j)} + pos[j] = seq.PackDocPos(0, uint64(j*64)) + } + + // Assign each lid to a random (field, token) pair from the vocabulary + // so that total lids per source equals [docsPerSource]. + fields := make(map[string]map[string][]uint32) + for lid := uint32(1); lid <= uint32(docsPerSource); lid++ { + field := fieldNames[rng.Intn(numFields)] + token := tokenNames[rng.Intn(numTokens)] + + if fields[field] == nil { + fields[field] = make(map[string][]uint32) + } + + fields[field][token] = append(fields[field][token], lid) + } + + for _, tokens := range fields { + for tok, lids := range tokens { + slices.Sort(lids) + tokens[tok] = lids + } + } + + return &mockSealingSource{ + ids: ids, + pos: pos, + blocks: []uint64{0}, + docsOnDisk: docsPerSource * 64, + fields: fields, + } + } + + sources := make([]Source, numSources) + for i := range sources { + sources[i] = makeSource(seq.MID(i * docsPerSource)) + } + + b.ResetTimer() + b.ReportAllocs() + + for b.Loop() { + ms := NewMergeSource("bench", sources) + + ms.BlockOffsets() + for range ms.ID() { + } + + for _, tokIt := range ms.TokenTriplet() { + for range tokIt { + } + } + } +} diff --git a/frac/sealed_source.go b/frac/sealed_source.go index 633855a4..29a3d31b 100644 --- a/frac/sealed_source.go +++ b/frac/sealed_source.go @@ -102,13 +102,13 @@ func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] lidsTable := s.f.blocksData.LIDsTable tokenTable := s.tokenTableLoader.Load() - var lidsbuf []uint32 + var lidsBuf []uint32 return func(yield func([]byte, []uint32) bool) { for _, entry := range tokenTable[field].Entries { block := s.tokenBlockLoader.Load(entry.BlockIndex) for tid := entry.StartTID; tid < entry.StartTID+entry.ValCount; tid++ { - lidsbuf = lidsbuf[:0] + lidsBuf = lidsBuf[:0] tokenVal := block.GetToken(entry.GetIndexInTokensBlock(tid)) firstBlock := lidsTable.GetFirstBlockIndexForTID(tid) @@ -122,10 +122,10 @@ func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] } chunkIdx := lidsTable.GetChunkIndex(bi, tid) - lidsbuf = append(lidsbuf, lidBlock.LIDs[lidBlock.Offsets[chunkIdx]:lidBlock.Offsets[chunkIdx+1]]...) + lidsBuf = append(lidsBuf, lidBlock.LIDs[lidBlock.Offsets[chunkIdx]:lidBlock.Offsets[chunkIdx+1]]...) } - if !yield(tokenVal, lidsbuf) { + if !yield(tokenVal, lidsBuf) { return } } diff --git a/seq/seq.go b/seq/seq.go index adae4265..d3557a16 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -11,9 +11,13 @@ import ( ) var ( - SystemMID MID = math.MaxUint64 - SystemRID RID = math.MaxUint64 - SystemID ID = ID{SystemMID, SystemRID} + SystemMID MID = math.MaxUint64 + SystemRID RID = math.MaxUint64 + + SystemID ID = ID{SystemMID, SystemRID} + MinID ID = ID{0, 0} + MaxID ID = SystemID + SystemDocPos DocPos = DocPos(0) ) From 0f4492dd31ae90471c4088b07e292a1ecfd9198a Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 14:14:42 +0300 Subject: [PATCH 24/26] fix: calculate offsets and info once --- compaction/merge.go | 90 +++++++-------- compaction/merge_source.go | 191 +++++++++++++++++++------------- compaction/merge_source_test.go | 36 +++--- frac/sealed_source.go | 45 ++++---- 4 files changed, 198 insertions(+), 164 deletions(-) diff --git a/compaction/merge.go b/compaction/merge.go index 1ff9b5a6..b21cd9c3 100644 --- a/compaction/merge.go +++ b/compaction/merge.go @@ -4,15 +4,10 @@ import ( "errors" "os" - "github.com/alecthomas/units" - "go.uber.org/zap" - - "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/indexwriter" - "github.com/ozontech/seq-db/logger" ) func Merge(filename string, params common.SealParams, srcs ...Source) (*sealed.PreloadedData, error) { @@ -88,6 +83,33 @@ func Merge(filename string, params common.SealParams, srcs ...Source) (*sealed.P return preloaded, nil } +func mergeDocs(filename string, srcs ...Source) error { + return createAndWrite( + filename+consts.DocsTmpFileSuffix, + filename+consts.DocsFileSuffix, + func(f *os.File) error { + var docsSize uint64 + + for _, src := range srcs { + for loc, err := range src.DocBlock() { + if err != nil { + return err + } + + payload, offset := loc.First, loc.Second + if _, err := f.WriteAt(payload, int64(offset+docsSize)); err != nil { + return err + } + } + + docsSize += src.Info().DocsOnDisk + } + + return nil + }, + ) +} + func syncAndClose(f *os.File) error { if err := f.Sync(); err != nil { f.Close() @@ -96,8 +118,11 @@ func syncAndClose(f *os.File) error { return f.Close() } -func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { - f, err := os.Create(tmpPath) +func createAndWrite( + tmp, final string, + write func(*os.File) error, +) error { + f, err := os.Create(tmp) if err != nil { return err } @@ -106,64 +131,33 @@ func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error return err } - return os.Rename(tmpPath, finalPath) + return os.Rename(tmp, final) } func createAndWriteBoth( - tmpPath1, finalPath1, - tmpPath2, finalPath2 string, + atmp, afinal, + btmp, bfinal string, write func(*os.File, *os.File) error, ) error { - f1, err := os.Create(tmpPath1) + a, err := os.Create(atmp) if err != nil { return err } - f2, err := os.Create(tmpPath2) + b, err := os.Create(btmp) if err != nil { - f1.Close() + a.Close() return err } - writeErr := write(f1, f2) - if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { + writeErr := write(a, b) + if err := errors.Join(writeErr, syncAndClose(a), syncAndClose(b)); err != nil { return err } - if err := os.Rename(tmpPath1, finalPath1); err != nil { + if err := os.Rename(atmp, afinal); err != nil { return err } - return os.Rename(tmpPath2, finalPath2) -} - -func mergeDocs(filename string, srcs ...Source) error { - return createAndWrite( - filename+consts.DocsTmpFileSuffix, - filename+consts.DocsFileSuffix, - func(f *os.File) error { - w := bytespool.AcquireWriterSize(f, int(units.MiB)) - - defer func() { - if err := w.Flush(); err != nil { - logger.Error( - "cannot flush compacted .docs file", - zap.Error(err), - zap.String("fraction", filename), - ) - } - bytespool.ReleaseWriter(w) - }() - - for _, src := range srcs { - for block := range src.DocBlock() { - if _, err := w.Write(block); err != nil { - return err - } - } - } - - return nil - }, - ) + return os.Rename(btmp, bfinal) } diff --git a/compaction/merge_source.go b/compaction/merge_source.go index 19768c93..d9f16428 100644 --- a/compaction/merge_source.go +++ b/compaction/merge_source.go @@ -5,25 +5,39 @@ import ( "iter" "math" "slices" + "sync" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/indexwriter" "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/util" +) + +type ( + Document = util.Pair[seq.ID, []byte] + DocBlockLocation = util.Pair[[]byte, uint64] + TokenPosting = util.Pair[[]byte, []uint32] + DocLocation = util.Pair[seq.ID, seq.DocPos] + IndexedDocBlock = util.Pair[[]byte, []seq.DocPos] ) type Source interface { indexwriter.Source - DocBlock() iter.Seq[[]byte] + DocBlock() iter.Seq2[DocBlockLocation, error] } type MergeSource struct { filename string + info *common.Info + infoOnce sync.Once // sources is a slice of [sealing.Source] // which provide view into underlying fractions. sources []Source + offsets []uint64 + offsetsOnce sync.Once // docBlockCount is populated during [MergeSource.BlockOffsets] call. // This slice is used for changing block indexes in [seq.DocPos]. docBlockCount []int @@ -60,42 +74,47 @@ func NewMergeSource(filename string, sources []Source) *MergeSource { } func (s *MergeSource) Info() *common.Info { - for i := range s.sources { - sinfo := s.sources[i].Info() + s.infoOnce.Do(func() { + for i := range s.sources { + sinfo := s.sources[i].Info() - s.info.DocsRaw += sinfo.DocsRaw - s.info.DocsTotal += sinfo.DocsTotal - s.info.DocsOnDisk += sinfo.DocsOnDisk + s.info.DocsRaw += sinfo.DocsRaw + s.info.DocsTotal += sinfo.DocsTotal + s.info.DocsOnDisk += sinfo.DocsOnDisk - // NOTE(dkharms): [IndexOnDisk] is calculated later. - } + // NOTE(dkharms): [IndexOnDisk] is calculated later. + } - s.info.From = s.from - s.info.To = s.to + s.info.From = s.from + s.info.To = s.to + }) return s.info } func (s *MergeSource) BlockOffsets() []uint64 { - var ( - docsSize uint64 - offsets []uint64 - ) - - // Initially s.docBlockCount - s.docBlockCount = append(s.docBlockCount, 0) - for i := 0; i < len(s.sources); i++ { - for _, offset := range s.sources[i].BlockOffsets() { - offsets = append(offsets, uint64(offset)+docsSize) + s.offsetsOnce.Do(func() { + var ( + docsSize uint64 + offsets []uint64 + ) + + s.docBlockCount = append(s.docBlockCount, 0) + for i := 0; i < len(s.sources); i++ { + for _, offset := range s.sources[i].BlockOffsets() { + offsets = append(offsets, uint64(offset)+docsSize) + } + docsSize += s.sources[i].Info().DocsOnDisk + s.docBlockCount = append(s.docBlockCount, len(offsets)) } - docsSize += s.sources[i].Info().DocsOnDisk - s.docBlockCount = append(s.docBlockCount, len(offsets)) - } - return offsets + s.offsets = offsets + }) + + return s.offsets } -func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { +func (s *MergeSource) ID() iter.Seq2[DocLocation, error] { // TODO(dkharms): For now, I will use stupid-simple linear scan for k-way merge. // // Its time complexity O(k*n) so it's not efficient enough if we compare it @@ -106,19 +125,24 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { // and it is around log(k) vs 2*log(k). type cursor struct { - next func() (seq.ID, seq.DocPos, bool) + next func() (DocLocation, error, bool) stop func() - id seq.ID - docPos seq.DocPos + loc DocLocation lidOld uint32 ok bool } - return func(yield func(seq.ID, seq.DocPos) bool) { + return func(yield func(DocLocation, error) bool) { var cursors []cursor + defer func() { + for _, c := range cursors { + c.stop() + } + }() + for i := range s.sources { src := s.sources[i] next, stop := iter.Pull2(src.ID()) @@ -126,24 +150,23 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { // Skip [seq.SystemID] and [seq.SystemDocPos]. _, _, _ = next() - id, docpos, ok := next() + loc, err, ok := next() cursors = append(cursors, cursor{ next: next, stop: stop, - id: id, docPos: docpos, lidOld: 1, - ok: ok, + loc: loc, lidOld: 1, + ok: ok && err == nil, }) - } - defer func() { - for _, c := range cursors { - c.stop() + if err != nil { + yield(DocLocation{}, err) + return } - }() + } lid := uint32(1) // We've previosly dropped [seq.SystemID] from // iterators however we do have to emit one such id. - if !yield(seq.SystemID, seq.SystemDocPos) { + if !yield(DocLocation{First: seq.SystemID, Second: seq.SystemDocPos}, nil) { return } @@ -159,8 +182,8 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { continue } - if seq.Less(id, c.id) { - id = c.id + if seq.Less(id, c.loc.First) { + id = c.loc.First idx = i } } @@ -172,21 +195,28 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { } c := cursors[idx] - minid, mindocpos, oldlid := c.id, c.docPos, c.lidOld + minid, oldlid := c.loc.First, c.lidOld - blockIdx, offset := mindocpos.Unpack() - mindocpos = seq.PackDocPos(uint32(s.docBlockCount[idx]+int(blockIdx)), offset) + blockIdx, offset := c.loc.Second.Unpack() + mindocpos := seq.PackDocPos(uint32(s.docBlockCount[idx]+int(blockIdx)), offset) - if !yield(minid, mindocpos) { + if !yield(DocLocation{First: minid, Second: mindocpos}, nil) { return } // Rename lid from picked cursor to the new value. s.lidMapping[idx][oldlid] = lid - c.id, c.docPos, c.ok = c.next() + var err error + c.loc, err, c.ok = c.next() c.lidOld += 1 + if err != nil { + cursors[idx] = c + yield(DocLocation{}, err) + return + } + s.from = min(s.from, minid.MID) s.to = max(s.to, minid.MID) @@ -196,7 +226,7 @@ func (s *MergeSource) ID() iter.Seq2[seq.ID, seq.DocPos] { } } -func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { // TODO(dkharms): For now, I will use stupid-simple linear scan for k-way merge. // // Its time complexity O(k*n) so it's not efficient enough if we compare it @@ -207,11 +237,11 @@ func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint3 // and it is around log(k) vs 2*log(k). type cursor struct { - next func() (string, iter.Seq2[[]byte, []uint32], bool) + next func() (string, iter.Seq2[TokenPosting, error], bool) stop func() field string - tokIt iter.Seq2[[]byte, []uint32] + tokIt iter.Seq2[TokenPosting, error] ok bool } @@ -239,7 +269,7 @@ func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint3 return field, set } - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { var cursors []cursor for i := range s.sources { @@ -269,7 +299,7 @@ func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint3 var ( idxs []int - iters []iter.Seq2[[]byte, []uint32] + iters []iter.Seq2[TokenPosting, error] ) for i, c := range cursors { @@ -296,15 +326,14 @@ func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint3 } func (s *MergeSource) tokensForField( - idxs []int, iters []iter.Seq2[[]byte, []uint32], -) iter.Seq2[[]byte, []uint32] { + idxs []int, iters []iter.Seq2[TokenPosting, error], +) iter.Seq2[TokenPosting, error] { type cursor struct { - next func() ([]byte, []uint32, bool) + next func() (TokenPosting, error, bool) stop func() - idx int - token []byte - lids []uint32 + idx int + posting TokenPosting ok bool } @@ -321,13 +350,13 @@ func (s *MergeSource) tokensForField( } if !set { - token = c.token + token = c.posting.First set = true continue } - if bytes.Compare(c.token, token) < 0 { - token = c.token + if bytes.Compare(c.posting.First, token) < 0 { + token = c.posting.First } } @@ -338,24 +367,30 @@ func (s *MergeSource) tokensForField( // all calls within current field. var lidRenamed []uint32 - return func(yield func([]byte, []uint32) bool) { + return func(yield func(TokenPosting, error) bool) { var cursors []cursor + defer func() { + for _, c := range cursors { + c.stop() + } + }() + for i := range iters { next, stop := iter.Pull2(iters[i]) - token, lids, ok := next() + posting, err, ok := next() + cursors = append(cursors, cursor{ next: next, stop: stop, - idx: idxs[i], token: token, lids: lids, - ok: ok, + idx: idxs[i], posting: posting, + ok: ok && err == nil, }) - } - defer func() { - for _, c := range cursors { - c.stop() + if err != nil { + yield(TokenPosting{}, err) + return } - }() + } for { token, ok := minimal(cursors) @@ -365,20 +400,28 @@ func (s *MergeSource) tokensForField( // Collect and remap lids from all cursors at this token, then advance them. for i, c := range cursors { - if !c.ok || !bytes.Equal(c.token, token) { + if !c.ok || !bytes.Equal(c.posting.First, token) { continue } - for _, lid := range c.lids { + for _, lid := range c.posting.Second { lidRenamed = append(lidRenamed, s.lidMapping[c.idx][lid]) } - c.token, c.lids, c.ok = c.next() + var err error + c.posting, err, c.ok = c.next() + + if err != nil { + cursors[i] = c + yield(TokenPosting{}, err) + return + } + cursors[i] = c } slices.Sort(lidRenamed) - if !yield(token, lidRenamed) { + if !yield(TokenPosting{First: token, Second: lidRenamed}, nil) { return } @@ -386,7 +429,3 @@ func (s *MergeSource) tokensForField( } } } - -func (s *MergeSource) LastError() error { - return nil -} diff --git a/compaction/merge_source_test.go b/compaction/merge_source_test.go index 5a045503..fdedac98 100644 --- a/compaction/merge_source_test.go +++ b/compaction/merge_source_test.go @@ -42,27 +42,27 @@ func (m *mockSealingSource) BlockOffsets() []uint64 { return m.blocks } -func (m *mockSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { - if !yield(seq.SystemID, seq.SystemDocPos) { +func (m *mockSealingSource) ID() iter.Seq2[DocLocation, error] { + return func(yield func(DocLocation, error) bool) { + if !yield(DocLocation{First: seq.SystemID, Second: seq.SystemDocPos}, nil) { return } for i, id := range m.ids { - if !yield(id, m.pos[i]) { + if !yield(DocLocation{First: id, Second: m.pos[i]}, nil) { return } } } } -func (m *mockSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (m *mockSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { fieldNames := make([]string, 0, len(m.fields)) for f := range m.fields { fieldNames = append(fieldNames, f) } slices.Sort(fieldNames) - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { for _, field := range fieldNames { tokens := make([]string, 0, len(m.fields[field])) for t := range m.fields[field] { @@ -70,9 +70,9 @@ func (m *mockSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, [ } slices.Sort(tokens) - if !yield(field, func(yield func([]byte, []uint32) bool) { + if !yield(field, func(yield func(TokenPosting, error) bool) { for _, tok := range tokens { - if !yield([]byte(tok), m.fields[field][tok]) { + if !yield(TokenPosting{First: []byte(tok), Second: m.fields[field][tok]}, nil) { return } } @@ -83,9 +83,9 @@ func (m *mockSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, [ } } -func (m *mockSealingSource) DocBlock() iter.Seq[[]byte] { - return func(yield func([]byte) bool) { - if !yield(nil) { +func (m *mockSealingSource) DocBlock() iter.Seq2[DocBlockLocation, error] { + return func(yield func(DocBlockLocation, error) bool) { + if !yield(DocBlockLocation{}, nil) { return } } @@ -156,9 +156,10 @@ func TestMergeSource(t *testing.T) { docpos []seq.DocPos ) - for id, dp := range source.ID() { - ids = append(ids, id) - docpos = append(docpos, dp) + for loc, err := range source.ID() { + require.NoError(t, err) + ids = append(ids, loc.First) + docpos = append(docpos, loc.Second) } require.Equal(t, @@ -197,9 +198,10 @@ func TestMergeSource(t *testing.T) { for field, fieldIt := range source.TokenTriplet() { fields = append(fields, field) - for token, lidsbuf := range fieldIt { - tokens = append(tokens, token) - lids = append(lids, slices.Clone(lidsbuf)) + for posting, err := range fieldIt { + require.NoError(t, err) + tokens = append(tokens, posting.First) + lids = append(lids, slices.Clone(posting.Second)) } } diff --git a/frac/sealed_source.go b/frac/sealed_source.go index 29a3d31b..f6eebf07 100644 --- a/frac/sealed_source.go +++ b/frac/sealed_source.go @@ -4,14 +4,18 @@ import ( "iter" "slices" + "github.com/ozontech/seq-db/blockbuilder" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" + "github.com/ozontech/seq-db/util" ) +type DocBlockLocation = util.Pair[[]byte, uint64] + // SealedSource implements [indexwriter.Source] for a sealed fraction. // Used as input to [compaction.MergeSource] when compacting multiple fractions. type SealedSource struct { @@ -22,8 +26,6 @@ type SealedSource struct { tokenBlockLoader *token.BlockLoader tokenTableLoader *token.TableLoader - - lastErr error } func NewSealedSource(f *Sealed) *SealedSource { @@ -52,35 +54,35 @@ func (s *SealedSource) BlockOffsets() []uint64 { return s.f.blocksData.BlocksOffsets } -func (s *SealedSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { +func (s *SealedSource) ID() iter.Seq2[blockbuilder.DocLocation, error] { + return func(yield func(blockbuilder.DocLocation, error) bool) { for lid := uint32(0); lid < s.f.blocksData.IDsTable.IDsTotal; lid++ { mid, err := s.idsProvider.MID(seq.LID(lid)) if err != nil { - s.lastErr = err + yield(blockbuilder.DocLocation{}, err) return } rid, err := s.idsProvider.RID(seq.LID(lid)) if err != nil { - s.lastErr = err + yield(blockbuilder.DocLocation{}, err) return } pos, err := s.idsProvider.DocPos(seq.LID(lid)) if err != nil { - s.lastErr = err + yield(blockbuilder.DocLocation{}, err) return } - if !yield(seq.ID{MID: mid, RID: rid}, pos) { + if !yield(blockbuilder.DocLocation{First: seq.ID{MID: mid, RID: rid}, Second: pos}, nil) { return } } } } -func (s *SealedSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (s *SealedSource) TokenTriplet() iter.Seq2[string, iter.Seq2[blockbuilder.TokenPosting, error]] { tokenTable := s.tokenTableLoader.Load() fields := make([]string, 0, len(tokenTable)) @@ -89,21 +91,21 @@ func (s *SealedSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint } slices.Sort(fields) - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + return func(yield func(string, iter.Seq2[blockbuilder.TokenPosting, error]) bool) { for _, field := range fields { - if !yield(field, s.tokensForField(field)) { + if !yield(field, s.postingsForField(field)) { return } } } } -func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] { +func (s *SealedSource) postingsForField(field string) iter.Seq2[blockbuilder.TokenPosting, error] { lidsTable := s.f.blocksData.LIDsTable tokenTable := s.tokenTableLoader.Load() var lidsBuf []uint32 - return func(yield func([]byte, []uint32) bool) { + return func(yield func(blockbuilder.TokenPosting, error) bool) { for _, entry := range tokenTable[field].Entries { block := s.tokenBlockLoader.Load(entry.BlockIndex) @@ -117,7 +119,7 @@ func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] for bi := firstBlock; bi <= lastBlock; bi++ { lidBlock, err := s.lidsLoader.GetLIDsBlock(bi) if err != nil { - s.lastErr = err + yield(blockbuilder.TokenPosting{}, err) return } @@ -125,7 +127,7 @@ func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] lidsBuf = append(lidsBuf, lidBlock.LIDs[lidBlock.Offsets[chunkIdx]:lidBlock.Offsets[chunkIdx+1]]...) } - if !yield(tokenVal, lidsBuf) { + if !yield(blockbuilder.TokenPosting{First: tokenVal, Second: lidsBuf}, nil) { return } } @@ -133,8 +135,8 @@ func (s *SealedSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] } } -func (s *SealedSource) DocBlock() iter.Seq[[]byte] { - return func(yield func([]byte) bool) { +func (s *SealedSource) DocBlock() iter.Seq2[DocBlockLocation, error] { + return func(yield func(DocBlockLocation, error) bool) { // We do not want to cache payload of DocBlock because // it will just pollute cache and cause unnecessary evictions. r := storage.NewDocBlocksReader(s.f.readLimiter, s.f.docsFile) @@ -144,17 +146,14 @@ func (s *SealedSource) DocBlock() iter.Seq[[]byte] { // Caller of [SealedSource.DocBlock] will decide whether it requires decompressed data. payload, _, err := r.ReadDocBlock(int64(offset)) if err != nil { - s.lastErr = err + yield(DocBlockLocation{}, err) return } - if !yield(payload) { + loc := DocBlockLocation{First: payload, Second: offset} + if !yield(loc, nil) { return } } } } - -func (s *SealedSource) LastError() error { - return s.lastErr -} From 28935cbb41a82da542a7a7ba3185a318220e1f47 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Mon, 27 Apr 2026 14:48:09 +0300 Subject: [PATCH 25/26] feat: build distribution for compacted fraction --- compaction/merge_source.go | 43 ++++++++++++++++++++------------- compaction/merge_source_test.go | 5 ++++ frac/common/info.go | 7 ++++++ 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/compaction/merge_source.go b/compaction/merge_source.go index d9f16428..d73671fa 100644 --- a/compaction/merge_source.go +++ b/compaction/merge_source.go @@ -3,7 +3,6 @@ package compaction import ( "bytes" "iter" - "math" "slices" "sync" @@ -48,8 +47,6 @@ type MergeSource struct { // i-th index of [lidMapping] correponds to i-th fraction. // j-th index of i-th [lidMapping] corresponds to rename of j-th lid. lidMapping [][]uint32 - - from, to seq.MID } func NewMergeSource(filename string, sources []Source) *MergeSource { @@ -59,18 +56,34 @@ func NewMergeSource(filename string, sources []Source) *MergeSource { lidmapping[i] = make([]uint32, src.Info().DocsTotal+1) } - info := common.NewInfo(filename, 0, 0) - info.SealingTime = info.CreationTime - - return &MergeSource{ - info: info, - filename: filename, - + s := &MergeSource{ + filename: filename, sources: sources, lidMapping: lidmapping, + } + + s.info = s.prepareInfo() + return s +} + +func (s *MergeSource) prepareInfo() *common.Info { + info := common.NewInfo(s.filename, 0, 0) - from: math.MaxUint64, to: 0, + var ( + from seq.MID = seq.MaxID.MID + to seq.MID = seq.MinID.MID + ) + + for _, src := range s.sources { + from = min(from, src.Info().From) + to = max(to, src.Info().To) } + + info.From, info.To = from, to + info.SealingTime = info.CreationTime + + info.InitEmptyDistribution() + return info } func (s *MergeSource) Info() *common.Info { @@ -84,9 +97,6 @@ func (s *MergeSource) Info() *common.Info { // NOTE(dkharms): [IndexOnDisk] is calculated later. } - - s.info.From = s.from - s.info.To = s.to }) return s.info @@ -195,7 +205,9 @@ func (s *MergeSource) ID() iter.Seq2[DocLocation, error] { } c := cursors[idx] + minid, oldlid := c.loc.First, c.lidOld + s.info.AddMID(uint64(minid.MID)) blockIdx, offset := c.loc.Second.Unpack() mindocpos := seq.PackDocPos(uint32(s.docBlockCount[idx]+int(blockIdx)), offset) @@ -217,9 +229,6 @@ func (s *MergeSource) ID() iter.Seq2[DocLocation, error] { return } - s.from = min(s.from, minid.MID) - s.to = max(s.to, minid.MID) - lid += 1 cursors[idx] = c } diff --git a/compaction/merge_source_test.go b/compaction/merge_source_test.go index fdedac98..5a8962d3 100644 --- a/compaction/merge_source_test.go +++ b/compaction/merge_source_test.go @@ -245,6 +245,11 @@ func TestMergeSource(t *testing.T) { require.Equal(t, merged.DocsTotal, finfo.DocsTotal+sinfo.DocsTotal) require.Equal(t, merged.DocsOnDisk, finfo.DocsOnDisk+sinfo.DocsOnDisk) require.Equal(t, merged.DocsRaw, finfo.DocsRaw+sinfo.DocsRaw) + + // Validate correctness of distribution. + require.NotNil(t, merged.Distribution) + require.True(t, merged.IsIntersecting(finfo.From, finfo.To)) + require.True(t, merged.IsIntersecting(sinfo.From, sinfo.To)) }) } diff --git a/frac/common/info.go b/frac/common/info.go index 20e7f7c2..d714f660 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -82,6 +82,13 @@ func (s *Info) BuildDistribution(mids []uint64) { } } +func (s *Info) AddMID(mid uint64) { + if s.Distribution == nil { + return + } + s.Distribution.Add(seq.MID(mid)) +} + func (s *Info) InitEmptyDistribution() bool { from := s.From.Time() creationTime := time.UnixMilli(int64(s.CreationTime)) From c46a562649df94dae9e8439b3ec797473534c044 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 28 Apr 2026 09:34:32 +0300 Subject: [PATCH 26/26] refactor: consistent naming --- compaction/merge.go | 1 - compaction/merge_source.go | 4 +- compaction/merge_source_test.go | 75 +++++++++++++++++++-------------- 3 files changed, 46 insertions(+), 34 deletions(-) diff --git a/compaction/merge.go b/compaction/merge.go index b21cd9c3..928b3044 100644 --- a/compaction/merge.go +++ b/compaction/merge.go @@ -89,7 +89,6 @@ func mergeDocs(filename string, srcs ...Source) error { filename+consts.DocsFileSuffix, func(f *os.File) error { var docsSize uint64 - for _, src := range srcs { for loc, err := range src.DocBlock() { if err != nil { diff --git a/compaction/merge_source.go b/compaction/merge_source.go index d73671fa..5b41271b 100644 --- a/compaction/merge_source.go +++ b/compaction/merge_source.go @@ -320,7 +320,7 @@ func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, e iters = append(iters, c.tokIt) } - if !yield(field, s.tokensForField(idxs, iters)) { + if !yield(field, s.postingsForField(idxs, iters)) { return } @@ -334,7 +334,7 @@ func (s *MergeSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, e } } -func (s *MergeSource) tokensForField( +func (s *MergeSource) postingsForField( idxs []int, iters []iter.Seq2[TokenPosting, error], ) iter.Seq2[TokenPosting, error] { type cursor struct { diff --git a/compaction/merge_source_test.go b/compaction/merge_source_test.go index 5a8962d3..12ba093c 100644 --- a/compaction/merge_source_test.go +++ b/compaction/merge_source_test.go @@ -44,11 +44,14 @@ func (m *mockSealingSource) BlockOffsets() []uint64 { func (m *mockSealingSource) ID() iter.Seq2[DocLocation, error] { return func(yield func(DocLocation, error) bool) { - if !yield(DocLocation{First: seq.SystemID, Second: seq.SystemDocPos}, nil) { + docloc := DocLocation{First: seq.SystemID, Second: seq.SystemDocPos} + if !yield(docloc, nil) { return } + for i, id := range m.ids { - if !yield(DocLocation{First: id, Second: m.pos[i]}, nil) { + docloc = DocLocation{First: id, Second: m.pos[i]} + if !yield(docloc, nil) { return } } @@ -56,27 +59,36 @@ func (m *mockSealingSource) ID() iter.Seq2[DocLocation, error] { } func (m *mockSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { - fieldNames := make([]string, 0, len(m.fields)) + fields := make([]string, 0, len(m.fields)) for f := range m.fields { - fieldNames = append(fieldNames, f) + fields = append(fields, f) } - slices.Sort(fieldNames) + slices.Sort(fields) return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { - for _, field := range fieldNames { - tokens := make([]string, 0, len(m.fields[field])) - for t := range m.fields[field] { - tokens = append(tokens, t) + for _, field := range fields { + if !yield(field, m.postingsForField(field)) { + return + } + } + } +} + +func (m *mockSealingSource) postingsForField(field string) iter.Seq2[TokenPosting, error] { + return func(yield func(TokenPosting, error) bool) { + tokens := make([]string, 0, len(m.fields[field])) + for t := range m.fields[field] { + tokens = append(tokens, t) + } + + slices.Sort(tokens) + for _, tok := range tokens { + posting := TokenPosting{ + First: []byte(tok), + Second: m.fields[field][tok], } - slices.Sort(tokens) - - if !yield(field, func(yield func(TokenPosting, error) bool) { - for _, tok := range tokens { - if !yield(TokenPosting{First: []byte(tok), Second: m.fields[field][tok]}, nil) { - return - } - } - }) { + + if !yield(posting, nil) { return } } @@ -165,10 +177,10 @@ func TestMergeSource(t *testing.T) { require.Equal(t, []seq.ID{ seq.SystemID, - // seq.ID from the second source + // [seq.ID] from the second source. {MID: 6}, {MID: 5}, - // seq.ID from the first source + // [seq.ID] from the first source. {MID: 3}, {MID: 2}, {MID: 1}, @@ -179,9 +191,9 @@ func TestMergeSource(t *testing.T) { require.Equal(t, []seq.DocPos{ seq.SystemDocPos, - // seq.DocPos from the second source + // [seq.DocPos] from the second source. seq.PackDocPos(1, 0), seq.PackDocPos(1, 2048), - // seq.DocPos from the first source + // [seq.DocPos] from the first source. seq.PackDocPos(0, 0), seq.PackDocPos(0, 1024), seq.PackDocPos(0, 2048), }, docpos, @@ -205,28 +217,28 @@ func TestMergeSource(t *testing.T) { } } - // Both sources have the same and the only field + // Both sources have the same and the only field. require.Equal(t, []string{"level"}, fields) - // Ensure tokens are sorted in ascending order + // Ensure tokens are sorted in ascending order. require.Equal(t, [][]byte{[]byte("debug"), []byte("error"), []byte("info")}, tokens, ) - // Ensure correctness of lids remapping - // ----------------- + // Ensure correctness of lids remapping: + // ------------------------- // seq.MID 6 5 | 3 2 1 // seq.LID (old) 1 2 | 1 2 3 // seq.LID (new) 1 2 | 3 4 5 - // ----------------- + // ------------------------- require.Equal(t, [][]uint32{ - // Sequence of [seq.LID] for token `debug` + // Sequence of [seq.LID] for token `debug`. {1}, - // Sequence of [seq.LID] for token `error` + // Sequence of [seq.LID] for token `error`. {3, 5}, - // Sequence of [seq.LID] for token `info` + // Sequence of [seq.LID] for token `info`. {2, 4, 5}, }, lids, @@ -250,6 +262,7 @@ func TestMergeSource(t *testing.T) { require.NotNil(t, merged.Distribution) require.True(t, merged.IsIntersecting(finfo.From, finfo.To)) require.True(t, merged.IsIntersecting(sinfo.From, sinfo.To)) + require.True(t, merged.IsIntersecting(min(finfo.From, sinfo.From), max(finfo.To, sinfo.To))) }) } @@ -258,7 +271,7 @@ func BenchmarkMergeSource(b *testing.B) { numSources = 4 docsPerSource = 512_000 - // Total pairs of (field, token) will be + // Total count of pairs of (field, token) will be // [numFields] * [numTokens]. numFields = 512 numTokens = 16384