From 94a6f244f820f2bb188d498e6713147b2cdb91e0 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 24 Mar 2026 18:49:53 +0300 Subject: [PATCH 01/19] refactor: new sealing order --- cmd/index_analyzer/main.go | 109 +++-- consts/consts.go | 22 +- frac/active_sealing_source.go | 141 ++---- frac/fraction_concurrency_test.go | 6 +- frac/fraction_test.go | 22 +- frac/index_cache.go | 15 +- frac/remote.go | 121 +++-- frac/sealed.go | 246 +++++----- frac/sealed/sealing/blocks_builder.go | 374 ++++++++------- frac/sealed/sealing/blocks_builder_test.go | 256 ++++------- frac/sealed/sealing/index.go | 506 +++++++++------------ frac/sealed/sealing/sealer.go | 162 ++++--- frac/sealed/token/provider.go | 7 +- frac/sealed/token/table_entry.go | 2 +- frac/sealed/token/table_loader.go | 10 +- frac/sealed_loader.go | 140 +++--- fracmanager/cache_maintainer.go | 7 +- fracmanager/frac_manifest.go | 129 ++++-- 18 files changed, 1210 insertions(+), 1065 deletions(-) diff --git a/cmd/index_analyzer/main.go b/cmd/index_analyzer/main.go index b1b22323..4ea8dd44 100644 --- a/cmd/index_analyzer/main.go +++ b/cmd/index_analyzer/main.go @@ -5,12 +5,14 @@ import ( "fmt" "hash/fnv" "os" + "strings" "sync" "time" "github.com/alecthomas/units" "go.uber.org/zap" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/sealed" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/token" @@ -21,7 +23,7 @@ import ( // Launch as: // -// > go run ./cmd/index_analyzer/... ./data/*.index | tee ~/report.txt +// > go run ./cmd/index_analyzer/... ./data/*.info | tee ~/report.txt func main() { if len(os.Args) < 2 { fmt.Println("No args") @@ -73,45 +75,80 @@ func getCacheMaintainer() (*fracmanager.CacheMaintainer, func()) { } } +// basePath strips any known index suffix to return the fraction base path. 
+func basePath(path string) string { + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + if strings.HasSuffix(path, suffix) { + return path[:len(path)-len(suffix)] + } + } + return path +} + +func openFile(path string) *os.File { + f, err := os.Open(path) + if err != nil { + panic(err) + } + return f +} + func analyzeIndex( path string, cm *fracmanager.CacheMaintainer, - reader *storage.ReadLimiter, + rl *storage.ReadLimiter, mergedTokensUniq map[string]map[string]int, allTokensValuesUniq map[string]int, ) Stats { + base := basePath(path) + indexCache := cm.CreateIndexCache() + + // Open per-section files. + infoFile := openFile(base + consts.InfoFileSuffix) + tokenFile := openFile(base + consts.TokenFileSuffix) + lidFile := openFile(base + consts.LIDFileSuffix) + defer infoFile.Close() + defer tokenFile.Close() + defer lidFile.Close() + + infoReader := storage.NewIndexReader(rl, infoFile.Name(), infoFile, indexCache.InfoRegistry) + tokenReader := storage.NewIndexReader(rl, tokenFile.Name(), tokenFile, indexCache.TokenRegistry) + lidReader := storage.NewIndexReader(rl, lidFile.Name(), lidFile, indexCache.LIDRegistry) + + // --- Info --- var blockIndex uint32 - cache := cm.CreateIndexCache() - - f, err := os.Open(path) + infoData, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { - panic(err) + logger.Fatal("error reading info block", zap.String("file", infoFile.Name()), zap.Error(err)) } + var b sealed.BlockInfo + if err := b.Unpack(infoData); err != nil { + logger.Fatal("error unpacking block info", zap.Error(err)) + } + docsCount := int(b.Info.DocsTotal) - indexReader := storage.NewIndexReader(reader, f.Name(), f, cache.Registry) - - readBlock := func() []byte { - data, _, err := indexReader.ReadIndexBlock(blockIndex, nil) + // --- Tokens (.token file) --- + // Token blocks start at index 0, followed by an empty separator, then token 
table blocks. + blockIndex = 0 + readTokenBlock := func() []byte { + data, _, err := tokenReader.ReadIndexBlock(blockIndex, nil) blockIndex++ if err != nil { - logger.Fatal("error reading block", zap.String("file", f.Name()), zap.Error(err)) + logger.Fatal("error reading token block", zap.String("file", tokenFile.Name()), zap.Error(err)) } return data } - // load info - var b sealed.BlockInfo - if err := b.Unpack(readBlock()); err != nil { - logger.Fatal("error unpacking block info", zap.Error(err)) - } - - docsCount := int(b.Info.DocsTotal) - - // load tokens tokens := [][]byte{} for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator + data := readTokenBlock() + if len(data) == 0 { // empty block - section separator break } block := token.Block{} @@ -123,11 +160,10 @@ func analyzeIndex( } } - // load tokens table tokenTableBlocks := []token.TableBlock{} for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator + data := readTokenBlock() + if len(data) == 0 { // empty block - section separator break } block := token.TableBlock{} @@ -136,28 +172,25 @@ func analyzeIndex( } tokenTable := token.TableFromBlocks(tokenTableBlocks) - // skip position - blockIndex++ - - // skip IDS - for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator - break + // --- LIDs (.lid file) --- + blockIndex = 0 + readLIDBlock := func() []byte { + data, _, err := lidReader.ReadIndexBlock(blockIndex, nil) + blockIndex++ + if err != nil { + logger.Fatal("error reading lid block", zap.String("file", lidFile.Name()), zap.Error(err)) } - blockIndex++ // skip RID - blockIndex++ // skip Param + return data } - // load LIDs tid := 0 lidsTotal := 0 lidsUniq := map[[16]byte]int{} lidsLens := make([]int, len(tokens)) tokenLIDs := []uint32{} for { - data := readBlock() - if len(data) == 0 { // empty block - is section separator + data := readLIDBlock() + if len(data) == 0 { // empty block - section separator 
break } diff --git a/consts/consts.go b/consts/consts.go index ef84fd4a..7a8eb9a4 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -59,9 +59,25 @@ const ( SdocsTmpFileSuffix = "._sdocs" SdocsDelFileSuffix = ".sdocs.del" - IndexFileSuffix = ".index" - IndexTmpFileSuffix = "._index" - IndexDelFileSuffix = ".index.del" + InfoFileSuffix = ".info" + InfoTmpFileSuffix = "._info" + InfoDelFileSuffix = ".info.del" + + TokenFileSuffix = ".token" + TokenTmpFileSuffix = "._token" + TokenDelFileSuffix = ".token.del" + + OffsetsFileSuffix = ".offsets" + OffsetsTmpFileSuffix = "._offsets" + OffsetsDelFileSuffix = ".offsets.del" + + IDFileSuffix = ".id" + IDTmpFileSuffix = "._id" + IDDelFileSuffix = ".id.del" + + LIDFileSuffix = ".lid" + LIDTmpFileSuffix = "._lid" + LIDDelFileSuffix = ".lid.del" RemoteFractionSuffix = ".remote" diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 43ca0239..44aaa850 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -3,14 +3,12 @@ package frac import ( "bytes" "encoding/binary" - "errors" "io" "iter" "os" "path/filepath" "slices" "time" - "unsafe" "github.com/alecthomas/units" "go.uber.org/zap" @@ -160,45 +158,30 @@ func (src *ActiveSealingSource) Info() *common.Info { return src.info } -// TokenBlocks returns an iterator for token blocks for disk writing. -// Tokens are pre-sorted: first by fields, then lexicographically within each field. -// Each block contains up to blockSize bytes of data for efficient writing. 
-func (src *ActiveSealingSource) TokenBlocks(blockSize int) iter.Seq[[][]byte] { - const tokenLengthSize = int(unsafe.Sizeof(uint32(0))) - return func(yield func([][]byte) bool) { - if len(src.tids) == 0 { - return - } - if blockSize <= 0 { - src.lastErr = errors.New("sealing: token block size must be > 0") - return - } - - actualSize := 0 - block := make([][]byte, 0, blockSize) +// TokenAndLIDs returns an iterator that yields one (token, lids) pair at a time, in TID order. +// Tokens are pre-sorted: first by field, then lexicographically within each field. +// The lids slice is reused between yields and must not be retained by the caller. +func (src *ActiveSealingSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { + return func(yield func([]byte, []uint32) bool) { + var lidBuf []uint32 - // Iterate through all sorted TIDs for _, tid := range src.tids { - if actualSize >= blockSize { - if !yield(block) { - return - } - actualSize = 0 - block = block[:0] + oldLIDs := src.lids[tid].GetLIDs(src.mids, src.rids) + + lidBuf = slices.Grow(lidBuf[:0], len(oldLIDs)) + for _, lid := range oldLIDs { + lidBuf = append(lidBuf, src.oldToNewLIDs[lid]) + } + + if !yield(src.tokens[tid], lidBuf) { + return } - token := src.tokens[tid] - actualSize += tokenLengthSize // Add the size of the token length field - actualSize += len(token) // Add the size of the token itself - block = append(block, token) } - yield(block) } } -// Fields returns an iterator for sorted fields and their maximum TIDs. -// Fields are sorted lexicographically, ensuring predictable order -// when building disk index structures. -func (src *ActiveSealingSource) Fields() iter.Seq2[string, uint32] { +// Field returns an iterator for sorted fields and their maximum TIDs. 
+func (src *ActiveSealingSource) Field() iter.Seq2[string, uint32] { return func(yield func(string, uint32) bool) { for i, field := range src.fields { if !yield(field, src.fieldsMaxTIDs[i]) { @@ -208,75 +191,37 @@ func (src *ActiveSealingSource) Fields() iter.Seq2[string, uint32] { } } -// IDsBlocks returns an iterator for document ID blocks and corresponding positions. -// IDs are sorted. Block size is controlled by blockSize parameter for balance between -// performance and memory usage. -func (src *ActiveSealingSource) IDsBlocks(blockSize int) iter.Seq2[[]seq.ID, []seq.DocPos] { - return func(yield func([]seq.ID, []seq.DocPos) bool) { +// ID returns an iterator for document IDs and their positions, one pair at a time. +func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { mids := src.mids.vals rids := src.rids.vals - ids := make([]seq.ID, 0, blockSize) - pos := make([]seq.DocPos, 0, blockSize) - - // First reserved ID (system). This position is not used because Local IDs (LIDs) use 1-based indexing. - ids = append(ids, seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}) - pos = append(pos, 0) + // First reserved ID (system). Position unused; LIDs use 1-based indexing. 
+ if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { + return + } - // Iterate through sorted LIDs for i, lid := range src.sortedLIDs { - if len(ids) == blockSize { - if !yield(ids, pos) { - return - } - ids = ids[:0] - pos = pos[:0] - } id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} - ids = append(ids, id) - - // Use sorted or original positions + var pos seq.DocPos if len(src.docPosSorted) == 0 { - pos = append(pos, src.docPosMap[id]) + pos = src.docPosMap[id] } else { - pos = append(pos, src.docPosSorted[i+1]) // +1 for system document + pos = src.docPosSorted[i+1] // +1 for system document + } + if !yield(id, pos) { + return } } - yield(ids, pos) } } -// BlocksOffsets returns document block offsets. -func (src *ActiveSealingSource) BlocksOffsets() []uint64 { +// BlockOffsets returns document block offsets. +func (src *ActiveSealingSource) BlockOffsets() []uint64 { return src.blocksOffsets } -// TokenLIDs returns an iterator for LID lists for each token. -// LIDs are converted to new numbering after document sorting. -// Each iterator call returns a list of documents containing a specific token, -// in sorted order. -func (src *ActiveSealingSource) TokenLIDs() iter.Seq[[]uint32] { - return func(yield func([]uint32) bool) { - newLIDs := []uint32{} - - // For each sorted TID - for _, tid := range src.tids { - // Get original LIDs for this token - oldLIDs := src.lids[tid].GetLIDs(src.mids, src.rids) - newLIDs = slices.Grow(newLIDs[:0], len(oldLIDs)) - - // Convert old LIDs to new through mapping - for _, lid := range oldLIDs { - newLIDs = append(newLIDs, src.oldToNewLIDs[lid]) - } - - if !yield(newLIDs) { - return - } - } - } -} - // makeInverser creates an array for converting old LIDs to new ones. 
// sortedLIDs[i] = oldLID -> inverser[oldLID] = i+1 func makeInverser(sortedLIDs []uint32) []uint32 { @@ -297,22 +242,18 @@ func (src *ActiveSealingSource) Docs() iter.Seq2[seq.ID, []byte] { curDoc []byte ) - // Iterate through ID and position blocks - for ids, pos := range src.IDsBlocks(consts.IDsPerBlock) { - for i, id := range ids { - if id == systemSeqID { - curDoc = nil // reserved system document (no payload) - } else if id != prev { - // If ID changed, read new document - if curDoc, src.lastErr = src.doc(pos[i]); src.lastErr != nil { - return - } - } - prev = id - if !yield(id, curDoc) { + for id, pos := range src.ID() { + if id == systemSeqID { + curDoc = nil // reserved system document (no payload) + } else if id != prev { + if curDoc, src.lastErr = src.doc(pos); src.lastErr != nil { return } } + prev = id + if !yield(id, curDoc) { + return + } } } } diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index a5c19b22..95e96637 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -360,7 +360,11 @@ func seal(active *Active) (*Sealed, error) { LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } sealed := NewSealedPreloaded( active.BaseFileName, diff --git a/frac/fraction_test.go b/frac/fraction_test.go index ec5f3d85..7326ce54 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -1841,8 +1841,8 @@ func (s *FractionTestSuite) TestFractionInfo() { "index on disk doesn't match. 
actual value: %d", info.IndexOnDisk) case *Remote: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") - s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1500), - "index on disk doesn't match. actual value: %d", info.MetaOnDisk) + s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1600), + "index on disk doesn't match. actual value: %d", info.IndexOnDisk) default: s.Require().Fail("unsupported fraction type") } @@ -2100,7 +2100,11 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } sealed := NewSealedPreloaded( @@ -2291,7 +2295,11 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } sealed = NewSealed( @@ -2359,7 +2367,11 @@ func (s *RemoteFractionTestSuite) SetupTest() { LIDs: cache.NewCache[*lids.Block](nil, nil), Tokens: cache.NewCache[*token.Block](nil, nil), TokenTable: cache.NewCache[token.Table](nil, nil), - Registry: cache.NewCache[[]byte](nil, nil), + InfoRegistry: cache.NewCache[[]byte](nil, nil), + TokenRegistry: cache.NewCache[[]byte](nil, 
nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), } remoteFrac := NewRemote( diff --git a/frac/index_cache.go b/frac/index_cache.go index 4536fa22..852fe51f 100644 --- a/frac/index_cache.go +++ b/frac/index_cache.go @@ -8,7 +8,14 @@ import ( ) type IndexCache struct { - Registry *cache.Cache[[]byte] + // Per-file registry caches (each IndexReader needs its own). + InfoRegistry *cache.Cache[[]byte] + TokenRegistry *cache.Cache[[]byte] + OffsetsRegistry *cache.Cache[[]byte] + IDRegistry *cache.Cache[[]byte] + LIDRegistry *cache.Cache[[]byte] + + // Block-level data caches shared across all readers. MIDs *cache.Cache[[]byte] RIDs *cache.Cache[seqids.BlockRIDs] Params *cache.Cache[seqids.BlockParams] @@ -18,11 +25,15 @@ type IndexCache struct { } func (s *IndexCache) Release() { + s.InfoRegistry.Release() + s.TokenRegistry.Release() + s.OffsetsRegistry.Release() + s.IDRegistry.Release() + s.LIDRegistry.Release() s.LIDs.Release() s.MIDs.Release() s.RIDs.Release() s.Params.Release() - s.Registry.Release() s.Tokens.Release() s.TokenTable.Release() } diff --git a/frac/remote.go b/frac/remote.go index 7658e80e..7da03205 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -45,9 +45,20 @@ type Remote struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - indexFile storage.ImmutableFile - indexCache *IndexCache - indexReader storage.IndexReader + // Per-section index files and their readers. 
+ infoFile storage.ImmutableFile + tokenFile storage.ImmutableFile + offsetsFile storage.ImmutableFile + idFile storage.ImmutableFile + lidFile storage.ImmutableFile + + infoReader storage.IndexReader + tokenReader storage.IndexReader + offsetsReader storage.IndexReader + idReader storage.IndexReader + lidReader storage.IndexReader + + indexCache *IndexCache loadMu *sync.RWMutex isLoaded bool @@ -100,15 +111,15 @@ func NewRemote( // I wrote a small proposal on how we can reduce impact of such events. // https://github.com/ozontech/seq-db/issues/92 - if err := f.openIndex(); err != nil { + if err := f.openInfoFile(); err != nil { logger.Error( - "cannot open index file: any subsequent operation will fail", + "cannot open info file: any subsequent operation will fail", zap.String("fraction", filepath.Base(f.BaseFileName)), zap.Error(err), ) } - f.info = loadHeader(f.indexFile, f.indexReader) + f.info = loadHeader(f.infoReader) return f } @@ -156,19 +167,21 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e return nil, err } return &sealedDataProvider{ - ctx: ctx, + ctx: ctx, + fractionTypeLabel: "remote", + info: f.info, config: f.Config, docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.indexReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.indexReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.indexReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.indexReader, + &f.idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -196,7 +209,11 @@ func (f 
*Remote) Suicide() { files := []string{ filepath.Base(f.BaseFileName) + consts.DocsFileSuffix, filepath.Base(f.BaseFileName) + consts.SdocsFileSuffix, - filepath.Base(f.BaseFileName) + consts.IndexFileSuffix, + filepath.Base(f.BaseFileName) + consts.InfoFileSuffix, + filepath.Base(f.BaseFileName) + consts.TokenFileSuffix, + filepath.Base(f.BaseFileName) + consts.OffsetsFileSuffix, + filepath.Base(f.BaseFileName) + consts.IDFileSuffix, + filepath.Base(f.BaseFileName) + consts.LIDFileSuffix, } err := f.s3cli.Remove(f.ctx, files...) @@ -227,38 +244,88 @@ func (f *Remote) load() error { return err } - if err := f.openIndex(); err != nil { + if err := f.openIndexFiles(); err != nil { return err } - (&Loader{}).Load(&f.blocksData, f.info, &f.indexReader) + readers := IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true return nil } -func (f *Remote) openIndex() error { - if f.indexFile != nil { +func (f *Remote) openInfoFile() error { + if f.infoFile != nil { return nil } + return f.openRemoteFile( + consts.InfoFileSuffix, + func(file storage.ImmutableFile) { + f.infoFile = file + f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + }, + ) +} + +func (f *Remote) openIndexFiles() error { + if err := f.openInfoFile(); err != nil { + return err + } + if f.tokenFile == nil { + if err := f.openRemoteFile(consts.TokenFileSuffix, func(file storage.ImmutableFile) { + f.tokenFile = file + f.tokenReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.TokenRegistry) + }); err != nil { + return err + } + } + if f.offsetsFile == nil { + if err := f.openRemoteFile(consts.OffsetsFileSuffix, func(file storage.ImmutableFile) { + f.offsetsFile = file + f.offsetsReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.OffsetsRegistry) + }); err != nil { 
+ return err + } + } + if f.idFile == nil { + if err := f.openRemoteFile(consts.IDFileSuffix, func(file storage.ImmutableFile) { + f.idFile = file + f.idReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.IDRegistry) + }); err != nil { + return err + } + } + if f.lidFile == nil { + if err := f.openRemoteFile(consts.LIDFileSuffix, func(file storage.ImmutableFile) { + f.lidFile = file + f.lidReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.LIDRegistry) + }); err != nil { + return err + } + } + return nil +} - name := filepath.Base(f.BaseFileName) + consts.IndexFileSuffix +func (f *Remote) openRemoteFile(suffix string, assign func(storage.ImmutableFile)) error { + name := filepath.Base(f.BaseFileName) + suffix ok, err := f.s3cli.Exists(f.ctx, name) if err != nil { - return fmt.Errorf( - "cannot check existence of %q file: %w", - consts.IndexFileSuffix, err, - ) + return fmt.Errorf("cannot check existence of %q file: %w", suffix, err) } - - if ok { - f.indexFile = s3.NewReader(f.ctx, f.s3cli, name) - f.indexReader = storage.NewIndexReader(f.readLimiter, f.indexFile.Name(), f.indexFile, f.indexCache.Registry) - return nil + if !ok { + return fmt.Errorf("missing %q file", suffix) } - return fmt.Errorf("missing %q file", consts.IndexFileSuffix) + assign(s3.NewReader(f.ctx, f.s3cli, name)) + return nil } func (f *Remote) openDocs() error { diff --git a/frac/sealed.go b/frac/sealed.go index bda4fc72..7c419120 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -39,9 +39,20 @@ type Sealed struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - indexFile *os.File - indexCache *IndexCache - indexReader storage.IndexReader + // Per-section index files and their readers. 
+ infoFile *os.File + tokenFile *os.File + offsetsFile *os.File + idFile *os.File + lidFile *os.File + + infoReader storage.IndexReader + tokenReader storage.IndexReader + offsetsReader storage.IndexReader + idReader storage.IndexReader + lidReader storage.IndexReader + + indexCache *IndexCache loadMu *sync.RWMutex isLoaded bool @@ -88,38 +99,83 @@ func NewSealed( skipMaskProvider: skipMaskProvider, } - // fast path if fraction-info cache exists AND it has valid index size + // Fast path: if info cache has valid index size, skip opening the info file now. if info != nil && info.IndexOnDisk > 0 { return f } - f.openIndex() - f.info = loadHeader(f.indexFile, f.indexReader) + f.openInfoFile() + f.info = loadHeader(f.infoReader) + f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName) return f } -func (f *Sealed) openIndex() { - if f.indexFile == nil { - var err error - name := f.BaseFileName + consts.IndexFileSuffix - f.indexFile, err = os.Open(name) +func (f *Sealed) openInfoFile() { + if f.infoFile == nil { + name := f.BaseFileName + consts.InfoFileSuffix + file, err := os.Open(name) if err != nil { - logger.Fatal("can't open index file", zap.String("file", name), zap.Error(err)) + logger.Fatal("can't open info file", zap.String("file", name), zap.Error(err)) } - f.indexReader = storage.NewIndexReader(f.readLimiter, f.indexFile.Name(), f.indexFile, f.indexCache.Registry) + f.infoFile = file + f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + } +} + +func (f *Sealed) openIndexFiles() { + f.openInfoFile() + + if f.tokenFile == nil { + name := f.BaseFileName + consts.TokenFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open token file", zap.String("file", name), zap.Error(err)) + } + f.tokenFile = file + f.tokenReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.TokenRegistry) + } + + if f.offsetsFile == nil { + name := f.BaseFileName + 
consts.OffsetsFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open offsets file", zap.String("file", name), zap.Error(err)) + } + f.offsetsFile = file + f.offsetsReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.OffsetsRegistry) + } + + if f.idFile == nil { + name := f.BaseFileName + consts.IDFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open id file", zap.String("file", name), zap.Error(err)) + } + f.idFile = file + f.idReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.IDRegistry) + } + + if f.lidFile == nil { + name := f.BaseFileName + consts.LIDFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open lid file", zap.String("file", name), zap.Error(err)) + } + f.lidFile = file + f.lidReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.LIDRegistry) } } func (f *Sealed) openDocs() { if f.docsFile == nil { var err error - f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) // try first open *.sdocs file + f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) if err != nil { if !errors.Is(err, os.ErrNotExist) { logger.Fatal("can't open sdocs file", zap.String("frac", f.BaseFileName), zap.Error(err)) } - f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) // fallback to *.docs file + f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) if err != nil { logger.Fatal("can't open docs file", zap.String("frac", f.BaseFileName), zap.Error(err)) } @@ -154,13 +210,13 @@ func NewSealedPreloaded( skipMaskProvider: skipMaskProvider, } - // put the token table built during sealing into the cache of the sealed fraction + // Put token table built during sealing into the cache. 
indexCache.TokenTable.Get(token.CacheKeyTable, func() (token.Table, int) { return preloaded.TokenTable, preloaded.TokenTable.Size() }) f.openDocs() - f.openIndex() + f.openIndexFiles() docsCountK := float64(f.info.DocsTotal) / 1000 logger.Info("sealed fraction created from active", @@ -181,33 +237,41 @@ func (f *Sealed) load() { defer f.loadMu.Unlock() if !f.isLoaded { - f.openDocs() - f.openIndex() - - (&Loader{}).Load(&f.blocksData, f.info, &f.indexReader) + f.openIndexFiles() + + readers := IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true } } -// Offload saves `.docs` (or `.sdocs`) and `.index` files into remote storage. -// It does not free any of the occupied memory (nor on disk nor in memory). +// Offload saves all index files and docs to remote storage. func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) { f.loadMu.Lock() f.openDocs() - f.openIndex() + f.openIndexFiles() f.loadMu.Unlock() g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) - g.Go(func() error { return u.Upload(gctx, f.indexFile) }) + g.Go(func() error { return u.Upload(gctx, f.infoFile) }) + g.Go(func() error { return u.Upload(gctx, f.tokenFile) }) + g.Go(func() error { return u.Upload(gctx, f.offsetsFile) }) + g.Go(func() error { return u.Upload(gctx, f.idFile) }) + g.Go(func() error { return u.Upload(gctx, f.lidFile) }) if err := g.Wait(); err != nil { return true, err } remoteFracName := f.BaseFileName + consts.RemoteFractionSuffix - file, err := os.Create(remoteFracName) if err != nil { return true, err @@ -219,15 +283,11 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { - if f.docsFile != nil { - if err := f.docsFile.Close(); err != nil { - logger.Error("can't close docs file", zap.String("frac", 
f.BaseFileName), zap.Error(err)) - } - } - - if f.indexFile != nil { - if err := f.indexFile.Close(); err != nil { - logger.Error("can't close index file", zap.String("frac", f.BaseFileName), zap.Error(err)) + for _, file := range []*os.File{f.docsFile, f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} { + if file != nil { + if err := file.Close(); err != nil { + logger.Error("can't close file", zap.String("file", file.Name()), zap.Error(err)) + } } } @@ -238,67 +298,46 @@ func (f *Sealed) Release() { func (f *Sealed) Suicide() { f.Release() - // make some atomic magic, to be more stable on removing fractions + // Rename docs atomically first — this commits the intent to delete. oldPath := f.BaseFileName + consts.DocsFileSuffix newPath := f.BaseFileName + consts.DocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename docs file", - zap.String("old_path", oldPath), - zap.String("new_path", newPath), - zap.Error(err), - ) + logger.Error("can't rename docs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) } oldPath = f.BaseFileName + consts.SdocsFileSuffix newPath = f.BaseFileName + consts.SdocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename sdocs file", - zap.String("old_path", oldPath), - zap.String("new_path", newPath), - zap.Error(err), - ) + logger.Error("can't rename sdocs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) } if f.PartialSuicideMode == HalfRename { return } - oldPath = f.BaseFileName + consts.IndexFileSuffix - newPath = f.BaseFileName + consts.IndexDelFileSuffix - if err := os.Rename(oldPath, newPath); err != nil { - logger.Error("can't rename index file", - zap.String("old_path", oldPath), - zap.String("new_path", newPath), - zap.Error(err), - ) - } - - rmPath := f.BaseFileName + consts.DocsDelFileSuffix - if err := 
os.Remove(rmPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove docs file", - zap.String("file", rmPath), - zap.Error(err), - ) + // Delete all index files directly (they are regenerable; no atomic rename needed). + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + if err := os.Remove(f.BaseFileName + suffix); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Error("can't remove index file", zap.String("file", f.BaseFileName+suffix), zap.Error(err)) + } } - rmPath = f.BaseFileName + consts.SdocsDelFileSuffix - if err := os.Remove(rmPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove sdocs file", - zap.String("file", rmPath), - zap.Error(err), - ) + if err := os.Remove(f.BaseFileName + consts.DocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Error("can't remove docs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) } if f.PartialSuicideMode == HalfRemove { return } - rmPath = f.BaseFileName + consts.IndexDelFileSuffix - if err := os.Remove(rmPath); err != nil { - logger.Error("can't remove index file", - zap.String("file", rmPath), - zap.Error(err), - ) + if err := os.Remove(f.BaseFileName + consts.SdocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Error("can't remove sdocs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) } f.skipMaskProvider.RemoveFrac(f.info.Name()) @@ -311,14 +350,12 @@ func (f *Sealed) String() string { func (f *Sealed) Fetch(ctx context.Context, ids []seq.ID) ([][]byte, error) { dp := f.createDataProvider(ctx) defer dp.release() - return dp.Fetch(ids) } func (f *Sealed) Search(ctx context.Context, params processor.SearchParams) (*seq.QPR, error) { dp := f.createDataProvider(ctx) defer dp.release() - return dp.Search(params) } @@ -340,13 +377,13 @@ func (f *Sealed) 
createDataProvider(ctx context.Context) *sealedDataProvider { docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.indexReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.indexReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.indexReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.indexReader, + &f.idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -370,39 +407,34 @@ func (f *Sealed) IsIntersecting(from, to seq.MID) bool { return f.info.IsIntersecting(from, to) } -func loadHeader( - indexFile storage.ImmutableFile, - indexReader storage.IndexReader, -) *common.Info { - block, _, err := indexReader.ReadIndexBlock(0, nil) +func loadHeader(infoReader storage.IndexReader) *common.Info { + block, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { - logger.Fatal( - "error reading info block from index", - zap.String("file", indexFile.Name()), - zap.Error(err), - ) + logger.Fatal("error reading info block", zap.Error(err)) } var bi sealed.BlockInfo if err := bi.Unpack(block); err != nil { - logger.Fatal( - "error unpacking info block", - zap.String("file", indexFile.Name()), - zap.Error(err), - ) + logger.Fatal("error unpacking info block", zap.Error(err)) } - info := bi.Info + return bi.Info +} - // set index size - stat, err := indexFile.Stat() - if err != nil { - logger.Fatal( - "can't stat index file", - zap.String("file", indexFile.Name()), - zap.Error(err), - ) +// computeIndexOnDisk returns the total on-disk size of all 5 index files for a local fraction. 
+func computeIndexOnDisk(basePath string) uint64 { + var total int64 + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + st, err := os.Stat(basePath + suffix) + if err != nil { + logger.Fatal("can't stat index file", zap.String("file", basePath+suffix), zap.Error(err)) + } + total += st.Size() } - - info.IndexOnDisk = uint64(stat.Size()) - return info + return uint64(total) } diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 14a5cac7..9090db18 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "errors" "iter" + "unsafe" "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/seqids" @@ -54,9 +55,9 @@ func (bb *blocksBuilder) LastError() error { return bb.lastErr } -// BuildTokenBlocks converts token batches into token blocks with field tables. The function creates an iterator -// that returns token blocks and corresponding field tables describing which fields are covered by which tokens -// in the block. +// BuildTokenBlocks converts scalar (token, lids) pairs into token blocks with field tables. +// accumulate is called for each token's LIDs immediately during iteration — the caller must not +// retain the slice after accumulate returns. Errors from accumulate are stored in bb.lastErr. // // Visualization of relationships between fields, tokens, and table entries: // @@ -64,72 +65,85 @@ func (bb *blocksBuilder) LastError() error { // Token Blocks: [.t1.t2.t3.t4.][.t5.t6.t7.t8.][.t9....etc...][.............][.............][.............] // Field Entries: {-----f1------}{-f1-}{---f2--}{--f2--}{-f3--}{------f3-----}{-f3-}{----f4-}{-----f4------} // -// So we split field ranges into field entries - sub-ranges of fields aligned to block boundaries.
-// Each field table (token.FieldTable) links a field to a blocks and token ranges inside the blocks. -// // Parameters: -// - tokenBatches: Iterator of token batches, where each batch becomes a separate block +// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs // - fields: Iterator of [fieldName, maxTID] pairs for all fields in ascending TID order -// -// Returns: Iterator of [token block, field table for block] pairs, where field table contains -// information about which fields and their ranges are represented in this block. +// - blockSize: Maximum payload size in bytes per token block +// - accumulate: Called for each token's LIDs before the source advances to the next token func (bb *blocksBuilder) BuildTokenBlocks( - tokenBatches iter.Seq[[][]byte], + tokens iter.Seq2[[]byte, []uint32], fields iter.Seq2[string, uint32], + accumulate func([]uint32) error, + blockSize int, ) iter.Seq2[tokensSealBlock, []token.FieldTable] { return func(yield func(tokensSealBlock, []token.FieldTable) bool) { - // Create pull iterator for fields - convert Seq2 to a function that can be called on demand - getNextField, stop := iter.Pull2(fields) + nextField, stop := iter.Pull2(fields) defer stop() var ( hasMore bool - currentTID uint32 = 1 // Current TID to process - fieldMaxTID uint32 = 0 // Maximum TID of current field (0 = field not yet selected) - fieldName string // Current field name + currentTID uint32 = 1 + fieldMaxTID uint32 = 0 + fieldName string ) - // Iterate through all token blocks created from batches - for idx, block := range createTokensSealBlocks(tokenBatches) { - table := []token.FieldTable{} - // Process all TIDs in current block (from currentTID to block.ext.maxTID) + // Just wrap `accumulate` function to be able + // to track returned errors.
+ accumulate := func(lids []uint32) error { + if err := accumulate(lids); err != nil { + bb.lastErr = err + return err + } + return nil + } + + for blockIdx, block := range seqBlockToken(tokens, blockSize, accumulate) { + if bb.lastErr != nil { + return + } + + // A block may span multiple fields, and a field may span multiple blocks. + // We emit one TableEntry per (field, block) intersection so that lookups + // can find the exact position of any token given its field and TID. + var table []token.FieldTable for currentTID <= block.ext.maxTID { - // If current field doesn't cover currentTID, get next field - // This happens when: 1) field not yet selected, 2) current field has ended if fieldMaxTID < currentTID { - if fieldName, fieldMaxTID, hasMore = getNextField(); !hasMore { + if fieldName, fieldMaxTID, hasMore = nextField(); !hasMore { bb.lastErr = errors.New("not enough fields to cover all TIDs") return } } - // Entry covers TIDs from currentTID to min(fieldMaxTID, block.ext.maxTID) - entry := createTokenTableEntry(currentTID, fieldMaxTID, idx, block) - table = append(table, token.FieldTable{Field: fieldName, Entries: []*token.TableEntry{entry}}) + + entry := newTokenTableEntry(currentTID, fieldMaxTID, blockIdx, block) currentTID += entry.ValCount + + table = append(table, token.FieldTable{ + Field: fieldName, + Entries: []*token.TableEntry{entry}}, + ) } if !yield(block, table) { - return // Consumer requested stop + return } } - // Verify consistency + if bb.lastErr != nil { + return + } + if currentTID-1 != fieldMaxTID { bb.lastErr = errors.New("fields and tokens not consistent") - } else if _, _, hasMore = getNextField(); hasMore { + } else if _, _, hasMore = nextField(); hasMore { bb.lastErr = errors.New("excess field after processing all blocks") } } } -// createTokenTableEntry creates a token table entry for a field-block span. -// Calculates the range of tokens belonging to a field within a specific block. 
-// Parameters: -// - entryStartTID: Starting token ID for this entry -// - fieldMaxTID: Maximum token ID for the field -// - blockIndex: Index of the current token block -// - block: Current token block data -func createTokenTableEntry(entryStartTID, fieldMaxTID, blockIndex uint32, block tokensSealBlock) *token.TableEntry { +func newTokenTableEntry( + entryStartTID, fieldMaxTID, + blockIndex uint32, block tokensSealBlock, +) *token.TableEntry { // Convert global TIDs to block-local indices firstIndex := entryStartTID - block.ext.minTID lastIndex := min(fieldMaxTID, block.ext.maxTID) - block.ext.minTID @@ -148,159 +162,197 @@ func createTokenTableEntry(entryStartTID, fieldMaxTID, blockIndex uint32, block } } -// BuildLIDsBlocks constructs LID blocks from Token LID sequences. -// Processes LIDs grouped by TID and creates optimally sized blocks: -// - Splits large LID sequences across multiple blocks -// - Tracks continuation status between blocks +// seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. +// A new block is yielded every `blockSize` IDs. +func seqBlockID( + ids iter.Seq2[seq.ID, seq.DocPos], + blockSize int, +) iter.Seq[idsSealBlock] { + return func(yield func(idsSealBlock) bool) { + var block idsSealBlock + + for id, pos := range ids { + block.mids.Values = append(block.mids.Values, uint64(id.MID)) + block.rids.Values = append(block.rids.Values, uint64(id.RID)) + block.params.Values = append(block.params.Values, uint64(pos)) + + if len(block.mids.Values) == blockSize { + if !yield(block) { + return + } + + block.mids.Values = block.mids.Values[:0] + block.rids.Values = block.rids.Values[:0] + block.params.Values = block.params.Values[:0] + } + } + + if len(block.mids.Values) > 0 { + yield(block) + } + } +} + +// seqBlockToken accumulates scalar (token, lids) pairs into sealed token blocks. +// A new block is started whenever the accumulated payload would exceed blockSize bytes. 
+// accumulate is called for each token's LIDs immediately during iteration — the caller must not +// retain the slice after accumulate returns. If accumulate returns a non-nil error, iteration stops. // // Parameters: -// - tokenLIDs: Sequence of LID arrays, one per TokenID, in TID order -// - blockCapacity: Maximum number of LIDs per block +// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs +// - blockSize: Maximum payload size in bytes before starting a new block +// - accumulate: Called for each token's LIDs before the source advances to the next token // // Returns: -// - iter.Seq[lidsSealBlock]: Sequence of sealed LID blocks -func (bb *blocksBuilder) BuildLIDsBlocks(tokenLIDs iter.Seq[[]uint32], blockCapacity int) iter.Seq[lidsSealBlock] { - return func(yield func(lidsSealBlock) bool) { - if blockCapacity <= 0 { - bb.lastErr = errors.New("sealing: LID block size must be > 0") - return - } +// - iter.Seq2[uint32, tokensSealBlock]: Sequence of (block index, sealed token block) pairs +func seqBlockToken( + tokens iter.Seq2[[]byte, []uint32], + blockSize int, accumulate func([]uint32) error, +) iter.Seq2[uint32, tokensSealBlock] { + return func(yield func(uint32, tokensSealBlock) bool) { var ( - currentTID uint32 // Current TID being processed - currentBlock lidsSealBlock // Current block under construction - isEndOfToken bool // Flag for end of current token's LIDs - isContinued bool // Flag for block continuation + idx uint32 // 0-based block index + currentTID uint32 // monotonically increasing TID + block tokensSealBlock // block under construction + actualSize int // accumulated payload bytes ) - // Initialize first block - currentBlock.ext.minTID = 1 - currentBlock.payload = lids.Block{ - LIDs: make([]uint32, 0, blockCapacity), // Pre-allocate with capacity - Offsets: []uint32{0}, // Start with initial offset - } + block.ext.minTID = 1 + flush := func() bool { + block.ext.maxTID = currentTID - // finalizeBlock prepares and yields the current block -
finalizeBlock := func() bool { - if !isEndOfToken { - // Add final offset for current token if not already done - currentBlock.payload.Offsets = append(currentBlock.payload.Offsets, uint32(len(currentBlock.payload.LIDs))) + if !yield(idx, block) { + return false } - currentBlock.payload.IsLastLID = isEndOfToken // TODO(eguguchkin): Remove legacy field - currentBlock.ext.isContinued = isContinued // TODO(eguguchkin): Remove legacy field - isContinued = !isEndOfToken - return yield(currentBlock) + + idx++ + + // We yielded complete token block several lines earlier. + // And now we prepare token block for the next batch. + block.payload.Payload = block.payload.Payload[:0] + block.payload.Offsets = block.payload.Offsets[:0] + + // Here we increment currentTID by one because + // it points to TID at the end of the *currently* yielded block. + block.ext.minTID = currentTID + 1 + + actualSize = 0 + return true } - // Process LIDs for each TID - for lidsBatch := range tokenLIDs { - currentTID++ + for token, lids := range tokens { + // We encode token as [size](4B)[token](?B). 
+ tokenSize := int(unsafe.Sizeof(uint32(0))) + len(token) - for _, lid := range lidsBatch { - // Check if block reached capacity - if len(currentBlock.payload.LIDs) == blockCapacity { - if !finalizeBlock() { - return - } - // Initialize new block - currentBlock.ext.minTID = currentTID - currentBlock.payload.LIDs = currentBlock.payload.LIDs[:0] - currentBlock.payload.Offsets = currentBlock.payload.Offsets[:1] // Reset to initial offset + needsFlushing := actualSize > 0 && + actualSize+tokenSize > blockSize + + if needsFlushing { + if !flush() { + return } + } + + block.payload.Offsets = append( + block.payload.Offsets, + uint32(len(block.payload.Payload)), + ) - isEndOfToken = false - currentBlock.ext.maxTID = currentTID - currentBlock.payload.LIDs = append(currentBlock.payload.LIDs, lid) // Add each LID to the block + block.payload.Payload = binary.LittleEndian.AppendUint32( + block.payload.Payload, + uint32(len(token)), + ) + + block.payload.Payload = append( + block.payload.Payload, + token..., + ) + + if err := accumulate(lids); err != nil { + return } - // Store offset and mark end of current token - currentBlock.payload.Offsets = append(currentBlock.payload.Offsets, uint32(len(currentBlock.payload.LIDs))) - isEndOfToken = true + currentTID += 1 + actualSize += tokenSize } - // Yield the final block - finalizeBlock() + if actualSize > 0 { + flush() + } } } -// createIDsSealBlocks converts sequences of IDs and positions into sealed ID blocks. 
-// Transforms raw ID sequences into optimized block format for storage: -// - Processes IDs in batches for efficiency -// - Maintains correlation between IDs and their positions -// - Creates separate slices for MIDs, RIDs, and positions -// -// Parameters: -// - idsBatches: Sequence of ID batches with corresponding document positions -// -// Returns: -// - iter.Seq[idsSealBlock]: Sequence of sealed ID blocks -func createIDsSealBlocks(idsBatches iter.Seq2[[]seq.ID, []seq.DocPos]) iter.Seq[idsSealBlock] { - return func(yield func(idsSealBlock) bool) { - block := idsSealBlock{} - - // Process each batch of IDs and positions - for ids, positions := range idsBatches { - // Reset block arrays for new batch - block.mids.Values = block.mids.Values[:0] - block.rids.Values = block.rids.Values[:0] - block.params.Values = block.params.Values[:0] - - // Convert each ID and position to storage format - for i, id := range ids { - block.mids.Values = append(block.mids.Values, uint64(id.MID)) - block.rids.Values = append(block.rids.Values, uint64(id.RID)) - block.params.Values = append(block.params.Values, uint64(positions[i])) - } +// lidBlocksAcc incrementally builds LID blocks from per-token LID lists. +// Call Add for each token's LIDs in TID order, passing a callback that is invoked +// for each completed block before its backing arrays are reused. +// Call Flush once after all Add calls to handle the final (possibly partial) block. +type lidBlocksAcc struct { + blockCap int + currentTID uint32 + currentBlock lidsSealBlock + isEndOfToken bool + isContinued bool +} - // Yield completed block - if !yield(block) { - return - } - } +func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { + a := &lidBlocksAcc{blockCap: blockCap} + a.currentBlock.ext.minTID = 1 + a.currentBlock.payload = lids.Block{ + LIDs: make([]uint32, 0, blockCap), + Offsets: []uint32{0}, } + return a } -// createTokensSealBlocks converts raw token sequences into sealed token blocks. 
-// Transforms batches of tokens into optimized storage format: -// - Merges a set of byte slices into a contiguous slice Payload and a slice of Offsets -// - Tracks token ID ranges for indexing [MinTID, MaxTID] +// Add processes LIDs of one token (must be called in TID order). // -// Parameters: -// - tokenBatches: Sequence of token batches to process -// -// Returns: -// - iter.Seq[uint32, tokensSealBlock]: Sequence of sealed token blocks with their indexes -func createTokensSealBlocks(tokenBatches iter.Seq[[][]byte]) iter.Seq2[uint32, tokensSealBlock] { - return func(yield func(uint32, tokensSealBlock) bool) { - var ( - idx uint32 // 1-based block index - currentTID uint32 // Current token ID counter - block tokensSealBlock // Current block under construction - ) +// For each block that fills up, `onBlock` is called immediately +// before the backing arrays are reset, so `onBlock` may read the +// block data but must not retain references to it. +func (a *lidBlocksAcc) Add(lids []uint32, onBlock func(lidsSealBlock) error) error { + a.currentTID++ + + for _, lid := range lids { + if len(a.currentBlock.payload.LIDs) == a.blockCap { + if err := onBlock(a.finalizeBlock()); err != nil { + return err + } - // Process each batch of tokens - for tokens := range tokenBatches { - idx++ - // Initialize new block - block.ext.minTID = currentTID + 1 - block.payload.Payload = block.payload.Payload[:0] - block.payload.Offsets = block.payload.Offsets[:0] + a.currentBlock.ext.minTID = a.currentTID + a.currentBlock.payload.LIDs = a.currentBlock.payload.LIDs[:0] + a.currentBlock.payload.Offsets = a.currentBlock.payload.Offsets[:1] + } - // Process each token in current batch - for _, tokenData := range tokens { - currentTID++ - // Store offset to current token - block.payload.Offsets = append(block.payload.Offsets, uint32(len(block.payload.Payload))) - // Store token length (little-endian) followed by token bytes - block.payload.Payload = 
binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tokenData))) - block.payload.Payload = append(block.payload.Payload, tokenData...) - } + a.isEndOfToken = false + a.currentBlock.ext.maxTID = a.currentTID + a.currentBlock.payload.LIDs = append(a.currentBlock.payload.LIDs, lid) + } - block.ext.maxTID = currentTID + a.isEndOfToken = true + a.currentBlock.payload.Offsets = append( + a.currentBlock.payload.Offsets, + uint32(len(a.currentBlock.payload.LIDs)), + ) - // Yield completed block - if !yield(idx, block) { - return - } - } + return nil +} + +func (a *lidBlocksAcc) Flush() lidsSealBlock { + return a.finalizeBlock() +} + +func (a *lidBlocksAcc) finalizeBlock() lidsSealBlock { + if !a.isEndOfToken { + a.currentBlock.payload.Offsets = append( + a.currentBlock.payload.Offsets, + uint32(len(a.currentBlock.payload.LIDs)), + ) } + + result := a.currentBlock + result.payload.IsLastLID = a.isEndOfToken + result.ext.isContinued = a.isContinued + a.isContinued = !a.isEndOfToken + + return result } diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index 80892ca2..d5637dc8 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -27,7 +27,7 @@ type mockSource struct { func (m *mockSource) Info() common.Info { return m.info } -func (m *mockSource) Fields() iter.Seq2[string, uint32] { +func (m *mockSource) Field() iter.Seq2[string, uint32] { return func(yield func(string, uint32) bool) { for i := range len(m.fields) { if !yield(m.fields[i], m.fieldMaxTIDs[i]) { @@ -37,56 +37,32 @@ func (m *mockSource) Fields() iter.Seq2[string, uint32] { } } -func (m *mockSource) IDsBlocks(size int) iter.Seq2[[]seq.ID, []seq.DocPos] { - return func(yield func([]seq.ID, []seq.DocPos) bool) { - ids := make([]seq.ID, 0, size) - pos := make([]seq.DocPos, 0, size) +func (m *mockSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { for i, 
id := range m.ids { - if len(ids) == size { - if !yield(ids, pos) { - return - } - ids = ids[:0] - pos = pos[:0] + if !yield(id, m.pos[i]) { + return } - ids = append(ids, id) - pos = append(pos, m.pos[i]) } - yield(ids, pos) } } -func (m *mockSource) TokenBlocks(size int) iter.Seq[[][]byte] { - return func(yield func([][]byte) bool) { - block := [][]byte{} - blockSize := 0 - for _, token := range m.tokens { - if blockSize >= size { - if !yield(block) { - return - } - blockSize = 0 - block = block[:0] +func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { + return func(yield func([]byte, []uint32) bool) { + for i, token := range m.tokens { + var lids []uint32 + if i < len(m.tokenLIDs) { + lids = m.tokenLIDs[i] } - block = append(block, token) - blockSize += len(token) + 4 - } - yield(block) - } -} - -func (m *mockSource) TokenLIDs() iter.Seq[[]uint32] { - return func(yield func([]uint32) bool) { - for _, lids := range m.tokenLIDs { - if !yield(lids) { + if !yield(token, lids) { return } } } } -func (m *mockSource) BlocksOffsets() []uint64 { return m.blocksOffsets } -func (m *mockSource) LastError() error { return m.lastError } +func (m *mockSource) BlockOffsets() []uint64 { return m.blocksOffsets } +func (m *mockSource) LastError() error { return m.lastError } func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { src := mockSource{ @@ -112,13 +88,43 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { }, fields: []string{"f1", "f2", "f3", "f4", "f5", "f6"}, fieldMaxTIDs: []uint32{2, 7, 9, 12, 13, 14}, + tokenLIDs: [][]uint32{ + {10, 20, 30, 40}, // 1 + {2}, // 2 + {3}, // 3 + {4}, // 4 + {5}, // 5 + {6}, // 6 + {7}, // 7 + {8}, // 8 + {9}, // 9 + {10}, // 10 + {11}, // 11 + {12}, // 12 + {13}, // 13 + {14}, // 14 + }, } // Block size in bytes. 
const blockSize = 24 - - bb := blocksBuilder{} - tokenBlocks := bb.BuildTokenBlocks(src.TokenBlocks(blockSize), src.Fields()) + const lidBlockCap = 3 + + var bb blocksBuilder + lidAccum := newLIDBlocksAccumulator(lidBlockCap) + var lidBlocks []lidsSealBlock + tokenBlocks := bb.BuildTokenBlocks( + src.TokenAndLIDs(), src.Field(), + func(lids []uint32) error { + return lidAccum.Add(lids, func(block lidsSealBlock) error { + block.payload.LIDs = slices.Clone(block.payload.LIDs) + block.payload.Offsets = slices.Clone(block.payload.Offsets) + lidBlocks = append(lidBlocks, block) + return nil + }) + }, + blockSize, + ) // In our test case, each token is 4 bytes long. Also for each token we use uint32 to encode the length. // So 3 tokens take up exactly 24 bytes. And we expect all token blocks to contain 3 tokens except the last one. @@ -128,11 +134,11 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { blockIndex := 0 allFieldsTables := []token.FieldTable{} - for block, fieldsTables := range tokenBlocks { - assert.Equal(t, expectedSizes[blockIndex], block.payload.Len()) - for i := range block.payload.Len() { + for result, fieldsTables := range tokenBlocks { + assert.Equal(t, expectedSizes[blockIndex], result.payload.Len()) + for i := range result.payload.Len() { tid++ - assert.Equal(t, src.tokens[tid-1], block.payload.GetToken(i)) + assert.Equal(t, src.tokens[tid-1], result.payload.GetToken(i)) } allFieldsTables = append(allFieldsTables, fieldsTables...) 
blockIndex++ @@ -149,7 +155,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 0, StartTID: 1, - BlockIndex: 1, + BlockIndex: 0, ValCount: 2, MinVal: "f1v1", MaxVal: "f1v2", @@ -161,21 +167,21 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 2, StartTID: 3, - BlockIndex: 1, + BlockIndex: 0, ValCount: 1, MinVal: "f2v1", MaxVal: "f2v1", }, { StartIndex: 0, StartTID: 4, - BlockIndex: 2, + BlockIndex: 1, ValCount: 3, MinVal: "f2v2", MaxVal: "f2v4", }, { StartIndex: 0, StartTID: 7, - BlockIndex: 3, + BlockIndex: 2, ValCount: 1, MinVal: "f2v5", MaxVal: "f2v5", @@ -187,7 +193,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 1, StartTID: 8, - BlockIndex: 3, + BlockIndex: 2, ValCount: 2, MinVal: "f3v1", MaxVal: "f3v2", @@ -199,7 +205,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 0, StartTID: 10, - BlockIndex: 4, + BlockIndex: 3, ValCount: 3, MinVal: "f4v1", MaxVal: "f4v3", @@ -211,7 +217,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 0, StartTID: 13, - BlockIndex: 5, + BlockIndex: 4, ValCount: 1, MinVal: "f5v1", MaxVal: "f5v1", @@ -223,7 +229,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { { StartIndex: 1, StartTID: 14, - BlockIndex: 5, + BlockIndex: 4, ValCount: 1, MinVal: "f6v1", MaxVal: "f6v1", @@ -233,6 +239,39 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { }, } assert.Equal(t, actualTokenTable.FieldsTables, expectedTokenTable.FieldsTables) + + finalBlock := lidAccum.Flush() + finalBlock.payload.LIDs = slices.Clone(finalBlock.payload.LIDs) + finalBlock.payload.Offsets = slices.Clone(finalBlock.payload.Offsets) + lidBlocks = append(lidBlocks, finalBlock) + + expectedLIDBlocks := []lidsSealBlock{ + { + ext: lidsExt{minTID: 1, maxTID: 1, isContinued: false}, + payload: lids.Block{LIDs: []uint32{10, 20, 30}, Offsets: []uint32{0, 3}, IsLastLID: false}, + }, + { + ext: lidsExt{minTID: 1, maxTID: 3, isContinued: true}, + 
payload: lids.Block{LIDs: []uint32{40, 2, 3}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 4, maxTID: 6, isContinued: false}, + payload: lids.Block{LIDs: []uint32{4, 5, 6}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 7, maxTID: 9, isContinued: false}, + payload: lids.Block{LIDs: []uint32{7, 8, 9}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 10, maxTID: 12, isContinued: false}, + payload: lids.Block{LIDs: []uint32{10, 11, 12}, Offsets: []uint32{0, 1, 2, 3}, IsLastLID: true}, + }, + { + ext: lidsExt{minTID: 13, maxTID: 14, isContinued: false}, + payload: lids.Block{LIDs: []uint32{13, 14}, Offsets: []uint32{0, 1, 2}, IsLastLID: true}, + }, + } + assert.Equal(t, expectedLIDBlocks, lidBlocks) } func TestBlocksBuilder_IDsBlocks(t *testing.T) { @@ -268,7 +307,7 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { i := 0 ids := []seq.ID{} pos := []seq.DocPos{} - for block := range createIDsSealBlocks(src.IDsBlocks(3)) { + for block := range seqBlockID(src.ID(), 3) { assert.Equal(t, expectedSizes[i], len(block.mids.Values)) assert.Equal(t, expectedSizes[i], len(block.rids.Values)) assert.Equal(t, expectedSizes[i], len(block.params.Values)) @@ -284,112 +323,3 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { assert.Equal(t, src.ids, ids) assert.Equal(t, src.pos, pos) } - -func TestBlocksBuilder_BuildLIDsBlocks(t *testing.T) { - src := mockSource{ - tokenLIDs: [][]uint32{ - { - 10, // block 1, tid 1 - 20, // block 1, tid 1 - 30, // block 1, tid 1 - - 40, // block 2, tid 1 - }, { - 11, // block 2, tid 2 - 21, // block 2, tid 2 - - 31, // block 3, tid 2 - 41, // block 3, tid 2 - }, { - 10, // block 3, tid 3 - - 11, // block 4, tid 3 - 20, // block 4, tid 3 - 21, // block 4, tid 3 - - }, { - 30, // block 5, tid 4 - 40, // block 5, tid 4 - 50, // block 5, tid 4 - - 60, // block 6, tid 4 - }, - }, - } - - expected := []lidsSealBlock{{ - ext: lidsExt{ - minTID: 1, - 
maxTID: 1, - isContinued: false, - }, - payload: lids.Block{ - LIDs: []uint32{10, 20, 30}, - Offsets: []uint32{0, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 1, - maxTID: 2, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{40, 11, 21}, - Offsets: []uint32{0, 1, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 2, - maxTID: 3, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{31, 41, 10}, - Offsets: []uint32{0, 2, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 3, - maxTID: 3, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{11, 20, 21}, - Offsets: []uint32{0, 3}, - IsLastLID: true, - }, - }, { - ext: lidsExt{ - minTID: 4, - maxTID: 4, - isContinued: false, - }, - payload: lids.Block{ - LIDs: []uint32{30, 40, 50}, - Offsets: []uint32{0, 3}, - IsLastLID: false, - }, - }, { - ext: lidsExt{ - minTID: 4, - maxTID: 4, - isContinued: true, - }, - payload: lids.Block{ - LIDs: []uint32{60}, - Offsets: []uint32{0, 1}, - IsLastLID: true, - }}, - } - bb := blocksBuilder{} - blocks := []lidsSealBlock{} - for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), 3) { - block.payload.LIDs = slices.Clone(block.payload.LIDs) // copy lids - block.payload.Offsets = slices.Clone(block.payload.Offsets) // copy offsets - blocks = append(blocks, block) - } - assert.Equal(t, expected, blocks) -} diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 491c7233..57a3b3ad 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -5,7 +5,6 @@ import ( "encoding/binary" "io" "iter" - "time" "github.com/alecthomas/units" @@ -22,25 +21,27 @@ import ( "github.com/ozontech/seq-db/zstd" ) -// IndexSealer is responsible for creating and writing the index structure for sealed fractions. 
-// It organizes data into blocks, compresses them, and builds the complete index file with: -// - Multiple data sections (info, tokens, token table, offsets, IDs, LIDs) -// - Compression using ZSTD with configurable levels -// - Registry for quick access to block locations -// - PreloadedData structures for fast initialization instance of sealed fraction +// IndexSealer writes sealed fraction index data across multiple files. +// Each Write*File method writes one section to an independent file using the +// standard [prefix][blocks][registry] format so each file has its own IndexReader. +// +// Call order matters for PreloadedData: +// +// WriteTokenAndLIDFiles → WriteOffsetsFile → WriteIDFile +// +// (TokenTable is populated by WriteTokenAndLIDFiles; IDsTable by WriteOffsetsFile+WriteIDFile; +// LIDsTable by WriteTokenAndLIDFiles.) type IndexSealer struct { - lastErr error // Last error encountered during processing - buf1 []byte // Reusable buffer for packing raw data before compression - buf2 []byte // Reusable buffer for compressed data - params common.SealParams // Configuration parameters for sealing process - - // PreloadedData structures built during sealing for fast initialization of sealed fraction - idsTable seqids.Table // Table mapping document IDs to blocks - lidsTable lids.Table // Table mapping token IDs to LID blocks - tokenTable token.Table // Table mapping fields to token blocks + lastErr error + buf1 []byte + buf2 []byte + params common.SealParams + + idsTable seqids.Table + lidsTable lids.Table + tokenTable token.Table } -// NewIndexSealer creates a new IndexSealer instance with the given parameters. func NewIndexSealer(params common.SealParams) *IndexSealer { return &IndexSealer{ params: params, @@ -49,75 +50,58 @@ func NewIndexSealer(params common.SealParams) *IndexSealer { } } -// indexBlock represents a single block of data in the index file. -// Each block can be compressed and contains metadata for efficient retrieval. 
+// indexBlock is one compressed (or not) block with its registry metadata. type indexBlock struct { - codec storage.Codec // Compression codec used (No compression or ZSTD) - payload []byte // The actual block data (may be compressed) - rawLen uint32 // Original uncompressed data length - ext1 uint64 // Extended metadata field 1 (block-specific usage) - ext2 uint64 // Extended metadata field 2 (block-specific usage) + codec storage.Codec + payload []byte + rawLen uint32 + ext1 uint64 + ext2 uint64 } -// Bin converts the indexBlock to its binary representation for storage. -// It creates a header with metadata and returns the header + payload. -// Parameters: -// - pos: The file position where this block will be written -// -// Returns: -// - storage.IndexBlockHeader: The block header with metadata -// - []byte: The payload data to write func (i indexBlock) Bin(pos int64) (storage.IndexBlockHeader, []byte) { - header := storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec) - return header, i.payload + return storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec), i.payload } -// WriteIndex writes the complete index structure to the provided writer. 
-// The index file structure: -// +----------------+----------------+----------------+ -// | Prefix | Data Blocks | Registry | -// | (16 bytes) | (multiple) | (block headers)| -// +----------------+----------------+----------------+ -// -// Prefix contains: -// - 8 bytes: Position of registry start -// - 8 bytes: Size of registry -// -// Parameters: -// - ws: WriteSeeker to write the index data to -// - src: Source interface providing the data to be sealed -// -// Returns: -// - error: Any error encountered during writing -func (s *IndexSealer) WriteIndex(ws io.WriteSeeker, src Source) error { - const prefixSize = 16 // Size of prefix that will hold registry position and size +const filePrefixSize = 16 - // Skip prefix area initially - we'll write it at the end - if _, err := ws.Seek(prefixSize, io.SeekStart); err != nil { +// write writes blocks to ws using [16-byte prefix][blocks][registry]. +// The prefix is written last (via seek-back) and stores registry position + size. +func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { return err } - // Create buffers for headers and payload writing - hw := bytes.NewBuffer(nil) // Headers writer - collects all block headers - bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) // Buffered writer for payload + hw := bytes.NewBuffer(nil) + bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) defer bytespool.ReleaseWriter(bw) - // Write all index blocks and collect headers - if err := s.writeBlocks(prefixSize, bw, hw, src); err != nil { - return err + pos := filePrefixSize + for block := range blocks { + if s.lastErr != nil { + return s.lastErr + } + header, payload := block.Bin(int64(pos)) + if _, err := bw.Write(payload); err != nil { + return err + } + if _, err := hw.Write(header); err != nil { + return err + } + pos += len(payload) + } + if s.lastErr != nil { + return s.lastErr } if err := bw.Flush(); err != nil { return 
err } - // Calculate registry position and size - size := hw.Len() // Registry size (all headers) - pos, err := ws.Seek(0, io.SeekEnd) // Current end position = registry start + size := hw.Len() + regPos, err := ws.Seek(0, io.SeekEnd) if err != nil { return err } - - // Write registry (all block headers) at the end of file if _, err := bw.Write(hw.Bytes()); err != nil { return err } @@ -125,325 +109,289 @@ func (s *IndexSealer) WriteIndex(ws io.WriteSeeker, src Source) error { return err } - // Write prefix at beginning of file with registry metadata - prefix := make([]byte, 0, prefixSize) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(pos)) // Registry position - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) // Registry size + prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) if _, err := ws.Seek(0, io.SeekStart); err != nil { return err } - if _, err = ws.Write(prefix); err != nil { - return err + _, err = ws.Write(prefix) + return err +} + +// fileStreamWriter writes blocks incrementally to a single file using the +// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. 
+type fileStreamWriter struct { + ws io.WriteSeeker + bw *bytespool.Writer + hw bytes.Buffer + pos int +} + +func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { + return nil, err } + return &fileStreamWriter{ + ws: ws, + bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), + pos: filePrefixSize, + }, nil +} +func (fw *fileStreamWriter) writeBlock(block indexBlock) error { + header, payload := block.Bin(int64(fw.pos)) + if _, err := fw.bw.Write(payload); err != nil { + return err + } + fw.hw.Write(header) // bytes.Buffer.Write never fails + fw.pos += len(payload) return nil } -// writeBlocks processes all index blocks from the source and writes them to the output. -// It simultaneously writes payload data to one writer and headers to another. -// Parameters: -// - pos: Starting position for the first block -// - payloadWriter: Writer for block payload data -// - headersWriter: Writer for block headers (registry) -// - src: Data source -// -// Returns: -// - error: Any error encountered during processing -func (s *IndexSealer) writeBlocks(pos int, payloadWriter, headersWriter io.Writer, src Source) error { - // Process each index block from the source - for block := range s.indexBlocks(src) { - header, payload := block.Bin(int64(pos)) - // Write payload to main data section - if _, err := payloadWriter.Write(payload); err != nil { - return err - } - // Write header to registry - if _, err := headersWriter.Write(header); err != nil { - return err - } - pos += len(payload) // Advance position for next block +func (fw *fileStreamWriter) finalize() (err error) { + defer fw.release() + if err = fw.bw.Flush(); err != nil { + return } - if s.lastErr != nil { - return s.lastErr + var regPos int64 + if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { + return } - return nil + if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { + return + } + if err = fw.bw.Flush(); err != nil { + 
return + } + prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) + if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { + return + } + _, err = fw.ws.Write(prefix) + return +} + +func (fw *fileStreamWriter) release() { + if fw.bw != nil { + bytespool.ReleaseWriter(fw.bw) + fw.bw = nil + } +} + +// WriteInfoFile writes the .info file containing a single BlockInfo block. +func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { + yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) + }) } -// indexBlocks generates a sequence of index blocks from the source data. -// The blocks are organized in specific sections: -// 1. Info Section - Basic fraction metadata -// 2. Tokens Section - Token data blocks -// 3. Token Table Section - Field-to-token mapping table -// 4. Offsets Section - Document block offsets -// 5. IDs Section - Document ID blocks (MIDs, RIDs, Positions) -// 6. LIDs Section - Token ID to LID mapping blocks +// WriteTokenAndLIDFiles writes the .token and .lid files in a single pass over the source data. // -// Returns: -// - iter.Seq[indexBlock]: Sequence of index blocks to write -func (s *IndexSealer) indexBlocks(src Source) iter.Seq[indexBlock] { - return func(yield func(indexBlock) bool) { - bb := blocksBuilder{} - blocksCounter := uint32(0) // Global block counter for indexing - statsOverall := startStats() // Overall statistics collector - - // Helper to push a block and update statistics - push := func(b indexBlock, statsSection *blocksStats) bool { - blocksCounter++ - statsOverall.takeStock(b) - statsSection.takeStock(b) - return yield(b) - } +// .token file: [token blocks...] [separator] [token-table block] [separator] +// .lid file: [LID blocks...] 
[separator] +// +// LID blocks are written interleaved with token block processing so that both files +// are produced from one sequential scan of the (token, LID) data. +func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src Source) error { + tokenFW, err := newFileStreamWriter(tokenWS) + if err != nil { + return err + } + defer tokenFW.release() - // Helper to write section separator (empty block) - sectionSeparator := func() bool { - blocksCounter++ - return yield(indexBlock{}) // empty block as separator - } + lidFW, err := newFileStreamWriter(lidWS) + if err != nil { + return err + } + defer lidFW.release() + + var ( + bb blocksBuilder + allFieldsTables []token.FieldTable + lidAccum = newLIDBlocksAccumulator(consts.LIDBlockCap) + ) + + accumulate := func(lids []uint32) error { + return lidAccum.Add(lids, func(block lidsSealBlock) error { + return lidFW.writeBlock(s.packLIDsBlock(block)) + }) + } - // SECTION 1: Info Section - statsInfo := startStats() - info := src.Info() - if !push(s.packInfoBlock(sealed.BlockInfo{Info: info}), &statsInfo) { - return - } + blocks := bb.BuildTokenBlocks( + src.TokenAndLIDs(), src.Field(), + accumulate, consts.RegularBlockSize, + ) - // SECTION 2: Tokens Section - statsTokens := startStats() - allFieldsTables := []token.FieldTable{} - tokensBlocks := bb.BuildTokenBlocks(src.TokenBlocks(consts.RegularBlockSize), src.Fields()) - for block, fieldsTables := range tokensBlocks { - if !push(s.packTokenBlock(block), &statsTokens) { - return - } - allFieldsTables = append(allFieldsTables, fieldsTables...) - } - if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { - return + for block, fieldsTables := range blocks { + if err := tokenFW.writeBlock(s.packTokenBlock(block)); err != nil { + return err } + allFieldsTables = append(allFieldsTables, fieldsTables...) 
+ } - if !sectionSeparator() { - return - } + if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { + return s.lastErr + } - // SECTION 3: Token Table Section - statsTokenTable := startStats() - tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if !push(s.packTokenTableBlock(tokenTableBlock), &statsTokenTable) { - return - } + // Write the final (possibly partial) LID block and trailing separator. + if err := lidFW.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { + return err + } - if !sectionSeparator() { - return - } + if err := lidFW.writeBlock(indexBlock{}); err != nil { // trailing separator + return err + } + + if err := lidFW.finalize(); err != nil { + return err + } - // SECTION 4: Offsets Section - statsOffsets := startStats() + // Write token section separator, token table, trailing separator. + if err := tokenFW.writeBlock(indexBlock{}); err != nil { // section separator + return err + } + tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} + if err := tokenFW.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { + return err + } + if err := tokenFW.writeBlock(indexBlock{}); err != nil { // trailing separator + return err + } + return tokenFW.finalize() +} + +// WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. 
+func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { offsets := sealed.BlockOffsets{ - IDsTotal: info.DocsTotal + 1, // +1 for system ID at position zero - Offsets: src.BlocksOffsets(), - } - if !push(s.packBlocksOffsetsBlock(offsets), &statsOffsets) { - return + IDsTotal: src.Info().DocsTotal + 1, + Offsets: src.BlockOffsets(), } + yield(s.packBlocksOffsetsBlock(offsets)) + }) +} - // SECTION 5: IDs Section - s.idsTable.StartBlockIndex = blocksCounter // Record starting position for IDs blocks - statsMIDs, statsRIDs, statsParams := startStats(), startStats(), startStats() - for block := range createIDsSealBlocks(src.IDsBlocks(consts.IDsPerBlock)) { - if !push(s.packMIDsBlock(block), &statsMIDs) { - return - } - if !push(s.packRIDsBlock(block), &statsRIDs) { +func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { + for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if !yield(s.packMIDsBlock(block)) { return } - if !push(s.packPosBlock(block), &statsParams) { + + if !yield(s.packRIDsBlock(block)) { return } - } - if s.lastErr = src.LastError(); s.lastErr != nil { - return - } - - if !sectionSeparator() { - return - } - // SECTION 6: LIDs Section - statsLIDs := startStats() - s.lidsTable.StartBlockIndex = blocksCounter - for block := range bb.BuildLIDsBlocks(src.TokenLIDs(), consts.LIDBlockCap) { - if !push(s.packLIDsBlock(block), &statsLIDs) { + if !yield(s.packPosBlock(block)) { return } } - if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { - return - } - if !sectionSeparator() { + if s.lastErr = src.LastError(); s.lastErr != nil { return } - // Log statistics for all sections - endTime := time.Now() - statsInfo.log("info", statsTokens.start) - statsTokens.log("tokens", statsTokenTable.start) - statsTokenTable.log("tokenTable", statsOffsets.start) - 
statsOffsets.log("offsets", statsMIDs.start) - statsMIDs.log("mids", statsLIDs.start) - statsRIDs.log("rids", statsLIDs.start) - statsParams.log("pos", statsLIDs.start) - statsLIDs.log("lids", endTime) - statsOverall.log("overall", endTime) - } + yield(indexBlock{}) // trailing separator + }) } -// collapseOrderedFieldsTables merges field tables with identical field names -// Assumes the input array is already sorted by the Field property +// collapseOrderedFieldsTables merges FieldTables with the same field name. +// Assumes input is sorted by Field. func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { if len(src) == 0 { return nil } + current := src[0] - dst := []token.FieldTable{} + var dst []token.FieldTable for _, ft := range src[1:] { if current.Field == ft.Field { current.Entries = append(current.Entries, ft.Entries...) continue } + dst = append(dst, current) current = ft } - dst = append(dst, current) - return dst + + return append(dst, current) } -// newIndexBlock creates an uncompressed index block. func newIndexBlock(raw []byte) indexBlock { - return indexBlock{ - codec: storage.CodecNo, - rawLen: uint32(len(raw)), - payload: raw, - } + return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } -// newIndexBlockZSTD creates a compressed index block using ZSTD compression. -// Falls back to uncompressed if compression doesn't provide benefits. func (s *IndexSealer) newIndexBlockZSTD(raw []byte, level int) indexBlock { s.buf2 = zstd.CompressLevel(raw, s.buf2[:0], level) - // Only use compression if it actually reduces size if len(s.buf2) < len(raw) { - return indexBlock{ - codec: storage.CodecZSTD, - rawLen: uint32(len(raw)), - payload: s.buf2, - } + return indexBlock{codec: storage.CodecZSTD, rawLen: uint32(len(raw)), payload: s.buf2} } return newIndexBlock(raw) } -// packInfoBlock packs fraction information into an index block. 
func (s *IndexSealer) packInfoBlock(block sealed.BlockInfo) indexBlock { s.buf1 = block.Pack(s.buf1[:0]) - return newIndexBlock(s.buf1) // Info block is typically small, no compression + return newIndexBlock(s.buf1) } -// packTokenBlock packs token data into a compressed index block. func (s *IndexSealer) packTokenBlock(block tokensSealBlock) indexBlock { - s.buf1 = block.payload.Pack(s.buf1[:0]) // Pack token data + s.buf1 = block.payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.TokenListZstdLevel) - // Store TID range in extended metadata b.ext1 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) return b } -// packTokenTableBlock packs the token table into a compressed index block. -func (s *IndexSealer) packTokenTableBlock(tokenTableBlock token.TableBlock) indexBlock { - s.tokenTable = token.TableFromBlocks([]token.TableBlock{tokenTableBlock}) // Store for PreloadedData - - // Packing block - s.buf1 = tokenTableBlock.Pack(s.buf1[:0]) +func (s *IndexSealer) packTokenTableBlock(tb token.TableBlock) indexBlock { + s.tokenTable = token.TableFromBlocks([]token.TableBlock{tb}) + s.buf1 = tb.Pack(s.buf1[:0]) return s.newIndexBlockZSTD(s.buf1, s.params.TokenTableZstdLevel) } -// packBlocksOffsetsBlock packs document block offsets into a compressed index block. func (s *IndexSealer) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { - // Update IDs table for PreloadedData - s.idsTable.IDsTotal = block.IDsTotal // Total number of IDs - s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) // Number of ID blocks - - // Packing block + s.idsTable.IDsTotal = block.IDsTotal + s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) s.buf1 = block.Pack(s.buf1[:0]) - b := s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) - return b + return s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) } -// packMIDsBlock packs MIDs into a compressed index block. 
func (s *IndexSealer) packMIDsBlock(block idsSealBlock) indexBlock { - // Get the last ID in the block (smallest due to descending order) last := len(block.mids.Values) - 1 - minID := seq.ID{ - MID: seq.MID(block.mids.Values[last]), - RID: seq.RID(block.rids.Values[last]), - } - s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) // Store for PreloadedData - - // Packing block + minID := seq.ID{MID: seq.MID(block.mids.Values[last]), RID: seq.RID(block.rids.Values[last])} + s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) s.buf1 = block.mids.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) - // Store min MID and RID in extended metadata b.ext1 = uint64(minID.MID) b.ext2 = uint64(minID.RID) return b } -// packRIDsBlock packs RIDs into a compressed index block. func (s *IndexSealer) packRIDsBlock(block idsSealBlock) indexBlock { s.buf1 = block.rids.Pack(s.buf1[:0]) - b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) - return b + return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) } -// packPosBlock packs document positions into a compressed index block. func (s *IndexSealer) packPosBlock(block idsSealBlock) indexBlock { s.buf1 = block.params.Pack(s.buf1[:0]) - b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) - return b + return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) } -// packLIDsBlock packs Local IDs (LIDs) into a compressed index block. -// Also updates LIDs table for preloaded data access. 
func (s *IndexSealer) packLIDsBlock(block lidsSealBlock) indexBlock { var ext1 uint64 - if block.ext.isContinued { // todo: Legacy continuation flag + if block.ext.isContinued { ext1 = 1 - block.ext.minTID++ // Adjust for legacy format + block.ext.minTID++ } - - // Update LIDs table for PreloadedData s.lidsTable.MinTIDs = append(s.lidsTable.MinTIDs, block.ext.minTID) s.lidsTable.MaxTIDs = append(s.lidsTable.MaxTIDs, block.ext.maxTID) s.lidsTable.IsContinued = append(s.lidsTable.IsContinued, block.ext.isContinued) - - // Packing block s.buf1 = block.payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.LIDsZstdLevel) - b.ext1 = ext1 // Legacy continuation flag - b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) // TID range + b.ext1 = ext1 + b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) return b } -// LIDsTable returns the built LIDs table for fast initialization of sealed fraction. -func (s *IndexSealer) LIDsTable() lids.Table { - return s.lidsTable -} - -// TokenTable returns the built token table for fast initialization of sealed fraction. -func (s *IndexSealer) TokenTable() token.Table { - return s.tokenTable -} - -// IDsTable returns the built IDs table for fast initialization of sealed fraction. -func (s *IndexSealer) IDsTable() seqids.Table { - return s.idsTable -} +func (s *IndexSealer) LIDsTable() lids.Table { return s.lidsTable } +func (s *IndexSealer) TokenTable() token.Table { return s.tokenTable } +func (s *IndexSealer) IDsTable() seqids.Table { return s.idsTable } diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 3eb00761..233f0aa2 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -16,85 +16,145 @@ import ( // Source interface defines the contract for data sources that can be sealed. // Provides access to all necessary data components for index creation. 
type Source interface { - Info() *common.Info // Fraction metadata information - IDsBlocks(size int) iter.Seq2[[]seq.ID, []seq.DocPos] // Ordered sequence of document IDs and their positions, divided into blocks - TokenBlocks(size int) iter.Seq[[][]byte] // Ordered sequence of tokens divided into blocks - Fields() iter.Seq2[string, uint32] // Ordered sequence of fields with their max field's TID value - TokenLIDs() iter.Seq[[]uint32] // Sequence of Token LIDs ordered by TID and LID - BlocksOffsets() []uint64 // Offsets of DocBlock's in the doc file - LastError() error // Last error encountered during data retrieval + Info() *common.Info // Fraction metadata information + ID() iter.Seq2[seq.ID, seq.DocPos] // Ordered sequence of document IDs and their positions + TokenAndLIDs() iter.Seq2[[]byte, []uint32] // Ordered sequence of tokens paired with their LID list + Field() iter.Seq2[string, uint32] // Ordered sequence of fields with their max TID value + BlockOffsets() []uint64 // Offsets of DocBlocks in the doc file + LastError() error // Last error encountered during data retrieval } -// Seal is the main entry point for sealing a fraction. -// It performs the complete sealing process: -// 1. Creates the index file structure -// 2. Writes all index blocks with compression -// 3. Builds PreloadedData structures for fast initialization of sealed fraction -// 4. Handles file system operations and error recovery -// -// Parameters: -// - src: Data source providing all fraction data -// - params: Sealing parameters including compression levels -// -// Returns: -// - *sealed.PreloadedData: Preloaded data structures for initialization of sealed fraction -// - error: Any error encountered during the sealing process +// createAndWrite creates a tmp file, calls write, syncs, closes, then renames to finalPath. 
+func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { + f, err := os.Create(tmpPath) + if err != nil { + return err + } + if err := write(f); err != nil { + f.Close() + return err + } + if err := f.Sync(); err != nil { + f.Close() + return err + } + if err := f.Close(); err != nil { + return err + } + return os.Rename(tmpPath, finalPath) +} + +// createAndWriteBoth creates two tmp files, calls write with both, syncs and closes them, +// then renames both to their final paths. +func createAndWriteBoth(tmpPath1, finalPath1, tmpPath2, finalPath2 string, write func(*os.File, *os.File) error) error { + f1, err := os.Create(tmpPath1) + if err != nil { + return err + } + f2, err := os.Create(tmpPath2) + if err != nil { + f1.Close() + return err + } + if err := write(f1, f2); err != nil { + f1.Close() + f2.Close() + return err + } + if err := f1.Sync(); err != nil { + f1.Close() + f2.Close() + return err + } + if err := f1.Close(); err != nil { + f2.Close() + return err + } + if err := f2.Sync(); err != nil { + f2.Close() + return err + } + if err := f2.Close(); err != nil { + return err + } + if err := os.Rename(tmpPath1, finalPath1); err != nil { + return err + } + return os.Rename(tmpPath2, finalPath2) +} + +// Seal writes five index files (.info, .token, .offsets, .id, .lid) for the fraction +// and returns PreloadedData for fast initialization of the sealed fraction. 
func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { info := src.Info() - // Validate that we're not sealing an empty fraction if info.To == 0 { return nil, errors.New("sealing of an empty active fraction is not supported") } - // Create temporary index file (will be renamed on success) - indexFile, err := os.Create(info.Path + consts.IndexTmpFileSuffix) - if err != nil { - return nil, err - } - - // Create index sealer and write the index structure - indexSealer := NewIndexSealer(params) - if err := indexSealer.WriteIndex(indexFile, src); err != nil { - return nil, err - } + sealer := NewIndexSealer(params) - // Ensure data is flushed to disk - if err := indexFile.Sync(); err != nil { + if err := createAndWrite( + info.Path+consts.InfoTmpFileSuffix, + info.Path+consts.InfoFileSuffix, + func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, + ); err != nil { return nil, err } - // Get final file size for metadata - stat, err := indexFile.Stat() - if err != nil { + if err := createAndWrite( + info.Path+consts.OffsetsTmpFileSuffix, + info.Path+consts.OffsetsFileSuffix, + func(f *os.File) error { return sealer.WriteOffsetsFile(f, src) }, + ); err != nil { return nil, err } - info.IndexOnDisk = uint64(stat.Size()) - // Close file before renaming - if err := indexFile.Close(); err != nil { + if err := createAndWrite( + info.Path+consts.IDTmpFileSuffix, + info.Path+consts.IDFileSuffix, + func(f *os.File) error { return sealer.WriteIDFile(f, src) }, + ); err != nil { return nil, err } - // Atomically rename temporary file to final name - if err := os.Rename(indexFile.Name(), info.Path+consts.IndexFileSuffix); err != nil { + if err := createAndWriteBoth( + info.Path+consts.TokenTmpFileSuffix, info.Path+consts.TokenFileSuffix, + info.Path+consts.LIDTmpFileSuffix, info.Path+consts.LIDFileSuffix, + func(tokenF, lidF *os.File) error { return sealer.WriteTokenAndLIDFiles(tokenF, lidF, src) }, + ); err != nil { return nil, err } - // Ensure 
directory metadata is synced to disk util.MustSyncPath(filepath.Dir(info.Path)) - // Build preloaded data structure for fast query access - lidsTable := indexSealer.LIDsTable() - preloaded := sealed.PreloadedData{ + // Compute total index size as sum of all 5 files. + var totalSize uint64 + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + st, err := os.Stat(info.Path + suffix) + if err != nil { + return nil, err + } + totalSize += uint64(st.Size()) + } + info.IndexOnDisk = totalSize + + lidsTable := sealer.LIDsTable() + preloaded := &sealed.PreloadedData{ Info: info, - TokenTable: indexSealer.TokenTable(), + TokenTable: sealer.TokenTable(), BlocksData: sealed.BlocksData{ - IDsTable: indexSealer.IDsTable(), + IDsTable: sealer.IDsTable(), LIDsTable: &lidsTable, - BlocksOffsets: src.BlocksOffsets(), + BlocksOffsets: src.BlockOffsets(), }, } - return &preloaded, nil + return preloaded, nil } diff --git a/frac/sealed/token/provider.go b/frac/sealed/token/provider.go index 6d18ff68..a650c266 100644 --- a/frac/sealed/token/provider.go +++ b/frac/sealed/token/provider.go @@ -1,6 +1,7 @@ package token import ( + "math" "sort" ) @@ -15,9 +16,9 @@ type Provider struct { func NewProvider(loader *BlockLoader, entries []*TableEntry) *Provider { return &Provider{ - loader: loader, - entries: entries, - curEntry: nil, + loader: loader, + entries: entries, + curBlockIndex: math.MaxUint32, // sentinel: no block loaded yet } } diff --git a/frac/sealed/token/table_entry.go b/frac/sealed/token/table_entry.go index a16b9a55..6e1df9c9 100644 --- a/frac/sealed/token/table_entry.go +++ b/frac/sealed/token/table_entry.go @@ -12,7 +12,7 @@ type TableEntry struct { } func (t *TableEntry) GetIndexInTokensBlock(tid uint32) int { - return int(t.StartIndex + tid - t.StartTID) + return int(t.StartIndex + (tid - t.StartTID)) } func (t *TableEntry) getLastTID() uint32 { diff --git 
a/frac/sealed/token/table_loader.go b/frac/sealed/token/table_loader.go index 6c3a5936..a0bf87be 100644 --- a/frac/sealed/token/table_loader.go +++ b/frac/sealed/token/table_loader.go @@ -50,6 +50,7 @@ func (l *TableLoader) Load() Table { func TableFromBlocks(blocks []TableBlock) Table { table := make(Table) + for _, block := range blocks { for _, ft := range block.FieldsTables { fd, ok := table[ft.Field] @@ -62,13 +63,16 @@ func TableFromBlocks(blocks []TableBlock) Table { } else if minVal < fd.MinVal { fd.MinVal = minVal } + for _, e := range ft.Entries { e.MinVal = "" fd.Entries = append(fd.Entries, e) } + table[ft.Field] = fd } } + return table } @@ -89,10 +93,8 @@ func (l *TableLoader) readBlock() ([]byte, error) { } func (l *TableLoader) loadBlocks() ([]TableBlock, error) { - // todo: scan all headers in sealed_loader and remember startIndex for each sections - // todo: than use this startIndex to load sections on demand (do not scan every time) - l.i = 1 - for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { // skip actual token blocks, go for token table + l.i = 0 + for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { // skip token blocks, go for token table } blocks := make([]TableBlock, 0) diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index ae639862..c20272ee 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -16,32 +16,43 @@ import ( "github.com/ozontech/seq-db/util" ) +// IndexReaders holds one IndexReader per split index file. +type IndexReaders struct { + Info storage.IndexReader + Token storage.IndexReader + Offsets storage.IndexReader + ID storage.IndexReader + LID storage.IndexReader +} + +// Loader reads the per-section index files to populate BlocksData. +// Token data is loaded lazily (BlockLoader / TableLoader use the Token reader directly). +// Info is loaded separately via loadHeader before Load is called. 
type Loader struct { - reader *storage.IndexReader - blockIndex uint32 - blockBuf []byte + buf []byte } -func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, indexReader *storage.IndexReader) { +// Load populates blocksData from the .offsets, .id, and .lid files. +func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers IndexReaders) { t := time.Now() - l.reader = indexReader - l.blockIndex = 1 // skipping info block that's already read - - l.skipTokens() - var err error - if blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info.BinaryDataVer); err != nil { - logger.Fatal("load ids error", zap.Error(err)) + var blockOffsets sealed.BlockOffsets + blockOffsets, err = l.loadBlocksOffsets(readers.Offsets) + if err != nil { + logger.Fatal("load offsets error", zap.Error(err)) } + blocksData.BlocksOffsets = blockOffsets.Offsets - if blocksData.LIDsTable, err = l.loadLIDsBlocksTable(); err != nil { + blocksData.IDsTable = l.loadIDsTable(readers.ID, blockOffsets.IDsTotal, info.BinaryDataVer) + + blocksData.LIDsTable, err = l.loadLIDsTable(readers.LID) + if err != nil { logger.Fatal("load lids error", zap.Error(err)) } took := time.Since(t) - docsTotalK := float64(info.DocsTotal) / 1000 indexOnDiskMb := util.SizeToUnit(info.IndexOnDisk, "mb") throughput := indexOnDiskMb / util.DurationToUnit(took, "s") @@ -56,43 +67,34 @@ func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, indexRea ) } -func (l *Loader) nextIndexBlock() ([]byte, error) { - data, _, err := l.reader.ReadIndexBlock(l.blockIndex, l.blockBuf) - l.blockBuf = data - l.blockIndex++ - return data, err -} - -func (l *Loader) skipBlock() storage.IndexBlockHeader { - header, err := l.reader.GetBlockHeader(l.blockIndex) +// loadBlocksOffsets reads block 0 from the .offsets file. 
+func (l *Loader) loadBlocksOffsets(r storage.IndexReader) (sealed.BlockOffsets, error) { + data, _, err := r.ReadIndexBlock(0, l.buf) + l.buf = data if err != nil { - logger.Panic("error reading block header", zap.Error(err)) - } - l.blockIndex++ - return header -} - -func (l *Loader) loadIDs(fracVersion config.BinaryDataVersion) (idsTable seqids.Table, blocksOffsets []uint64, err error) { - var result []byte - - if result, err = l.nextIndexBlock(); err != nil { - return idsTable, nil, err + return sealed.BlockOffsets{}, err } - b := sealed.BlockOffsets{} - if err := b.Unpack(result); err != nil { - return idsTable, nil, err + if err := b.Unpack(data); err != nil { + return sealed.BlockOffsets{}, err } + return b, nil +} - blocksOffsets = b.Offsets - idsTable.IDsTotal = b.IDsTotal - idsTable.IDBlocksTotal = uint32(len(b.Offsets)) - idsTable.StartBlockIndex = l.blockIndex +// loadIDsTable scans block headers in the .id file to build seqids.Table. +// Blocks are stored as (MIDs, RIDs, Pos) triplets; we only need MIDs headers. +func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersion config.BinaryDataVersion) seqids.Table { + table := seqids.Table{ + StartBlockIndex: 0, + IDsTotal: idsTotal, + } - for { - // get MIDs block header - header := l.skipBlock() - if header.Len() == 0 { + for blockIdx := uint32(0); ; { + header, err := r.GetBlockHeader(blockIdx) + if err != nil { + logger.Fatal("error reading id block header", zap.Error(err)) + } + if header.Len() == 0 { // separator break } @@ -102,58 +104,36 @@ func (l *Loader) loadIDs(fracVersion config.BinaryDataVersion) (idsTable seqids. 
} else { mid = seq.MID(header.GetExt1()) } - - idsTable.MinBlockIDs = append(idsTable.MinBlockIDs, seq.ID{ + table.MinBlockIDs = append(table.MinBlockIDs, seq.ID{ MID: mid, RID: seq.RID(header.GetExt2()), }) + table.IDBlocksTotal++ - // skipping RIDs and Pos blocks - l.skipBlock() - l.skipBlock() + blockIdx += 3 // skip RIDs and Pos blocks } - return idsTable, blocksOffsets, nil + return table } -func (l *Loader) skipTokens() { - for { - // skip actual token blocks - header := l.skipBlock() - if header.Len() == 0 { - break - } - } +// loadLIDsTable scans block headers in the .lid file to build lids.Table. +func (l *Loader) loadLIDsTable(r storage.IndexReader) (*lids.Table, error) { + var maxTIDs, minTIDs []uint32 + var isContinued []bool - for { - // skip token table - header := l.skipBlock() - if header.Len() == 0 { - break + for blockIdx := uint32(0); ; blockIdx++ { + header, err := r.GetBlockHeader(blockIdx) + if err != nil { + return nil, err } - } -} - -func (l *Loader) loadLIDsBlocksTable() (*lids.Table, error) { - maxTIDs := make([]uint32, 0) - minTIDs := make([]uint32, 0) - isContinued := make([]bool, 0) - - startIndex := l.blockIndex - for { - header := l.skipBlock() if header.Len() == 0 { break } - - ext1 := header.GetExt1() ext2 := header.GetExt2() - maxTIDs = append(maxTIDs, uint32(ext2>>32)) minTIDs = append(minTIDs, uint32(ext2&0xFFFFFFFF)) - - isContinued = append(isContinued, ext1 == 1) + isContinued = append(isContinued, header.GetExt1() == 1) } - return lids.NewTable(startIndex, minTIDs, maxTIDs, isContinued), nil + return lids.NewTable(0, minTIDs, maxTIDs, isContinued), nil } diff --git a/fracmanager/cache_maintainer.go b/fracmanager/cache_maintainer.go index 70e5f956..2a6ac6dd 100644 --- a/fracmanager/cache_maintainer.go +++ b/fracmanager/cache_maintainer.go @@ -149,7 +149,12 @@ func (cm *CacheMaintainer) CreateIndexCache() *frac.IndexCache { LIDs: newCache[*lids.Block](cm, lidsName), Tokens: newCache[*token.Block](cm, tokensName), TokenTable: 
newCache[token.Table](cm, tokenTableName), - Registry: newCache[[]byte](cm, indexName), + // Each index file gets its own registry cache (they all use key=1 internally). + InfoRegistry: newCache[[]byte](cm, indexName), + TokenRegistry: newCache[[]byte](cm, indexName), + OffsetsRegistry: newCache[[]byte](cm, indexName), + IDRegistry: newCache[[]byte](cm, indexName), + LIDRegistry: newCache[[]byte](cm, indexName), } } diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 2a258bda..9fc15fe9 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -19,20 +19,35 @@ import ( type fracManifest struct { basePath string // base path to fraction files (without extension) hasDocs bool // presence of main documents file - hasIndex bool // presence of index file hasMeta bool // presence of meta-information (legacy WAL format) hasWal bool // presence of WAL with meta (new WAL format) + hasIndex bool // presence of index file hasSdocs bool // presence of sorted documents hasRemote bool // presence of remote fraction + // Split index file flags + hasInfo bool + hasToken bool + hasOffsets bool + hasID bool + hasLID bool + // Deletion marker file flags hasDocsDel bool // documents deletion marker hasSdocsDel bool // sorted documents deletion marker - hasIndexDel bool // index deletion marker // Temporary file flags - hasIndexTmp bool // temporary index file - hasSdocsTmp bool // temporary sorted documents file + hasInfoTmp bool + hasTokenTmp bool + hasOffsetsTmp bool + hasIDTmp bool + hasLIDTmp bool + hasSdocsTmp bool // temporary sorted documents file +} + +// hasAllIndexFiles reports whether all 5 split index files are present. 
+func (m *fracManifest) hasAllIndexFiles() bool { + return m.hasInfo && m.hasToken && m.hasOffsets && m.hasID && m.hasLID } // AddExtension adds information about a file with the specified extension @@ -47,20 +62,35 @@ func (m *fracManifest) AddExtension(ext string) error { m.hasWal = true case consts.SdocsFileSuffix: m.hasSdocs = true - case consts.IndexFileSuffix: - m.hasIndex = true case consts.RemoteFractionSuffix: m.hasRemote = true + case consts.InfoFileSuffix: + m.hasInfo = true + case consts.TokenFileSuffix: + m.hasToken = true + case consts.OffsetsFileSuffix: + m.hasOffsets = true + case consts.IDFileSuffix: + m.hasID = true + case consts.LIDFileSuffix: + m.hasLID = true + case consts.DocsDelFileSuffix: m.hasDocsDel = true case consts.SdocsDelFileSuffix: m.hasSdocsDel = true - case consts.IndexDelFileSuffix: - m.hasIndexDel = true - case consts.IndexTmpFileSuffix: - m.hasIndexTmp = true + case consts.InfoTmpFileSuffix: + m.hasInfoTmp = true + case consts.TokenTmpFileSuffix: + m.hasTokenTmp = true + case consts.OffsetsTmpFileSuffix: + m.hasOffsetsTmp = true + case consts.IDTmpFileSuffix: + m.hasIDTmp = true + case consts.LIDTmpFileSuffix: + m.hasLIDTmp = true case consts.SdocsTmpFileSuffix: m.hasSdocsTmp = true @@ -88,13 +118,13 @@ func (m *fracManifest) Stage() fracStage { if m.hasRemote { return fracStageRemote } - if m.hasIndex && (m.hasSdocs || m.hasDocs) { + if m.hasAllIndexFiles() && (m.hasSdocs || m.hasDocs) { return fracStageSealed } if (m.hasMeta || m.hasWal) && m.hasDocs { return fracStageActive } - if m.hasDocsDel || m.hasIndexDel || m.hasSdocsDel { + if m.hasDocsDel || m.hasSdocsDel { return fracStageZombie } return fracStageUnknown @@ -125,18 +155,21 @@ func removeMeta(m *fracManifest) { } } -func removeIndex(m *fracManifest) { - if m.hasIndex { - util.RemoveFile(m.basePath + consts.IndexFileSuffix) - m.hasIndex = false - } -} - -func removeIndexDel(m *fracManifest) { - if m.hasIndexDel { - util.RemoveFile(m.basePath + 
consts.IndexDelFileSuffix) - m.hasIndexDel = false +func removeIndexFiles(m *fracManifest) { + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + } { + util.RemoveFile(m.basePath + suffix) } + m.hasInfo = false + m.hasToken = false + m.hasOffsets = false + m.hasID = false + m.hasLID = false } func removeSdocsDel(m *fracManifest) { @@ -154,10 +187,20 @@ func removeDocsDel(m *fracManifest) { } func removeIndexTmp(m *fracManifest) { - if m.hasIndexTmp { - util.RemoveFile(m.basePath + consts.IndexTmpFileSuffix) - m.hasIndexTmp = false + for _, suffix := range []string{ + consts.InfoTmpFileSuffix, + consts.TokenTmpFileSuffix, + consts.OffsetsTmpFileSuffix, + consts.IDTmpFileSuffix, + consts.LIDTmpFileSuffix, + } { + util.RemoveFile(m.basePath + suffix) } + m.hasInfoTmp = false + m.hasTokenTmp = false + m.hasOffsetsTmp = false + m.hasIDTmp = false + m.hasLIDTmp = false } func removeSdocsTmp(m *fracManifest) { @@ -240,8 +283,7 @@ func cleanupRemoteFrac(m *fracManifest) { removeMeta(m) removeDocs(m) removeSdocs(m) - removeIndex(m) - removeIndexDel(m) + removeIndexFiles(m) } // cleanupSealedFrac cleans files for sealed fractions @@ -265,17 +307,26 @@ func cleanupTemporary(m *fracManifest) { // removeAllFiles completely removes all fraction files // Used for cleaning up partially deleted or corrupted fractions func removeAllFiles(basePath string) { - // Remove main files first, then deletion markers to preserve deletion intent - util.RemoveFile(basePath + consts.IndexFileSuffix) - util.RemoveFile(basePath + consts.DocsFileSuffix) - util.RemoveFile(basePath + consts.SdocsFileSuffix) - util.RemoveFile(basePath + consts.MetaFileSuffix) - - util.RemoveFile(basePath + consts.IndexDelFileSuffix) - util.RemoveFile(basePath + consts.DocsDelFileSuffix) - util.RemoveFile(basePath + consts.SdocsDelFileSuffix) - util.RemoveFile(basePath + consts.SdocsTmpFileSuffix) - 
util.RemoveFile(basePath + consts.IndexTmpFileSuffix) + for _, suffix := range []string{ + consts.InfoFileSuffix, + consts.TokenFileSuffix, + consts.OffsetsFileSuffix, + consts.IDFileSuffix, + consts.LIDFileSuffix, + consts.DocsFileSuffix, + consts.SdocsFileSuffix, + consts.MetaFileSuffix, + consts.DocsDelFileSuffix, + consts.SdocsDelFileSuffix, + consts.SdocsTmpFileSuffix, + consts.InfoTmpFileSuffix, + consts.TokenTmpFileSuffix, + consts.OffsetsTmpFileSuffix, + consts.IDTmpFileSuffix, + consts.LIDTmpFileSuffix, + } { + util.RemoveFile(basePath + suffix) + } } // parseFilePath extracts components from a fraction file path From 73e3dcacee5a5b21998ddba8a496c7e241711bba Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Fri, 27 Mar 2026 15:59:06 +0300 Subject: [PATCH 02/19] refactor: change `sealing.Source` interface --- frac/active_sealing_source.go | 228 +++++++++------------ frac/fraction_concurrency_test.go | 18 +- frac/sealed/sealing/blocks_builder.go | 224 +++++++------------- frac/sealed/sealing/blocks_builder_test.go | 24 ++- frac/sealed/sealing/index.go | 2 +- frac/sealed/sealing/sealer.go | 24 ++- 6 files changed, 213 insertions(+), 307 deletions(-) diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 44aaa850..8d56bdd4 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -22,66 +22,55 @@ import ( "github.com/ozontech/seq-db/util" ) -// ActiveSealingSource transforms data from in-memory (frac.Active) storage -// into a format suitable for disk writing during index creation. 
-// -// The main purpose of this type is to provide access to sorted data -// through a set of iterators that allow sequential processing of -// data in sized blocks for disk writing: -// -// - TokenBlocks() - iterator for token blocks, sorted by fields and values -// - Fields() - iterator for sorted fields with maximum TIDs -// - IDsBlocks() - iterator for document ID blocks and their positions -// - TokenLIDs() - iterator for LID lists for each token -// - Docs() - iterator for documents themselves with duplicate handling -// -// All iterators work with pre-sorted data and return information -// in an order optimal for creating disk index structures. type ActiveSealingSource struct { - params common.SealParams // Sealing parameters - info *common.Info // fraction Info - created time.Time // Creation time of the source - sortedLIDs []uint32 // Sorted LIDs (Local ID) - oldToNewLIDs []uint32 // Mapping from old LIDs to new ones (after sorting) - mids *UInt64s // MIDs - rids *UInt64s // RIDs - fields []string // Sorted field names - fieldsMaxTIDs []uint32 // Maximum TIDs for each field - tids []uint32 // Sorted TIDs (Token ID) - tokens [][]byte // Tokens (values) by TID - lids []*TokenLIDs // LID lists for each token - docPosMap map[seq.ID]seq.DocPos // Original document positions - docPosSorted []seq.DocPos // Document positions after sorting - blocksOffsets []uint64 // Document block offsets - docsReader *storage.DocsReader // Document storage reader - lastErr error // Last error + params common.SealParams // Sealing parameters + + info *common.Info // fraction Info + created time.Time // Creation time of the source + + blocksOffsets []uint64 // Document block offsets + + sortedLIDs []uint32 // Sorted LIDs (Local ID) + oldToNewLIDs []uint32 // Mapping from old LIDs to new ones (after sorting) + + mids *UInt64s // MIDs + rids *UInt64s // RIDs + + fields []string // Sorted field names + fieldTid map[string][]uint32 // Each field contains sorted TIDs based on token 
value + tokens [][]byte // Tokens (values) by TID + lids []*TokenLIDs // LID lists for each token + + docPosMap map[seq.ID]seq.DocPos // Original document positions + docPosSorted []seq.DocPos // Document positions after sorting + docsReader *storage.DocsReader // Document storage reader + + lastErr error // Last error } -// NewActiveSealingSource creates a new data source for sealing -// based on an active in-memory index. func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSealingSource, error) { info := *active.info // copy + sortedLIDs := active.GetAllDocuments() + fields, fieldTid := sortFields(active.TokenList) - // Sort fields and get maximum TIDs for each field - sortedFields, fieldsMaxTIDs := sortFields(active.TokenList) + src := ActiveSealingSource{ + params: params, - // Sort tokens within each field - sortedTIDs := sortTokens(sortedFields, active.TokenList) + info: &info, + created: time.Now(), + + sortedLIDs: sortedLIDs, + oldToNewLIDs: makeInverser(sortedLIDs), // Create LID mapping + + mids: active.MIDs, + rids: active.RIDs, + + fields: fields, + fieldTid: fieldTid, + tokens: active.TokenList.tidToVal, + lids: active.TokenList.tidToLIDs, - src := ActiveSealingSource{ - params: params, - info: &info, - created: time.Now(), - sortedLIDs: sortedLIDs, - oldToNewLIDs: makeInverser(sortedLIDs), // Create LID mapping - mids: active.MIDs, - rids: active.RIDs, - fields: sortedFields, - tids: sortedTIDs, - fieldsMaxTIDs: fieldsMaxTIDs, - tokens: active.TokenList.tidToVal, - lids: active.TokenList.tidToLIDs, docPosMap: active.DocsPositions.idToPos, blocksOffsets: active.DocBlocks.vals, docsReader: &active.sortReader, @@ -99,49 +88,61 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe return &src, nil } -// sortFields sorts field names and calculates maximum TIDs for each field. -// Returns sorted field list and array of maximum TIDs. 
-func sortFields(tl *TokenList) ([]string, []uint32) { +func sortFields(tl *TokenList) ([]string, map[string][]uint32) { fields := make([]string, 0, len(tl.FieldTIDs)) - for field := range tl.FieldTIDs { + fieldTid := make(map[string][]uint32, len(tl.FieldTIDs)) + + for field, tids := range tl.FieldTIDs { fields = append(fields, field) - } - slices.Sort(fields) - pos := 0 - maxTIDs := make([]uint32, 0, len(fields)) - for _, field := range fields { - pos += len(tl.FieldTIDs[field]) - maxTIDs = append(maxTIDs, uint32(pos)) + // Make a copy because this memory is shared + // with concurrent readers (user search queries). + cp := slices.Clone(tids) + + slices.SortFunc(cp, func(i, j uint32) int { + return bytes.Compare(tl.tidToVal[i], tl.tidToVal[j]) + }) + + fieldTid[field] = cp } - return fields, maxTIDs + slices.Sort(fields) + return fields, fieldTid } -// sortTokens sorts tokens lexicographically within each field. -// Returns sorted list of TIDs. -func sortTokens(sortedFields []string, tl *TokenList) []uint32 { - pos := 0 - tids := make([]uint32, 0, len(tl.tidToVal)) - for _, field := range sortedFields { - tids = append(tids, tl.FieldTIDs[field]...) - chunk := tids[pos:] - slices.SortFunc(chunk, func(i, j uint32) int { - a := tl.tidToVal[i] - b := tl.tidToVal[j] - return bytes.Compare(a, b) // Sort by token value - }) - pos = len(tids) +func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { + return func(yield func(seq.ID, seq.DocPos) bool) { + mids := src.mids.vals + rids := src.rids.vals + + // First reserved ID (system). Position unused; LIDs use 1-based indexing. 
+ if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { + return + } + + for i, lid := range src.sortedLIDs { + id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} + var pos seq.DocPos + if len(src.docPosSorted) == 0 { + pos = src.docPosMap[id] + } else { + pos = src.docPosSorted[i+1] // +1 for system document + } + if !yield(id, pos) { + return + } + } } - return tids } -// LastError returns the last error that occurred during processing. +func (src *ActiveSealingSource) BlockOffsets() []uint64 { + return src.blocksOffsets +} + func (src *ActiveSealingSource) LastError() error { return src.lastErr } -// prepareInfo prepares metadata for disk writing. func (src *ActiveSealingSource) prepareInfo() { src.info.MetaOnDisk = 0 src.info.SealingTime = uint64(src.created.UnixMilli()) @@ -153,77 +154,40 @@ func (src *ActiveSealingSource) prepareInfo() { src.info.BuildDistribution(mids) } -// Info returns index metadata information. func (src *ActiveSealingSource) Info() *common.Info { return src.info } -// TokenAndLIDs returns an iterator that yields one (token, lids) pair at a time, in TID order. -// Tokens are pre-sorted: first by field, then lexicographically within each field. -// The lids slice is reused between yields and must not be retained by the caller. -func (src *ActiveSealingSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { - return func(yield func([]byte, []uint32) bool) { - var lidBuf []uint32 - - for _, tid := range src.tids { - oldLIDs := src.lids[tid].GetLIDs(src.mids, src.rids) - - lidBuf = slices.Grow(lidBuf[:0], len(oldLIDs)) - for _, lid := range oldLIDs { - lidBuf = append(lidBuf, src.oldToNewLIDs[lid]) - } - - if !yield(src.tokens[tid], lidBuf) { - return - } - } - } -} - -// Field returns an iterator for sorted fields and their maximum TIDs. 
-func (src *ActiveSealingSource) Field() iter.Seq2[string, uint32] { - return func(yield func(string, uint32) bool) { - for i, field := range src.fields { - if !yield(field, src.fieldsMaxTIDs[i]) { +func (src *ActiveSealingSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + for _, field := range src.fields { + if !yield(field, src.tokensForField(field)) { return } } } } -// ID returns an iterator for document IDs and their positions, one pair at a time. -func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { - mids := src.mids.vals - rids := src.rids.vals +func (src *ActiveSealingSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] { + var lidsbuf []uint32 + return func(yield func([]byte, []uint32) bool) { + for _, tid := range src.fieldTid[field] { + token := src.tokens[tid] - // First reserved ID (system). Position unused; LIDs use 1-based indexing. - if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { - return - } + lids := src.lids[tid].GetLIDs(src.mids, src.rids) + lidsbuf = slices.Grow(lidsbuf[:0], len(lids)) - for i, lid := range src.sortedLIDs { - id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} - var pos seq.DocPos - if len(src.docPosSorted) == 0 { - pos = src.docPosMap[id] - } else { - pos = src.docPosSorted[i+1] // +1 for system document + for _, lid := range lids { + lidsbuf = append(lidsbuf, src.oldToNewLIDs[lid]) } - if !yield(id, pos) { + + if !yield(token, lidsbuf) { return } } } } -// BlockOffsets returns document block offsets. -func (src *ActiveSealingSource) BlockOffsets() []uint64 { - return src.blocksOffsets -} - -// makeInverser creates an array for converting old LIDs to new ones. 
-// sortedLIDs[i] = oldLID -> inverser[oldLID] = i+1 func makeInverser(sortedLIDs []uint32) []uint32 { inverser := make([]uint32, len(sortedLIDs)+1) for i, lid := range sortedLIDs { diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index 95e96637..138586fd 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -24,7 +24,7 @@ import ( "github.com/ozontech/seq-db/parser" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" - test_common "github.com/ozontech/seq-db/tests/common" + testcommon "github.com/ozontech/seq-db/tests/common" "github.com/ozontech/seq-db/tokenizer" ) @@ -38,9 +38,9 @@ func TestConcurrentAppendAndQuery(t *testing.T) { docs, bulks, fromTime, toTime := generatesMessages(numWriters*numMessagesPerWriter, bulkSize) - tmpDir := test_common.CreateTempDir() + tmpDir := testcommon.CreateTempDir() fracPath := filepath.Join(tmpDir, "test_fraction") - defer test_common.RemoveDir(fracPath) + defer testcommon.RemoveDir(fracPath) activeIndexer, stop := NewActiveIndexer(numIndexWorkers, 1000) defer stop() @@ -354,12 +354,12 @@ func seal(active *Active) (*Sealed, error) { return nil, err } indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), diff --git 
a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 9090db18..4183ae46 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -2,7 +2,6 @@ package sealing import ( "encoding/binary" - "errors" "iter" "unsafe" @@ -55,40 +54,11 @@ func (bb *blocksBuilder) LastError() error { return bb.lastErr } -// BuildTokenBlocks converts scalar (token, lids) pairs into token blocks with field tables. -// onLIDs is called for each token's LIDs immediately during iteration — the caller must not -// retain the slice after onLIDs returns. Errors from onLIDs are stored in bb.lastErr. -// -// Visualization of relationships between fields, tokens, and table entries: -// -// Field Ranges: <-------f1----------><------f2-------><------------f3------------><----------f4----------> -// Token Blocks: [.t1.t2.t3.t4.][.t5.t6.t7.t8.][.t9....etc...][.............][.............][.............] -// Field Entries: {-----f1------}{-f1-}{---f2--}{--f2--}{-f3--}{------f3-----}{-f3-}{----f4-}{-----f4------} -// -// Parameters: -// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs -// - fields: Iterator of [fieldName, maxTID] pairs for all fields in ascending TID order -// - blockSize: Maximum payload size in bytes per token block -// - onLIDs: Called for each token's LIDs before the source advances to the next token func (bb *blocksBuilder) BuildTokenBlocks( - tokens iter.Seq2[[]byte, []uint32], - fields iter.Seq2[string, uint32], - accumulate func([]uint32) error, - blockSize int, + it iter.Seq2[string, iter.Seq2[[]byte, []uint32]], + accumulate func([]uint32) error, blockCapacity int, ) iter.Seq2[tokensSealBlock, []token.FieldTable] { return func(yield func(tokensSealBlock, []token.FieldTable) bool) { - nextField, stop := iter.Pull2(fields) - defer stop() - - var ( - hasMore bool - currentTID uint32 = 1 - fieldMaxTID uint32 = 0 - fieldName string - ) - - // Just wrap `accumulate` function to be able - // to 
track returned errors. accumulate := func(lids []uint32) error { if err := accumulate(lids); err != nil { bb.lastErr = err @@ -97,56 +67,95 @@ func (bb *blocksBuilder) BuildTokenBlocks( return nil } - for blockIdx, block := range seqBlockToken(tokens, blockSize, accumulate) { - if bb.lastErr != nil { + var ( + block tokensSealBlock + blockIdx uint32 + blockSize int + ) + + var ( + currentTID uint32 + pendingTable []token.FieldTable + fieldName string + fieldEntryStartTID uint32 + ) + + emitFieldEntry := func() { + if fieldName == "" || fieldEntryStartTID > currentTID { return } - // A block may span multiple fields, and a field may span multiple blocks. - // We emit one TableEntry per (field, block) intersection so that lookups - // can find the exact position of any token given its field and TID. - var table []token.FieldTable - for currentTID <= block.ext.maxTID { - if fieldMaxTID < currentTID { - if fieldName, fieldMaxTID, hasMore = nextField(); !hasMore { - bb.lastErr = errors.New("not enough fields to cover all TIDs") + entry := newTokenTableEntry(fieldEntryStartTID, currentTID, blockIdx, block) + pendingTable = append(pendingTable, token.FieldTable{ + Field: fieldName, + Entries: []*token.TableEntry{entry}, + }) + } + + flushBlock := func() bool { + emitFieldEntry() + block.ext.maxTID = currentTID + + if !yield(block, pendingTable) { + return false + } + + block.payload.Payload = block.payload.Payload[:0] + block.payload.Offsets = block.payload.Offsets[:0] + block.ext.minTID = currentTID + 1 + + blockIdx++ + blockSize = 0 + + pendingTable = pendingTable[:0] + fieldEntryStartTID = currentTID + 1 + + return true + } + + block.ext.minTID = 1 + for field, tokIt := range it { + emitFieldEntry() + + fieldName = field + fieldEntryStartTID = currentTID + 1 + + for tok, lids := range tokIt { + tokenSize := int(unsafe.Sizeof(uint32(0))) + len(tok) + + if blockSize > 0 && blockSize+tokenSize > blockCapacity { + if !flushBlock() { return } } - entry := 
newTokenTableEntry(currentTID, fieldMaxTID, blockIdx, block) - currentTID += entry.ValCount + block.payload.Offsets = append(block.payload.Offsets, uint32(len(block.payload.Payload))) + block.payload.Payload = binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tok))) + block.payload.Payload = append(block.payload.Payload, tok...) - table = append(table, token.FieldTable{ - Field: fieldName, - Entries: []*token.TableEntry{entry}}, - ) - } + if err := accumulate(lids); err != nil { + bb.lastErr = err + return + } - if !yield(block, table) { - return + currentTID++ + blockSize += tokenSize } } - if bb.lastErr != nil { - return - } - - if currentTID-1 != fieldMaxTID { - bb.lastErr = errors.New("fields and tokens not consistent") - } else if _, _, hasMore = nextField(); hasMore { - bb.lastErr = errors.New("excess field after processing all blocks") + if blockSize > 0 { + flushBlock() } } } func newTokenTableEntry( - entryStartTID, fieldMaxTID, + entryStartTID, entryEndTID uint32, blockIndex uint32, block tokensSealBlock, ) *token.TableEntry { // Convert global TIDs to block-local indices firstIndex := entryStartTID - block.ext.minTID - lastIndex := min(fieldMaxTID, block.ext.maxTID) - block.ext.minTID + lastIndex := entryEndTID - block.ext.minTID // Extract min and max token values for the entry range minVal := string(block.payload.GetToken(int(firstIndex))) @@ -193,99 +202,6 @@ func seqBlockID( } } -// seqBlockToken accumulates scalar (token, lids) pairs into sealed token blocks. -// A new block is started whenever the accumulated payload would exceed blockSize bytes. -// onLIDs is called for each token's LIDs immediately during iteration — the caller must not -// retain the slice after onLIDs returns. If onLIDs returns a non-nil error, iteration stops. 
-// -// Parameters: -// - tokens: Scalar sequence of (token bytes, per-token LID list) pairs -// - blockSize: Maximum payload size in bytes before starting a new block -// - onLIDs: Called for each token's LIDs before the source advances to the next token -// -// Returns: -// - iter.Seq2[uint32, tokensSealBlock]: Sequence of (block index, sealed token block) pairs -func seqBlockToken( - tokens iter.Seq2[[]byte, []uint32], - blockSize int, accumulate func([]uint32) error, -) iter.Seq2[uint32, tokensSealBlock] { - return func(yield func(uint32, tokensSealBlock) bool) { - var ( - idx uint32 // 0-based block index - currentTID uint32 // monotonically increasing TID - block tokensSealBlock // block under construction - actualSize int // accumulated payload bytes - ) - - block.ext.minTID = 1 - flush := func() bool { - block.ext.maxTID = currentTID - - if !yield(idx, block) { - return false - } - - idx++ - - // We yielded complete token block several lines earlier. - // And now we prepare token block for the next batch. - block.payload.Payload = block.payload.Payload[:0] - block.payload.Offsets = block.payload.Offsets[:0] - - // Here we increment currentTID by one because - // it points to TID at the end of the *currently* yielded block. - block.ext.minTID = currentTID + 1 - - actualSize = 0 - return true - } - - for token, lids := range tokens { - // We encode token as [size](4B)[token](?B). 
- tokenSize := int(unsafe.Sizeof(uint32(0))) + len(token) - - needsFlushing := actualSize > 0 && - actualSize+tokenSize > blockSize - - if needsFlushing { - if !flush() { - return - } - } - - block.payload.Offsets = append( - block.payload.Offsets, - uint32(len(block.payload.Payload)), - ) - - block.payload.Payload = binary.LittleEndian.AppendUint32( - block.payload.Payload, - uint32(len(token)), - ) - - block.payload.Payload = append( - block.payload.Payload, - token..., - ) - - if err := accumulate(lids); err != nil { - return - } - - currentTID += 1 - actualSize += tokenSize - } - - if actualSize > 0 { - flush() - } - } -} - -// lidBlocksAcc incrementally builds LID blocks from per-token LID lists. -// Call Add for each token's LIDs in TID order, passing a callback that is invoked -// for each completed block before its backing arrays are reused. -// Call Flush once after all Add calls to handle the final (possibly partial) block. type lidBlocksAcc struct { blockCap int currentTID uint32 diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index d5637dc8..95ae545d 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -27,12 +27,26 @@ type mockSource struct { func (m *mockSource) Info() common.Info { return m.info } -func (m *mockSource) Field() iter.Seq2[string, uint32] { - return func(yield func(string, uint32) bool) { - for i := range len(m.fields) { - if !yield(m.fields[i], m.fieldMaxTIDs[i]) { +func (m *mockSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { + return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { + start := 0 + for i, field := range m.fields { + end := int(m.fieldMaxTIDs[i]) + tokenStart, tokenEnd := start, end + if !yield(field, func(yield func([]byte, []uint32) bool) { + for j := tokenStart; j < tokenEnd; j++ { + var lids []uint32 + if j < len(m.tokenLIDs) { + lids = m.tokenLIDs[j] + } + if !yield(m.tokens[j], 
lids) { + return + } + } + }) { return } + start = end } } } @@ -114,7 +128,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { lidAccum := newLIDBlocksAccumulator(lidBlockCap) var lidBlocks []lidsSealBlock tokenBlocks := bb.BuildTokenBlocks( - src.TokenAndLIDs(), src.Field(), + src.Iterator(), func(lids []uint32) error { return lidAccum.Add(lids, func(block lidsSealBlock) error { block.payload.LIDs = slices.Clone(block.payload.LIDs) diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 57a3b3ad..27eb2823 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -219,7 +219,7 @@ func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src S } blocks := bb.BuildTokenBlocks( - src.TokenAndLIDs(), src.Field(), + src.Iterator(), accumulate, consts.RegularBlockSize, ) diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 233f0aa2..484270b1 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -16,12 +16,24 @@ import ( // Source interface defines the contract for data sources that can be sealed. // Provides access to all necessary data components for index creation. type Source interface { - Info() *common.Info // Fraction metadata information - ID() iter.Seq2[seq.ID, seq.DocPos] // Ordered sequence of document IDs and their positions - TokenAndLIDs() iter.Seq2[[]byte, []uint32] // Ordered sequence of tokens paired with their LID list - Field() iter.Seq2[string, uint32] // Ordered sequence of fields with their max TID value - BlockOffsets() []uint64 // Offsets of DocBlocks in the doc file - LastError() error // Last error encountered during data retrieval + // Info returns information about [sealing.Source]. + // For example, in one case it returns information about [frac.Active]. + Info() *common.Info + + // ID returns a view into [sealing.Source] stored ids. + // Identificators are returned in sorted order starting with the biggest seq.ID. 
+ ID() iter.Seq2[seq.ID, seq.DocPos] + + // BlockOffsets returns all offsets to [storage.DocBlock] + // stored nside `.docs` file that is owned by [sealing.Source]. + BlockOffsets() []uint64 + + Iterator() iter.Seq2[ + string, // Field name + iter.Seq2[[]byte, []uint32], // Token value and lids for this token + ] + + LastError() error // Last error encountered during data retrieval } // createAndWrite creates a tmp file, calls write, syncs, closes, then renames to finalPath. From 9df407e0c4ce704bef3a0ebde283a4caced8a704 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 12:13:07 +0300 Subject: [PATCH 03/19] refactor: comments, split files --- consts/consts.go | 18 +- frac/active_sealing_source.go | 27 +- frac/sealed/sealing/blocks_builder.go | 9 +- frac/sealed/sealing/index.go | 357 ++++++++++++-------------- frac/sealed/sealing/sealer.go | 56 ++-- frac/sealed/sealing/writer.go | 74 ++++++ seq/seq.go | 14 +- 7 files changed, 314 insertions(+), 241 deletions(-) create mode 100644 frac/sealed/sealing/writer.go diff --git a/consts/consts.go b/consts/consts.go index 7a8eb9a4..40abbdab 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -63,21 +63,21 @@ const ( InfoTmpFileSuffix = "._info" InfoDelFileSuffix = ".info.del" - TokenFileSuffix = ".token" - TokenTmpFileSuffix = "._token" - TokenDelFileSuffix = ".token.del" + TokenFileSuffix = ".tokens" + TokenTmpFileSuffix = "._tokens" + TokenDelFileSuffix = ".tokens.del" OffsetsFileSuffix = ".offsets" OffsetsTmpFileSuffix = "._offsets" OffsetsDelFileSuffix = ".offsets.del" - IDFileSuffix = ".id" - IDTmpFileSuffix = "._id" - IDDelFileSuffix = ".id.del" + IDFileSuffix = ".ids" + IDTmpFileSuffix = "._ids" + IDDelFileSuffix = ".ids.del" - LIDFileSuffix = ".lid" - LIDTmpFileSuffix = "._lid" - LIDDelFileSuffix = ".lid.del" + LIDFileSuffix = ".lids" + LIDTmpFileSuffix = "._lids" + LIDDelFileSuffix = ".lids.del" RemoteFractionSuffix = ".remote" diff --git a/frac/active_sealing_source.go 
b/frac/active_sealing_source.go index 8d56bdd4..b90c0297 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -115,20 +115,29 @@ func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { mids := src.mids.vals rids := src.rids.vals - // First reserved ID (system). Position unused; LIDs use 1-based indexing. - if !yield(seq.ID{MID: seq.MID(mids[0]), RID: seq.RID(rids[0])}, 0) { + // System ID and DocPos are not stored in `src.sortedLIDs`. + // However we do have to yield them to preserve 1-baseed indexing for ids. + if !yield(seq.SystemID, seq.SystemDocPos) { return } for i, lid := range src.sortedLIDs { - id := seq.ID{MID: seq.MID(mids[lid]), RID: seq.RID(rids[lid])} - var pos seq.DocPos + id := seq.ID{ + MID: seq.MID(mids[lid]), + RID: seq.RID(rids[lid]), + } + + // Documents were not sorted previously. if len(src.docPosSorted) == 0 { - pos = src.docPosMap[id] - } else { - pos = src.docPosSorted[i+1] // +1 for system document + if !yield(id, src.docPosMap[id]) { + return + } + continue } - if !yield(id, pos) { + + // `i` in range [0; len(src.sortedLIDs)) + // but lids indexes are 1-based. + if !yield(id, src.docPosSorted[i+1]) { return } } @@ -158,7 +167,7 @@ func (src *ActiveSealingSource) Info() *common.Info { return src.info } -func (src *ActiveSealingSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { for _, field := range src.fields { if !yield(field, src.tokensForField(field)) { diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 4183ae46..ea506402 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -81,6 +81,7 @@ func (bb *blocksBuilder) BuildTokenBlocks( ) emitFieldEntry := func() { + // Handle case when field does not have tokens. 
if fieldName == "" || fieldEntryStartTID > currentTID { return } @@ -203,20 +204,24 @@ func seqBlockID( } type lidBlocksAcc struct { - blockCap int + blockCap int + currentTID uint32 currentBlock lidsSealBlock + isEndOfToken bool isContinued bool } func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { a := &lidBlocksAcc{blockCap: blockCap} + a.currentBlock.ext.minTID = 1 a.currentBlock.payload = lids.Block{ LIDs: make([]uint32, 0, blockCap), Offsets: []uint32{0}, } + return a } @@ -268,7 +273,7 @@ func (a *lidBlocksAcc) finalizeBlock() lidsSealBlock { result := a.currentBlock result.payload.IsLastLID = a.isEndOfToken result.ext.isContinued = a.isContinued - a.isContinued = !a.isEndOfToken + a.isContinued = !a.isEndOfToken return result } diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 27eb2823..2ac8d885 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -21,35 +21,6 @@ import ( "github.com/ozontech/seq-db/zstd" ) -// IndexSealer writes sealed fraction index data across multiple files. -// Each Write*File method writes one section to an independent file using the -// standard [prefix][blocks][registry] format so each file has its own IndexReader. -// -// Call order matters for PreloadedData: -// -// WriteTokenAndLIDFiles → WriteOffsetsFile → WriteIDFile -// -// (TokenTable is populated by WriteTokenAndLIDFiles; IDsTable by WriteOffsetsFile+WriteIDFile; -// LIDsTable by WriteTokenAndLIDFiles.) -type IndexSealer struct { - lastErr error - buf1 []byte - buf2 []byte - params common.SealParams - - idsTable seqids.Table - lidsTable lids.Table - tokenTable token.Table -} - -func NewIndexSealer(params common.SealParams) *IndexSealer { - return &IndexSealer{ - params: params, - buf1: make([]byte, 0, consts.RegularBlockSize), - buf2: make([]byte, 0, consts.RegularBlockSize), - } -} - // indexBlock is one compressed (or not) block with its registry metadata. 
type indexBlock struct { codec storage.Codec @@ -63,137 +34,75 @@ func (i indexBlock) Bin(pos int64) (storage.IndexBlockHeader, []byte) { return storage.NewIndexBlockHeader(pos, i.ext1, i.ext2, uint32(len(i.payload)), i.rawLen, i.codec), i.payload } -const filePrefixSize = 16 - -// write writes blocks to ws using [16-byte prefix][blocks][registry]. -// The prefix is written last (via seek-back) and stores registry position + size. -func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { - return err - } +type IndexSealer struct { + params common.SealParams - hw := bytes.NewBuffer(nil) - bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) - defer bytespool.ReleaseWriter(bw) + buf1 []byte + buf2 []byte - pos := filePrefixSize - for block := range blocks { - if s.lastErr != nil { - return s.lastErr - } - header, payload := block.Bin(int64(pos)) - if _, err := bw.Write(payload); err != nil { - return err - } - if _, err := hw.Write(header); err != nil { - return err - } - pos += len(payload) - } - if s.lastErr != nil { - return s.lastErr - } - if err := bw.Flush(); err != nil { - return err - } - - size := hw.Len() - regPos, err := ws.Seek(0, io.SeekEnd) - if err != nil { - return err - } - if _, err := bw.Write(hw.Bytes()); err != nil { - return err - } - if err := bw.Flush(); err != nil { - return err - } + idsTable seqids.Table + lidsTable lids.Table + tokenTable token.Table - prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) - if _, err := ws.Seek(0, io.SeekStart); err != nil { - return err - } - _, err = ws.Write(prefix) - return err + lastErr error } -// fileStreamWriter writes blocks incrementally to a single file using the -// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. 
-type fileStreamWriter struct { - ws io.WriteSeeker - bw *bytespool.Writer - hw bytes.Buffer - pos int +func NewIndexSealer(params common.SealParams) *IndexSealer { + return &IndexSealer{ + params: params, + buf1: make([]byte, 0, consts.RegularBlockSize), + buf2: make([]byte, 0, consts.RegularBlockSize), + } } -func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { - return nil, err - } - return &fileStreamWriter{ - ws: ws, - bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), - pos: filePrefixSize, - }, nil +func (s *IndexSealer) LIDsTable() lids.Table { + return s.lidsTable } -func (fw *fileStreamWriter) writeBlock(block indexBlock) error { - header, payload := block.Bin(int64(fw.pos)) - if _, err := fw.bw.Write(payload); err != nil { - return err - } - fw.hw.Write(header) // bytes.Buffer.Write never fails - fw.pos += len(payload) - return nil +func (s *IndexSealer) TokenTable() token.Table { + return s.tokenTable } -func (fw *fileStreamWriter) finalize() (err error) { - defer fw.release() - if err = fw.bw.Flush(); err != nil { - return - } - var regPos int64 - if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { - return - } - if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { - return - } - if err = fw.bw.Flush(); err != nil { - return - } - prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) - if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { - return - } - _, err = fw.ws.Write(prefix) - return +func (s *IndexSealer) IDsTable() seqids.Table { + return s.idsTable } -func (fw *fileStreamWriter) release() { - if fw.bw != nil { - bytespool.ReleaseWriter(fw.bw) - fw.bw = nil - } +// WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. 
+func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { + return s.write(ws, func(yield func(indexBlock) bool) { + offsets := sealed.BlockOffsets{ + IDsTotal: src.Info().DocsTotal + 1, + Offsets: src.BlockOffsets(), + } + yield(s.packBlocksOffsetsBlock(offsets)) + }) } -// WriteInfoFile writes the .info file containing a single BlockInfo block. -func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { +func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { return s.write(ws, func(yield func(indexBlock) bool) { - yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) + for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if !yield(s.packMIDsBlock(block)) { + return + } + + if !yield(s.packRIDsBlock(block)) { + return + } + + if !yield(s.packPosBlock(block)) { + return + } + } + + if s.lastErr = src.LastError(); s.lastErr != nil { + return + } + + yield(indexBlock{}) // trailing separator }) } -// WriteTokenAndLIDFiles writes the .token and .lid files in a single pass over the source data. -// -// .token file: [token blocks...] [separator] [token-table block] [separator] -// .lid file: [LID blocks...] [separator] -// -// LID blocks are written interleaved with token block processing so that both files -// are produced from one sequential scan of the (token, LID) data. 
-func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src Source) error { +func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Source) error { tokenFW, err := newFileStreamWriter(tokenWS) if err != nil { return err @@ -219,7 +128,7 @@ func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src S } blocks := bb.BuildTokenBlocks( - src.Iterator(), + src.TokenTriplet(), accumulate, consts.RegularBlockSize, ) @@ -261,38 +170,9 @@ func (s *IndexSealer) WriteTokenAndLIDFiles(tokenWS, lidWS io.WriteSeeker, src S return tokenFW.finalize() } -// WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. -func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - offsets := sealed.BlockOffsets{ - IDsTotal: src.Info().DocsTotal + 1, - Offsets: src.BlockOffsets(), - } - yield(s.packBlocksOffsetsBlock(offsets)) - }) -} - -func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { +func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { return s.write(ws, func(yield func(indexBlock) bool) { - for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { - if !yield(s.packMIDsBlock(block)) { - return - } - - if !yield(s.packRIDsBlock(block)) { - return - } - - if !yield(s.packPosBlock(block)) { - return - } - } - - if s.lastErr = src.LastError(); s.lastErr != nil { - return - } - - yield(indexBlock{}) // trailing separator + yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) }) } @@ -318,6 +198,67 @@ func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { return append(dst, current) } +// write writes blocks to ws using [16-byte prefix][blocks][registry]. +// The prefix is written last (via seek-back) and stores registry position + size. 
+func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { + return err + } + + hw := bytes.NewBuffer(nil) + bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) + defer bytespool.ReleaseWriter(bw) + + pos := filePrefixSize + for block := range blocks { + if s.lastErr != nil { + return s.lastErr + } + + header, payload := block.Bin(int64(pos)) + if _, err := bw.Write(payload); err != nil { + return err + } + + if _, err := hw.Write(header); err != nil { + return err + } + + pos += len(payload) + } + + if s.lastErr != nil { + return s.lastErr + } + + if err := bw.Flush(); err != nil { + return err + } + + size := hw.Len() + regPos, err := ws.Seek(0, io.SeekEnd) + if err != nil { + return err + } + + if _, err := bw.Write(hw.Bytes()); err != nil { + return err + } + + if err := bw.Flush(); err != nil { + return err + } + + prefix := binary.LittleEndian.AppendUint64(nil, uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) + if _, err := ws.Seek(0, io.SeekStart); err != nil { + return err + } + + _, err = ws.Write(prefix) + return err +} + func newIndexBlock(raw []byte) indexBlock { return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } @@ -330,68 +271,98 @@ func (s *IndexSealer) newIndexBlockZSTD(raw []byte, level int) indexBlock { return newIndexBlock(raw) } +// packInfoBlock packs fraction information into an index block. func (s *IndexSealer) packInfoBlock(block sealed.BlockInfo) indexBlock { s.buf1 = block.Pack(s.buf1[:0]) - return newIndexBlock(s.buf1) + return newIndexBlock(s.buf1) // Info block is typically small, no compression } +// packTokenBlock packs token data into a compressed index block. 
func (s *IndexSealer) packTokenBlock(block tokensSealBlock) indexBlock { - s.buf1 = block.payload.Pack(s.buf1[:0]) + s.buf1 = block.payload.Pack(s.buf1[:0]) // Pack token data b := s.newIndexBlockZSTD(s.buf1, s.params.TokenListZstdLevel) + // Store TID range in extended metadata b.ext1 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) return b } -func (s *IndexSealer) packTokenTableBlock(tb token.TableBlock) indexBlock { - s.tokenTable = token.TableFromBlocks([]token.TableBlock{tb}) - s.buf1 = tb.Pack(s.buf1[:0]) +// packTokenTableBlock packs the token table into a compressed index block. +func (s *IndexSealer) packTokenTableBlock(tokenTableBlock token.TableBlock) indexBlock { + s.tokenTable = token.TableFromBlocks([]token.TableBlock{tokenTableBlock}) // Store for PreloadedData + + // Packing block + s.buf1 = tokenTableBlock.Pack(s.buf1[:0]) return s.newIndexBlockZSTD(s.buf1, s.params.TokenTableZstdLevel) } +// packBlocksOffsetsBlock packs document block offsets into a compressed index block. func (s *IndexSealer) packBlocksOffsetsBlock(block sealed.BlockOffsets) indexBlock { - s.idsTable.IDsTotal = block.IDsTotal - s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) + // Update IDs table for PreloadedData + s.idsTable.IDsTotal = block.IDsTotal // Total number of IDs + s.idsTable.IDBlocksTotal = uint32(len(block.Offsets)) // Number of ID blocks + + // Packing block s.buf1 = block.Pack(s.buf1[:0]) - return s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) + b := s.newIndexBlockZSTD(s.buf1, s.params.DocsPositionsZstdLevel) + return b } +// packMIDsBlock packs MIDs into a compressed index block. 
func (s *IndexSealer) packMIDsBlock(block idsSealBlock) indexBlock { + // Get the last ID in the block (smallest due to descending order) last := len(block.mids.Values) - 1 - minID := seq.ID{MID: seq.MID(block.mids.Values[last]), RID: seq.RID(block.rids.Values[last])} - s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) + + minID := seq.ID{ + MID: seq.MID(block.mids.Values[last]), + RID: seq.RID(block.rids.Values[last]), + } + + s.idsTable.MinBlockIDs = append(s.idsTable.MinBlockIDs, minID) // Store for PreloadedData + + // Packing block s.buf1 = block.mids.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + + // Store min MID and RID in extended metadata b.ext1 = uint64(minID.MID) b.ext2 = uint64(minID.RID) + return b } +// packRIDsBlock packs RIDs into a compressed index block. func (s *IndexSealer) packRIDsBlock(block idsSealBlock) indexBlock { s.buf1 = block.rids.Pack(s.buf1[:0]) - return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + return b } +// packPosBlock packs document positions into a compressed index block. func (s *IndexSealer) packPosBlock(block idsSealBlock) indexBlock { s.buf1 = block.params.Pack(s.buf1[:0]) - return s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + b := s.newIndexBlockZSTD(s.buf1, s.params.IDsZstdLevel) + return b } +// packLIDsBlock packs Local IDs (LIDs) into a compressed index block. +// Also updates LIDs table for preloaded data access. 
func (s *IndexSealer) packLIDsBlock(block lidsSealBlock) indexBlock { var ext1 uint64 - if block.ext.isContinued { + if block.ext.isContinued { // todo: Legacy continuation flag ext1 = 1 - block.ext.minTID++ + block.ext.minTID++ // Adjust for legacy format } + + // Update LIDs table for PreloadedData s.lidsTable.MinTIDs = append(s.lidsTable.MinTIDs, block.ext.minTID) s.lidsTable.MaxTIDs = append(s.lidsTable.MaxTIDs, block.ext.maxTID) s.lidsTable.IsContinued = append(s.lidsTable.IsContinued, block.ext.isContinued) + + // Packing block s.buf1 = block.payload.Pack(s.buf1[:0]) b := s.newIndexBlockZSTD(s.buf1, s.params.LIDsZstdLevel) - b.ext1 = ext1 - b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) + b.ext1 = ext1 // Legacy continuation flag + b.ext2 = uint64(block.ext.maxTID)<<32 | uint64(block.ext.minTID) // TID range + return b } - -func (s *IndexSealer) LIDsTable() lids.Table { return s.lidsTable } -func (s *IndexSealer) TokenTable() token.Table { return s.tokenTable } -func (s *IndexSealer) IDsTable() seqids.Table { return s.idsTable } diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 484270b1..ab97091f 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -14,45 +14,52 @@ import ( ) // Source interface defines the contract for data sources that can be sealed. -// Provides access to all necessary data components for index creation. +// Provides access to all necessary data components for index creation type Source interface { - // Info returns information about [sealing.Source]. - // For example, in one case it returns information about [frac.Active]. + // Info returns metadata describing this source. Info() *common.Info - // ID returns a view into [sealing.Source] stored ids. - // Identificators are returned in sorted order starting with the biggest seq.ID. + // ID returns an iterator over stored document identifiers paired with + // their positions, in descending [seq.ID] order. 
ID() iter.Seq2[seq.ID, seq.DocPos] - // BlockOffsets returns all offsets to [storage.DocBlock] - // stored nside `.docs` file that is owned by [sealing.Source]. + // BlockOffsets returns byte offsets to each document block + // within this source's `.docs` file. BlockOffsets() []uint64 - Iterator() iter.Seq2[ - string, // Field name - iter.Seq2[[]byte, []uint32], // Token value and lids for this token - ] + // TokenTriplet iterates over fields in lexicographic order. + // For each field, it yields tokens (lexicographically sorted) + // paired with the local document ID list for that token. + TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] - LastError() error // Last error encountered during data retrieval + // LastError returns the last error encountered during iteration, + // or nil if no error occurred. + LastError() error } -// createAndWrite creates a tmp file, calls write, syncs, closes, then renames to finalPath. -func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { +func createAndWrite( + tmpPath, finalPath string, + write func(*os.File) error, +) error { f, err := os.Create(tmpPath) if err != nil { return err } + if err := write(f); err != nil { f.Close() return err } + if err := f.Sync(); err != nil { f.Close() return err } + if err := f.Close(); err != nil { return err } + return os.Rename(tmpPath, finalPath) } @@ -106,14 +113,6 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { sealer := NewIndexSealer(params) - if err := createAndWrite( - info.Path+consts.InfoTmpFileSuffix, - info.Path+consts.InfoFileSuffix, - func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, - ); err != nil { - return nil, err - } - if err := createAndWrite( info.Path+consts.OffsetsTmpFileSuffix, info.Path+consts.OffsetsFileSuffix, @@ -133,7 +132,15 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { if err := createAndWriteBoth( 
info.Path+consts.TokenTmpFileSuffix, info.Path+consts.TokenFileSuffix, info.Path+consts.LIDTmpFileSuffix, info.Path+consts.LIDFileSuffix, - func(tokenF, lidF *os.File) error { return sealer.WriteTokenAndLIDFiles(tokenF, lidF, src) }, + func(tokenF, lidF *os.File) error { return sealer.WriteTokenTriplet(tokenF, lidF, src) }, + ); err != nil { + return nil, err + } + + if err := createAndWrite( + info.Path+consts.InfoTmpFileSuffix, + info.Path+consts.InfoFileSuffix, + func(f *os.File) error { return sealer.WriteInfoFile(f, src) }, ); err != nil { return nil, err } @@ -155,9 +162,10 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { } totalSize += uint64(st.Size()) } - info.IndexOnDisk = totalSize + info.IndexOnDisk = totalSize lidsTable := sealer.LIDsTable() + preloaded := &sealed.PreloadedData{ Info: info, TokenTable: sealer.TokenTable(), diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go new file mode 100644 index 00000000..9c003fb9 --- /dev/null +++ b/frac/sealed/sealing/writer.go @@ -0,0 +1,74 @@ +package sealing + +import ( + "bytes" + "encoding/binary" + "io" + + "github.com/alecthomas/units" + "github.com/ozontech/seq-db/bytespool" +) + +const filePrefixSize = 16 + +// fileStreamWriter writes blocks incrementally to a single file using the +// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. 
+type fileStreamWriter struct { + ws io.WriteSeeker + bw *bytespool.Writer + hw bytes.Buffer + pos int +} + +func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { + if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { + return nil, err + } + + return &fileStreamWriter{ + ws: ws, + bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), + pos: filePrefixSize, + }, nil +} + +func (fw *fileStreamWriter) writeBlock(block indexBlock) error { + header, payload := block.Bin(int64(fw.pos)) + if _, err := fw.bw.Write(payload); err != nil { + return err + } + fw.hw.Write(header) // bytes.Buffer.Write never fails + fw.pos += len(payload) + return nil +} + +func (fw *fileStreamWriter) finalize() (err error) { + defer fw.release() + if err = fw.bw.Flush(); err != nil { + return + } + var regPos int64 + if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { + return + } + if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { + return + } + if err = fw.bw.Flush(); err != nil { + return + } + prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) + if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { + return + } + _, err = fw.ws.Write(prefix) + return +} + +func (fw *fileStreamWriter) release() { + if fw.bw != nil { + bytespool.ReleaseWriter(fw.bw) + fw.bw = nil + } +} diff --git a/seq/seq.go b/seq/seq.go index 6a5a0039..64168d16 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -15,9 +15,16 @@ type ID struct { RID RID } -type MID uint64 // nanoseconds part of ID -type RID uint64 // random part of ID -type LID uint32 // local id for a fraction +var ( + SystemID = ID{math.MaxUint64, math.MaxUint64} + SystemDocPos = DocPos(0) +) + +type ( + MID uint64 // nanoseconds part of ID + RID uint64 // random part of ID + LID uint32 // local id for a fraction +) func (m MID) Time() time.Time { nanosPerSecond := uint64(time.Second) @@ -100,7 +107,6 @@ func 
FromString(x string) (ID, error) { } rid, err := hex.DecodeString(x[17:]) - if err != nil { return id, err } From a89cf378477fecce4387873b979802c3deaeb5dc Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 13:08:57 +0300 Subject: [PATCH 04/19] refactor: reuse writer for all blocks --- frac/sealed/sealing/index.go | 158 +++++++++++++--------------------- frac/sealed/sealing/writer.go | 91 +++++++++++--------- 2 files changed, 112 insertions(+), 137 deletions(-) diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 2ac8d885..558cd4a5 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -1,14 +1,8 @@ package sealing import ( - "bytes" - "encoding/binary" "io" - "iter" - "github.com/alecthomas/units" - - "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/sealed" @@ -69,47 +63,66 @@ func (s *IndexSealer) IDsTable() seqids.Table { // WriteOffsetsFile writes the .offsets file containing a single BlockOffsets block. func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - offsets := sealed.BlockOffsets{ - IDsTotal: src.Info().DocsTotal + 1, - Offsets: src.BlockOffsets(), - } - yield(s.packBlocksOffsetsBlock(offsets)) - }) + w, err := newWriter(ws) + if err != nil { + return err + } + defer w.release() + + offsets := sealed.BlockOffsets{ + IDsTotal: src.Info().DocsTotal + 1, + Offsets: src.BlockOffsets(), + } + + if err := w.writeBlock(s.packBlocksOffsetsBlock(offsets)); err != nil { + return err + } + + // Emit trailing separator. 
+ if err := w.writeBlock(indexBlock{}); err != nil { + return err + } + + return w.finalize() } func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { - if !yield(s.packMIDsBlock(block)) { - return - } - - if !yield(s.packRIDsBlock(block)) { - return - } - - if !yield(s.packPosBlock(block)) { - return - } + w, err := newWriter(ws) + if err != nil { + return err + } + defer w.release() + + for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if err := w.writeBlock(s.packMIDsBlock(block)); err != nil { + return err } - if s.lastErr = src.LastError(); s.lastErr != nil { - return + if err := w.writeBlock(s.packRIDsBlock(block)); err != nil { + return err + } + + if err := w.writeBlock(s.packPosBlock(block)); err != nil { + return err } + } + + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { + return err + } - yield(indexBlock{}) // trailing separator - }) + return w.finalize() } func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Source) error { - tokenFW, err := newFileStreamWriter(tokenWS) + tokenFW, err := newWriter(tokenWS) if err != nil { return err } defer tokenFW.release() - lidFW, err := newFileStreamWriter(lidWS) + lidFW, err := newWriter(lidWS) if err != nil { return err } @@ -171,9 +184,23 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc } func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { - return s.write(ws, func(yield func(indexBlock) bool) { - yield(s.packInfoBlock(sealed.BlockInfo{Info: src.Info()})) - }) + w, err := newWriter(ws) + if err != nil { + return err + } + defer w.release() + + block := sealed.BlockInfo{Info: src.Info()} + if err := w.writeBlock(s.packInfoBlock(block)); err != nil { + return err + } + + // Emit trailing separator. 
+ if err := w.writeBlock(indexBlock{}); err != nil { + return err + } + + return w.finalize() } // collapseOrderedFieldsTables merges FieldTables with the same field name. @@ -198,67 +225,6 @@ func collapseOrderedFieldsTables(src []token.FieldTable) []token.FieldTable { return append(dst, current) } -// write writes blocks to ws using [16-byte prefix][blocks][registry]. -// The prefix is written last (via seek-back) and stores registry position + size. -func (s *IndexSealer) write(ws io.WriteSeeker, blocks iter.Seq[indexBlock]) error { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { - return err - } - - hw := bytes.NewBuffer(nil) - bw := bytespool.AcquireWriterSize(ws, int(units.MiB)) - defer bytespool.ReleaseWriter(bw) - - pos := filePrefixSize - for block := range blocks { - if s.lastErr != nil { - return s.lastErr - } - - header, payload := block.Bin(int64(pos)) - if _, err := bw.Write(payload); err != nil { - return err - } - - if _, err := hw.Write(header); err != nil { - return err - } - - pos += len(payload) - } - - if s.lastErr != nil { - return s.lastErr - } - - if err := bw.Flush(); err != nil { - return err - } - - size := hw.Len() - regPos, err := ws.Seek(0, io.SeekEnd) - if err != nil { - return err - } - - if _, err := bw.Write(hw.Bytes()); err != nil { - return err - } - - if err := bw.Flush(); err != nil { - return err - } - - prefix := binary.LittleEndian.AppendUint64(nil, uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(size)) - if _, err := ws.Seek(0, io.SeekStart); err != nil { - return err - } - - _, err = ws.Write(prefix) - return err -} - func newIndexBlock(raw []byte) indexBlock { return indexBlock{codec: storage.CodecNo, rawLen: uint32(len(raw)), payload: raw} } diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index 9c003fb9..e67b8123 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -9,66 +9,75 @@ import ( 
"github.com/ozontech/seq-db/bytespool" ) -const filePrefixSize = 16 - -// fileStreamWriter writes blocks incrementally to a single file using the -// [prefix][blocks][registry] format, allowing interleaved writes to multiple files. -type fileStreamWriter struct { - ws io.WriteSeeker - bw *bytespool.Writer - hw bytes.Buffer +const prefixSize = 16 + +// writer writes blocks incrementally to a single file using the +// [prefix][blocks][registry] format. +type writer struct { + ws io.WriteSeeker + + wpayload *bytespool.Writer + wheader bytes.Buffer + pos int } -func newFileStreamWriter(ws io.WriteSeeker) (*fileStreamWriter, error) { - if _, err := ws.Seek(filePrefixSize, io.SeekStart); err != nil { +func newWriter(ws io.WriteSeeker) (*writer, error) { + if _, err := ws.Seek(prefixSize, io.SeekStart); err != nil { return nil, err } - return &fileStreamWriter{ - ws: ws, - bw: bytespool.AcquireWriterSize(ws, int(units.MiB)), - pos: filePrefixSize, + return &writer{ + ws: ws, + wpayload: bytespool.AcquireWriterSize(ws, int(units.MiB)), + pos: prefixSize, }, nil } -func (fw *fileStreamWriter) writeBlock(block indexBlock) error { - header, payload := block.Bin(int64(fw.pos)) - if _, err := fw.bw.Write(payload); err != nil { +func (w *writer) writeBlock(block indexBlock) error { + header, payload := block.Bin(int64(w.pos)) + + if _, err := w.wpayload.Write(payload); err != nil { return err } - fw.hw.Write(header) // bytes.Buffer.Write never fails - fw.pos += len(payload) + + w.wheader.Write(header) + w.pos += len(payload) + return nil } -func (fw *fileStreamWriter) finalize() (err error) { - defer fw.release() - if err = fw.bw.Flush(); err != nil { - return +func (w *writer) finalize() error { + if err := w.wpayload.Flush(); err != nil { + return err } - var regPos int64 - if regPos, err = fw.ws.Seek(0, io.SeekEnd); err != nil { - return + + regpos, err := w.ws.Seek(0, io.SeekEnd) + if err != nil { + return err } - if _, err = fw.bw.Write(fw.hw.Bytes()); err != nil { - return 
+ + if _, err := w.wpayload.Write(w.wheader.Bytes()); err != nil { + return err } - if err = fw.bw.Flush(); err != nil { - return + + if err := w.wpayload.Flush(); err != nil { + return err } - prefix := binary.LittleEndian.AppendUint64(make([]byte, 0, filePrefixSize), uint64(regPos)) - prefix = binary.LittleEndian.AppendUint64(prefix, uint64(fw.hw.Len())) - if _, err = fw.ws.Seek(0, io.SeekStart); err != nil { - return + + prefix := make([]byte, 0, prefixSize) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(regpos)) + prefix = binary.LittleEndian.AppendUint64(prefix, uint64(w.wheader.Len())) + + if _, err := w.ws.Seek(0, io.SeekStart); err != nil { + return err } - _, err = fw.ws.Write(prefix) - return + + _, err = w.ws.Write(prefix) + return err } -func (fw *fileStreamWriter) release() { - if fw.bw != nil { - bytespool.ReleaseWriter(fw.bw) - fw.bw = nil - } +func (w *writer) release() { + bytespool.ReleaseWriter(w.wpayload) + w.wpayload = nil } From 216f4cd666a11d4b0e7e8538f2837ce4e93aa430 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 13:29:47 +0300 Subject: [PATCH 05/19] refactor: split token triple writing --- frac/sealed/sealing/index.go | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 558cd4a5..a983d9de 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -134,18 +134,14 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc lidAccum = newLIDBlocksAccumulator(consts.LIDBlockCap) ) + // NOTE(dkharms): This is so ugly but I cannot come up with other solution here. 
accumulate := func(lids []uint32) error { return lidAccum.Add(lids, func(block lidsSealBlock) error { return lidFW.writeBlock(s.packLIDsBlock(block)) }) } - blocks := bb.BuildTokenBlocks( - src.TokenTriplet(), - accumulate, consts.RegularBlockSize, - ) - - for block, fieldsTables := range blocks { + for block, fieldsTables := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { if err := tokenFW.writeBlock(s.packTokenBlock(block)); err != nil { return err } @@ -156,31 +152,43 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc return s.lastErr } - // Write the final (possibly partial) LID block and trailing separator. - if err := lidFW.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { + if err := s.finalizeLIDFile(lidFW, lidAccum); err != nil { return err } - if err := lidFW.writeBlock(indexBlock{}); err != nil { // trailing separator + return s.finalizeTokenFile(tokenFW, allFieldsTables) +} + +func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { + if err := w.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { return err } - if err := lidFW.finalize(); err != nil { + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { return err } - // Write token section separator, token table, trailing separator. - if err := tokenFW.writeBlock(indexBlock{}); err != nil { // section separator + return w.finalize() +} + +func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { + // Emit section separator. 
+ if err := w.writeBlock(indexBlock{}); err != nil { return err } + tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if err := tokenFW.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { + if err := w.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { return err } - if err := tokenFW.writeBlock(indexBlock{}); err != nil { // trailing separator + + // Emit trailing separator. + if err := w.writeBlock(indexBlock{}); err != nil { return err } - return tokenFW.finalize() + + return w.finalize() } func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { From d1690525c620498e0a5d08407a977fb4688a4fdf Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 13:54:43 +0300 Subject: [PATCH 06/19] refactor: one more refactoring --- frac/remote.go | 5 ++-- frac/sealed.go | 5 ++-- frac/sealed/sealing/sealer.go | 54 ++++++++++++----------------------- frac/sealed_loader.go | 26 ++++++++++++----- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/frac/remote.go b/frac/remote.go index 7da03205..dc4e7118 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -23,9 +23,7 @@ import ( "github.com/ozontech/seq-db/util" ) -var ( - _ Fraction = (*Remote)(nil) -) +var _ Fraction = (*Remote)(nil) // Remote fraction is a fraction that is backed by remote storage. 
// @@ -255,6 +253,7 @@ func (f *Remote) load() error { ID: f.idReader, LID: f.lidReader, } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true diff --git a/frac/sealed.go b/frac/sealed.go index 7c419120..1c152735 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -24,9 +24,7 @@ import ( "github.com/ozontech/seq-db/util" ) -var ( - _ Fraction = (*Sealed)(nil) -) +var _ Fraction = (*Sealed)(nil) type Sealed struct { Config *Config @@ -247,6 +245,7 @@ func (f *Sealed) load() { ID: f.idReader, LID: f.lidReader, } + (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true } diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index ab97091f..888f7973 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -37,68 +37,52 @@ type Source interface { LastError() error } -func createAndWrite( - tmpPath, finalPath string, - write func(*os.File) error, -) error { - f, err := os.Create(tmpPath) - if err != nil { - return err - } - - if err := write(f); err != nil { +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { f.Close() return err } + return f.Close() +} - if err := f.Sync(); err != nil { - f.Close() +func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { + f, err := os.Create(tmpPath) + if err != nil { return err } - if err := f.Close(); err != nil { + if err := errors.Join(write(f), syncAndClose(f)); err != nil { return err } return os.Rename(tmpPath, finalPath) } -// createAndWriteBoth creates two tmp files, calls write with both, syncs and closes them, -// then renames both to their final paths. 
-func createAndWriteBoth(tmpPath1, finalPath1, tmpPath2, finalPath2 string, write func(*os.File, *os.File) error) error { +func createAndWriteBoth( + tmpPath1, finalPath1, + tmpPath2, finalPath2 string, + write func(*os.File, *os.File) error, +) error { f1, err := os.Create(tmpPath1) if err != nil { return err } + f2, err := os.Create(tmpPath2) if err != nil { f1.Close() return err } - if err := write(f1, f2); err != nil { - f1.Close() - f2.Close() - return err - } - if err := f1.Sync(); err != nil { - f1.Close() - f2.Close() - return err - } - if err := f1.Close(); err != nil { - f2.Close() - return err - } - if err := f2.Sync(); err != nil { - f2.Close() - return err - } - if err := f2.Close(); err != nil { + + writeErr := write(f1, f2) + if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { return err } + if err := os.Rename(tmpPath1, finalPath1); err != nil { return err } + return os.Rename(tmpPath2, finalPath2) } diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index c20272ee..6f74f155 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -36,15 +36,17 @@ type Loader struct { func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers IndexReaders) { t := time.Now() - var err error + var ( + err error + blockOffsets sealed.BlockOffsets + ) - var blockOffsets sealed.BlockOffsets blockOffsets, err = l.loadBlocksOffsets(readers.Offsets) if err != nil { logger.Fatal("load offsets error", zap.Error(err)) } - blocksData.BlocksOffsets = blockOffsets.Offsets + blocksData.BlocksOffsets = blockOffsets.Offsets blocksData.IDsTable = l.loadIDsTable(readers.ID, blockOffsets.IDsTotal, info.BinaryDataVer) blocksData.LIDsTable, err = l.loadLIDsTable(readers.LID) @@ -71,13 +73,16 @@ func (l *Loader) Load(blocksData *sealed.BlocksData, info *common.Info, readers func (l *Loader) loadBlocksOffsets(r storage.IndexReader) (sealed.BlockOffsets, error) { data, _, err := r.ReadIndexBlock(0, l.buf) l.buf = data + if 
err != nil { return sealed.BlockOffsets{}, err } - b := sealed.BlockOffsets{} + + var b sealed.BlockOffsets if err := b.Unpack(data); err != nil { return sealed.BlockOffsets{}, err } + return b, nil } @@ -104,12 +109,13 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio } else { mid = seq.MID(header.GetExt1()) } + table.MinBlockIDs = append(table.MinBlockIDs, seq.ID{ MID: mid, RID: seq.RID(header.GetExt2()), }) - table.IDBlocksTotal++ + table.IDBlocksTotal++ blockIdx += 3 // skip RIDs and Pos blocks } @@ -118,20 +124,26 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio // loadLIDsTable scans block headers in the .lid file to build lids.Table. func (l *Loader) loadLIDsTable(r storage.IndexReader) (*lids.Table, error) { - var maxTIDs, minTIDs []uint32 - var isContinued []bool + var ( + maxTIDs []uint32 + minTIDs []uint32 + isContinued []bool + ) for blockIdx := uint32(0); ; blockIdx++ { header, err := r.GetBlockHeader(blockIdx) if err != nil { return nil, err } + if header.Len() == 0 { break } + ext2 := header.GetExt2() maxTIDs = append(maxTIDs, uint32(ext2>>32)) minTIDs = append(minTIDs, uint32(ext2&0xFFFFFFFF)) + isContinued = append(isContinued, header.GetExt1() == 1) } From 3eed12e7978cc76814d4f3b9afc4583a110da430 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 14:36:09 +0300 Subject: [PATCH 07/19] chore: add backwards compatibility --- consts/consts.go | 3 + frac/fraction_test.go | 42 ++++++---- frac/sealed.go | 110 +++++++++++++++++++------ frac/sealed_loader.go | 137 +++++++++++++++++++++++++++++++ fracmanager/frac_manifest.go | 7 +- fracmanager/fraction_provider.go | 3 +- fracmanager/loader.go | 8 +- 7 files changed, 260 insertions(+), 50 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index 40abbdab..421f44c5 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -79,6 +79,9 @@ const ( LIDTmpFileSuffix = "._lids" LIDDelFileSuffix = ".lids.del" + // 
IndexFileSuffix is the legacy single-file index format (pre-split). + IndexFileSuffix = ".index" + RemoteFractionSuffix = ".remote" FracCacheFileSuffix = ".frac-cache" diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 7326ce54..9d4f7422 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2094,12 +2094,12 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { s.Require().NoError(err, "Sealing failed") indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), @@ -2116,6 +2116,7 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { s.config, testSkipMaskProvider{}, ) + active.Release() return sealed } @@ -2289,12 +2290,12 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal sealed.Release() indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: 
cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), @@ -2311,6 +2312,11 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal s.config, testSkipMaskProvider{}, ) + s.config) + s.config, + false, + ) + s.fraction = sealed return sealed } @@ -2361,12 +2367,12 @@ func (s *RemoteFractionTestSuite) SetupTest() { s.Require().True(offloaded, "didn't offload frac") indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), InfoRegistry: cache.NewCache[[]byte](nil, nil), TokenRegistry: cache.NewCache[[]byte](nil, nil), OffsetsRegistry: cache.NewCache[[]byte](nil, nil), diff --git a/frac/sealed.go b/frac/sealed.go index 1c152735..b1ec2eb0 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -37,7 +37,12 @@ type Sealed struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - // Per-section index files and their readers. + // isLegacy is true for fractions that use the old single .index file format. + isLegacy bool + legacyFile *os.File + legacyReader storage.IndexReader + + // Per-section index files and their readers (new split format only). 
infoFile *os.File tokenFile *os.File offsetsFile *os.File @@ -80,6 +85,7 @@ func NewSealed( info *common.Info, config *Config, skipMaskProvider skipMaskProvider, + isLegacy bool, ) *Sealed { f := &Sealed{ loadMu: &sync.RWMutex{}, @@ -88,6 +94,7 @@ func NewSealed( docsCache: docsCache, indexCache: indexCache, + isLegacy: isLegacy, info: info, BaseFileName: baseFile, Config: config, @@ -104,12 +111,26 @@ func NewSealed( f.openInfoFile() f.info = loadHeader(f.infoReader) - f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName) + f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.isLegacy) return f } func (f *Sealed) openInfoFile() { + if f.isLegacy { + if f.legacyFile == nil { + name := f.BaseFileName + consts.IndexFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal("can't open legacy index file", zap.String("file", name), zap.Error(err)) + } + f.legacyFile = file + f.legacyReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + } + f.infoReader = f.legacyReader // loadHeader uses infoReader + return + } + if f.infoFile == nil { name := f.BaseFileName + consts.InfoFileSuffix file, err := os.Open(name) @@ -122,6 +143,11 @@ func (f *Sealed) openInfoFile() { } func (f *Sealed) openIndexFiles() { + if f.isLegacy { + f.openInfoFile() // opens legacyFile if not already open + return + } + f.openInfoFile() if f.tokenFile == nil { @@ -238,15 +264,19 @@ func (f *Sealed) load() { f.openDocs() f.openIndexFiles() - readers := IndexReaders{ - Info: f.infoReader, - Token: f.tokenReader, - Offsets: f.offsetsReader, - ID: f.idReader, - LID: f.lidReader, + if f.isLegacy { + (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) + } else { + readers := IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + } + (&Loader{}).Load(&f.blocksData, f.info, readers) } - (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true } } @@ 
-260,11 +290,15 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) - g.Go(func() error { return u.Upload(gctx, f.infoFile) }) - g.Go(func() error { return u.Upload(gctx, f.tokenFile) }) - g.Go(func() error { return u.Upload(gctx, f.offsetsFile) }) - g.Go(func() error { return u.Upload(gctx, f.idFile) }) - g.Go(func() error { return u.Upload(gctx, f.lidFile) }) + if f.isLegacy { + g.Go(func() error { return u.Upload(gctx, f.legacyFile) }) + } else { + g.Go(func() error { return u.Upload(gctx, f.infoFile) }) + g.Go(func() error { return u.Upload(gctx, f.tokenFile) }) + g.Go(func() error { return u.Upload(gctx, f.offsetsFile) }) + g.Go(func() error { return u.Upload(gctx, f.idFile) }) + g.Go(func() error { return u.Upload(gctx, f.lidFile) }) + } if err := g.Wait(); err != nil { return true, err @@ -282,7 +316,11 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { - for _, file := range []*os.File{f.docsFile, f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} { + indexFiles := []*os.File{f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} + if f.isLegacy { + indexFiles = []*os.File{f.legacyFile} + } + for _, file := range append([]*os.File{f.docsFile}, indexFiles...) { if file != nil { if err := file.Close(); err != nil { logger.Error("can't close file", zap.String("file", file.Name()), zap.Error(err)) @@ -315,13 +353,17 @@ func (f *Sealed) Suicide() { } // Delete all index files directly (they are regenerable; no atomic rename needed). 
- for _, suffix := range []string{ + indexSuffixes := []string{ consts.InfoFileSuffix, consts.TokenFileSuffix, consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, - } { + } + if f.isLegacy { + indexSuffixes = []string{consts.IndexFileSuffix} + } + for _, suffix := range indexSuffixes { if err := os.Remove(f.BaseFileName + suffix); err != nil && !errors.Is(err, os.ErrNotExist) { logger.Error("can't remove index file", zap.String("file", f.BaseFileName+suffix), zap.Error(err)) } @@ -367,6 +409,17 @@ func (f *Sealed) FindLIDs(ctx context.Context, ids []seq.ID) ([]seq.LID, error) func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { f.load() + + tokenReader := &f.tokenReader + lidReader := &f.lidReader + idReader := &f.idReader + + if f.isLegacy { + tokenReader = &f.legacyReader + lidReader = &f.legacyReader + idReader = &f.legacyReader + } + return &sealedDataProvider{ ctx: ctx, fractionTypeLabel: "sealed", @@ -376,13 +429,13 @@ func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.idReader, + idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -419,16 +472,21 @@ func loadHeader(infoReader storage.IndexReader) *common.Info { return bi.Info } -// computeIndexOnDisk returns the total on-disk size of all 5 index 
files for a local fraction. -func computeIndexOnDisk(basePath string) uint64 { - var total int64 - for _, suffix := range []string{ +// computeIndexOnDisk returns the total on-disk size of index files for a local fraction. +func computeIndexOnDisk(basePath string, isLegacy bool) uint64 { + suffixes := []string{ consts.InfoFileSuffix, consts.TokenFileSuffix, consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, - } { + } + if isLegacy { + suffixes = []string{consts.IndexFileSuffix} + } + + var total int64 + for _, suffix := range suffixes { st, err := os.Stat(basePath + suffix) if err != nil { logger.Fatal("can't stat index file", zap.String("file", basePath+suffix), zap.Error(err)) diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 6f74f155..588c5fee 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -16,6 +16,143 @@ import ( "github.com/ozontech/seq-db/util" ) +// LegacyLoader reads the old single .index file format by scanning blocks sequentially. +// Block indices stored in lids.Table and seqids.Table are absolute within the .index file, +// so the same IndexReader can be passed to all sub-loaders unchanged. +type LegacyLoader struct { + reader storage.IndexReader + blockIndex uint32 +} + +// Load populates blocksData from a single legacy .index file. +// It starts at block 1 (block 0 is the Info block, already read by loadHeader). 
+func (l *LegacyLoader) Load(blocksData *sealed.BlocksData, info *common.Info, reader storage.IndexReader) { + t := time.Now() + + l.reader = reader + l.blockIndex = 1 // skip Info block at index 0 + + l.skipSection() // skip token blocks + l.skipSection() // skip token table blocks + + var err error + blocksData.IDsTable, blocksData.BlocksOffsets, err = l.loadIDs(info.BinaryDataVer) + if err != nil { + logger.Fatal("legacy load ids error", zap.Error(err)) + } + + blocksData.LIDsTable, err = l.loadLIDs() + if err != nil { + logger.Fatal("legacy load lids error", zap.Error(err)) + } + + took := time.Since(t) + docsTotalK := float64(info.DocsTotal) / 1000 + indexOnDiskMb := util.SizeToUnit(info.IndexOnDisk, "mb") + throughput := indexOnDiskMb / util.DurationToUnit(took, "s") + logger.Info("sealed fraction loaded (legacy format)", + zap.String("fraction", info.Path), + util.ZapMsTsAsESTimeStr("creation_time", info.CreationTime), + zap.String("from", info.From.String()), + zap.String("to", info.To.String()), + util.ZapFloat64WithPrec("docs_k", docsTotalK, 1), + util.ZapDurationWithPrec("took_ms", took, "ms", 1), + util.ZapFloat64WithPrec("throughput_mb_sec", throughput, 1), + ) +} + +// skipSection advances past one separator-delimited section (reads headers until Len() == 0). +func (l *LegacyLoader) skipSection() { + for { + h, err := l.reader.GetBlockHeader(l.blockIndex) + if err != nil { + logger.Panic("error reading block header", zap.Error(err)) + } + + l.blockIndex++ + if h.Len() == 0 { + return + } + } +} + +// loadIDs reads the BlockOffsets block and then scans MID/RID/Pos triplets. 
+func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Table, []uint64, error) { + var buf []byte + data, _, err := l.reader.ReadIndexBlock(l.blockIndex, buf) + l.blockIndex++ + if err != nil { + return seqids.Table{}, nil, err + } + + var offsets sealed.BlockOffsets + if err := offsets.Unpack(data); err != nil { + return seqids.Table{}, nil, err + } + + table := seqids.Table{ + StartBlockIndex: l.blockIndex, // absolute index of first MID block in .index + IDsTotal: offsets.IDsTotal, + IDBlocksTotal: uint32(len(offsets.Offsets)), + } + + for { + h, err := l.reader.GetBlockHeader(l.blockIndex) + if err != nil { + logger.Fatal("error reading id block header", zap.Error(err)) + } + + l.blockIndex++ + if h.Len() == 0 { + break + } + + mid := seq.MID(h.GetExt1()) + if fracVersion < config.BinaryDataV2 { + mid = seq.MillisToMID(h.GetExt1()) + } + + table.MinBlockIDs = append(table.MinBlockIDs, seq.ID{ + MID: mid, + RID: seq.RID(h.GetExt2()), + }) + + l.blockIndex += 2 // skip RIDs and Pos blocks + } + + return table, offsets.Offsets, nil +} + +// loadLIDs scans LID block headers, recording the absolute start index for lids.Table. +func (l *LegacyLoader) loadLIDs() (*lids.Table, error) { + startIndex := l.blockIndex // absolute index of first LID block in .index + + var ( + maxTIDs []uint32 + minTIDs []uint32 + isContinued []bool + ) + + for { + h, err := l.reader.GetBlockHeader(l.blockIndex) + if err != nil { + return nil, err + } + + l.blockIndex++ + if h.Len() == 0 { + break + } + + maxTIDs = append(maxTIDs, uint32(h.GetExt2()>>32)) + minTIDs = append(minTIDs, uint32(h.GetExt2()&0xFFFFFFFF)) + + isContinued = append(isContinued, h.GetExt1() == 1) + } + + return lids.NewTable(startIndex, minTIDs, maxTIDs, isContinued), nil +} + // IndexReaders holds one IndexReader per split index file. 
type IndexReaders struct { Info storage.IndexReader diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 9fc15fe9..5b07b8dc 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -62,6 +62,8 @@ func (m *fracManifest) AddExtension(ext string) error { m.hasWal = true case consts.SdocsFileSuffix: m.hasSdocs = true + case consts.IndexFileSuffix: + m.hasIndex = true case consts.RemoteFractionSuffix: m.hasRemote = true @@ -118,7 +120,7 @@ func (m *fracManifest) Stage() fracStage { if m.hasRemote { return fracStageRemote } - if m.hasAllIndexFiles() && (m.hasSdocs || m.hasDocs) { + if (m.hasAllIndexFiles() || m.hasIndex) && (m.hasSdocs || m.hasDocs) { return fracStageSealed } if (m.hasMeta || m.hasWal) && m.hasDocs { @@ -162,6 +164,7 @@ func removeIndexFiles(m *fracManifest) { consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, + consts.IndexFileSuffix, } { util.RemoveFile(m.basePath + suffix) } @@ -170,6 +173,7 @@ func removeIndexFiles(m *fracManifest) { m.hasOffsets = false m.hasID = false m.hasLID = false + m.hasIndex = false } func removeSdocsDel(m *fracManifest) { @@ -313,6 +317,7 @@ func removeAllFiles(basePath string) { consts.OffsetsFileSuffix, consts.IDFileSuffix, consts.LIDFileSuffix, + consts.IndexFileSuffix, consts.DocsFileSuffix, consts.SdocsFileSuffix, consts.MetaFileSuffix, diff --git a/fracmanager/fraction_provider.go b/fracmanager/fraction_provider.go index 73deb907..1e9eafec 100644 --- a/fracmanager/fraction_provider.go +++ b/fracmanager/fraction_provider.go @@ -66,7 +66,7 @@ func (fp *fractionProvider) NewActive(name string) *frac.Active { ) } -func (fp *fractionProvider) NewSealed(name string, cachedInfo *common.Info) *frac.Sealed { +func (fp *fractionProvider) NewSealed(name string, cachedInfo *common.Info, isLegacy bool) *frac.Sealed { return frac.NewSealed( name, fp.readLimiter, @@ -75,6 +75,7 @@ func (fp *fractionProvider) NewSealed(name string, cachedInfo *common.Info) *fra 
cachedInfo, // Preloaded meta information &fp.config.Fraction, fp.skipMaskProvider, + isLegacy, ) } diff --git a/fracmanager/loader.go b/fracmanager/loader.go index 6eb788ee..143b6d64 100644 --- a/fracmanager/loader.go +++ b/fracmanager/loader.go @@ -136,7 +136,7 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, case fracStageActive: actives = append(actives, l.provider.NewActive(manifest.basePath)) case fracStageSealed: - locals = append(locals, l.loadSealed(manifest.basePath, loadedInfoCache)) + locals = append(locals, l.loadSealed(manifest, loadedInfoCache)) case fracStageRemote: remotes = append(remotes, l.loadRemote(ctx, manifest.basePath, loadedInfoCache)) default: @@ -153,11 +153,11 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, } // loadSealed loads a sealed fraction using cache -func (l *Loader) loadSealed(basePath string, loadedInfoCache *fracInfoCache) *frac.Sealed { - info, found := loadedInfoCache.Get(filepath.Base(basePath)) +func (l *Loader) loadSealed(manifest *fracManifest, loadedInfoCache *fracInfoCache) *frac.Sealed { + info, found := loadedInfoCache.Get(filepath.Base(manifest.basePath)) l.updateStats(found) - f := l.provider.NewSealed(basePath, info) + f := l.provider.NewSealed(manifest.basePath, info, manifest.hasIndex) l.infoCache.Add(f.Info()) return f } From fafc75bd195c787753dbdfc79eeae16436487f89 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 16:32:22 +0300 Subject: [PATCH 08/19] perf: sort lids on creation --- frac/active_sealing_source.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index b90c0297..e9d814ca 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -77,6 +77,7 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe } src.prepareInfo() + src.prepareLids() // Sort documents if not skipped in configuration if 
!active.Config.SkipSortDocs { @@ -163,6 +164,12 @@ func (src *ActiveSealingSource) prepareInfo() { src.info.BuildDistribution(mids) } +func (src *ActiveSealingSource) prepareLids() { + for _, tl := range src.lids[1:] { + tl.GetLIDs(src.mids, src.rids) + } +} + func (src *ActiveSealingSource) Info() *common.Info { return src.info } From 9e2c2d493253f0bb4308a7bc63d86fd008cdb603 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 31 Mar 2026 20:24:00 +0300 Subject: [PATCH 09/19] chore: add statistics reporting on sealing --- frac/sealed/sealing/blocks_builder.go | 4 +- frac/sealed/sealing/blocks_builder_test.go | 12 ++--- frac/sealed/sealing/index.go | 48 ++++++++--------- frac/sealed/sealing/stats.go | 42 --------------- frac/sealed/sealing/writer.go | 62 ++++++++++++++++++++-- indexer/processor.go | 1 - 6 files changed, 89 insertions(+), 80 deletions(-) delete mode 100644 frac/sealed/sealing/stats.go diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index ea506402..a4c7b074 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -230,10 +230,10 @@ func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { // For each block that fills up, `onBlock` is called immediately // before the backing arrays are reset, so `onBlock` may read the // block data but must not retain references to it. 
-func (a *lidBlocksAcc) Add(lids []uint32, onBlock func(lidsSealBlock) error) error { +func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) error { a.currentTID++ - for _, lid := range lids { + for _, lid := range lidsbuf { if len(a.currentBlock.payload.LIDs) == a.blockCap { if err := onBlock(a.finalizeBlock()); err != nil { return err diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index 95ae545d..e2d3770e 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -35,11 +35,11 @@ func (m *mockSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { tokenStart, tokenEnd := start, end if !yield(field, func(yield func([]byte, []uint32) bool) { for j := tokenStart; j < tokenEnd; j++ { - var lids []uint32 + var lidsbuf []uint32 if j < len(m.tokenLIDs) { - lids = m.tokenLIDs[j] + lidsbuf = m.tokenLIDs[j] } - if !yield(m.tokens[j], lids) { + if !yield(m.tokens[j], lidsbuf) { return } } @@ -64,11 +64,11 @@ func (m *mockSource) ID() iter.Seq2[seq.ID, seq.DocPos] { func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { return func(yield func([]byte, []uint32) bool) { for i, token := range m.tokens { - var lids []uint32 + var lidsbuf []uint32 if i < len(m.tokenLIDs) { - lids = m.tokenLIDs[i] + lidsbuf = m.tokenLIDs[i] } - if !yield(token, lids) { + if !yield(token, lidsbuf) { return } } diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index a983d9de..6c6d57eb 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -74,12 +74,12 @@ func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { Offsets: src.BlockOffsets(), } - if err := w.writeBlock(s.packBlocksOffsetsBlock(offsets)); err != nil { + if err := w.writeBlock(btypeOffset, s.packBlocksOffsetsBlock(offsets)); err != nil { return err } // Emit trailing separator. 
- if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } @@ -94,55 +94,55 @@ func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { defer w.release() for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { - if err := w.writeBlock(s.packMIDsBlock(block)); err != nil { + if err := w.writeBlock(btypeMid, s.packMIDsBlock(block)); err != nil { return err } - if err := w.writeBlock(s.packRIDsBlock(block)); err != nil { + if err := w.writeBlock(btypeRid, s.packRIDsBlock(block)); err != nil { return err } - if err := w.writeBlock(s.packPosBlock(block)); err != nil { + if err := w.writeBlock(btypeDocPos, s.packPosBlock(block)); err != nil { return err } } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } return w.finalize() } -func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Source) error { - tokenFW, err := newWriter(tokenWS) +func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) error { + tw, err := newWriter(tws) if err != nil { return err } - defer tokenFW.release() + defer tw.release() - lidFW, err := newWriter(lidWS) + lw, err := newWriter(lws) if err != nil { return err } - defer lidFW.release() + defer lw.release() var ( bb blocksBuilder allFieldsTables []token.FieldTable - lidAccum = newLIDBlocksAccumulator(consts.LIDBlockCap) + lidacc = newLIDBlocksAccumulator(consts.LIDBlockCap) ) // NOTE(dkharms): This is so ugly but I cannot come up with other solution here. 
accumulate := func(lids []uint32) error { - return lidAccum.Add(lids, func(block lidsSealBlock) error { - return lidFW.writeBlock(s.packLIDsBlock(block)) + return lidacc.Add(lids, func(block lidsSealBlock) error { + return lw.writeBlock(btypeLid, s.packLIDsBlock(block)) }) } for block, fieldsTables := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { - if err := tokenFW.writeBlock(s.packTokenBlock(block)); err != nil { + if err := tw.writeBlock(btypeToken, s.packTokenBlock(block)); err != nil { return err } allFieldsTables = append(allFieldsTables, fieldsTables...) @@ -152,20 +152,20 @@ func (s *IndexSealer) WriteTokenTriplet(tokenWS, lidWS io.WriteSeeker, src Sourc return s.lastErr } - if err := s.finalizeLIDFile(lidFW, lidAccum); err != nil { + if err := s.finalizeLIDFile(lw, lidacc); err != nil { return err } - return s.finalizeTokenFile(tokenFW, allFieldsTables) + return s.finalizeTokenFile(tw, allFieldsTables) } func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { - if err := w.writeBlock(s.packLIDsBlock(lidAccum.Flush())); err != nil { + if err := w.writeBlock(btypeLid, s.packLIDsBlock(lidAccum.Flush())); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } @@ -174,17 +174,17 @@ func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { // Emit section separator. 
- if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeToken, indexBlock{}); err != nil { return err } tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if err := w.writeBlock(s.packTokenTableBlock(tokenTableBlock)); err != nil { + if err := w.writeBlock(btypeTokenTable, s.packTokenTableBlock(tokenTableBlock)); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } @@ -199,12 +199,12 @@ func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { defer w.release() block := sealed.BlockInfo{Info: src.Info()} - if err := w.writeBlock(s.packInfoBlock(block)); err != nil { + if err := w.writeBlock(btypeInfo, s.packInfoBlock(block)); err != nil { return err } // Emit trailing separator. - if err := w.writeBlock(indexBlock{}); err != nil { + if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { return err } diff --git a/frac/sealed/sealing/stats.go b/frac/sealed/sealing/stats.go deleted file mode 100644 index 5b119d60..00000000 --- a/frac/sealed/sealing/stats.go +++ /dev/null @@ -1,42 +0,0 @@ -package sealing - -import ( - "time" - - "go.uber.org/zap" - - "github.com/ozontech/seq-db/logger" - "github.com/ozontech/seq-db/util" -) - -type blocksStats struct { - start time.Time - len int - rawLen int - blocksCount int -} - -func startStats() blocksStats { - return blocksStats{start: time.Now()} -} - -func (s *blocksStats) takeStock(block indexBlock) { - s.blocksCount++ - s.len += len(block.payload) - s.rawLen += int(block.rawLen) -} - -func (s *blocksStats) log(name string, endTime time.Time) { - var ratio float64 - if s.len > 0 { - ratio = float64(s.rawLen) / float64(s.len) - } - logger.Info("seal block stats", - zap.String("type", name), - util.ZapUint64AsSizeStr("raw", uint64(s.rawLen)), - util.ZapUint64AsSizeStr("compressed", 
uint64(s.len)), - util.ZapFloat64WithPrec("ratio", ratio, 2), - zap.Uint64("blocks_count", uint64(s.blocksCount)), - util.ZapDurationWithPrec("write_duration_ms", endTime.Sub(s.start), "ms", 0), - ) -} diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index e67b8123..5bcfe0f3 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -5,12 +5,29 @@ import ( "encoding/binary" "io" + "go.uber.org/zap" + "github.com/alecthomas/units" + "github.com/ozontech/seq-db/bytespool" + "github.com/ozontech/seq-db/logger" + "github.com/ozontech/seq-db/util" ) const prefixSize = 16 +const ( + btypeInfo = "info" + btypeOffset = "offset" + btypeToken = "token" + btypeTokenTable = "token-table" + btypeMid = "mid" + btypeRid = "rid" + btypeDocPos = "doc-pos" + btypeLid = "lid" + btypeBlackhole = "blackhole" +) + // writer writes blocks incrementally to a single file using the // [prefix][blocks][registry] format. type writer struct { @@ -19,7 +36,26 @@ type writer struct { wpayload *bytespool.Writer wheader bytes.Buffer - pos int + pos int + stats map[string]blockstat +} + +type blockstat struct { + count int + raw int + compressed int + header int +} + +func (b blockstat) log(btype string) { + logger.Info( + "seal block stats", + zap.String("type", btype), + util.ZapUint64AsSizeStr("raw", uint64(b.raw)), + util.ZapUint64AsSizeStr("compressed", uint64(b.compressed)), + util.ZapUint64AsSizeStr("header", uint64(b.header)), + zap.Uint64("blocks_count", uint64(b.count)), + ) } func newWriter(ws io.WriteSeeker) (*writer, error) { @@ -31,16 +67,25 @@ func newWriter(ws io.WriteSeeker) (*writer, error) { ws: ws, wpayload: bytespool.AcquireWriterSize(ws, int(units.MiB)), pos: prefixSize, + stats: make(map[string]blockstat), }, nil } -func (w *writer) writeBlock(block indexBlock) error { +func (w *writer) writeBlock(btype string, block indexBlock) error { header, payload := block.Bin(int64(w.pos)) - if _, err := w.wpayload.Write(payload); err != 
nil { return err } + if btype != btypeBlackhole { + w.stats[btype] = blockstat{ + count: w.stats[btype].count + 1, + raw: w.stats[btype].raw + int(block.rawLen), + compressed: w.stats[btype].compressed + len(block.payload), + header: w.stats[btype].header + len(header), + } + } + w.wheader.Write(header) w.pos += len(payload) @@ -73,8 +118,15 @@ func (w *writer) finalize() error { return err } - _, err = w.ws.Write(prefix) - return err + if _, err := w.ws.Write(prefix); err != nil { + return err + } + + for btype, stats := range w.stats { + stats.log(btype) + } + + return nil } func (w *writer) release() { diff --git a/indexer/processor.go b/indexer/processor.go index dbf7c106..9ca83938 100644 --- a/indexer/processor.go +++ b/indexer/processor.go @@ -13,7 +13,6 @@ import ( "go.uber.org/zap" "github.com/ozontech/seq-db/consts" - "github.com/ozontech/seq-db/logger" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/tokenizer" From d59cbd0a3fdc7de52a1bb61587a25db326dc71f0 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 2 Apr 2026 10:36:50 +0300 Subject: [PATCH 10/19] refactor: add remote backwards compatibility --- consts/consts.go | 8 +- frac/fraction_test.go | 5 + frac/remote.go | 125 +++++++++++--- frac/sealed.go | 237 ++++++++++++++++++-------- frac/sealed/sealing/blocks_builder.go | 19 +-- frac/sealed/token/table_loader.go | 1 + frac/sealed_loader.go | 5 +- fracmanager/frac_manifest.go | 37 +--- fracmanager/fraction_provider.go | 7 +- fracmanager/loader.go | 23 ++- 10 files changed, 312 insertions(+), 155 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index 421f44c5..fc027f24 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -61,26 +61,22 @@ const ( InfoFileSuffix = ".info" InfoTmpFileSuffix = "._info" - InfoDelFileSuffix = ".info.del" TokenFileSuffix = ".tokens" TokenTmpFileSuffix = "._tokens" - TokenDelFileSuffix = ".tokens.del" OffsetsFileSuffix = ".offsets" OffsetsTmpFileSuffix = "._offsets" - OffsetsDelFileSuffix = 
".offsets.del" IDFileSuffix = ".ids" IDTmpFileSuffix = "._ids" - IDDelFileSuffix = ".ids.del" LIDFileSuffix = ".lids" LIDTmpFileSuffix = "._lids" - LIDDelFileSuffix = ".lids.del" // IndexFileSuffix is the legacy single-file index format (pre-split). - IndexFileSuffix = ".index" + IndexFileSuffix = ".index" + IndexTmpFileSuffix = "._index" RemoteFractionSuffix = ".remote" diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 9d4f7422..8113251a 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2391,6 +2391,11 @@ func (s *RemoteFractionTestSuite) SetupTest() { s3cli, testSkipMaskProvider{}, ) + s3cli) + s3cli, + false, + ) + s.fraction = remoteFrac } } diff --git a/frac/remote.go b/frac/remote.go index dc4e7118..c5afa37b 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -43,7 +43,12 @@ type Remote struct { docsCache *cache.Cache[[]byte] docsReader storage.DocsReader - // Per-section index files and their readers. + // IsLegacy is true for fractions that use the old single .index file format. + IsLegacy bool + legacyFile storage.ImmutableFile + legacyReader storage.IndexReader + + // Per-section index files and their readers (new split format only). infoFile storage.ImmutableFile tokenFile storage.ImmutableFile offsetsFile storage.ImmutableFile @@ -78,6 +83,7 @@ func NewRemote( config *Config, s3cli *s3.Client, skipMaskProvider skipMaskProvider, + isLegacy bool, ) *Remote { f := &Remote{ ctx: ctx, @@ -95,6 +101,9 @@ func NewRemote( s3cli: s3cli, skipMaskProvider: skipMaskProvider, + s3cli: s3cli, + s3cli: s3cli, + IsLegacy: isLegacy, } // Fast path if fraction-info cache exists AND it has valid index size. @@ -109,7 +118,7 @@ func NewRemote( // I wrote a small proposal on how we can reduce impact of such events. 
// https://github.com/ozontech/seq-db/issues/92 - if err := f.openInfoFile(); err != nil { + if err := f.openInfo(); err != nil { logger.Error( "cannot open info file: any subsequent operation will fail", zap.String("fraction", filepath.Base(f.BaseFileName)), @@ -117,7 +126,7 @@ func NewRemote( ) } - f.info = loadHeader(f.infoReader) + f.info = loadInfo(f.infoReader) return f } @@ -164,6 +173,17 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e ) return nil, err } + + tokenReader := &f.tokenReader + lidReader := &f.lidReader + idReader := &f.idReader + + if f.IsLegacy { + tokenReader = &f.legacyReader + lidReader = &f.legacyReader + idReader = &f.legacyReader + } + return &sealedDataProvider{ ctx: ctx, fractionTypeLabel: "remote", @@ -173,13 +193,13 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e docsReader: &f.docsReader, blocksOffsets: f.blocksData.BlocksOffsets, lidsTable: f.blocksData.LIDsTable, - lidsLoader: lids.NewLoader(&f.lidReader, f.indexCache.LIDs), - tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, &f.tokenReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, &f.tokenReader, f.indexCache.TokenTable), + lidsLoader: lids.NewLoader(lidReader, f.indexCache.LIDs), + tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, tokenReader, f.indexCache.Tokens), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( - &f.idReader, + idReader, f.indexCache.MIDs, f.indexCache.RIDs, f.indexCache.Params, @@ -207,6 +227,9 @@ func (f *Remote) Suicide() { files := []string{ filepath.Base(f.BaseFileName) + consts.DocsFileSuffix, filepath.Base(f.BaseFileName) + consts.SdocsFileSuffix, + // Legacy single-file format. + filepath.Base(f.BaseFileName) + consts.IndexFileSuffix, + // New split format. 
filepath.Base(f.BaseFileName) + consts.InfoFileSuffix, filepath.Base(f.BaseFileName) + consts.TokenFileSuffix, filepath.Base(f.BaseFileName) + consts.OffsetsFileSuffix, @@ -242,73 +265,117 @@ func (f *Remote) load() error { return err } - if err := f.openIndexFiles(); err != nil { + if err := f.openIndex(); err != nil { return err } - readers := IndexReaders{ + if f.IsLegacy { + (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) + f.isLoaded = true + return nil + } + + (&Loader{}).Load(&f.blocksData, f.info, IndexReaders{ Info: f.infoReader, Token: f.tokenReader, Offsets: f.offsetsReader, ID: f.idReader, LID: f.lidReader, - } + }) - (&Loader{}).Load(&f.blocksData, f.info, readers) f.isLoaded = true - return nil } -func (f *Remote) openInfoFile() error { +func (f *Remote) openInfo() error { + if f.IsLegacy { + if f.legacyFile != nil { + return nil + } + + indexName := filepath.Base(f.BaseFileName) + consts.IndexFileSuffix + f.legacyFile = s3.NewReader(f.ctx, f.s3cli, indexName) + + f.legacyReader = storage.NewIndexReader( + f.readLimiter, indexName, + f.legacyFile, f.indexCache.InfoRegistry, + ) + + // infoReader is used by [loadInfo] + f.infoReader = f.legacyReader + return nil + } + if f.infoFile != nil { return nil } - return f.openRemoteFile( - consts.InfoFileSuffix, - func(file storage.ImmutableFile) { - f.infoFile = file - f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) - }, - ) + + return f.openRemoteFile(consts.InfoFileSuffix, func(file storage.ImmutableFile) { + f.infoFile = file + f.infoReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.InfoRegistry, + ) + }) } -func (f *Remote) openIndexFiles() error { - if err := f.openInfoFile(); err != nil { +func (f *Remote) openIndex() error { + if err := f.openInfo(); err != nil { return err } + + if f.IsLegacy { + return nil + } + if f.tokenFile == nil { if err := f.openRemoteFile(consts.TokenFileSuffix, func(file 
storage.ImmutableFile) { f.tokenFile = file - f.tokenReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.TokenRegistry) + f.tokenReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.TokenRegistry, + ) }); err != nil { return err } } + if f.offsetsFile == nil { if err := f.openRemoteFile(consts.OffsetsFileSuffix, func(file storage.ImmutableFile) { f.offsetsFile = file - f.offsetsReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.OffsetsRegistry) + f.offsetsReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.OffsetsRegistry, + ) }); err != nil { return err } } + if f.idFile == nil { if err := f.openRemoteFile(consts.IDFileSuffix, func(file storage.ImmutableFile) { f.idFile = file - f.idReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.IDRegistry) + f.idReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.IDRegistry, + ) }); err != nil { return err } } + if f.lidFile == nil { if err := f.openRemoteFile(consts.LIDFileSuffix, func(file storage.ImmutableFile) { f.lidFile = file - f.lidReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.LIDRegistry) + f.lidReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.LIDRegistry, + ) }); err != nil { return err } } + return nil } @@ -317,8 +384,12 @@ func (f *Remote) openRemoteFile(suffix string, assign func(storage.ImmutableFile ok, err := f.s3cli.Exists(f.ctx, name) if err != nil { - return fmt.Errorf("cannot check existence of %q file: %w", suffix, err) + return fmt.Errorf( + "cannot check existence of %q file: %w", + suffix, err, + ) } + if !ok { return fmt.Errorf("missing %q file", suffix) } diff --git a/frac/sealed.go b/frac/sealed.go index b1ec2eb0..c7c92023 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -37,8 +37,8 @@ type Sealed struct { docsCache *cache.Cache[[]byte] docsReader 
storage.DocsReader - // isLegacy is true for fractions that use the old single .index file format. - isLegacy bool + // IsLegacy is true for fractions that use the old single .index file format. + IsLegacy bool legacyFile *os.File legacyReader storage.IndexReader @@ -94,7 +94,7 @@ func NewSealed( docsCache: docsCache, indexCache: indexCache, - isLegacy: isLegacy, + IsLegacy: isLegacy, info: info, BaseFileName: baseFile, Config: config, @@ -109,46 +109,66 @@ func NewSealed( return f } - f.openInfoFile() - f.info = loadHeader(f.infoReader) - f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.isLegacy) + f.openInfo() + f.info = loadInfo(f.infoReader) + f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.IsLegacy) return f } -func (f *Sealed) openInfoFile() { - if f.isLegacy { - if f.legacyFile == nil { - name := f.BaseFileName + consts.IndexFileSuffix - file, err := os.Open(name) - if err != nil { - logger.Fatal("can't open legacy index file", zap.String("file", name), zap.Error(err)) - } - f.legacyFile = file - f.legacyReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) +func (f *Sealed) openInfo() { + if f.IsLegacy { + if f.legacyFile != nil { + return } - f.infoReader = f.legacyReader // loadHeader uses infoReader - return - } - if f.infoFile == nil { - name := f.BaseFileName + consts.InfoFileSuffix + name := f.BaseFileName + consts.IndexFileSuffix file, err := os.Open(name) if err != nil { - logger.Fatal("can't open info file", zap.String("file", name), zap.Error(err)) + logger.Fatal( + "can't open legacy index file", + zap.String("file", name), + zap.Error(err), + ) } - f.infoFile = file - f.infoReader = storage.NewIndexReader(f.readLimiter, file.Name(), file, f.indexCache.InfoRegistry) + + f.legacyFile = file + f.legacyReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.InfoRegistry, + ) + + // infoReader is used by [loadInfo] + f.infoReader = f.legacyReader + return } -} -func 
(f *Sealed) openIndexFiles() { - if f.isLegacy { - f.openInfoFile() // opens legacyFile if not already open + if f.infoFile != nil { return } - f.openInfoFile() + name := f.BaseFileName + consts.InfoFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal( + "can't open info file", + zap.String("file", name), + zap.Error(err), + ) + } + + f.infoFile = file + f.infoReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.InfoRegistry, + ) +} + +func (f *Sealed) openIndex() { + f.openInfo() + if f.IsLegacy { + return + } if f.tokenFile == nil { name := f.BaseFileName + consts.TokenFileSuffix @@ -192,20 +212,32 @@ func (f *Sealed) openIndexFiles() { } func (f *Sealed) openDocs() { - if f.docsFile == nil { - var err error - f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) + if f.docsFile != nil { + return + } + + var err error + f.docsFile, err = os.Open(f.BaseFileName + consts.SdocsFileSuffix) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + logger.Fatal( + "can't open sdocs file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) + } + + f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) if err != nil { - if !errors.Is(err, os.ErrNotExist) { - logger.Fatal("can't open sdocs file", zap.String("frac", f.BaseFileName), zap.Error(err)) - } - f.docsFile, err = os.Open(f.BaseFileName + consts.DocsFileSuffix) - if err != nil { - logger.Fatal("can't open docs file", zap.String("frac", f.BaseFileName), zap.Error(err)) - } + logger.Fatal( + "can't open docs file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) } - f.docsReader = storage.NewDocsReader(f.readLimiter, f.docsFile, f.docsCache) } + + f.docsReader = storage.NewDocsReader(f.readLimiter, f.docsFile, f.docsCache) } func NewSealedPreloaded( @@ -240,7 +272,7 @@ func NewSealedPreloaded( }) f.openDocs() - f.openIndexFiles() + f.openIndex() docsCountK := float64(f.info.DocsTotal) / 1000 logger.Info("sealed fraction 
created from active", @@ -260,37 +292,41 @@ func (f *Sealed) load() { f.loadMu.Lock() defer f.loadMu.Unlock() - if !f.isLoaded { - f.openDocs() - f.openIndexFiles() - - if f.isLegacy { - (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) - } else { - readers := IndexReaders{ - Info: f.infoReader, - Token: f.tokenReader, - Offsets: f.offsetsReader, - ID: f.idReader, - LID: f.lidReader, - } - (&Loader{}).Load(&f.blocksData, f.info, readers) - } + if f.isLoaded { + return + } + f.openDocs() + f.openIndex() + + if f.IsLegacy { + (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) f.isLoaded = true + return } + + (&Loader{}).Load(&f.blocksData, f.info, IndexReaders{ + Info: f.infoReader, + Token: f.tokenReader, + Offsets: f.offsetsReader, + ID: f.idReader, + LID: f.lidReader, + }) + + f.isLoaded = true } // Offload saves all index files and docs to remote storage. func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) { f.loadMu.Lock() f.openDocs() - f.openIndexFiles() + f.openIndex() f.loadMu.Unlock() g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) - if f.isLegacy { + + if f.IsLegacy { g.Go(func() error { return u.Upload(gctx, f.legacyFile) }) } else { g.Go(func() error { return u.Upload(gctx, f.infoFile) }) @@ -316,14 +352,30 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { - indexFiles := []*os.File{f.infoFile, f.tokenFile, f.offsetsFile, f.idFile, f.lidFile} - if f.isLegacy { - indexFiles = []*os.File{f.legacyFile} + indexFiles := []*os.File{ + f.docsFile, + f.infoFile, + f.tokenFile, + f.offsetsFile, + f.idFile, + f.lidFile, + } + + if f.IsLegacy { + indexFiles = []*os.File{ + f.docsFile, + f.legacyFile, + } } - for _, file := range append([]*os.File{f.docsFile}, indexFiles...) 
{ + + for _, file := range indexFiles { if file != nil { if err := file.Close(); err != nil { - logger.Error("can't close file", zap.String("file", file.Name()), zap.Error(err)) + logger.Error( + "can't close file", + zap.String("file", file.Name()), + zap.Error(err), + ) } } } @@ -339,13 +391,23 @@ func (f *Sealed) Suicide() { oldPath := f.BaseFileName + consts.DocsFileSuffix newPath := f.BaseFileName + consts.DocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename docs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) + logger.Error( + "can't rename docs file", + zap.String("old", oldPath), + zap.String("new", newPath), + zap.Error(err), + ) } oldPath = f.BaseFileName + consts.SdocsFileSuffix newPath = f.BaseFileName + consts.SdocsDelFileSuffix if err := os.Rename(oldPath, newPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't rename sdocs file", zap.String("old", oldPath), zap.String("new", newPath), zap.Error(err)) + logger.Error( + "can't rename sdocs file", + zap.String("old", oldPath), + zap.String("new", newPath), + zap.Error(err), + ) } if f.PartialSuicideMode == HalfRename { @@ -360,17 +422,29 @@ func (f *Sealed) Suicide() { consts.IDFileSuffix, consts.LIDFileSuffix, } - if f.isLegacy { - indexSuffixes = []string{consts.IndexFileSuffix} + + if f.IsLegacy { + indexSuffixes = []string{ + consts.IndexFileSuffix, + } } + for _, suffix := range indexSuffixes { if err := os.Remove(f.BaseFileName + suffix); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove index file", zap.String("file", f.BaseFileName+suffix), zap.Error(err)) + logger.Error( + "can't remove index file", + zap.String("file", f.BaseFileName+suffix), + zap.Error(err), + ) } } if err := os.Remove(f.BaseFileName + consts.DocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove docs del file", 
zap.String("frac", f.BaseFileName), zap.Error(err)) + logger.Error( + "can't remove docs del file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) } if f.PartialSuicideMode == HalfRemove { @@ -378,7 +452,11 @@ func (f *Sealed) Suicide() { } if err := os.Remove(f.BaseFileName + consts.SdocsDelFileSuffix); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Error("can't remove sdocs del file", zap.String("frac", f.BaseFileName), zap.Error(err)) + logger.Error( + "can't remove sdocs del file", + zap.String("frac", f.BaseFileName), + zap.Error(err), + ) } f.skipMaskProvider.RemoveFrac(f.info.Name()) @@ -414,7 +492,7 @@ func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { lidReader := &f.lidReader idReader := &f.idReader - if f.isLegacy { + if f.IsLegacy { tokenReader = &f.legacyReader lidReader = &f.legacyReader idReader = &f.legacyReader @@ -459,7 +537,7 @@ func (f *Sealed) IsIntersecting(from, to seq.MID) bool { return f.info.IsIntersecting(from, to) } -func loadHeader(infoReader storage.IndexReader) *common.Info { +func loadInfo(infoReader storage.IndexReader) *common.Info { block, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { logger.Fatal("error reading info block", zap.Error(err)) @@ -469,6 +547,7 @@ func loadHeader(infoReader storage.IndexReader) *common.Info { if err := bi.Unpack(block); err != nil { logger.Fatal("error unpacking info block", zap.Error(err)) } + return bi.Info } @@ -481,17 +560,25 @@ func computeIndexOnDisk(basePath string, isLegacy bool) uint64 { consts.IDFileSuffix, consts.LIDFileSuffix, } + if isLegacy { - suffixes = []string{consts.IndexFileSuffix} + suffixes = []string{ + consts.IndexFileSuffix, + } } var total int64 for _, suffix := range suffixes { st, err := os.Stat(basePath + suffix) if err != nil { - logger.Fatal("can't stat index file", zap.String("file", basePath+suffix), zap.Error(err)) + logger.Fatal( + "can't stat index file", + zap.String("file", basePath+suffix), + 
zap.Error(err), + ) } total += st.Size() } + return uint64(total) } diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index a4c7b074..f91a4f9a 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -173,11 +173,8 @@ func newTokenTableEntry( } // seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. -// A new block is yielded every `blockSize` IDs. -func seqBlockID( - ids iter.Seq2[seq.ID, seq.DocPos], - blockSize int, -) iter.Seq[idsSealBlock] { +// A new block is yielded every `blockCapacity` IDs. +func seqBlockID(ids iter.Seq2[seq.ID, seq.DocPos], blockCapacity int) iter.Seq[idsSealBlock] { return func(yield func(idsSealBlock) bool) { var block idsSealBlock @@ -186,7 +183,7 @@ func seqBlockID( block.rids.Values = append(block.rids.Values, uint64(id.RID)) block.params.Values = append(block.params.Values, uint64(pos)) - if len(block.mids.Values) == blockSize { + if len(block.mids.Values) == blockCapacity { if !yield(block) { return } @@ -204,7 +201,7 @@ func seqBlockID( } type lidBlocksAcc struct { - blockCap int + blockCapacity int currentTID uint32 currentBlock lidsSealBlock @@ -213,12 +210,12 @@ type lidBlocksAcc struct { isContinued bool } -func newLIDBlocksAccumulator(blockCap int) *lidBlocksAcc { - a := &lidBlocksAcc{blockCap: blockCap} +func newLIDBlocksAccumulator(blockCapacity int) *lidBlocksAcc { + a := &lidBlocksAcc{blockCapacity: blockCapacity} a.currentBlock.ext.minTID = 1 a.currentBlock.payload = lids.Block{ - LIDs: make([]uint32, 0, blockCap), + LIDs: make([]uint32, 0, blockCapacity), Offsets: []uint32{0}, } @@ -234,7 +231,7 @@ func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) a.currentTID++ for _, lid := range lidsbuf { - if len(a.currentBlock.payload.LIDs) == a.blockCap { + if len(a.currentBlock.payload.LIDs) == a.blockCapacity { if err := onBlock(a.finalizeBlock()); err != nil { return err } diff --git 
a/frac/sealed/token/table_loader.go b/frac/sealed/token/table_loader.go index a0bf87be..0750de62 100644 --- a/frac/sealed/token/table_loader.go +++ b/frac/sealed/token/table_loader.go @@ -106,6 +106,7 @@ func (l *TableLoader) loadBlocks() ([]TableBlock, error) { tb.Unpack(blockData) blocks = append(blocks, tb) } + return blocks, nil } diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 588c5fee..28b9ef9f 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -79,8 +79,8 @@ func (l *LegacyLoader) skipSection() { // loadIDs reads the BlockOffsets block and then scans MID/RID/Pos triplets. func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Table, []uint64, error) { var buf []byte + data, _, err := l.reader.ReadIndexBlock(l.blockIndex, buf) - l.blockIndex++ if err != nil { return seqids.Table{}, nil, err } @@ -90,6 +90,9 @@ func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Tab return seqids.Table{}, nil, err } + // Move to the first block of ID section. + l.blockIndex++ + table := seqids.Table{ StartBlockIndex: l.blockIndex, // absolute index of first MID block in .index IDsTotal: offsets.IDsTotal, diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 5b07b8dc..8dc6dc72 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -35,14 +35,6 @@ type fracManifest struct { // Deletion marker file flags hasDocsDel bool // documents deletion marker hasSdocsDel bool // sorted documents deletion marker - - // Temporary file flags - hasInfoTmp bool - hasTokenTmp bool - hasOffsetsTmp bool - hasIDTmp bool - hasLIDTmp bool - hasSdocsTmp bool // temporary sorted documents file } // hasAllIndexFiles reports whether all 5 split index files are present. 
@@ -83,18 +75,12 @@ func (m *fracManifest) AddExtension(ext string) error { case consts.SdocsDelFileSuffix: m.hasSdocsDel = true - case consts.InfoTmpFileSuffix: - m.hasInfoTmp = true - case consts.TokenTmpFileSuffix: - m.hasTokenTmp = true - case consts.OffsetsTmpFileSuffix: - m.hasOffsetsTmp = true - case consts.IDTmpFileSuffix: - m.hasIDTmp = true - case consts.LIDTmpFileSuffix: - m.hasLIDTmp = true - case consts.SdocsTmpFileSuffix: - m.hasSdocsTmp = true + case consts.IndexTmpFileSuffix, + consts.InfoTmpFileSuffix, consts.TokenTmpFileSuffix, + consts.OffsetsTmpFileSuffix, consts.IDTmpFileSuffix, + consts.LIDTmpFileSuffix, consts.SdocsTmpFileSuffix: + + // Just handle temporary files (which were not commited). default: return fmt.Errorf("unknown fraction file type %s", ext) @@ -192,6 +178,7 @@ func removeDocsDel(m *fracManifest) { func removeIndexTmp(m *fracManifest) { for _, suffix := range []string{ + consts.IndexTmpFileSuffix, consts.InfoTmpFileSuffix, consts.TokenTmpFileSuffix, consts.OffsetsTmpFileSuffix, @@ -200,18 +187,10 @@ func removeIndexTmp(m *fracManifest) { } { util.RemoveFile(m.basePath + suffix) } - m.hasInfoTmp = false - m.hasTokenTmp = false - m.hasOffsetsTmp = false - m.hasIDTmp = false - m.hasLIDTmp = false } func removeSdocsTmp(m *fracManifest) { - if m.hasSdocsTmp { - util.RemoveFile(m.basePath + consts.SdocsTmpFileSuffix) - m.hasSdocsTmp = false - } + util.RemoveFile(m.basePath + consts.SdocsTmpFileSuffix) } // analyzeFiles analyzes fraction files and groups them by fraction ID diff --git a/fracmanager/fraction_provider.go b/fracmanager/fraction_provider.go index 1e9eafec..66e6477b 100644 --- a/fracmanager/fraction_provider.go +++ b/fracmanager/fraction_provider.go @@ -91,7 +91,7 @@ func (fp *fractionProvider) NewSealedPreloaded(name string, preloadedData *seale ) } -func (fp *fractionProvider) NewRemote(ctx context.Context, name string, cachedInfo *common.Info) *frac.Remote { +func (fp *fractionProvider) NewRemote(ctx context.Context, name 
string, cachedInfo *common.Info, isLegacy bool) *frac.Remote { return frac.NewRemote( ctx, name, @@ -102,6 +102,7 @@ func (fp *fractionProvider) NewRemote(ctx context.Context, name string, cachedIn &fp.config.Fraction, fp.s3cli, fp.skipMaskProvider, + isLegacy, ) } @@ -144,9 +145,11 @@ func (fp *fractionProvider) Offload(ctx context.Context, f *frac.Sealed) (*frac. if err != nil { return nil, err } + if !mustBeOffloaded { return nil, nil } + info := f.Info() - return fp.NewRemote(ctx, info.Path, info), nil + return fp.NewRemote(ctx, info.Path, info, f.IsLegacy), nil } diff --git a/fracmanager/loader.go b/fracmanager/loader.go index 143b6d64..69ff7c02 100644 --- a/fracmanager/loader.go +++ b/fracmanager/loader.go @@ -9,6 +9,7 @@ import ( "go.uber.org/zap" "golang.org/x/sync/errgroup" + "github.com/ozontech/seq-db/consts" "github.com/ozontech/seq-db/frac" "github.com/ozontech/seq-db/logger" ) @@ -138,7 +139,21 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, case fracStageSealed: locals = append(locals, l.loadSealed(manifest, loadedInfoCache)) case fracStageRemote: - remotes = append(remotes, l.loadRemote(ctx, manifest.basePath, loadedInfoCache)) + // TODO(dkharms): Drop this compatibility check. 
+ + indexName := filepath.Base(manifest.basePath) + consts.IndexFileSuffix + hasIndex, err := l.provider.s3cli.Exists(ctx, indexName) + if err != nil { + logger.Error( + "will skip fraction: cannot check existence of .index file", + zap.String("fraction", filepath.Base(manifest.basePath)), + zap.Error(err), + ) + continue + } + + manifest.hasIndex = hasIndex + remotes = append(remotes, l.loadRemote(ctx, manifest, loadedInfoCache)) default: logger.Error("unexpected fraction stage", zap.Any("manifest", manifest)) } @@ -163,11 +178,11 @@ func (l *Loader) loadSealed(manifest *fracManifest, loadedInfoCache *fracInfoCac } // loadRemote loads a remote fraction -func (l *Loader) loadRemote(ctx context.Context, basePath string, loadedInfoCache *fracInfoCache) *frac.Remote { - info, found := loadedInfoCache.Get(filepath.Base(basePath)) +func (l *Loader) loadRemote(ctx context.Context, manifest *fracManifest, loadedInfoCache *fracInfoCache) *frac.Remote { + info, found := loadedInfoCache.Get(filepath.Base(manifest.basePath)) l.updateStats(found) - f := l.provider.NewRemote(ctx, basePath, info) + f := l.provider.NewRemote(ctx, manifest.basePath, info, manifest.hasIndex) l.infoCache.Add(f.Info()) return f } From 59d563b388a027692bbf99dae6de50a822c2c9c6 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 2 Apr 2026 11:42:12 +0300 Subject: [PATCH 11/19] fix: fix deletion logic in fractions loader --- consts/consts.go | 3 ++ frac/active.go | 22 ++++---------- frac/active_sealing_source.go | 2 +- frac/sealed/sealing/blocks_builder_test.go | 8 +++-- frac/sealed/sealing/writer.go | 3 +- fracmanager/frac_manifest.go | 35 +++++++++++----------- seq/seq.go | 12 ++++---- 7 files changed, 39 insertions(+), 46 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index fc027f24..8cc1ee75 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -77,6 +77,9 @@ const ( // IndexFileSuffix is the legacy single-file index format (pre-split). 
IndexFileSuffix = ".index" IndexTmpFileSuffix = "._index" + // TODO(dkharms): [IndexDelFileSuffix] is actually not necessary. + // We can remove it in the future releases. + IndexDelFileSuffix = ".index.del" RemoteFractionSuffix = ".remote" diff --git a/frac/active.go b/frac/active.go index 7c3691c1..91e25c22 100644 --- a/frac/active.go +++ b/frac/active.go @@ -3,7 +3,6 @@ package frac import ( "context" "io" - "math" "os" "path/filepath" "sync" @@ -26,9 +25,7 @@ import ( "github.com/ozontech/seq-db/util" ) -var ( - _ Fraction = (*Active)(nil) -) +var _ Fraction = (*Active)(nil) type Active struct { Config *Config @@ -64,16 +61,6 @@ type Active struct { skipMaskProvider skipMaskProvider } -const ( - systemMID = math.MaxUint64 - systemRID = math.MaxUint64 -) - -var systemSeqID = seq.ID{ - MID: systemMID, - RID: systemRID, -} - func NewActive( baseFileName string, activeIndexer *ActiveIndexer, @@ -116,8 +103,8 @@ func NewActive( } // use of 0 as keys in maps is prohibited – it's system key, so add first element - f.MIDs.Append(systemMID) - f.RIDs.Append(systemRID) + f.MIDs.Append(uint64(seq.SystemMID)) + f.RIDs.Append(uint64(seq.SystemRID)) logger.Info("active fraction created", zap.String("fraction", baseFileName)) @@ -128,7 +115,8 @@ func mustOpenMetaWriter( baseFileName string, readLimiter *storage.ReadLimiter, docsFile *os.File, - docsStats os.FileInfo) (*os.File, *ActiveWriter, *storage.DocBlocksReader, *storage.WalReader, uint64) { + docsStats os.FileInfo, +) (*os.File, *ActiveWriter, *storage.DocBlocksReader, *storage.WalReader, uint64) { legacyMetaFileName := baseFileName + consts.MetaFileSuffix if _, err := os.Stat(legacyMetaFileName); err == nil { diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index e9d814ca..8c960b41 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -223,7 +223,7 @@ func (src *ActiveSealingSource) Docs() iter.Seq2[seq.ID, []byte] { ) for id, pos := range src.ID() { - if id == 
systemSeqID { + if id == seq.SystemID { curDoc = nil // reserved system document (no payload) } else if id != prev { if curDoc, src.lastErr = src.doc(pos); src.lastErr != nil { diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index e2d3770e..4d32ad2a 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -13,6 +13,8 @@ import ( "github.com/ozontech/seq-db/seq" ) +var _ Source = (*mockSource)(nil) + type mockSource struct { info common.Info tokens [][]byte @@ -25,9 +27,9 @@ type mockSource struct { lastError error } -func (m *mockSource) Info() common.Info { return m.info } +func (m *mockSource) Info() *common.Info { return &m.info } -func (m *mockSource) Iterator() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { +func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { start := 0 for i, field := range m.fields { @@ -128,7 +130,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { lidAccum := newLIDBlocksAccumulator(lidBlockCap) var lidBlocks []lidsSealBlock tokenBlocks := bb.BuildTokenBlocks( - src.Iterator(), + src.TokenTriplet(), func(lids []uint32) error { return lidAccum.Add(lids, func(block lidsSealBlock) error { block.payload.LIDs = slices.Clone(block.payload.LIDs) diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index 5bcfe0f3..c0e9e645 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -5,9 +5,8 @@ import ( "encoding/binary" "io" - "go.uber.org/zap" - "github.com/alecthomas/units" + "go.uber.org/zap" "github.com/ozontech/seq-db/bytespool" "github.com/ozontech/seq-db/logger" diff --git a/fracmanager/frac_manifest.go b/fracmanager/frac_manifest.go index 8dc6dc72..a7f8b81c 100644 --- a/fracmanager/frac_manifest.go +++ b/fracmanager/frac_manifest.go @@ -35,6 +35,7 @@ type fracManifest struct { // 
Deletion marker file flags hasDocsDel bool // documents deletion marker hasSdocsDel bool // sorted documents deletion marker + hasIndexDel bool // index deletion marker } // hasAllIndexFiles reports whether all 5 split index files are present. @@ -74,6 +75,8 @@ func (m *fracManifest) AddExtension(ext string) error { m.hasDocsDel = true case consts.SdocsDelFileSuffix: m.hasSdocsDel = true + case consts.IndexDelFileSuffix: + m.hasIndexDel = true case consts.IndexTmpFileSuffix, consts.InfoTmpFileSuffix, consts.TokenTmpFileSuffix, @@ -81,6 +84,7 @@ func (m *fracManifest) AddExtension(ext string) error { consts.LIDTmpFileSuffix, consts.SdocsTmpFileSuffix: // Just handle temporary files (which were not commited). + // We will just drop them in all possible cases. default: return fmt.Errorf("unknown fraction file type %s", ext) @@ -112,7 +116,7 @@ func (m *fracManifest) Stage() fracStage { if (m.hasMeta || m.hasWal) && m.hasDocs { return fracStageActive } - if m.hasDocsDel || m.hasSdocsDel { + if m.hasDocsDel || m.hasSdocsDel || m.hasIndexDel { return fracStageZombie } return fracStageUnknown @@ -126,7 +130,7 @@ func removeDocs(m *fracManifest) { } func removeSdocs(m *fracManifest) { - if m.hasDocs { + if m.hasSdocs { util.RemoveFile(m.basePath + consts.SdocsFileSuffix) m.hasSdocs = false } @@ -291,23 +295,18 @@ func cleanupTemporary(m *fracManifest) { // Used for cleaning up partially deleted or corrupted fractions func removeAllFiles(basePath string) { for _, suffix := range []string{ - consts.InfoFileSuffix, - consts.TokenFileSuffix, - consts.OffsetsFileSuffix, - consts.IDFileSuffix, - consts.LIDFileSuffix, - consts.IndexFileSuffix, - consts.DocsFileSuffix, - consts.SdocsFileSuffix, + consts.DocsFileSuffix, consts.DocsDelFileSuffix, + consts.SdocsFileSuffix, consts.SdocsDelFileSuffix, consts.SdocsTmpFileSuffix, + consts.IndexFileSuffix, consts.IndexDelFileSuffix, consts.IndexTmpFileSuffix, + + consts.InfoFileSuffix, consts.InfoTmpFileSuffix, + consts.TokenFileSuffix, 
consts.TokenTmpFileSuffix, + consts.OffsetsFileSuffix, consts.OffsetsTmpFileSuffix, + consts.IDFileSuffix, consts.IDTmpFileSuffix, + consts.LIDFileSuffix, consts.LIDTmpFileSuffix, + consts.MetaFileSuffix, - consts.DocsDelFileSuffix, - consts.SdocsDelFileSuffix, - consts.SdocsTmpFileSuffix, - consts.InfoTmpFileSuffix, - consts.TokenTmpFileSuffix, - consts.OffsetsTmpFileSuffix, - consts.IDTmpFileSuffix, - consts.LIDTmpFileSuffix, + consts.WalFileSuffix, } { util.RemoveFile(basePath + suffix) } diff --git a/seq/seq.go b/seq/seq.go index 64168d16..adae4265 100644 --- a/seq/seq.go +++ b/seq/seq.go @@ -10,16 +10,18 @@ import ( "github.com/ozontech/seq-db/util" ) +var ( + SystemMID MID = math.MaxUint64 + SystemRID RID = math.MaxUint64 + SystemID ID = ID{SystemMID, SystemRID} + SystemDocPos DocPos = DocPos(0) +) + type ID struct { MID MID RID RID } -var ( - SystemID = ID{math.MaxUint64, math.MaxUint64} - SystemDocPos = DocPos(0) -) - type ( MID uint64 // nanoseconds part of ID RID uint64 // random part of ID From 2673097d2b71fa31d5cf5a5261eb75672dcd5dbf Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Wed, 15 Apr 2026 13:20:13 +0300 Subject: [PATCH 12/19] chore: fix merge conflicts --- frac/fraction_test.go | 6 ------ frac/remote.go | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 8113251a..26488e94 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -2311,9 +2311,6 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal nil, s.config, testSkipMaskProvider{}, - ) - s.config) - s.config, false, ) @@ -2390,9 +2387,6 @@ func (s *RemoteFractionTestSuite) SetupTest() { s.config, s3cli, testSkipMaskProvider{}, - ) - s3cli) - s3cli, false, ) diff --git a/frac/remote.go b/frac/remote.go index c5afa37b..2d8506af 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -98,11 +98,9 @@ func NewRemote( BaseFileName: baseFile, Config: config, - s3cli: s3cli, - + s3cli: 
s3cli, skipMaskProvider: skipMaskProvider, - s3cli: s3cli, - s3cli: s3cli, + IsLegacy: isLegacy, } From 90be609dffd52fc549c948fec90cd53eaab5b8e3 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 16 Apr 2026 16:17:10 +0300 Subject: [PATCH 13/19] refactor: return error in iterators --- frac/active_sealing_source.go | 135 ++++++++++++++------- frac/sealed/sealing/blocks_builder.go | 60 ++++----- frac/sealed/sealing/blocks_builder_test.go | 59 ++++----- frac/sealed/sealing/index.go | 22 ++-- frac/sealed/sealing/sealer.go | 114 ++++++++--------- util/pair.go | 6 + 6 files changed, 223 insertions(+), 173 deletions(-) create mode 100644 util/pair.go diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 8c960b41..ad7db7a8 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -22,6 +22,13 @@ import ( "github.com/ozontech/seq-db/util" ) +type ( + Document = util.Pair[seq.ID, []byte] + TokenPosting = util.Pair[[]byte, []uint32] + DocLocation = util.Pair[seq.ID, seq.DocPos] + IndexedDocBlock = util.Pair[[]byte, []seq.DocPos] +) + type ActiveSealingSource struct { params common.SealParams // Sealing parameters @@ -44,8 +51,6 @@ type ActiveSealingSource struct { docPosMap map[seq.ID]seq.DocPos // Original document positions docPosSorted []seq.DocPos // Document positions after sorting docsReader *storage.DocsReader // Document storage reader - - lastErr error // Last error } func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSealingSource, error) { @@ -111,26 +116,30 @@ func sortFields(tl *TokenList) ([]string, map[string][]uint32) { return fields, fieldTid } -func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { +func (src *ActiveSealingSource) ID() iter.Seq2[DocLocation, error] { + return func(yield func(DocLocation, error) bool) { mids := src.mids.vals rids := src.rids.vals // System ID and DocPos are not stored in 
`src.sortedLIDs`. // However we do have to yield them to preserve 1-baseed indexing for ids. - if !yield(seq.SystemID, seq.SystemDocPos) { + dloc := DocLocation{First: seq.SystemID, Second: seq.SystemDocPos} + if !yield(dloc, nil) { return } for i, lid := range src.sortedLIDs { - id := seq.ID{ - MID: seq.MID(mids[lid]), - RID: seq.RID(rids[lid]), + dloc := DocLocation{ + First: seq.ID{ + MID: seq.MID(mids[lid]), + RID: seq.RID(rids[lid]), + }, } // Documents were not sorted previously. if len(src.docPosSorted) == 0 { - if !yield(id, src.docPosMap[id]) { + dloc.Second = src.docPosMap[dloc.First] + if !yield(dloc, nil) { return } continue @@ -138,7 +147,8 @@ func (src *ActiveSealingSource) ID() iter.Seq2[seq.ID, seq.DocPos] { // `i` in range [0; len(src.sortedLIDs)) // but lids indexes are 1-based. - if !yield(id, src.docPosSorted[i+1]) { + dloc.Second = src.docPosSorted[i+1] + if !yield(dloc, nil) { return } } @@ -149,10 +159,6 @@ func (src *ActiveSealingSource) BlockOffsets() []uint64 { return src.blocksOffsets } -func (src *ActiveSealingSource) LastError() error { - return src.lastErr -} - func (src *ActiveSealingSource) prepareInfo() { src.info.MetaOnDisk = 0 src.info.SealingTime = uint64(src.created.UnixMilli()) @@ -174,19 +180,19 @@ func (src *ActiveSealingSource) Info() *common.Info { return src.info } -func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { +func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { + return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { for _, field := range src.fields { - if !yield(field, src.tokensForField(field)) { + if !yield(field, src.postingsForField(field)) { return } } } } -func (src *ActiveSealingSource) tokensForField(field string) iter.Seq2[[]byte, []uint32] { +func (src *ActiveSealingSource) postingsForField(field string) iter.Seq2[TokenPosting, 
error] { var lidsbuf []uint32 - return func(yield func([]byte, []uint32) bool) { + return func(yield func(TokenPosting, error) bool) { for _, tid := range src.fieldTid[field] { token := src.tokens[tid] @@ -197,7 +203,8 @@ func (src *ActiveSealingSource) tokensForField(field string) iter.Seq2[[]byte, [ lidsbuf = append(lidsbuf, src.oldToNewLIDs[lid]) } - if !yield(token, lidsbuf) { + tpost := TokenPosting{First: token, Second: lidsbuf} + if !yield(tpost, nil) { return } } @@ -214,24 +221,34 @@ func makeInverser(sortedLIDs []uint32) []uint32 { // Docs returns an iterator for documents with their IDs. // Handles duplicate IDs (for nested indexes). -func (src *ActiveSealingSource) Docs() iter.Seq2[seq.ID, []byte] { - src.lastErr = nil - return func(yield func(seq.ID, []byte) bool) { +func (src *ActiveSealingSource) Docs() iter.Seq2[Document, error] { + return func(yield func(Document, error) bool) { var ( - prev seq.ID - curDoc []byte + curdoc []byte + prev seq.ID = seq.SystemID ) - for id, pos := range src.ID() { - if id == seq.SystemID { - curDoc = nil // reserved system document (no payload) - } else if id != prev { - if curDoc, src.lastErr = src.doc(pos); src.lastErr != nil { + for dloc, err := range src.ID() { + if err != nil { + yield(Document{}, err) + return + } + + id, pos := dloc.First, dloc.Second + + if id != prev { + xcurdoc, xerr := src.doc(pos) + if xerr != nil { + yield(Document{}, xerr) return } + curdoc = xcurdoc } + prev = id - if !yield(id, curDoc) { + doc := Document{First: id, Second: curdoc} + + if !yield(doc, nil) { return } } @@ -244,13 +261,17 @@ func (src *ActiveSealingSource) doc(pos seq.DocPos) ([]byte, error) { blockOffset := src.blocksOffsets[blockIndex] var doc []byte - err := src.docsReader.ReadDocsFunc(blockOffset, []uint64{docOffset}, func(b []byte) error { - doc = b - return nil - }) + err := src.docsReader.ReadDocsFunc( + blockOffset, []uint64{docOffset}, + func(b []byte) error { + doc = b + return nil + }, + ) if err != nil { 
return nil, err } + return doc, nil } @@ -274,10 +295,10 @@ func (src *ActiveSealingSource) SortDocs() error { // Write blocks and get new offsets and positions blocksOffsets, positions, err := src.writeDocs(blocks, bw) - - if err := util.CollapseErrors([]error{src.lastErr, err}); err != nil { + if err != nil { return err } + if err := bw.Flush(); err != nil { return err } @@ -296,12 +317,15 @@ func (src *ActiveSealingSource) SortDocs() error { if err := sdocsFile.Sync(); err != nil { return err } + if err := sdocsFile.Close(); err != nil { return err } + if err := os.Rename(sdocsFile.Name(), src.info.Path+consts.SdocsFileSuffix); err != nil { return err } + if err := util.SyncPath(filepath.Dir(src.info.Path)); err != nil { return err } @@ -322,32 +346,39 @@ func (src *ActiveSealingSource) SortDocs() error { // writeDocs compresses and writes document blocks, calculating new offsets // and collecting document positions. -func (src *ActiveSealingSource) writeDocs(blocks iter.Seq2[[]byte, []seq.DocPos], w io.Writer) ([]uint64, []seq.DocPos, error) { +func (src *ActiveSealingSource) writeDocs(blocks iter.Seq2[IndexedDocBlock, error], w io.Writer) ([]uint64, []seq.DocPos, error) { offset := 0 buf := make([]byte, 0) blocksOffsets := make([]uint64, 0) allPositions := make([]seq.DocPos, 0, len(src.mids.vals)) // Process each document block - for block, positions := range blocks { - allPositions = append(allPositions, positions...) + for docBlock, err := range blocks { + if err != nil { + return nil, nil, err + } + + allPositions = append(allPositions, docBlock.Second...) 
blocksOffsets = append(blocksOffsets, uint64(offset)) // Compress document block - buf = storage.CompressDocBlock(block, buf[:0], src.params.DocBlocksZstdLevel) + buf = storage.CompressDocBlock(docBlock.First, buf[:0], src.params.DocBlocksZstdLevel) if _, err := w.Write(buf); err != nil { return nil, nil, err } + offset += len(buf) } + return blocksOffsets, allPositions, nil } // docBlocks groups documents into fixed-size blocks. // Returns an iterator for blocks and corresponding document positions. -func docBlocks(docs iter.Seq2[seq.ID, []byte], blockSize int) iter.Seq2[[]byte, []seq.DocPos] { - return func(yield func([]byte, []seq.DocPos) bool) { +func docBlocks(docs iter.Seq2[Document, error], blockSize int) iter.Seq2[IndexedDocBlock, error] { + return func(yield func(IndexedDocBlock, error) bool) { const defaultBlockSize = 128 * units.KiB + if blockSize <= 0 { blockSize = int(defaultBlockSize) logger.Warn("document block size not specified", zap.Int("default_size", blockSize)) @@ -357,24 +388,34 @@ func docBlocks(docs iter.Seq2[seq.ID, []byte], blockSize int) iter.Seq2[[]byte, prev seq.ID index uint32 // Current block index ) + pos := make([]seq.DocPos, 0) buf := make([]byte, 0, blockSize) // Iterate through documents - for id, doc := range docs { + for doc, err := range docs { + if err != nil { + yield(IndexedDocBlock{}, err) + return + } + + id, doc := doc.First, doc.Second if id == prev { // Duplicate IDs (for nested indexes) - store document once, // but create positions for each LID pos = append(pos, seq.PackDocPos(index, uint64(len(buf)))) continue } + prev = id // If block is full, yield it if len(buf) >= blockSize { - if !yield(buf, pos) { + docBlock := IndexedDocBlock{First: buf, Second: pos} + if !yield(docBlock, nil) { return } + index++ buf = buf[:0] pos = pos[:0] @@ -387,6 +428,8 @@ func docBlocks(docs iter.Seq2[seq.ID, []byte], blockSize int) iter.Seq2[[]byte, buf = binary.LittleEndian.AppendUint32(buf, uint32(len(doc))) buf = append(buf, doc...) 
} - yield(buf, pos) + + docBlock := IndexedDocBlock{First: buf, Second: pos} + yield(docBlock, nil) } } diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index f91a4f9a..6c295903 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -8,7 +8,11 @@ import ( "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/seqids" "github.com/ozontech/seq-db/frac/sealed/token" - "github.com/ozontech/seq-db/seq" + "github.com/ozontech/seq-db/util" +) + +type ( + TokenBlock = util.Pair[tokensSealBlock, []token.FieldTable] ) // tokensExt represents the token ID range contained in a block. @@ -45,28 +49,13 @@ type idsSealBlock struct { // blocksBuilder constructs sealed blocks from various data sources. // Provides error tracking and consistency validation during block construction. -type blocksBuilder struct { - lastErr error // Last error encountered during processing -} - -// LastError returns the last error encountered during block processing. 
-func (bb *blocksBuilder) LastError() error { - return bb.lastErr -} +type blocksBuilder struct{} func (bb *blocksBuilder) BuildTokenBlocks( - it iter.Seq2[string, iter.Seq2[[]byte, []uint32]], + it iter.Seq2[string, iter.Seq2[TokenPosting, error]], accumulate func([]uint32) error, blockCapacity int, -) iter.Seq2[tokensSealBlock, []token.FieldTable] { - return func(yield func(tokensSealBlock, []token.FieldTable) bool) { - accumulate := func(lids []uint32) error { - if err := accumulate(lids); err != nil { - bb.lastErr = err - return err - } - return nil - } - +) iter.Seq2[TokenBlock, error] { + return func(yield func(TokenBlock, error) bool) { var ( block tokensSealBlock blockIdx uint32 @@ -97,7 +86,8 @@ func (bb *blocksBuilder) BuildTokenBlocks( emitFieldEntry() block.ext.maxTID = currentTID - if !yield(block, pendingTable) { + pair := TokenBlock{First: block, Second: pendingTable} + if !yield(pair, nil) { return false } @@ -121,7 +111,13 @@ func (bb *blocksBuilder) BuildTokenBlocks( fieldName = field fieldEntryStartTID = currentTID + 1 - for tok, lids := range tokIt { + for pair, err := range tokIt { + if err != nil { + yield(TokenBlock{}, err) + return + } + + tok, tlids := pair.First, pair.Second tokenSize := int(unsafe.Sizeof(uint32(0))) + len(tok) if blockSize > 0 && blockSize+tokenSize > blockCapacity { @@ -134,8 +130,8 @@ func (bb *blocksBuilder) BuildTokenBlocks( block.payload.Payload = binary.LittleEndian.AppendUint32(block.payload.Payload, uint32(len(tok))) block.payload.Payload = append(block.payload.Payload, tok...) - if err := accumulate(lids); err != nil { - bb.lastErr = err + if err := accumulate(tlids); err != nil { + yield(TokenBlock{}, err) return } @@ -174,17 +170,23 @@ func newTokenTableEntry( // seqBlockID accumulates scalar (ID, position) pairs into sealed ID blocks. // A new block is yielded every `blockCapacity` IDs. 
-func seqBlockID(ids iter.Seq2[seq.ID, seq.DocPos], blockCapacity int) iter.Seq[idsSealBlock] { - return func(yield func(idsSealBlock) bool) { +func seqBlockID(ids iter.Seq2[DocLocation, error], blockCapacity int) iter.Seq2[idsSealBlock, error] { + return func(yield func(idsSealBlock, error) bool) { var block idsSealBlock - for id, pos := range ids { + for pair, err := range ids { + if err != nil { + yield(idsSealBlock{}, err) + return + } + + id, pos := pair.First, pair.Second block.mids.Values = append(block.mids.Values, uint64(id.MID)) block.rids.Values = append(block.rids.Values, uint64(id.RID)) block.params.Values = append(block.params.Values, uint64(pos)) if len(block.mids.Values) == blockCapacity { - if !yield(block) { + if !yield(block, nil) { return } @@ -195,7 +197,7 @@ func seqBlockID(ids iter.Seq2[seq.ID, seq.DocPos], blockCapacity int) iter.Seq[i } if len(block.mids.Values) > 0 { - yield(block) + yield(block, nil) } } } diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index 4d32ad2a..a0d1ff2b 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -24,28 +24,16 @@ type mockSource struct { pos []seq.DocPos tokenLIDs [][]uint32 blocksOffsets []uint64 - lastError error } func (m *mockSource) Info() *common.Info { return &m.info } -func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] { - return func(yield func(string, iter.Seq2[[]byte, []uint32]) bool) { +func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { + return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { start := 0 for i, field := range m.fields { end := int(m.fieldMaxTIDs[i]) - tokenStart, tokenEnd := start, end - if !yield(field, func(yield func([]byte, []uint32) bool) { - for j := tokenStart; j < tokenEnd; j++ { - var lidsbuf []uint32 - if j < len(m.tokenLIDs) { - lidsbuf = m.tokenLIDs[j] - } - if 
!yield(m.tokens[j], lidsbuf) { - return - } - } - }) { + if !yield(field, m.tokensForField(start, end)) { return } start = end @@ -53,24 +41,25 @@ func (m *mockSource) TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32 } } -func (m *mockSource) ID() iter.Seq2[seq.ID, seq.DocPos] { - return func(yield func(seq.ID, seq.DocPos) bool) { - for i, id := range m.ids { - if !yield(id, m.pos[i]) { +func (m *mockSource) tokensForField(start, end int) iter.Seq2[TokenPosting, error] { + return func(yield func(TokenPosting, error) bool) { + for j := start; j < end; j++ { + var lidsbuf []uint32 + if j < len(m.tokenLIDs) { + lidsbuf = m.tokenLIDs[j] + } + pair := TokenPosting{First: m.tokens[j], Second: lidsbuf} + if !yield(pair, nil) { return } } } } -func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { - return func(yield func([]byte, []uint32) bool) { - for i, token := range m.tokens { - var lidsbuf []uint32 - if i < len(m.tokenLIDs) { - lidsbuf = m.tokenLIDs[i] - } - if !yield(token, lidsbuf) { +func (m *mockSource) ID() iter.Seq2[DocLocation, error] { + return func(yield func(DocLocation, error) bool) { + for i, id := range m.ids { + if !yield(DocLocation{First: id, Second: m.pos[i]}, nil) { return } } @@ -78,7 +67,6 @@ func (m *mockSource) TokenAndLIDs() iter.Seq2[[]byte, []uint32] { } func (m *mockSource) BlockOffsets() []uint64 { return m.blocksOffsets } -func (m *mockSource) LastError() error { return m.lastError } func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { src := mockSource{ @@ -150,11 +138,13 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { blockIndex := 0 allFieldsTables := []token.FieldTable{} - for result, fieldsTables := range tokenBlocks { - assert.Equal(t, expectedSizes[blockIndex], result.payload.Len()) - for i := range result.payload.Len() { + for pair, err := range tokenBlocks { + assert.NoError(t, err) + block, fieldsTables := pair.First, pair.Second + assert.Equal(t, expectedSizes[blockIndex], block.payload.Len()) 
+ for i := range block.payload.Len() { tid++ - assert.Equal(t, src.tokens[tid-1], result.payload.GetToken(i)) + assert.Equal(t, src.tokens[tid-1], block.payload.GetToken(i)) } allFieldsTables = append(allFieldsTables, fieldsTables...) blockIndex++ @@ -323,10 +313,13 @@ func TestBlocksBuilder_IDsBlocks(t *testing.T) { i := 0 ids := []seq.ID{} pos := []seq.DocPos{} - for block := range seqBlockID(src.ID(), 3) { + for block, err := range seqBlockID(src.ID(), 3) { + assert.NoError(t, err) + assert.Equal(t, expectedSizes[i], len(block.mids.Values)) assert.Equal(t, expectedSizes[i], len(block.rids.Values)) assert.Equal(t, expectedSizes[i], len(block.params.Values)) + i++ j := 0 for _, mid := range block.mids.Values { diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 6c6d57eb..52c38308 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -11,7 +11,6 @@ import ( "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/seq" "github.com/ozontech/seq-db/storage" - "github.com/ozontech/seq-db/util" "github.com/ozontech/seq-db/zstd" ) @@ -37,8 +36,6 @@ type IndexSealer struct { idsTable seqids.Table lidsTable lids.Table tokenTable token.Table - - lastErr error } func NewIndexSealer(params common.SealParams) *IndexSealer { @@ -93,7 +90,11 @@ func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { } defer w.release() - for block := range seqBlockID(src.ID(), consts.IDsPerBlock) { + for block, err := range seqBlockID(src.ID(), consts.IDsPerBlock) { + if err != nil { + return err + } + if err := w.writeBlock(btypeMid, s.packMIDsBlock(block)); err != nil { return err } @@ -141,15 +142,16 @@ func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err }) } - for block, fieldsTables := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { - if err := tw.writeBlock(btypeToken, s.packTokenBlock(block)); err != nil { + for pair, err := range 
bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { + if err != nil { + return err + } + + if err := tw.writeBlock(btypeToken, s.packTokenBlock(pair.First)); err != nil { return err } - allFieldsTables = append(allFieldsTables, fieldsTables...) - } - if s.lastErr = util.CollapseErrors([]error{src.LastError(), bb.LastError()}); s.lastErr != nil { - return s.lastErr + allFieldsTables = append(allFieldsTables, pair.Second...) } if err := s.finalizeLIDFile(lw, lidacc); err != nil { diff --git a/frac/sealed/sealing/sealer.go b/frac/sealed/sealing/sealer.go index 888f7973..57863d82 100644 --- a/frac/sealed/sealing/sealer.go +++ b/frac/sealed/sealing/sealer.go @@ -13,6 +13,11 @@ import ( "github.com/ozontech/seq-db/util" ) +type ( + DocLocation = util.Pair[seq.ID, seq.DocPos] + TokenPosting = util.Pair[[]byte, []uint32] +) + // Source interface defines the contract for data sources that can be sealed. // Provides access to all necessary data components for index creation type Source interface { @@ -21,7 +26,7 @@ type Source interface { // ID returns an iterator over stored document identifiers paired with // their positions, in descending [seq.ID] order. - ID() iter.Seq2[seq.ID, seq.DocPos] + ID() iter.Seq2[DocLocation, error] // BlockOffsets returns byte offsets to each document block // within this source's `.docs` file. @@ -30,60 +35,7 @@ type Source interface { // TokenTriplet iterates over fields in lexicographic order. // For each field, it yields tokens (lexicographically sorted) // paired with the local document ID list for that token. - TokenTriplet() iter.Seq2[string, iter.Seq2[[]byte, []uint32]] - - // LastError returns the last error encountered during iteration, - // or nil if no error occurred. 
- LastError() error -} - -func syncAndClose(f *os.File) error { - if err := f.Sync(); err != nil { - f.Close() - return err - } - return f.Close() -} - -func createAndWrite(tmpPath, finalPath string, write func(*os.File) error) error { - f, err := os.Create(tmpPath) - if err != nil { - return err - } - - if err := errors.Join(write(f), syncAndClose(f)); err != nil { - return err - } - - return os.Rename(tmpPath, finalPath) -} - -func createAndWriteBoth( - tmpPath1, finalPath1, - tmpPath2, finalPath2 string, - write func(*os.File, *os.File) error, -) error { - f1, err := os.Create(tmpPath1) - if err != nil { - return err - } - - f2, err := os.Create(tmpPath2) - if err != nil { - f1.Close() - return err - } - - writeErr := write(f1, f2) - if err := errors.Join(writeErr, syncAndClose(f1), syncAndClose(f2)); err != nil { - return err - } - - if err := os.Rename(tmpPath1, finalPath1); err != nil { - return err - } - - return os.Rename(tmpPath2, finalPath2) + TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] } // Seal writes five index files (.info, .token, .offsets, .id, .lid) for the fraction @@ -162,3 +114,55 @@ func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) { return preloaded, nil } + +func syncAndClose(f *os.File) error { + if err := f.Sync(); err != nil { + f.Close() + return err + } + return f.Close() +} + +func createAndWrite( + tmp, final string, + write func(*os.File) error, +) error { + f, err := os.Create(tmp) + if err != nil { + return err + } + + if err := errors.Join(write(f), syncAndClose(f)); err != nil { + return err + } + + return os.Rename(tmp, final) +} + +func createAndWriteBoth( + tmpa, finala, + tmpb, finalb string, + write func(*os.File, *os.File) error, +) error { + a, err := os.Create(tmpa) + if err != nil { + return err + } + + b, err := os.Create(tmpb) + if err != nil { + a.Close() + return err + } + + writeErr := write(a, b) + if err := errors.Join(writeErr, syncAndClose(a), syncAndClose(b)); 
err != nil { + return err + } + + if err := os.Rename(tmpa, finala); err != nil { + return err + } + + return os.Rename(tmpb, finalb) +} diff --git a/util/pair.go b/util/pair.go new file mode 100644 index 00000000..2930fee9 --- /dev/null +++ b/util/pair.go @@ -0,0 +1,6 @@ +package util + +type Pair[F, S any] struct { + First F + Second S +} From 73b8b6442720ec968d3dfe442698379c4947f3ab Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Thu, 16 Apr 2026 19:05:56 +0300 Subject: [PATCH 14/19] perf: unsafe way to receive lids --- frac/active_lids.go | 7 ++++++- frac/active_sealing_source.go | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/frac/active_lids.go b/frac/active_lids.go index 4875deb8..236136ef 100644 --- a/frac/active_lids.go +++ b/frac/active_lids.go @@ -41,13 +41,18 @@ func (tl *TokenLIDs) GetLIDs(mids, rids *UInt64s) []uint32 { return tl.sorted } +// SortedLIDsUnsafe returns pre-merged LIDs. +// Only safe to call after the fraction is frozen and lids queue was drained. 
+func (tl *TokenLIDs) SortedLIDsUnsafe() []uint32 { + return tl.sorted +} + type SeqIDCmp struct { mid []uint64 rid []uint64 } func (c *SeqIDCmp) compare(a, b uint32) int { - midA, midB := c.mid[a], c.mid[b] if midA > midB { diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index ad7db7a8..147fd08a 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -196,7 +196,7 @@ func (src *ActiveSealingSource) postingsForField(field string) iter.Seq2[TokenPo for _, tid := range src.fieldTid[field] { token := src.tokens[tid] - lids := src.lids[tid].GetLIDs(src.mids, src.rids) + lids := src.lids[tid].SortedLIDsUnsafe() lidsbuf = slices.Grow(lidsbuf[:0], len(lids)) for _, lid := range lids { From a9e9ec1c8227edb4b0d1edfff21346b45e5d1268 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Fri, 17 Apr 2026 13:08:29 +0300 Subject: [PATCH 15/19] perf: use linear array for token ids --- frac/active_sealing_source.go | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index 147fd08a..a625da42 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "io" "iter" + "maps" "os" "path/filepath" "slices" @@ -43,10 +44,11 @@ type ActiveSealingSource struct { mids *UInt64s // MIDs rids *UInt64s // RIDs - fields []string // Sorted field names - fieldTid map[string][]uint32 // Each field contains sorted TIDs based on token value - tokens [][]byte // Tokens (values) by TID - lids []*TokenLIDs // LID lists for each token + fields []string // Sorted field names + fieldTids [][]uint32 // Each field contains sorted TIDs based on token value + + tokens [][]byte // Tokens (values) by TID + lids []*TokenLIDs // LID lists for each token docPosMap map[seq.ID]seq.DocPos // Original document positions docPosSorted []seq.DocPos // Document positions after sorting @@ -57,7 +59,7 
@@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe info := *active.info // copy sortedLIDs := active.GetAllDocuments() - fields, fieldTid := sortFields(active.TokenList) + fields, fieldTids := sortFields(active.TokenList) src := ActiveSealingSource{ params: params, @@ -71,10 +73,10 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe mids: active.MIDs, rids: active.RIDs, - fields: fields, - fieldTid: fieldTid, - tokens: active.TokenList.tidToVal, - lids: active.TokenList.tidToLIDs, + fields: fields, + fieldTids: fieldTids, + tokens: active.TokenList.tidToVal, + lids: active.TokenList.tidToLIDs, docPosMap: active.DocsPositions.idToPos, blocksOffsets: active.DocBlocks.vals, @@ -94,26 +96,24 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe return &src, nil } -func sortFields(tl *TokenList) ([]string, map[string][]uint32) { - fields := make([]string, 0, len(tl.FieldTIDs)) - fieldTid := make(map[string][]uint32, len(tl.FieldTIDs)) - - for field, tids := range tl.FieldTIDs { - fields = append(fields, field) +func sortFields(tl *TokenList) ([]string, [][]uint32) { + fields := slices.Collect(maps.Keys(tl.FieldTIDs)) + slices.Sort(fields) + fieldTids := make([][]uint32, len(tl.FieldTIDs)) + for i, field := range fields { // Make a copy because this memory is shared // with concurrent readers (user search queries). 
- cp := slices.Clone(tids) + cp := slices.Clone(tl.FieldTIDs[field]) slices.SortFunc(cp, func(i, j uint32) int { return bytes.Compare(tl.tidToVal[i], tl.tidToVal[j]) }) - fieldTid[field] = cp + fieldTids[i] = cp } - slices.Sort(fields) - return fields, fieldTid + return fields, fieldTids } func (src *ActiveSealingSource) ID() iter.Seq2[DocLocation, error] { @@ -182,18 +182,18 @@ func (src *ActiveSealingSource) Info() *common.Info { func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[TokenPosting, error]] { return func(yield func(string, iter.Seq2[TokenPosting, error]) bool) { - for _, field := range src.fields { - if !yield(field, src.postingsForField(field)) { + for idx, field := range src.fields { + if !yield(field, src.postingsForField(field, idx)) { return } } } } -func (src *ActiveSealingSource) postingsForField(field string) iter.Seq2[TokenPosting, error] { +func (src *ActiveSealingSource) postingsForField(field string, idx int) iter.Seq2[TokenPosting, error] { var lidsbuf []uint32 return func(yield func(TokenPosting, error) bool) { - for _, tid := range src.fieldTid[field] { + for _, tid := range src.fieldTids[idx] { token := src.tokens[tid] lids := src.lids[tid].SortedLIDsUnsafe() From fdd5beddf7236bd203ec68da1d18e943b3312c15 Mon Sep 17 00:00:00 2001 From: Daniil Date: Tue, 5 May 2026 11:39:48 +0300 Subject: [PATCH 16/19] refactor: bump binary version (#414) --- cmd/index_analyzer/main.go | 4 +- config/frac_version.go | 11 +- frac/active_sealing_source.go | 16 +- frac/common/info.go | 8 +- frac/fraction_concurrency_test.go | 20 +- frac/fraction_test.go | 55 +----- frac/index_cache.go | 41 +++- frac/remote.go | 154 ++++++++------- frac/sealed.go | 213 +++++++++++---------- frac/sealed/sealing/blocks_builder.go | 27 ++- frac/sealed/sealing/blocks_builder_test.go | 26 +-- frac/sealed/sealing/index.go | 75 ++------ frac/sealed/sealing/writer.go | 38 ++-- frac/sealed/token/table_loader.go | 69 ++++++- frac/sealed_loader.go | 39 ++-- 
fracmanager/cache_maintainer.go | 18 +- fracmanager/loader.go | 2 +- go.mod | 2 +- go.sum | 4 +- storage/index_reader.go | 12 ++ storage/s3/reader.go | 11 +- 21 files changed, 458 insertions(+), 387 deletions(-) diff --git a/cmd/index_analyzer/main.go b/cmd/index_analyzer/main.go index 4ea8dd44..e2c1c349 100644 --- a/cmd/index_analyzer/main.go +++ b/cmd/index_analyzer/main.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "fmt" "hash/fnv" + "io" "os" "strings" "sync" @@ -117,13 +118,12 @@ func analyzeIndex( defer tokenFile.Close() defer lidFile.Close() - infoReader := storage.NewIndexReader(rl, infoFile.Name(), infoFile, indexCache.InfoRegistry) tokenReader := storage.NewIndexReader(rl, tokenFile.Name(), tokenFile, indexCache.TokenRegistry) lidReader := storage.NewIndexReader(rl, lidFile.Name(), lidFile, indexCache.LIDRegistry) // --- Info --- var blockIndex uint32 - infoData, _, err := infoReader.ReadIndexBlock(0, nil) + infoData, err := io.ReadAll(infoFile) if err != nil { logger.Fatal("error reading info block", zap.String("file", infoFile.Name()), zap.Error(err)) } diff --git a/config/frac_version.go b/config/frac_version.go index d3ff1b14..ff1283b0 100644 --- a/config/frac_version.go +++ b/config/frac_version.go @@ -5,10 +5,19 @@ type BinaryDataVersion uint16 const ( // BinaryDataV0 - initial version BinaryDataV0 BinaryDataVersion = iota + // BinaryDataV1 - support RIDs encoded without varint BinaryDataV1 + // BinaryDataV2 - MIDs stored in nanoseconds BinaryDataV2 + + // BinaryDataV3 - `.index` file is split across several files + // storing specific sections: `.info`, `.offsets`, `.tokens`, `.ids`, `.lids`. + // + // Also in this version we've changed the binary layout of section storing + // info block. As a result we store info as a plain JSON without additional registry. 
+ BinaryDataV3 ) -const CurrentFracVersion = BinaryDataV2 +const CurrentFracVersion = BinaryDataV3 diff --git a/frac/active_sealing_source.go b/frac/active_sealing_source.go index a625da42..e7c451e2 100644 --- a/frac/active_sealing_source.go +++ b/frac/active_sealing_source.go @@ -45,7 +45,7 @@ type ActiveSealingSource struct { rids *UInt64s // RIDs fields []string // Sorted field names - fieldTids [][]uint32 // Each field contains sorted TIDs based on token value + fieldTIDs [][]uint32 // Each field contains sorted TIDs based on token value tokens [][]byte // Tokens (values) by TID lids []*TokenLIDs // LID lists for each token @@ -59,7 +59,7 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe info := *active.info // copy sortedLIDs := active.GetAllDocuments() - fields, fieldTids := sortFields(active.TokenList) + fields, fieldTIDs := sortFields(active.TokenList) src := ActiveSealingSource{ params: params, @@ -74,7 +74,7 @@ func NewActiveSealingSource(active *Active, params common.SealParams) (*ActiveSe rids: active.RIDs, fields: fields, - fieldTids: fieldTids, + fieldTIDs: fieldTIDs, tokens: active.TokenList.tidToVal, lids: active.TokenList.tidToLIDs, @@ -100,7 +100,7 @@ func sortFields(tl *TokenList) ([]string, [][]uint32) { fields := slices.Collect(maps.Keys(tl.FieldTIDs)) slices.Sort(fields) - fieldTids := make([][]uint32, len(tl.FieldTIDs)) + fieldTIDs := make([][]uint32, len(tl.FieldTIDs)) for i, field := range fields { // Make a copy because this memory is shared // with concurrent readers (user search queries). 
@@ -110,10 +110,10 @@ func sortFields(tl *TokenList) ([]string, [][]uint32) { return bytes.Compare(tl.tidToVal[i], tl.tidToVal[j]) }) - fieldTids[i] = cp + fieldTIDs[i] = cp } - return fields, fieldTids + return fields, fieldTIDs } func (src *ActiveSealingSource) ID() iter.Seq2[DocLocation, error] { @@ -122,7 +122,7 @@ func (src *ActiveSealingSource) ID() iter.Seq2[DocLocation, error] { rids := src.rids.vals // System ID and DocPos are not stored in `src.sortedLIDs`. - // However we do have to yield them to preserve 1-baseed indexing for ids. + // However we do have to yield them to preserve 1-based indexing for ids. dloc := DocLocation{First: seq.SystemID, Second: seq.SystemDocPos} if !yield(dloc, nil) { return @@ -193,7 +193,7 @@ func (src *ActiveSealingSource) TokenTriplet() iter.Seq2[string, iter.Seq2[Token func (src *ActiveSealingSource) postingsForField(field string, idx int) iter.Seq2[TokenPosting, error] { var lidsbuf []uint32 return func(yield func(TokenPosting, error) bool) { - for _, tid := range src.fieldTids[idx] { + for _, tid := range src.fieldTIDs[idx] { token := src.tokens[tid] lids := src.lids[tid].SortedLIDsUnsafe() diff --git a/frac/common/info.go b/frac/common/info.go index 69121408..20e7f7c2 100644 --- a/frac/common/info.go +++ b/frac/common/info.go @@ -15,9 +15,11 @@ import ( "github.com/ozontech/seq-db/seq" ) -const DistributionMaxInterval = 24 * time.Hour -const DistributionBucket = time.Minute -const DistributionSpreadThreshold = 10 * time.Minute +const ( + DistributionMaxInterval = 24 * time.Hour + DistributionBucket = time.Minute + DistributionSpreadThreshold = 10 * time.Minute +) type Info struct { Path string `json:"name"` diff --git a/frac/fraction_concurrency_test.go b/frac/fraction_concurrency_test.go index 138586fd..27f5d971 100644 --- a/frac/fraction_concurrency_test.go +++ b/frac/fraction_concurrency_test.go @@ -16,10 +16,7 @@ import ( "github.com/ozontech/seq-db/cache" "github.com/ozontech/seq-db/frac/common" 
"github.com/ozontech/seq-db/frac/processor" - "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/sealing" - "github.com/ozontech/seq-db/frac/sealed/seqids" - "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/parser" "github.com/ozontech/seq-db/seq" @@ -353,28 +350,17 @@ func seal(active *Active) (*Sealed, error) { if err != nil { return nil, err } - indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), - InfoRegistry: cache.NewCache[[]byte](nil, nil), - TokenRegistry: cache.NewCache[[]byte](nil, nil), - OffsetsRegistry: cache.NewCache[[]byte](nil, nil), - IDRegistry: cache.NewCache[[]byte](nil, nil), - LIDRegistry: cache.NewCache[[]byte](nil, nil), - } + sealed := NewSealedPreloaded( active.BaseFileName, preloaded, storage.NewReadLimiter(1, nil), - indexCache, + newIndexCache(), cache.NewCache[[]byte](nil, nil), &Config{}, testSkipMaskProvider{}, ) + active.Release() return sealed, nil } diff --git a/frac/fraction_test.go b/frac/fraction_test.go index 26488e94..8757c0db 100644 --- a/frac/fraction_test.go +++ b/frac/fraction_test.go @@ -22,10 +22,7 @@ import ( "github.com/ozontech/seq-db/cache" "github.com/ozontech/seq-db/frac/common" "github.com/ozontech/seq-db/frac/processor" - "github.com/ozontech/seq-db/frac/sealed/lids" "github.com/ozontech/seq-db/frac/sealed/sealing" - "github.com/ozontech/seq-db/frac/sealed/seqids" - "github.com/ozontech/seq-db/frac/sealed/token" "github.com/ozontech/seq-db/indexer" "github.com/ozontech/seq-db/node" "github.com/ozontech/seq-db/parser" @@ -1837,11 +1834,11 @@ func (s *FractionTestSuite) TestFractionInfo() { s.Require().Equal(uint64(0), info.IndexOnDisk, "index on 
disk doesn't match") case *Sealed: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") - s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1600), + s.Require().True(info.IndexOnDisk > uint64(1300) && info.IndexOnDisk < uint64(1400), "index on disk doesn't match. actual value: %d", info.IndexOnDisk) case *Remote: s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value") - s.Require().True(info.IndexOnDisk > uint64(1400) && info.IndexOnDisk < uint64(1600), + s.Require().True(info.IndexOnDisk > uint64(1300) && info.IndexOnDisk < uint64(1400), "index on disk doesn't match. actual value: %d", info.IndexOnDisk) default: s.Require().Fail("unsupported fraction type") @@ -2093,25 +2090,11 @@ func (s *FractionTestSuite) newSealed(bulks ...[]string) *Sealed { preloaded, err := sealing.Seal(activeSealingSource, s.sealParams) s.Require().NoError(err, "Sealing failed") - indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), - InfoRegistry: cache.NewCache[[]byte](nil, nil), - TokenRegistry: cache.NewCache[[]byte](nil, nil), - OffsetsRegistry: cache.NewCache[[]byte](nil, nil), - IDRegistry: cache.NewCache[[]byte](nil, nil), - LIDRegistry: cache.NewCache[[]byte](nil, nil), - } - sealed := NewSealedPreloaded( active.BaseFileName, preloaded, storage.NewReadLimiter(1, nil), - indexCache, + newIndexCache(), cache.NewCache[[]byte](nil, nil), s.config, testSkipMaskProvider{}, @@ -2289,24 +2272,10 @@ func (s *SealedLoadedFractionTestSuite) newSealedLoaded(bulks ...[]string) *Seal sealed := s.newSealed(bulks...) 
sealed.Release() - indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), - InfoRegistry: cache.NewCache[[]byte](nil, nil), - TokenRegistry: cache.NewCache[[]byte](nil, nil), - OffsetsRegistry: cache.NewCache[[]byte](nil, nil), - IDRegistry: cache.NewCache[[]byte](nil, nil), - LIDRegistry: cache.NewCache[[]byte](nil, nil), - } - sealed = NewSealed( sealed.BaseFileName, storage.NewReadLimiter(1, nil), - indexCache, + newIndexCache(), cache.NewCache[[]byte](nil, nil), nil, s.config, @@ -2363,25 +2332,11 @@ func (s *RemoteFractionTestSuite) SetupTest() { s.Require().NoError(err, "offload failed") s.Require().True(offloaded, "didn't offload frac") - indexCache := &IndexCache{ - MIDs: cache.NewCache[[]byte](nil, nil), - RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), - Params: cache.NewCache[seqids.BlockParams](nil, nil), - LIDs: cache.NewCache[*lids.Block](nil, nil), - Tokens: cache.NewCache[*token.Block](nil, nil), - TokenTable: cache.NewCache[token.Table](nil, nil), - InfoRegistry: cache.NewCache[[]byte](nil, nil), - TokenRegistry: cache.NewCache[[]byte](nil, nil), - OffsetsRegistry: cache.NewCache[[]byte](nil, nil), - IDRegistry: cache.NewCache[[]byte](nil, nil), - LIDRegistry: cache.NewCache[[]byte](nil, nil), - } - remoteFrac := NewRemote( context.Background(), sealed.BaseFileName, storage.NewReadLimiter(1, nil), - indexCache, + newIndexCache(), cache.NewCache[[]byte](nil, nil), sealed.info, s.config, diff --git a/frac/index_cache.go b/frac/index_cache.go index 852fe51f..043e8c5c 100644 --- a/frac/index_cache.go +++ b/frac/index_cache.go @@ -7,33 +7,60 @@ import ( "github.com/ozontech/seq-db/frac/sealed/token" ) +func newIndexCache() *IndexCache { + return &IndexCache{ + LegacyRegistry: 
cache.NewCache[[]byte](nil, nil), + + TokenRegistry: cache.NewCache[[]byte](nil, nil), + OffsetsRegistry: cache.NewCache[[]byte](nil, nil), + IDRegistry: cache.NewCache[[]byte](nil, nil), + LIDRegistry: cache.NewCache[[]byte](nil, nil), + + MIDs: cache.NewCache[[]byte](nil, nil), + RIDs: cache.NewCache[seqids.BlockRIDs](nil, nil), + Params: cache.NewCache[seqids.BlockParams](nil, nil), + + Tokens: cache.NewCache[*token.Block](nil, nil), + TokenTable: cache.NewCache[token.Table](nil, nil), + LIDs: cache.NewCache[*lids.Block](nil, nil), + } +} + type IndexCache struct { + // Registry cache for legacy sealed fractions. + LegacyRegistry *cache.Cache[[]byte] + // Per-file registry caches (each IndexReader needs its own). - InfoRegistry *cache.Cache[[]byte] TokenRegistry *cache.Cache[[]byte] OffsetsRegistry *cache.Cache[[]byte] IDRegistry *cache.Cache[[]byte] LIDRegistry *cache.Cache[[]byte] // Block-level data caches shared across all readers. - MIDs *cache.Cache[[]byte] - RIDs *cache.Cache[seqids.BlockRIDs] - Params *cache.Cache[seqids.BlockParams] + MIDs *cache.Cache[[]byte] + RIDs *cache.Cache[seqids.BlockRIDs] + Params *cache.Cache[seqids.BlockParams] + Tokens *cache.Cache[*token.Block] TokenTable *cache.Cache[token.Table] - LIDs *cache.Cache[*lids.Block] + + LIDs *cache.Cache[*lids.Block] } func (s *IndexCache) Release() { - s.InfoRegistry.Release() + s.LegacyRegistry.Release() + s.TokenRegistry.Release() s.OffsetsRegistry.Release() s.IDRegistry.Release() s.LIDRegistry.Release() - s.LIDs.Release() + s.MIDs.Release() s.RIDs.Release() s.Params.Release() + s.Tokens.Release() s.TokenTable.Release() + + s.LIDs.Release() } diff --git a/frac/remote.go b/frac/remote.go index 2d8506af..f68e6986 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -55,7 +55,6 @@ type Remote struct { idFile storage.ImmutableFile lidFile storage.ImmutableFile - infoReader storage.IndexReader tokenReader storage.IndexReader offsetsReader storage.IndexReader idReader storage.IndexReader @@ -63,8 
+62,8 @@ type Remote struct { indexCache *IndexCache - loadMu *sync.RWMutex - isLoaded bool + initMu *sync.RWMutex + isInited bool blocksData sealed.BlocksData s3cli *s3.Client @@ -88,7 +87,7 @@ func NewRemote( f := &Remote{ ctx: ctx, - loadMu: &sync.RWMutex{}, + initMu: &sync.RWMutex{}, readLimiter: readLimiter, docsCache: docsCache, @@ -116,7 +115,7 @@ func NewRemote( // I wrote a small proposal on how we can reduce impact of such events. // https://github.com/ozontech/seq-db/issues/92 - if err := f.openInfo(); err != nil { + if err := f.loadInfo(); err != nil { logger.Error( "cannot open info file: any subsequent operation will fail", zap.String("fraction", filepath.Base(f.BaseFileName)), @@ -124,7 +123,6 @@ func NewRemote( ) } - f.info = loadInfo(f.infoReader) return f } @@ -163,7 +161,7 @@ func (f *Remote) FindLIDs(ctx context.Context, ids []seq.ID) ([]seq.LID, error) } func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, error) { - if err := f.load(); err != nil { + if err := f.init(); err != nil { logger.Error( "will create empty data provider: cannot load remote fraction", zap.String("fraction", f.Info().Name()), @@ -193,7 +191,7 @@ func (f *Remote) createDataProvider(ctx context.Context) (*sealedDataProvider, e lidsTable: f.blocksData.LIDsTable, lidsLoader: lids.NewLoader(lidReader, f.indexCache.LIDs), tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, tokenReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, tokenReader, f.indexCache.TokenTable), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, f.IsLegacy, tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( @@ -217,6 +215,8 @@ func (f *Remote) IsIntersecting(from, to seq.MID) bool { } func (f *Remote) Suicide() { + // FIXME(dkharms): We need to rename `.remote` file to `._remote` to commit deletion intent. 
+ // Now, we might have fraction leaks in S3 storage since [Suicide] is not atomic. util.MustRemoveFileByPath(f.BaseFileName + consts.RemoteFractionSuffix) f.docsCache.Release() @@ -251,14 +251,28 @@ func (f *Remote) String() string { return fracToString(f, "remote") } -func (f *Remote) load() error { - f.loadMu.Lock() - defer f.loadMu.Unlock() +func (f *Remote) loadInfo() error { + if f.IsLegacy { + if err := f.openInfoLegacy(); err != nil { + return err + } - if f.isLoaded { + f.info = loadInfoLegacy(f.legacyReader) return nil } + if err := f.openInfo(); err != nil { + return err + } + + f.info = loadInfo(f.infoFile) + return nil +} + +func (f *Remote) init() error { + f.initMu.Lock() + defer f.initMu.Unlock() + if err := f.openDocs(); err != nil { return err } @@ -267,54 +281,52 @@ func (f *Remote) load() error { return err } + if f.isInited { + return nil + } + if f.IsLegacy { (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) - f.isLoaded = true + f.isInited = true return nil } (&Loader{}).Load(&f.blocksData, f.info, IndexReaders{ - Info: f.infoReader, Token: f.tokenReader, Offsets: f.offsetsReader, ID: f.idReader, LID: f.lidReader, }) - f.isLoaded = true + f.isInited = true return nil } -func (f *Remote) openInfo() error { - if f.IsLegacy { - if f.legacyFile != nil { - return nil - } - - indexName := filepath.Base(f.BaseFileName) + consts.IndexFileSuffix - f.legacyFile = s3.NewReader(f.ctx, f.s3cli, indexName) +func (f *Remote) openInfoLegacy() error { + if f.legacyFile != nil { + return nil + } + return f.openRemoteFile(consts.IndexFileSuffix, func(file storage.ImmutableFile) { + f.legacyFile = file f.legacyReader = storage.NewIndexReader( - f.readLimiter, indexName, - f.legacyFile, f.indexCache.InfoRegistry, + f.readLimiter, file.Name(), + file, f.indexCache.LegacyRegistry, ) + }) +} - // infoReader is used by [loadInfo] - f.infoReader = f.legacyReader - return nil - } - +func (f *Remote) openInfo() error { if f.infoFile != nil { return nil } - 
return f.openRemoteFile(consts.InfoFileSuffix, func(file storage.ImmutableFile) { - f.infoFile = file - f.infoReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.InfoRegistry, - ) - }) + return f.openRemoteFile( + consts.InfoFileSuffix, + func(file storage.ImmutableFile) { + f.infoFile = file + }, + ) } func (f *Remote) openIndex() error { @@ -327,49 +339,61 @@ func (f *Remote) openIndex() error { } if f.tokenFile == nil { - if err := f.openRemoteFile(consts.TokenFileSuffix, func(file storage.ImmutableFile) { - f.tokenFile = file - f.tokenReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.TokenRegistry, - ) - }); err != nil { + if err := f.openRemoteFile( + consts.TokenFileSuffix, + func(file storage.ImmutableFile) { + f.tokenFile = file + f.tokenReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.TokenRegistry, + ) + }, + ); err != nil { return err } } if f.offsetsFile == nil { - if err := f.openRemoteFile(consts.OffsetsFileSuffix, func(file storage.ImmutableFile) { - f.offsetsFile = file - f.offsetsReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.OffsetsRegistry, - ) - }); err != nil { + if err := f.openRemoteFile( + consts.OffsetsFileSuffix, + func(file storage.ImmutableFile) { + f.offsetsFile = file + f.offsetsReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.OffsetsRegistry, + ) + }, + ); err != nil { return err } } if f.idFile == nil { - if err := f.openRemoteFile(consts.IDFileSuffix, func(file storage.ImmutableFile) { - f.idFile = file - f.idReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.IDRegistry, - ) - }); err != nil { + if err := f.openRemoteFile( + consts.IDFileSuffix, + func(file storage.ImmutableFile) { + f.idFile = file + f.idReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.IDRegistry, + ) + }, + ); err != nil { 
return err } } if f.lidFile == nil { - if err := f.openRemoteFile(consts.LIDFileSuffix, func(file storage.ImmutableFile) { - f.lidFile = file - f.lidReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.LIDRegistry, - ) - }); err != nil { + if err := f.openRemoteFile( + consts.LIDFileSuffix, + func(file storage.ImmutableFile) { + f.lidFile = file + f.lidReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.LIDRegistry, + ) + }, + ); err != nil { return err } } diff --git a/frac/sealed.go b/frac/sealed.go index c7c92023..5cf7bfa3 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -3,6 +3,7 @@ package frac import ( "context" "errors" + "io" "os" "path/filepath" "sync" @@ -49,17 +50,16 @@ type Sealed struct { idFile *os.File lidFile *os.File - infoReader storage.IndexReader tokenReader storage.IndexReader offsetsReader storage.IndexReader idReader storage.IndexReader lidReader storage.IndexReader + blocksData sealed.BlocksData indexCache *IndexCache - loadMu *sync.RWMutex - isLoaded bool - blocksData sealed.BlocksData + initMu *sync.RWMutex + isInited bool readLimiter *storage.ReadLimiter @@ -88,7 +88,7 @@ func NewSealed( isLegacy bool, ) *Sealed { f := &Sealed{ - loadMu: &sync.RWMutex{}, + initMu: &sync.RWMutex{}, readLimiter: readLimiter, docsCache: docsCache, @@ -109,40 +109,79 @@ func NewSealed( return f } - f.openInfo() - f.info = loadInfo(f.infoReader) + f.loadInfo() f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.IsLegacy) return f } -func (f *Sealed) openInfo() { - if f.IsLegacy { - if f.legacyFile != nil { - return - } +func NewSealedPreloaded( + baseFile string, + preloaded *sealed.PreloadedData, + rl *storage.ReadLimiter, + indexCache *IndexCache, + docsCache *cache.Cache[[]byte], + config *Config, + skipMaskProvider skipMaskProvider, +) *Sealed { + f := &Sealed{ + blocksData: preloaded.BlocksData, + docsCache: docsCache, + indexCache: indexCache, - name := f.BaseFileName + 
consts.IndexFileSuffix - file, err := os.Open(name) - if err != nil { - logger.Fatal( - "can't open legacy index file", - zap.String("file", name), - zap.Error(err), - ) - } + initMu: &sync.RWMutex{}, + isInited: true, - f.legacyFile = file - f.legacyReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.InfoRegistry, - ) + readLimiter: rl, + + info: preloaded.Info, + BaseFileName: baseFile, + Config: config, + + skipMaskProvider: skipMaskProvider, + } - // infoReader is used by [loadInfo] - f.infoReader = f.legacyReader + // Put token table built during sealing into the cache. + indexCache.TokenTable.Get(token.CacheKeyTable, func() (token.Table, int) { + return preloaded.TokenTable, preloaded.TokenTable.Size() + }) + + docsCountK := float64(f.info.DocsTotal) / 1000 + logger.Info("sealed fraction created from active", + zap.String("frac", f.info.Name()), + util.ZapMsTsAsESTimeStr("creation_time", f.info.CreationTime), + zap.String("from", f.info.From.String()), + zap.String("to", f.info.To.String()), + util.ZapFloat64WithPrec("docs_k", docsCountK, 1), + ) + + f.info.MetaOnDisk = 0 + return f +} + +func (f *Sealed) openInfoLegacy() { + if f.legacyFile != nil { return } + name := f.BaseFileName + consts.IndexFileSuffix + file, err := os.Open(name) + if err != nil { + logger.Fatal( + "can't open legacy index file", + zap.String("file", name), + zap.Error(err), + ) + } + + f.legacyFile = file + f.legacyReader = storage.NewIndexReader( + f.readLimiter, file.Name(), + file, f.indexCache.LegacyRegistry, + ) +} + +func (f *Sealed) openInfo() { if f.infoFile != nil { return } @@ -158,18 +197,17 @@ func (f *Sealed) openInfo() { } f.infoFile = file - f.infoReader = storage.NewIndexReader( - f.readLimiter, file.Name(), - file, f.indexCache.InfoRegistry, - ) } func (f *Sealed) openIndex() { - f.openInfo() if f.IsLegacy { + // We have exactly one `.index` file for legacy sealed fractions. + // So opening only this file is sufficient. 
+ f.openInfoLegacy() return } + f.openInfo() if f.tokenFile == nil { name := f.BaseFileName + consts.TokenFileSuffix file, err := os.Open(name) @@ -240,88 +278,47 @@ func (f *Sealed) openDocs() { f.docsReader = storage.NewDocsReader(f.readLimiter, f.docsFile, f.docsCache) } -func NewSealedPreloaded( - baseFile string, - preloaded *sealed.PreloadedData, - rl *storage.ReadLimiter, - indexCache *IndexCache, - docsCache *cache.Cache[[]byte], - config *Config, - skipMaskProvider skipMaskProvider, -) *Sealed { - f := &Sealed{ - blocksData: preloaded.BlocksData, - docsCache: docsCache, - indexCache: indexCache, - - loadMu: &sync.RWMutex{}, - isLoaded: true, - - readLimiter: rl, - - info: preloaded.Info, - BaseFileName: baseFile, - Config: config, - - skipMaskProvider: skipMaskProvider, +func (f *Sealed) loadInfo() { + if f.IsLegacy { + f.openInfoLegacy() + f.info = loadInfoLegacy(f.legacyReader) + return } - // Put token table built during sealing into the cache. - indexCache.TokenTable.Get(token.CacheKeyTable, func() (token.Table, int) { - return preloaded.TokenTable, preloaded.TokenTable.Size() - }) + f.openInfo() + f.info = loadInfo(f.infoFile) +} + +func (f *Sealed) init(full bool) { + f.initMu.Lock() + defer f.initMu.Unlock() f.openDocs() f.openIndex() - docsCountK := float64(f.info.DocsTotal) / 1000 - logger.Info("sealed fraction created from active", - zap.String("frac", f.info.Name()), - util.ZapMsTsAsESTimeStr("creation_time", f.info.CreationTime), - zap.String("from", f.info.From.String()), - zap.String("to", f.info.To.String()), - util.ZapFloat64WithPrec("docs_k", docsCountK, 1), - ) - - f.info.MetaOnDisk = 0 - - return f -} - -func (f *Sealed) load() { - f.loadMu.Lock() - defer f.loadMu.Unlock() - - if f.isLoaded { + if f.isInited || !full { return } - f.openDocs() - f.openIndex() - if f.IsLegacy { (&LegacyLoader{}).Load(&f.blocksData, f.info, f.legacyReader) - f.isLoaded = true + f.isInited = true return } (&Loader{}).Load(&f.blocksData, f.info, IndexReaders{ 
- Info: f.infoReader, Token: f.tokenReader, Offsets: f.offsetsReader, ID: f.idReader, LID: f.lidReader, }) - f.isLoaded = true + f.isInited = true } // Offload saves all index files and docs to remote storage. func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) { - f.loadMu.Lock() - f.openDocs() - f.openIndex() - f.loadMu.Unlock() + f.init(false) g, gctx := errgroup.WithContext(ctx) g.Go(func() error { return u.Upload(gctx, f.docsFile) }) @@ -352,6 +349,8 @@ func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error) } func (f *Sealed) Release() { + f.init(false) + indexFiles := []*os.File{ f.docsFile, f.infoFile, @@ -386,7 +385,6 @@ func (f *Sealed) Release() { func (f *Sealed) Suicide() { f.Release() - // Rename docs atomically first — this commits the intent to delete. oldPath := f.BaseFileName + consts.DocsFileSuffix newPath := f.BaseFileName + consts.DocsDelFileSuffix @@ -486,7 +484,7 @@ func (f *Sealed) FindLIDs(ctx context.Context, ids []seq.ID) ([]seq.LID, error) } func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { - f.load() + f.init(true) tokenReader := &f.tokenReader lidReader := &f.lidReader @@ -509,7 +507,7 @@ func (f *Sealed) createDataProvider(ctx context.Context) *sealedDataProvider { lidsTable: f.blocksData.LIDsTable, lidsLoader: lids.NewLoader(lidReader, f.indexCache.LIDs), tokenBlockLoader: token.NewBlockLoader(f.BaseFileName, tokenReader, f.indexCache.Tokens), - tokenTableLoader: token.NewTableLoader(f.BaseFileName, tokenReader, f.indexCache.TokenTable), + tokenTableLoader: token.NewTableLoader(f.BaseFileName, f.IsLegacy, tokenReader, f.indexCache.TokenTable), idsTable: &f.blocksData.IDsTable, idsProvider: seqids.NewProvider( @@ -537,15 +535,38 @@ func (f *Sealed) IsIntersecting(from, to seq.MID) bool { return f.info.IsIntersecting(from, to) } -func loadInfo(infoReader storage.IndexReader) *common.Info { +func loadInfoLegacy(infoReader storage.IndexReader) *common.Info { 
block, _, err := infoReader.ReadIndexBlock(0, nil) if err != nil { - logger.Fatal("error reading info block", zap.Error(err)) + logger.Fatal("cannot read info block", zap.Error(err)) + } + + var bi sealed.BlockInfo + if err := bi.Unpack(block); err != nil { + logger.Fatal("cannot unpack info block", zap.Error(err)) + } + + return bi.Info +} + +func loadInfo(r interface { + io.ReaderAt + Stat() (os.FileInfo, error) +}, +) *common.Info { + stat, err := r.Stat() + if err != nil { + logger.Fatal("cannot stat info file", zap.Error(err)) + } + + block := make([]byte, stat.Size()) + if _, err := r.ReadAt(block, io.SeekStart); err != nil { + logger.Fatal("cannot read info block", zap.Error(err)) } var bi sealed.BlockInfo if err := bi.Unpack(block); err != nil { - logger.Fatal("error unpacking info block", zap.Error(err)) + logger.Fatal("cannot unpack info block", zap.Error(err)) } return bi.Info diff --git a/frac/sealed/sealing/blocks_builder.go b/frac/sealed/sealing/blocks_builder.go index 6c295903..fc069cbf 100644 --- a/frac/sealed/sealing/blocks_builder.go +++ b/frac/sealed/sealing/blocks_builder.go @@ -105,13 +105,13 @@ func (bb *blocksBuilder) BuildTokenBlocks( } block.ext.minTID = 1 - for field, tokIt := range it { + for field, tokenIterator := range it { emitFieldEntry() fieldName = field fieldEntryStartTID = currentTID + 1 - for pair, err := range tokIt { + for pair, err := range tokenIterator { if err != nil { yield(TokenBlock{}, err) return @@ -202,8 +202,9 @@ func seqBlockID(ids iter.Seq2[DocLocation, error], blockCapacity int) iter.Seq2[ } } -type lidBlocksAcc struct { +type lidAccumulator struct { blockCapacity int + onBlock func(lidsSealBlock) error currentTID uint32 currentBlock lidsSealBlock @@ -212,8 +213,14 @@ type lidBlocksAcc struct { isContinued bool } -func newLIDBlocksAccumulator(blockCapacity int) *lidBlocksAcc { - a := &lidBlocksAcc{blockCapacity: blockCapacity} +func newLIDAccumulator( + blockCapacity int, + onBlock func(lidsSealBlock) error, +) 
*lidAccumulator { + a := &lidAccumulator{ + blockCapacity: blockCapacity, + onBlock: onBlock, + } a.currentBlock.ext.minTID = 1 a.currentBlock.payload = lids.Block{ @@ -229,12 +236,12 @@ func newLIDBlocksAccumulator(blockCapacity int) *lidBlocksAcc { // For each block that fills up, `onBlock` is called immediately // before the backing arrays are reset, so `onBlock` may read the // block data but must not retain references to it. -func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) error { +func (a *lidAccumulator) Add(lidsbuf []uint32) error { a.currentTID++ for _, lid := range lidsbuf { if len(a.currentBlock.payload.LIDs) == a.blockCapacity { - if err := onBlock(a.finalizeBlock()); err != nil { + if err := a.onBlock(a.finalizeBlock()); err != nil { return err } @@ -257,11 +264,11 @@ func (a *lidBlocksAcc) Add(lidsbuf []uint32, onBlock func(lidsSealBlock) error) return nil } -func (a *lidBlocksAcc) Flush() lidsSealBlock { - return a.finalizeBlock() +func (a *lidAccumulator) Finalize() error { + return a.onBlock(a.finalizeBlock()) } -func (a *lidBlocksAcc) finalizeBlock() lidsSealBlock { +func (a *lidAccumulator) finalizeBlock() lidsSealBlock { if !a.isEndOfToken { a.currentBlock.payload.Offsets = append( a.currentBlock.payload.Offsets, diff --git a/frac/sealed/sealing/blocks_builder_test.go b/frac/sealed/sealing/blocks_builder_test.go index a0d1ff2b..d6bca144 100644 --- a/frac/sealed/sealing/blocks_builder_test.go +++ b/frac/sealed/sealing/blocks_builder_test.go @@ -114,18 +114,22 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { const blockSize = 24 const lidBlockCap = 3 - var bb blocksBuilder - lidAccum := newLIDBlocksAccumulator(lidBlockCap) var lidBlocks []lidsSealBlock + lidAccumulator := newLIDAccumulator( + lidBlockCap, + func(block lidsSealBlock) error { + block.payload.LIDs = slices.Clone(block.payload.LIDs) + block.payload.Offsets = slices.Clone(block.payload.Offsets) + lidBlocks = append(lidBlocks, block) + return 
nil + }, + ) + + var bb blocksBuilder tokenBlocks := bb.BuildTokenBlocks( src.TokenTriplet(), func(lids []uint32) error { - return lidAccum.Add(lids, func(block lidsSealBlock) error { - block.payload.LIDs = slices.Clone(block.payload.LIDs) - block.payload.Offsets = slices.Clone(block.payload.Offsets) - lidBlocks = append(lidBlocks, block) - return nil - }) + return lidAccumulator.Add(lids) }, blockSize, ) @@ -245,11 +249,7 @@ func TestBlocksBuilder_BuildTokenBlocks(t *testing.T) { }, } assert.Equal(t, actualTokenTable.FieldsTables, expectedTokenTable.FieldsTables) - - finalBlock := lidAccum.Flush() - finalBlock.payload.LIDs = slices.Clone(finalBlock.payload.LIDs) - finalBlock.payload.Offsets = slices.Clone(finalBlock.payload.Offsets) - lidBlocks = append(lidBlocks, finalBlock) + assert.NoError(t, lidAccumulator.Finalize()) expectedLIDBlocks := []lidsSealBlock{ { diff --git a/frac/sealed/sealing/index.go b/frac/sealed/sealing/index.go index 52c38308..5c23842a 100644 --- a/frac/sealed/sealing/index.go +++ b/frac/sealed/sealing/index.go @@ -71,12 +71,7 @@ func (s *IndexSealer) WriteOffsetsFile(ws io.WriteSeeker, src Source) error { Offsets: src.BlockOffsets(), } - if err := w.writeBlock(btypeOffset, s.packBlocksOffsetsBlock(offsets)); err != nil { - return err - } - - // Emit trailing separator. 
- if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { + if err := w.writeBlock(blockTypeOffset, s.packBlocksOffsetsBlock(offsets)); err != nil { return err } @@ -95,24 +90,19 @@ func (s *IndexSealer) WriteIDFile(ws io.WriteSeeker, src Source) error { return err } - if err := w.writeBlock(btypeMid, s.packMIDsBlock(block)); err != nil { + if err := w.writeBlock(blockTypeMID, s.packMIDsBlock(block)); err != nil { return err } - if err := w.writeBlock(btypeRid, s.packRIDsBlock(block)); err != nil { + if err := w.writeBlock(blockTypeRID, s.packRIDsBlock(block)); err != nil { return err } - if err := w.writeBlock(btypeDocPos, s.packPosBlock(block)); err != nil { + if err := w.writeBlock(blockTypeDocPos, s.packPosBlock(block)); err != nil { return err } } - // Emit trailing separator. - if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { - return err - } - return w.finalize() } @@ -132,42 +122,36 @@ func (s *IndexSealer) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err var ( bb blocksBuilder allFieldsTables []token.FieldTable - lidacc = newLIDBlocksAccumulator(consts.LIDBlockCap) ) - // NOTE(dkharms): This is so ugly but I cannot come up with other solution here. 
- accumulate := func(lids []uint32) error { - return lidacc.Add(lids, func(block lidsSealBlock) error { - return lw.writeBlock(btypeLid, s.packLIDsBlock(block)) - }) - } + lidAccumulator := newLIDAccumulator( + consts.LIDBlockCap, + func(block lidsSealBlock) error { + return lw.writeBlock(blockTypeLID, s.packLIDsBlock(block)) + }, + ) - for pair, err := range bb.BuildTokenBlocks(src.TokenTriplet(), accumulate, consts.RegularBlockSize) { + for pair, err := range bb.BuildTokenBlocks(src.TokenTriplet(), lidAccumulator.Add, consts.RegularBlockSize) { if err != nil { return err } - if err := tw.writeBlock(btypeToken, s.packTokenBlock(pair.First)); err != nil { + if err := tw.writeBlock(blockTypeToken, s.packTokenBlock(pair.First)); err != nil { return err } allFieldsTables = append(allFieldsTables, pair.Second...) } - if err := s.finalizeLIDFile(lw, lidacc); err != nil { + if err := s.finalizeLIDFile(lw, lidAccumulator); err != nil { return err } return s.finalizeTokenFile(tw, allFieldsTables) } -func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { - if err := w.writeBlock(btypeLid, s.packLIDsBlock(lidAccum.Flush())); err != nil { - return err - } - - // Emit trailing separator. - if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { +func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccumulator *lidAccumulator) error { + if err := lidAccumulator.Finalize(); err != nil { return err } @@ -176,41 +160,22 @@ func (s *IndexSealer) finalizeLIDFile(w *writer, lidAccum *lidBlocksAcc) error { func (s *IndexSealer) finalizeTokenFile(w *writer, allFieldsTables []token.FieldTable) error { // Emit section separator. 
- if err := w.writeBlock(btypeToken, indexBlock{}); err != nil { + if err := w.writeEmptyBlock(); err != nil { return err } tokenTableBlock := token.TableBlock{FieldsTables: collapseOrderedFieldsTables(allFieldsTables)} - if err := w.writeBlock(btypeTokenTable, s.packTokenTableBlock(tokenTableBlock)); err != nil { - return err - } - - // Emit trailing separator. - if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { + if err := w.writeBlock(blockTypeTokenTable, s.packTokenTableBlock(tokenTableBlock)); err != nil { return err } return w.finalize() } -func (s *IndexSealer) WriteInfoFile(ws io.WriteSeeker, src Source) error { - w, err := newWriter(ws) - if err != nil { - return err - } - defer w.release() - +func (s *IndexSealer) WriteInfoFile(ws io.Writer, src Source) error { block := sealed.BlockInfo{Info: src.Info()} - if err := w.writeBlock(btypeInfo, s.packInfoBlock(block)); err != nil { - return err - } - - // Emit trailing separator. - if err := w.writeBlock(btypeBlackhole, indexBlock{}); err != nil { - return err - } - - return w.finalize() + _, err := ws.Write(s.packInfoBlock(block).payload) + return err } // collapseOrderedFieldsTables merges FieldTables with the same field name. 
diff --git a/frac/sealed/sealing/writer.go b/frac/sealed/sealing/writer.go index c0e9e645..1a147e4e 100644 --- a/frac/sealed/sealing/writer.go +++ b/frac/sealed/sealing/writer.go @@ -16,15 +16,17 @@ import ( const prefixSize = 16 const ( - btypeInfo = "info" - btypeOffset = "offset" - btypeToken = "token" - btypeTokenTable = "token-table" - btypeMid = "mid" - btypeRid = "rid" - btypeDocPos = "doc-pos" - btypeLid = "lid" - btypeBlackhole = "blackhole" + blockTypeInfo = "info" + blockTypeOffset = "offset" + + blockTypeToken = "token" + blockTypeTokenTable = "token-table" + + blockTypeMID = "mid" + blockTypeRID = "rid" + blockTypeDocPos = "doc-pos" + + blockTypeLID = "lid" ) // writer writes blocks incrementally to a single file using the @@ -76,13 +78,11 @@ func (w *writer) writeBlock(btype string, block indexBlock) error { return err } - if btype != btypeBlackhole { - w.stats[btype] = blockstat{ - count: w.stats[btype].count + 1, - raw: w.stats[btype].raw + int(block.rawLen), - compressed: w.stats[btype].compressed + len(block.payload), - header: w.stats[btype].header + len(header), - } + w.stats[btype] = blockstat{ + count: w.stats[btype].count + 1, + raw: w.stats[btype].raw + int(block.rawLen), + compressed: w.stats[btype].compressed + len(block.payload), + header: w.stats[btype].header + len(header), } w.wheader.Write(header) @@ -91,6 +91,12 @@ func (w *writer) writeBlock(btype string, block indexBlock) error { return nil } +func (w *writer) writeEmptyBlock() error { + header, _ := indexBlock{}.Bin(int64(w.pos)) + w.wheader.Write(header) + return nil +} + func (w *writer) finalize() error { if err := w.wpayload.Flush(); err != nil { return err diff --git a/frac/sealed/token/table_loader.go b/frac/sealed/token/table_loader.go index 0750de62..cd04830b 100644 --- a/frac/sealed/token/table_loader.go +++ b/frac/sealed/token/table_loader.go @@ -17,15 +17,24 @@ const CacheKeyTable = 1 type TableLoader struct { fracName string - reader *storage.IndexReader - cache 
*cache.Cache[Table] - i uint32 - buf []byte + isLegacy bool + + reader *storage.IndexReader + cache *cache.Cache[Table] + + i uint32 + buf []byte } -func NewTableLoader(fracName string, reader *storage.IndexReader, c *cache.Cache[Table]) *TableLoader { +func NewTableLoader( + fracName string, + isLegacy bool, + reader *storage.IndexReader, + c *cache.Cache[Table], +) *TableLoader { return &TableLoader{ fracName: fracName, + isLegacy: isLegacy, reader: reader, cache: c, } @@ -33,10 +42,21 @@ func NewTableLoader(fracName string, reader *storage.IndexReader, c *cache.Cache func (l *TableLoader) Load() Table { table, err := l.cache.GetWithError(CacheKeyTable, func() (Table, int, error) { - blocks, err := l.loadBlocks() + var ( + blocks []TableBlock + err error + ) + + if l.isLegacy { + blocks, err = l.loadBlocksLegacy() + } else { + blocks, err = l.loadBlocks() + } + if err != nil { return nil, 0, err } + table := TableFromBlocks(blocks) return table, table.Size(), nil }) @@ -45,6 +65,7 @@ func (l *TableLoader) Load() Table { zap.String("frac", l.fracName), zap.Error(err)) } + return table } @@ -92,9 +113,11 @@ func (l *TableLoader) readBlock() ([]byte, error) { return block, err } -func (l *TableLoader) loadBlocks() ([]TableBlock, error) { - l.i = 0 - for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { // skip token blocks, go for token table +func (l *TableLoader) loadBlocksLegacy() ([]TableBlock, error) { + l.i = 1 // Skip info block immediately. + + for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { + // Skip token blocks, go for token table. } blocks := make([]TableBlock, 0) @@ -110,6 +133,34 @@ func (l *TableLoader) loadBlocks() ([]TableBlock, error) { return blocks, nil } +func (l *TableLoader) loadBlocks() ([]TableBlock, error) { + l.i = 0 + + blocksCount, err := l.reader.BlocksCount() + if err != nil { + return nil, err + } + + for h := l.readHeader(); h.Len() > 0; h = l.readHeader() { + // Skip token blocks, go for token table. 
+ } + + var blocks []TableBlock + for l.i < uint32(blocksCount) { + data, err := l.readBlock() + if err != nil { + return nil, err + } + + var tb TableBlock + tb.Unpack(data) + + blocks = append(blocks, tb) + } + + return blocks, nil +} + // TableBlock represents how token.Table is stored on disk type TableBlock struct { FieldsTables []FieldTable diff --git a/frac/sealed_loader.go b/frac/sealed_loader.go index 28b9ef9f..893b75a4 100644 --- a/frac/sealed_loader.go +++ b/frac/sealed_loader.go @@ -41,7 +41,7 @@ func (l *LegacyLoader) Load(blocksData *sealed.BlocksData, info *common.Info, re logger.Fatal("legacy load ids error", zap.Error(err)) } - blocksData.LIDsTable, err = l.loadLIDs() + blocksData.LIDsTable, err = l.loadLIDsTable() if err != nil { logger.Fatal("legacy load lids error", zap.Error(err)) } @@ -126,8 +126,8 @@ func (l *LegacyLoader) loadIDs(fracVersion config.BinaryDataVersion) (seqids.Tab return table, offsets.Offsets, nil } -// loadLIDs scans LID block headers, recording the absolute start index for lids.Table. -func (l *LegacyLoader) loadLIDs() (*lids.Table, error) { +// loadLIDsTable scans LID block headers, recording the absolute start index for lids.Table. +func (l *LegacyLoader) loadLIDsTable() (*lids.Table, error) { startIndex := l.blockIndex // absolute index of first LID block in .index var ( @@ -158,7 +158,6 @@ func (l *LegacyLoader) loadLIDs() (*lids.Table, error) { // IndexReaders holds one IndexReader per split index file. 
type IndexReaders struct { - Info storage.IndexReader Token storage.IndexReader Offsets storage.IndexReader ID storage.IndexReader @@ -234,14 +233,19 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio IDsTotal: idsTotal, } - for blockIdx := uint32(0); ; { - header, err := r.GetBlockHeader(blockIdx) + blocksCount, err := r.BlocksCount() + if err != nil { + logger.Fatal( + "cannot get block count", + zap.Error(err), + ) + } + + for blockIdx := 0; blockIdx < blocksCount; blockIdx += 3 { + header, err := r.GetBlockHeader(uint32(blockIdx)) if err != nil { logger.Fatal("error reading id block header", zap.Error(err)) } - if header.Len() == 0 { // separator - break - } var mid seq.MID if fracVersion < config.BinaryDataV2 { @@ -256,7 +260,6 @@ func (l *Loader) loadIDsTable(r storage.IndexReader, idsTotal uint32, fracVersio }) table.IDBlocksTotal++ - blockIdx += 3 // skip RIDs and Pos blocks } return table @@ -270,16 +273,20 @@ func (l *Loader) loadLIDsTable(r storage.IndexReader) (*lids.Table, error) { isContinued []bool ) - for blockIdx := uint32(0); ; blockIdx++ { - header, err := r.GetBlockHeader(blockIdx) + blocksCount, err := r.BlocksCount() + if err != nil { + logger.Fatal( + "cannot get block count", + zap.Error(err), + ) + } + + for blockIdx := 0; blockIdx < blocksCount; blockIdx++ { + header, err := r.GetBlockHeader(uint32(blockIdx)) if err != nil { return nil, err } - if header.Len() == 0 { - break - } - ext2 := header.GetExt2() maxTIDs = append(maxTIDs, uint32(ext2>>32)) minTIDs = append(minTIDs, uint32(ext2&0xFFFFFFFF)) diff --git a/fracmanager/cache_maintainer.go b/fracmanager/cache_maintainer.go index 2a6ac6dd..0229a06b 100644 --- a/fracmanager/cache_maintainer.go +++ b/fracmanager/cache_maintainer.go @@ -143,18 +143,22 @@ func (cm *CacheMaintainer) CreateSortDocsCache() *cache.Cache[[]byte] { func (cm *CacheMaintainer) CreateIndexCache() *frac.IndexCache { return &frac.IndexCache{ - MIDs: newCache[[]byte](cm, midsName), - 
RIDs: newCache[seqids.BlockRIDs](cm, ridsName), - Params: newCache[seqids.BlockParams](cm, paramsName), - LIDs: newCache[*lids.Block](cm, lidsName), - Tokens: newCache[*token.Block](cm, tokensName), - TokenTable: newCache[token.Table](cm, tokenTableName), + LegacyRegistry: newCache[[]byte](cm, indexName), + // Each index file gets its own registry cache (they all use key=1 internally). - InfoRegistry: newCache[[]byte](cm, indexName), TokenRegistry: newCache[[]byte](cm, indexName), OffsetsRegistry: newCache[[]byte](cm, indexName), IDRegistry: newCache[[]byte](cm, indexName), LIDRegistry: newCache[[]byte](cm, indexName), + + MIDs: newCache[[]byte](cm, midsName), + RIDs: newCache[seqids.BlockRIDs](cm, ridsName), + Params: newCache[seqids.BlockParams](cm, paramsName), + + Tokens: newCache[*token.Block](cm, tokensName), + TokenTable: newCache[token.Table](cm, tokenTableName), + + LIDs: newCache[*lids.Block](cm, lidsName), } } diff --git a/fracmanager/loader.go b/fracmanager/loader.go index 69ff7c02..a3273c0c 100644 --- a/fracmanager/loader.go +++ b/fracmanager/loader.go @@ -139,7 +139,7 @@ func (l *Loader) discover(ctx context.Context) ([]*frac.Active, []*frac.Sealed, case fracStageSealed: locals = append(locals, l.loadSealed(manifest, loadedInfoCache)) case fracStageRemote: - // TODO(dkharms): Drop this compatibility check. + // TODO(dkharms): Drop this check once we store `Info` for remote fractions locally. 
indexName := filepath.Base(manifest.basePath) + consts.IndexFileSuffix hasIndex, err := l.provider.s3cli.Exists(ctx, indexName) diff --git a/go.mod b/go.mod index 7d259333..7efc9e75 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/golang/mock v1.6.0 github.com/google/uuid v1.6.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 - github.com/johannesboyne/gofakes3 v0.0.0-20250916175020-ebf3e50324d3 + github.com/johannesboyne/gofakes3 v0.0.0-20260208201424-4c385a1f6a73 github.com/kkyr/fig v0.5.0 github.com/klauspost/compress v1.18.2 github.com/oklog/ulid/v2 v2.1.1 diff --git a/go.sum b/go.sum index 92b59e95..07cca964 100644 --- a/go.sum +++ b/go.sum @@ -173,8 +173,8 @@ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw= -github.com/johannesboyne/gofakes3 v0.0.0-20250916175020-ebf3e50324d3 h1:2713fQZ560HxoNVgfJH41GKzjMjIG+DW4hH6nYXfXW8= -github.com/johannesboyne/gofakes3 v0.0.0-20250916175020-ebf3e50324d3/go.mod h1:S4S9jGBVlLri0OeqrSSbCGG5vsI6he06UJyuz1WT1EE= +github.com/johannesboyne/gofakes3 v0.0.0-20260208201424-4c385a1f6a73 h1:0xkWp+RMC2ImuKacheMHEAtrbOTMOa0kYkxyzM1Z/II= +github.com/johannesboyne/gofakes3 v0.0.0-20260208201424-4c385a1f6a73/go.mod h1:S4S9jGBVlLri0OeqrSSbCGG5vsI6he06UJyuz1WT1EE= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= diff --git a/storage/index_reader.go 
b/storage/index_reader.go index 60cf3641..c7dee18c 100644 --- a/storage/index_reader.go +++ b/storage/index_reader.go @@ -107,3 +107,15 @@ func (r *IndexReader) ReadIndexBlock(blockIndex uint32, dst []byte) ([]byte, uin return dst, uint64(n), err } + +func (r *IndexReader) BlocksCount() (int, error) { + registry, err := r.cache.GetWithError(1, func() ([]byte, int, error) { + data, err := r.readRegistry() + return data, cap(data), err + }) + if err != nil { + return 0, err + } + + return len(registry) / IndexBlockHeaderSize, nil +} diff --git a/storage/s3/reader.go b/storage/s3/reader.go index 76ab4f0f..4e161d34 100644 --- a/storage/s3/reader.go +++ b/storage/s3/reader.go @@ -15,9 +15,7 @@ import ( "github.com/ozontech/seq-db/storage" ) -var ( - _ storage.ImmutableFile = (*reader)(nil) -) +var _ storage.ImmutableFile = (*reader)(nil) // reader is a wrapper around S3 client that provides basic IO functions. // Be aware that [reader] is not thread-safe. @@ -64,7 +62,7 @@ func (r *reader) Read(p []byte) (int, error) { if b != expected { return 0, fmt.Errorf( - "s3: short copy occurred: written=%d but expected=%d", + "s3: short copy occurred: read=%d but expected=%d", b, expected, ) } @@ -159,7 +157,6 @@ func (r *reader) Stat() (os.FileInfo, error) { Bucket: aws.String(r.c.bucket), Key: aws.String(r.filename), }) - if err != nil { return nil, fmt.Errorf( "s3: cannot stat file=%q: %w", @@ -199,9 +196,7 @@ func (r *reader) getSize() (int64, error) { return size, nil } -var ( - _ os.FileInfo = (*fileStat)(nil) -) +var _ os.FileInfo = (*fileStat)(nil) type fileStat struct { name string From d49d05a4d9abd1392b840cc75371cb5703c21291 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Tue, 5 May 2026 16:43:42 +0300 Subject: [PATCH 17/19] chore: disable goconst --- .golangci.yaml | 1 - Makefile | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index b9534d6a..25447931 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -12,7 
+12,6 @@ linters: default: none enable: - errcheck - - goconst - gocritic - govet - ineffassign diff --git a/Makefile b/Makefile index 5366df76..9a90289f 100644 --- a/Makefile +++ b/Makefile @@ -103,7 +103,7 @@ ci-tests-race: test-deps # run diff lint like in pipeline .lint: $(info Running lint...) - GOBIN=$(LOCAL_BIN) go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.1.6 run \ + GOBIN=$(LOCAL_BIN) go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.1 run \ --config=.golangci.yaml ./... .PHONY: lint From eb9e7a80929eb1cd6471897764909633a3149f64 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Wed, 6 May 2026 14:00:27 +0300 Subject: [PATCH 18/19] fix: use `openInfoLegacy()` for legacy remote fractions --- frac/remote.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/frac/remote.go b/frac/remote.go index f68e6986..34b3e5f1 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -330,12 +330,12 @@ func (f *Remote) openInfo() error { } func (f *Remote) openIndex() error { - if err := f.openInfo(); err != nil { - return err + if f.IsLegacy { + return f.openInfoLegacy() } - if f.IsLegacy { - return nil + if err := f.openInfo(); err != nil { + return err } if f.tokenFile == nil { From 16e799c721e21d95f900d16da4f37f76ab2f2e65 Mon Sep 17 00:00:00 2001 From: Daniil Porokhnin Date: Wed, 6 May 2026 15:19:08 +0300 Subject: [PATCH 19/19] fix: calculate index size for remote fraction --- frac/remote.go | 40 ++++++++++++++++++++++++++++++++++++++++ frac/sealed.go | 17 ++++++++--------- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/frac/remote.go b/frac/remote.go index 34b3e5f1..23950b3a 100644 --- a/frac/remote.go +++ b/frac/remote.go @@ -122,6 +122,7 @@ func NewRemote( zap.Error(err), ) } + f.computeIndexSize() return f } @@ -458,3 +459,42 @@ func (f *Remote) openDocs() error { return fmt.Errorf("missing %q and %q files", consts.DocsFileSuffix, consts.SdocsFileSuffix) } + +func (f *Remote) 
computeIndexSize() { + if err := f.openIndex(); err != nil { + logger.Error( + "cannot open index file", + zap.Error(err), + ) + return + } + + files := []storage.ImmutableFile{ + f.infoFile, + f.tokenFile, + f.offsetsFile, + f.idFile, + f.lidFile, + } + + if f.IsLegacy { + files = []storage.ImmutableFile{ + f.legacyFile, + } + } + + f.info.IndexOnDisk = 0 + for _, file := range files { + st, err := file.Stat() + if err != nil { + logger.Error( + "can't stat index file", + zap.String("file", file.Name()), + zap.Error(err), + ) + continue + } + + f.info.IndexOnDisk += uint64(st.Size()) + } +} diff --git a/frac/sealed.go b/frac/sealed.go index 5cf7bfa3..2e31f263 100644 --- a/frac/sealed.go +++ b/frac/sealed.go @@ -110,7 +110,7 @@ func NewSealed( } f.loadInfo() - f.info.IndexOnDisk = computeIndexOnDisk(f.BaseFileName, f.IsLegacy) + f.computeIndexSize() return f } @@ -573,7 +573,7 @@ func loadInfo(r interface { } // computeIndexOnDisk returns the total on-disk size of index files for a local fraction. -func computeIndexOnDisk(basePath string, isLegacy bool) uint64 { +func (f *Sealed) computeIndexSize() { suffixes := []string{ consts.InfoFileSuffix, consts.TokenFileSuffix, @@ -582,24 +582,23 @@ func computeIndexOnDisk(basePath string, isLegacy bool) uint64 { consts.LIDFileSuffix, } - if isLegacy { + if f.IsLegacy { suffixes = []string{ consts.IndexFileSuffix, } } - var total int64 + f.info.IndexOnDisk = 0 for _, suffix := range suffixes { - st, err := os.Stat(basePath + suffix) + st, err := os.Stat(f.info.Path + suffix) if err != nil { logger.Fatal( "can't stat index file", - zap.String("file", basePath+suffix), + zap.String("file", f.info.Path+suffix), zap.Error(err), ) } - total += st.Size() - } - return uint64(total) + f.info.IndexOnDisk += uint64(st.Size()) + } }