diff --git a/package.json b/package.json
index 7e83f2f8ee4d..2df64b378d59 100644
--- a/package.json
+++ b/package.json
@@ -55,7 +55,7 @@
         "prepare": "husky"
     },
     "devDependencies": {
-        "@apify/eslint-config": "^1.0.0",
+        "@apify/eslint-config": "^2.0.0",
         "@apify/log": "^2.4.0",
         "@apify/tsconfig": "^0.1.0",
         "@biomejs/biome": "^2.3.11",
diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts
index 3ff8e2c63421..850dda046f97 100644
--- a/packages/basic-crawler/src/internals/basic-crawler.ts
+++ b/packages/basic-crawler/src/internals/basic-crawler.ts
@@ -1202,7 +1202,6 @@ export class BasicCrawler
         const skippedBecauseOfRobots = new Set<string>();
-        const skippedBecauseOfLimit = new Set<string>();
         const skippedBecauseOfMaxCrawlDepth = new Set<string>();
         const isAllowedBasedOnRobotsTxtFile = this.isAllowedBasedOnRobotsTxtFile.bind(this);
@@ -1216,16 +1215,9 @@ export class BasicCrawler
-            if (requestLimit !== undefined && allowedRequests.length >= requestLimit) {
-                skippedBecauseOfLimit.add(url);
-                continue;
-            }
-
             if (maxCrawlDepth !== undefined && (request as any).crawlDepth > maxCrawlDepth) {
                 skippedBecauseOfMaxCrawlDepth.add(url);
                 continue;
@@ -1233,14 +1225,19 @@ export class BasicCrawler
         if (skippedBecauseOfRobots.size > 0) {
             this.log.warning(`Some requests were skipped because they were disallowed based on the robots.txt file`, {
@@ -1250,7 +1247,7 @@ export class BasicCrawler
         if (
             skippedBecauseOfRobots.size > 0 ||
-            skippedBecauseOfLimit.size > 0 ||
+            skippedBecauseOfLimit.length > 0 ||
             skippedBecauseOfMaxCrawlDepth.size > 0
         ) {
             await Promise.all(
@@ -1259,7 +1256,8 @@ export class BasicCrawler
-                [...skippedBecauseOfLimit].map((url) => {
+                skippedBecauseOfLimit.map((request) => {
+                    const url = typeof request === 'string' ? request : request.url!;
                     return this.handleSkippedRequest({ url, reason: 'limit' });
                 }),
                 [...skippedBecauseOfMaxCrawlDepth].map((url) => {
diff --git a/packages/core/src/enqueue_links/enqueue_links.ts b/packages/core/src/enqueue_links/enqueue_links.ts
index 5d6d2fce0e55..5668e9353ee6 100644
--- a/packages/core/src/enqueue_links/enqueue_links.ts
+++ b/packages/core/src/enqueue_links/enqueue_links.ts
@@ -488,17 +488,19 @@ export async function enqueueLinks(
         return filtered;
     }

-    let requests = await createFilteredRequests();
-    if (typeof limit === 'number' && limit < requests.length) {
-        await reportSkippedRequests(requests.slice(limit), 'enqueueLimit');
-        requests = requests.slice(0, limit);
-    }
-
-    const { addedRequests } = await requestQueue.addRequestsBatched(requests, {
+    const { addedRequests, requestsOverLimit } = await requestQueue.addRequestsBatched(await createFilteredRequests(), {
         forefront,
         waitForAllRequestsToBeAdded,
+        maxNewRequests: limit,
     });

+    if (requestsOverLimit !== undefined && requestsOverLimit.length > 0) {
+        await reportSkippedRequests(
+            requestsOverLimit.map((r) => ({ url: typeof r === 'string' ? r : r.url! })),
+            'enqueueLimit',
+        );
+    }
+
     return { processedRequests: addedRequests, unprocessedRequests: [] };
 }
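For orientation, here is a minimal usage sketch of the reworked limit handling from the caller's side (the crawler setup and URLs are illustrative, not part of this diff):

```ts
import { CheerioCrawler } from 'crawlee';

const crawler = new CheerioCrawler({
    async requestHandler({ enqueueLinks }) {
        // `limit` is now passed through to the queue as `maxNewRequests`, so it
        // caps newly added requests only; links already present in the queue no
        // longer consume the budget. Anything cut off by the limit is reported
        // via the skipped-request hooks with reason 'enqueueLimit'.
        await enqueueLinks({
            globs: ['https://example.com/**'],
            limit: 20,
        });
    },
});
```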
diff --git a/packages/core/src/storages/request_provider.ts b/packages/core/src/storages/request_provider.ts
index dc1204abf5aa..e4105e856cb3 100644
--- a/packages/core/src/storages/request_provider.ts
+++ b/packages/core/src/storages/request_provider.ts
@@ -413,6 +413,7 @@ export abstract class RequestProvider implements IStorage, IRequestManager {
                 waitForAllRequestsToBeAdded: ow.optional.boolean,
                 batchSize: ow.optional.number,
                 waitBetweenBatchesMillis: ow.optional.number,
+                maxNewRequests: ow.optional.number,
             }),
         );
@@ -454,9 +455,21 @@ export abstract class RequestProvider implements IStorage, IRequestManager {
             }
         }

-        const { batchSize = 1000, waitBetweenBatchesMillis = 1000 } = options;
+        const { batchSize = 1000, waitBetweenBatchesMillis = 1000, maxNewRequests } = options;

-        const chunks = peekableAsyncIterable(chunkedAsyncIterable(generateRequests(), batchSize));
+        let remainingBudget = maxNewRequests ?? Infinity;
+        const requestsOverLimit: Source[] = [];
+
+        // If there's a limit on the number of added requests, do not send batches bigger than the limit
+        const effectiveChunkSize =
+            maxNewRequests !== undefined ? () => Math.min(batchSize, remainingBudget) : batchSize;
+
+        // Hold onto the underlying iterator so we can drain leftovers from it in buildResult
+        const requestIterator = generateRequests();
+
+        const chunks = peekableAsyncIterable(
+            chunkedAsyncIterable(requestIterator, effectiveChunkSize) as AsyncIterable<Source[]>,
+        );
         const chunksIterator = chunks[Symbol.asyncIterator]();

         const attemptToAddToQueueAndAddAnyUnprocessed = async (providedRequests: Source[], cache = true) => {
@@ -480,21 +493,55 @@ export abstract class RequestProvider implements IStorage, IRequestManager {
             return resultsToReturn;
         };

-        // Add initial batch of `batchSize` to process them right away
+        /**
+         * Process a chunk: send it to the queue, then update the remaining budget if maxNewRequests is active.
+         */
+        const processChunk = async (chunk: Source[], cache = true) => {
+            const results = await attemptToAddToQueueAndAddAnyUnprocessed(chunk, cache);
+
+            if (maxNewRequests !== undefined) {
+                remainingBudget -= results.filter((r) => !r.wasAlreadyPresent).length;
+            }
+
+            return results;
+        };
+
+        /**
+         * Build the final result. When maxNewRequests is set, drains any remaining items
+         * from the underlying request iterator into requestsOverLimit.
+         *
+         * We accept the iterator explicitly (rather than closing over it) to make it obvious
+         * that this is the *same* iterator that `chunkedAsyncIterable` has been consuming,
+         * so only unconsumed items are drained. We drain `requestIterator` (not `chunks`)
+         * because `chunkedAsyncIterable` stops yielding when the budget-based chunk size
+         * drops to 0, leaving unconsumed items in the underlying iterator.
+         */
+        const buildResult = async (
+            addedRequests: ProcessedRequest[],
+            waitForAllRequestsToBeAdded: Promise<ProcessedRequest[]>,
+            unconsumedIterator: AsyncGenerator<Source>,
+        ): Promise<AddRequestsBatchedResult> => {
+            if (maxNewRequests !== undefined) {
+                for await (const request of unconsumedIterator) {
+                    requestsOverLimit.push(request);
+                }
+            }
+
+            return { addedRequests, waitForAllRequestsToBeAdded, requestsOverLimit };
+        };
+
+        // Add initial batch to process right away
         const initialChunk = await chunksIterator.peek();

         if (initialChunk === undefined) {
-            return { addedRequests: [], waitForAllRequestsToBeAdded: Promise.resolve([]) };
+            return buildResult([], Promise.resolve([]), requestIterator);
         }

-        const addedRequests = await attemptToAddToQueueAndAddAnyUnprocessed(initialChunk);
+        const addedRequests = await processChunk(initialChunk);
         await chunksIterator.next();

-        // If we have no more requests to add, return immediately
+        // If we have no more requests to add (either exhausted or budget hit), return immediately
         if ((await chunksIterator.peek()) === undefined) {
-            return {
-                addedRequests,
-                waitForAllRequestsToBeAdded: Promise.resolve([]),
-            };
+            return buildResult(addedRequests, Promise.resolve([]), requestIterator);
         }

         // eslint-disable-next-line no-async-promise-executor
@@ -502,8 +549,7 @@ export abstract class RequestProvider implements IStorage, IRequestManager {
             const finalAddedRequests: ProcessedRequest[] = [];

             for await (const requestChunk of chunks) {
-                finalAddedRequests.push(...(await attemptToAddToQueueAndAddAnyUnprocessed(requestChunk, false)));
-
+                finalAddedRequests.push(...(await processChunk(requestChunk, false)));
                 await sleep(waitBetweenBatchesMillis);
             }
@@ -515,15 +561,12 @@ export abstract class RequestProvider implements IStorage, IRequestManager {
             this.inProgressRequestBatchCount -= 1;
         });

-        // If the user wants to wait for all the requests to be added, we wait for the promise to resolve for them
-        if (options.waitForAllRequestsToBeAdded) {
+        // When maxNewRequests is set, we must wait for all batches so we can accurately report skipped requests.
+        if (options.waitForAllRequestsToBeAdded || maxNewRequests !== undefined) {
             addedRequests.push(...(await promise));
         }

-        return {
-            addedRequests,
-            waitForAllRequestsToBeAdded: promise,
-        };
+        return buildResult(addedRequests, promise, requestIterator);
     }

     /**
@@ -980,6 +1023,18 @@ export interface AddRequestsBatchedOptions extends RequestQueueOperationOptions {
      * @default 1000
      */
     waitBetweenBatchesMillis?: number;
+
+    /**
+     * If set, only this many *actually new* requests (i.e. not already present in the queue) will be added.
+     * Once the budget is reached, remaining requests from the iterable will be collected in
+     * {@apilink AddRequestsBatchedResult.requestsOverLimit|`requestsOverLimit`} instead.
+     *
+     * This is useful in combination with `maxRequestsPerCrawl` to avoid duplicate URLs consuming the budget.
+     *
+     * **Note:** Setting this option implicitly enables {@apilink AddRequestsBatchedOptions.waitForAllRequestsToBeAdded|`waitForAllRequestsToBeAdded`},
+     * since all batches must complete before leftover requests can be accurately reported.
+     */
+    maxNewRequests?: number;
 }

 export interface AddRequestsBatchedResult {
@@ -1001,4 +1056,11 @@ export interface AddRequestsBatchedResult {
      * ```
      */
     waitForAllRequestsToBeAdded: Promise<ProcessedRequest[]>;
+
+    /**
+     * Requests from the input that were not added to the queue because the
+     * {@apilink AddRequestsBatchedOptions.maxNewRequests|`maxNewRequests`} budget was reached.
+     * Empty when `maxNewRequests` is not set.
+     */
+    requestsOverLimit?: Source[];
 }
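A short usage sketch of the new option, mirroring the interface docs above (queue contents and URLs are illustrative):

```ts
import { RequestQueue } from 'crawlee';

const queue = await RequestQueue.open();

// Budget of 2 *new* requests; requests already present in the queue do not
// consume the budget, and leftover inputs are returned instead of silently dropped.
const { addedRequests, requestsOverLimit } = await queue.addRequestsBatched(
    ['https://example.com/a', 'https://example.com/b', 'https://example.com/c'],
    { maxNewRequests: 2 },
);

// With three fresh URLs and a budget of 2, one input ends up over the limit.
console.log(addedRequests.length); // 2
console.log(requestsOverLimit?.length); // 1
```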
diff --git a/packages/core/src/storages/request_queue_v2.ts b/packages/core/src/storages/request_queue_v2.ts
index 7dd8157d7ca0..fd9e63f23111 100644
--- a/packages/core/src/storages/request_queue_v2.ts
+++ b/packages/core/src/storages/request_queue_v2.ts
@@ -540,7 +540,6 @@ export class RequestQueue extends RequestProvider {
         this.queuePausedForMigration = true;

         let requestId: string | null;
-        // eslint-disable-next-line no-cond-assign
         while ((requestId = this.queueHeadIds.removeFirst()) !== null) {
             try {
                 await this.client.deleteRequestLock(requestId);
diff --git a/packages/memory-storage/test/async-iteration.test.ts b/packages/memory-storage/test/async-iteration.test.ts
index 8303dd61d291..b3f512561c80 100644
--- a/packages/memory-storage/test/async-iteration.test.ts
+++ b/packages/memory-storage/test/async-iteration.test.ts
@@ -204,7 +204,6 @@ describe('Async iteration support', () => {
     });

     test('yields strings directly, not objects', async () => {
-        // eslint-disable-next-line no-unreachable-loop
         for await (const key of kvStore.keys()) {
             expect(typeof key).toBe('string');
             break; // Only need to check the first one
@@ -291,7 +290,6 @@ describe('Async iteration support', () => {
     });

     test('yields values directly, not KeyValueStoreRecord objects', async () => {
-        // eslint-disable-next-line no-unreachable-loop
         for await (const value of kvStore.values()) {
             // Should be the actual value, not a record wrapper
             expect(value).toStrictEqual({ data: 'key-00' });
@@ -380,7 +378,6 @@ describe('Async iteration support', () => {
     });

     test('yields [key, value] tuples', async () => {
-        // eslint-disable-next-line no-unreachable-loop
         for await (const [key, value] of kvStore.entries()) {
             expect(typeof key).toBe('string');
             expect(key).toBe('key-00');
diff --git a/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts b/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts
index 2e6d5c91f551..1766ac47dc34 100644
--- a/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts
+++ b/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts
@@ -708,6 +708,7 @@ export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
                     wasAlreadyHandled: false,
                 })),
                 waitForAllRequestsToBeAdded: Promise.resolve([]),
+                requestsOverLimit: [],
             };
         };
         // We need to use a mock request queue implementation, in order to add the requests into our result object
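Stubs of `addRequestsBatched` elsewhere (the adaptive crawler above, and several tests below) now have to resolve to the extended result shape. A minimal sketch, assuming `AddRequestsBatchedResult` is exported from `@crawlee/core` alongside the other storage types:

```ts
import type { AddRequestsBatchedResult } from '@crawlee/core';

// The shape every stubbed addRequestsBatched() should now resolve to:
const emptyBatchResult: AddRequestsBatchedResult = {
    addedRequests: [],
    waitForAllRequestsToBeAdded: Promise.resolve([]),
    requestsOverLimit: [],
};
```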
diff --git a/packages/utils/src/internals/iterables.ts b/packages/utils/src/internals/iterables.ts
index ed55bde56369..36f6d0f24ff7 100644
--- a/packages/utils/src/internals/iterables.ts
+++ b/packages/utils/src/internals/iterables.ts
@@ -85,24 +85,34 @@ export async function* asyncifyIterable<T>(iterable: Iterable<T> | AsyncIterable<T>
  */
 export async function* chunkedAsyncIterable<T>(
     iterable: AsyncIterable<T> | Iterable<T>,
-    chunkSize: number,
+    chunkSize: number | (() => number),
 ): AsyncIterable<T[]> {
-    if (typeof chunkSize !== 'number' || chunkSize < 1) {
+    const getChunkSize = typeof chunkSize === 'function' ? chunkSize : () => chunkSize;
+
+    if (typeof chunkSize === 'number' && chunkSize < 1) {
         throw new Error(`Chunk size must be a positive number (${inspect(chunkSize)}) received`);
     }

-    let chunk: T[] = [];
+    const iterator =
+        Symbol.asyncIterator in iterable
+            ? (iterable as AsyncIterable<T>)[Symbol.asyncIterator]()
+            : (iterable as Iterable<T>)[Symbol.iterator]();
+
+    while (true) {
+        const currentSize = getChunkSize();
+        if (currentSize < 1) break;

-    for await (const item of iterable) {
-        chunk.push(item);
+        const chunk: T[] = [];

-        if (chunk.length >= chunkSize) {
-            yield chunk;
-            chunk = [];
+        for (let i = 0; i < currentSize; i++) {
+            const next = await iterator.next();
+            if (next.done) {
+                break;
+            }
+            chunk.push(next.value);
         }
-    }

-    if (chunk.length) {
+        if (chunk.length === 0) break;
         yield chunk;
     }
 }
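The callback form makes the chunk size a per-chunk decision, which is what the budget logic in `request_provider.ts` relies on. A small sketch, assuming the helper is exported from `@crawlee/utils` (as the tests below import it):

```ts
import { chunkedAsyncIterable } from '@crawlee/utils';

// The size callback is re-evaluated before each chunk; returning a value below 1
// stops iteration and leaves unconsumed items in the underlying iterator.
let budget = 5;
for await (const chunk of chunkedAsyncIterable([1, 2, 3, 4, 5, 6, 7, 8], () => Math.min(3, budget))) {
    budget -= chunk.length;
    console.log(chunk); // [1, 2, 3], then [4, 5]; afterwards the budget is 0 and iteration stops
}
```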
diff --git a/packages/utils/src/internals/sitemap.ts b/packages/utils/src/internals/sitemap.ts
index d74eda26445f..4b4fbada3e80 100644
--- a/packages/utils/src/internals/sitemap.ts
+++ b/packages/utils/src/internals/sitemap.ts
@@ -194,6 +194,13 @@ export interface ParseSitemapOptions {
      * @default true
      */
     reportNetworkErrors?: boolean;
+    /**
+     * Optional filter for nested sitemap URLs discovered in sitemap index files.
+     * Called with the URL of each child sitemap before it is fetched.
+     * Return `true` to include the sitemap, `false` to skip it.
+     * If not provided, all nested sitemaps are followed.
+     */
+    nestedSitemapFilter?: (sitemapUrl: string) => boolean;
 }

 export async function* parseSitemap(
@@ -209,6 +216,7 @@ export async function* parseSitemap(
         sitemapRetries = 3,
         networkTimeouts,
         reportNetworkErrors = true,
+        nestedSitemapFilter,
     } = options ?? {};

     const sources = [...initialSources];
@@ -340,6 +348,11 @@ export async function* parseSitemap(

         for await (const item of items) {
             if (item.type === 'sitemapUrl' && !visitedSitemapUrls.has(item.url)) {
+                if (nestedSitemapFilter && !nestedSitemapFilter(item.url)) {
+                    log.debug(`Skipping sitemap ${item.url} due to nestedSitemapFilter.`);
+                    continue;
+                }
+
                 sources.push({ type: 'url', url: item.url, depth: (source.depth ?? 0) + 1 });
                 if (emitNestedSitemaps) {
                     yield { loc: item.url, originSitemapUrl: null } as any;
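A usage sketch of the new filter, matching how the tests below call it (the URL and filter predicate are illustrative):

```ts
import { parseSitemap } from '@crawlee/utils';

// Follow a sitemap index, but skip child sitemaps we are not interested in.
for await (const item of parseSitemap(
    [{ type: 'url', url: 'https://example.com/sitemap_index.xml' }],
    undefined,
    { nestedSitemapFilter: (sitemapUrl) => !sitemapUrl.includes('/archive/') },
)) {
    console.log(item.loc);
}
```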
diff --git a/packages/utils/test/sitemap.test.ts b/packages/utils/test/sitemap.test.ts
index 2fbc54928852..6a8545cb00ed 100644
--- a/packages/utils/test/sitemap.test.ts
+++ b/packages/utils/test/sitemap.test.ts
@@ -294,6 +294,33 @@ describe('Sitemap', () => {
         );
     });

+    it('respects nestedSitemapFilter when following sitemap indexes', async () => {
+        const items: SitemapUrl[] = [];
+
+        for await (const item of parseSitemap(
+            [{ type: 'url', url: 'http://not-exists.com/sitemap_parent.xml' }],
+            undefined,
+            {
+                nestedSitemapFilter: (url) => !url.includes('sitemap_child_2'),
+            },
+        )) {
+            items.push(item);
+        }
+
+        expect(items).toHaveLength(5);
+        expect(items.every((item) => item.originSitemapUrl === 'http://not-exists.com/sitemap_child.xml')).toBe(true);
+    });
+
+    it('follows all nested sitemaps when nestedSitemapFilter is not provided', async () => {
+        const items: SitemapUrl[] = [];
+
+        for await (const item of parseSitemap([{ type: 'url', url: 'http://not-exists.com/sitemap_parent.xml' }])) {
+            items.push(item);
+        }
+
+        expect(items).toHaveLength(10);
+    });
+
     it('does not break on invalid xml', async () => {
         const sitemap = await Sitemap.load('http://not-exists.com/not_actual_xml.xml');
         expect(sitemap.urls).toEqual([]);
diff --git a/test/core/crawlers/basic_crawler.test.ts b/test/core/crawlers/basic_crawler.test.ts
index c455fbf4f402..16410e298f50 100644
--- a/test/core/crawlers/basic_crawler.test.ts
+++ b/test/core/crawlers/basic_crawler.test.ts
@@ -250,7 +250,11 @@ describe('BasicCrawler', () => {
         const crawler = new TestCrawler({ maxCrawlDepth: 3 });

         beforeEach(() => {
-            addRequestsBatchedMock = vi.fn().mockImplementation(async () => ({}));
+            addRequestsBatchedMock = vi.fn().mockImplementation(async () => ({
+                addedRequests: [],
+                waitForAllRequestsToBeAdded: Promise.resolve([]),
+                requestsOverLimit: [],
+            }));
             onSkippedRequestMock = vi.fn();

             options = {
@@ -1885,6 +1889,102 @@ describe('BasicCrawler', () => {
         );
         expect(maxCrawlDepthMessages).toHaveLength(1);
     });
+
+    test('should not count duplicate URLs toward maxRequestsPerCrawl limit (addRequests)', async () => {
+        const requestQueue = await RequestQueue.open();
+
+        const crawler = new BasicCrawler({
+            requestQueue,
+            maxRequestsPerCrawl: 5,
+            requestHandler: async () => {},
+        });
+
+        // 10 duplicate links to the same URL + 1 unique link at the end
+        const requestsToAdd = [
+            ...Array.from({ length: 10 }, () => 'http://example.com/same'),
+            'http://example.com/new',
+        ];
+
+        await crawler.addRequests(requestsToAdd);
+
+        // Both unique URLs should have been enqueued; duplicates should not consume the budget
+        await expect(localStorageEmulator.getRequestQueueItems()).resolves.toMatchObject([
+            { url: 'http://example.com/same' },
+            { url: 'http://example.com/new' },
+        ]);
+    });
+
+    test('addRequestsBatched with maxNewRequests should correctly report requestsOverLimit for array input', async () => {
+        const queue = await RequestQueue.open();
+
+        const result = await queue.addRequestsBatched(
+            [
+                { url: 'http://example.com/a' },
+                { url: 'http://example.com/b' },
+                { url: 'http://example.com/c' },
+                { url: 'http://example.com/d' },
+                { url: 'http://example.com/e' },
+            ],
+            { maxNewRequests: 2 },
+        );
+
+        const addedUrls = result.addedRequests.filter((r) => !r.wasAlreadyPresent).map((r) => r.uniqueKey);
+        const overLimitUrls = (result.requestsOverLimit ?? []).map((r) => (typeof r === 'string' ? r : r.url));
+
+        expect(addedUrls).toHaveLength(2);
+        expect(overLimitUrls).toHaveLength(3);
+    });
+
+    test('addRequestsBatched with maxNewRequests should correctly report requestsOverLimit for generator input', async () => {
+        const queue = await RequestQueue.open();
+
+        async function* urls() {
+            yield { url: 'http://example.com/a' };
+            yield { url: 'http://example.com/b' };
+            yield { url: 'http://example.com/c' };
+            yield { url: 'http://example.com/d' };
+            yield { url: 'http://example.com/e' };
+        }
+
+        const result = await queue.addRequestsBatched(urls(), { maxNewRequests: 2 });
+
+        const addedUrls = result.addedRequests.filter((r) => !r.wasAlreadyPresent).map((r) => r.uniqueKey);
+        const overLimitUrls = (result.requestsOverLimit ?? []).map((r) => (typeof r === 'string' ? r : r.url));
+
+        expect(addedUrls).toHaveLength(2);
+        expect(overLimitUrls).toHaveLength(3);
+    });
+
+    test('should not count duplicate URLs toward maxRequestsPerCrawl limit (enqueueLinks)', async () => {
+        const requestQueue = await RequestQueue.open();
+
+        const visitedUrls: string[] = [];
+
+        const crawler = new BasicCrawler({
+            requestQueue,
+            maxRequestsPerCrawl: 5,
+            requestHandler: async (context) => {
+                visitedUrls.push(context.request.url);
+
+                if (context.request.label) {
+                    return;
+                }
+
+                // Enqueue 10 duplicate links + 1 new unique link
+                const urls = [...Array.from({ length: 10 }, () => 'http://example.com/'), 'http://example.com/new'];
+
+                await context.enqueueLinks({ urls, label: 'child' });
+            },
+        });
+
+        await crawler.run(['http://example.com/']);
+
+        // Both the start URL and the new URL should have been visited
+        expect(visitedUrls).toContain('http://example.com/');
+        expect(visitedUrls).toContain('http://example.com/new');
+    });
 });

 describe('addRequests input validation', () => {
diff --git a/test/core/enqueue_links/enqueue_links.test.ts b/test/core/enqueue_links/enqueue_links.test.ts
index fbf34c604973..cede47fec3c6 100644
--- a/test/core/enqueue_links/enqueue_links.test.ts
+++ b/test/core/enqueue_links/enqueue_links.test.ts
@@ -1008,7 +1008,7 @@ describe('enqueueLinks()', () => {
         for await (const request of requests) {
             enqueued.push({ request: typeof request === 'string' ? { url: request } : request, options });
         }
-        return { addedRequests: [], waitForAllRequestsToBeAdded: Promise.resolve([]) };
+        return { addedRequests: [], waitForAllRequestsToBeAdded: Promise.resolve([]), requestsOverLimit: [] };
     };

     await cheerioCrawlerEnqueueLinks({
diff --git a/test/core/session_pool/session_pool.test.ts b/test/core/session_pool/session_pool.test.ts
index d80957279693..ec8f6565c3e1 100644
--- a/test/core/session_pool/session_pool.test.ts
+++ b/test/core/session_pool/session_pool.test.ts
@@ -112,18 +112,17 @@ describe('SessionPool - testing session pool', () => {
         await sessionPool.getSession();
         let isCalled = false;
         // @ts-expect-error Accessing private property
-        const oldPick = sessionPool._pickSession; //eslint-disable-line
+        const oldPick = sessionPool._pickSession;

         // @ts-expect-error Overriding private property
         sessionPool._pickSession = () => {
-            //eslint-disable-line
             isCalled = true;
             return oldPick.bind(sessionPool)();
         };

         await sessionPool.getSession();

-        expect(isCalled).toBe(true); //eslint-disable-line
+        expect(isCalled).toBe(true);
     });

     test('should delete picked session when it is unusable and create a new one', async () => {
diff --git a/test/utils/iterables.test.ts b/test/utils/iterables.test.ts
index f571277c22aa..0a77ead0eddc 100644
--- a/test/utils/iterables.test.ts
+++ b/test/utils/iterables.test.ts
@@ -92,6 +92,54 @@ describe('chunkedAsyncIterable', () => {
         expect(result).toEqual([]);
     });

+    it('should accept a callback for dynamic chunk size', async () => {
+        let size = 3;
+        const result = [];
+        for await (const chunk of chunkedAsyncIterable([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], () => size)) {
+            result.push(chunk);
+            size = 2; // shrink after first chunk
+        }
+
+        expect(result).toEqual([[1, 2, 3], [4, 5], [6, 7], [8, 9], [10]]);
+    });
+
+    it('should stop iterating when dynamic chunk size drops to zero', async () => {
+        let size = 2;
+        const result = [];
+        for await (const chunk of chunkedAsyncIterable([1, 2, 3, 4, 5, 6], () => size)) {
+            result.push(chunk);
+            size = 0; // signal stop after first chunk
+        }
+
+        expect(result).toEqual([[1, 2]]);
+    });
+
+    it('should leave the underlying iterator drainable after partial consumption', async () => {
+        async function* source() {
+            yield 1;
+            yield 2;
+            yield 3;
+            yield 4;
+            yield 5;
+        }
+
+        const iterator = source();
+
+        // Consume only the first chunk via chunkedAsyncIterable
+        let size = 2;
+        for await (const _ of chunkedAsyncIterable(iterator, () => size)) {
+            size = 0; // stop after first chunk
+        }
+
+        // The underlying iterator should still be drainable
+        const remaining: number[] = [];
+        for await (const value of iterator) {
+            remaining.push(value);
+        }
+
+        expect(remaining).toEqual([3, 4, 5]);
+    });
+
     it('should throw error for invalid chunk size', async () => {
         await expect(
             (async () => {
diff --git a/website/package.json b/website/package.json
index dd7b35448216..3a36f2db62f2 100644
--- a/website/package.json
+++ b/website/package.json
@@ -48,7 +48,7 @@
         "@giscus/react": "^3.0.0",
         "@mdx-js/react": "^3.0.1",
         "@signalwire/docusaurus-plugin-llms-txt": "^1.2.1",
-        "axios": "^1.13.5",
+        "axios": "^1.15.0",
         "buffer": "^6.0.3",
         "clsx": "^2.0.0",
         "crypto-browserify": "^3.12.0",
"axios@npm:1.15.0" dependencies: follow-redirects: "npm:^1.15.11" form-data: "npm:^4.0.5" - proxy-from-env: "npm:^1.1.0" - checksum: 10c0/abf468c34f2d145f3dc7dbc0f1be67e520630624307bda69a41bbe8d386bd672d87b4405c4ee77f9ff54b235ab02f96a9968fb00e75b13ce64706e352a3068fd + proxy-from-env: "npm:^2.1.0" + checksum: 10c0/47e0f860e98d4d7aa145e89ce0cae00e1fb0f1d2485f065c21fce955ddb1dba4103a46bd0e47acd18a27208a7f62c96249e620db575521b92a968619ab133409 languageName: node linkType: hard @@ -15104,10 +15104,10 @@ __metadata: languageName: node linkType: hard -"proxy-from-env@npm:^1.1.0": - version: 1.1.0 - resolution: "proxy-from-env@npm:1.1.0" - checksum: 10c0/fe7dd8b1bdbbbea18d1459107729c3e4a2243ca870d26d34c2c1bcd3e4425b7bcc5112362df2d93cc7fb9746f6142b5e272fd1cc5c86ddf8580175186f6ad42b +"proxy-from-env@npm:^2.1.0": + version: 2.1.0 + resolution: "proxy-from-env@npm:2.1.0" + checksum: 10c0/ed01729fd4d094eab619cd7e17ce3698b3413b31eb102c4904f9875e677cd207392795d5b4adee9cec359dfd31c44d5ad7595a3a3ad51c40250e141512281c58 languageName: node linkType: hard @@ -15997,7 +15997,7 @@ __metadata: "@types/react": "npm:^19.0.0" "@typescript-eslint/eslint-plugin": "npm:^7.0.0" "@typescript-eslint/parser": "npm:^7.0.0" - axios: "npm:^1.13.5" + axios: "npm:^1.15.0" buffer: "npm:^6.0.3" clsx: "npm:^2.0.0" crypto-browserify: "npm:^3.12.0" diff --git a/yarn.lock b/yarn.lock index 1144d78c311e..918bf864aabe 100644 --- a/yarn.lock +++ b/yarn.lock @@ -232,25 +232,29 @@ __metadata: languageName: node linkType: hard -"@apify/eslint-config@npm:^1.0.0": - version: 1.1.0 - resolution: "@apify/eslint-config@npm:1.1.0" +"@apify/eslint-config@npm:^2.0.0": + version: 2.0.0 + resolution: "@apify/eslint-config@npm:2.0.0" dependencies: - "@eslint/compat": "npm:^1.2.6" - eslint-config-airbnb-base: "npm:^15.0.0" eslint-plugin-import: "npm:^2.32.0" eslint-plugin-simple-import-sort: "npm:^12.1.1" globals: "npm:^15.14.0" peerDependencies: + "@stylistic/eslint-plugin": ^5.0.0 + "@vitest/eslint-plugin": ^1.6.14 eslint: ^9.19.0 eslint-plugin-jest: ^28.11.0 typescript-eslint: ^8.23.0 peerDependenciesMeta: + "@stylistic/eslint-plugin": + optional: true + "@vitest/eslint-plugin": + optional: true eslint-plugin-jest: optional: true typescript-eslint: optional: true - checksum: 10c0/9c1461d859d02bbbb59a6004aa289054a7fca33e573d703ffb6fe62f021607ba298e1dba2ac8c1cc43362150be5444e0112efa98f768d8d06409c3f939671c0e + checksum: 10c0/b2139b231e735853f0d6c10b9962a8da5f49cb2388429368484471366423a5d0cc7be68d376f6bb8fdaa5825d100f52be913069b3a8b38821dc883e278225652 languageName: node linkType: hard @@ -1059,7 +1063,7 @@ __metadata: version: 0.0.0-use.local resolution: "@crawlee/root@workspace:." 
   dependencies:
-    "@apify/eslint-config": "npm:^1.0.0"
+    "@apify/eslint-config": "npm:^2.0.0"
     "@apify/log": "npm:^2.4.0"
     "@apify/tsconfig": "npm:^0.1.0"
     "@biomejs/biome": "npm:^2.3.11"
@@ -1665,20 +1669,6 @@ __metadata:
   languageName: node
   linkType: hard

-"@eslint/compat@npm:^1.2.6":
-  version: 1.4.1
-  resolution: "@eslint/compat@npm:1.4.1"
-  dependencies:
-    "@eslint/core": "npm:^0.17.0"
-  peerDependencies:
-    eslint: ^8.40 || 9
-  peerDependenciesMeta:
-    eslint:
-      optional: true
-  checksum: 10c0/46f5ff884873c2e2366df55dd7b2d6b12f7f852bfba8e2a48dae4819cc5e58756deefa9b7f87f1b107af725ee883a05fcc02caf969b58fb142e790c6036a0450
-  languageName: node
-  linkType: hard
-
 "@eslint/config-array@npm:^0.21.1":
   version: 0.21.1
   resolution: "@eslint/config-array@npm:0.21.1"
@@ -4634,13 +4624,13 @@ __metadata:
   linkType: hard

 "axios@npm:^1.12.0, axios@npm:^1.6.7":
-  version: 1.13.2
-  resolution: "axios@npm:1.13.2"
+  version: 1.15.0
+  resolution: "axios@npm:1.15.0"
   dependencies:
-    follow-redirects: "npm:^1.15.6"
-    form-data: "npm:^4.0.4"
-    proxy-from-env: "npm:^1.1.0"
-  checksum: 10c0/e8a42e37e5568ae9c7a28c348db0e8cf3e43d06fcbef73f0048669edfe4f71219664da7b6cc991b0c0f01c28a48f037c515263cb79be1f1ae8ff034cd813867b
+    follow-redirects: "npm:^1.15.11"
+    form-data: "npm:^4.0.5"
+    proxy-from-env: "npm:^2.1.0"
+  checksum: 10c0/47e0f860e98d4d7aa145e89ce0cae00e1fb0f1d2485f065c21fce955ddb1dba4103a46bd0e47acd18a27208a7f62c96249e620db575521b92a968619ab133409
   languageName: node
   linkType: hard

@@ -4768,9 +4758,9 @@ __metadata:
   linkType: hard

 "basic-ftp@npm:^5.0.2":
-  version: 5.2.0
-  resolution: "basic-ftp@npm:5.2.0"
-  checksum: 10c0/a0f85c01deae0723021f9bf4a7be29378186fa8bba41e74ea11832fe74c187ce90c3599c3cc5ec936581cfd150020e79f4a9ed0ee9fb20b2308e69b045f3a059
+  version: 5.2.2
+  resolution: "basic-ftp@npm:5.2.2"
+  checksum: 10c0/a314a05450cf6311035d1bbb23c1ba1c8c0b991e7cb9bfafafc72a82bfafc540561c22eb046a58374688b7b9df502aa002fc28f4d366eb40964f307d131e06a6
   languageName: node
   linkType: hard

@@ -5594,13 +5584,6 @@ __metadata:
   languageName: node
   linkType: hard

-"confusing-browser-globals@npm:^1.0.10":
-  version: 1.0.11
-  resolution: "confusing-browser-globals@npm:1.0.11"
-  checksum: 10c0/475d0a284fa964a5182b519af5738b5b64bf7e413cfd703c1b3496bf6f4df9f827893a9b221c0ea5873c1476835beb1e0df569ba643eff0734010c1eb780589e
-  languageName: node
-  linkType: hard
-
 "console-control-strings@npm:^1.1.0":
   version: 1.1.0
   resolution: "console-control-strings@npm:1.1.0"
@@ -6913,21 +6896,6 @@ __metadata:
   languageName: node
   linkType: hard

-"eslint-config-airbnb-base@npm:^15.0.0":
-  version: 15.0.0
-  resolution: "eslint-config-airbnb-base@npm:15.0.0"
-  dependencies:
-    confusing-browser-globals: "npm:^1.0.10"
-    object.assign: "npm:^4.1.2"
-    object.entries: "npm:^1.1.5"
-    semver: "npm:^6.3.0"
-  peerDependencies:
-    eslint: ^7.32.0 || ^8.2.0
-    eslint-plugin-import: ^2.25.2
-  checksum: 10c0/93639d991654414756f82ad7860aac30b0dc6797277b7904ddb53ed88a32c470598696bbc6c503e066414024d305221974d3769e6642de65043bedf29cbbd30f
-  languageName: node
-  linkType: hard
-
 "eslint-config-prettier@npm:^10.1.1":
   version: 10.1.8
   resolution: "eslint-config-prettier@npm:10.1.8"
@@ -7653,7 +7621,7 @@ __metadata:
   languageName: node
   linkType: hard

-"follow-redirects@npm:^1.15.6":
+"follow-redirects@npm:^1.15.11":
   version: 1.15.11
   resolution: "follow-redirects@npm:1.15.11"
   peerDependenciesMeta:
@@ -7716,6 +7684,19 @@ __metadata:
   languageName: node
   linkType: hard

+"form-data@npm:^4.0.5":
+  version: 4.0.5
+  resolution: "form-data@npm:4.0.5"
+  dependencies:
+    asynckit: "npm:^0.4.0"
+    combined-stream: "npm:^1.0.8"
+    es-set-tostringtag: "npm:^2.1.0"
+    hasown: "npm:^2.0.2"
+    mime-types: "npm:^2.1.12"
+  checksum: 10c0/dd6b767ee0bbd6d84039db12a0fa5a2028160ffbfaba1800695713b46ae974a5f6e08b3356c3195137f8530dcd9dfcb5d5ae1eeff53d0db1e5aad863b619ce3b
+  languageName: node
+  linkType: hard
+
 "formdata-node@npm:^4.3.2":
   version: 4.4.1
   resolution: "formdata-node@npm:4.4.1"
@@ -11555,7 +11536,7 @@ __metadata:
   languageName: node
   linkType: hard

-"object.assign@npm:^4.1.2, object.assign@npm:^4.1.4, object.assign@npm:^4.1.7":
+"object.assign@npm:^4.1.4, object.assign@npm:^4.1.7":
   version: 4.1.7
   resolution: "object.assign@npm:4.1.7"
   dependencies:
@@ -11569,18 +11550,6 @@ __metadata:
   languageName: node
   linkType: hard

-"object.entries@npm:^1.1.5":
-  version: 1.1.9
-  resolution: "object.entries@npm:1.1.9"
-  dependencies:
-    call-bind: "npm:^1.0.8"
-    call-bound: "npm:^1.0.4"
-    define-properties: "npm:^1.2.1"
-    es-object-atoms: "npm:^1.1.1"
-  checksum: 10c0/d4b8c1e586650407da03370845f029aa14076caca4e4d4afadbc69cfb5b78035fd3ee7be417141abdb0258fa142e59b11923b4c44d8b1255b28f5ffcc50da7db
-  languageName: node
-  linkType: hard
-
 "object.fromentries@npm:^2.0.8":
   version: 2.0.8
   resolution: "object.fromentries@npm:2.0.8"
@@ -12676,6 +12645,13 @@ __metadata:
   languageName: node
   linkType: hard

+"proxy-from-env@npm:^2.1.0":
+  version: 2.1.0
+  resolution: "proxy-from-env@npm:2.1.0"
+  checksum: 10c0/ed01729fd4d094eab619cd7e17ce3698b3413b31eb102c4904f9875e677cd207392795d5b4adee9cec359dfd31c44d5ad7595a3a3ad51c40250e141512281c58
+  languageName: node
+  linkType: hard
+
 "proxy@npm:^1.0.2":
   version: 1.0.2
   resolution: "proxy@npm:1.0.2"
@@ -13448,7 +13424,7 @@ __metadata:
   languageName: node
   linkType: hard

-"semver@npm:^6.3.0, semver@npm:^6.3.1":
+"semver@npm:^6.3.1":
   version: 6.3.1
   resolution: "semver@npm:6.3.1"
   bin: