From c102b8bd6f97313997094a40cbba927b30a76c62 Mon Sep 17 00:00:00 2001 From: David Chen <36295559+David0922@users.noreply.github.com> Date: Sun, 29 Mar 2026 02:39:14 +0800 Subject: [PATCH] retry failed downloads --- .DS_Store | Bin 0 -> 6148 bytes src/main.ts | 170 +++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 154 insertions(+), 16 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..64609b8d8a0865b5eb79cc4d336880b09ac79f6c GIT binary patch literal 6148 zcmeHK%}T>S5T0$TO(;SR3LXPq3#K4Q@e*o%0V8@)sR=0>jM>tp=1>Z`=?nQPK94iI z8?m&4ClNaXv)}CeWZ7@S{s4d&%;Fw^0{|8(VZ+8|jZmC)MHlsPoh*t7=4Q-X_6P7_bwXE*4B30YFi!aHh7R_P=v)SAB59u z>Rm{gMztMAm&v#sIlHGaE5aljPgOw@k1^%uD#>D54&*G0b5$GYfMr|u$l06Ey{_vm zy6(Jpbc|-d*Iz7bYwzIjK!Tx(x: T, xs: T[]): T { + const n = xs.length; + + if (n === 0) throw new Error('xs.length = 0'); + + const idx = xs.indexOf(x); + + if (idx < 0) throw new Error('x not found in xs'); + + const res = xs[(idx + n - 1) % n]; + + return res!; +} + +function nextElement(x: T, xs: T[]): T { + const n = xs.length; + + if (n === 0) throw new Error('xs.length = 0'); + + const idx = xs.indexOf(x); + + if (idx < 0) throw new Error('x not found in xs'); + + const res = xs[(idx + 1) % n]; + + return res!; +} + +const pad2 = (x: number) => x.toString().padStart(2, '0'); + // m & d are 0-indexed function toDateStr(y: number, m: number, d: number) { - const pad2 = (x: number) => x.toString().padStart(2, '0'); return `${y}${pad2(m + 1)}${pad2(d + 1)}`; } // expMonth is 0-indexed function toTradingViewFutureSymbol( + exchange: string, symbol: string, expYear: number, expMonth: number ) { - return `${symbol}${cmeMonthCode[expMonth]}${expYear}`; + return `${exchange}:${symbol}${cmeMonthCode[expMonth]}${expYear}`; } const tvFutSymbol = toTradingViewFutureSymbol; @@ -164,6 +220,46 @@ async function login(browser: puppeteer.Browser) { await page.close(); } +async function findCSVWithSubstr(dir: string, substr: string): Promise { + const files = await fsp.readdir(dir); + const file = files.find(f => f.includes(substr) && f.endsWith('.csv')); + + if (!file) throw new Error(`${substr} not found in ${dir}`); + + return path.join(dir, file); +} + +async function verifyCSV( + dir: string, + substr: string, + minRows: number +): Promise { + let fullPath: string; + + try { + fullPath = await findCSVWithSubstr(dir, substr); + } catch { + return false; + } + + const rl = readline.createInterface({ + input: fs.createReadStream(fullPath), + crlfDelay: Infinity, + }); + + let count = 0; + + for await (const _ of rl) { + ++count; + if (count >= minRows) { + rl.close(); + return true; + } + } + + return false; +} + async function main() { const downloadPath = path.resolve(process.cwd(), 'downloads'); if (!fs.existsSync(downloadPath)) { @@ -172,27 +268,67 @@ async function main() { const browser = await puppeteer.launch({ browser: 'chrome', - headless: false, + // headless: false, + headless: true, userDataDir: path.resolve(process.cwd(), 'session'), defaultViewport: null, }); await login(browser); - for (let symbol of ['ES' /* , 'NQ' */]) { - for (let expYear = 2020; expYear <= 2022; ++expYear) { - for (let expMonth = Month.MAR; expMonth <= Month.DEC; expMonth += 3) { - let startMonth = expMonth + 12 - 4 + 1; - let startYear = expYear - 1 + Math.trunc(startMonth / 12); - startMonth %= 12; - await downloadCSV( - browser, - downloadPath, - tvFutSymbol(symbol, expYear, expMonth), - 10, - toDateStr(startYear, startMonth, 0), - toDateStr(expYear, expMonth, 0) + const futs: Fut[] = [ + // { exchange: 'CME_MINI', symbol: 'ES', months: quarterMonths }, + // { exchange: 'CME_MINI', symbol: 'NQ', months: quarterMonths }, + // { exchange: 'COMEX', symbol: 'SI', months: siMonths }, + ]; + + let fails: string[] = []; + + for (let fut of futs) { + for (let expYear = 2001; expYear <= 2025; ++expYear) { + for (let expMonth of fut.months) { + const startMonth = prevElement(expMonth, fut.months); + const startYear = expYear - (startMonth < expMonth ? 0 : 1); + const tvSymbol = tvFutSymbol( + fut.exchange, + fut.symbol, + expYear, + expMonth ); + const intervalMinutes = 10; + const csvFilenameSubstr = `${fut.symbol}${cmeMonthCode[expMonth]}${expYear}`; + const minRows = + ((expMonth + 12 - startMonth) % 12) * + 20 * + ((23 * 60) / intervalMinutes); + + let retries = 3; + let success = false; + + do { + --retries; + await downloadCSV( + browser, + downloadPath, + tvSymbol, + intervalMinutes, + toDateStr(startYear, startMonth, 0), + toDateStr(expYear, expMonth, 27) + ); + success = await verifyCSV(downloadPath, csvFilenameSubstr, minRows); + } while (retries && !success); + + if (success) { + await fsp.rename( + await findCSVWithSubstr(downloadPath, csvFilenameSubstr), + path.join( + downloadPath, + `${fut.symbol}_${expYear}_${pad2(expMonth + 1)}.csv` + ) + ); + } else { + fails.push(tvSymbol); + } // todo: verify download and retry if download failed or data is incomplete } @@ -202,6 +338,8 @@ async function main() { const pages = await browser.pages(); await Promise.allSettled(pages.map(page => page.close())); await browser.close(); + + if (fails.length !== 0) throw new Error(`failed to download ${fails}`); } main().catch(console.error);