retry failed downloads

This commit is contained in:
David Chen
2026-03-29 02:39:14 +08:00
committed by GitHub
parent e4d1c47085
commit c102b8bd6f
2 changed files with 154 additions and 16 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -1,4 +1,5 @@
import fs from 'node:fs';
import * as fsp from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
import readline from 'node:readline/promises';
@@ -20,23 +21,78 @@ enum Month {
DEC = 11,
}
/** Every member of the `Month` enum, in calendar order from January to December. */
// prettier-ignore
const months: Month[] = [
  Month.JAN, Month.FEB, Month.MAR, Month.APR, Month.MAY, Month.JUN,
  Month.JUL, Month.AUG, Month.SEP, Month.OCT, Month.NOV, Month.DEC,
];
/** Contract months on a quarterly cycle: March, June, September, December. */
const quarterMonths: Month[] = [
  Month.MAR,
  Month.JUN,
  Month.SEP,
  Month.DEC,
];

/**
 * Contract months March, May, July, September and December.
 * NOTE(review): presumably the listing cycle for the COMEX `SI` entry in `main` — confirm.
 */
const siMonths: Month[] = [
  Month.MAR,
  Month.MAY,
  Month.JUL,
  Month.SEP,
  Month.DEC,
];
/** Describes one futures product whose contracts should be downloaded. */
interface Fut {
  /** Exchange identifier; becomes the part before the `:` in the TradingView symbol. */
  exchange: string;

  /** Root symbol of the product; the month code and year are appended to it. */
  symbol: string;

  /** Expiry months to iterate over for every year (values of the `Month` enum). */
  months: Month[];
}
/**
 * Single-letter contract month codes, indexed by 0-based month:
 * 0 -> 'F', 1 -> 'G', 2 -> 'H', 3 -> 'J', 4 -> 'K', 5 -> 'M',
 * 6 -> 'N', 7 -> 'Q', 8 -> 'U', 9 -> 'V', 10 -> 'X', 11 -> 'Z'.
 */
const cmeMonthCode: string[] = Array.from('FGHJKMNQUVXZ');
/**
 * Returns the element located just before `x` in `xs`, treating the list as
 * circular: the predecessor of the first element is the last element.
 *
 * @param x - Element to locate (matched via `Array.prototype.indexOf`).
 * @param xs - List to search.
 * @returns The cyclic predecessor of the first occurrence of `x`.
 * @throws Error if `xs` is empty or does not contain `x`.
 */
function prevElement<T>(x: T, xs: T[]): T {
  if (xs.length === 0) throw new Error('xs.length = 0');

  const pos = xs.indexOf(x);
  if (pos === -1) throw new Error('x not found in xs');

  // Step back one slot; position 0 wraps around to the final slot.
  const prevPos = pos === 0 ? xs.length - 1 : pos - 1;
  return xs[prevPos]!;
}
/**
 * Returns the element located just after `x` in `xs`, treating the list as
 * circular: the successor of the last element is the first element.
 *
 * @param x - Element to locate (matched via `Array.prototype.indexOf`).
 * @param xs - List to search.
 * @returns The cyclic successor of the first occurrence of `x`.
 * @throws Error if `xs` is empty or does not contain `x`.
 */
function nextElement<T>(x: T, xs: T[]): T {
  if (xs.length === 0) throw new Error('xs.length = 0');

  const pos = xs.indexOf(x);
  if (pos === -1) throw new Error('x not found in xs');

  // Step forward one slot; the final slot wraps around to position 0.
  const nextPos = pos + 1 === xs.length ? 0 : pos + 1;
  return xs[nextPos]!;
}
/** Renders a number in decimal, left-padded with `0` to at least two characters. */
const pad2 = (x: number): string => x.toString().padStart(2, '0');

/**
 * Builds a compact `YYYYMMDD`-style date string.
 *
 * @param y - Year, emitted as-is (no padding is applied).
 * @param m - 0-indexed month (0 = January); 1 is added before formatting.
 * @param d - 0-indexed day of month (0 = the 1st); 1 is added before formatting.
 * @returns The concatenated string, e.g. `toDateStr(2020, 0, 0)` gives `'20200101'`.
 */
function toDateStr(y: number, m: number, d: number): string {
  // Relies on the shared `pad2` helper rather than re-declaring an identical
  // shadowing copy on every call.
  return `${y}${pad2(m + 1)}${pad2(d + 1)}`;
}
/**
 * Builds the TradingView symbol of a single futures contract in the form
 * `<exchange>:<symbol><monthCode><expYear>`; for instance exchange `CME_MINI`,
 * symbol `ES`, year 2020 and month index 2 give `CME_MINI:ESH2020`.
 *
 * @param exchange - Exchange prefix placed before the `:`.
 * @param symbol - Root symbol of the product.
 * @param expYear - Expiry year, appended as-is.
 * @param expMonth - 0-indexed expiry month, used as an index into `cmeMonthCode`.
 * @returns The exchange-qualified contract symbol.
 * @throws RangeError if `expMonth` has no entry in `cmeMonthCode`.
 */
function toTradingViewFutureSymbol(
  exchange: string,
  symbol: string,
  expYear: number,
  expMonth: number
): string {
  const monthCode = cmeMonthCode[expMonth];
  // Fail loudly instead of emitting a symbol that contains "undefined".
  if (monthCode === undefined) {
    throw new RangeError(`expMonth out of range: ${expMonth}`);
  }
  // The exchange prefix is always included; the previous leading return
  // dropped it and left this form unreachable.
  return `${exchange}:${symbol}${monthCode}${expYear}`;
}

/** Short alias for {@link toTradingViewFutureSymbol}. */
const tvFutSymbol = toTradingViewFutureSymbol;
@@ -164,6 +220,46 @@ async function login(browser: puppeteer.Browser) {
await page.close();
}
/**
 * Finds a `.csv` file in `dir` whose name contains `substr`.
 *
 * @param dir - Directory to list (not searched recursively).
 * @param substr - Text that must appear somewhere in the file name.
 * @returns `dir` joined with the first matching name, in the order reported by `readdir`.
 * @throws Error if no entry matches; errors from `readdir` itself propagate unchanged.
 */
async function findCSVWithSubstr(dir: string, substr: string): Promise<string> {
  const files = await fsp.readdir(dir);
  const file = files.find(f => f.includes(substr) && f.endsWith('.csv'));
  if (!file) throw new Error(`${substr} not found in ${dir}`);
  return path.join(dir, file);
}

/**
 * Checks that a CSV matching `substr` exists in `dir` and has at least
 * `minRows` lines. Every line is counted, including any header line.
 *
 * @param dir - Directory expected to contain the CSV.
 * @param substr - Text identifying the CSV by file name (see `findCSVWithSubstr`).
 * @param minRows - Minimum number of lines required. The threshold is only
 *   checked after a line has been read, so an empty file yields `false` even
 *   when `minRows` is 0 or negative.
 * @returns `true` once `minRows` lines have been seen; `false` if the file is
 *   missing, cannot be located, or ends before reaching the threshold.
 */
async function verifyCSV(
  dir: string,
  substr: string,
  minRows: number
): Promise<boolean> {
  let fullPath: string;
  try {
    fullPath = await findCSVWithSubstr(dir, substr);
  } catch {
    // No matching file (or the directory could not be listed): not verified.
    return false;
  }

  const input = fs.createReadStream(fullPath);
  const rl = readline.createInterface({ input, crlfDelay: Infinity });
  try {
    let count = 0;
    for await (const _line of rl) {
      ++count;
      // Stop reading as soon as the threshold is met.
      if (count >= minRows) return true;
    }
    return false;
  } finally {
    // rl.close() only relinquishes the input stream; destroy it explicitly so
    // the file descriptor is released before the caller renames the file.
    rl.close();
    input.destroy();
  }
}
async function main() {
const downloadPath = path.resolve(process.cwd(), 'downloads');
if (!fs.existsSync(downloadPath)) {
@@ -172,27 +268,67 @@ async function main() {
const browser = await puppeteer.launch({
browser: 'chrome',
headless: false,
// headless: false,
headless: true,
userDataDir: path.resolve(process.cwd(), 'session'),
defaultViewport: null,
});
await login(browser);
for (let symbol of ['ES' /* , 'NQ' */]) {
for (let expYear = 2020; expYear <= 2022; ++expYear) {
for (let expMonth = Month.MAR; expMonth <= Month.DEC; expMonth += 3) {
let startMonth = expMonth + 12 - 4 + 1;
let startYear = expYear - 1 + Math.trunc(startMonth / 12);
startMonth %= 12;
await downloadCSV(
browser,
downloadPath,
tvFutSymbol(symbol, expYear, expMonth),
10,
toDateStr(startYear, startMonth, 0),
toDateStr(expYear, expMonth, 0)
const futs: Fut[] = [
// { exchange: 'CME_MINI', symbol: 'ES', months: quarterMonths },
// { exchange: 'CME_MINI', symbol: 'NQ', months: quarterMonths },
// { exchange: 'COMEX', symbol: 'SI', months: siMonths },
];
let fails: string[] = [];
for (let fut of futs) {
for (let expYear = 2001; expYear <= 2025; ++expYear) {
for (let expMonth of fut.months) {
const startMonth = prevElement(expMonth, fut.months);
const startYear = expYear - (startMonth < expMonth ? 0 : 1);
const tvSymbol = tvFutSymbol(
fut.exchange,
fut.symbol,
expYear,
expMonth
);
const intervalMinutes = 10;
const csvFilenameSubstr = `${fut.symbol}${cmeMonthCode[expMonth]}${expYear}`;
const minRows =
((expMonth + 12 - startMonth) % 12) *
20 *
((23 * 60) / intervalMinutes);
let retries = 3;
let success = false;
do {
--retries;
await downloadCSV(
browser,
downloadPath,
tvSymbol,
intervalMinutes,
toDateStr(startYear, startMonth, 0),
toDateStr(expYear, expMonth, 27)
);
success = await verifyCSV(downloadPath, csvFilenameSubstr, minRows);
} while (retries && !success);
if (success) {
await fsp.rename(
await findCSVWithSubstr(downloadPath, csvFilenameSubstr),
path.join(
downloadPath,
`${fut.symbol}_${expYear}_${pad2(expMonth + 1)}.csv`
)
);
} else {
fails.push(tvSymbol);
}
// todo: verify download and retry if download failed or data is incomplete
}
@@ -202,6 +338,8 @@ async function main() {
const pages = await browser.pages();
await Promise.allSettled(pages.map(page => page.close()));
await browser.close();
if (fails.length !== 0) throw new Error(`failed to download ${fails}`);
}
main().catch(console.error);