retry failed downloads
This commit is contained in:
170
src/main.ts
170
src/main.ts
@@ -1,4 +1,5 @@
|
|||||||
import fs from 'node:fs';
|
import fs from 'node:fs';
|
||||||
|
import * as fsp from 'node:fs/promises';
|
||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
import process from 'node:process';
|
import process from 'node:process';
|
||||||
import readline from 'node:readline/promises';
|
import readline from 'node:readline/promises';
|
||||||
@@ -20,23 +21,78 @@ enum Month {
|
|||||||
DEC = 11,
|
DEC = 11,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const months = [
|
||||||
|
Month.JAN,
|
||||||
|
Month.FEB,
|
||||||
|
Month.MAR,
|
||||||
|
Month.APR,
|
||||||
|
Month.MAY,
|
||||||
|
Month.JUN,
|
||||||
|
Month.JUL,
|
||||||
|
Month.AUG,
|
||||||
|
Month.SEP,
|
||||||
|
Month.OCT,
|
||||||
|
Month.NOV,
|
||||||
|
Month.DEC,
|
||||||
|
];
|
||||||
|
|
||||||
|
const quarterMonths = [Month.MAR, Month.JUN, Month.SEP, Month.DEC];
|
||||||
|
|
||||||
|
const siMonths = [Month.MAR, Month.MAY, Month.JUL, Month.SEP, Month.DEC];
|
||||||
|
|
||||||
|
interface Fut {
|
||||||
|
exchange: string;
|
||||||
|
symbol: string;
|
||||||
|
months: Month[];
|
||||||
|
}
|
||||||
|
|
||||||
// prettier-ignore
|
// prettier-ignore
|
||||||
// 0-indexed
|
// 0-indexed
|
||||||
const cmeMonthCode = ['F', 'G', 'H', 'J', 'K', 'M', 'N', 'Q', 'U', 'V', 'X', 'Z'];
|
const cmeMonthCode = ['F', 'G', 'H', 'J', 'K', 'M', 'N', 'Q', 'U', 'V', 'X', 'Z'];
|
||||||
|
|
||||||
|
/**
 * Returns the element that cyclically precedes `x` in `xs`: the predecessor
 * of the first element is the last element.
 *
 * @param x - Element to locate; matched with `Array.prototype.indexOf`
 *   (strict equality, first occurrence wins).
 * @param xs - List to search. It is only read, never modified.
 * @returns The element one position before `x`, wrapping around at the start.
 * @throws Error if `xs` is empty or does not contain `x`.
 */
function prevElement<T>(x: T, xs: readonly T[]): T {
  const n = xs.length;
  if (n === 0) throw new Error('xs.length = 0');

  const idx = xs.indexOf(x);
  if (idx < 0) throw new Error('x not found in xs');

  // Adding n before subtracting keeps the left operand of % non-negative,
  // so index 0 wraps to n - 1 instead of producing -1.
  // The computed index is always within [0, n), hence the assertion.
  return xs[(idx + n - 1) % n] as T;
}
|
||||||
|
|
||||||
|
/**
 * Returns the element that cyclically follows `x` in `xs`: the successor of
 * the last element is the first element.
 *
 * @param x - Element to locate; matched with `Array.prototype.indexOf`
 *   (strict equality, first occurrence wins).
 * @param xs - List to search. It is only read, never modified.
 * @returns The element one position after `x`, wrapping around at the end.
 * @throws Error if `xs` is empty or does not contain `x`.
 */
function nextElement<T>(x: T, xs: readonly T[]): T {
  const n = xs.length;
  if (n === 0) throw new Error('xs.length = 0');

  const idx = xs.indexOf(x);
  if (idx < 0) throw new Error('x not found in xs');

  // The modulo wraps the last index back to 0; the computed index is always
  // within [0, n), hence the assertion.
  return xs[(idx + 1) % n] as T;
}
|
||||||
|
|
||||||
|
/** Renders a number as a string, left-padded with '0' to at least two characters. */
const pad2 = (value: number): string => String(value).padStart(2, '0');
|
||||||
|
|
||||||
// m & d are 0-indexed
|
// m & d are 0-indexed
|
||||||
function toDateStr(y: number, m: number, d: number) {
|
function toDateStr(y: number, m: number, d: number) {
|
||||||
const pad2 = (x: number) => x.toString().padStart(2, '0');
|
|
||||||
return `${y}${pad2(m + 1)}${pad2(d + 1)}`;
|
return `${y}${pad2(m + 1)}${pad2(d + 1)}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// expMonth is 0-indexed
|
// expMonth is 0-indexed
|
||||||
function toTradingViewFutureSymbol(
|
function toTradingViewFutureSymbol(
|
||||||
|
exchange: string,
|
||||||
symbol: string,
|
symbol: string,
|
||||||
expYear: number,
|
expYear: number,
|
||||||
expMonth: number
|
expMonth: number
|
||||||
) {
|
) {
|
||||||
return `${symbol}${cmeMonthCode[expMonth]}${expYear}`;
|
return `${exchange}:${symbol}${cmeMonthCode[expMonth]}${expYear}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const tvFutSymbol = toTradingViewFutureSymbol;
|
const tvFutSymbol = toTradingViewFutureSymbol;
|
||||||
@@ -164,6 +220,46 @@ async function login(browser: puppeteer.Browser) {
|
|||||||
await page.close();
|
await page.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Looks in `dir` for a file whose name contains `substr` and ends with
 * `.csv`, and returns its path.
 *
 * When several entries match, the first one in the order reported by
 * `readdir` is returned.
 *
 * @param dir - Directory to scan (not recursive).
 * @param substr - Text that must appear somewhere in the file name.
 * @returns `dir` joined with the matching file name.
 * @throws Error if no entry matches; errors from `readdir` itself
 *   (e.g. a missing directory) propagate unchanged.
 */
async function findCSVWithSubstr(dir: string, substr: string): Promise<string> {
  const files = await fsp.readdir(dir);
  const file = files.find(f => f.includes(substr) && f.endsWith('.csv'));

  if (!file) throw new Error(`${substr} not found in ${dir}`);

  return path.join(dir, file);
}
|
||||||
|
|
||||||
|
/**
 * Checks that a CSV file matching `substr` exists in `dir` and contains at
 * least `minRows` lines.
 *
 * Lines are counted with a streaming reader and counting stops as soon as the
 * threshold is reached, so the whole file is not read when it is long enough.
 * Every line counts, including any header line.
 *
 * @param dir - Directory to look in.
 * @param substr - Text that must appear in the CSV file name.
 * @param minRows - Minimum number of lines required.
 * @returns `true` if a matching file was found and has at least `minRows`
 *   lines; `false` if no file could be located or it is too short. A file
 *   with no lines at all yields `false` regardless of `minRows`.
 */
async function verifyCSV(
  dir: string,
  substr: string,
  minRows: number
): Promise<boolean> {
  let fullPath: string;

  try {
    fullPath = await findCSVWithSubstr(dir, substr);
  } catch {
    // Any failure to locate the file is reported as "not verified" so the
    // caller can decide whether to retry.
    return false;
  }

  const input = fs.createReadStream(fullPath);
  const rl = readline.createInterface({
    input,
    crlfDelay: Infinity,
  });

  try {
    let count = 0;

    for await (const _ of rl) {
      ++count;
      if (count >= minRows) return true;
    }

    return false;
  } finally {
    // Closing the readline interface does not close the underlying file
    // stream. Destroy it explicitly so the descriptor is released on the
    // early-return path too; the caller may rename the file right afterwards.
    rl.close();
    input.destroy();
  }
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const downloadPath = path.resolve(process.cwd(), 'downloads');
|
const downloadPath = path.resolve(process.cwd(), 'downloads');
|
||||||
if (!fs.existsSync(downloadPath)) {
|
if (!fs.existsSync(downloadPath)) {
|
||||||
@@ -172,27 +268,67 @@ async function main() {
|
|||||||
|
|
||||||
const browser = await puppeteer.launch({
|
const browser = await puppeteer.launch({
|
||||||
browser: 'chrome',
|
browser: 'chrome',
|
||||||
headless: false,
|
// headless: false,
|
||||||
|
headless: true,
|
||||||
userDataDir: path.resolve(process.cwd(), 'session'),
|
userDataDir: path.resolve(process.cwd(), 'session'),
|
||||||
defaultViewport: null,
|
defaultViewport: null,
|
||||||
});
|
});
|
||||||
|
|
||||||
await login(browser);
|
await login(browser);
|
||||||
|
|
||||||
for (let symbol of ['ES' /* , 'NQ' */]) {
|
const futs: Fut[] = [
|
||||||
for (let expYear = 2020; expYear <= 2022; ++expYear) {
|
// { exchange: 'CME_MINI', symbol: 'ES', months: quarterMonths },
|
||||||
for (let expMonth = Month.MAR; expMonth <= Month.DEC; expMonth += 3) {
|
// { exchange: 'CME_MINI', symbol: 'NQ', months: quarterMonths },
|
||||||
let startMonth = expMonth + 12 - 4 + 1;
|
// { exchange: 'COMEX', symbol: 'SI', months: siMonths },
|
||||||
let startYear = expYear - 1 + Math.trunc(startMonth / 12);
|
];
|
||||||
startMonth %= 12;
|
|
||||||
await downloadCSV(
|
let fails: string[] = [];
|
||||||
browser,
|
|
||||||
downloadPath,
|
for (let fut of futs) {
|
||||||
tvFutSymbol(symbol, expYear, expMonth),
|
for (let expYear = 2001; expYear <= 2025; ++expYear) {
|
||||||
10,
|
for (let expMonth of fut.months) {
|
||||||
toDateStr(startYear, startMonth, 0),
|
const startMonth = prevElement(expMonth, fut.months);
|
||||||
toDateStr(expYear, expMonth, 0)
|
const startYear = expYear - (startMonth < expMonth ? 0 : 1);
|
||||||
|
const tvSymbol = tvFutSymbol(
|
||||||
|
fut.exchange,
|
||||||
|
fut.symbol,
|
||||||
|
expYear,
|
||||||
|
expMonth
|
||||||
);
|
);
|
||||||
|
const intervalMinutes = 10;
|
||||||
|
const csvFilenameSubstr = `${fut.symbol}${cmeMonthCode[expMonth]}${expYear}`;
|
||||||
|
const minRows =
|
||||||
|
((expMonth + 12 - startMonth) % 12) *
|
||||||
|
20 *
|
||||||
|
((23 * 60) / intervalMinutes);
|
||||||
|
|
||||||
|
let retries = 3;
|
||||||
|
let success = false;
|
||||||
|
|
||||||
|
do {
|
||||||
|
--retries;
|
||||||
|
await downloadCSV(
|
||||||
|
browser,
|
||||||
|
downloadPath,
|
||||||
|
tvSymbol,
|
||||||
|
intervalMinutes,
|
||||||
|
toDateStr(startYear, startMonth, 0),
|
||||||
|
toDateStr(expYear, expMonth, 27)
|
||||||
|
);
|
||||||
|
success = await verifyCSV(downloadPath, csvFilenameSubstr, minRows);
|
||||||
|
} while (retries && !success);
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
await fsp.rename(
|
||||||
|
await findCSVWithSubstr(downloadPath, csvFilenameSubstr),
|
||||||
|
path.join(
|
||||||
|
downloadPath,
|
||||||
|
`${fut.symbol}_${expYear}_${pad2(expMonth + 1)}.csv`
|
||||||
|
)
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
fails.push(tvSymbol);
|
||||||
|
}
|
||||||
|
|
||||||
// todo: verify download and retry if download failed or data is incomplete
|
// todo: verify download and retry if download failed or data is incomplete
|
||||||
}
|
}
|
||||||
@@ -202,6 +338,8 @@ async function main() {
|
|||||||
const pages = await browser.pages();
|
const pages = await browser.pages();
|
||||||
await Promise.allSettled(pages.map(page => page.close()));
|
await Promise.allSettled(pages.map(page => page.close()));
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|
||||||
|
if (fails.length !== 0) throw new Error(`failed to download ${fails}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
main().catch(console.error);
|
main().catch(console.error);
|
||||||
|
|||||||
Reference in New Issue
Block a user