Browse Source

参数错误

main
YuJian 3 years ago
parent
commit
ae9fb256c2
  1. 4
      utils/saveImages.js
  2. 16
      yitudao.js

4
utils/saveImages.js

@@ -5,7 +5,7 @@ const fs = require("fs");
* 图片下载 * 图片下载
* @param {*} imgObject 图片 URL * @param {*} imgObject 图片 URL
*/ */
function saveImages(imgObject) { function saveImages(imgObject, total) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const forItem = Object.keys(imgObject); const forItem = Object.keys(imgObject);
for (let forItemIndex = 0; forItemIndex < forItem.length; forItemIndex++) { for (let forItemIndex = 0; forItemIndex < forItem.length; forItemIndex++) {
@@ -15,7 +15,7 @@ function saveImages(imgObject) {
else { else {
const alreadyPath = fs.readdirSync(`./Result/${imTitle}`); const alreadyPath = fs.readdirSync(`./Result/${imTitle}`);
// 已有文件跳过 // 已有文件跳过
if (alreadyPath.length === imgObject[imTitle].length) { if (alreadyPath.length === total) {
console.log(`${imTitle} 已存在 跳过抓取`); console.log(`${imTitle} 已存在 跳过抓取`);
resolve() resolve()
return; return;

16
yitudao.js

@@ -36,7 +36,7 @@ urlArray = [
"https://www.yitudao.com/meinv/rentiyishu/", "https://www.yitudao.com/meinv/rentiyishu/",
]; ];
let CURRY_PAGENUMBER = 1; // 爬取起始页码 let CURRY_PAGENUMBER = 2; // 爬取起始页码
let MAX_PAGENUMBER = 573; // 爬取最大页码 let MAX_PAGENUMBER = 573; // 爬取最大页码
// 爬取队列 // 爬取队列
@@ -56,7 +56,8 @@ const spiderQueue = async (soureUrl) => {
for (let index = 0; index < requestQueue.length; index++) { for (let index = 0; index < requestQueue.length; index++) {
console.log( console.log(
`正在抓取第${CURRY_PAGENUMBER}页 ==>`, `正在抓取第${CURRY_PAGENUMBER}页 ==>`,
requestQueue[index].title requestQueue[index].title,
requestQueue[index].url
); );
await loadHtml(requestQueue[index].url, requestQueue[index].title); await loadHtml(requestQueue[index].url, requestQueue[index].title);
} }
@@ -74,15 +75,20 @@ const loadHtml = async (url, title) => {
const $ = cheerio.load(data); const $ = cheerio.load(data);
const soureURL = url.substring(0, url.length - 5); const soureURL = url.substring(0, url.length - 5);
const forNum = $("#title .imageset-sum").html().slice(2); const forNum = +$("#title .imageset-sum").html().slice(2);
const loadQueue = []; const loadQueue = [];
for (let index = 2; index <= forNum; index++) { for (let index = 1; index <= forNum; index++) {
if (index === 1) {
const imageUrl = await loadImages(`${soureURL}.html`);
loadQueue.push(imageUrl);
continue;
}
const imageUrl = await loadImages(`${soureURL}_${index}.html`); const imageUrl = await loadImages(`${soureURL}_${index}.html`);
loadQueue.push(imageUrl); loadQueue.push(imageUrl);
} }
await saveImages({ [title]: loadQueue }); await saveImages({ [title]: loadQueue }, forNum);
} catch (error) { } catch (error) {
console.log(`loadHtml: 下载${title}时出现错误!`); console.log(`loadHtml: 下载${title}时出现错误!`);
console.log(error); console.log(error);

Loading…
Cancel
Save