Browse Source

重构 saveImages

main
YuJian920 3 years ago
parent
commit
d515fda061
  1. 51
      utils/saveImages.js
  2. 8
      yitudao.js

51
utils/saveImages.js

@@ -5,35 +5,42 @@ const fs = require("fs");
* 图片下载
* @param {*} imgObject 图片 URL
*/
async function saveImages(imgObject) {
try {
function saveImages(imgObject) {
return new Promise((resolve, reject) => {
const forItem = Object.keys(imgObject);
for (let forItemIndex = 0; forItemIndex < forItem.length; forItemIndex++) {
const imTitle = forItem[forItemIndex]; // 文件名
for (let urlIndex = 0; urlIndex < imgObject[imTitle].length; urlIndex++) {
const eachItem = imgObject[forItem][urlIndex]; // url
console.log("正在下载 ===>", imTitle, urlIndex + 1, eachItem);
const filename = eachItem.split("/").pop();
const { data } = await axios({
url: eachItem,
responseType: "arraybuffer",
});
fs.access(`./Result/${imTitle}`, (accessErr) => {
if (accessErr) {
fs.mkdir(`./Result/${imTitle}`, (mkdirErr) => {
if (mkdirErr) console.log("目录创建失败");
fs.access(`./Result/${imTitle}`, async (accessErr) => {
if (accessErr) fs.mkdirSync(`./Result/${imTitle}`);
else {
const alreadyPath = fs.readdirSync(`./Result/${imTitle}`);
// 已有文件跳过
if (alreadyPath.length === imgObject[imTitle].length) {
console.log(`${imTitle} 已存在 跳过抓取`);
resolve()
return;
}
}
for (let urlIndex = 0; urlIndex < imgObject[imTitle].length; urlIndex++) {
const eachItem = imgObject[forItem][urlIndex]; // url
if (!eachItem) continue;
console.log("正在下载 ===>", imTitle, urlIndex + 1, eachItem);
try {
const filename = eachItem.split("/").pop();
const { data } = await axios({
url: eachItem,
responseType: "arraybuffer",
});
} else {
fs.writeFileSync(`./Result/${imTitle}/${filename}`, data, "binary");
} catch (error) {
console.log(`saveImages: 下载图片时出现错误!`);
console.log(error);
}
});
}
}
resolve();
});
}
} catch (error) {
console.log(`saveImages: 下载图片时出现错误!`);
console.log(error);
}
})
}
module.exports = saveImages;

8
yitudao.js

@@ -11,7 +11,7 @@ const request = axios.create({
"accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
dnt: "1",
"if-modified-since": "Thu, 06 Jan 2022 17:48:09 GMT",
"if-modified-since": "Thu, 15 Jan 2022 17:48:09 GMT",
"if-none-match": "1641491296",
"sec-ch-ua":
'" Not A;Brand";v="99", "Chromium";v="96", "Microsoft Edge";v="96"',
@@ -36,14 +36,14 @@ urlArray = [
"https://www.yitudao.com/meinv/rentiyishu/",
];
let CURRY_PAGENUMBER = 11; // 爬取起始页码
let CURRY_PAGENUMBER = 1; // 爬取起始页码
let MAX_PAGENUMBER = 573; // 爬取最大页码
// 爬取队列
const spiderQueue = async (soureUrl) => {
const url = `${soureUrl}${CURRY_PAGENUMBER}.html`;
request({ url }).then(async (res) => {
const $ = cheerio.load(res.data);
const $ = cheerio.load(res.data);=
const requestQueue = [];
$("a[title]").each((i, elem) => {
@@ -101,4 +101,4 @@ const loadImages = async (url) => {
}
};
spiderQueue(urlArray[0]);
spiderQueue(urlArray[0]);
Loading…
Cancel
Save