爬取爱奇艺视频链接地址(简单爬虫)
核心
- 使用 request 和 cheerio 两个库。
request 用来在服务器端发送请求,把目标页面抓取回来。
cheerio 可以理解为 Node.js 里的 jQuery,用来对抓取回来的页面做解析和处理。
主体结构效果
服务端目录结构
前台页面效果(由于网络限制所以显示加载失败)
前台主要js代码
服务端主要代码
main.js
// Entry point: creates the Express app, installs the request-body parsers,
// mounts the router at the root path, and starts the HTTP listener.
const express = require('express');
const bodyParser = require("body-parser");
const router = require('./router.js');

const PORT = 3000;
const app = express();

// Parse both JSON and URL-encoded (extended syntax) request bodies.
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: true }));

// Every route is defined in router.js.
app.use('/', router);

const server = app.listen(PORT, () => {
  console.log('服务监听在3000端口');
});
router.js
const express = require("express");
const router = express.Router();
const request = require('request');
const cheerio = require('cheerio');
// Runs for every method and path: attaches the CORS and content-type headers
// listed below to the response, then hands control to the next handler.
router.all("*", (req, res, next) => {
  // [field, value] pairs, applied in this order through res.header().
  const responseHeaders = [
    ["Access-Control-Allow-Origin", "*"],
    ["Access-Control-Allow-Headers", "Origin, X-Requested-With,Content-Type,Content-Length, Authorization, Content-Type, Accept"],
    ["Access-Control-Allow-Methods", "PUT,POST,GET,DELETE,OPTIONS"],
    ["X-Powered-By", ' 3.2.1'],
    ["Content-Type", "application/json;charset=utf-8"],
    ["Access-Control-Max-Age", '1728000'],
  ];

  for (const [field, value] of responseHeaders) {
    res.header(field, value);
  }

  next();
});
/**
 * POST / — scrape the episode list from the page at `req.body.url`.
 *
 * Fetches the page with `request`, parses it with cheerio, and responds with
 * JSON `{ filmTitle, urlList }`:
 *   - `filmTitle` is the text of `.info-intro-title`;
 *   - `urlList` holds one string per `.album-numlist a` anchor, in the form
 *     `第<title attr>集$<href attr>$qiyi`.
 *
 * Error responses (JSON `{ error }`):
 *   - 400 when `url` is missing or is not an http(s) URL;
 *   - 502 when the upstream request fails or does not return status 200.
 */
router.post('/', (req, res) => {
  // Guard against a missing body in case no body parser populated req.body.
  const url = req.body && req.body.url;

  // NOTE(security): `url` is client-supplied and is fetched from the server
  // (SSRF risk). Only the scheme is checked here; consider restricting the
  // host to an allow-list before exposing this endpoint publicly.
  if (typeof url !== 'string' || !/^https?:\/\//i.test(url)) {
    res.status(400).json({ error: 'A valid http(s) "url" is required.' });
    return;
  }

  request(url, (error, response, body) => {
    // Always answer the client; otherwise the HTTP request would hang until
    // it times out.
    if (error || response.statusCode !== 200) {
      res.status(502).json({ error: 'Failed to fetch the requested page.' });
      return;
    }

    // Keep the cheerio handle local: assigning to an undeclared `$` would
    // create an implicit global (and throw in strict mode).
    const $ = cheerio.load(body);

    const urlList = [];
    $('.album-numlist a').each((index, element) => {
      const link = $(element);
      // Plain concatenation keeps the literal '$' separators easy to read.
      urlList.push('第' + link.attr('title') + '集' + '$' + link.attr('href') + '$qiyi');
    });

    res.json({
      filmTitle: $('.info-intro-title').text(),
      urlList: urlList
    });
  });
});
module.exports=router;