爬取爱奇艺视频链接地址(简单爬虫)

核心

  1. 使用 request 和 cheerio 两个库。

request 用来在服务器端发送 HTTP 请求,取回目标页面的 HTML 内容。
cheerio 可以理解为 Node.js 里的 jQuery,用来解析取回的页面并从中提取所需的内容。

主体结构效果

服务端目录结构

前台页面效果(由于网络限制所以显示加载失败)

前台主要js代码

服务端主要代码

main.js

const express = require('express');
const app = express();
const bodyParser = require("body-parser");
const router = require('./router.js');
// Body parsing: register the JSON and URL-encoded parsers ahead of the routes
// so handlers can read the decoded payload from `req.body`.
const urlencodedOptions = { extended: true };
app.use(bodyParser.json());
app.use(bodyParser.urlencoded(urlencodedOptions));

// Mount everything exported by router.js at the site root.
app.use('/', router);

// Start listening; the message is logged from the listen callback.
const PORT = 3000;
const server = app.listen(PORT, () => {
  console.log('服务监听在3000端口');
});

router.js

const express = require("express");
const router = express.Router();
const request = require('request');
const cheerio = require('cheerio');
// Headers attached to every response served by this router, in the order
// they are written. Covers the CORS allowances plus a JSON content type.
const COMMON_RESPONSE_HEADERS = [
  ["Access-Control-Allow-Origin", "*"],
  ["Access-Control-Allow-Headers", "Origin, X-Requested-With,Content-Type,Content-Length, Authorization, Content-Type, Accept"],
  ["Access-Control-Allow-Methods", "PUT,POST,GET,DELETE,OPTIONS"],
  ["X-Powered-By", ' 3.2.1'],
  ["Content-Type", "application/json;charset=utf-8"],
  ["Access-Control-Max-Age", '1728000']
];

/**
 * Catch-all middleware: runs for every HTTP method and path on this router,
 * sets the common response headers, then passes control to the next handler.
 */
router.all("*", (req, res, next) => {
  COMMON_RESPONSE_HEADERS.forEach(([name, value]) => {
    res.header(name, value);
  });
  next();
});
/**
 * POST / — fetch the page at `req.body.url` and extract its episode links.
 *
 * Request body:
 *   url {string} absolute http(s) URL of the album page to scrape.
 *
 * Success response (JSON):
 *   filmTitle {string}   text of the `.info-intro-title` element(s).
 *   urlList   {string[]} one entry per `.album-numlist a` anchor, formatted
 *                        as `第<title>集$<href>$qiyi`.
 *
 * Error responses (JSON `{ error }`):
 *   400 — `url` is missing, not a string, or not an http(s) URL.
 *   502 — the upstream request failed or answered with a non-200 status.
 */
router.post('/', function (req, res) {
  const url = req.body && req.body.url;

  // Only plain http(s) URL strings are accepted. `request` also takes an
  // options object, so a non-string value from a JSON body must not reach it.
  if (typeof url !== 'string' || !/^https?:\/\//i.test(url)) {
    res.status(400).json({ error: 'Body field "url" must be an http(s) URL string.' });
    return;
  }

  request(url, function (error, response, body) {
    // Always answer the client: without this branch a failed upstream fetch
    // would leave the incoming request hanging with no response.
    if (error || response.statusCode !== 200) {
      res.status(502).json({
        error: error
          ? 'Failed to fetch the target page: ' + error.message
          : 'Target page responded with status ' + response.statusCode + '.'
      });
      return;
    }

    // Keep the parsed document local to this request. Assigning to an
    // undeclared `$` would create a global shared by concurrent requests.
    const $ = cheerio.load(body);
    const SEPARATOR = '$';
    const urlList = [];

    $('.album-numlist a').each(function () {
      const link = $(this);
      const title = '第' + link.attr('title') + '集';
      urlList.push(title + SEPARATOR + link.attr('href') + SEPARATOR + 'qiyi');
    });

    res.json({
      filmTitle: $('.info-intro-title').text(),
      urlList: urlList
    });
  });
});
module.exports = router;

项目完整地址

github