import datetime
import hashlib
import math
import re
from datetime import timedelta

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from w3lib.html import remove_tags

from myproject.items import JobItem
class JobSpider(CrawlSpider):
name =
'job'
allowed_domains = [
'51job.com']
start_urls = [
'http://51job.com/']
custom_settings = {
'DEFAULT_REQUEST_HEADERS':{
# "HOST":"www.51job.com",
"Connection":
"keep-alive",
"Cookie":
"partner=baidupz;51job=cenglish=0&|&;guid=15239472266864690059; nsearch=jobarea=&|&ord_field=&|&recentSearch0=&|&recentSearch1=&|&recentSearch2=&|&recentSearch3=&|&recentSearch4=&|&collapse_expansion=; search=jobarea~`010000|!ord_field~`0|!recentSearch0~`1
# Please credit the original source when reposting: https://www.6miu.com/read-2050009.html