library(xml2)
library(rvest)
# URL pieces for the listing pages. A page URL is assembled as
# site1 + <page number> + site2 + <page number>.
site1 <- "https://www.zhipin.com/c101280600/h_101280600/?query=数据分析&page="
site2 <- "&ka=page-"

# Page counter, initialised to the first page.
page <- 1

# Zero-row accumulator that the scraped rows are appended to.
total <- data.frame(
  name = character(),
  salary = character(),
  city = character(),
  experience = character(),
  degree = character()
)
# Scrape each results page and append its listings to `total`.
#
# For every page this builds one row per listing: the job title, the salary
# text, and the space-separated fields of the listing's info paragraph.
# Per-page frames are collected in a list and bound once after the loop
# rather than growing `total` with rbind() on every iteration.
#
# NOTE: rbind() ignores zero-row arguments, so the column names of the result
# come from the per-page frames (jobName, salary, X1, X2, ...), not from the
# empty `total` template.
n_pages <- 30L
page_frames <- vector("list", n_pages)

for (page in seq_len(n_pages)) {
  site <- paste0(site1, page, site2, page)
  html <- read_html(site)

  job <- html_nodes(html, "div.info-primary>h3.name")
  if (length(job) == 0L) {
    # Nothing matched on this page: skip it instead of failing further down
    # when an empty result is reshaped into a matrix.
    message("No listings found on page ", page, "; skipping.")
    next
  }

  # Job title: serialise the <h3> nodes, strip the opening tag, then replace
  # everything from the <span ...> tag through </h3> with a single space.
  job <- gsub("<h3 class=\"name\">", "", job)
  jobName <- gsub("<(span.*?)(class.*?)>(.*?)</h3>", " ", job)

  # Salary: trimmed text of the red <span> inside the same heading.
  salary <- html_text(
    html_nodes(html, "div.info-primary>h3.name>span.red"),
    trim = TRUE
  )
  df1 <- data.frame(jobName, salary)

  # Info paragraph: turn each empty <em ...></em> separator into a space,
  # drop the <p> / </p> tags, then split the remaining text on spaces.
  jobMsg <- html_nodes(html, "div.info-primary>p")
  jobMsg <- gsub("<(em.*?)(class.*?)></em>", " ", jobMsg)
  jobMsg <- gsub("<(.?p)>", "", jobMsg)
  temp <- strsplit(jobMsg, " ")

  # One matrix row per listing. The row count follows the number of
  # paragraphs actually found, so pages with fewer listings still line up
  # with df1.
  df2 <- data.frame(matrix(unlist(temp), nrow = length(temp), byrow = TRUE))

  df <- data.frame(df1, df2)
  page_frames[[page]] <- df
}

# Skipped pages leave NULL slots; drop them and bind everything in one call.
page_frames <- Filter(Negate(is.null), page_frames)
total <- do.call(rbind, c(list(total), page_frames))