-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathweb-scraping-rselenium.R
More file actions
60 lines (34 loc) · 1.37 KB
/
Copy pathweb-scraping-rselenium.R
File metadata and controls
60 lines (34 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
library(RSelenium)
library(lubridate)
library(XML)
#' Scrape the yearly result tables of the query form through RSelenium
#'
#' Drives the already-open Selenium session held in the global object `remDr`.
#' It types the institution into the form, selects two hard-coded `<option>`
#' entries, then submits the form once per year and collects the third HTML
#' table found on each result page. The collected tables are stacked, the last
#' column is dropped, the remaining four columns are renamed, the date column
#' is parsed and exact duplicate rows are removed.
#'
#' The session in `remDr` is always closed when the function exits, whether it
#' succeeds or fails.
#'
#' @param anos Years to query, one form submission per element. A list or
#'   vector; each element is converted to character before being typed.
#' @param instituicao Keys typed into the institution field. A list (as
#'   expected by `sendKeysToElement`) or a character vector.
#' @return A data frame with columns `NM_PRODUCAO`, `NM_DISCENTE`,
#'   `NM_SUBTIPO_PRODUCAO`, `DT_TITULACAO` (parsed with `lubridate::dmy`) and
#'   `AN_BASE` (the year of `DT_TITULACAO`), without duplicated rows.
gera.dataset <- function(anos = list("2017", "2018", "2019", "2020"),
                         instituicao = list("ufba")) {
  # The original only closed the session on success, leaking the browser
  # whenever a lookup or parse step failed.
  on.exit(remDr$close(), add = TRUE)

  campo_inst <- remDr$findElement(using = "id", "form:j_idt31:inst:input")
  campo_inst$sendKeysToElement(as.list(instituicao))
  Sys.sleep(2) # give the suggestion list box time to be populated

  # NOTE(review): the option values below are hard-coded; presumably they
  # identify UFBA and one specific programme -- confirm before reusing this
  # function with another `instituicao`.
  remDr$findElement(
    using = "xpath",
    "//select[@name = 'form:j_idt31:inst:listbox']/option[@value = '4354']"
  )$clickElement()
  Sys.sleep(4)
  remDr$findElement(
    using = "xpath",
    "//select[@name = 'form:j_idt31:j_idt96']/option[@value = '205551']"
  )$clickElement()

  # Collect one table per year and bind once at the end instead of growing a
  # data frame with rbind() on every iteration.
  tabelas <- vector("list", length(anos))
  for (k in seq_along(anos)) {
    campo_ano <- remDr$findElement(using = "id", "form:j_idt31:ano")
    campo_ano$clearElement()
    campo_ano$sendKeysToElement(list(as.character(anos[[k]])))
    remDr$findElement(
      using = "xpath",
      "//input[@value = 'Consultar']"
    )$clickElement()
    Sys.sleep(3) # wait for the result page to render

    pagina <- htmlParse(remDr$getPageSource()[[1]])
    tabelas_pagina <- readHTMLTable(pagina)
    if (length(tabelas_pagina) < 3L) {
      stop(
        "Result page for year ", anos[[k]], " contains ",
        length(tabelas_pagina), " table(s); expected at least 3.",
        call. = FALSE
      )
    }
    # `[k] <- list(...)` keeps the slot even when the table is NULL, whereas
    # `[[k]] <- NULL` would delete it and shift the remaining slots.
    tabelas[k] <- list(tabelas_pagina[[3]])
    Sys.sleep(5) # pause between consecutive queries
  }

  dados <- do.call(rbind, tabelas)
  if (!is.data.frame(dados)) {
    stop("No result table was collected for the requested years.",
         call. = FALSE)
  }
  if (ncol(dados) != 5L) {
    stop(
      "Expected result tables with 5 columns, got ", ncol(dados), ".",
      call. = FALSE
    )
  }

  # Drop the trailing column, then label the four that remain.
  dados <- dados[, -ncol(dados)]
  names(dados) <- c(
    "NM_PRODUCAO", "NM_DISCENTE", "NM_SUBTIPO_PRODUCAO", "DT_TITULACAO"
  )
  dados$DT_TITULACAO <- dmy(dados$DT_TITULACAO)
  dados$AN_BASE <- year(dados$DT_TITULACAO)

  # Use the generic rather than calling the S3 method directly.
  dados[!duplicated(dados), ]
}