package com.bvn13.covid19.scheduler.updater.stopcoronovirusrf; import com.bvn13.covid19.scheduler.updater.stopcoronovirusrf.model.RowData; import org.apache.camel.Exchange; import org.apache.camel.Handler; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import java.util.ArrayList; import java.util.List; @Component public class StopcoronovirusRfDataRetriever { //private static final String URL = "https://стопкоронавирус.рф/"; private static final String HOST = "xn--80aesfpebagmfblc0a.xn--p1ai"; private static final String URL = "https://xn--80aesfpebagmfblc0a.xn--p1ai/"; @Value("${app.user-agent}") private String userAgent; @Handler public void retrieveData(Exchange exchange) throws Exception { Document doc = Jsoup.connect(URL) .userAgent(userAgent) .timeout(30*1000) //.referrer("http://google.com") // .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") // .header("Accept-Encoding", "gzip, deflate, br") // .header("Accept-Language", "ru-RU,ru;q=0.5") // .header("Cache-Control", "no-cache") // .header("Connection", "keep-alive") // .header("Pragma", "no-cache") // .header("Host", HOST) .get(); Elements tableData = doc.select("div.d-map__list > table > tbody > tr"); List rows = new ArrayList<>(tableData.size()); for (Element row : tableData) { rows.add(RowData.builder() .region(row.selectFirst("th").text()) .sick(row.selectFirst("td > span.d-map__indicator_sick").parent().ownText()) .healed(row.selectFirst("td > span.d-map__indicator_healed").parent().ownText()) .died(row.selectFirst("td > span.d-map__indicator_die").parent().ownText()) .build()); } exchange.getIn().setHeader(StopcoronovirusRfUpdater.HEADER_DATE_OF_DATA, doc.selectFirst(".d-map__title > span").ownText()); exchange.getIn().setBody(rows); } }