// BE_MapAddressUpdate2.groovy

  1. import com.yinjie.heating.common.api.BusinessExecutor
  2. import com.yinjie.heating.common.datas.ERPModule
  3. import com.yinjie.heating.common.entity.base.ProcessStringItem
  4. import com.sweetfish.service.RetResult
  5. import org.apache.commons.lang3.StringUtils
  6. import org.apache.logging.log4j.LogManager
  7. import org.apache.logging.log4j.Logger
  8. import org.jsoup.Jsoup
  9. import org.jsoup.nodes.Document
  10. import org.jsoup.nodes.Element
  11. import org.jsoup.select.Elements
  12. import org.jsoup.select.Evaluator
  13. import javax.annotation.Resource
  14. import java.util.regex.Pattern
  /**
   * Created by jlutt on 2022-07-11
   *
   * Administrative-division updater. Crawls the province, city and county
   * listing pages published by the National Bureau of Statistics
   * (www.stats.gov.cn) and writes the collected codes and names to three JSON
   * files under {@code <APP_HOME>/conf/addressdata}.
   *
   * 2023-08-29: switched to the National Bureau of Statistics as the data
   * source because the civil-affairs data was missing entries.
   *
   * The crawl is deliberately slow: every successful request is followed by a
   * short pause and every failed request by a long back-off, because the site
   * rejects clients that send requests too frequently.
   *
   * @author jlutt
   */
  @SuppressWarnings(["HttpUrlsUsage", 'unused'])
  class BE_MapAddressUpdate2 implements BusinessExecutor<ProcessStringItem, ProcessStringItem> {

      /** Root of the 2022 division-code pages; province and city pages live below it. */
      private static final String BASE_URL = "http://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2022/"

      /** Browser-like user agent sent with every request. */
      private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.188"

      /** Connect/read timeout of a single request, in milliseconds. */
      private static final int REQUEST_TIMEOUT_MS = 10000

      /** Pause after every successful request, in milliseconds. */
      private static final long REQUEST_PAUSE_MS = 2 * 1000L

      /** Back-off after every failed request (e.g. read timeout, HTTP 502), in milliseconds. */
      private static final long ERROR_BACKOFF_MS = 30 * 60 * 1000L

      /** How many times one URL is requested before it is given up on. */
      private static final int MAX_FETCH_ATTEMPTS = 3

      /** Shape of the relative links that point from a city page to its county pages. */
      private static final Pattern COUNTY_HREF = Pattern.compile("\\d+/\\d+\\.html")

      protected final Logger logger = LogManager.getLogger(this.getClass().getSimpleName())

      /** Injected application home; the output files go to {@code conf/addressdata} below it. */
      @Resource(name = "APP_HOME")
      protected String appHome

      @Override
      String scriptName() {
          return "行政区划更新V2"
      }

      @Override
      ERPModule module() {
          return ERPModule.ADDRESSPARSER
      }

      // Rows collected by the current run. Every row is a map with the keys
      // code, fullCode, name and parentCode (province rows also carry parentName).
      def provinceList = []
      def cityList = []
      // Holds the county rows; the misspelt name is kept because it is a public property.
      def countryList = []

      /**
       * Runs a full crawl (provinces, then the cities of every province, then
       * the counties of every city) and writes provices.json, cities.json and
       * counties.json.
       *
       * Pages that still fail after all retries are skipped, so the output may
       * be partial. Nothing is written when no province could be loaded or when
       * the thread was interrupted, so existing data files are not replaced by
       * empty or truncated ones.
       *
       * @param source not read by this script
       * @return always a success result carrying an empty item value; failures
       *         are only reported through the log
       */
      @Override
      RetResult<ProcessStringItem> execute(ProcessStringItem source) {
          provinceList.clear()
          cityList.clear()
          countryList.clear()
          try {
              getProvinces()
              provinceList.each { p ->
                  getCities(p["code"] as String)
              }
              cityList.each { c ->
                  getCounties(c["parentCode"] as String, c["code"] as String)
              }

              if (Thread.currentThread().isInterrupted() || provinceList.isEmpty()) {
                  logger.error("No complete division data was collected; existing address data files are left untouched")
              } else {
                  File outputDir = new File(appHome + File.separator + "conf" + File.separator + "addressdata")
                  if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
                      throw new IOException("Cannot create output directory " + outputDir)
                  }
                  // 'provices.json' is misspelt, but the name is kept because
                  // readers of the file are outside this script.
                  writeJson(outputDir, 'provices.json', provinceList as List)
                  writeJson(outputDir, 'cities.json', cityList as List)
                  writeJson(outputDir, 'counties.json', countryList as List)
              }
          } catch (IOException e) {
              logger.error("Writing the address data files failed: " + e, e)
          }
          return RetResult.<ProcessStringItem> successT().result(ProcessStringItem.newBuilder().itemValue("").build())
      }

      /**
       * Writes the rows as a JSON array of {"code", "name"} objects, where
       * "code" is taken from the row's fullCode entry.
       *
       * @param dir      existing target directory
       * @param fileName name of the file inside {@code dir}
       * @param rows     collected rows to serialise
       */
      private void writeJson(File dir, String fileName, List rows) {
          int lastIndex = rows.size() - 1
          new File(dir, fileName).withWriter('utf-8') { writer ->
              writer.writeLine('[')
              rows.eachWithIndex { row, idx ->
                  writer.writeLine(' {')
                  writer.writeLine(' "code": "' + jsonEscape(row["fullCode"]) + '",')
                  writer.writeLine(' "name": "' + jsonEscape(row["name"]) + '"')
                  writer.writeLine(' }' + (idx != lastIndex ? ',' : ""))
              }
              writer.writeLine(']')
          }
      }

      /** Escapes backslash, double quote and common control characters for use inside a JSON string. */
      private static String jsonEscape(Object value) {
          String text = value == null ? "null" : value.toString()
          return text.replace('\\', '\\\\')
                  .replace('"', '\\"')
                  .replace('\n', '\\n')
                  .replace('\r', '\\r')
                  .replace('\t', '\\t')
      }

      /**
       * Sleeps for the given time.
       *
       * @return false when the sleep was interrupted; the interrupt flag is
       *         restored in that case so callers can stop the crawl
       */
      private boolean pause(long millis) {
          try {
              Thread.sleep(millis)
              return true
          } catch (InterruptedException e) {
              Thread.currentThread().interrupt()
              logger.error("Interrupted while waiting between requests: " + e)
              return false
          }
      }

      /**
       * Downloads and parses one page, retrying after a long back-off.
       *
       * Requests sent too frequently fail with errors such as "HTTP error
       * fetching URL. Status=502" or "Too many redirects occurred", so every
       * success is followed by a short pause and every failure by a long one
       * before the same URL is requested again.
       *
       * @param url absolute page URL
       * @return the parsed page, or null when every attempt failed or the
       *         thread was interrupted
       */
      private Document fetchDocument(String url) {
          for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
              if (Thread.currentThread().isInterrupted()) {
                  return null
              }
              try {
                  Document document = Jsoup.connect(url)
                          .header("Accept", "*/*")
                          .userAgent(USER_AGENT)
                          .maxBodySize(0)
                          .followRedirects(true)
                          .timeout(REQUEST_TIMEOUT_MS)
                          .get()
                  pause(REQUEST_PAUSE_MS)
                  return document
              } catch (Exception e) {
                  logger.error(url + ",执行出错:" + e, e)
                  logger.warn("Fetch attempt {}/{} failed, backing off for {} ms: {}",
                          attempt, MAX_FETCH_ATTEMPTS, ERROR_BACKOFF_MS, url)
                  // Back off even after the last attempt so the next URL is not
                  // requested immediately after a failure.
                  if (!pause(ERROR_BACKOFF_MS)) {
                      return null
                  }
              }
          }
          return null
      }

      /**
       * Fetches a page and returns the elements matched by the evaluator.
       *
       * @return the matching elements, or null when the page could not be fetched
       */
      private Elements getElements(String url, Evaluator evaluator) {
          Document document = fetchDocument(url)
          return document == null ? null : document.select(evaluator)
      }

      /**
       * Loads the index page and appends one row per province link to
       * {@code provinceList}. The province code is the link's file name
       * without ".html"; fullCode is that code right-padded with zeros to six
       * characters.
       */
      def getProvinces() {
          Elements links = getElements(BASE_URL + "index.html",
                  new Evaluator.AttributeWithValueEnding("href", ".html"))
          if (links == null) {
              return null
          }
          links.each { Element link ->
              String href = link.attributes().get("href")
              if (href.matches("\\d+\\.html")) {
                  String code = href.replaceAll("\\.html", "")
                  provinceList << [
                          code      : code,
                          fullCode  : StringUtils.rightPad(code, 6, "0"),
                          name      : link.text(),
                          parentCode: "",
                          parentName: ""
                  ]
              }
          }
      }

      /**
       * Loads one province page and appends its cities to {@code cityList}.
       * Every city has two links on the page, one showing the numeric code and
       * one showing the name; only the link whose text is not purely numeric
       * is turned into a row.
       *
       * @param provinceCode province code as stored in the "code" entry of a province row
       */
      def getCities(String provinceCode) {
          Pattern cityHref = Pattern.compile(provinceCode + "/\\d+\\.html")
          Elements links = getElements(BASE_URL + provinceCode + ".html",
                  new Evaluator.AttributeWithValueMatching("href", cityHref))
          if (links == null) {
              return null
          }
          for (Element link : links) {
              String href = link.attributes().get("href")
              String text = link.text()
              // The evaluator accepts partial matches, so require a full match here.
              if (!cityHref.matcher(href).matches() || text.matches("\\d+")) {
                  continue
              }
              String code = href.replaceAll(provinceCode + "/", "").replaceAll("\\.html", "")
              cityList << [
                      code      : code,
                      fullCode  : StringUtils.rightPad(code, 6, "0"),
                      name      : text,
                      parentCode: provinceCode
              ]
          }
      }

      /**
       * Loads one city page and appends its counties to {@code countryList}.
       * A table row labelled "市辖区" is stored under that name, with its code
       * taken from the first six characters of the row's first cell. All other
       * counties come from the links whose text is not purely numeric.
       *
       * A page that cannot be fetched is skipped, so one failing city does not
       * abort the whole run.
       *
       * @param provinceCode code of the province that owns the city
       * @param cityCode     city code as stored in the "code" entry of a city row
       */
      def getCounties(String provinceCode, String cityCode) {
          Document document = fetchDocument(BASE_URL + provinceCode + "/" + cityCode + ".html")
          if (document == null) {
              return null
          }

          Elements districtCells = document.select("tr.countytr td:contains(市辖区)")
          if (!districtCells.isEmpty()) {
              Element districtCell = districtCells.first()
              String districtCode = StringUtils.left(districtCell.parent().select("td").get(0).text(), 6)
              countryList << [
                      code      : districtCode,
                      fullCode  : districtCode,
                      name      : "市辖区",
                      parentCode: cityCode
              ]
          }

          Elements links = document.select(new Evaluator.AttributeWithValueMatching("href", COUNTY_HREF))
          for (Element link : links) {
              String href = link.attributes().get("href")
              String text = link.text()
              // The evaluator accepts partial matches, so require a full match here.
              if (!COUNTY_HREF.matcher(href).matches() || text.matches("\\d+")) {
                  continue
              }
              String code = href.replaceAll("\\d+/", "").replaceAll("\\.html", "")
              countryList << [
                      code      : code,
                      fullCode  : StringUtils.rightPad(code, 6, "0"),
                      name      : text,
                      parentCode: cityCode
              ]
          }
      }
  }