import com.sweetfish.service.RetResult
import com.yinjie.heating.common.api.BusinessExecutor
import com.yinjie.heating.common.datas.ERPModule
import com.yinjie.heating.common.entity.base.ProcessStringItem
import com.yinjie.heating.common.entity.system.ParseAreaResult
import com.yinjie.heating.common.tool.ERPUtils
import groovy.json.JsonSlurper
import org.apache.commons.lang3.StringUtils
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.Logger

import javax.annotation.Resource
import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.atomic.AtomicReference
import java.util.regex.Matcher
import java.util.regex.Pattern
import java.util.stream.Collectors
import java.util.stream.Stream

/**
 * Created by jlutt on 2022-02-09
 *
 * Parses a free-form address string into province / city / county plus
 * contact name, mobile number, landline number and zip code.
 *
 * The area dictionaries are loaded by {@link #start(long)} from three JSON
 * files under {@code <APP_HOME>/conf/addressdata}; {@link #parse} must not be
 * called before that.
 *
 * @author jlutt
 */
class BE_ParseAddress implements BusinessExecutor {

    protected final Logger logger = LogManager.getLogger(this.getClass().getSimpleName())

    private static final String EMPTY = "", BLANK = " "

    /**
     * Mobile number regex.
     */
    public static String mobile = "(86-1[0-9]{10})|(861[0-9]{10})|(1[0-9]{10})|(1[0-9]{2} [0-9]{4} [0-9]{4})|(1[0-9]{6} [0-9]{4})|(1[0-9]{6}-[0-9]{4})"
    public static Pattern mobilePattern = Pattern.compile(mobile, Pattern.DOTALL)

    /**
     * Landline number regex.
     */
    public static String phone = "(([0-9]{3,4}-)[0-9]{7,8})|([0-9]{12})|([0-9]{11})|([0-9]{10})|([0-9]{9})|([0-9]{8})|([0-9]{7})"
    public static Pattern phonePattern = Pattern.compile(phone, Pattern.DOTALL)

    /**
     * Zip code regex.
     */
    public static String zipCode = "([0-9]{6})"
    public static Pattern zipCodePattern = Pattern.compile(zipCode, Pattern.DOTALL)

    // Labels and punctuation that are replaced by a blank during address cleaning.
    // Order matters: longer labels come before the shorter labels they contain.
    def excludeKeys = [
            "自治区直辖县级行政区划",
            "省直辖县级行政区划",
            "联系人手机号码",
            "不代收货款",
            "所在地区",
            "详细地址",
            "收货地址",
            "手机号码",
            "发件人",
            "手机号",
            "收货人",
            "收件人",
            "不代收",
            "收货",
            "邮编",
            "电話",
            "电话",
            "地址",
            ":",
            ":",
            ";",
            ";",
            ",",
            ",",
            "。",
            "、"
    ]

    // Provinces are often abbreviated; stripping these suffixes yields the short
    // form used for a second lookup (e.g. 内蒙古自治区 may be written as 内蒙).
    public Set<String> provinceKeys = new LinkedHashSet<>(Arrays.asList(
            "特别行政区", "古自治区", "维吾尔自治区", "壮族自治区", "回族自治区", "自治区", "省省直辖", "省", "市"))

    // Suffixes stripped from city names to build their short form.
    public Set<String> cityKeys = new LinkedHashSet<>(Arrays.asList(
            "布依族苗族自治州", "苗族侗族自治州", "藏族羌族自治州", "哈尼族彝族自治州", "壮族苗族自治州",
            "傣族景颇族自治州", "蒙古族藏族自治州", "傣族自治州", "白族自治州", "藏族自治州", "彝族自治州",
            "回族自治州", "蒙古自治州", "朝鲜族自治州", "地区", "哈萨克自治州", "盟", "市"))

    // Suffixes stripped from county names to build their short form.
    public Set<String> countyKeys = new LinkedHashSet<>(Arrays.asList(
            "满族自治县", "满族蒙古族自治县", "蒙古族自治县", "朝鲜族自治县", "回族彝族自治县",
            "彝族回族苗族自治县", "彝族苗族自治县", "土家族苗族自治县", "布依族苗族自治县", "苗族布依族自治县",
            "彝族傣族自治县", "傣族彝族自治县", "仡佬族苗族自治县", "黎族苗族自治县", "苗族侗族自治县",
            "哈尼族彝族傣族自治县", "哈尼族彝族自治县", "彝族哈尼族拉祜族自治县", "傣族拉祜族佤族自治县",
            "傣族佤族自治县", "拉祜族佤族布朗族傣族自治县", "苗族瑶族傣族自治县", "彝族回族自治县",
            "独龙族怒族自治县", "保安族东乡族撒拉族自治县", "回族土族自治县", "撒拉族自治县", "哈萨克自治县",
            "塔吉克自治县", "回族自治县", "畲族自治县", "土家族自治县", "布依族自治县", "苗族自治县",
            "瑶族自治县", "侗族自治县", "水族自治县", "傈僳族自治县", "仫佬族自治县", "毛南族自治县",
            "黎族自治县", "羌族自治县", "彝族自治县", "藏族自治县", "纳西族自治县", "裕固族自治县",
            "哈萨克族自治县", "哈尼族自治县", "拉祜族自治县", "佤族自治县", "回族", "左旗", "右旗", "中旗",
            "后旗", "联合旗", "自治旗", "旗", "自治县", "区", "县", "市"))

    // code -> short name, derived from the full names in start()
    private Map<String, String> provinceShort = new LinkedHashMap<>()
    private Map<String, String> cityShort = new LinkedHashMap<>()
    private Map<String, String> countyShort = new LinkedHashMap<>()

    // code -> full name, in the order the JSON files list them
    private Map<String, String> PROVINCES = new LinkedHashMap()
    private Map<String, String> CITIES = new LinkedHashMap()
    private Map<String, String> COUNTIES = new LinkedHashMap()

    @Resource(name = "APP_HOME")
    private String appHome

    @Override
    String scriptName() {
        return "地址解析为省市区联系人电话"
    }

    @Override
    ERPModule module() {
        return ERPModule.ADDRESSPARSER
    }

    /**
     * Loads the province / city / county dictionaries and derives the
     * short-name lookup tables from them.
     *
     * @param supplierCode not used by this implementation
     */
    @Override
    void start(long supplierCode) {
        def jsonSlurper = new JsonSlurper()
        loadAreaFile(jsonSlurper, "provices.json", PROVINCES)
        loadAreaFile(jsonSlurper, "cities.json", CITIES)
        loadAreaFile(jsonSlurper, "counties.json", COUNTIES)

        for (Map.Entry<String, String> entry : PROVINCES.entrySet()) {
            String result = entry.getValue()
            for (String key : provinceKeys) {
                result = result.replace(key, "")
            }
            provinceShort.put(entry.getKey(), result)
        }

        for (Map.Entry<String, String> entry : CITIES.entrySet()) {
            String result = entry.getValue()
            // Names of two characters or fewer get no short form.
            if (result.length() > 2) {
                for (String key : cityKeys) {
                    result = result.replace(key, "")
                }
                cityShort.put(entry.getKey(), result)
            }
        }

        for (Map.Entry<String, String> entry : COUNTIES.entrySet()) {
            String result = entry.getValue()
            // Hard-coded aliases for two counties.
            if ("雨花台区" == result) {
                result = "雨花区"
            } else if ("郑州高新技术产业开发区" == result) {
                result = "高新区"
            }
            if (result.length() > 2) {
                for (String key : countyKeys) {
                    // "> 0": a suffix is only stripped when it is not at the very start of the name.
                    if (result.indexOf(key) > 0) {
                        result = result.replace(key, "")
                    }
                }
                countyShort.put(entry.getKey(), result)
            }
        }
    }

    /**
     * Reads one dictionary file from {@code <APP_HOME>/conf/addressdata} and
     * stores each item's {@code code} -> {@code name} pair into {@code target}.
     */
    private void loadAreaFile(JsonSlurper jsonSlurper, String fileName, Map<String, String> target) {
        def jsonFile = new File(appHome + File.separator + "conf" + File.separator + "addressdata" + File.separator + fileName)
        def jsonArray = jsonSlurper.parse(jsonFile)
        jsonArray.each { item -> target.put(item["code"] as String, item["name"] as String) }
    }

    /**
     * Parses the item value of {@code source} as an address.
     *
     * NOTE(review): the generic type arguments on this signature were
     * reconstructed from the returned value; confirm against BusinessExecutor.
     */
    @Override
    RetResult<List<ParseAreaResult>> executeList(ProcessStringItem source) {
        List<ParseAreaResult> results = parse(source.getItemValue(), false)
        return RetResult.<List<ParseAreaResult>> successT().result(results)
    }

    /**
     * Parses an address string.
     *
     * @param address  raw address text
     * @param parseAll when true, the city-based and county-based strategies run
     *                 even if an earlier strategy already produced a confident result
     * @return candidate results, most credible first; never empty
     */
    List<ParseAreaResult> parse(String address, boolean parseAll) {
        String inputAddress = address
        ParseAreaResult extraResult = new ParseAreaResult()

        // Clean the address: drop special characters and labels, normalise spacing.
        address = cleanAddress(address)
        // Extract the mobile number.
        address = parseMobile(address, extraResult)
        // Extract the landline number.
        address = parsePhone(address, extraResult)
        // Extract the zip code.
        address = parseZipCode(address, extraResult)
        address = address.replaceAll(" {2,}", BLANK)

        // The memo is taken from the untouched input because cleaning removes bracketed text.
        String memo = parseMemo(inputAddress)

        // Parse the area information.
        List<ParseAreaResult> results = parseAddress(address, parseAll)

        // Attach the extracted contact data and resolve the name for every candidate.
        if ((results != null) && (!results.isEmpty())) {
            results.each { r ->
                r.setMobile(extraResult.getMobile())
                r.setPhone(extraResult.getPhone())
                r.setZipCode(extraResult.getZipCode())
                parseName(r, 11)
            }
        } else {
            parseName(extraResult, 11)
            results.add(extraResult)
        }

        results.each { it.memo = memo }
        return results
    }

    /**
     * Removes hyphens, bracketed segments and the labels in {@code excludeKeys},
     * then delegates the final formatting to {@code ERPUtils.formatParseAddress}.
     */
    private String cleanAddress(String address) {
        address = address.replace("-", EMPTY)
        address = address.replaceAll("\\[.*?]", "")
        address = address.replaceAll("【.*?】", "")
        for (String key : excludeKeys) {
            // Literal replacement: the keys are plain text, not patterns.
            address = address.replace(key, " ")
        }
        // One replacement step could not be executed in Groovy, so it lives in Java.
        return ERPUtils.formatParseAddress(address)
    }

    /**
     * Extracts the first mobile number into {@code result} and blanks every
     * occurrence of it in the address.
     */
    private static String parseMobile(String address, ParseAreaResult result) {
        String found = patternGetStr(mobilePattern, address, 0)
        if (StringUtils.isNotEmpty(found)) {
            result.setMobile(found)
            return address.replace(found, BLANK)
        }
        return address
    }

    /**
     * Extracts the first landline number into {@code result} and blanks every
     * occurrence of it in the address.
     */
    private static String parsePhone(String address, ParseAreaResult result) {
        String found = patternGetStr(phonePattern, address, 0)
        if (StringUtils.isNotEmpty(found)) {
            result.setPhone(found)
            return address.replace(found, BLANK)
        }
        return address
    }

    /**
     * Extracts the first zip code into {@code result} and blanks every
     * occurrence of it in the address.
     */
    private static String parseZipCode(String address, ParseAreaResult result) {
        String found = patternGetStr(zipCodePattern, address, 0)
        if (StringUtils.isNotEmpty(found)) {
            result.setZipCode(found)
            return address.replace(found, BLANK)
        }
        return address
    }

    /**
     * Derives the contact name from {@code result.details} when no name is set yet.
     *
     * The details are split on single spaces; the shortest non-empty token
     * shorter than {@code maxLen} (first one wins on ties) becomes the name and
     * is removed from the details. With blank details the name is set to "".
     *
     * @param result candidate to update in place
     * @param maxLen exclusive upper bound on the length of a name token
     */
    private static void parseName(ParseAreaResult result, int maxLen) {
        if (StringUtils.isNotEmpty(result.getName())) {
            return
        }
        if (StringUtils.isBlank(result.getDetails())) {
            result.setName("")
            return
        }

        List<String> parts = result.getDetails().split(" ") as List
        String name = ""
        int nameIndex = -1
        for (int i = 0; i < parts.size(); i++) {
            String part = parts.get(i)
            boolean eligible = StringUtils.isNotEmpty(part) && part.length() < maxLen
            if (eligible && (StringUtils.isEmpty(name) || name.length() > part.length())) {
                name = part
                // Remember where the chosen token sits so that exactly this token
                // is removed below (previously the last token was removed instead).
                nameIndex = i
            }
        }

        if (nameIndex > -1) {
            result.setName(name.trim())
            // int argument -> List.remove(int index), not remove(Object)
            parts.remove(nameIndex)
            result.setDetails(parts.stream().collect(Collectors.joining(" ")))
        }
    }

    /**
     * Collects the text inside every [...] / 【...】 pair of the raw address,
     * joined with commas.
     */
    private static String parseMemo(String address) {
        def result = []
        def matcher = address =~ /([\[【])(.*?)([]】])/
        matcher.each { result << it[2] }
        return result.join(",")
    }

    /**
     * Returns the requested group of the first match of {@code pattern} in
     * {@code content}, or null when there is no match or an argument is null.
     */
    private static String patternGetStr(Pattern pattern, CharSequence content, int groupIndex) {
        if (null == content || null == pattern) {
            return null
        }
        Matcher matcher = pattern.matcher(content)
        return matcher.find() ? matcher.group(groupIndex) : null
    }

    /**
     * Runs the province-, city- and county-based strategies (each later one
     * only when needed or when {@code parseAll} is set) and sorts the
     * candidates by credibility.
     */
    private List<ParseAreaResult> parseAddress(String address, boolean parseAll) {
        List<ParseAreaResult> list = new ArrayList<>()
        list.addAll(0, parseByProvince(address))
        if (parseAll || list.isEmpty() || !list.get(0).getParse()) {
            list.addAll(0, parseByCity(address))
            if (parseAll || list.isEmpty() || !list.get(0).getParse()) {
                list.addAll(0, parseByCounty(address))
            }
        }
        // Credibility order: confirmed parses first, then shorter names first
        // (a missing name counts as length -1).
        list.sort { a, b ->
            int aNameLength = StringUtils.isEmpty(a.name) ? -1 : a.getName().length()
            int bNameLength = StringUtils.isEmpty(b.getName()) ? -1 : b.getName().length()
            if (a.getParse() && !b.getParse()) {
                return -1
            }
            if (!a.getParse() && b.getParse()) {
                return 1
            }
            return Integer.compare(aNameLength, bNameLength)
        }
        return list
    }

    /**
     * Copies every field of {@code source} into a new result and sets its
     * details to the trimmed {@code remaining} text. Used to keep an
     * unconfirmed candidate before the working result is reset.
     */
    private static ParseAreaResult snapshot(ParseAreaResult source, String remaining) {
        ParseAreaResult copy = new ParseAreaResult()
        copy.mobile = source.mobile
        copy.phone = source.phone
        copy.zipCode = source.zipCode
        copy.province = source.province
        copy.provinceCode = source.provinceCode
        copy.city = source.city
        copy.cityCode = source.cityCode
        copy.county = source.county
        copy.countyCode = source.countyCode
        copy.address = source.address
        copy.details = source.details
        copy.code = source.code
        copy.name = source.name
        copy.type = source.type
        copy.parse = source.parse
        copy.setDetails(remaining.trim())
        return copy
    }

    /**
     * Returns {@code fullName} when {@code text} contains it, otherwise
     * {@code shortName} when that is non-blank and contained, otherwise "".
     * Null names are treated as "not found" instead of raising an exception.
     */
    private static String findKnownName(String text, String fullName, String shortName) {
        if (fullName != null && text.indexOf(fullName) > -1) {
            return fullName
        }
        if (StringUtils.isNotBlank(shortName) && text.indexOf(shortName) > -1) {
            return shortName
        }
        return ""
    }

    /**
     * Parses the address starting from a county match.
     *
     * A match is confirmed when the text left of the county also names the
     * owning province or city; unconfirmed matches are kept as extra candidates.
     *
     * @param addressBase cleaned address
     * @return candidates, the confirmed / latest one first
     */
    private List<ParseAreaResult> parseByCounty(String addressBase) {
        List<ParseAreaResult> results = new ArrayList<>()
        ParseAreaResult result = new ParseAreaResult()
        result.setType("parseByCounty")
        String address = addressBase
        for (Map.Entry<String, String> entry : COUNTIES.entrySet()) {
            String countyCode = entry.getKey()
            String countyName = entry.getValue()
            int index = address.indexOf(countyName)
            // Fall back to the short name only when the full name is absent.
            String shortCounty = index > -1 ? "" : countyShort.get(countyCode)
            int countyLength = StringUtils.isNotEmpty(shortCounty) ? shortCounty.length() : countyName.length()
            if (StringUtils.isNotEmpty(shortCounty)) {
                index = address.indexOf(shortCounty)
            }
            if (index > -1) {
                // Keys may carry a "-suffix"; only the part before it is the area code.
                if (countyCode.contains("-")) {
                    countyCode = countyCode.split("-")[0]
                }
                result.setCode(countyCode)
                result.setCounty(countyName)
                result.setCountyCode(countyCode)
                String cityCode = countyCode.substring(0, 4) + "00"
                String city = CITIES.get(cityCode)
                result.setCityCode(cityCode)
                result.setCity(city)
                String provinceCode = countyCode.substring(0, 2) + "0000"
                String province = PROVINCES.get(provinceCode)
                result.setProvinceCode(provinceCode)
                result.setProvince(province)

                String leftAddress = address.substring(0, index)
                String _provinceName = "", _cityName = ""
                if (StringUtils.isNotEmpty(leftAddress)) {
                    _provinceName = findKnownName(leftAddress, province, provinceShort.get(provinceCode))
                    if (StringUtils.isNotEmpty(_provinceName)) {
                        leftAddress = leftAddress.replace(_provinceName, "")
                    }
                    _cityName = findKnownName(leftAddress, city, cityShort.get(cityCode))
                    if (StringUtils.isNotEmpty(_cityName)) {
                        leftAddress = leftAddress.replace(_cityName, "")
                    }
                    // Whatever is left in front of the area is taken as the name.
                    if (StringUtils.isNotEmpty(leftAddress)) {
                        result.setName(leftAddress.trim())
                    }
                }
                address = address.substring(index + countyLength)

                if (StringUtils.isNotEmpty(_provinceName) || StringUtils.isNotEmpty(_cityName)) {
                    result.setParse(true)
                    break
                } else {
                    // Not confirmed: keep this candidate and start over on the full address.
                    results.add(0, snapshot(result, address))
                    result.clean()
                    address = addressBase
                }
            }
        }
        if (StringUtils.isNotEmpty(result.getCode())) {
            result.setDetails(address.trim())
            results.add(0, result)
        }
        return results
    }

    /**
     * Parses the address starting from a city match.
     *
     * A match is confirmed when the text left of the city names the owning
     * province or a county of that city follows; unconfirmed matches are kept
     * as extra candidates.
     *
     * @param addressBase cleaned address
     * @return candidates, the confirmed / latest one first
     */
    private List<ParseAreaResult> parseByCity(String addressBase) {
        List<ParseAreaResult> results = new ArrayList<>()
        ParseAreaResult result = new ParseAreaResult()
        result.setType("parseByCity")
        String address = addressBase
        for (Map.Entry<String, String> entry : CITIES.entrySet()) {
            String cityCode = entry.getKey()
            String cityName = entry.getValue()
            int index = address.indexOf(cityName)
            // Fall back to the short name only when the full name is absent.
            String shortCity = index > -1 ? "" : cityShort.get(cityCode)
            int cityLength = StringUtils.isNotEmpty(shortCity) ? shortCity.length() : cityName.length()
            if (StringUtils.isNotEmpty(shortCity)) {
                index = address.indexOf(shortCity)
            }
            if (index > -1) {
                result.setCode(cityCode)
                result.setCity(cityName)
                result.setCityCode(cityCode)
                String provinceCode = cityCode.substring(0, 2) + "0000"
                String province = PROVINCES.get(provinceCode)
                result.setProvinceCode(provinceCode)
                result.setProvince(province)

                String leftAddress = address.substring(0, index)
                String _provinceName = ""
                if (StringUtils.isNotEmpty(leftAddress)) {
                    _provinceName = findKnownName(leftAddress, province, provinceShort.get(provinceCode))
                    if (StringUtils.isNotEmpty(_provinceName)) {
                        leftAddress = leftAddress.replace(_provinceName, "")
                    }
                    if (StringUtils.isNotEmpty(leftAddress)) {
                        result.setName(leftAddress)
                    }
                }
                address = address.substring(index + cityLength)
                address = parseAreaByCity(address, result)

                if (StringUtils.isNotEmpty(_provinceName) || StringUtils.isNotEmpty(result.getCounty())) {
                    result.setParse(true)
                    break
                } else {
                    // Not confirmed: keep this candidate and start over on the full address.
                    results.add(0, snapshot(result, address))
                    result.clean()
                    address = addressBase
                }
            }
        }
        if (StringUtils.isNotEmpty(result.getCode())) {
            result.setDetails(address.trim())
            results.add(0, result)
        }
        return results
    }

    /**
     * Parses the address starting from a province match.
     *
     * A match is confirmed when a city (directly, or through one of the
     * province's counties) is found after the province; unconfirmed matches
     * are kept as extra candidates.
     *
     * @param addressBase cleaned address
     * @return candidates, the confirmed / latest one first
     */
    private List<ParseAreaResult> parseByProvince(String addressBase) {
        List<ParseAreaResult> results = new ArrayList<>()
        ParseAreaResult result = new ParseAreaResult()
        result.setType("parseByProvince")
        String address = addressBase
        for (Map.Entry<String, String> entry : PROVINCES.entrySet()) {
            String code = entry.getKey()
            String province = entry.getValue()
            int index = address.indexOf(province)
            // Fall back to the short name only when the full name is absent.
            String shortProvince = index > -1 ? "" : provinceShort.get(code)
            int provinceLength = StringUtils.isNotEmpty(shortProvince) ? shortProvince.length() : province.length()
            if (StringUtils.isNotEmpty(shortProvince)) {
                index = address.indexOf(shortProvince)
            }
            if (index > -1) {
                // Text in front of the province is taken as the name.
                if (index > 0) {
                    result.setName(address.substring(0, index).trim())
                    address = address.substring(index).trim()
                }
                result.setCode(code)
                result.setProvince(province)
                result.setProvinceCode(code)

                String _address = address.substring(provinceLength)
                if (StringUtils.isNotBlank(_address)) {
                    // Keep the province text when the remainder starts with "市" and the
                    // province name does not occur again in it.
                    if (!_address.startsWith("市") || _address.indexOf(province) > -1) {
                        address = _address
                    }
                }
                // When matched by short name, strip the province suffix that follows it.
                if (StringUtils.isNotEmpty(shortProvince)) {
                    for (String key : provinceKeys) {
                        if (address.indexOf(key) == 0) {
                            address = address.substring(key.length())
                        }
                    }
                }

                String __address = parseCityByProvince(address, result)
                if (StringUtils.isEmpty(result.getCity())) {
                    __address = parseCountyByProvince(address, result)
                }

                if (StringUtils.isNotEmpty(result.getCity())) {
                    address = __address
                    result.setParse(true)
                    break
                } else {
                    // Not confirmed: keep this candidate and start over on the full address.
                    results.add(0, snapshot(result, address))
                    result.clean()
                    address = addressBase
                }
            }
        }
        // A working result with a code is always reported.
        if (StringUtils.isNotEmpty(result.getCode())) {
            result.setDetails(address.trim())
            results.add(0, result)
        }
        return results
    }

    /**
     * Looks for a county of the already matched province near the start of
     * {@code address} (match index below 6) and, when found, fills county and
     * owning city into {@code result}.
     *
     * @return the address text after the matched county, or the address
     *         unchanged when nothing matched
     */
    private String parseCountyByProvince(String address, ParseAreaResult result) {
        // NOTE(review): AreaEnum is neither declared nor imported in this file.
        Map<String, String> counties = getTargetsByCode(AreaEnum.COUNTY, result.getCode())
        for (Map.Entry<String, String> entry : counties.entrySet()) {
            String countyCode = entry.getKey()
            String countyName = entry.getValue()
            int index = address.indexOf(countyName)
            String shortCounty = index > -1 ? "" : countyShort.get(countyCode)
            int countyLength = StringUtils.isNotEmpty(shortCounty) ? shortCounty.length() : countyName.length()
            if (StringUtils.isNotEmpty(shortCounty)) {
                index = address.indexOf(shortCounty)
            }
            if (index > -1 && index < 6) {
                if (countyCode.contains("-")) {
                    countyCode = countyCode.split("-")[0]
                }
                result.setCode(countyCode)
                result.setCounty(countyName)
                result.setCountyCode(countyCode)
                String cityCode = countyCode.substring(0, 4) + "00"
                String cityName = CITIES.get(cityCode)
                result.setCity(cityName)
                result.setCityCode(cityCode)
                address = address.substring(index + countyLength)
                // When matched by short name, strip the county suffix that follows it.
                if (StringUtils.isNotEmpty(shortCounty)) {
                    for (String key : countyKeys) {
                        if (address.indexOf(key) == 0) {
                            address = address.substring(key.length())
                        }
                    }
                }
                break
            }
        }
        return address
    }

    /**
     * Looks for a city of the already matched province near the start of
     * {@code address} (match index below 3); on a match it fills the city into
     * {@code result} and continues with the county lookup.
     *
     * @param address text after the province
     * @param result  candidate to update in place
     * @return the remaining address text
     */
    private String parseCityByProvince(String address, ParseAreaResult result) {
        Map<String, String> cities = getTargetsByCode(AreaEnum.CITY, result.getCode())
        for (Map.Entry<String, String> entry : cities.entrySet()) {
            String cityCode = entry.getKey()
            String cityName = entry.getValue()
            int index = address.indexOf(cityName)
            String shortCity = index > -1 ? "" : cityShort.get(cityCode)
            int cityLength = StringUtils.isNotEmpty(shortCity) ? shortCity.length() : cityName.length()
            if (StringUtils.isNotEmpty(shortCity)) {
                index = address.indexOf(shortCity)
            }
            if (index > -1 && index < 3) {
                result.setCode(cityCode)
                result.setCity(cityName)
                result.setCityCode(cityCode)
                address = address.substring(index + cityLength)
                // When matched by short name, strip the city suffix that follows it.
                if (StringUtils.isNotEmpty(shortCity)) {
                    String finalAddress = address
                    for (String key : cityKeys) {
                        if (address.indexOf(key) == 0 && !StringUtils.equals(key, "市")) {
                            // Skip a few prefixes that are district names and would be parsed wrongly.
                            boolean anyMatch = Stream.of("市北区", "市南区", "市中区", "市辖区").anyMatch { v -> finalAddress.indexOf(v) == 0 }
                            if (!anyMatch) {
                                address = address.substring(key.length())
                            }
                        }
                    }
                }
                address = parseAreaByCity(address, result)
                break
            }
        }
        return address
    }

    /**
     * Looks for a county of the already matched city near the start of
     * {@code address} (match index below 3) and fills it into {@code result}.
     *
     * @param address text after the city
     * @param result  candidate to update in place
     * @return the remaining address text
     */
    private String parseAreaByCity(String address, ParseAreaResult result) {
        Map<String, String> counties = getTargetsByCode(AreaEnum.COUNTY, result.getCode())
        for (Map.Entry<String, String> entry : counties.entrySet()) {
            String countyCode = entry.getKey()
            String countyName = entry.getValue()
            int index = address.indexOf(countyName)
            String shortCounty = index > -1 ? "" : countyShort.get(countyCode)
            int countyLength = StringUtils.isNotEmpty(shortCounty) ? shortCounty.length() : countyName.length()
            if (StringUtils.isNotEmpty(shortCounty)) {
                index = address.indexOf(shortCounty)
            }
            if (index > -1 && index < 3) {
                if (countyCode.contains("-")) {
                    countyCode = countyCode.split("-")[0]
                }
                result.setCode(countyCode)
                result.setCounty(countyName)
                result.setCountyCode(countyCode)
                address = address.substring(index + countyLength)
                // When matched by short name, strip the county suffix that follows it.
                if (StringUtils.isNotEmpty(shortCounty)) {
                    for (String key : countyKeys) {
                        if (address.indexOf(key) == 0) {
                            address = address.substring(key.length())
                        }
                    }
                }
                break
            }
        }
        return address
    }

    /**
     * Returns the entries of the requested level that belong to {@code code}.
     *
     * @param target level to return (province, city or county)
     * @param code   area code of a province, city or county
     * @return matching code -> name entries, or null for an unknown level
     */
    private Map<String, String> getTargetsByCode(AreaEnum target, String code) {
        Map<String, String> targets = null
        if (AreaEnum.PROVINCE == target) {
            String provinceCode = code.substring(0, 2)
            targets = putTargets(provinceCode, PROVINCES)
        } else if (AreaEnum.CITY == target) {
            String provinceCode = code.substring(0, 2)
            targets = putTargets(provinceCode, CITIES)
        } else if (AreaEnum.COUNTY == target) {
            // Digits 3-4 equal to "00" mean the code is a province code: take every
            // county of the province; otherwise only the counties of that city.
            if ("00" == code.substring(2, 4)) {
                String provinceCode = code.substring(0, 2)
                targets = putTargets(provinceCode, COUNTIES)
            } else {
                String cityCode = code.substring(0, 4)
                targets = putTargets(cityCode, COUNTIES)
            }
        }
        return targets
    }

    /**
     * Collects the entries of {@code sources} whose key starts with {@code preCode}.
     *
     * The scan stops at the first non-matching key after a match, so it relies
     * on the matching entries being contiguous in {@code sources}.
     *
     * @param preCode code prefix
     * @param sources ordered code -> name map
     */
    private static Map<String, String> putTargets(String preCode, Map<String, String> sources) {
        Map<String, String> targets = new LinkedHashMap<>()
        for (Map.Entry<String, String> entry : sources.entrySet()) {
            boolean matches = entry.getKey().indexOf(preCode) == 0
            if (matches) {
                targets.put(entry.getKey(), entry.getValue())
            } else if (targets.size() > 0) {
                break
            }
        }
        return targets
    }
}