diff --git a/dc-backend/src/main/java/api/estimate/EstimateServlet.java b/dc-backend/src/main/java/api/estimate/EstimateServlet.java index 9e40f5d..22a0090 100644 --- a/dc-backend/src/main/java/api/estimate/EstimateServlet.java +++ b/dc-backend/src/main/java/api/estimate/EstimateServlet.java @@ -95,21 +95,10 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t /*In use when click change button. * */ - String estiId=""; - try { - estiId = pathParam[2]; - System.out.println("estiId is "+estiId); - Logger.getLogger("estiId ",estiId); - }catch (NullPointerException nu){ - nu.printStackTrace(); - } - /* - * check whether ID is in the estimate table; - * if not,we create one.Then update. - - * if in,we directly get. - * */ - String[][] IDArray = DBUtil.select("estimate", new String[]{"estiId"}); + String estiId = ""; + estiId = pathParam[2]; + String[]params_estiId={"estiId"}; + String[][] IDArray = DBUtil.select("estimate", params_estiId); HashSet IDset = new HashSet<>(); for (String[] aID : IDArray) { IDset.add(aID[0]); @@ -124,9 +113,9 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t String[] params = new String[]{ "linksXpath", "contentXpath", "startWord", "walkTimes", "contentLocation", "querySend" }; - String[] conParams = {"estiId"}; + String[] conPalues = {estiId}; - String[][] estiData = DBUtil.select("estimate", params, conParams, conPalues); + String[][] estiData = DBUtil.select("estimate", params, params_estiId, conPalues); for (int i = 0; i < params.length; i++) { if (estiData[0][i] != null) { @@ -136,7 +125,8 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t } } - String[][] IDArray1 = DBUtil.select("urlbaseconf", new String[]{"webId"}); + String []params_webId={"webId"}; + String[][] IDArray1 = DBUtil.select("urlBaseConf", params_webId); HashSet IDset1 = new HashSet<>(); for (String[] aID : IDArray1) { IDset1.add(aID[0]); @@ -149,7 +139,7 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) t String[][] urlBaseConfData; if (IDset1.contains(estiId)) { - urlBaseConfData = DBUtil.select("urlbaseconf", params1, conParams1, conPalues1); + urlBaseConfData = DBUtil.select("urlBaseConf", params1, conParams1, conPalues1); } else { urlBaseConfData = new String[1][params1.length]; for (int i = 0; i < params1.length; i++) { diff --git a/dc-backend/src/main/java/application.properties b/dc-backend/src/main/java/application.properties index 01c5b54..4860777 100644 --- a/dc-backend/src/main/java/application.properties +++ b/dc-backend/src/main/java/application.properties @@ -1,6 +1,7 @@ mysqlUserName=root -mysqlPassword=123456 -mysqlURL=jdbc:mysql://localhost:3306/webcrawler?characterEncoding=UTF-8&useSSL=false&useAffectedRows=true&allowPublicKeyRetrieval=true&serverTimezone=GMT%2B8 + +mysqlPassword=ElTo@EW*nIWOt2Tk +mysqlURL=jdbc:mysql://10.24.11.134:3306/webcrawler?characterEncoding=UTF-8&useSSL=false&allowPublicKeyRetrieval=true baseWorkDir=/Users/cwc/Desktop/tencent/data-crawling/ diff --git a/dc-backend/src/main/java/util/DBUtil.java b/dc-backend/src/main/java/util/DBUtil.java index 6c43d87..9294804 100644 --- a/dc-backend/src/main/java/util/DBUtil.java +++ b/dc-backend/src/main/java/util/DBUtil.java @@ -1085,11 +1085,22 @@ public static String[][] selectAllTable_data(String table) throws Exception { } public static void main(String[] args){ - String[] params_value = {"aaaa","prefix"}; - String[] params= {"requestName","requestDesc"}; - boolean flag = DBUtil.insert("requesttable", params, params_value); - System.out.println("insert or not ?"+flag); - +// String[] params_value = {"aaaa","prefix"}; +// String[] params= {"requestName","requestDesc"}; +// boolean flag = DBUtil.insert("requesttable", params, params_value); +// System.out.println("insert or not ?"+flag); +// String[][] IDArray1 = DBUtil.select("urlBaseconf", new String[]{""}); + String[][] IDArray = new String[0][]; + try { + IDArray = DBUtil.select("estimate",new String[]{"estiId"}); + } catch (Exception e) { + e.printStackTrace(); + } + for(int i=0;i crawlLinks(String oldLinkHost, HttpURLConnection connection = (HttpURLConnection) url .openConnection(); connection.setRequestMethod("GET"); - connection.setConnectTimeout(2000); - connection.setReadTimeout(2000); + connection.setConnectTimeout(200000); + connection.setReadTimeout(200000); if (connection.getResponseCode() == 200) { InputStream inputStream = connection.getInputStream(); @@ -128,7 +128,7 @@ private Map crawlLinks(String oldLinkHost, new InputStreamReader(inputStream, "UTF-8")); String line = ""; Pattern pattern = Pattern - .compile("(.+)"); + .compile("<[aA]{1}.*?href=[\"']?((https?://)?/?[^\"']+)[\"']?.*?>(.+)"); Matcher matcher = null; while (((line = reader.readLine()) != null)&&stopFlag) { matcher = pattern.matcher(line); diff --git a/dc-backend/src/main/webapp/WEB-INF/lib/import_tool-jar-with-dependencies.jar b/dc-backend/src/main/webapp/WEB-INF/lib/import_tool-jar-with-dependencies.jar index e74a01b..4d30360 100644 Binary files a/dc-backend/src/main/webapp/WEB-INF/lib/import_tool-jar-with-dependencies.jar and b/dc-backend/src/main/webapp/WEB-INF/lib/import_tool-jar-with-dependencies.jar differ