java获取网页网址

xiaoxiao2023-09-20  62

package web;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetches a web page over a raw socket with a hand-written HTTP/1.0 request and prints
 * every URL found in the response (status line, headers and body are all scanned).
 */
public class TestSock {

    /**
     * Matches {@code http://} or {@code https://} followed by a host that ends in one of the
     * listed top-level domains. Compiled once instead of once per response line.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "(https?://[a-zA-Z0-9]{0,100}[.]{0,1})[^.\\s]*?\\.(com|cn|net|org|biz|info|cc|tv)");

    /**
     * No-op instance method. Despite its name it is not a constructor (the class is
     * {@code TestSock}); it is kept only so existing callers keep compiling.
     */
    public void TestSocket() {
    }

    /**
     * Connects to {@code www.163.com:80}, requests {@code /} and prints each URL found in the
     * response on its own line. I/O failures are reported via a stack trace on stderr.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String strServer = "www.163.com";
        String strPage = "/";
        int port = 80;

        // Resources close in reverse order (writer, reader, socket), even when an exception
        // is thrown part-way through the exchange.
        try (Socket socket = new Socket(InetAddress.getByName(strServer), port);
             BufferedReader rd = new BufferedReader(
                     new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8));
             BufferedWriter wr = new BufferedWriter(
                     new OutputStreamWriter(socket.getOutputStream(), StandardCharsets.UTF_8))) {

            wr.write("GET " + strPage + " HTTP/1.0\r\n");
            wr.write("HOST:" + strServer + "\r\n");
            wr.write("Accept:*/*\r\n");
            wr.write("\r\n"); // blank line terminates the request headers
            wr.flush();

            // HTTP/1.0 without keep-alive: the server closes the connection when it is done,
            // which is what makes readLine() eventually return null.
            String line;
            while ((line = rd.readLine()) != null) {
                for (String url : extractUrls(line)) {
                    System.out.println(url);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns every URL that {@link #URL_PATTERN} finds in the given text, in order of
     * appearance. Each match appears exactly once.
     *
     * @param line text to scan; {@code null} is treated as empty
     * @return the matched URLs, or an empty list when there are none
     */
    static List<String> extractUrls(String line) {
        List<String> urls = new ArrayList<String>();
        if (line == null) {
            return urls;
        }
        Matcher matcher = URL_PATTERN.matcher(line);
        while (matcher.find()) {
            urls.add(matcher.group());
        }
        return urls;
    }
}
转载请注明原文地址: https://www.6miu.com/read-5008928.html

最新回复(0)