zfsafe/mycode/TargetManager.py

'''
对目标资产的管理，包括信息的更新维护等
'''
import re
import socket
import ipaddress
import geoip2.database
import ipwhois
import requests
import whois
import dns.resolver
import ssl
from urllib.parse import urlparse
from datetime import datetime

# Earlier variant of the regex below, with the domain alternative listed before the IPv4 one:
#pattern = r'^(https?://)?((?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}|(?:\d{1,3}\.){3}\d{1,3})(:\d+)?(/.*)?$'
# Target-shape regex: optional http:// or https:// prefix, then either a
# dotted-quad of 1-3 digit groups (octet range is NOT checked) or a dotted
# domain name ending in a 2+ letter label, then an optional :port and /path.
# NOTE(review): nothing in the visible part of this file references `pattern`;
# confirm whether other modules import it before changing or removing it.
pattern = r'^(https?://)?((?:[0-9]{1,3}\.){3}[0-9]{1,3}|(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,})(:\d+)?(/.*)?$'

class TargetManager:
    '''
    Validate target strings (IP addresses, host names, http/https URLs) and
    collect WHOIS / RDAP / DNS information about them.

    The class keeps no per-instance state; every method works only on its
    arguments.
    '''

    # Placeholder values returned as ``fake_target`` by validate_and_extract().
    # NOTE(review): presumably used to mask the real target in data handed to
    # other components -- confirm with the callers before changing them.
    _FAKE_IP_TARGET = "192.168.3.107"
    _FAKE_DOMAIN_TARGET = "czzfkjxx"

    # strftime layout used for WHOIS creation / expiration dates.
    _WHOIS_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

    # An explicit "scheme://" prefix.  urlparse() alone is not enough to detect
    # one: it reads "example.com:8080" as scheme "example.com".
    _SCHEME_RE = re.compile(r'^[A-Za-z][A-Za-z0-9+.-]*://')

    # Loose candidates only; ipaddress.ip_address() is the real validator.
    _IP_CANDIDATE_RE = re.compile(
        r'''
        (?<![0-9A-Za-z.:])                        # do not start inside a longer token
        (?:
            (?P<ipv4>(?:[0-9]{1,3}\.){3}[0-9]{1,3})   # dotted quad, range checked later
            (?![0-9]|\.[0-9])                         # not a prefix of a longer dotted run
          |
            (?P<ipv6>[0-9a-fA-F]{0,4}:[0-9a-fA-F:]*:[0-9a-fA-F:.]*)   # two or more colons
        )
        ''',
        re.VERBOSE,
    )

    def __init__(self):
        '''Nothing to initialise: the manager is stateless.'''

    def extract_and_store_ips(self, str_target: str):
        '''
        Find every valid IPv4 / IPv6 address embedded in a piece of text.

        Brackets around IPv6 literals and ``:port`` suffixes after IPv4
        addresses or bracketed IPv6 literals are not part of the result.

        :param str_target: free text that may contain IP addresses
        :return: list of dicts, in order of appearance, each with
                 ``binary_ip`` (packed bytes), ``ip_type`` ('v4' or 'v6') and
                 ``original`` (the address text as found); empty list when
                 nothing valid is found or the input is empty
        '''
        valid_ips = []
        if not str_target:
            return valid_ips

        for match in self._IP_CANDIDATE_RE.finditer(str_target):
            ip_part = match.group('ipv4')
            if ip_part is None:
                # A sentence-ending dot can be swallowed by the IPv6 candidate.
                ip_part = match.group('ipv6').rstrip('.')

            # The regex only proposes candidates (e.g. "999.1.1.1" or a
            # "12:30:45" timestamp); ipaddress decides what is really an IP.
            try:
                ip_obj = ipaddress.ip_address(ip_part)
            except ValueError:
                continue

            valid_ips.append({
                'binary_ip': ip_obj.packed,
                'ip_type': 'v6' if ip_obj.version == 6 else 'v4',
                'original': ip_part
            })
        return valid_ips

    # Helper: shape check for dotted-quad strings
    def _is_valid_ipv4(self, ip):
        '''
        Tell whether ``ip`` has the *shape* of an IPv4 address: exactly four
        dot-separated, all-digit groups.  The numeric range of the groups is
        deliberately not checked, so "300.400.500.600" returns True; callers
        use this to tell "malformed IP" apart from "host name".

        :param ip: string to inspect
        :return: True when the string looks like a dotted quad, else False
        '''
        parts = ip.split('.')
        return len(parts) == 4 and all(part.isdigit() for part in parts)

    # Validate the target format and extract the domain or IP
    def validate_and_extract(self, input_str):
        '''
        Validate a target and reduce it to its host part.

        :param input_str: IP address, host name or http/https URL
        :return: tuple ``(ok, real_target, kind, fake_target)`` where ``kind``
                 is 1 for an IP and 2 for a domain.  For an invalid target the
                 tuple is ``(False, input_str, None, "")``.
        '''
        target_type, target = self.is_valid_target(input_str)

        if target_type in ('IPv4', 'IPv6'):
            return True, target, 1, self._FAKE_IP_TARGET
        if target_type == 'URL':
            return True, target, 2, self._FAKE_DOMAIN_TARGET
        # None (or any unexpected type): the target is not usable.
        return False, input_str, None, ""

    # Check whether the target is valid
    def is_valid_target(self, target):
        '''
        Classify a target and extract its host.

        Accepted forms are a bare IPv4 / IPv6 address, or ``host``,
        ``host:port``, ``host/path`` and ``http(s)://...`` URLs whose host is
        an IP address or a name.  Ports, paths, user info and IPv6 brackets
        are dropped from the returned host; hosts taken from a URL are
        lower-cased by ``urlparse``.

        :param target: the string to check
        :return: ``(target_type, new_target)`` with ``target_type`` one of
                 'IPv4', 'IPv6', 'URL' (host name), or ``(None, None)`` when
                 the target is not valid
        '''
        if not isinstance(target, str):
            return None, None
        candidate = target.strip()
        if not candidate:
            return None, None

        # 1. A bare IP literal is returned as given.
        try:
            literal = ipaddress.ip_address(candidate)
        except ValueError:
            pass
        else:
            return ('IPv4' if literal.version == 4 else 'IPv6'), candidate

        # 2. Everything else is parsed as a URL; without an explicit scheme,
        #    http:// is assumed so that the host lands in netloc.
        if not self._SCHEME_RE.match(candidate):
            candidate = 'http://' + candidate
        try:
            parsed = urlparse(candidate)
            host = parsed.hostname
            _ = parsed.port  # raises ValueError for a malformed port
        except ValueError:
            return None, None

        # Only http and https targets are allowed.
        if parsed.scheme not in ('http', 'https') or not host:
            return None, None

        try:
            address = ipaddress.ip_address(host)
        except ValueError:
            address = None

        # Brackets in a URL are reserved for IPv6 literals.
        bracketed = '[' in parsed.netloc
        if address is not None and not (bracketed and address.version == 4):
            return ('IPv4' if address.version == 4 else 'IPv6'), host

        # Bracketed garbage, or a dotted quad with out-of-range groups, is a
        # malformed IP rather than a host name.
        if bracketed or self._is_valid_ipv4(host):
            return None, None

        # Not IP-like: assume it is a domain name and accept it.
        return 'URL', host

    def collect_ip_info(self, ip):
        '''
        Look up the ASN and ISP of an IP address, trying RDAP first and
        falling back to legacy WHOIS.  Best effort: failures are reported in
        the result instead of being raised.

        :param ip: IP address to look up
        :return: dict that may contain ``asn`` and ``isp``; ``whois_error``
                 holds the error text when the lookup object cannot be
                 created or the WHOIS fallback fails
        '''
        info = {}

        # IPWhois() itself raises for private / reserved or malformed
        # addresses, so it is built outside the lookup try-block: there is
        # nothing to fall back to without a lookup object.
        try:
            lookup = ipwhois.IPWhois(ip)
        except Exception as exc:
            info['whois_error'] = str(exc)
            return info

        try:
            # Try the RDAP query first
            rdap = lookup.lookup_rdap()
            info['asn'] = rdap.get('asn')
            # 'network' may be present but None
            info['isp'] = (rdap.get('network') or {}).get('name')
        except Exception:
            # Any RDAP failure: fall back to the WHOIS query
            try:
                legacy = lookup.lookup_whois()
                info['asn'] = legacy.get('asn')
                nets = legacy.get('nets')
                if nets:
                    # ISP taken from the first WHOIS 'nets' entry's description
                    info['isp'] = nets[0].get('description')
            except Exception as exc:
                info['whois_error'] = str(exc)  # record the error text
        return info

    @classmethod
    def _format_whois_date(cls, value):
        '''Render a WHOIS date field (datetime, list of them, or anything else) as text.'''
        def render(item):
            if isinstance(item, datetime):
                return item.strftime(cls._WHOIS_DATE_FORMAT)
            return str(item)

        if isinstance(value, list):
            return [render(item) for item in value]
        return render(value)

    def collect_domain_info(self, domain):
        '''
        Collect WHOIS registration data and DNS A records for a domain.
        Best effort: each half records its own error instead of raising.

        :param domain: domain name to look up
        :return: dict with ``registrar``, ``creation_date``,
                 ``expiration_date``, ``user_name``, ``emails``, ``status``
                 (or ``whois_error``) and ``A_records`` (or ``dns_error``)
        '''
        info = {}
        try:
            w = whois.whois(domain)
            info['registrar'] = w.registrar
            info['creation_date'] = self._format_whois_date(w.creation_date)
            info['expiration_date'] = self._format_whois_date(w.expiration_date)
            info['user_name'] = str(w.name)
            info['emails'] = str(w.emails)
            info['status'] = str(w.status)
        except Exception as e:
            info['whois_error'] = str(e)

        try:
            answers = dns.resolver.resolve(domain, 'A')
            info['A_records'] = [r.to_text() for r in answers]
        except Exception as e:
            info['dns_error'] = str(e)

        return info

    def test(self, str_target):
        '''Print whether ``str_target`` validates, with the extracted target.'''
        bok, target, _kind, fake_target = self.validate_and_extract(str_target)
        if bok:
            print(f"{str_target}目标合法{target} ---- {fake_target}")
        else:
            print(f"{str_target}目标不合法{target}")

# Module-level shared TargetManager instance, created at import time.
g_TM = TargetManager()

if __name__ == "__main__":
    # Manual smoke test: push each sample through the shared manager, which
    # prints whether the target was accepted and what was extracted from it.
    samples = (
        "256.254.1111.23",
        "8.8.8.8",
        "2001:db8::1",
        "http://www.crnn.cc/",
        "https://www.crnn.cn",
        "http://www.crnn.cc/product_category/network-security-services",
        "192.168.1.1:80",
        "example.com/path/to/resource",
        "www.crnn.cn",
        "oa.crnn.cn",
        "ftp://invalid.com",  # meant to be rejected
        "http://300.400.500.600",  # meant to be rejected
    )

    for sample in samples:
        g_TM.test(sample)
初始版本0.1 3 months ago			`'''`
			`对目标资产的管理，包括信息的更新维护等`
			`'''`
			`import re`
V0.5.5.1 before split stage bak 3 weeks ago			`import socket`
			`import ipaddress`
			`import geoip2.database`
			`import ipwhois`
			`import requests`
			`import whois`
			`import dns.resolver`
			`import ssl`
			`from urllib.parse import urlparse`
			`from datetime import datetime`
初始版本0.1 3 months ago
			`#pattern = r'^(https?://)?((?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}\|(?:\d{1,3}\.){3}\d{1,3})(:\d+)?(/.*)?$'`
			`pattern = r'^(https?://)?((?:[0-9]{1,3}\.){3}[0-9]{1,3}\|(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,})(:\d+)?(/.*)?$'`

			`class TargetManager:`
			`def __init__(self):`
			`pass`

V0.5.5.1 before split stage bak 3 weeks ago			`def extract_and_store_ips(self,str_target: str):`
			`# 正则匹配IP地址（包含IPv4、IPv6及带端口的情况）`
			`ip_pattern = r'''`
			`(?P<ipv6>\[?([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\]?\| # 完整IPv6`
			`::([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}\| # 缩写IPv6`
			`(?P<ipv4>(\d{1,3}\.){3}\d{1,3})(?::\d+)? # IPv4及端口`
			`'''`
			`candidates = re.finditer(ip_pattern, str_target, re.VERBOSE)`

			`valid_ips = []`
			`for match in candidates:`
			`raw_ip = match.group().lstrip('[').rstrip(']') # 处理IPv6方括号`

			`# 分离IP和端口（如192.168.1.1:8080）`
			`if ':' in raw_ip and not raw_ip.count(':') > 1: # 排除IPv6的冒号`
			`ip_part = raw_ip.split(':')[0]`
			`else:`
			`ip_part = raw_ip`

			`# 验证IP有效性并分类`
			`try:`
			`ip_obj = ipaddress.ip_address(ip_part)`
			`ip_type = 'v6' if ip_obj.version == 6 else 'v4'`
			`valid_ips.append({`
			`'binary_ip': ip_obj.packed,`
			`'ip_type': ip_type,`
			`'original': ip_part`
			`})`
			`except ValueError:`
			`continue`

初始版本0.1 3 months ago			`# 辅助函数：验证IPv4地址的有效性`
			`def _is_valid_ipv4(self,ip):`
			`parts = ip.split('.')`
			`if len(parts) != 4:`
			`return False`
			`for part in parts:`
V0.5.5.1 before split stage bak 3 weeks ago			`if not part.isdigit():`
初始版本0.1 3 months ago			`return False`
			`return True`

			`#验证目标格式的合法性，并提取域名或IP`
			`def validate_and_extract(self,input_str):`
			`'''`
			`:param input_str:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`:return: bool,real_target,int(1-IP,2-domain),fake_target`
初始版本0.1 3 months ago			`'''`
			`type = None`
v0.5.3 a.增加了批量添加目标，重新调整了任务的启停； b.增加了数据过滤功能，过滤提交到llm的目标信息； c.增加了对通以qwen3模型的对接； d.https有短数据包不及时发送到前端的问题，暂时调回http； e.其他的一些bug和工具迭代。 1 month ago			`fake_target = ""`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`real_target = ""`
			`target_type,target = self.is_valid_target(input_str)`
			`if not target_type: #非法目标`
			`return False,input_str,type,fake_target`

			`if target_type =="IPv4" or target_type=="IPv6":`
			`type = 1 #IP`
			`real_target = target`
			`fake_target = "192.168.3.107"`
			`elif target_type == "URL":`
			`type = 2 #domain`
			`real_target = target`
			`fake_target = "czzfkjxx"`
			`else: #目标不合法`
			`return False,real_target,type,fake_target`
			`return True,real_target,type,fake_target`
初始版本0.1 3 months ago
V0.5.5.1 before split stage bak 3 weeks ago			`#验证目标是否合法`
			`def is_valid_target(self,target):`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`'''`
			`检查目标的合法性，并对于URL地址，提取域名部分，若是ip的URL，提取IP`
			`:param target:`
			`:return: target_type new_target`
			`'''`
V0.5.5.1 before split stage bak 3 weeks ago			`# Check if target is a valid IP address (IPv4 or IPv6)`
			`try:`
			`ip = ipaddress.ip_address(target)`
			`if ip.version == 4:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return 'IPv4',target`
V0.5.5.1 before split stage bak 3 weeks ago			`elif ip.version == 6:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return 'IPv6',target`
V0.5.5.1 before split stage bak 3 weeks ago			`except ValueError:`
			`pass`

			`# Check if target is a valid URL`
			`try:`
			`result = urlparse(target)`
			`# Only allow http or https schemes`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`if not result.scheme:`
			`result = urlparse('http://'+target)`
V0.5.5.1 before split stage bak 3 weeks ago			`netloc = result.netloc`
			`if not netloc:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return None,None`
V0.5.5.1 before split stage bak 3 weeks ago			`# Handle IPv6 addresses in URLs (enclosed in brackets)`
			`if netloc.startswith('[') and netloc.endswith(']'):`
			`ip_str = netloc[1:-1]`
			`try:`
			`ipaddress.IPv6Address(ip_str)`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return 'IPv6',ipaddress`
V0.5.5.1 before split stage bak 3 weeks ago			`except ValueError:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return None,None`
V0.5.5.1 before split stage bak 3 weeks ago			`# Handle potential IPv4 addresses`
			`elif self._is_valid_ipv4(netloc):`
			`try:`
			`ipaddress.IPv4Address(netloc)`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return 'IPv4',ipaddress`
V0.5.5.1 before split stage bak 3 weeks ago			`except ValueError:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return None,None`
V0.5.5.1 before split stage bak 3 weeks ago			`# If not an IP-like string, assume it's a domain name and accept`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return 'URL',netloc`
V0.5.5.1 before split stage bak 3 weeks ago			`except ValueError:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`return None,None`
V0.5.5.1 before split stage bak 3 weeks ago
			`def collect_ip_info(self,ip):`
			`info = {}`
			`try:`
			`# 首先尝试 RDAP 查询`
			`obj = ipwhois.IPWhois(ip)`
			`whois_info = obj.lookup_rdap()`
			`info['asn'] = whois_info.get('asn') # 获取 ASN`
			`info['isp'] = whois_info.get('network', {}).get('name') # 获取 ISP`
			`except (ipwhois.exceptions.IPDefinedError, ipwhois.exceptions.ASNRegistryError,`
			`requests.exceptions.RequestException) as e:`
			`# 如果 RDAP 失败，回退到 WHOIS 查询`
			`try:`
			`whois_info = obj.lookup_whois()`
			`info['asn'] = whois_info.get('asn') # 获取 ASN`
			`if whois_info.get('nets'):`
			`# 从 WHOIS 的 'nets' 中提取 ISP（通常在 description 字段）`
			`info['isp'] = whois_info['nets'][0].get('description')`
			`except Exception as e:`
			`info['whois_error'] = str(e) # 记录错误信息`
			`return info`

			`def collect_domain_info(self,domain):`
			`info = {}`
			`try:`
			`w = whois.whois(domain)`
			`info['registrar'] = w.registrar`
			`# 处理 creation_date`
			`if isinstance(w.creation_date, list):`
			`info['creation_date'] = [dt.strftime('%Y-%m-%d %H:%M:%S') if isinstance(dt, datetime) else str(dt) for`
			`dt in w.creation_date]`
			`elif isinstance(w.creation_date, datetime):`
			`info['creation_date'] = w.creation_date.strftime('%Y-%m-%d %H:%M:%S')`
			`else:`
			`info['creation_date'] = str(w.creation_date)`

			`# 处理 expiration_date`
			`if isinstance(w.expiration_date, list):`
			`info['expiration_date'] = [dt.strftime('%Y-%m-%d %H:%M:%S') if isinstance(dt, datetime) else str(dt) for`
			`dt in w.expiration_date]`
			`elif isinstance(w.expiration_date, datetime):`
			`info['expiration_date'] = w.expiration_date.strftime('%Y-%m-%d %H:%M:%S')`
			`else:`
			`info['expiration_date'] = str(w.expiration_date)`

			`info['user_name'] = str(w.name)`
			`info['emails'] = str(w.emails)`
			`info['status'] = str(w.status)`

			`except Exception as e:`
			`info['whois_error'] = str(e)`

			`try:`
			`answers = dns.resolver.resolve(domain, 'A')`
			`info['A_records'] = [r.to_text() for r in answers]`
			`except Exception as e:`
			`info['dns_error'] = str(e)`

			`return info`

			`def test(self,str_target):`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`bok, target, type, fake_target = self.validate_and_extract(str_target)`
			`if not bok:`
			`print(f"{str_target}目标不合法{target}")`
			`else:`
			`print(f"{str_target}目标合法{target} ---- {fake_target}")`
V0.5.5.1 before split stage bak 3 weeks ago
v0.1.1 node_tree_0.7 update MSG before bak 2 months ago			`g_TM = TargetManager()`

初始版本0.1 3 months ago			`if __name__ == "__main__":`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`#tm = TargetManager()`
			`#示例测试`
初始版本0.1 3 months ago			`test_cases = [`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`"256.254.1111.23",`
			`"8.8.8.8",`
			`"2001:db8::1",`
V0.5.5.1 before split stage bak 3 weeks ago			`"http://www.crnn.cc/",`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`"https://www.crnn.cn",`
			`"http://www.crnn.cc/product_category/network-security-services",`
			`"192.168.1.1:80",`
			`"example.com/path/to/resource",`
			`"www.crnn.cn",`
			`"oa.crnn.cn",`
			`"ftp://invalid.com", # 不合规`
			`"http://300.400.500.600" # 不合规`
初始版本0.1 3 months ago			`]`

V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`# test_cases = [`
			`# "http://www.crnn.cc/",`
			`# "http://www.crnn.cc/product_category/network-security-services"`
			`# ]`

V0.5.5.1 before split stage bak 3 weeks ago			`#tm.test("https://www.crnn.cn")`

初始版本0.1 3 months ago			`for case in test_cases:`
V0.5.5.2 1.dnoe split stage; 2.add assets manager; 3.1-2never test; 4.before polling taget bak; and db update; 2 weeks ago			`g_TM.test(case)`