import re
import os
from collections import Counter
from tools.ToolBase import ToolBase

class GobusterTool(ToolBase):
    """Prepare gobuster command lines and condense gobuster output."""

    # CSI-style ANSI escape sequences such as ESC[2K or ESC[31m.
    _ANSI_ESCAPE = re.compile(r'\x1b\[[0-9;]*[A-Za-z]')
    # Wordlist path passed as "-w PATH", "-w=PATH", "--wordlist PATH" or
    # "--wordlist=PATH".
    _WORDLIST_OPTION = re.compile(r'(?:^|\s)(?:-w|--wordlist)(?:\s+|=)(\S+)')
    # Status code as printed on a result line, e.g. "(Status: 301)" or
    # "[Status=200]".
    _STATUS_CODE = re.compile(r'Status[:=]\s*(\d{3})')
    # Maximum number of result lines kept per noisy status code; a handful of
    # samples is enough for these.
    _STATUS_LIMITS = {"400": 5, "401": 5}

    def validate_instruction(self, instruction):
        """Normalise a gobuster command before it is executed.

        Appends ``-q`` (quiet: only useful results are printed) and
        ``--no-color`` unless the command already carries them. Forcing the
        thread count or swapping ``*-medium.txt`` wordlists for
        ``*-small.txt`` is intentionally not done at the moment.

        :param instruction: the gobuster command line as a single string.
        :return: ``(instruction, timeout)`` where ``timeout`` is 900 seconds.
        """
        timeout = 60 * 15
        # Compare whole tokens: a substring test would be fooled by a URL or
        # wordlist path that merely contains "-q".
        tokens = instruction.split()
        if "-q" not in tokens and "--quiet" not in tokens:
            instruction += ' -q'

        if "--no-color" not in tokens:
            instruction += " --no-color"

        return instruction, timeout

    def strip_ansi(self, s: str) -> str:
        """Return ``s`` with ANSI escape sequences (ESC[2K, ESC[31m, ...) removed."""
        return self._ANSI_ESCAPE.sub('', s)

    def is_false_result(self, output, wordlist_path):
        """Check whether the scan output looks like genuine findings.

        The number of non-blank output lines is compared with the number of
        non-blank entries in the wordlist. When at least roughly 10% of the
        wordlist produced a result line, the output is judged unreliable.

        :param output: raw gobuster stdout.
        :param wordlist_path: path of the wordlist used for the scan.
        :return: ``(True, cleaned_output)`` when the output looks plausible
            (``cleaned_output`` has ANSI sequences removed), otherwise
            ``(False, message)``. Note the flag is True for *valid* output
            despite the method name.
        """
        output = self.strip_ansi(output)
        # Number of result lines.
        discovered = [line for line in output.splitlines() if line.strip()]
        # Number of wordlist entries.
        try:
            with open(wordlist_path, "r", encoding='utf-8', errors='ignore') as f:
                wordlist_count = sum(1 for line in f if line.strip())
        except OSError:
            # Without the wordlist the hit ratio cannot be computed; keep the
            # output instead of aborting the whole analysis.
            return True, output
        # At least one hit is required: for wordlists shorter than 10 entries
        # int(0.1 * n) is 0, which would flag every output, even an empty one.
        threshold = max(1, int(0.1 * wordlist_count))
        if wordlist_count and len(discovered) >= threshold:
            return False, "字典项中绝大部分路径都匹配成功，应该是应用端做了防护处理"
        return True, output

    def analyze_result(self, result, instruction, stderr, stdout):
        """Condense the output of a quiet (``-q``) gobuster run.

        All result lines are kept except those with status 400 or 401, of
        which only the first five each are kept. Lines whose status cannot be
        determined are kept as well.

        :param result: value returned unchanged when ``stdout`` is empty.
        :param instruction: the executed command; the wordlist path is read
            from its ``-w`` / ``--wordlist`` option.
        :param stderr: unused.
        :param stdout: raw standard output of gobuster.
        :return: ``("failure", message)`` when the command has no wordlist
            option; the explanatory message when the output looks like a
            blanket false positive; otherwise the kept lines, each preceded
            by a newline.
        """
        match = self._WORDLIST_OPTION.search(instruction)
        if not match:
            return ("failure", "No wordlist path found in the command string")
        # Drop quotes that may surround the path on the command line.
        wordlist_path = match.group(1).strip('\'"')

        if not stdout:
            return result

        plausible, payload = self.is_false_result(stdout, wordlist_path)
        if not plausible:
            return payload

        # From here on ``payload`` is stdout without ANSI sequences, so the
        # condensed result does not carry terminal control codes.
        kept = []
        seen = Counter()
        for line in payload.splitlines():
            if not line.strip():
                continue
            # Read the status from its label rather than searching the whole
            # line: sizes or paths containing "200"/"400"/"401" must not be
            # mistaken for the status code.
            status = self._STATUS_CODE.search(line)
            code = status.group(1) if status else None
            limit = self._STATUS_LIMITS.get(code)
            if limit is not None:
                if seen[code] >= limit:
                    continue
                seen[code] += 1
            kept.append(line)
        return "".join("\n" + line for line in kept)

if __name__ == '__main__':
    # Manual smoke check: print the normalised form of a sample command
    # together with the timeout chosen for it.
    sample_command = "gobuster dir -u http://haitutech.cn -w /usr/directory-list-2.3-medium.txt -x php,html,zip,bak"
    tool = GobusterTool()
    normalised = tool.validate_instruction(sample_command)
    print(normalised)