import re
from html import unescape
from typing import Dict, List, Optional

import cssutils
from bs4 import BeautifulSoup, Comment, Doctype
from bs4.element import PreformattedString

class HTMLBeautifier:
    """A comprehensive HTML beautifier and layout fixer.

    Every public method takes an HTML string, parses it with BeautifulSoup's
    built-in ``html.parser`` and returns a new string.
    """

    # HTML void elements: they never have children, so "empty" is their
    # normal state and they must not be treated as removable clutter.
    _VOID_ELEMENTS = frozenset({
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
        'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
    })

    # Non-void elements that are still meaningful with no text and no
    # children (external scripts, media, table cells, document skeleton).
    _KEEP_WHEN_EMPTY = frozenset({
        'script', 'style', 'iframe', 'textarea', 'canvas', 'video', 'audio',
        'object', 'template', 'td', 'th', 'html', 'head', 'body',
    })

    # Elements whose text content must be left untouched when collapsing
    # whitespace (kept as a list because it is passed to bs4 name filters).
    _WHITESPACE_SENSITIVE = ['pre', 'code', 'script', 'style', 'textarea']

    def __init__(self):
        cssutils.log.setLevel(50)  # 50 == logging.CRITICAL: silence cssutils warnings

    def beautify(self, html_content: str,
                 indent_chars: int = 4,
                 fix_attributes: bool = True,
                 sort_attributes: bool = False,
                 remove_empty_tags: bool = True,
                 fix_self_closing: bool = True,
                 normalize_whitespace: bool = True,
                 add_doctype: bool = True) -> str:
        """
        Beautify HTML with various layout fixes

        Args:
            html_content: Raw HTML string
            indent_chars: Number of spaces used per nesting level
            fix_attributes: Normalize ``class`` and ``style`` attribute values
            sort_attributes: Sort attributes alphabetically (only applied
                when ``fix_attributes`` is also true)
            remove_empty_tags: Remove empty tags like <div></div>
            fix_self_closing: Strip any content from void elements
            normalize_whitespace: Collapse runs of whitespace in text
            add_doctype: Add HTML5 doctype if missing

        Returns:
            Beautified HTML string
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        # Prepend the doctype textually and re-parse so it becomes a real
        # Doctype node at the top of the tree.
        if add_doctype and not self._has_doctype(html_content):
            soup = BeautifulSoup('<!DOCTYPE html>\n' + str(soup), 'html.parser')

        if fix_self_closing:
            self._fix_self_closing_tags(soup)

        if remove_empty_tags:
            self._remove_empty_tags(soup)

        if normalize_whitespace:
            self._normalize_whitespace(soup)

        if fix_attributes:
            self._fix_attributes(soup, sort_attributes)

        pretty_html = soup.prettify(formatter="html")

        # prettify() indents one column per level; rescale to indent_chars
        # and tidy blank lines / trailing whitespace.
        return self._additional_formatting(pretty_html, indent_chars)

    def _has_doctype(self, html: str) -> bool:
        """Return True if ``html`` contains a ``<!DOCTYPE html`` declaration (case-insensitive)."""
        return bool(re.search(r'<!DOCTYPE\s+html', html, re.IGNORECASE))

    def _fix_self_closing_tags(self, soup: "BeautifulSoup"):
        """Remove any children from void elements (``<br>``, ``<img>``, ...).

        The tree is modified in place.
        """
        for tag in soup.find_all():
            if tag.name in self._VOID_ELEMENTS and tag.contents:
                # clear() detaches every child; assigning None to
                # ``tag.string`` is not a supported way to empty a tag.
                tag.clear()

    def _remove_empty_tags(self, soup: "BeautifulSoup"):
        """Remove tags that have no text and no child tags.

        Void elements and elements listed in ``_KEEP_WHEN_EMPTY`` are never
        removed. This is a single pass: a parent that only becomes empty
        because its children were removed is kept. The tree is modified in
        place.
        """
        protected = self._VOID_ELEMENTS | self._KEEP_WHEN_EMPTY

        # Collect first, then remove, so the traversal never walks a tree
        # that is being mutated underneath it.
        removable = [
            tag for tag in soup.find_all()
            if tag.name not in protected
            and tag.find(True) is None
            and not tag.get_text(strip=True)
        ]

        for tag in removable:
            tag.decompose()

    def _normalize_whitespace(self, soup: "BeautifulSoup"):
        """Collapse whitespace runs in ordinary text nodes to single spaces.

        Text anywhere inside a whitespace-sensitive element (see
        ``_WHITESPACE_SENSITIVE``) is left alone, as are comments, doctypes
        and other preformatted nodes. Whitespace-only text nodes are left
        as they are. The tree is modified in place.
        """
        for text_node in soup.find_all(string=True):
            # Comment / Doctype / CData etc. are string subclasses too;
            # replacing them with a plain str would turn them into visible text.
            if isinstance(text_node, PreformattedString):
                continue
            # Check every ancestor, not just the direct parent, so that
            # e.g. <pre><span>...</span></pre> is preserved.
            if text_node.find_parent(self._WHITESPACE_SENSITIVE) is not None:
                continue

            normalized = re.sub(r'\s+', ' ', text_node).strip()
            if normalized and normalized != text_node:
                text_node.replace_with(normalized)

    def _fix_attributes(self, soup: "BeautifulSoup", sort_attributes: bool = False):
        """Normalize ``class`` / ``style`` values and optionally sort attributes.

        ``class`` is rewritten as its unique class names in sorted order,
        joined by single spaces. Whitespace runs in a string ``style`` value
        are collapsed to single spaces. With ``sort_attributes`` the tag's
        attributes are re-inserted in alphabetical key order. The tree is
        modified in place.
        """
        for tag in soup.find_all():
            if not tag.attrs:
                continue

            if 'class' in tag.attrs:
                raw_class = tag['class']
                # The parser normally yields a list of names, but a value
                # assigned as a plain string must be split, not iterated
                # character by character.
                names = raw_class.split() if isinstance(raw_class, str) else raw_class
                tag['class'] = ' '.join(sorted(set(names)))

            style = tag.attrs.get('style')
            if isinstance(style, str):
                tag['style'] = ' '.join(style.split())

            if sort_attributes and len(tag.attrs) > 1:
                # Keys are unique, so sorting the (key, value) pairs never
                # needs to compare values.
                sorted_attrs = sorted(tag.attrs.items())
                tag.attrs.clear()
                tag.attrs.update(sorted_attrs)

    def _additional_formatting(self, html: str, indent_chars: int) -> str:
        """Re-indent prettified HTML and tidy whitespace.

        Each leading whitespace character of a line is treated as one
        nesting level (which is what ``prettify()`` emits) and replaced by
        ``indent_chars`` spaces. Trailing whitespace is stripped, leading
        blank lines are dropped and consecutive blank lines are collapsed
        into one.

        Note: the re-indentation is purely line based, so leading whitespace
        on lines inside ``<pre>`` blocks is rescaled as well.

        Args:
            html: Output of ``prettify()`` (or any line-oriented HTML).
            indent_chars: Spaces per nesting level; negative values are
                treated as 0.

        Returns:
            The reformatted HTML string.
        """
        width = max(indent_chars, 0)
        result = []

        for raw_line in html.split('\n'):
            content = raw_line.strip()
            if content:
                depth = len(raw_line) - len(raw_line.lstrip())
                result.append(' ' * (depth * width) + content)
            elif result and result[-1]:
                # Keep at most one blank line, and none at the very top.
                result.append('')

        return '\n'.join(result)

    def minify(self, html_content: str) -> str:
        """Minify HTML by removing comments and unnecessary whitespace.

        Comments are removed, whitespace-only text between tags is dropped
        and whitespace runs inside text are collapsed to a single space.
        Text inside whitespace-sensitive elements (see
        ``_WHITESPACE_SENSITIVE``) is preserved verbatim.

        Args:
            html_content: Raw HTML string.

        Returns:
            The minified HTML string.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        # find_all() returns a snapshot list, so nodes can be removed or
        # replaced while looping over it.
        for node in soup.find_all(string=True):
            if isinstance(node, Comment):
                node.extract()
                continue
            # Doctype, CData, ... must be emitted unchanged.
            if isinstance(node, PreformattedString):
                continue
            if node.find_parent(self._WHITESPACE_SENSITIVE) is not None:
                continue

            collapsed = re.sub(r'\s+', ' ', node)
            if not collapsed.strip():
                node.extract()
            elif collapsed != node:
                node.replace_with(collapsed)

        return str(soup).strip()

    def inline_css(self, html_content: str, css_rules: Optional[Dict[str, Dict[str, str]]] = None) -> str:
        """Convert CSS rules to inline styles.

        For every selector in ``css_rules`` the declarations are appended to
        the ``style`` attribute of each matching element, after any style the
        element already has. Selectors with no declarations are skipped.

        Args:
            html_content: Raw HTML string.
            css_rules: Mapping of CSS selector -> {property: value}. ``None``
                or an empty mapping leaves the styles untouched.

        Returns:
            The HTML string with the styles inlined.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        if css_rules:
            for selector, styles in css_rules.items():
                if not styles:
                    continue
                declarations = '; '.join(f"{prop}: {value}" for prop, value in styles.items())

                for tag in soup.select(selector):
                    # Drop a trailing ';' so joining never produces ';;'.
                    existing = (tag.get('style') or '').strip().rstrip(';').rstrip()
                    tag['style'] = f"{existing}; {declarations}" if existing else declarations

        return str(soup)

    def add_responsive_meta(self, html_content: str) -> str:
        """Add a responsive viewport meta tag unless one is already present.

        The tag is inserted as the first child of ``<head>``. If there is no
        ``<head>``, one is created: as the first child of ``<html>`` when
        that exists, otherwise in front of the first tag of the fragment.

        Args:
            html_content: Raw HTML string.

        Returns:
            The HTML string including a viewport meta tag.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        if soup.find('meta', attrs={'name': 'viewport'}) is not None:
            return str(soup)

        viewport_meta = soup.new_tag('meta')
        viewport_meta.attrs = {
            'name': 'viewport',
            'content': 'width=device-width, initial-scale=1.0'
        }

        head = soup.find('head')
        if head is not None:
            head.insert(0, viewport_meta)
            return str(soup)

        head = soup.new_tag('head')
        head.append(viewport_meta)

        html_tag = soup.find('html')
        if html_tag is not None:
            html_tag.insert(0, head)
        else:
            # Fragment without <html>: put the head before the first tag so
            # a leading doctype stays first.
            first_tag = soup.find(True)
            if first_tag is not None:
                first_tag.insert_before(head)
            else:
                soup.append(head)

        return str(soup)

    def fix_broken_html(self, html_content: str) -> str:
        """Attempt to fix common HTML issues.

        The parser closes unclosed tags. In addition, a missing ``<html>``
        element is created around the whole document (a doctype stays in
        front of it), and a missing ``<body>`` is created inside ``<html>``
        around everything except ``<head>``.

        Args:
            html_content: Raw HTML string.

        Returns:
            The repaired HTML string.
        """
        soup = BeautifulSoup(html_content, 'html.parser')

        html_tag = soup.find('html')
        if html_tag is None:
            html_tag = soup.new_tag('html')
            # append() moves a node out of its old parent, so iterate over a
            # copy of the child list.
            for node in list(soup.contents):
                if isinstance(node, Doctype):
                    continue
                html_tag.append(node)
            soup.append(html_tag)

        if soup.find('body') is None:
            body = soup.new_tag('body')
            for node in list(html_tag.contents):
                # Text nodes may not expose a tag name; treat them as body content.
                if getattr(node, 'name', None) == 'head':
                    continue
                body.append(node)
            # <head> was left in place, so the body ends up after it.
            html_tag.append(body)

        return str(soup)


# Example usage and demonstration
def demo():
    """Run the beautifier over a deliberately messy document and print each stage.

    Prints, in order: the original markup, the beautified markup, the
    beautified markup with a viewport meta tag added (truncated to 500
    characters), and the minified original.
    """
    rule = "=" * 80

    def banner(title, spaced=True):
        # Section header: a rule, the title, another rule. Every section
        # except the first is preceded by an empty line.
        print("\n" + rule if spaced else rule)
        print(title)
        print(rule)

    # Intentionally untidy input: uneven spacing, missing quotes, empty tags.
    raw_markup = """
    <html><head><title>Messy   Page</title>  </head>
    <body>
    <div class="container"id="main" style="color:red;font-size:16px"><h1>Welcome</h1>
    <p>   This is   a   messy   HTML   page   with   extra   spaces.   </p>
    <div class="content"><div><span>Nested   content</span></div></div>
    <div> </div>
    <img src='test.jpg'>
    <input type=text value="test" >
    <table><tr><td>Data</td><td>More data</td></tr></table>
    </div>
    <!-- This is a comment -->
    <p></p>
    </body>
    </html>
    """

    tool = HTMLBeautifier()

    banner("ORIGINAL HTML:", spaced=False)
    print(raw_markup)

    banner("BEAUTIFIED HTML:")
    pretty = tool.beautify(
        raw_markup,
        indent_chars=2,
        sort_attributes=True,
        remove_empty_tags=True,
        fix_self_closing=True
    )
    print(pretty)

    banner("WITH RESPONSIVE META TAG:")
    with_viewport = tool.add_responsive_meta(pretty)
    # Only show the first 500 characters of long output.
    if len(with_viewport) > 500:
        print(with_viewport[:500] + "...")
    else:
        print(with_viewport)

    banner("MINIFIED HTML:")
    print(tool.minify(raw_markup))


if __name__ == "__main__":
    # beautifulsoup4 and cssutils must already be installed: both are
    # imported when this module is loaded, so a missing package raises
    # ImportError before execution can reach this point.
    demo()

    # Example of advanced usage with custom CSS inlining
    print("\n" + "=" * 80)
    print("ADVANCED USAGE - INLINE CSS:")
    print("=" * 80)

    beautifier = HTMLBeautifier()
    html_with_css = """
    <div class="card">Card content</div>
    <button class="btn">Click me</button>
    """

    # Selector -> declarations to be written into each match's style attribute.
    css_rules = {
        '.card': {
            'padding': '20px',
            'border': '1px solid #ddd',
            'border-radius': '5px',
            'margin': '10px'
        },
        '.btn': {
            'background': '#007bff',
            'color': 'white',
            'padding': '10px 20px',
            'border': 'none',
            'border-radius': '3px'
        }
    }

    inlined_html = beautifier.inline_css(html_with_css, css_rules)
    print(inlined_html)