import logging
import os
import re
import tempfile
from collections import Counter

import pdfplumber
from flask import Flask, render_template_string, request, jsonify
from werkzeug.utils import secure_filename

# Application object plus the settings used by the upload handling below.
app = Flask(__name__)
app.config.update({
    'MAX_CONTENT_LENGTH': 16 * 1024 * 1024,  # 16MB max file size
    'UPLOAD_FOLDER': 'temp_uploads',
})
logging.basicConfig(level=logging.INFO)

# Uploaded PDFs are written here temporarily, so the folder must exist
# before the first request is served.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# KS2 writing standards criteria.
# Each top-level key names one assessed criterion; every criterion carries a
# 'weight' used when combining per-criterion scores into the overall score,
# alongside the thresholds or reference values that criterion is checked
# against.
KS2_CRITERIA = {
    'punctuation': {
        # Punctuation marks a piece of writing is expected to make use of.
        'expected': [':', ';', ',', '.', '(', ')', '!', '?', '"', "'"],
        'weight': 0.2,
    },
    'sentence_structure': {
        # Acceptable range for the average number of words per sentence.
        'min_words_per_sentence': 8,
        'max_words_per_sentence': 25,
        'weight': 0.2,
    },
    'vocabulary': {
        # Connectives counted as "advanced" vocabulary (lower-case).
        'advanced_words': {
            'although', 'however', 'nevertheless', 'furthermore', 'moreover',
            'consequently', 'therefore', 'meanwhile', 'subsequently', 'despite',
        },
        'weight': 0.2,
    },
    'paragraphing': {
        'min_sentences_per_paragraph': 3,
        'weight': 0.2,
    },
    'text_length': {
        'min_words': 200,
        'weight': 0.2,
    },
}

def allowed_file(filename):
    """Return True when ``filename`` ends in a ``.pdf`` extension (any case)."""
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all means there is no extension to inspect.
        return False
    return extension.lower() == 'pdf'

def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of an uploaded PDF.

    The upload is written to a uniquely named temporary file inside
    ``app.config['UPLOAD_FOLDER']``, read with pdfplumber, and the temporary
    file is removed again whether or not extraction succeeds.

    Args:
        pdf_file: Uploaded file object exposing ``save(path)``.

    Returns:
        The text of all pages that yielded text, one page per line group,
        with leading and trailing whitespace stripped.

    Raises:
        ValueError: If no text could be extracted from any page.
        Exception: Any error raised while saving or parsing the file is
            logged and re-raised unchanged.
    """
    temp_path = None
    try:
        # Use a unique server-chosen name rather than the client's filename:
        # two simultaneous uploads with the same name would otherwise
        # overwrite (and delete) each other's file, and a filename that
        # sanitises to an empty string would yield the folder path itself.
        fd, temp_path = tempfile.mkstemp(
            suffix='.pdf', dir=app.config['UPLOAD_FOLDER']
        )
        os.close(fd)
        pdf_file.save(temp_path)

        with pdfplumber.open(temp_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]

        # Pages without extractable text yield a falsy value and are skipped.
        text = "".join(
            f"{page_text}\n" for page_text in page_texts if page_text
        ).strip()

        if not text:
            raise ValueError("No text could be extracted from the PDF")

        return text

    except Exception as e:
        logging.error(f"PDF extraction error: {str(e)}")
        raise

    finally:
        # Best-effort cleanup: a failure to delete must not mask the result
        # or the original exception.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except Exception as e:
                logging.error(f"Error removing temporary file: {str(e)}")

# Maps typographic (curly) quotation marks to their plain ASCII equivalents.
# Written with Unicode escapes so the characters cannot be silently
# flattened to ASCII by an editor or copy/paste.
_QUOTE_TRANSLATION = str.maketrans({
    '\u201c': '"',  # left double quotation mark
    '\u201d': '"',  # right double quotation mark
    '\u2018': "'",  # left single quotation mark
    '\u2019': "'",  # right single quotation mark / apostrophe
})


def clean_text(text):
    """Clean and normalize text.

    Collapses every run of whitespace (including newlines) to a single
    space, converts curly quotes and apostrophes to their ASCII forms, and
    strips leading/trailing whitespace.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    # Remove excessive whitespace
    text = re.sub(r'\s+', ' ', text)
    # Normalize quotes and apostrophes so that downstream checks looking for
    # the ASCII characters '"' and "'" also see their typographic variants.
    return text.translate(_QUOTE_TRANSLATION).strip()

def _split_sentences(text):
    """Return the non-empty sentences of ``text``, split on runs of . ! or ?"""
    return [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]


def analyze_text(text):
    """Score a piece of writing against every criterion in KS2_CRITERIA.

    Args:
        text: Raw text of the writing sample. Blank lines are treated as
            paragraph breaks.

    Returns:
        A dict with the keys:
            meets_standard: True when the weighted score is at least 0.7.
            score: Weighted overall score as a percentage (one decimal).
            improvements: Up to three suggestions for the writer.
            detailed_scores: Per-criterion score as a percentage.
            stats: word_count, sentence_count, paragraph_count and
                avg_words_per_sentence.

    Raises:
        ValueError: If the text is empty, contains no sentences, or the
            computed overall score falls outside the range 0..1.
    """
    try:
        # Paragraphs must be found in the raw text: clean_text() collapses
        # every whitespace run, blank lines included, to a single space.
        paragraphs = [
            p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()
        ]

        # Clean the text
        text = clean_text(text)

        # Basic validation
        if not text:
            raise ValueError("No text provided for analysis")

        sentences = _split_sentences(text)
        words = [w.lower() for w in re.findall(r'\b\w+\b', text)]

        if not sentences:
            raise ValueError("No valid sentences found in the text")

        # Initialize scores and improvements
        scores = {}
        improvements = []

        # Check text length
        min_words = KS2_CRITERIA['text_length']['min_words']
        word_count = len(words)
        scores['text_length'] = min(word_count / min_words, 1.0)

        if word_count < min_words:
            improvements.append(f"Text length ({word_count} words) is below the expected {min_words} words - try to write more")

        # Check punctuation: the score is the share of expected marks that
        # appear at least once.
        expected_punct = KS2_CRITERIA['punctuation']['expected']
        punct_used = {char for char in text if char in expected_punct}
        scores['punctuation'] = min(len(punct_used) / len(expected_punct), 1.0)

        if scores['punctuation'] < 0.7:
            missing_punct = [p for p in expected_punct if p not in punct_used]
            improvements.append(f"Try using more varied punctuation. Consider using: {', '.join(missing_punct[:3])}")

        # Check sentence structure
        min_len = KS2_CRITERIA['sentence_structure']['min_words_per_sentence']
        max_len = KS2_CRITERIA['sentence_structure']['max_words_per_sentence']
        avg_words_per_sentence = len(words) / len(sentences)
        scores['sentence_structure'] = (
            1.0 if min_len <= avg_words_per_sentence <= max_len else 0.5
        )

        if scores['sentence_structure'] < 1.0:
            if avg_words_per_sentence < min_len:
                improvements.append("Try writing longer, more detailed sentences")
            else:
                improvements.append("Some sentences may be too long - try breaking them up")

        # Check vocabulary: five uses of advanced words earn full marks.
        advanced_words = KS2_CRITERIA['vocabulary']['advanced_words']
        advanced_words_used = sum(1 for word in words if word in advanced_words)
        scores['vocabulary'] = min(advanced_words_used / 5, 1.0)

        if scores['vocabulary'] < 0.7:
            distinct_words = set(words)
            # Sorted so the suggestions are the same on every run (set
            # iteration order for strings varies between processes).
            unused_advanced_words = sorted(
                word for word in advanced_words if word not in distinct_words
            )[:3]
            improvements.append(f"Include more advanced connecting words such as: {', '.join(unused_advanced_words)}")

        # Check paragraphing: the score is the share of paragraphs that
        # contain at least the minimum number of sentences.
        min_sentences = KS2_CRITERIA['paragraphing']['min_sentences_per_paragraph']
        developed_paragraphs = sum(
            1 for paragraph in paragraphs
            if len(_split_sentences(paragraph)) >= min_sentences
        )
        scores['paragraphing'] = (
            developed_paragraphs / len(paragraphs) if paragraphs else 0.0
        )

        if scores['paragraphing'] < 0.7:
            improvements.append(f"Try organising your writing into paragraphs of at least {min_sentences} sentences")

        # Calculate final score with validation
        total_score = sum(
            scores[criterion] * KS2_CRITERIA[criterion]['weight']
            for criterion in scores
        )

        if not 0 <= total_score <= 1:
            raise ValueError(f"Invalid total score calculated: {total_score}")

        meets_standard = total_score >= 0.7

        return {
            'meets_standard': meets_standard,
            'score': round(total_score * 100, 1),
            'improvements': improvements[:3],
            'detailed_scores': {k: round(v * 100, 1) for k, v in scores.items()},
            'stats': {
                'word_count': word_count,
                'sentence_count': len(sentences),
                'paragraph_count': len(paragraphs),
                'avg_words_per_sentence': round(avg_words_per_sentence, 1)
            }
        }

    except Exception as e:
        logging.error(f"Analysis error: {str(e)}")
        raise

# Single-page Jinja2 template for the upload form and the assessment report.
# Template variables:
#   error  - message shown in the error box when truthy.
#   result - when truthy, an object providing meets_standard, score,
#            detailed_scores (mapping), stats (mapping) and improvements
#            (iterable), rendered as the report.
# The inline script validates the chosen file client-side and shows a
# "please wait" notice while the form is being submitted.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>KS2 Writing Assessment Tool</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }
        .container {
            background: #f5f5f5;
            padding: 20px;
            border-radius: 5px;
        }
        .result {
            margin-top: 20px;
            padding: 15px;
            background: #fff;
            border-radius: 5px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .standard {
            font-weight: bold;
            color: #2c3e50;
        }
        .improvements {
            margin-top: 10px;
        }
        .improvement-item {
            color: #34495e;
            margin: 5px 0;
            padding: 5px 0;
        }
        .error {
            color: #e74c3c;
            padding: 10px;
            background: #fadbd8;
            border-radius: 5px;
            margin: 10px 0;
        }
        .loading {
            display: none;
            margin: 20px 0;
        }
        .detailed-scores {
            margin-top: 20px;
            padding: 10px;
            background: #edf2f7;
            border-radius: 5px;
        }
        .stats {
            margin-top: 10px;
            font-size: 0.9em;
            color: #666;
        }
    </style>
    <script>
        function showLoading() {
            document.getElementById('loading').style.display = 'block';
            // The result panel only exists after a successful analysis, so
            // it must be looked up defensively.
            const resultBox = document.getElementById('result');
            if (resultBox) {
                resultBox.style.display = 'none';
            }
            return true;
        }

        function validateFile() {
            const fileInput = document.getElementById('pdf-file');
            const file = fileInput.files[0];
            if (!file) {
                alert('Please select a file');
                return false;
            }
            if (!file.name.toLowerCase().endsWith('.pdf')) {
                alert('Please upload a PDF file');
                return false;
            }
            if (file.size > 16 * 1024 * 1024) {
                alert('File size must be less than 16MB');
                return false;
            }
            return true;
        }
    </script>
</head>
<body>
    <div class="container">
        <h1>KS2 Writing Assessment Tool</h1>

        <form action="/" method="post" enctype="multipart/form-data" onsubmit="return validateFile() && showLoading()">
            <input type="file" name="pdf" id="pdf-file" accept=".pdf" required>
            <button type="submit">Assess Writing</button>
        </form>

        <div id="loading" class="loading">
            Analyzing document... Please wait...
        </div>

        {% if error %}
        <div class="error">
            {{ error }}
        </div>
        {% endif %}

        {% if result %}
        <div id="result" class="result">
            <p class="standard">
                This writing is
                {% if result.meets_standard %}
                    at the expected standard
                {% else %}
                    working towards the expected standard
                {% endif %}
                for Key Stage 2 (Overall Score: {{ result.score }}%)
            </p>

            <div class="detailed-scores">
                <h3>Detailed Scores:</h3>
                <ul>
                {% for criterion, score in result.detailed_scores.items() %}
                    <li>{{ criterion|replace('_', ' ')|title }}: {{ score }}%</li>
                {% endfor %}
                </ul>
            </div>

            <div class="stats">
                <h3>Text Statistics:</h3>
                <ul>
                {% for stat, value in result.stats.items() %}
                    <li>{{ stat|replace('_', ' ')|title }}: {{ value }}</li>
                {% endfor %}
                </ul>
            </div>

            {% if result.improvements %}
            <div class="improvements">
                <h3>Ways to improve:</h3>
                <ul>
                {% for improvement in result.improvements %}
                    <li class="improvement-item">{{ improvement }}</li>
                {% endfor %}
                </ul>
            </div>
            {% endif %}
        </div>
        {% endif %}
    </div>
</body>
</html>
"""

@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the upload form and, on POST, the assessment of the uploaded PDF.

    A GET request renders the empty form. A POST request validates the
    ``pdf`` upload, extracts its text, analyses it, and renders either the
    result or an error message in the same page.

    Returns:
        The rendered HTML page.
    """
    error = None
    result = None

    if request.method == 'POST':
        try:
            if 'pdf' not in request.files:
                raise ValueError('No file uploaded')

            pdf_file = request.files['pdf']
            # `not` also covers a missing (None) filename, which would
            # otherwise fail inside allowed_file().
            if not pdf_file.filename:
                raise ValueError('No file selected')

            if not allowed_file(pdf_file.filename):
                raise ValueError('Invalid file type. Please upload a PDF file')

            text = extract_text_from_pdf(pdf_file)
            result = analyze_text(text)

        except ValueError as e:
            # Validation failures carry messages written for the user.
            error = str(e)
            logging.error(f"Error processing request: {str(e)}")
        except Exception:
            # Anything else is unexpected: keep the details (paths, parser
            # internals) in the server log rather than showing them to the
            # user.
            logging.exception("Error processing request")
            error = ('Could not process the uploaded file. Please check that '
                     'it is a valid PDF within the size limit and try again.')

    return render_template_string(HTML_TEMPLATE, result=result, error=error)

if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is explicitly requested for local development via FLASK_DEBUG=1.
    app.run(debug=os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true'))