#!/usr/bin/env python3
"""
自动上传会话记录到本地向量记忆
- 每天 12:00 和 00:00 运行
- 记录每个文件已上传的字节偏移量，支持增量上传
- 提取对话内容，分块存储
"""

import os
import sys
import json
from datetime import datetime
from pathlib import Path

# 导入本地记忆模块
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from local_memory import add_memory

# Directory that is scanned for session logs (its *.jsonl files).
SESSIONS_DIR = os.path.expanduser("~/.hermes/sessions")
# JSON file that persists the per-file upload offsets and last run time between runs.
STATE_FILE = os.path.expanduser("~/.hermes/scripts/.uploaded_sessions.json")

def load_state():
    """Load the persisted upload state from ``STATE_FILE``.

    Returns:
        A dict with at least the keys ``"offsets"`` (mapping of session file
        name to the offset already uploaded) and ``"last_run"``. When the
        state file does not exist yet, a fresh state with empty offsets and
        ``last_run`` set to ``None`` is returned.

    Raises:
        ValueError: If the file is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``) or its top-level value is not a JSON object.
            This is deliberately not swallowed: silently starting from an
            empty state would re-upload every session.
    """
    # EAFP: open directly instead of exists()-then-open, which is race-prone.
    # The encoding is pinned so reading matches what save_state() writes,
    # independent of the process locale.
    try:
        with open(STATE_FILE, 'r', encoding='utf-8') as f:
            state = json.load(f)
    except FileNotFoundError:
        return {"offsets": {}, "last_run": None}

    if not isinstance(state, dict):
        raise ValueError(
            f"{STATE_FILE}: expected a JSON object, got {type(state).__name__}"
        )

    # Guarantee the same keys the default state provides.
    state.setdefault("offsets", {})
    state.setdefault("last_run", None)
    return state

def save_state(state):
    """Persist ``state`` to ``STATE_FILE`` as UTF-8 JSON.

    The data is written to a temporary sibling file and then moved over the
    real file with ``os.replace``, so an interrupted write never leaves a
    truncated state file behind.

    Args:
        state: JSON-serializable state dict to store.
    """
    state_dir = os.path.dirname(STATE_FILE)
    if state_dir:
        os.makedirs(state_dir, exist_ok=True)

    tmp_path = STATE_FILE + ".tmp"
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding must be explicit rather than locale-dependent.
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(state, f, indent=2, ensure_ascii=False)
    os.replace(tmp_path, STATE_FILE)

def _session_date_from(record):
    """Return the first 10 characters of a record's string timestamp, or None."""
    ts = record.get('timestamp', '')
    if isinstance(ts, str) and ts:
        return ts[:10]
    return None


def _find_earlier_session_date(f, end_offset):
    """Scan a binary file from its start up to ``end_offset`` for a session date.

    Returns the date of the last ``session_meta`` record found in that range,
    or ``None`` when there is none.
    """
    f.seek(0)
    found = None
    position = 0
    while position < end_offset:
        raw = f.readline()
        if not raw:
            break
        position += len(raw)
        # Cheap pre-filter so only candidate lines pay for JSON parsing.
        if b'session_meta' not in raw:
            continue
        try:
            record = json.loads(raw)
        except (ValueError, RecursionError):
            continue
        if isinstance(record, dict) and record.get('role') == 'session_meta':
            found = _session_date_from(record) or found
    return found


def extract_new_messages(filepath, start_offset=0):
    """Extract chat messages added to a JSONL session file after ``start_offset``.

    The file is read in binary mode so that ``start_offset`` and the returned
    offset are plain byte counts that can be compared with the file size
    (text-mode ``tell()`` values are opaque and not guaranteed to be bytes).

    Each line is expected to hold one JSON object and is handled as follows:

    - ``role == 'session_meta'``: the first 10 characters of its string
      ``timestamp`` become the session date; the record itself is not emitted.
    - ``role`` of ``'tool'`` or ``'tool_result'``: skipped.
    - ``content`` that is not a string or is shorter than 5 characters: skipped.
    - ``content`` longer than 2000 characters: cut to 2000 plus ``"..."``.
    - Blank lines, undecodable lines, invalid JSON and non-object JSON: skipped.

    A final line that has no trailing newline and does not parse is treated as
    still being written: reading stops before it and the returned offset does
    not include it, so it is picked up again on the next call.

    When no ``session_meta`` record appears in the newly read part and
    ``start_offset`` is greater than zero, the part of the file before
    ``start_offset`` is scanned for one so incremental reads keep their date.

    Args:
        filepath: Path of the session file.
        start_offset: Byte offset at which to resume reading.

    Returns:
        A tuple ``(messages, session_date, new_offset)``: the formatted
        ``"[role]: content"`` strings, the session date (or ``None`` if no
        usable ``session_meta`` record exists), and the byte offset up to
        which the file has been consumed.
    """
    messages = []
    session_date = None

    with open(filepath, 'rb') as f:
        # Resume where the previous run stopped.
        f.seek(start_offset)
        new_offset = start_offset

        for raw in f:
            try:
                # json.loads accepts UTF-8 bytes; decode errors are ValueErrors.
                msg = json.loads(raw) if raw.strip() else None
            except (ValueError, RecursionError):
                if not raw.endswith(b'\n'):
                    # Incomplete trailing line: leave it for the next run.
                    break
                msg = None

            new_offset += len(raw)
            if not isinstance(msg, dict):
                continue

            role = msg.get('role', '')

            if role == 'session_meta':
                session_date = _session_date_from(msg) or session_date
                continue

            # Tool traffic is not stored as memory.
            if role in ('tool', 'tool_result'):
                continue

            content = msg.get('content', '')
            if not isinstance(content, str) or len(content) < 5:
                continue

            # Cap very long messages.
            if len(content) > 2000:
                content = content[:2000] + "..."

            messages.append(f"[{role}]: {content}")

        if session_date is None and start_offset > 0:
            session_date = _find_earlier_session_date(f, start_offset)

    return messages, session_date, new_offset

def chunk_messages(messages, max_chunk_size=1500):
    """Group messages into newline-joined chunks of roughly ``max_chunk_size``.

    Messages are packed in order. A chunk is closed as soon as adding the next
    message would push its running size (message lengths plus one separator
    per message) above ``max_chunk_size``. A single message is never split, so
    a message longer than the limit ends up as a chunk of its own.

    Args:
        messages: Iterable of message strings.
        max_chunk_size: Soft size limit per chunk, in characters.

    Returns:
        List of chunk strings; empty when there are no messages.
    """
    finished = []
    bucket = []
    bucket_len = 0

    for text in messages:
        overflow = bucket_len + len(text) > max_chunk_size
        if bucket and overflow:
            finished.append("\n".join(bucket))
            bucket, bucket_len = [], 0
        bucket.append(text)
        # +1 accounts for the newline that joins this message to the next.
        bucket_len += len(text) + 1

    if not bucket:
        return finished

    finished.append("\n".join(bucket))
    return finished

def main():
    """Upload new content of every session file and persist the progress.

    For each ``*.jsonl`` file in ``SESSIONS_DIR`` the part after the stored
    offset is extracted, chunked and passed to ``add_memory``. Progress rules:

    - The offset of a file only advances when all of its chunks were stored.
      On the first failing chunk the file is abandoned for this run and is
      retried from the old offset next time; chunks stored before the failure
      will then be uploaded again.
    - Fewer than two new messages are not uploaded, but the offset still
      advances (so a lone trailing message is dropped).
    - A stored offset larger than the current file size means the file was
      truncated or replaced; it is then read again from the beginning.
    - State is written in a ``finally`` block, so progress made before an
      interruption or unexpected error is not lost.
    """
    print(f"[{datetime.now()}] 开始自动上传会话...")

    state = load_state()
    offsets = state.get("offsets", {})
    if not isinstance(offsets, dict):
        offsets = {}

    # All session files, in a stable order.
    session_files = sorted(Path(SESSIONS_DIR).glob("*.jsonl"))

    success_count = 0
    error_count = 0
    files_processed = 0

    try:
        for filepath in session_files:
            filename = filepath.name
            last_offset = offsets.get(filename, 0)
            if not isinstance(last_offset, int) or last_offset < 0:
                last_offset = 0

            try:
                # stat() is inside the try: the file may vanish after glob().
                file_size = filepath.stat().st_size

                if last_offset > file_size:
                    # File shrank since the last run; start over.
                    last_offset = 0

                # Nothing new in this file.
                if last_offset >= file_size:
                    continue

                files_processed += 1

                messages, session_date, new_offset = extract_new_messages(filepath, last_offset)

                if len(messages) < 2:  # too little new content: skip it but advance
                    offsets[filename] = new_offset
                    continue

                chunks = chunk_messages(messages)
                prefix = f"[会话日期: {session_date or '未知'}] [文件: {filename}] [增量上传]\n"

                uploaded = 0
                for chunk in chunks:
                    try:
                        add_memory(prefix + chunk)
                    except Exception as e:
                        print(f"  添加记忆失败: {e}")
                        error_count += 1
                        # Stop here; the remaining chunks are retried next run.
                        break
                    uploaded += 1
                    success_count += 1

                if uploaded < len(chunks):
                    # Keep the old offset so the failed content is not lost.
                    print(f"  ✗ {filename}: 上传未完成，偏移量保持 {last_offset}，下次运行时重试")
                    continue

                offsets[filename] = new_offset
                print(f"  ✓ {filename}: +{len(messages)} 条消息, {len(chunks)} 个分块 (offset: {last_offset} -> {new_offset})")

            except Exception as e:
                print(f"  ✗ {filename}: {e}")
                error_count += 1
    finally:
        # Always record what was completed, even when the run is interrupted.
        state["offsets"] = offsets
        state["last_run"] = datetime.now().isoformat()
        save_state(state)

    if files_processed == 0:
        print("没有新内容需要上传")
    else:
        print(f"\n完成！处理 {files_processed} 个文件, 成功: {success_count} 块, 错误: {error_count} 块")

# Run the upload only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
