package task

import (
	"bytes"
	"compress/gzip"
	"consistent_sql/logger"
	"consistent_sql/model"
	"consistent_sql/protocol"
	"context"
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
)

// StreamTaskStatus tracks the runtime state of a single stream task.
type StreamTaskStatus struct {
	// IsRunning is set to true when the task is started and reset to false
	// when it finishes, fails or is stopped.
	IsRunning      bool
	// Progress holds the counters, status string and last GTID of the task.
	Progress       *model.StreamProgress
	// LastCheckpoint is the checkpoint loaded when the task was started, if any.
	LastCheckpoint *model.Checkpoint
	// Error is the error that ended the task, or nil.
	Error          error
	// mu is taken by updateProgress while it rewrites Progress.
	// NOTE(review): the other writers in this file only hold the manager's Mu.
	mu             sync.Mutex
}

// StreamTaskManager manages multiple stream tasks.
type StreamTaskManager struct {
	// Tasks maps a job ID to the cancel function of that job's context.
	Tasks         map[string]context.CancelFunc
	// Statuses maps a job ID to its tracked status.
	Statuses      map[string]*StreamTaskStatus
	// Mu guards Tasks and Statuses.
	Mu            sync.Mutex
	// CheckpointDir is the directory used for checkpoint files when a caller
	// does not supply an explicit checkpoint path.
	CheckpointDir string
}

// NewStreamTaskManager creates a stream task manager whose default checkpoint
// files live under checkpointDir.
//
// When checkpointDir is non-empty the directory (including missing parents)
// is created. A failure to create it is logged rather than returned because
// the constructor has no error result; later checkpoint writes will then
// report their own errors.
func NewStreamTaskManager(checkpointDir string) *StreamTaskManager {
	if checkpointDir != "" {
		if err := os.MkdirAll(checkpointDir, 0755); err != nil {
			logger.Error("无法创建检查点目录 %s: %v", checkpointDir, err)
		}
	}

	return &StreamTaskManager{
		Tasks:         make(map[string]context.CancelFunc),
		Statuses:      make(map[string]*StreamTaskStatus),
		CheckpointDir: checkpointDir,
	}
}

// StartStreamTask registers and starts a stream synchronization task.
//
// The request must carry a GTID and a job ID that is not already registered;
// otherwise an error is sent to the client over conn and nothing is started.
// On success a "created" response is sent, an optional checkpoint is loaded,
// and the database-specific stream handler is run in a new goroutine. When the
// handler returns, the task status is set to "canceled", "failed" or
// "completed".
func (stm *StreamTaskManager) StartStreamTask(rd model.RecvData, conn net.Conn) {
	// A GTID is mandatory.
	if rd.Gtid == "" {
		logger.Error("错误：未提供GTID，无法启动同步任务: %s", rd.JobId)
		protocol.SendError(conn, "1", "必须提供GTID才能启动同步任务", rd.JobId)
		return
	}

	stm.Mu.Lock()
	if _, exists := stm.Tasks[rd.JobId]; exists {
		stm.Mu.Unlock()
		logger.Info("流任务已存在: %s", rd.JobId)
		protocol.SendError(conn, "1", "任务ID "+rd.JobId+" 已存在", rd.JobId)
		return
	}

	logger.Info("创建流同步任务，ID=%s, 类型=%s, GTID=%s", rd.JobId, rd.DumpDbType, rd.Gtid)

	// Keep a direct reference to the status so later updates never have to
	// index the shared map outside the lock.
	now := time.Now()
	status := &StreamTaskStatus{
		IsRunning: true,
		Progress: &model.StreamProgress{
			JobID:          rd.JobId,
			StartTime:      now,
			LastUpdateTime: now,
			Status:         "running",
			LastGtid:       rd.Gtid,
		},
	}
	stm.Statuses[rd.JobId] = status

	// The cancel function is what StopTask/StopAllTasks use to end the task.
	ctx, cancel := context.WithCancel(context.Background())
	stm.Tasks[rd.JobId] = cancel
	stm.Mu.Unlock()

	// Tell the client the task has been created.
	resp := map[string]interface{}{
		"error_code": "0",
		"error_msg":  "",
		"jobId":      rd.JobId,
		"status":     "created",
		"gtid":       rd.Gtid,
	}
	// Marshalling a map of plain strings cannot fail.
	respJSON, _ := json.Marshal(resp)
	if err := protocol.SendResponse(conn, protocol.MRspdumpjob, string(respJSON)); err != nil {
		logger.Warn("发送任务创建响应失败: %s: %v", rd.JobId, err)
	}

	// Load the checkpoint when checkpointing is enabled and a path is given.
	var checkpoint *model.Checkpoint
	if rd.CheckpointEnabled && rd.CheckpointPath != "" {
		checkpoint = stm.loadCheckpoint(rd.JobId, rd.CheckpointPath)
		if checkpoint != nil {
			stm.Mu.Lock()
			status.LastCheckpoint = checkpoint
			// A GTID stored in the checkpoint takes precedence over the request.
			if checkpoint.Gtid != "" {
				status.Progress.LastGtid = checkpoint.Gtid
				logger.Info("从检查点恢复GTID: %s", checkpoint.Gtid)
			}
			stm.Mu.Unlock()
		}
	}

	go func() {
		// Registered first so it runs last: releases the context and lets any
		// goroutine still waiting on ctx.Done() exit once the task is over.
		defer cancel()
		defer func() {
			if r := recover(); r != nil {
				logger.Error("流任务 %s 发生严重错误: %v", rd.JobId, r)
				stm.updateTaskError(rd.JobId, fmt.Errorf("任务崩溃: %v", r))
			}
		}()

		// Dispatch on the source database type.
		var err error
		switch rd.DumpDbType {
		case "mongodb":
			err = stm.processMongoStream(ctx, rd, conn, checkpoint)
		case "tdengine":
			err = stm.processTDEngineStream(ctx, rd, conn, checkpoint)
		case "elasticsearch":
			err = stm.processESStream(ctx, rd, conn, checkpoint)
		default:
			err = fmt.Errorf("不支持的数据库类型: %s", rd.DumpDbType)
		}

		// Sample cancellation before the deferred cancel() above fires.
		canceled := ctx.Err() != nil

		stm.Mu.Lock()
		defer stm.Mu.Unlock()

		// Skip the update when the task was removed, or removed and started
		// again under the same ID, while this goroutine was running.
		if current, exists := stm.Statuses[rd.JobId]; !exists || current != status {
			return
		}

		switch {
		case canceled:
			// Handlers return ctx.Err() when stopped; that is not a failure.
			status.Progress.Status = "canceled"
			logger.Info("流任务 %s 已取消", rd.JobId)
		case err != nil:
			status.Error = err
			status.Progress.Status = "failed"
			status.Progress.ErrorMsg = err.Error()
			logger.Error("流任务 %s 失败: %v", rd.JobId, err)
		default:
			status.Progress.Status = "completed"
			status.Progress.LastUpdateTime = time.Now()
			logger.Info("流任务 %s 已完成", rd.JobId)
		}
		status.IsRunning = false
	}()
}

// processMongoStream runs the (currently simulated) MongoDB change-stream sync.
//
// Every second it fabricates ten documents, packs them into one DataChunk
// (optionally gzip-compressed) and sends the chunk to the client over conn.
// Progress and, when enabled, a checkpoint are written at most every five
// seconds. The simulation ends once 100 documents have been produced.
//
// When checkpoint is non-nil, the chunk sequence number, byte counter, GTID
// and document counter are resumed from it.
//
// It returns ctx.Err() when the context is cancelled, the first processing
// error if one occurs, and nil on normal completion.
func (stm *StreamTaskManager) processMongoStream(ctx context.Context, rd model.RecvData, conn net.Conn, checkpoint *model.Checkpoint) error {
	logger.Info("开始MongoDB流式同步: %s，使用GTID: %s", rd.JobId, rd.Gtid)

	var (
		processedChunks int64
		totalBytes      int64
		processedCount  int
	)
	lastGtid := rd.Gtid // starting GTID, used for resuming
	lastProgressUpdate := time.Now()

	// Resume state from the checkpoint, if there is one.
	if checkpoint != nil {
		processedChunks = checkpoint.LastSequenceNum
		totalBytes = checkpoint.BytesProcessed

		// A GTID stored in the checkpoint takes precedence over the request.
		if checkpoint.Gtid != "" {
			lastGtid = checkpoint.Gtid
			logger.Info("从检查点恢复GTID进行断点续传: %s", lastGtid)
		}

		if checkpoint.Metadata != "" {
			var metadata map[string]interface{}
			if err := json.Unmarshal([]byte(checkpoint.Metadata), &metadata); err != nil {
				logger.Warn("解析检查点元数据失败: %v", err)
			} else if n, ok := metadata["processed_count"].(float64); ok {
				// Continue the document numbering so generated GTIDs do not
				// restart at 1 after a resume.
				processedCount = int(n)
			}
		}
	}

	// Both channels are buffered so the producer never blocks on a caller
	// that has already returned.
	errorChan := make(chan error, 1)
	resultChan := make(chan bool, 1)

	processor := stm.createStreamProcessor(rd.ParallelStreams)

	go func() {
		defer func() {
			if r := recover(); r != nil {
				logger.Error("MongoDB流处理崩溃: %v", r)
				errorChan <- fmt.Errorf("MongoDB流处理崩溃: %v", r)
			}
		}()

		processor.start(ctx, func(chunk model.DataChunk) (int64, error) {
			return int64(len(chunk.Data)), nil
		})
		// Stop the workers when this producer exits; otherwise they would
		// linger until the context is cancelled.
		defer processor.shutdown()

		// Simulated change-stream polling loop.
		ticker := time.NewTicker(1 * time.Second)
		defer ticker.Stop()

		for {
			select {
			case <-ctx.Done():
				resultChan <- true
				return
			case <-ticker.C:
				var mongoData []map[string]interface{}

				for i := 0; i < 10; i++ {
					processedCount++

					// GTID format: <job ID>-<document number>.
					currentGtid := fmt.Sprintf("%s-%d", rd.JobId, processedCount)

					document := map[string]interface{}{
						"_id":        fmt.Sprintf("doc_%d", processedCount),
						"data":       fmt.Sprintf("示例数据 %d", processedCount),
						"timestamp":  time.Now().UnixNano() / int64(time.Millisecond),
						"operation":  "insert",
						"database":   rd.DbDatabase,
						"collection": rd.DbTable,
						"gtid":       currentGtid,
					}

					mongoData = append(mongoData, document)

					// The counter only grows, so the newest document always
					// carries the latest GTID. A string comparison would sort
					// "job-10" before "job-9" and stall the GTID.
					lastGtid = currentGtid
				}

				if len(mongoData) > 0 {
					dataBytes, err := json.Marshal(mongoData)
					if err != nil {
						errorChan <- fmt.Errorf("序列化MongoDB数据失败: %v", err)
						return
					}

					if rd.EnableCompression {
						compressedBytes, err := compressData(dataBytes, rd.CompressionLevel)
						if err != nil {
							errorChan <- fmt.Errorf("压缩数据失败: %v", err)
							return
						}
						dataBytes = compressedBytes
					}

					chunk := model.DataChunk{
						ChunkID:     generateChunkID(rd.JobId, processedChunks, dataBytes),
						SequenceNum: processedChunks,
						Data:        dataBytes,
						Compressed:  rd.EnableCompression,
						CreateTime:  time.Now(),
						Size:        int64(len(dataBytes)),
						JobID:       rd.JobId,
						Gtid:        lastGtid,
					}

					dataJSON, err := json.Marshal(chunk)
					if err != nil {
						errorChan <- fmt.Errorf("序列化数据块失败: %v", err)
						return
					}
					err = protocol.SendResponse(conn, protocol.MRsprowmsg, string(dataJSON))
					if err != nil {
						errorChan <- fmt.Errorf("发送数据到客户端失败: %v", err)
						return
					}

					processedChunks++
					totalBytes += int64(len(dataBytes))

					// Throttle progress and checkpoint writes to one per 5s.
					if time.Since(lastProgressUpdate) > 5*time.Second {
						stm.updateProgress(rd.JobId, processedChunks, -1, totalBytes, lastGtid)
						lastProgressUpdate = time.Now()

						if rd.CheckpointEnabled {
							metadata := map[string]interface{}{
								"processed_count": processedCount,
								"last_timestamp":  time.Now().Unix(),
							}
							// A map of an int and an int64 always marshals.
							metadataBytes, _ := json.Marshal(metadata)

							cp := &model.Checkpoint{
								JobID:           rd.JobId,
								LastSequenceNum: processedChunks,
								LastUpdateTime:  time.Now(),
								BytesProcessed:  totalBytes,
								Metadata:        string(metadataBytes),
								Gtid:            lastGtid,
							}

							// A failed checkpoint must not abort the stream,
							// but it should not go unnoticed either.
							if err := stm.saveCheckpoint(cp, rd.CheckpointPath); err != nil {
								logger.Warn("保存检查点失败: %v", err)
							}
						}
					}
				}

				// Simulation only: finish after 100 documents.
				if processedCount >= 100 {
					resultChan <- true
					return
				}
			}
		}
	}()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-errorChan:
		return err
	case <-resultChan:
		// The receive orders this read after the producer's last write.
		stm.updateProgress(rd.JobId, processedChunks, -1, totalBytes, lastGtid)
		logger.Info("MongoDB流任务 %s 完成，共处理 %d 块数据", rd.JobId, processedChunks)
	}

	return nil
}

// processTDEngineStream runs the (currently simulated) TDEngine subscription sync.
//
// Every two seconds it fabricates five time-series records with strictly
// increasing timestamps, packs them into one DataChunk (optionally
// gzip-compressed) and sends the chunk to the client over conn. Progress and,
// when enabled, a checkpoint are written at most every five seconds. The
// simulation ends once 50 records have been produced.
//
// When checkpoint is non-nil, the chunk sequence number, byte counter, GTID,
// last timestamp and record counter are resumed from it.
//
// It returns ctx.Err() when the context is cancelled, the first processing
// error if one occurs, and nil on normal completion.
func (stm *StreamTaskManager) processTDEngineStream(ctx context.Context, rd model.RecvData, conn net.Conn, checkpoint *model.Checkpoint) error {
	logger.Info("开始TDEngine流式同步: %s，使用GTID: %s", rd.JobId, rd.Gtid)

	var (
		processedChunks int64
		totalBytes      int64
		lastTS          int64 // TDEngine uses the timestamp as the position marker
		processedCount  int
	)
	lastGtid := rd.Gtid
	lastProgressUpdate := time.Now()

	// Resume state from the checkpoint, if there is one.
	if checkpoint != nil {
		processedChunks = checkpoint.LastSequenceNum
		totalBytes = checkpoint.BytesProcessed

		// A GTID stored in the checkpoint takes precedence over the request.
		if checkpoint.Gtid != "" {
			lastGtid = checkpoint.Gtid
			logger.Info("从检查点恢复GTID进行断点续传: %s", lastGtid)
		}

		if checkpoint.Metadata != "" {
			var metadata map[string]interface{}
			if err := json.Unmarshal([]byte(checkpoint.Metadata), &metadata); err != nil {
				logger.Warn("解析检查点元数据失败: %v", err)
			} else {
				if ts, ok := metadata["last_timestamp"].(float64); ok {
					lastTS = int64(ts)
					logger.Info("从检查点恢复时间戳: %d", lastTS)
				}
				// Continue the record numbering written by the checkpoint.
				if n, ok := metadata["record_count"].(float64); ok {
					processedCount = int(n)
				}
			}
		}
	}

	// Both channels are buffered so the producer never blocks on a caller
	// that has already returned.
	errorChan := make(chan error, 1)
	resultChan := make(chan bool, 1)

	processor := stm.createStreamProcessor(rd.ParallelStreams)

	go func() {
		defer func() {
			if r := recover(); r != nil {
				logger.Error("TDEngine流处理崩溃: %v", r)
				errorChan <- fmt.Errorf("TDEngine流处理崩溃: %v", r)
			}
		}()

		processor.start(ctx, func(chunk model.DataChunk) (int64, error) {
			return int64(len(chunk.Data)), nil
		})
		// Stop the workers when this producer exits; otherwise they would
		// linger until the context is cancelled.
		defer processor.shutdown()

		// Simulated subscription polling loop.
		ticker := time.NewTicker(2 * time.Second)
		defer ticker.Stop()

		for {
			select {
			case <-ctx.Done():
				resultChan <- true
				return
			case <-ticker.C:
				var tdData []model.TDSourceDBData
				currentTime := time.Now().UnixNano() / int64(time.Millisecond)

				for i := 0; i < 5; i++ {
					processedCount++

					// Records are spaced one second apart and forced to be
					// strictly newer than anything already produced.
					recordTS := currentTime + int64(i*1000)
					if recordTS <= lastTS {
						recordTS = lastTS + 1
					}

					// GTID format: <job ID>-<record timestamp>.
					currentGtid := fmt.Sprintf("%s-%d", rd.JobId, recordTS)

					data := map[string]interface{}{
						"value":       float64(100 + processedCount),
						"device_id":   fmt.Sprintf("device_%d", processedCount%10),
						"description": fmt.Sprintf("测量值 %d", processedCount),
						"gtid":        currentGtid,
					}

					tdData = append(tdData, model.TDSourceDBData{
						Ts:     recordTS,
						Data:   data,
						Optype: "insert", // TDEngine data is mostly inserts
					})

					// recordTS is strictly increasing, so the newest record
					// always carries the latest position. Comparing GTID
					// strings against an arbitrary client GTID is unreliable.
					lastTS = recordTS
					lastGtid = currentGtid
				}

				if len(tdData) > 0 {
					dataBytes, err := json.Marshal(tdData)
					if err != nil {
						errorChan <- fmt.Errorf("序列化TDEngine数据失败: %v", err)
						return
					}

					if rd.EnableCompression {
						compressedBytes, err := compressData(dataBytes, rd.CompressionLevel)
						if err != nil {
							errorChan <- fmt.Errorf("压缩数据失败: %v", err)
							return
						}
						dataBytes = compressedBytes
					}

					chunk := model.DataChunk{
						ChunkID:     generateChunkID(rd.JobId, processedChunks, dataBytes),
						SequenceNum: processedChunks,
						Data:        dataBytes,
						Compressed:  rd.EnableCompression,
						CreateTime:  time.Now(),
						Size:        int64(len(dataBytes)),
						JobID:       rd.JobId,
						Gtid:        lastGtid,
					}

					dataJSON, err := json.Marshal(chunk)
					if err != nil {
						errorChan <- fmt.Errorf("序列化数据块失败: %v", err)
						return
					}
					err = protocol.SendResponse(conn, protocol.MRsprowmsg, string(dataJSON))
					if err != nil {
						errorChan <- fmt.Errorf("发送数据到客户端失败: %v", err)
						return
					}

					processedChunks++
					totalBytes += int64(len(dataBytes))

					// Throttle progress and checkpoint writes to one per 5s.
					if time.Since(lastProgressUpdate) > 5*time.Second {
						stm.updateProgress(rd.JobId, processedChunks, -1, totalBytes, lastGtid)
						lastProgressUpdate = time.Now()

						if rd.CheckpointEnabled {
							metadata := map[string]interface{}{
								"last_timestamp": lastTS,
								"table_name":     rd.DbTable,
								"record_count":   processedCount,
							}
							// NOTE(review): assumes rd.DbTable is a plain
							// string, so this marshal cannot fail.
							metadataBytes, _ := json.Marshal(metadata)

							cp := &model.Checkpoint{
								JobID:           rd.JobId,
								LastSequenceNum: processedChunks,
								LastUpdateTime:  time.Now(),
								BytesProcessed:  totalBytes,
								Metadata:        string(metadataBytes),
								Gtid:            lastGtid,
							}

							// A failed checkpoint must not abort the stream,
							// but it should not go unnoticed either.
							if err := stm.saveCheckpoint(cp, rd.CheckpointPath); err != nil {
								logger.Warn("保存检查点失败: %v", err)
							}
						}
					}

					if processedChunks%10 == 0 {
						logger.Info("TDEngine任务 %s 已处理 %d 块数据, 最新时间戳: %d", rd.JobId, processedChunks, lastTS)
					}
				}

				// Simulation only: finish after 50 records.
				if processedCount >= 50 {
					resultChan <- true
					return
				}
			}
		}
	}()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-errorChan:
		return err
	case <-resultChan:
		// The receive orders this read after the producer's last write.
		stm.updateProgress(rd.JobId, processedChunks, -1, totalBytes, lastGtid)
		logger.Info("TDEngine流任务 %s 完成，共处理 %d 块数据", rd.JobId, processedChunks)
	}

	return nil
}

// processESStream streams documents from Elasticsearch to the client using
// the scroll API.
//
// It opens a scroll on rd.ESIndex, using rd.ESQuery or a default query. If the
// checkpoint carries a scroll ID it tries to continue that scroll first and
// falls back to a fresh search when that fails. Each page of hits is converted
// to one DataChunk (optionally gzip-compressed) and sent over conn. Progress
// and, when enabled, a checkpoint are written at most every five seconds.
// Streaming ends when a page contains no hits.
//
// It returns ctx.Err() when the context is cancelled, the first processing
// error if one occurs, and nil on normal completion.
func (stm *StreamTaskManager) processESStream(ctx context.Context, rd model.RecvData, conn net.Conn, checkpoint *model.Checkpoint) error {
	logger.Info("开始Elasticsearch流式同步: %s，使用GTID: %s", rd.JobId, rd.Gtid)

	client := &http.Client{
		Timeout: 300 * time.Second,
	}

	var (
		scrollID        string
		lastTimestamp   int64
		processedChunks int64
		totalBytes      int64
	)
	lastGtid := rd.Gtid // the request GTID is the default starting point

	// Resume state from the checkpoint, if there is one.
	if checkpoint != nil {
		processedChunks = checkpoint.LastSequenceNum
		totalBytes = checkpoint.BytesProcessed

		// A GTID stored in the checkpoint takes precedence over the request,
		// whether or not the metadata parses.
		if checkpoint.Gtid != "" {
			lastGtid = checkpoint.Gtid
			logger.Info("使用检查点的GTID进行断点续传: %s", lastGtid)
		}

		if checkpoint.Metadata != "" {
			var metadata map[string]interface{}
			if err := json.Unmarshal([]byte(checkpoint.Metadata), &metadata); err != nil {
				logger.Warn("解析检查点元数据失败: %v", err)
			} else {
				if sid, ok := metadata["scroll_id"].(string); ok {
					scrollID = sid
				}
				if ts, ok := metadata["last_timestamp"].(float64); ok {
					lastTimestamp = int64(ts)
				}
			}
		}
	}

	// nextPage fetches the next page of the current scroll and records the
	// scroll ID returned with it.
	scrollURL := fmt.Sprintf("%s/_search/scroll", rd.URL)
	nextPage := func() (map[string]interface{}, error) {
		body, err := json.Marshal(map[string]interface{}{
			"scroll":    rd.ESScrollTime,
			"scroll_id": scrollID,
		})
		if err != nil {
			return nil, fmt.Errorf("创建scroll请求失败: %w", err)
		}
		page, err := esStreamPost(ctx, client, rd, "scroll", scrollURL, string(body))
		if err != nil {
			return nil, err
		}
		if id, ok := page["_scroll_id"].(string); ok {
			scrollID = id
		}
		return page, nil
	}

	// Obtain the first page, either by continuing a checkpointed scroll or by
	// starting a new search.
	var page map[string]interface{}
	if scrollID != "" {
		resumed, err := nextPage()
		if err != nil {
			// The stored scroll context may no longer be usable.
			logger.Warn("无法继续检查点中的scroll，将重新开始查询: %v", err)
			scrollID = ""
		} else {
			page = resumed
		}
	}
	if scrollID == "" {
		esQuery := rd.ESQuery
		if esQuery == "" {
			q, err := esStreamInitialQuery(rd.ESTimeField, lastTimestamp, lastGtid)
			if err != nil {
				return fmt.Errorf("创建初始scroll请求失败: %w", err)
			}
			esQuery = q
		}

		scrollTime := rd.ESScrollTime
		batchSize := rd.ESBatchSize
		logger.Debug("ES查询: %s, ScrollTime: %s, BatchSize: %d", esQuery, scrollTime, batchSize)

		url := fmt.Sprintf("%s/%s/_search?scroll=%s&size=%d", rd.URL, rd.ESIndex, scrollTime, batchSize)
		first, err := esStreamPost(ctx, client, rd, "初始scroll", url, esQuery)
		if err != nil {
			return err
		}
		id, ok := first["_scroll_id"].(string)
		if !ok {
			return fmt.Errorf("ES响应缺少_scroll_id")
		}
		scrollID = id
		page = first
	}

	lastProgressUpdate := time.Now()

	// emit converts one page of hits into a chunk and sends it to the client.
	emit := func(hits []interface{}) error {
		records := make([]model.ESSourceDBData, 0, len(hits))
		for _, hit := range hits {
			rec, ts, gtid, err := esStreamRecord(hit, rd.ESTimeField)
			if err != nil {
				return err
			}
			if ts > lastTimestamp {
				lastTimestamp = ts
			}
			// Keep the running maximum across pages so the reported GTID
			// never moves backwards between batches.
			if gtid > lastGtid {
				lastGtid = gtid
			}
			records = append(records, rec)
		}

		stm.Mu.Lock()
		if status, exists := stm.Statuses[rd.JobId]; exists {
			status.Progress.LastGtid = lastGtid
		}
		stm.Mu.Unlock()

		dataBytes, err := json.Marshal(records)
		if err != nil {
			return fmt.Errorf("序列化ES数据失败: %v", err)
		}

		if rd.EnableCompression {
			compressedBytes, err := compressData(dataBytes, rd.CompressionLevel)
			if err != nil {
				return fmt.Errorf("压缩数据失败: %v", err)
			}
			dataBytes = compressedBytes
		}

		chunk := model.DataChunk{
			ChunkID:     generateChunkID(rd.JobId, processedChunks, dataBytes),
			SequenceNum: processedChunks,
			Data:        dataBytes,
			Compressed:  rd.EnableCompression,
			CreateTime:  time.Now(),
			Size:        int64(len(dataBytes)),
			JobID:       rd.JobId,
			Gtid:        lastGtid,
		}

		dataJSON, err := json.Marshal(chunk)
		if err != nil {
			return fmt.Errorf("序列化数据块失败: %v", err)
		}
		if err := protocol.SendResponse(conn, protocol.MRsprowmsg, string(dataJSON)); err != nil {
			return fmt.Errorf("发送数据到客户端失败: %v", err)
		}

		processedChunks++
		totalBytes += int64(len(dataBytes))

		// Throttle progress and checkpoint writes to one per 5s.
		if time.Since(lastProgressUpdate) > 5*time.Second {
			stm.updateProgress(rd.JobId, processedChunks, -1, totalBytes, lastGtid)
			lastProgressUpdate = time.Now()

			if rd.CheckpointEnabled {
				// A map of a string and an int64 always marshals.
				metadataBytes, _ := json.Marshal(map[string]interface{}{
					"scroll_id":      scrollID,
					"last_timestamp": lastTimestamp,
				})

				cp := &model.Checkpoint{
					JobID:           rd.JobId,
					LastSequenceNum: processedChunks,
					LastUpdateTime:  time.Now(),
					BytesProcessed:  totalBytes,
					Metadata:        string(metadataBytes),
					Gtid:            lastGtid,
				}

				// A failed checkpoint must not abort the stream.
				if err := stm.saveCheckpoint(cp, rd.CheckpointPath); err != nil {
					logger.Warn("保存检查点失败: %v", err)
				}
			}
		}
		return nil
	}

	// stream drains the scroll page by page until an empty page is returned.
	stream := func() error {
		for {
			hits, ok := esStreamHits(page)
			if !ok {
				return fmt.Errorf("ES响应缺少hits列表")
			}
			if len(hits) == 0 {
				return nil // no more data
			}
			if err := emit(hits); err != nil {
				return err
			}
			if err := ctx.Err(); err != nil {
				return err
			}
			next, err := nextPage()
			if err != nil {
				return err
			}
			page = next
		}
	}

	// The worker reports exactly one outcome. The buffer lets it finish even
	// when this function has already returned because of cancellation.
	done := make(chan error, 1)
	go func() {
		var err error
		defer func() {
			if r := recover(); r != nil {
				logger.Error("ES流处理崩溃: %v", r)
				err = fmt.Errorf("ES流处理崩溃: %v", r)
			}
			done <- err
		}()
		err = stream()
	}()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case err := <-done:
		if err != nil {
			return err
		}
		// The receive orders these reads after the worker's last write.
		stm.updateProgress(rd.JobId, processedChunks, -1, totalBytes, lastGtid)
		logger.Info("ES流任务 %s 完成，共处理 %d 块数据", rd.JobId, processedChunks)
	}

	return nil
}

// esStreamInitialQuery builds the default scroll query. With a time field it
// selects documents newer than lastTimestamp whose gtid is at least lastGtid,
// sorted by time and then gtid; without one it matches all documents.
func esStreamInitialQuery(timeField string, lastTimestamp int64, lastGtid string) (string, error) {
	if timeField == "" {
		return `{"query": {"match_all": {}}}`, nil
	}

	query := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"range": map[string]interface{}{
						timeField: map[string]interface{}{"gt": lastTimestamp},
					}},
					{"range": map[string]interface{}{
						"gtid": map[string]interface{}{"gte": lastGtid},
					}},
				},
			},
		},
		"sort": []map[string]interface{}{
			{timeField: map[string]interface{}{"order": "asc"}},
			{"gtid": map[string]interface{}{"order": "asc"}},
		},
	}
	encoded, err := json.Marshal(query)
	if err != nil {
		return "", err
	}
	return string(encoded), nil
}

// esStreamPost sends body as a JSON POST with basic auth and decodes the JSON
// response. The request is bound to ctx so cancelling the task aborts an
// in-flight call. what names the request in error messages.
func esStreamPost(ctx context.Context, client *http.Client, rd model.RecvData, what, url, body string) (map[string]interface{}, error) {
	req, err := http.NewRequestWithContext(ctx, "POST", url, strings.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("创建%s请求失败: %w", what, err)
	}
	req.SetBasicAuth(rd.DbUser, rd.DbPasswd)
	req.Header.Set("Content-Type", "application/json")

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("执行%s请求失败: %w", what, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The body is only used to enrich the error message.
		bodyBytes, _ := ioutil.ReadAll(resp.Body)
		return nil, fmt.Errorf("ES返回错误: %s - %s", resp.Status, string(bodyBytes))
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("解析ES响应失败: %w", err)
	}
	return result, nil
}

// esStreamHits extracts the hits.hits array from a search or scroll response.
// ok is false when the response does not have that shape.
func esStreamHits(page map[string]interface{}) (hits []interface{}, ok bool) {
	outer, ok := page["hits"].(map[string]interface{})
	if !ok {
		return nil, false
	}
	hits, ok = outer["hits"].([]interface{})
	return hits, ok
}

// esStreamRecord converts one raw hit into an ESSourceDBData and also returns
// the extracted timestamp (milliseconds, 0 if absent) and document gtid ("" if
// absent). Missing or oddly typed _id/_index/_source fields yield zero values
// instead of a panic; a hit that is not a JSON object is an error.
func esStreamRecord(hit interface{}, timeField string) (model.ESSourceDBData, int64, string, error) {
	hitMap, ok := hit.(map[string]interface{})
	if !ok {
		return model.ESSourceDBData{}, 0, "", fmt.Errorf("ES命中记录格式无效: %T", hit)
	}

	id, _ := hitMap["_id"].(string)
	index, _ := hitMap["_index"].(string)
	// Reading from a nil map is safe, so a missing _source needs no special case.
	source, _ := hitMap["_source"].(map[string]interface{})

	var timestamp int64
	if timeField != "" {
		switch v := source[timeField].(type) {
		case float64:
			timestamp = int64(v)
		case string:
			// Strings are accepted only in RFC 3339 form.
			if t, err := time.Parse(time.RFC3339, v); err == nil {
				timestamp = t.UnixNano() / int64(time.Millisecond)
			}
		}
	}

	// The gtid is read whether or not a time field is configured.
	gtid, _ := source["gtid"].(string)

	rec := model.ESSourceDBData{
		ID:        id,
		Index:     index,
		Data:      hitMap["_source"],
		Timestamp: timestamp,
		Optype:    "index", // treated as an index operation by default
		Gtid:      gtid,
	}
	return rec, timestamp, gtid, nil
}

// GetTaskStatus looks up the status tracked for taskName. The second result
// reports whether an entry exists.
func (stm *StreamTaskManager) GetTaskStatus(taskName string) (*StreamTaskStatus, bool) {
	stm.Mu.Lock()
	st, found := stm.Statuses[taskName]
	stm.Mu.Unlock()

	return st, found
}

// updateTaskError marks the task identified by jobID as failed with err.
// It does nothing when no status is tracked for jobID.
func (stm *StreamTaskManager) updateTaskError(jobID string, err error) {
	stm.Mu.Lock()
	defer stm.Mu.Unlock()

	if st, tracked := stm.Statuses[jobID]; tracked {
		st.Error = err
		st.IsRunning = false

		progress := st.Progress
		progress.Status = "failed"
		progress.ErrorMsg = err.Error()
		progress.LastUpdateTime = time.Now()
	}
}

// StopTask cancels the task named taskName and forgets its cancel function.
// If a status is tracked for the task it is marked "stopped". It returns
// false when no such task is registered.
func (stm *StreamTaskManager) StopTask(taskName string) bool {
	stm.Mu.Lock()
	defer stm.Mu.Unlock()

	stop, registered := stm.Tasks[taskName]
	if !registered {
		logger.Warn("流任务不存在: %s", taskName)
		return false
	}

	stop()
	delete(stm.Tasks, taskName)

	if st, tracked := stm.Statuses[taskName]; tracked {
		st.IsRunning = false
		st.Progress.Status = "stopped"
		st.Progress.LastUpdateTime = time.Now()
	}

	logger.Info("流任务已停止: %s", taskName)
	return true
}

// RemoveTask stops the task named taskName if it is still registered and
// deletes its tracked status.
//
// The task is also removed when it was stopped earlier (StopTask and
// StopAllTasks drop the cancel function but keep the status), so stopped
// tasks can still be cleaned up. Stop and removal happen under a single lock
// acquisition. It returns false only when the manager knows nothing about
// taskName.
func (stm *StreamTaskManager) RemoveTask(taskName string) bool {
	stm.Mu.Lock()
	defer stm.Mu.Unlock()

	cancel, registered := stm.Tasks[taskName]
	status, tracked := stm.Statuses[taskName]
	if !registered && !tracked {
		logger.Warn("流任务不存在: %s", taskName)
		return false
	}

	if registered {
		cancel()
		delete(stm.Tasks, taskName)

		// Callers may still hold the status pointer from GetTaskStatus, so
		// leave it in the same final state StopTask would.
		if tracked {
			status.IsRunning = false
			status.Progress.Status = "stopped"
			status.Progress.LastUpdateTime = time.Now()
		}
		logger.Info("流任务已停止: %s", taskName)
	}

	delete(stm.Statuses, taskName)
	logger.Info("流任务已移除: %s", taskName)
	return true
}

// StopAllTasks cancels every registered task, marks each tracked status as
// "stopped" and then resets the task registry to an empty map.
func (stm *StreamTaskManager) StopAllTasks() {
	stm.Mu.Lock()
	defer stm.Mu.Unlock()

	for name, stop := range stm.Tasks {
		stop()

		st, tracked := stm.Statuses[name]
		if !tracked {
			continue
		}
		st.IsRunning = false
		st.Progress.Status = "stopped"
		st.Progress.LastUpdateTime = time.Now()
	}

	stm.Tasks = map[string]context.CancelFunc{}
	logger.Info("所有流任务已停止")
}

// saveCheckpoint writes checkpoint as JSON to checkpointPath.
//
// When checkpointPath is empty the file is "<CheckpointDir>/<JobID>.checkpoint".
// The data is first written to "<path>.tmp" and then renamed over the target,
// so readers never see a partially written file. The temporary file is removed
// if either step fails.
//
// It returns an error for a nil checkpoint or when serializing, writing or
// renaming fails.
func (stm *StreamTaskManager) saveCheckpoint(checkpoint *model.Checkpoint, checkpointPath string) error {
	if checkpoint == nil {
		return fmt.Errorf("无法保存空检查点")
	}
	if checkpointPath == "" {
		checkpointPath = filepath.Join(stm.CheckpointDir, checkpoint.JobID+".checkpoint")
	}

	data, err := json.Marshal(checkpoint)
	if err != nil {
		return fmt.Errorf("无法序列化检查点: %w", err)
	}

	tempFile := checkpointPath + ".tmp"
	if err := ioutil.WriteFile(tempFile, data, 0644); err != nil {
		// Best-effort cleanup of a possibly partial file; the write error is
		// the one worth reporting.
		os.Remove(tempFile)
		return fmt.Errorf("无法写入检查点文件: %w", err)
	}

	if err := os.Rename(tempFile, checkpointPath); err != nil {
		os.Remove(tempFile) // best effort, see above
		return fmt.Errorf("无法重命名检查点文件: %w", err)
	}

	return nil
}

// loadCheckpoint reads and decodes the checkpoint stored at checkpointPath.
//
// When checkpointPath is empty the file is "<CheckpointDir>/<jobID>.checkpoint".
// It returns nil when the file is missing, unreadable or not valid JSON. A
// missing file is the normal case for a first run and is logged at info level;
// the other cases are logged as errors.
func (stm *StreamTaskManager) loadCheckpoint(jobID, checkpointPath string) *model.Checkpoint {
	if checkpointPath == "" {
		checkpointPath = filepath.Join(stm.CheckpointDir, jobID+".checkpoint")
	}

	data, err := ioutil.ReadFile(checkpointPath)
	if err != nil {
		if os.IsNotExist(err) {
			logger.Info("检查点文件不存在，将从头开始: %s", checkpointPath)
		} else {
			logger.Error("无法读取检查点文件 %s: %v", checkpointPath, err)
		}
		return nil
	}

	var checkpoint model.Checkpoint
	if err := json.Unmarshal(data, &checkpoint); err != nil {
		logger.Error("无法解析检查点文件 %s: %v", checkpointPath, err)
		return nil
	}

	return &checkpoint
}

// compressData gzip-compresses data at the given compression level and
// returns the compressed bytes. An invalid level, or a failure while writing
// or finishing the stream, is returned as an error.
func compressData(data []byte, level int) ([]byte, error) {
	out := new(bytes.Buffer)

	zw, err := gzip.NewWriterLevel(out, level)
	if err != nil {
		return nil, err
	}
	if _, err := zw.Write(data); err != nil {
		return nil, err
	}
	// Close flushes the remaining data and writes the gzip footer.
	if err := zw.Close(); err != nil {
		return nil, err
	}

	return out.Bytes(), nil
}

// decompressData inflates gzip-compressed data and returns the original
// bytes. Input that is not a valid gzip stream yields an error.
func decompressData(data []byte) ([]byte, error) {
	zr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	defer zr.Close()

	plain, err := ioutil.ReadAll(zr)
	return plain, err
}

// generateChunkID derives a chunk identifier: the hex-encoded MD5 of the job
// ID, the decimal sequence number and at most the first 1024 bytes of data.
func generateChunkID(jobID string, sequenceNum int64, data []byte) string {
	// Only a prefix of the payload takes part in the hash.
	prefix := data
	if len(prefix) > 1024 {
		prefix = prefix[:1024]
	}

	digest := md5.New()
	digest.Write([]byte(jobID))
	fmt.Fprintf(digest, "%d", sequenceNum)
	digest.Write(prefix)

	return hex.EncodeToString(digest.Sum(nil))
}

// min returns the smaller of a and b. The language gained a built-in min in
// Go 1.21; this package-level helper covers older toolchains.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

// updateProgress stores the given counters and GTID in the progress record of
// jobID and refreshes its last-update time. It does nothing when no status is
// tracked for jobID.
func (stm *StreamTaskManager) updateProgress(jobID string, processedChunks, totalChunks, bytesProcessed int64, lastGtid string) {
	stm.Mu.Lock()
	defer stm.Mu.Unlock()

	st, tracked := stm.Statuses[jobID]
	if !tracked {
		return
	}

	// The per-status lock is taken after the manager lock.
	st.mu.Lock()
	defer st.mu.Unlock()

	progress := st.Progress
	progress.ProcessedChunks = processedChunks
	progress.TotalChunks = totalChunks
	progress.BytesProcessed = bytesProcessed
	progress.LastUpdateTime = time.Now()
	progress.LastGtid = lastGtid
}

// createStreamProcessor builds a stream processor with parallelStreams
// workers and unbuffered job, result and error channels.
//
// A non-positive parallelStreams is raised to 1: with no workers the
// processor's done channel closes as soon as it is started and every
// submitted job is rejected.
func (stm *StreamTaskManager) createStreamProcessor(parallelStreams int) *streamProcessor {
	if parallelStreams < 1 {
		parallelStreams = 1
	}

	return &streamProcessor{
		workerCount: parallelStreams,
		jobChan:     make(chan streamJob),
		resultChan:  make(chan streamResult),
		errorChan:   make(chan error),
		doneChan:    make(chan struct{}),
	}
}

// streamProcessor is a pool of workers that process stream jobs.
type streamProcessor struct {
	// workerCount is the number of worker goroutines launched by start.
	workerCount int
	// jobChan carries submitted jobs to the workers.
	jobChan     chan streamJob
	// resultChan receives one result per successfully processed job.
	resultChan  chan streamResult
	// errorChan receives processing errors; workers send without blocking,
	// so an error is dropped when nobody is receiving.
	errorChan   chan error
	// doneChan is closed once every worker has returned.
	doneChan    chan struct{}
	// wg counts the running workers.
	wg          sync.WaitGroup
}

// streamJob is one unit of work handed to a streamProcessor worker.
type streamJob struct {
	// chunk is the data chunk passed to the processing function.
	chunk   model.DataChunk
	// jobID and taskNum are copied unchanged into the resulting streamResult.
	jobID   string
	taskNum int
}

// streamResult describes the outcome of one successfully processed streamJob.
type streamResult struct {
	// jobID and taskNum are copied from the originating job.
	jobID          string
	// chunkID is the ChunkID of the processed chunk.
	chunkID        string
	taskNum        int
	// bytesProcessed is the byte count returned by the processing function.
	bytesProcessed int64
}

// start launches workerCount workers that read jobs from jobChan and run
// processFunc on each job's chunk. A successful result is delivered on
// resultChan; an error is offered to errorChan without blocking and is dropped
// when nobody is receiving. Workers stop when ctx is cancelled or jobChan is
// closed; doneChan is closed once all of them have returned.
func (sp *streamProcessor) start(ctx context.Context, processFunc func(model.DataChunk) (int64, error)) {
	worker := func(workerID int) {
		defer sp.wg.Done()

		for {
			var (
				job  streamJob
				open bool
			)
			select {
			case <-ctx.Done():
				return
			case job, open = <-sp.jobChan:
			}
			if !open {
				// The job channel was closed by shutdown.
				return
			}

			n, err := processFunc(job.chunk)
			if err != nil {
				// Non-blocking send so a worker never stalls on an error
				// nobody is waiting for.
				select {
				case sp.errorChan <- err:
				default:
				}
				continue
			}

			res := streamResult{
				jobID:          job.jobID,
				chunkID:        job.chunk.ChunkID,
				taskNum:        job.taskNum,
				bytesProcessed: n,
			}
			select {
			case sp.resultChan <- res:
			case <-ctx.Done():
				return
			}
		}
	}

	for id := 0; id < sp.workerCount; id++ {
		sp.wg.Add(1)
		go worker(id)
	}

	// Signal completion once every worker has exited.
	go func() {
		sp.wg.Wait()
		close(sp.doneChan)
	}()
}

// submitJob hands job to a worker, blocking until one accepts it. It returns
// false instead when the processor's done channel is closed.
func (sp *streamProcessor) submitJob(job streamJob) bool {
	accepted := false

	select {
	case <-sp.doneChan:
		// All workers have exited; the job cannot be taken.
	case sp.jobChan <- job:
		accepted = true
	}

	return accepted
}

// shutdown closes the job channel and then blocks until doneChan is closed,
// which start does after every worker has returned.
// It must be called at most once: closing jobChan a second time panics.
func (sp *streamProcessor) shutdown() {
	close(sp.jobChan)
	<-sp.doneChan
}
