import configparser
import os
import psycopg2
from elasticsearch import Elasticsearch
import json
import warnings
from elasticsearch import ElasticsearchWarning
import logging

from psycopg2._json import Json

# Silence Elasticsearch client warnings (category ElasticsearchWarning)
warnings.filterwarnings("ignore", category=ElasticsearchWarning)

# Configure logging: INFO level, timestamp - level - message
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Read the configuration file: config.ini located in the same directory as
# this script (symlinks resolved via realpath).
# NOTE: ConfigParser.read() does not raise when the file is missing; in that
# case the section lookups below fail with KeyError.
config = configparser.ConfigParser()
script_directory = os.path.dirname(os.path.realpath(__file__))
config.read(os.path.join(script_directory, 'config.ini'))

# Configure Elasticsearch and PostgreSQL
# The Elasticsearch client is built from the [Elasticsearch] host setting.
es_host = config['Elasticsearch']['host']
es = Elasticsearch(es_host)

# PostgreSQL connection parameters come from the [PostgreSQL] section.
pg_dbname = config['PostgreSQL']['dbname']
pg_user = config['PostgreSQL']['user']
pg_password = config['PostgreSQL']['password']
pg_host = config['PostgreSQL']['host']
pg_port = config['PostgreSQL']['port']

# A single connection and cursor are opened at import time and shared by
# every function in this module.
pg_conn = psycopg2.connect(
    dbname=pg_dbname,
    user=pg_user,
    password=pg_password,
    host=pg_host,
    port=pg_port
)
pg_cursor = pg_conn.cursor()

# Create the schema
def create_schema(schema_name):
    """Make sure the PostgreSQL schema ``schema_name`` exists.

    Issues ``CREATE SCHEMA IF NOT EXISTS`` on the module-level cursor,
    commits the change and logs the outcome.

    Args:
        schema_name: Name of the schema. It is interpolated into the SQL text
            verbatim, so it must be a trusted, valid SQL identifier.
    """
    pg_cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name};")
    pg_conn.commit()
    logging.info(f"Schema created or already exists: {schema_name}")

# Create the checkpoint table
def create_sync_checkpoint_table():
    """Make sure the ``sync_checkpoint`` bookkeeping table exists.

    The table has two VARCHAR columns: ``index_name`` (primary key) and
    ``last_sync_id``. The statement carries no schema qualifier. The change
    is committed on the module-level connection and the outcome is logged.
    """
    checkpoint_ddl = """
    CREATE TABLE IF NOT EXISTS sync_checkpoint (
        index_name VARCHAR PRIMARY KEY,
        last_sync_id VARCHAR
    );
    """
    pg_cursor.execute(checkpoint_ddl)
    pg_conn.commit()
    logging.info("Sync checkpoint table created or already exists")

# List all non-system indices
def get_non_system_indices():
    """Return the names of all Elasticsearch indices that are not system indices.

    Queries ``es.indices.get_alias(index="*")`` and treats every item yielded
    by iterating the response as an index name. Names starting with ``.`` are
    considered system indices and are skipped.

    Returns:
        A list of the remaining index names, in the order they were yielded.
    """
    non_system_indices = []
    for name in es.indices.get_alias(index="*"):
        if name.startswith('.'):
            continue
        non_system_indices.append(name)
    logging.info(f"Non-system indices: {non_system_indices}")
    return non_system_indices

# Create the PostgreSQL table
def create_table_for_index(index, schema_name):
    """Create the destination table for an Elasticsearch index if it is missing.

    The table has the columns ``es_id`` (VARCHAR primary key),
    ``__sys_obj_id__`` (VARCHAR), ``doc`` (JSONB) and ``optype`` (VARCHAR,
    default ``'insert'``). The change is committed on the module-level
    connection.

    Index names frequently contain characters such as ``-`` or ``.`` that are
    not legal in a bare SQL identifier. Any name part that is not a plain
    identifier is therefore double-quoted; plain names are left untouched so
    the returned value is unchanged for names that already worked.

    Args:
        index: Elasticsearch index name; used as the table name.
        schema_name: Schema in which the table is created.

    Returns:
        The schema-qualified table name, in a form that can be embedded
        directly into SQL text.
    """
    table_name = f"{_quote_identifier(schema_name)}.{_quote_identifier(index)}"
    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
        es_id VARCHAR PRIMARY KEY,
        __sys_obj_id__ VARCHAR,
        doc JSONB,
        optype VARCHAR DEFAULT 'insert'
    );
    """
    pg_cursor.execute(create_table_query)
    pg_conn.commit()
    logging.info(f"Table created or already exists: {table_name}")
    return table_name


def _quote_identifier(name):
    """Return ``name`` unchanged if it is a plain SQL identifier, else double-quoted."""
    import re

    # Plain identifiers keep their historical unquoted (case-folding) form.
    if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_$]*", name):
        return name
    # Standard SQL quoting: wrap in double quotes and double any embedded quote.
    return '"' + name.replace('"', '""') + '"'

# Fetch the last synced ID from PostgreSQL
def get_last_sync_id(table_name):
    """Return the checkpoint stored for ``table_name``, or ``None``.

    Looks up the ``sync_checkpoint`` row whose ``index_name`` equals
    ``table_name``.

    Args:
        table_name: Key under which the checkpoint was stored.

    Returns:
        The stored ``last_sync_id``, or ``None`` when there is no row or the
        query failed (the failure is logged, not raised).
    """
    try:
        pg_cursor.execute(
            "SELECT last_sync_id FROM sync_checkpoint WHERE index_name = %s",
            (table_name,),
        )
        row = pg_cursor.fetchone()
    except Exception as e:
        logging.error(f"Error fetching last sync id from PostgreSQL: {e}")
        # A failed statement leaves the transaction aborted, and every later
        # statement on the shared connection would be rejected. Nothing in an
        # aborted transaction can be committed, so rolling back loses no work.
        try:
            pg_conn.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after checkpoint read error: {rollback_error}")
        return None
    return row[0] if row else None

# Update the last synced ID in PostgreSQL
def update_last_sync_id(table_name, last_sync_id):
    """Store ``last_sync_id`` as the checkpoint for ``table_name``.

    Inserts a row into ``sync_checkpoint`` or, when a row with the same
    ``index_name`` already exists, overwrites its ``last_sync_id``. The
    change is committed immediately. Failures are logged, not raised.

    Args:
        table_name: Key identifying the checkpoint row.
        last_sync_id: Value to record.
    """
    try:
        pg_cursor.execute(
            "INSERT INTO sync_checkpoint (index_name, last_sync_id) VALUES (%s, %s) "
            "ON CONFLICT (index_name) DO UPDATE SET last_sync_id = EXCLUDED.last_sync_id",
            (table_name, last_sync_id),
        )
        pg_conn.commit()
    except Exception as e:
        logging.error(f"Error updating last sync id in PostgreSQL: {e}")
        # Clear the aborted transaction so later statements on the shared
        # connection are not rejected because of this failure.
        try:
            pg_conn.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after checkpoint update error: {rollback_error}")

# Fetch data page by page
def fetch_data(index, scroll_id=None):
    """Fetch one page of documents through the Elasticsearch scroll API.

    Args:
        index: Index to search; only used when a new scroll is started.
        scroll_id: Identifier of an existing scroll. When falsy, a new search
            is started with a page size of 1000; otherwise the existing
            scroll is continued.

    Returns:
        The response of ``es.search`` or ``es.scroll``. Both calls request a
        scroll keep-alive of ``'2m'``.
    """
    if not scroll_id:
        return es.search(index=index, scroll='2m', size=1000)
    return es.scroll(scroll_id=scroll_id, scroll='2m')

# Sync data
def sync_data(index, schema_name):
    """Copy every document of one Elasticsearch index into PostgreSQL.

    The destination table is created if necessary. The index is then read
    page by page through the scroll API, and each page is upserted and
    committed as one transaction. After each committed page the ``_id`` of
    its last hit is recorded via ``update_last_sync_id``.

    Any exception is logged and swallowed so the caller can continue with the
    next index. The open transaction is rolled back in that case so the
    shared connection stays usable. The scroll context is released on exit.

    NOTE: the recorded checkpoint is only written, never used to skip
    documents; every run re-reads the whole index.

    Args:
        index: Elasticsearch index name to copy.
        schema_name: PostgreSQL schema holding the destination table.
    """
    scroll_id = None
    try:
        table_name = create_table_for_index(index, schema_name)
        # The statement text does not change per document, so build it once.
        upsert_query = (
            f"INSERT INTO {table_name} (es_id, __sys_obj_id__, doc) VALUES (%s, %s, %s) "
            f"ON CONFLICT (es_id) DO UPDATE SET doc = EXCLUDED.doc"
        )

        data = fetch_data(index)
        scroll_id = data['_scroll_id']
        hits = data['hits']['hits']

        while hits:
            # psycopg2 opens a transaction implicitly on the first statement;
            # the whole page is committed through the connection below.
            for hit in hits:
                es_id = hit['_id']
                doc = hit['_source']
                pg_cursor.execute(upsert_query, (es_id, doc.get('id'), Json(doc)))
                logging.info(f"Document {es_id} inserted/updated in table {table_name}")
            pg_conn.commit()

            # Record the last ID of the page that was just committed
            update_last_sync_id(table_name, hits[-1]['_id'])

            data = fetch_data(index, scroll_id)
            scroll_id = data['_scroll_id']
            hits = data['hits']['hits']

        logging.info(f"Data synced for index {index}")
    except Exception as e:
        logging.error(f"An error occurred while syncing index {index}: {e}")
        # Discard the partially written page; otherwise the aborted
        # transaction would make every following index fail as well.
        try:
            pg_conn.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed for index {index}: {rollback_error}")
    finally:
        # Best effort: free the server-side scroll context instead of letting
        # it linger until its keep-alive expires.
        if scroll_id:
            try:
                es.clear_scroll(scroll_id=scroll_id)
            except Exception as clear_error:
                logging.warning(f"Could not clear scroll for index {index}: {clear_error}")

# Main entry point
def main():
    """Run a full sync of all non-system Elasticsearch indices.

    Creates the target schema and the checkpoint table, then syncs each index
    in turn. Any exception raised along the way is logged. The module-level
    cursor and connection are always closed at the end.
    """
    try:
        schema_name = 'sync_es'  # name of the target schema
        create_schema(schema_name)
        create_sync_checkpoint_table()

        for index_name in get_non_system_indices():
            sync_data(index_name, schema_name)
    except Exception as e:
        logging.error(f"An error occurred: {e}")
    finally:
        pg_cursor.close()
        pg_conn.close()

# Call main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
