import copy
import json

import pymongo
import psycopg2
from psycopg2 import sql
import logging
import configparser
import threading

from psycopg2._json import Json

# Configure logging: INFO and above is written to mongo_to_pg.log, each record
# prefixed with a timestamp and its level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename='mongo_to_pg.log',
)

# Read connection settings from config.ini in the working directory.
config = configparser.ConfigParser()
config.read('config.ini')

# MongoDB: build the client used by main() and by every watcher thread.
mongo_uri = config['MongoDB']['uri']
mongo_client = pymongo.MongoClient(mongo_uri)

# PostgreSQL: pull the individual connection parameters out of their section.
_pg_settings = config['PostgreSQL']
pg_dbname = _pg_settings['dbname']
pg_user = _pg_settings['user']
pg_password = _pg_settings['password']
pg_host = _pg_settings['host']
pg_port = _pg_settings['port']

# Open the PostgreSQL connection and the module-level cursor.
pg_conn = psycopg2.connect(
    host=pg_host,
    port=pg_port,
    dbname=pg_dbname,
    user=pg_user,
    password=pg_password,
)
pg_cursor = pg_conn.cursor()
logging.info(f"Connected to PostgreSQL{pg_cursor}")


# Create the PostgreSQL schema and table that mirror one MongoDB collection
def create_pg_schema_and_table(db_name, collection_name):
    """Ensure the schema and table for a MongoDB collection exist.

    The MongoDB database name is used as the PostgreSQL schema name and the
    collection name as the table name. Both statements use ``IF NOT EXISTS``,
    so objects that already exist are left untouched.

    Args:
        db_name: MongoDB database name; becomes the schema name.
        collection_name: MongoDB collection name; becomes the table name.

    Raises:
        psycopg2.Error: If either statement fails. The transaction is rolled
            back before the exception propagates.
    """
    schema_name = db_name
    table_name = collection_name

    create_schema_query = sql.SQL("CREATE SCHEMA IF NOT EXISTS {}").format(sql.Identifier(schema_name))
    create_table_query = sql.SQL(
        'CREATE TABLE IF NOT EXISTS {}.{} ( __sys_obj_id__ VARCHAR PRIMARY KEY, doc JSONB, optype VARCHAR)'
    ).format(
        sql.Identifier(schema_name),
        sql.Identifier(table_name)
    )

    # `with pg_conn` commits on success and rolls back on error, so a failed
    # statement cannot leave the shared connection in an aborted transaction.
    # A private cursor is used because psycopg2 cursors are not thread safe and
    # this function is called from several watcher threads.
    with pg_conn:
        with pg_conn.cursor() as cursor:
            logging.info(f"Executing SQL: {create_schema_query.as_string(cursor)}")
            cursor.execute(create_schema_query)

            logging.info(f"Executing SQL: {create_table_query.as_string(cursor)}")
            cursor.execute(create_table_query)


# Watch one MongoDB collection and mirror its changes into PostgreSQL
def watch_mongo_changes(db_name, collection_name):
    """Follow a collection's change stream and replay each event in PostgreSQL.

    Insert, update and replace events are written with
    ``upsert_to_postgresql``; delete events are handled by
    ``handle_delete_operation``. A failure while applying one event is logged
    and the loop moves on to the next event.

    Args:
        db_name: Name of the MongoDB database to watch.
        collection_name: Name of the collection inside that database.

    Raises:
        pymongo.errors.PyMongoError: If the change stream itself fails. The
            error is logged before it is re-raised.
    """
    collection = mongo_client[db_name][collection_name]

    pipeline = [{'$match': {'operationType': {'$in': ['insert', 'update', 'replace', 'delete']}}}]
    try:
        # 'updateLookup' makes update events carry the current full document;
        # without it they only contain the changed fields and cannot be stored.
        # The context manager closes the stream when the loop ends or fails.
        with collection.watch(pipeline, full_document='updateLookup') as change_stream:
            logging.info(f"开始监听 MongoDB 数据库 {db_name} 中集合 {collection_name} 的变化-->{change_stream}")
            for change in change_stream:
                _apply_change(db_name, collection_name, change)
    except pymongo.errors.PyMongoError as e:
        # Without this the watcher thread would die with no trace in the log file.
        logging.exception(f"监听 MongoDB 数据库 {db_name} 中集合 {collection_name} 时发生错误: {e}")
        raise


def _apply_change(db_name, collection_name, change):
    """Apply a single change-stream event to PostgreSQL, logging any failure."""
    operation_type = change['operationType']
    doc_id = str(change['documentKey']['_id'])
    logging.info(f"检测到 {operation_type} 操作: 在数据库 {db_name} 的集合 {collection_name}")
    try:
        create_pg_schema_and_table(db_name, collection_name)
        if operation_type == 'delete':
            handle_delete_operation(db_name, collection_name, doc_id)
            return

        # For updates the looked-up document can be absent when the document
        # was removed before the lookup ran; there is nothing to store then.
        document = change.get('fullDocument')
        if document is None:
            logging.warning(f"{operation_type} 操作没有 fullDocument, 已跳过: id = {doc_id}")
            return
        upsert_to_postgresql(db_name, collection_name, doc_id, document, operation_type)
    except Exception as e:
        logging.exception(f"处理 {operation_type} 操作时发生错误: {e}")
        if isinstance(e, psycopg2.Error):
            # A failed statement aborts the open transaction; without a
            # rollback every later statement on the connection is rejected.
            try:
                pg_conn.rollback()
            except psycopg2.Error as rollback_error:
                logging.exception(f"回滚 PostgreSQL 事务失败: {rollback_error}")


# Insert or update data in PostgreSQL

def deal_json_delete_id(document):
    """Wrap a MongoDB document as a psycopg2 ``Json`` value without its ``_id``.

    The caller's document is not modified.

    Args:
        document: Mapping holding the MongoDB document.

    Returns:
        A ``Json`` adapter around a copy of ``document`` with the top-level
        ``_id`` key removed.
    """
    # A shallow copy is enough: only the top-level '_id' key is dropped and
    # nested values are never mutated.
    json_object = {key: value for key, value in document.items() if key != '_id'}
    # Json serialises with json.dumps by default, which raises TypeError on
    # values it cannot encode, such as the ObjectId or datetime values MongoDB
    # documents commonly contain. default=str stores those as strings instead.
    return Json(json_object, dumps=lambda obj: json.dumps(obj, default=str))


def upsert_to_postgresql(db_name, collection_name, doc_id, document, operation_type):
    """Insert a document row, or overwrite it when the id already exists.

    Args:
        db_name: MongoDB database name; used as the PostgreSQL schema name.
        collection_name: MongoDB collection name; used as the table name.
        doc_id: String form of the document's ``_id``; the row's primary key.
        document: The MongoDB document to store in the ``doc`` column.
        operation_type: Change-stream operation type stored in ``optype``.

    Raises:
        psycopg2.Error: If the statement fails. The transaction is rolled back
            before the exception propagates.
    """
    schema_name = db_name
    table_name = collection_name

    # A plain INSERT violates the primary key when the id is already present
    # (for example on a 'replace' event), so overwrite the existing row instead.
    # ON CONFLICT requires PostgreSQL 9.5 or later.
    upsert_query = sql.SQL(
        'INSERT INTO {}.{} ( __sys_obj_id__, doc, optype) VALUES (%s, %s, %s) '
        'ON CONFLICT (__sys_obj_id__) DO UPDATE SET doc = EXCLUDED.doc, optype = EXCLUDED.optype'
    ).format(
        sql.Identifier(schema_name),
        sql.Identifier(table_name)
    )

    # Build the JSON payload once and reuse it for the statement and the logs.
    payload = deal_json_delete_id(document)

    # `with pg_conn` commits on success and rolls back on error. A private
    # cursor is used because psycopg2 cursors are not thread safe and this
    # function runs in several watcher threads.
    with pg_conn:
        with pg_conn.cursor() as cursor:
            logging.info(
                f"Executing SQL: {upsert_query.as_string(cursor)} with values: {doc_id}, {payload}, {operation_type}")
            cursor.execute(upsert_query, (doc_id, payload, operation_type))

    logging.info(f"数据插入或更新到 PostgreSQL: {payload} 在表 {schema_name}.{table_name}")


# Handle a delete operation
def handle_delete_operation(db_name, collection_name, doc_id):
    """Mark a mirrored row as deleted by setting its ``optype`` to 'delete'.

    The row itself is kept; only the ``optype`` column changes.

    Args:
        db_name: MongoDB database name; used as the PostgreSQL schema name.
        collection_name: MongoDB collection name; used as the table name.
        doc_id: String form of the deleted document's ``_id``.

    Raises:
        psycopg2.Error: If the statement fails. The transaction is rolled back
            before the exception propagates.
    """
    schema_name = db_name
    table_name = collection_name

    update_query = sql.SQL(
        'UPDATE {}.{} SET optype = %s WHERE  __sys_obj_id__ = %s'
    ).format(
        sql.Identifier(schema_name),
        sql.Identifier(table_name)
    )

    # `with pg_conn` commits on success and rolls back on error. A private
    # cursor is used because psycopg2 cursors are not thread safe and this
    # function runs in several watcher threads.
    with pg_conn:
        with pg_conn.cursor() as cursor:
            logging.info(f"Executing SQL: {update_query.as_string(cursor)} with values: 'delete', {doc_id}")
            cursor.execute(update_query, ('delete', doc_id))
            affected_rows = cursor.rowcount

    if affected_rows == 0:
        # No mirrored row carries this id, so nothing was marked as deleted.
        logging.warning(f"删除操作未匹配到任何行: id = {doc_id} 在表 {schema_name}.{table_name}")
    else:
        logging.info(f"删除操作更新到 PostgreSQL: id = {doc_id} 在表 {schema_name}.{table_name}")


def main():
    """Start one watcher thread for every collection of every user database.

    The system databases ``admin``, ``local`` and ``config`` are skipped.
    Errors raised while listing databases or collections, or while starting
    threads, are logged and not re-raised.
    """
    system_databases = ("admin", "local", "config")
    try:
        for db_name in mongo_client.list_database_names():
            if db_name in system_databases:  # skip the system databases
                continue
            for collection_name in mongo_client[db_name].list_collection_names():
                watcher = threading.Thread(
                    target=watch_mongo_changes,
                    args=(db_name, collection_name),
                )
                watcher.start()
    except pymongo.errors.PyMongoError as e:
        logging.error(f"MongoDB 错误: {e}")
    except Exception as e:
        logging.error(f"其他错误: {e}")
    # NOTE(review): the PostgreSQL cursor and connection are not closed here.
    # Presumably this is deliberate: main() returns as soon as the threads are
    # started, and the watcher threads keep using the connection afterwards.


if __name__ == "__main__":
    main()
