import configparser
import copy
import json
import os

import psycopg2
from psycopg2.extras import Json
from pymongo import MongoClient
import logging
from tqdm import tqdm
from bson import ObjectId

# Configure logging: INFO and above is written to sync.log.
logging.basicConfig(filename='sync.log', level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Load config.ini from the directory that contains this script, so the script
# works regardless of the current working directory.
config = configparser.ConfigParser()
script_directory = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(script_directory, 'config.ini')
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed; an empty list means the configuration was not loaded.
# Fail here with a clear message instead of a KeyError on the first lookup.
if not config.read(config_path):
    raise FileNotFoundError(f"Configuration file not found or unreadable: {config_path}")

# MongoDB settings
mongo_uri = config['MongoDB']['uri']

# PostgreSQL settings
pg_dbname = config['PostgreSQL']['dbname']
pg_user = config['PostgreSQL']['user']
pg_password = config['PostgreSQL']['password']
pg_host = config['PostgreSQL']['host']
pg_port = config['PostgreSQL']['port']

# Connect to MongoDB
mongo_client = MongoClient(mongo_uri)
logging.info('Connected to MongoDB')

# Connect to PostgreSQL; one connection and one cursor are shared by the
# whole script.
pg_conn = psycopg2.connect(
    dbname=pg_dbname,
    user=pg_user,
    password=pg_password,
    host=pg_host,
    port=pg_port
)
pg_cursor = pg_conn.cursor()
logging.info('Connected to PostgreSQL')

# Create the sync_status bookkeeping table (if it does not exist). It stores,
# per (schema, table), the last MongoDB _id that was synchronized.
pg_cursor.execute("""
CREATE TABLE IF NOT EXISTS sync_status (
    schema_name TEXT,
    table_name TEXT,
    last_mongo_id TEXT,
    PRIMARY KEY (schema_name, table_name)
);
""")
pg_conn.commit()
logging.info('Created sync_status table if not exists')


# Fetch the checkpoint recorded by the previous sync
def get_last_checkpoint(schema_name, table_name):
    """Look up the last synchronized MongoDB id for one target table.

    Args:
        schema_name: Schema part of the ``sync_status`` key.
        table_name: Table part of the ``sync_status`` key.

    Returns:
        The stored ``last_mongo_id`` value, or ``None`` when ``sync_status``
        has no row for this (schema, table) pair.
    """
    lookup_sql = "SELECT last_mongo_id FROM sync_status WHERE schema_name = %s AND table_name = %s;"
    key = (schema_name, table_name)
    pg_cursor.execute(lookup_sql, key)
    row = pg_cursor.fetchone()
    if not row:
        return None
    return row[0]


# Store the new sync checkpoint
def update_checkpoint(schema_name, table_name, last_mongo_id):
    """Record ``last_mongo_id`` as the checkpoint for one target table.

    Inserts a row into ``sync_status`` or, if a row for the same
    (schema, table) pair already exists, overwrites its ``last_mongo_id``.
    The change is committed on the shared connection before returning.

    Args:
        schema_name: Schema part of the ``sync_status`` key.
        table_name: Table part of the ``sync_status`` key.
        last_mongo_id: Checkpoint value to store.
    """
    upsert_sql = """
    INSERT INTO sync_status (schema_name, table_name, last_mongo_id)
    VALUES (%s, %s, %s)
    ON CONFLICT (schema_name, table_name) DO UPDATE SET last_mongo_id = EXCLUDED.last_mongo_id;
    """
    params = (schema_name, table_name, last_mongo_id)
    pg_cursor.execute(upsert_sql, params)
    pg_conn.commit()


# Convert the ObjectId values in a document to strings
def convert_objectid_to_str(document):
    """Replace every ``ObjectId`` inside ``document`` with its string form.

    The document is modified in place. Dicts and lists are traversed at any
    nesting depth, so ObjectIds stored directly as list elements or inside
    lists of lists are converted as well as those stored as dict values.

    Args:
        document: A dict (typically a MongoDB document) to normalize.

    Returns:
        None. The input is mutated.
    """
    # An explicit stack avoids recursion limits on deeply nested documents.
    pending = [document]
    while pending:
        container = pending.pop()
        if isinstance(container, dict):
            entries = container.items()
        else:
            entries = enumerate(container)
        for key, value in entries:
            if isinstance(value, ObjectId):
                # Assigning to an existing key/index never resizes the
                # container, so it is safe while iterating over it.
                container[key] = str(value)
            elif isinstance(value, (dict, list)):
                pending.append(value)


def deal_json_delete_id(document):
    """Wrap a copy of ``document`` without its top-level ``_id`` in ``Json``.

    The input is deep-copied first, so the caller's document keeps its
    ``_id`` and is never modified.

    Args:
        document: The document to wrap.

    Returns:
        A ``Json`` wrapper around the copy, ready to be passed as a query
        parameter.
    """
    payload = copy.deepcopy(document)
    if '_id' not in payload:
        return Json(payload)
    del payload['_id']
    return Json(payload)


# Synchronize the data
def sync_data(mongo_db_name, collection_name, schema_name, table_name):
    """Incrementally copy one MongoDB collection into its PostgreSQL table.

    Documents whose ``_id`` is greater than the stored checkpoint are read in
    ascending ``_id`` order and upserted into ``schema_name.table_name``; the
    checkpoint is advanced after every document. On the first failure the
    error is logged, the open transaction is rolled back and the sync of this
    collection stops.

    Args:
        mongo_db_name: Source MongoDB database name.
        collection_name: Source collection name.
        schema_name: Target PostgreSQL schema.
        table_name: Target PostgreSQL table; must already exist.
    """
    mongo_collection = mongo_client[mongo_db_name][collection_name]

    last_checkpoint = get_last_checkpoint(schema_name, table_name)
    if last_checkpoint is None:
        query = {}
    elif ObjectId.is_valid(last_checkpoint):
        query = {"_id": {"$gt": ObjectId(last_checkpoint)}}
    else:
        # The checkpoint came from a collection whose _id values are not
        # ObjectIds, so it cannot be turned back into a resume point. The
        # upsert below is idempotent, so re-reading the whole collection is
        # safe, whereas ObjectId(...) would raise and abort the whole run.
        logging.warning(
            f"Checkpoint {last_checkpoint!r} for {schema_name}.{table_name} "
            f"is not a valid ObjectId; resyncing the whole collection")
        query = {}

    # NOTE: the identifiers are interpolated unquoted so that they resolve to
    # the same table the caller created; they must be plain SQL identifiers.
    upsert_sql = f"""
        INSERT INTO {schema_name}.{table_name} ( __sys_obj_id__, doc, optype)
        VALUES (%s, %s, %s)
        ON CONFLICT (__sys_obj_id__) DO UPDATE SET doc = EXCLUDED.doc;
        """

    total_docs = mongo_collection.count_documents(query)
    cursor = mongo_collection.find(query).sort("_id").batch_size(1000)

    try:
        with tqdm(total=total_docs, desc=f"Syncing {schema_name}.{table_name}") as pbar:
            for document in cursor:
                convert_objectid_to_str(document)
                try:
                    doc_id = str(document["_id"])
                    pg_cursor.execute(
                        upsert_sql,
                        (doc_id, deal_json_delete_id(document), 'insert'))
                    # update_checkpoint() commits, which persists the row and
                    # its checkpoint together in a single transaction.
                    update_checkpoint(schema_name, table_name, doc_id)
                except Exception as e:
                    logging.error(f"Error inserting document {document['_id']}: {e}")
                    pg_conn.rollback()
                    break
                else:
                    pbar.update(1)
    finally:
        # Release the server-side cursor even when the loop stops early.
        cursor.close()


# Mirror every non-system MongoDB database into a PostgreSQL schema of the
# same name, and every collection into a table of the same name.
# NOTE: database/collection names are interpolated unquoted into the DDL, the
# same way sync_data references the table, so they must be plain SQL
# identifiers.
try:
    # List all MongoDB databases
    mongo_databases = mongo_client.list_database_names()

    # Run the synchronization
    for mongo_db_name in mongo_databases:
        if mongo_db_name in ["admin", "local", "config"]:
            continue  # skip MongoDB's system databases

        schema_name = mongo_db_name
        pg_cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name};")
        # Commit the DDL right away: sync_data rolls back on a failed insert,
        # which would otherwise also undo the schema creation.
        pg_conn.commit()
        logging.info(f'Created schema {schema_name} if not exists')

        mongo_db = mongo_client[mongo_db_name]
        collections = mongo_db.list_collection_names()

        for collection_name in collections:
            table_name = collection_name
            pg_cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} (
                 __sys_obj_id__ VARCHAR PRIMARY KEY,
                doc JSONB,
                optype VARCHAR DEFAULT 'insert'
            );
            """)
            # Commit so the table survives a rollback in sync_data and is
            # kept even when the collection has no documents to insert.
            pg_conn.commit()
            logging.info(f'Created table {schema_name}.{table_name} if not exists')
            sync_data(mongo_db_name, collection_name, schema_name, table_name)
finally:
    # Close the connections whether or not the sync finished cleanly.
    pg_cursor.close()
    pg_conn.close()
    mongo_client.close()
    logging.info('Closed all database connections')





