Files
python-folder-to-mysql/FoldersToMySQL.py

144 lines
4.4 KiB
Python

import os
import sys
import datetime
import mysql.connector
# Author: Sébastien Plante
# Date: 2025-10-07
# Description:
# This script scans a specified directory for folders and synchronizes their names and creation dates
# with a MySQL database, recording when each folder was last seen by the script. Folders that are no
# longer present in the directory are marked as inactive.
# Version 1.2 [2025-10-14]
# Changes:
# - folder_created_date now uses the folder's (OS) creation time.
# - folder_last_seen now uses the folder's last time it has been seen by the script.
# - Added last_updated field to track when the record was last modified.
# ** This version is not backward compatible with previous versions, you will need to drop the existing table.
# Version 1.1 [2025-10-07]
# Changes:
# - Updated table name to be configurable via DB_CONFIG
# Requirements:
# Install python 3.x from https://www.python.org/downloads/
# pip install mysql-connector-python
# Usage:
# python FoldersToMySQL.py
# Optionally, you can specify a different path to scan:
# python FoldersToMySQL.py "C:\another folder"
# ================== CONFIGURATION ==================
# Directory scanned when no path is supplied on the command line.
SCAN_PATH = r"C:\test folder"

# MySQL settings. Every entry except "table" is forwarded to
# mysql.connector.connect(); "table" names the table that receives the folders.
# NOTE(review): credentials are hard-coded here — consider loading them from the
# environment or from a config file kept out of version control.
DB_CONFIG = {
    "host": "localhost",
    "user": "root",
    "password": "qwerty1234",
    "database": "testdb",
    "table": "folders",
}
# ===================================================
# SQL templates. "{table}" is filled in with str.format() from DB_CONFIG["table"];
# the "%s" markers are bound by the database driver at execute() time.

# Creates the folders table on first run. "name" is UNIQUE, which is what lets
# UPSERT_SQL below update an existing row instead of inserting a duplicate.
TABLE_SQL = """
CREATE TABLE IF NOT EXISTS {table} (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL UNIQUE,
folder_created_date DATETIME NOT NULL,
folder_last_seen DATETIME NOT NULL,
active TINYINT(1) NOT NULL DEFAULT 1,
last_updated DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
"""
# Inserts a folder as active, or — when a row with the same name already
# exists — refreshes its dates and re-activates it.
# Parameters: (name, folder_created_date, folder_last_seen, last_updated).
# NOTE(review): the VALUES() function inside ON DUPLICATE KEY UPDATE is
# deprecated in recent MySQL 8.0 releases — confirm target server versions
# before switching to the row-alias form.
UPSERT_SQL = """
INSERT INTO {table} (name, folder_created_date, folder_last_seen, active, last_updated)
VALUES (%s, %s, %s, 1, %s)
ON DUPLICATE KEY UPDATE
folder_created_date = VALUES(folder_created_date),
folder_last_seen = VALUES(folder_last_seen),
active = 1,
last_updated = VALUES(last_updated);
"""
# Deactivates every row whose name is not in the supplied list. "{placeholders}"
# must be a comma-separated run of "%s", one per name.
# Parameters: (last_updated, name_1, ..., name_n).
MARK_INACTIVE_SQL_TEMPLATE = "UPDATE {table} SET active = 0, last_updated = %s WHERE name NOT IN ({placeholders});"
# Deactivates every row; used when the scan found no folders at all, since
# "NOT IN ()" with an empty list is not valid SQL. Parameters: (last_updated,).
MARK_ALL_INACTIVE_SQL = "UPDATE {table} SET active = 0, last_updated = %s;"
def get_connection():
    """Open and return a new MySQL connection built from ``DB_CONFIG``.

    The ``"table"`` entry is a setting for this script rather than a
    connection argument, so it is removed before the remaining entries are
    handed to ``mysql.connector.connect`` as keyword arguments.
    """
    connect_kwargs = dict(DB_CONFIG)
    connect_kwargs.pop("table", None)
    return mysql.connector.connect(**connect_kwargs)
def ensure_table(cursor):
    """Create the configured folders table if it does not exist yet.

    Args:
        cursor: Open database cursor used to execute the CREATE TABLE statement.
    """
    create_statement = TABLE_SQL.format(table=DB_CONFIG["table"])
    cursor.execute(create_statement)
def list_directories(base_path):
    """Return the immediate sub-directories of *base_path* with their creation times.

    Args:
        base_path: Directory whose direct children are inspected.

    Returns:
        A list of ``(name, created)`` tuples, one per sub-directory, where
        ``created`` is a naive local ``datetime.datetime``.

    Exits the process with status 1 (after printing the error) if
    *base_path* cannot be read.
    """
    dirs = []
    try:
        with os.scandir(base_path) as entries:
            for entry in entries:
                try:
                    if not entry.is_dir():
                        continue
                    info = entry.stat()
                except OSError:
                    # The folder vanished or became unreadable between being
                    # listed and being inspected; skip it rather than abort
                    # the whole scan.
                    continue
                # st_ctime is a creation time only on Windows; elsewhere it is
                # the last metadata change. Prefer the real birth time on
                # platforms that expose it.
                try:
                    created_ts = info.st_birthtime
                except AttributeError:
                    created_ts = info.st_ctime
                created = datetime.datetime.fromtimestamp(created_ts)
                dirs.append((entry.name, created))
    except OSError as e:
        print(f"Error reading directory: {e}")
        sys.exit(1)
    return dirs
def sync(base_path):
    """Synchronize the sub-directories of *base_path* with the configured table.

    Every folder currently present is inserted, or refreshed and re-activated
    if it already has a row; every row whose folder is no longer present is
    marked inactive. All changes are committed together at the end.

    Args:
        base_path: Directory whose immediate sub-directories are synchronized.

    MySQL errors raised after the connection is opened are printed rather
    than propagated; the cursor and connection are always closed.
    """
    table = DB_CONFIG["table"]
    conn = get_connection()
    # Stays None if cursor creation fails, so the cleanup below does not trip
    # over an unbound name and hide the original error.
    cur = None
    try:
        cur = conn.cursor()
        ensure_table(cur)
        current_dirs = list_directories(base_path)
        current_names = {name for name, _ in current_dirs}

        # One timestamp per run, so every row touched by this sync carries the
        # same folder_last_seen / last_updated value.
        now = datetime.datetime.now()

        upsert_sql = UPSERT_SQL.format(table=table)
        for name, ctime in current_dirs:
            cur.execute(upsert_sql, (name, ctime, now, now))

        if current_names:
            placeholders = ",".join(["%s"] * len(current_names))
            sql = MARK_INACTIVE_SQL_TEMPLATE.format(
                table=table, placeholders=placeholders)
            cur.execute(sql, [now, *current_names])
        else:
            # "NOT IN ()" is not valid SQL, so an empty scan uses the
            # unconditional statement instead.
            cur.execute(MARK_ALL_INACTIVE_SQL.format(table=table), (now,))

        conn.commit()
        print(f"Synchronization complete. Active folders: {len(current_dirs)}")
    except mysql.connector.Error as err:
        print(f"MySQL error: {err}")
    finally:
        if cur is not None:
            cur.close()
        conn.close()
def main():
    """Entry point: choose the directory to scan, validate it, and run the sync.

    The first command-line argument, when present, overrides ``SCAN_PATH``.
    Exits with status 1 if the chosen path is not an existing directory.
    """
    cli_args = sys.argv[1:]
    base_path = cli_args[0] if cli_args else SCAN_PATH
    if os.path.isdir(base_path):
        print(f"Scanning directory: {base_path}")
        sync(base_path)
        return
    print(f"Invalid path: {base_path}")
    sys.exit(1)
# Run the synchronization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()