晋太元中,武陵人捕鱼为业。缘溪行,忘路之远近。忽逢桃花林,夹岸数百步,中无杂树,芳草鲜美,落英缤纷。渔人甚异之,复前行,欲穷其林。 林尽水源,便得一山,山有小口,仿佛若有光。便舍船,从口入。初极狭,才通人。复行数十步,豁然开朗。土地平旷,屋舍俨然,有良田、美池、桑竹之属。阡陌交通,鸡犬相闻。其中往来种作,男女衣着,悉如外人。黄发垂髫,并怡然自乐。 见渔人,乃大惊,问所从来。具答之。便要还家,设酒杀鸡作食。村中闻有此人,咸来问讯。自云先世避秦时乱,率妻子邑人来此绝境,不复出焉,遂与外人间隔。问今是何世,乃不知有汉,无论魏晋。此人一一为具言所闻,皆叹惋。余人各复延至其家,皆出酒食。停数日,辞去。此中人语云:“不足为外人道也。”(间隔 一作:隔绝) 既出,得其船,便扶向路,处处志之。及郡下,诣太守,说如此。太守即遣人随其往,寻向所志,遂迷,不复得路。 南阳刘子骥,高尚士也,闻之,欣然规往。未果,寻病终。后遂无问津者。
# DIR: /opt/cloudlinux/venv/lib64/python3.11/site-packages/ssa/
# Current File: /opt/cloudlinux/venv/lib64/python3.11/site-packages/ssa/db.py
#!/opt/cloudlinux/venv/bin/python3 -sbb
# coding=utf-8
#
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2020 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENCE.TXT
#
import os
import contextlib
import sqlite3
import logging
from datetime import datetime, timedelta
from sqlalchemy import (
Column,
Boolean,
DateTime,
Integer,
String,
create_engine,
event, func, text
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.orm import Session
from sqlalchemy.orm.session import close_all_sessions
from sqlalchemy.exc import DatabaseError
SSA_DB = '/var/lve/ssa.db'
OLD_SSA_DB = SSA_DB + '.old'
RETENTION_TIME_DAYS = 1
Base = declarative_base()
logger = logging.getLogger("cleanup_old_data")
class RequestResult(Base):
    """
    Describes a processed request stored in the database file.
    E.g.
    {
        "timestamp": "1650008727",
        "url": "http://mydomain.com/index.php",
        "duration": 162077,
        "hitting_limits": false,
        "throttled_time": 0,
        "io_throttled_time": 0,
        "wordpress": true
    }
    Note: created_at, updated_at are saved in local TZ format
    """
    __tablename__ = 'scrape_result'
    id = Column(Integer, primary_key=True)
    # "url" from the payload is split into domain + path columns, both indexed
    domain = Column(String, index=True, nullable=False)
    path = Column(String, index=True, nullable=False)
    # Unix timestamp of the request (stored as integer seconds)
    timestamp = Column(Integer, nullable=False)
    duration = Column(Integer, nullable=False)
    is_slow_request = Column(Boolean, nullable=False)
    hitting_limits = Column(Boolean, nullable=False)
    throttled_time = Column(Integer, nullable=False)
    io_throttled_time = Column(Integer, nullable=False)
    wordpress = Column(Boolean, nullable=False)
    # Index on created_at speeds up cleanup_old_data queries significantly
    created_at = Column(DateTime(timezone=True), server_default=func.now(), index=True)
    updated_at = Column(DateTime(timezone=True), onupdate=func.now(), server_default=func.now())
def cleanup_old_data(engine, batch_size=10000):
    """
    Removes outdated records from database, saving disk space.

    Deletions are batched to avoid loading the entire database into
    the cgroup's page cache, which causes OOM on large databases.
    Runs VACUUM after deletion to reclaim disk space.

    :param engine: SQLAlchemy engine bound to the SSA database
    :param batch_size: maximum rows removed per transaction
    """
    cutoff = datetime.today() - timedelta(days=RETENTION_TIME_DAYS)
    removed_total = 0
    while True:
        # Each batch runs in its own transaction (committed by session_scope),
        # keeping the WAL and page-cache footprint bounded.
        with session_scope(engine) as session:
            outcome = session.execute(
                text(
                    "DELETE FROM scrape_result WHERE rowid IN "
                    "(SELECT rowid FROM scrape_result WHERE created_at < :cutoff LIMIT :batch)"
                ),
                {"cutoff": cutoff, "batch": batch_size},
            )
            removed = outcome.rowcount
            removed_total += removed
            if removed == 0:
                break
    logger.info("Cleanup deleted %d old records", removed_total)
    vacuum_database(engine)
def vacuum_database(engine):
    """
    Run VACUUM command to reclaim disk space after deletions.

    VACUUM must be run outside of a transaction, so we use a raw connection
    switched to autocommit (isolation_level = None).

    SQLite VACUUM creates a full temporary copy of the database.
    By default, the temp file goes to /var/tmp or /tmp which may be
    tmpfs (RAM-backed). On systems with MemoryMax cgroup limits
    (like ssa-agent.service), this causes OOM kills for large databases.
    Setting temp_store_directory to the database's own directory ensures
    the temp file is written to disk, not RAM.

    Failures are logged, never raised: VACUUM is best-effort housekeeping.
    """
    # Get a raw DB-API connection from the pool
    raw_conn = engine.raw_connection()
    original_isolation_level = raw_conn.isolation_level
    try:
        # Autocommit mode: sqlite3 refuses VACUUM inside a transaction
        raw_conn.isolation_level = None
        cursor = raw_conn.cursor()
        try:
            db_dir = os.path.dirname(os.path.abspath(SSA_DB))
            if os.access(db_dir, os.W_OK):
                # PRAGMA does not accept bound parameters; db_dir derives from
                # the trusted SSA_DB constant, not from user input.
                cursor.execute(f"PRAGMA temp_store_directory = '{db_dir}'")
                logger.info("Set SQLite temp_store_directory to: %s", db_dir)
            else:
                logger.warning("Directory %s not writable. VACUUM will use default /var/tmp.", db_dir)
            logger.info("Starting VACUUM operation...")
            cursor.execute("VACUUM")
            logger.info("VACUUM completed successfully.")
        finally:
            # Original leaked the cursor when execute() raised; always close it
            cursor.close()
    except Exception as e:
        logger.error("VACUUM failed: %s", e)
    finally:
        # Restore transactional mode before returning the connection to the pool
        raw_conn.isolation_level = original_isolation_level
        raw_conn.close()
def create_db_if_not_exist(engine):
    """Create the schema on first run; no-op when tables already exist."""
    if is_db_present(engine):
        return
    Base.metadata.create_all(engine)
def is_db_present(engine):
    """
    Return True when the SQLite database file exists on disk and
    contains at least one table.

    :param engine: SQLAlchemy engine bound to SSA_DB
    """
    if not os.path.isfile(SSA_DB):
        return False
    database_inspection = Inspector.from_engine(engine)
    # bool() on the table list replaces the original's pointless
    # list-comprehension copy followed by len(...) > 0
    return bool(database_inspection.get_table_names())
def setup_wal_mode(dbapi_con, con_record):
    # SQLAlchemy 'connect' event hook: switch every freshly opened SQLite
    # connection to write-ahead-log journaling so readers do not block writers.
    # con_record is the pool's connection record, required by the event
    # signature but unused here.
    dbapi_con.execute('PRAGMA journal_mode = WAL')
def _setup_database(readonly):
    """
    Build a SQLAlchemy engine over SSA_DB, enable WAL mode on every new
    connection, and create the schema if the database is missing.

    :param readonly: open the SQLite file in read-only mode (URI mode=ro)
    :return: configured SQLAlchemy engine
    """
    connection_string = f'file:{SSA_DB}'
    if readonly:
        connection_string = f'{connection_string}?mode=ro'

    # A named function instead of the original lambda assignment (PEP 8 / E731);
    # uri=True makes sqlite3 honour the file: URI and its mode=ro parameter.
    def creator():
        return sqlite3.connect(connection_string, uri=True)

    engine = create_engine(
        'sqlite:////', creator=creator, echo=False,
    )
    event.listen(engine, 'connect', setup_wal_mode)
    create_db_if_not_exist(engine)
    return engine
def setup_database(readonly=False):
    """
    Public entry point: return an engine for the SSA database.

    :param readonly: pass True to open the database read-only
    """
    engine = _setup_database(readonly)
    return engine
def restore_database(engine):
    """
    Restore database by establish connections to old and new databases,
    merge data to new one if possible and delete old one.

    Steps: rename the damaged file aside, build a fresh database, salvage
    readable rows from the old file, then remove the old copy plus any
    stale WAL/SHM sidecar files.
    """
    if os.path.exists(SSA_DB):
        # Closing all sessions to ensure that no sessions is using database during replacing
        close_all_sessions()
        os.replace(SSA_DB, OLD_SSA_DB)
    # Re-creates SSA_DB with a fresh, empty schema
    new_engine = setup_database()
    # Dispose of the existing engine to close and refresh all connections, ensuring it connects to the new database
    engine.dispose()
    old_engine = create_engine(f'sqlite:////{OLD_SSA_DB}')
    try:
        with session_scope(old_engine) as session_old, session_scope(new_engine) as session_new:
            # Check if old database is able to read and it make sense to try save unharmed data
            session_old.query(RequestResult).first()
            merge_unharmed_data_from_database(session_old, session_new)
    except DatabaseError:
        # Old file is unreadable even for a single row — nothing to salvage
        pass
    # Drop the old copy and SQLite WAL/SHM sidecars left from the damaged file
    for path in (OLD_SSA_DB, SSA_DB + "-wal", SSA_DB + "-shm"):
        if os.path.exists(path):
            os.remove(path)
def merge_unharmed_data_from_database(session_old, session_new):
    """
    Scrape all unharmed records from malformed database and merge them into new database.

    Reads the old database in small offset/limit windows; when a whole
    batch fails to load (corrupted page), falls back to fetching that
    window one record at a time and skips the records that still fail.
    """
    offset = 0
    batch_size = 10
    while True:
        query = session_old.query(RequestResult).offset(offset).limit(batch_size)
        try:
            records_to_save = query.all()
            if not records_to_save:
                # Past the last readable window — salvage finished
                break
            for record in records_to_save:
                session_new.merge(record)
        except DatabaseError:
            # Batch read hit corruption: retry this window record-by-record.
            # .offset() here replaces the query's previous offset, so each
            # probe targets the absolute position offset + pos_in_batch.
            for pos_in_batch in range(batch_size):
                try:
                    record_to_save = query.offset(offset + pos_in_batch).first()
                    if not record_to_save:
                        break
                    session_new.merge(record_to_save)
                except DatabaseError:
                    # This single record is unreadable — skip it
                    pass
        # Persist each salvaged window before moving on
        session_new.commit()
        offset += batch_size
def is_malformed_database(engine):
    """
    Try integrity check of database file to see if it is malformed.
    If database unable to execute it, will also count as malformed.
    """
    # Leftover .old copy alongside a live DB means a previous restore
    # finished; discard the stale copy first.
    if os.path.exists(OLD_SSA_DB) and os.path.exists(SSA_DB):
        os.remove(OLD_SSA_DB)
    try:
        with session_scope(engine) as session:
            rows = session.execute(text("PRAGMA integrity_check")).fetchall()
            verdict = rows[0][0] != 'ok'
        return verdict
    except DatabaseError:
        # Cannot even run the check — treat the file as malformed
        return True
@contextlib.contextmanager
def session_scope(engine) -> Session:
    """
    Provide a transactional scope around a series of operations.

    Commits on normal exit; rolls back and re-raises on any failure;
    always closes the session.

    :param engine: SQLAlchemy engine the session binds to
    """
    session = Session(bind=engine)
    try:
        yield session
        session.commit()
    except BaseException:
        # BaseException instead of the original bare `except:` — identical
        # behavior (still covers KeyboardInterrupt / GeneratorExit thrown
        # into the generator) but explicit and lint-clean.
        session.rollback()
        raise
    finally:
        session.close()