2020import com .google .common .annotations .VisibleForTesting ;
2121import com .google .common .collect .Sets ;
2222import com .google .common .primitives .Bytes ;
23+ import java .io .File ;
2324import java .io .IOException ;
25+ import java .nio .charset .StandardCharsets ;
2426import java .nio .file .Files ;
2527import java .nio .file .Path ;
2628import java .nio .file .Paths ;
2931import java .util .Collections ;
3032import java .util .HashMap ;
3133import java .util .List ;
34+ import java .util .Locale ;
3235import java .util .Map ;
3336import java .util .Map .Entry ;
3437import java .util .Set ;
38+ import java .util .concurrent .ScheduledExecutorService ;
39+ import java .util .concurrent .ScheduledFuture ;
40+ import java .util .concurrent .TimeUnit ;
41+ import java .util .concurrent .atomic .AtomicReference ;
3542import java .util .concurrent .locks .ReadWriteLock ;
3643import java .util .concurrent .locks .ReentrantReadWriteLock ;
3744import java .util .stream .Collectors ;
4552import org .iq80 .leveldb .ReadOptions ;
4653import org .iq80 .leveldb .WriteBatch ;
4754import org .iq80 .leveldb .WriteOptions ;
55+ import org .tron .common .es .ExecutorServiceManager ;
4856import org .tron .common .parameter .CommonParameter ;
4957import org .tron .common .storage .WriteOptionsWrapper ;
5058import org .tron .common .storage .metric .DbStat ;
6169public class LevelDbDataSourceImpl extends DbStat implements DbSourceInter <byte []>,
6270 Iterable <Entry <byte [], byte []>>, Instance <LevelDbDataSourceImpl > {
6371
72+ /** First watchdog WARN fires this many seconds after factory.open() begins. */
73+ private static final long OPEN_WATCHDOG_INITIAL_DELAY_SEC = 60 ;
74+ /** Subsequent watchdog WARN lines are emitted on this interval. */
75+ private static final long OPEN_WATCHDOG_PERIOD_SEC = 30 ;
76+ /** Value of {@code Filename.currentFileName()}. */
77+ private static final String LEVELDB_CURRENT_FILE = "CURRENT" ;
78+
6479 private String dataBaseName ;
6580 private DB database ;
6681 private volatile boolean alive ;
@@ -121,6 +136,15 @@ private void openDatabase(Options dbOptions) throws IOException {
121136 if (!Files .isSymbolicLink (dbPath .getParent ())) {
122137 Files .createDirectories (dbPath .getParent ());
123138 }
139+ final long openStartNs = System .nanoTime ();
140+ final AtomicReference <String > manifestInfo = new AtomicReference <>();
141+ ScheduledExecutorService watchdog = ExecutorServiceManager
142+ .newSingleThreadScheduledExecutor ("db-open-watchdog-" + dataBaseName , true );
143+ ScheduledFuture <?> watchdogTask = watchdog .scheduleAtFixedRate (
144+ () -> logSlowOpen (dbPath , openStartNs , manifestInfo ),
145+ OPEN_WATCHDOG_INITIAL_DELAY_SEC ,
146+ OPEN_WATCHDOG_PERIOD_SEC ,
147+ TimeUnit .SECONDS );
124148 try {
125149 DbSourceInter .checkOrInitEngine (getEngine (), dbPath .toString (),
126150 TronError .ErrCode .LEVELDB_INIT );
@@ -139,7 +163,55 @@ private void openDatabase(Options dbOptions) throws IOException {
139163 logger .error ("Open Database {} failed" , dataBaseName , e );
140164 }
141165 throw new TronError (e , TronError .ErrCode .LEVELDB_INIT );
166+ } finally {
167+ watchdogTask .cancel (false );
168+ watchdog .shutdownNow ();
169+ }
170+ }
171+
172+ /**
173+ * Emits a WARN when factory.open() is still blocked — usually because the
174+ * MANIFEST has grown large enough to make replay expensive.
175+ */
176+ void logSlowOpen (Path dbPath , long startNs , AtomicReference <String > manifestInfoCache ) {
177+ try {
178+ long elapsedSec = TimeUnit .NANOSECONDS .toSeconds (System .nanoTime () - startNs );
179+ String manifestInfo = manifestInfoCache .get ();
180+ if (manifestInfo == null ) {
181+ manifestInfo = resolveManifestInfo (dbPath .toFile ());
182+ manifestInfoCache .compareAndSet (null , manifestInfo );
183+ }
184+ logger .warn ("DB {} open still in progress after {}s. path={}, {}. "
185+ + "This startup will complete; to speed up future restarts, run "
186+ + "`java -jar Toolkit.jar db archive -d {}` before the next startup "
187+ + "to rebuild the MANIFEST (the tool requires an exclusive DB lock, "
188+ + "so it cannot run while the node is up)." ,
189+ dataBaseName , elapsedSec , dbPath , manifestInfo , parentPath );
190+ } catch (Exception e ) {
191+ // Purely observational - never let the watchdog disrupt startup.
192+ logger .debug ("db-open-watchdog failure for {}: {}" , dataBaseName , e .getMessage ());
193+ }
194+ }
195+
196+ private static String resolveManifestInfo (File dbDir ) {
197+ File currentFile = new File (dbDir , LEVELDB_CURRENT_FILE );
198+ String name = "none" ;
199+ long size = 0 ;
200+ if (currentFile .isFile ()) {
201+ try {
202+ name = new String (Files .readAllBytes (currentFile .toPath ()),
203+ StandardCharsets .UTF_8 ).trim ();
204+ File manifest = new File (dbDir , name );
205+ if (manifest .isFile ()) {
206+ size = manifest .length ();
207+ }
208+ } catch (IOException ignored ) {
209+ // Best-effort — keep defaults. A new DB won't hit the 60s threshold
210+ // anyway, so reporting 0.00 MB here is the expected shape.
211+ }
142212 }
213+ return String .format (Locale .ROOT , "MANIFEST=%s (%.2f MB)" , name ,
214+ size / 1024.0 / 1024.0 );
143215 }
144216
145217 public Path getDbPath () {
0 commit comments