
Source Code for Module CedarBackup3.actions.collect

# -*- coding: iso-8859-1 -*-
# vim: set ft=python ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#              C E D A R
#          S O L U T I O N S       "Software done right."
#           S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2004-2008,2011,2015 Kenneth J. Pronovici.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# Version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Copies of the GNU General Public License are available from
# the Free Software Foundation website, http://www.gnu.org/.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Author   : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Python 3 (>= 3.4)
# Project  : Cedar Backup, release 3
# Purpose  : Implements the standard 'collect' action.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

########################################################################
# Module documentation
########################################################################

"""
Implements the standard 'collect' action.
@sort: executeCollect
@author: Kenneth J. Pronovici <pronovic@ieee.org>
"""


########################################################################
# Imported modules
########################################################################

# System modules
import os
import logging
import pickle

# Cedar Backup modules
from CedarBackup3.filesystem import BackupFileList, FilesystemList
from CedarBackup3.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath
from CedarBackup3.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR
from CedarBackup3.actions.util import writeIndicatorFile


########################################################################
# Module-wide constants and variables
########################################################################

logger = logging.getLogger("CedarBackup3.log.actions.collect")


########################################################################
# Public functions
########################################################################

############################
# executeCollect() function
############################

# pylint: disable=W0613
def executeCollect(configPath, options, config):
   """
   Executes the collect backup action.

   @note: When the collect action is complete, we will write a collect
   indicator to the collect directory, so it's obvious that the collect action
   has completed.  The stage process uses this indicator to decide whether a
   peer is ready to be staged.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise TarError: If there is a problem creating a tar file
   """
   logger.debug("Executing the 'collect' action.")
   if config.options is None or config.collect is None:
      raise ValueError("Collect configuration is not properly filled in.")
   if ((config.collect.collectFiles is None or len(config.collect.collectFiles) < 1) and
       (config.collect.collectDirs is None or len(config.collect.collectDirs) < 1)):
      raise ValueError("There must be at least one collect file or collect directory.")
   fullBackup = options.full
   logger.debug("Full backup flag is [%s]", fullBackup)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   resetDigest = fullBackup or todayIsStart
   logger.debug("Reset digest flag is [%s]", resetDigest)
   if config.collect.collectFiles is not None:
      for collectFile in config.collect.collectFiles:
         logger.debug("Working with collect file [%s]", collectFile.absolutePath)
         collectMode = _getCollectMode(config, collectFile)
         archiveMode = _getArchiveMode(config, collectFile)
         digestPath = _getDigestPath(config, collectFile.absolutePath)
         tarfilePath = _getTarfilePath(config, collectFile.absolutePath, archiveMode)
         if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart):
            logger.debug("File meets criteria to be backed up today.")
            _collectFile(config, collectFile.absolutePath, tarfilePath,
                         collectMode, archiveMode, resetDigest, digestPath)
         else:
            logger.debug("File will not be backed up, per collect mode.")
         logger.info("Completed collecting file [%s]", collectFile.absolutePath)
   if config.collect.collectDirs is not None:
      for collectDir in config.collect.collectDirs:
         logger.debug("Working with collect directory [%s]", collectDir.absolutePath)
         collectMode = _getCollectMode(config, collectDir)
         archiveMode = _getArchiveMode(config, collectDir)
         ignoreFile = _getIgnoreFile(config, collectDir)
         linkDepth = _getLinkDepth(collectDir)
         dereference = _getDereference(collectDir)
         recursionLevel = _getRecursionLevel(collectDir)
         (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
         if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart):
            logger.debug("Directory meets criteria to be backed up today.")
            _collectDirectory(config, collectDir.absolutePath,
                              collectMode, archiveMode, ignoreFile, linkDepth, dereference,
                              resetDigest, excludePaths, excludePatterns, recursionLevel)
         else:
            logger.debug("Directory will not be backed up, per collect mode.")
         logger.info("Completed collecting directory [%s]", collectDir.absolutePath)
   writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                      config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the 'collect' action successfully.")
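
########################################################################
# Example: collect-mode decision rule (an illustrative sketch, not part
# of the original module).  This standalone function restates the
# predicate used by executeCollect() above so it can be exercised in
# isolation; the mode names mirror the configured 'daily', 'weekly',
# and 'incr' collect modes.
########################################################################

def _would_collect_today(collectMode, fullBackup, todayIsStart):
   """Return True if an item with this collect mode would be backed up today."""
   if fullBackup:
      return True                                       # --full forces everything to be collected
   if collectMode in ['daily', 'incr']:
      return True                                       # daily and incremental items run every day
   return collectMode == 'weekly' and todayIsStart      # weekly items only on the starting day

assert _would_collect_today('weekly', fullBackup=False, todayIsStart=False) is False
assert _would_collect_today('weekly', fullBackup=True, todayIsStart=False) is True
assert _would_collect_today('incr', fullBackup=False, todayIsStart=False) is True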


########################################################################
# Private utility functions
########################################################################

##########################
# _collectFile() function
##########################

def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a configured collect file.

   The indicated collect file is collected into the indicated tarfile.
   For files that are collected incrementally, we'll use the indicated
   digest path and pay attention to the reset digest flag (basically, the reset
   digest flag ignores any existing digest, but a new digest is always
   rewritten).

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   backupList = BackupFileList()
   backupList.addFile(absolutePath)
   _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)


###############################
# _collectDirectory() function
###############################

def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
   """
   Collects a configured collect directory.

   The indicated collect directory is collected into the indicated tarfile.
   For directories that are collected incrementally, we'll use the indicated
   digest path and pay attention to the reset digest flag (basically, the reset
   digest flag ignores any existing digest, but a new digest is always
   rewritten).

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect directory itself.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param dereference: Dereference flag to use.
   @param resetDigest: Reset digest flag.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   @param recursionLevel: Recursion level (zero for no recursion)
   """
   if recursionLevel == 0:
      # Collect the actual directory because we're at recursion level 0
      logger.info("Collecting directory [%s]", absolutePath)
      tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
      digestPath = _getDigestPath(config, absolutePath)

      backupList = BackupFileList()
      backupList.ignoreFile = ignoreFile
      backupList.excludePaths = excludePaths
      backupList.excludePatterns = excludePatterns
      backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

      _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
   else:
      # Find all of the immediate subdirectories
      subdirs = FilesystemList()
      subdirs.excludeFiles = True
      subdirs.excludeLinks = True
      subdirs.excludePaths = excludePaths
      subdirs.excludePatterns = excludePatterns
      subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

      # Back up the subdirectories separately
      for subdir in subdirs:
         _collectDirectory(config, subdir, collectMode, archiveMode,
                           ignoreFile, linkDepth, dereference, resetDigest,
                           excludePaths, excludePatterns, recursionLevel-1)
         excludePaths.append(subdir)  # this directory is already backed up, so exclude it

      # Back up everything that hasn't previously been backed up
      _collectDirectory(config, absolutePath, collectMode, archiveMode,
                        ignoreFile, linkDepth, dereference, resetDigest,
                        excludePaths, excludePatterns, 0)
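
########################################################################
# Example: how recursionLevel splits a directory (an illustrative
# sketch, not part of the original module).  This mimics the control
# flow of _collectDirectory() using plain os calls: at recursionLevel
# greater than zero, each immediate subdirectory becomes its own backup
# unit, and the remainder of the parent is then collected with the
# already-handled children excluded.  As in the real function, the
# excludePaths list is shared and mutated as children are processed.
########################################################################

def _plan_collect(absolutePath, excludePaths, recursionLevel):
   """Return the list of (directory, excludes) units that would each get a tarfile."""
   if recursionLevel == 0:
      return [(absolutePath, list(excludePaths))]
   plan = []
   for entry in sorted(os.listdir(absolutePath)):
      subdir = os.path.join(absolutePath, entry)
      if os.path.isdir(subdir) and not os.path.islink(subdir) and subdir not in excludePaths:
         plan.extend(_plan_collect(subdir, excludePaths, recursionLevel-1))
         excludePaths.append(subdir)   # already planned, so exclude it from the parent pass
   plan.extend(_plan_collect(absolutePath, excludePaths, 0))  # everything not yet covered
   return plan

# With recursionLevel=1, a directory like /home would yield one unit per
# user directory plus a final unit for /home itself with those children excluded.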


############################
# _executeBackup() function
############################

def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function exists mainly to consolidate functionality between the
   L{_collectFile} and L{_collectDirectory} functions.  Those functions build
   the backup list; this function causes the backup to execute properly and
   also manages usage of the digest file on disk as explained in their
   comments.

   For collect files, the digest file will always just contain the single file
   that is being backed up.  This might be a little wasteful in terms of the
   number of files that we keep around, but it's consistent and easy to understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   if collectMode != 'incr':
      logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)
      if len(backupList) == 1 and backupList[0] == absolutePath:  # special case for individual file
         logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
      else:
         logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))
      if len(backupList) > 0:
         backupList.generateTarfile(tarfilePath, archiveMode, True)
         changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
   else:
      if resetDigest:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         oldDigest = {}
      else:
         logger.debug("Based on resetDigest flag, digest will be loaded from disk.")
         oldDigest = _loadDigest(digestPath)
      (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values.", removed)
      if len(backupList) == 1 and backupList[0] == absolutePath:  # special case for individual file
         logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
      else:
         logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))
      if len(backupList) > 0:
         backupList.generateTarfile(tarfilePath, archiveMode, True)
         changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
      _writeDigest(config, newDigest, digestPath)
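
########################################################################
# Example: the incremental digest idea (an illustrative sketch, not part
# of the original module).  BackupFileList.removeUnchanged() compares
# each file's current digest against the digest captured on a previous
# run; this standalone version shows the same bookkeeping with hashlib
# and plain dicts, without depending on CedarBackup3 (the choice of
# SHA-256 here is arbitrary and purely for illustration).
########################################################################

import hashlib

def _remove_unchanged(paths, oldDigest):
   """Return (changed paths, new digest) given a previous path-to-digest mapping."""
   newDigest = {}
   changed = []
   for path in paths:
      with open(path, "rb") as f:
         value = hashlib.sha256(f.read()).hexdigest()
      newDigest[path] = value
      if oldDigest.get(path) != value:   # new file, or contents changed since last run
         changed.append(path)
   return changed, newDigest

# On a full backup (resetDigest=True) the old digest is just {}, so every
# file counts as changed; the new digest is always written back afterwards.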


#########################
# _loadDigest() function
#########################

def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist or
   for some other reason), then an empty dictionary will be returned - but the
   condition will be logged.

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      digest = {}
      logger.debug("Digest [%s] does not exist on disk.", digestPath)
   else:
      try:
         with open(digestPath, "rb") as f:
            digest = pickle.load(f, fix_imports=True)  # be compatible with Python 2
         logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(digest))
      except Exception as e:
         digest = {}
         logger.error("Failed loading digest [%s] from disk: %s", digestPath, e)
   return digest


##########################
# _writeDigest() function
##########################

def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      with open(digestPath, "wb") as f:
         pickle.dump(digest, f, 0, fix_imports=True)  # be compatible with Python 2
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
   except Exception as e:
      logger.error("Failed to write digest [%s] to disk: %s", digestPath, e)
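
########################################################################
# Example: digest round-trip (an illustrative sketch, not part of the
# original module).  The digest is a plain dict pickled with protocol 0
# and fix_imports=True, per the "be compatible with Python 2" comments
# above; this snippet just exercises the same write/read pair against a
# throwaway temporary file.
########################################################################

import tempfile

_tmp = tempfile.NamedTemporaryFile(delete=False)
_tmp.close()
with open(_tmp.name, "wb") as _f:
   pickle.dump({"/etc/hosts": "0a1b2c"}, _f, 0, fix_imports=True)
with open(_tmp.name, "rb") as _f:
   _restored = pickle.load(_f, fix_imports=True)
assert _restored == {"/etc/hosts": "0a1b2c"}
os.remove(_tmp.name)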


########################################################################
# Private attribute "getter" functions
########################################################################

#############################
# _getCollectMode() function
#############################

def _getCollectMode(config, item):
   """
   Gets the collect mode that should be used for a collect directory or file.
   If possible, use the one on the file or directory, otherwise take from collect section.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   if item.collectMode is None:
      collectMode = config.collect.collectMode
   else:
      collectMode = item.collectMode
   logger.debug("Collect mode is [%s]", collectMode)
   return collectMode


#############################
# _getArchiveMode() function
#############################

def _getArchiveMode(config, item):
   """
   Gets the archive mode that should be used for a collect directory or file.
   If possible, use the one on the file or directory, otherwise take from collect section.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   if item.archiveMode is None:
      archiveMode = config.collect.archiveMode
   else:
      archiveMode = item.archiveMode
   logger.debug("Archive mode is [%s]", archiveMode)
   return archiveMode


############################
# _getIgnoreFile() function
############################

def _getIgnoreFile(config, item):
   """
   Gets the ignore file that should be used for a collect directory or file.
   If possible, use the one on the file or directory, otherwise take from collect section.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   if item.ignoreFile is None:
      ignoreFile = config.collect.ignoreFile
   else:
      ignoreFile = item.ignoreFile
   logger.debug("Ignore file is [%s]", ignoreFile)
   return ignoreFile


###########################
# _getLinkDepth() function
###########################

def _getLinkDepth(item):
   """
   Gets the link depth that should be used for a collect directory.
   If possible, use the one on the directory, otherwise set a value of 0 (zero).
   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   if item.linkDepth is None:
      linkDepth = 0
   else:
      linkDepth = item.linkDepth
   logger.debug("Link depth is [%d]", linkDepth)
   return linkDepth


#############################
# _getDereference() function
#############################

def _getDereference(item):
   """
   Gets the dereference flag that should be used for a collect directory.
   If possible, use the one on the directory, otherwise set a value of False.
   @param item: C{CollectDir} object
   @return: Dereference flag to use.
   """
   if item.dereference is None:
      dereference = False
   else:
      dereference = item.dereference
   logger.debug("Dereference flag is [%s]", dereference)
   return dereference


################################
# _getRecursionLevel() function
################################

def _getRecursionLevel(item):
   """
   Gets the recursion level that should be used for a collect directory.
   If possible, use the one on the directory, otherwise set a value of 0 (zero).
   @param item: C{CollectDir} object
   @return: Recursion level to use.
   """
   if item.recursionLevel is None:
      recursionLevel = 0
   else:
      recursionLevel = item.recursionLevel
   logger.debug("Recursion level is [%d]", recursionLevel)
   return recursionLevel


############################
# _getDigestPath() function
############################

def _getDigestPath(config, absolutePath):
   """
   Gets the digest path associated with a collect directory or file.
   @param config: Config object.
   @param absolutePath: Absolute path to generate digest for
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   normalized = buildNormalizedPath(absolutePath)
   filename = "%s.%s" % (normalized, DIGEST_EXTENSION)
   digestPath = os.path.join(config.options.workingDir, filename)
   logger.debug("Digest path is [%s]", digestPath)
   return digestPath


#############################
# _getTarfilePath() function
#############################

def _getTarfilePath(config, absolutePath, archiveMode):
   """
   Gets the tarfile path (including correct extension) associated with a collect directory.
   @param config: Config object.
   @param absolutePath: Absolute path to generate tarfile for
   @param archiveMode: Archive mode to use for this tarfile.
   @return: Absolute path to the tarfile associated with the collect directory.
   """
   if archiveMode == 'tar':
      extension = "tar"
   elif archiveMode == 'targz':
      extension = "tar.gz"
   elif archiveMode == 'tarbz2':
      extension = "tar.bz2"
   normalized = buildNormalizedPath(absolutePath)
   filename = "%s.%s" % (normalized, extension)
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]", tarfilePath)
   return tarfilePath
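
########################################################################
# Example: tarfile naming (an illustrative sketch, not part of the
# original module).  The helper below approximates what the naming ends
# up looking like: the collected path is flattened into a single
# filename component and the archive mode picks the extension.  The
# exact flattening rules live in CedarBackup3.util.buildNormalizedPath;
# the simplified stand-in here is an assumption for illustration only.
########################################################################

_EXTENSIONS = {'tar': "tar", 'targz': "tar.gz", 'tarbz2': "tar.bz2"}

def _example_tarfile_path(targetDir, absolutePath, archiveMode):
   """Build an approximate tarfile path for a collected file or directory."""
   flattened = absolutePath.lstrip("/").replace("/", "-") or "-"   # simplified stand-in for buildNormalizedPath
   return os.path.join(targetDir, "%s.%s" % (flattened, _EXTENSIONS[archiveMode]))

# _example_tarfile_path("/var/backups/collect", "/etc/cedarbackup", "targz")
# yields "/var/backups/collect/etc-cedarbackup.tar.gz" for this stand-in;
# the real normalization in Cedar Backup may differ in detail.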


############################
# _getExclusions() function
############################

def _getExclusions(config, collectDir):
   """
   Gets exclusions (files and patterns) associated with a collect directory.

   The returned files value is a list of absolute paths to be excluded from the
   backup for a given directory.  It is derived from the collect configuration
   absolute exclude paths and the collect directory's absolute and relative
   exclude paths.

   The returned patterns value is a list of patterns to be excluded from the
   backup for a given directory.  It is derived from the list of patterns from
   the collect configuration and from the collect directory itself.

   @param config: Config object.
   @param collectDir: Collect directory object.

   @return: Tuple (files, patterns) indicating what to exclude.
   """
   paths = []
   if config.collect.absoluteExcludePaths is not None:
      paths.extend(config.collect.absoluteExcludePaths)
   if collectDir.absoluteExcludePaths is not None:
      paths.extend(collectDir.absoluteExcludePaths)
   if collectDir.relativeExcludePaths is not None:
      for relativePath in collectDir.relativeExcludePaths:
         paths.append(os.path.join(collectDir.absolutePath, relativePath))
   patterns = []
   if config.collect.excludePatterns is not None:
      patterns.extend(config.collect.excludePatterns)
   if collectDir.excludePatterns is not None:
      patterns.extend(collectDir.excludePatterns)
   logger.debug("Exclude paths: %s", paths)
   logger.debug("Exclude patterns: %s", patterns)
   return (paths, patterns)
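

########################################################################
# Example: how exclusions are merged (an illustrative sketch, not part
# of the original module).  Configuration-level absolute excludes, the
# directory's own absolute excludes, and its relative excludes (joined
# against the directory path) all end up in one flat list, exactly as
# _getExclusions() does above; simple namespaces stand in for the real
# Config and CollectDir objects.
########################################################################

from types import SimpleNamespace

_collect = SimpleNamespace(absoluteExcludePaths=["/var/cache"], excludePatterns=[r".*\.tmp"])
_config = SimpleNamespace(collect=_collect)
_collectDir = SimpleNamespace(absolutePath="/home/user",
                              absoluteExcludePaths=["/home/user/scratch"],
                              relativeExcludePaths=["Downloads"],
                              excludePatterns=None)

_paths = list(_config.collect.absoluteExcludePaths)
_paths.extend(_collectDir.absoluteExcludePaths)
_paths.extend(os.path.join(_collectDir.absolutePath, rel) for rel in _collectDir.relativeExcludePaths)
_patterns = list(_config.collect.excludePatterns)
if _collectDir.excludePatterns is not None:
   _patterns.extend(_collectDir.excludePatterns)

assert _paths == ["/var/cache", "/home/user/scratch", "/home/user/Downloads"]
assert _patterns == [r".*\.tmp"]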