文件创建流程

2018-10-10  本文已影响0人  fullgc

NameNodeRpcServer 实现了ClientProtocol协议,rpc调用create方法;create方法主要是做一些参数检查,active/standby nn检查,以及metrics监控项统计,然后进入startFile方法中

  public HdfsFileStatus create(String src, FsPermission masked,
      String clientName, EnumSetWritable<CreateFlag> flag,
      boolean createParent, short replication, long blockSize, 
      CryptoProtocolVersion[] supportedVersions)
      throws IOException {
    // check namenode 是否启动
    checkNNStartup();
    String clientMachine = getClientMachine();
    if (stateChangeLog.isDebugEnabled()) {
      stateChangeLog.debug("*DIR* NameNode.create: file "
                         +src+" for "+clientName+" at "+clientMachine);
    }
    // 检查src path是否满足条件,最大长度不超过8000个字符,level深度不超过1000
    if (!checkPathLength(src)) {
      throw new IOException("create: Pathname too long.  Limit "
          + MAX_PATH_LENGTH + " characters, " + MAX_PATH_DEPTH + " levels.");
    }
    namesystem.checkOperation(OperationCategory.WRITE);
    HdfsFileStatus fileStatus = namesystem.startFile(src, new PermissionStatus(
        getRemoteUser().getShortUserName(), null, masked),
        clientName, clientMachine, flag.get(), createParent, replication,
        blockSize, supportedVersions);
    metrics.incrFilesCreated();
    metrics.incrCreateFileOps();
    return fileStatus;
  }

startFile函数中会先访问缓存,判断本次请求是否是一次重试请求,根据clientId和callId来标示同一次rpc调用,如果是,则判断上一次请求是否成功,将缓存结果直接返回,否则,进入startFileInt函数,在finally中,我们可以看到会缓存成功的结果;如果创建失败,则执行logAuditEvent打印审计日志

  /**
   * Create a new file entry in the namespace.
   * 
   * For description of parameters and exceptions thrown see
   * {@link ClientProtocol#create}, except it returns valid file status upon
   * success
   */
  HdfsFileStatus startFile(String src, PermissionStatus permissions,
      String holder, String clientMachine, EnumSet<CreateFlag> flag,
      boolean createParent, short replication, long blockSize, 
      CryptoProtocolVersion[] supportedVersions)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    HdfsFileStatus status = null;
    CacheEntryWithPayload cacheEntry = RetryCache.waitForCompletion(retryCache,
        null);
    if (cacheEntry != null && cacheEntry.isSuccess()) {
      return (HdfsFileStatus) cacheEntry.getPayload();
    }
    
    try {
      status = startFileInt(src, permissions, holder, clientMachine, flag,
          createParent, replication, blockSize, supportedVersions,
          cacheEntry != null);
    } catch (AccessControlException e) {
      logAuditEvent(false, "create", src);
      throw e;
    } finally {
      RetryCache.setState(cacheEntry, status != null, status);
    }
    return status;
  }

startFileInt
blockMananger的verifyReplication用来check副本数是否在配置范围内,否则抛出异常
这个地方做了一些加密判断,没有很了解,后面再看,我们继续往下看

  private HdfsFileStatus startFileInt(final String srcArg,
      PermissionStatus permissions, String holder, String clientMachine,
      EnumSet<CreateFlag> flag, boolean createParent, short replication,
      long blockSize, CryptoProtocolVersion[] supportedVersions,
      boolean logRetryCache)
      throws AccessControlException, SafeModeException,
      FileAlreadyExistsException, UnresolvedLinkException,
      FileNotFoundException, ParentNotDirectoryException, IOException {
    String src = srcArg;
    if (NameNode.stateChangeLog.isDebugEnabled()) {
      StringBuilder builder = new StringBuilder();
      builder.append("DIR* NameSystem.startFile: src=" + src
              + ", holder=" + holder
              + ", clientMachine=" + clientMachine
              + ", createParent=" + createParent
              + ", replication=" + replication
              + ", createFlag=" + flag.toString()
              + ", blockSize=" + blockSize);
      builder.append(", supportedVersions=");
      if (supportedVersions != null) {
        builder.append(Arrays.toString(supportedVersions));
      } else {
        builder.append("null");
      }
      NameNode.stateChangeLog.debug(builder.toString());
    }
    if (!DFSUtil.isValidName(src)) {
      throw new InvalidPathException(src);
    }
    blockManager.verifyReplication(src, replication, clientMachine);

    boolean skipSync = false;
    HdfsFileStatus stat = null;
    FSPermissionChecker pc = getPermissionChecker();
    if (blockSize < minBlockSize) {
      throw new IOException("Specified block size is less than configured" +
          " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY
          + "): " + blockSize + " < " + minBlockSize);
    }
    byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src);
    boolean create = flag.contains(CreateFlag.CREATE);
    boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
    boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);

    waitForLoadingFSImage();

    /**
     * If the file is in an encryption zone, we optimistically create an
     * EDEK for the file by calling out to the configured KeyProvider.
     * Since this typically involves doing an RPC, we take the readLock
     * initially, then drop it to do the RPC.
     * 
     * Since the path can flip-flop between being in an encryption zone and not
     * in the meantime, we need to recheck the preconditions when we retake the
     * lock to do the create. If the preconditions are not met, we throw a
     * special RetryStartFileException to ask the DFSClient to try the create
     * again later.
     */
    CryptoProtocolVersion protocolVersion = null;
    CipherSuite suite = null;
    String ezKeyName = null;
    EncryptedKeyVersion edek = null;

    if (provider != null) {
      readLock();
      try {
        src = dir.resolvePath(pc, src, pathComponents);
        INodesInPath iip = dir.getINodesInPath4Write(src);
        // Nothing to do if the path is not within an EZ
        final EncryptionZone zone = dir.getEZForPath(iip);
        if (zone != null) {
          protocolVersion = chooseProtocolVersion(zone, supportedVersions);
          suite = zone.getSuite();
          ezKeyName = zone.getKeyName();

          Preconditions.checkNotNull(protocolVersion);
          Preconditions.checkNotNull(suite);
          Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
              "Chose an UNKNOWN CipherSuite!");
          Preconditions.checkNotNull(ezKeyName);
        }
      } finally {
        readUnlock();
      }

      Preconditions.checkState(
          (suite == null && ezKeyName == null) ||
              (suite != null && ezKeyName != null),
          "Both suite and ezKeyName should both be null or not null");

      // Generate EDEK if necessary while not holding the lock
      edek = generateEncryptedDataEncryptionKey(ezKeyName);
      EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
    }

    // Proceed with the create, using the computed cipher suite and 
    // generated EDEK
    BlocksMapUpdateInfo toRemoveBlocks = null;
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot create file" + src);
      src = dir.resolvePath(pc, src, pathComponents);
      toRemoveBlocks = startFileInternal(pc, src, permissions, holder, 
          clientMachine, create, overwrite, createParent, replication, 
          blockSize, isLazyPersist, suite, protocolVersion, edek, logRetryCache);
      stat = dir.getFileInfo(src, false,
          FSDirectory.isReservedRawName(srcArg), true);
    } catch (StandbyException se) {
      skipSync = true;
      throw se;
    } finally {
      writeUnlock();
      // There might be transactions logged while trying to recover the lease.
      // They need to be sync'ed even when an exception was thrown.
      if (!skipSync) {
        getEditLog().logSync();
        if (toRemoveBlocks != null) {
          removeBlocks(toRemoveBlocks);
          toRemoveBlocks.clear();
        }
      }
    }

    logAuditEvent(true, "create", srcArg, null, stat);
    return stat;
  }

startFileInternal

  private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc, 
      String src, PermissionStatus permissions, String holder, 
      String clientMachine, boolean create, boolean overwrite, 
      boolean createParent, short replication, long blockSize, 
      boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version,
      EncryptedKeyVersion edek, boolean logRetryEntry)
      throws FileAlreadyExistsException, AccessControlException,
      UnresolvedLinkException, FileNotFoundException,
      ParentNotDirectoryException, RetryStartFileException, IOException {
    assert hasWriteLock();
    // Verify that the destination does not exist as a directory already.
    final INodesInPath iip = dir.getINodesInPath4Write(src);
    final INode inode = iip.getLastINode();
    if (inode != null && inode.isDirectory()) {
      throw new FileAlreadyExistsException(src +
          " already exists as a directory");
    }

    FileEncryptionInfo feInfo = null;

    final EncryptionZone zone = dir.getEZForPath(iip);
    if (zone != null) {
      // The path is now within an EZ, but we're missing encryption parameters
      if (suite == null || edek == null) {
        throw new RetryStartFileException();
      }
      // Path is within an EZ and we have provided encryption parameters.
      // Make sure that the generated EDEK matches the settings of the EZ.
      final String ezKeyName = zone.getKeyName();
      if (!ezKeyName.equals(edek.getEncryptionKeyName())) {
        throw new RetryStartFileException();
      }
      feInfo = new FileEncryptionInfo(suite, version,
          edek.getEncryptedKeyVersion().getMaterial(),
          edek.getEncryptedKeyIv(),
          ezKeyName, edek.getEncryptionKeyVersionName());
      Preconditions.checkNotNull(feInfo);
    }

    final INodeFile myFile = INodeFile.valueOf(inode, src, true);
    if (isPermissionEnabled) {
      if (overwrite && myFile != null) {
        if (!dir.doPermissionCheckPatch(pc, src, FsAction.WRITE))
          checkPathAccess(pc, src, FsAction.WRITE);
      }
      /*
       * To overwrite existing file, need to check 'w' permission 
       * of parent (equals to ancestor in this case)
       */
      if (!dir.doPermissionCheckPatch(pc, src, FsAction.WRITE))
        checkAncestorAccess(pc, src, FsAction.WRITE);
    }

    if (!createParent) {
      verifyParentDir(src);
    }

    try {
      BlocksMapUpdateInfo toRemoveBlocks = null;
      if (myFile == null) {
        if (!create) {
          throw new FileNotFoundException("Can't overwrite non-existent " +
              src + " for client " + clientMachine);
        }
      } else {
        if (overwrite) {
          toRemoveBlocks = new BlocksMapUpdateInfo();
          List<INode> toRemoveINodes = new ChunkedArrayList<INode>();
          long ret = dir.delete(src, toRemoveBlocks, toRemoveINodes, now());
          if (ret >= 0) {
            incrDeletedFileCount(ret);
            removePathAndBlocks(src, null, toRemoveINodes, true);
          }
        } else {
          // If lease soft limit time is expired, recover the lease
          recoverLeaseInternal(myFile, src, holder, clientMachine, false);
          throw new FileAlreadyExistsException(src + " for client " +
              clientMachine + " already exists");
        }
      }

      checkFsObjectLimit();
      INodeFile newNode = null;

      // Always do an implicit mkdirs for parent directory tree.
      Path parent = new Path(src).getParent();
      if (parent != null && mkdirsRecursively(parent.toString(),
              permissions, true, now())) {
        newNode = dir.addFile(src, permissions, replication, blockSize,
                              holder, clientMachine);
      }

      if (newNode == null) {
        throw new IOException("Unable to add " + src +  " to namespace");
      }
      leaseManager.addLease(newNode.getFileUnderConstructionFeature()
          .getClientName(), src);

      // Set encryption attributes if necessary
      if (feInfo != null) {
        dir.setFileEncryptionInfo(src, feInfo);
        newNode = dir.getInode(newNode.getId()).asFile();
      }

      setNewINodeStoragePolicy(newNode, iip, isLazyPersist);

      // record file record in log, record new generation stamp
      getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry);
      NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added {}" +
          " inode {} holder {}", src, newNode.getId(), holder);
      return toRemoveBlocks;
    } catch (IOException ie) {
      NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " +
          ie.getMessage());
      throw ie;
    }
  }

addFile这里主要调用的方法是addChild,会对目录做quota检查,然后将InodeFile添加到父亲InodeDirectory的children里,这里会将InodeFile增加UnderConstruct属性,标示是正在构建的Inode节点

  INodeFile addFile(String path, PermissionStatus permissions,
                    short replication, long preferredBlockSize,
                    String clientName, String clientMachine)
    throws FileAlreadyExistsException, QuotaExceededException,
      UnresolvedLinkException, SnapshotAccessControlException, AclException {

    long modTime = now();
    INodeFile newNode = newINodeFile(namesystem.allocateNewInodeId(),
        permissions, modTime, modTime, replication, preferredBlockSize);
    newNode.toUnderConstruction(clientName, clientMachine);

    boolean added = false;
    writeLock();
    try {
      added = addINode(path, newNode, permissions.getPermission());
    } finally {
      writeUnlock();
    }
    if (!added) {
      NameNode.stateChangeLog.info("DIR* addFile: failed to add " + path);
      return null;
    }

    if(NameNode.stateChangeLog.isDebugEnabled()) {
      NameNode.stateChangeLog.debug("DIR* addFile: " + path + " is added");
    }
    return newNode;
  }

addChild

  private boolean addChild(INodesInPath iip, int pos, INode child,
                           FsPermission modes, boolean checkQuota)
      throws QuotaExceededException {
    final INode[] inodes = iip.getINodes();
    // Disallow creation of /.reserved. This may be created when loading
    // editlog/fsimage during upgrade since /.reserved was a valid name in older
    // release. This may also be called when a user tries to create a file
    // or directory /.reserved.
    if (pos == 1 && inodes[0] == rootDir && isReservedName(child)) {
      throw new HadoopIllegalArgumentException(
          "File name \"" + child.getLocalName() + "\" is reserved and cannot "
              + "be created. If this is during upgrade change the name of the "
              + "existing file or directory to another name before upgrading "
              + "to the new release.");
    }
    // The filesystem limits are not really quotas, so this check may appear
    // odd. It's because a rename operation deletes the src, tries to add
    // to the dest, if that fails, re-adds the src from whence it came.
    // The rename code disables the quota when it's restoring to the
    // original location becase a quota violation would cause the the item
    // to go "poof".  The fs limits must be bypassed for the same reason.
    if (checkQuota) {
      verifyMaxComponentLength(child.getLocalNameBytes(), inodes, pos);
      verifyMaxDirItems(inodes, pos);
    }
    // always verify inode name
    verifyINodeName(child.getLocalNameBytes());
    
    final Quota.Counts counts = child.computeQuotaUsage();
    updateCount(iip, pos,
        counts.get(Quota.NAMESPACE), counts.get(Quota.DISKSPACE), checkQuota);
    boolean isRename = (child.getParent() != null);
    final INodeDirectory parent = inodes[pos-1].asDirectory();
    boolean added;
    try {
      added = parent.addChild(child, true, iip.getLatestSnapshotId());
    } catch (QuotaExceededException e) {
      updateCountNoQuotaCheck(iip, pos,
          -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
      throw e;
    }
    if (!added) {
      updateCountNoQuotaCheck(iip, pos,
          -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
    } else {
      iip.setINode(pos - 1, child.getParent());
      if (!isRename) {
        copyINodeDefaultAcl(child, modes);
      }
      addToInodeMap(child);
    }
    return added;
  }
上一篇下一篇

猜你喜欢

热点阅读