Android重学系列 NetworkManagementService

2020-11-29  本文已影响0人  yjy239

前言

前四篇文章讲述了Okhttp的核心原理,得知Okhttp是基于Socket开发的,而不是基于HttpUrlConnection开发的。

其中对于客户端来说,核心有如下四个步骤:

本文着重来看看DNS做了什么事情。在Okhttp默认的DNS 对象就是如下这个DnsSystem对象。

    /**
     * Default [Dns] implementation that delegates to the platform resolver via
     * [InetAddress.getAllByName].
     *
     * A [NullPointerException] raised by the resolver is converted into an
     * [UnknownHostException] that carries the original exception as its cause.
     */
    private class DnsSystem : Dns {
      override fun lookup(hostname: String): List<InetAddress> =
        try {
          InetAddress.getAllByName(hostname).toList()
        } catch (npe: NullPointerException) {
          val unknownHost =
            UnknownHostException("Broken system behaviour for dns lookup of $hostname")
          unknownHost.initCause(npe)
          throw unknownHost
        }
    }

然而本文并不会立即和大家聊DNS的解析过程。因为在Android的网络模块中,存在着2个十分重要的对象:NetworkManagementService 以及 netd 进程。

而DNS的查询恰好是通过2个模块的互相运作才能执行。

遇到什么问题可以来这里讨论https://www.jianshu.com/p/cce3535ce457

正文

netd进程启动

文件:/system/netd/server/netd.rc

# Declares the "netd" service, whose executable is /system/bin/netd.
service netd /system/bin/netd
    class main
    # Four stream sockets declared for this service: name, type, mode, owner, group.
    socket netd stream 0660 root system
    socket dnsproxyd stream 0660 root inet
    socket mdns stream 0660 root system
    socket fwmarkd stream 0660 root inet
    # When netd restarts, zygote and zygote_secondary are restarted as well.
    onrestart restart zygote
    onrestart restart zygote_secondary

关于rc文件如何解析,本文就不多赘述了。详情可以阅读我写的 系统启动到Activity(上) 一文。

能看到在这个过程中,会启动一个netd 进程,接着会启动netd,dnsproxyd,mdns,fwmarkd 四个socket。

文件:/system/netd/server/main.cpp

// Entry point of the netd daemon.
//
// Brings up, in order: the NetlinkManager, the global controllers (gCtls), the
// NFLog listener, the dnsproxyd / mdns / fwmarkd listeners, the native binder
// service, the "netd" command listener and the HW service, then blocks in the
// binder thread pool. Every start-up failure shown here ends the process with
// exit(1). The "..." lines are elisions made by the article, not code.
int main() {
    using android::net::gCtls;
    Stopwatch s;

    remove_pid_file();

    blockSigpipe();


    // Apply setCloseOnExec to each of the four listener socket names.
    for (const auto& sock : { CommandListener::SOCKET_NAME,
                              DnsProxyListener::SOCKET_NAME,
                              FwmarkServer::SOCKET_NAME,
                              MDnsSdListener::SOCKET_NAME }) {
        setCloseOnExec(sock);
    }

    NetlinkManager *nm = NetlinkManager::Instance();
    if (nm == nullptr) {

        exit(1);
    };

    // Create and initialise the global controller set used by the listeners below.
    gCtls = new android::net::Controllers();
    gCtls->init();

    // The command listener doubles as the broadcaster for netlink events; it is
    // only started further down, after the other services are up.
    CommandListener cl;
    nm->setBroadcaster((SocketListener *) &cl);

    if (nm->start()) {
...
        exit(1);
    }

    std::unique_ptr<NFLogListener> logListener;
    {
        auto result = makeNFLogListener();
        if (!isOk(result)) {
...
            exit(1);
        }
        logListener = std::move(result.value());
        auto status = gCtls->wakeupCtrl.init(logListener.get());
...
    }


    // netd itself resolves locally: android_open_proxy() returns NULL when
    // ANDROID_DNS_MODE is "local", so netd never proxies DNS back to itself.
    setenv("ANDROID_DNS_MODE", "local", 1);
    DnsProxyListener dpl(&gCtls->netCtrl, &gCtls->eventReporter);
    if (dpl.startListener()) {

        exit(1);
    }

    MDnsSdListener mdnsl;
    if (mdnsl.startListener()) {

        exit(1);
    }

    FwmarkServer fwmarkServer(&gCtls->netCtrl, &gCtls->eventReporter, &gCtls->trafficCtrl);
    if (fwmarkServer.startListener()) {

        exit(1);
    }

    Stopwatch subTime;
    status_t ret;
    if ((ret = NetdNativeService::start()) != android::OK) {

        exit(1);
    }



    // Start listening on the "netd" command socket last among the socket listeners.
    if (cl.startListener()) {

        exit(1);
    }


    write_pid_file();

    NetdHwService mHwSvc;
    if ((ret = mHwSvc.start()) != android::OK) {
        exit(1);
    }

    // Park the main thread in the binder thread pool; the code below only runs
    // if joinThreadPool() returns.
    IPCThreadState::self()->joinThreadPool();

    remove_pid_file();

    exit(0);
}

这里我们分别看看都做了什么。

NetlinkManager 初始化

// Returns the process-wide NetlinkManager, creating it on first use.
// No locking is performed here.
NetlinkManager *NetlinkManager::Instance() {
    if (sInstance == nullptr) {
        sInstance = new NetlinkManager();
    }
    return sInstance;
}

实例化NetlinkManager对象后,设置一个CommandListener 到NetlinkManager 中。

    // Stores the listener that is later handed out by getBroadcaster().
    void setBroadcaster(SocketListener *sl) {
        this->mBroadcaster = sl;
    }
    // Returns the listener previously stored with setBroadcaster().
    SocketListener *getBroadcaster() {
        return this->mBroadcaster;
    }

接着调用start方法启动NetlinkManager.

NetlinkManager start

// Creates the four netlink handlers through setupSocket(): uevent, route,
// quota (NFLOG) and strict (NETFILTER).
// Returns -1 if the uevent or route socket cannot be set up and 0 otherwise;
// the failure handling of the quota and strict sockets is elided ("...") in
// this excerpt.
int NetlinkManager::start() {
    // Kernel uevents, all groups (0xffffffff), ASCII format.
    if ((mUeventHandler = setupSocket(&mUeventSock, NETLINK_KOBJECT_UEVENT,
         0xffffffff, NetlinkListener::NETLINK_FORMAT_ASCII, false)) == NULL) {
        return -1;
    }

    // Routing socket subscribed to link, IPv4/IPv6 address, IPv6 route and
    // ND user-option groups, binary format.
    if ((mRouteHandler = setupSocket(&mRouteSock, NETLINK_ROUTE,
                                     RTMGRP_LINK |
                                     RTMGRP_IPV4_IFADDR |
                                     RTMGRP_IPV6_IFADDR |
                                     RTMGRP_IPV6_ROUTE |
                                     (1 << (RTNLGRP_ND_USEROPT - 1)),
         NetlinkListener::NETLINK_FORMAT_BINARY, false)) == NULL) {
        return -1;
    }

    // NFLOG socket on the quota group.
    if ((mQuotaHandler = setupSocket(&mQuotaSock, NETLINK_NFLOG,
            NFLOG_QUOTA_GROUP, NetlinkListener::NETLINK_FORMAT_BINARY, false)) == NULL) {
...
    }

    // NETFILTER socket with no group, unicast binary format; the only call
    // that passes true as the last argument.
    if ((mStrictHandler = setupSocket(&mStrictSock, NETLINK_NETFILTER,
            0, NetlinkListener::NETLINK_FORMAT_BINARY_UNICAST, true)) == NULL) {
...
    }

    return 0;
}

这四个步骤都是通过setupSocket生成一个socket对象,并设置domain为PF_NETLINK,然后设置不同的Type(NETLINK_KOBJECT_UEVENT、NETLINK_ROUTE、NETLINK_NFLOG、NETLINK_NETFILTER)用于监听内核模块通过socket发出的信息。

CommandListener 初始化

// Name of the control socket served by CommandListener.
static constexpr const char* SOCKET_NAME = "netd";
// Builds the listener on the "netd" socket and registers one command object
// per supported command family. Commands registered with a second argument
// are associated with that controller's lock; the others use the
// single-argument overload of registerLockingCmd().
CommandListener::CommandListener() : FrameworkListener(SOCKET_NAME, true) {
    registerLockingCmd(new InterfaceCmd());
    registerLockingCmd(new IpFwdCmd(), gCtls->tetherCtrl.lock);
    registerLockingCmd(new TetherCmd(), gCtls->tetherCtrl.lock);
    registerLockingCmd(new NatCmd(), gCtls->tetherCtrl.lock);
    registerLockingCmd(new ListTtysCmd());
    registerLockingCmd(new PppdCmd());
    registerLockingCmd(new BandwidthControlCmd(), gCtls->bandwidthCtrl.lock);
    registerLockingCmd(new IdletimerControlCmd());
    registerLockingCmd(new ResolverCmd());
    registerLockingCmd(new FirewallCmd(), gCtls->firewallCtrl.lock);
    registerLockingCmd(new ClatdCmd());
    registerLockingCmd(new NetworkCommand());
    registerLockingCmd(new StrictCmd());
}

在这里面设置了一个socket名字为netd,并在父类FrameworkListener进行初始化。

在这一层构造函数中,会注册不同命令的监听。当从 netd socket 中接收到信息时,就会取出第一个参数,检测它对应的是哪一种类型的CMD。

比如说,这里注册了InterfaceCmd对象:

// Command object whose command string is "interface".
CommandListener::InterfaceCmd::InterfaceCmd() :
                 NetdCommand("interface") {
}

为这个NetdCommand 设置了一个对应的字符串interface。当 netd socket 监听到消息的第一个参数字符串是interface时,就会从注册到FrameworkListener集合中的命令里找到它,并调用对应 NetdCommand 的 runCommand 方法,从而执行对应的逻辑。

DnsProxyListener 初始化

文件:/system/netd/server/DnsProxyListener.cpp

// Name of the control socket served by DnsProxyListener.
static constexpr const char* SOCKET_NAME = "dnsproxyd";
// Builds the listener on the "dnsproxyd" socket, keeps the network controller
// and event reporter, and registers the three DNS proxy commands
// (getaddrinfo, gethostbyaddr, gethostbyname).
DnsProxyListener::DnsProxyListener(const NetworkController* netCtrl, EventReporter* eventReporter) :
        FrameworkListener(SOCKET_NAME), mNetCtrl(netCtrl), mEventReporter(eventReporter) {
    registerCmd(new GetAddrInfoCmd(this));
    registerCmd(new GetHostByAddrCmd(this));
    registerCmd(new GetHostByNameCmd(this));
}

DnsProxyListener 也是FrameworkListener的派生类。它注册了三个命令对象:GetAddrInfoCmd(通过域名查找ip地址)、GetHostByAddrCmd(通过给定的ip地址返回服务器信息)、GetHostByNameCmd(通过域名查找服务器信息)。

简单来看看这几个对象注册命令是什么:

GetAddrInfoCmd
// Command object whose command string is "getaddrinfo"; keeps a pointer to the
// owning DnsProxyListener.
DnsProxyListener::GetAddrInfoCmd::GetAddrInfoCmd(DnsProxyListener* dnsProxyListener) :
    NetdCommand("getaddrinfo"),
    mDnsProxyListener(dnsProxyListener) {
}

对应的命令识别字符串是getaddrinfo

GetHostByAddrCmd
// Command object whose command string is "gethostbyaddr"; keeps a const
// pointer to the owning DnsProxyListener.
DnsProxyListener::GetHostByAddrCmd::GetHostByAddrCmd(const DnsProxyListener* dnsProxyListener) :
        NetdCommand("gethostbyaddr"),
        mDnsProxyListener(dnsProxyListener) {
}

对应的命令识别字符串是gethostbyaddr

GetHostByNameCmd
// Command object whose command string is "gethostbyname"; keeps a pointer to
// the owning DnsProxyListener.
DnsProxyListener::GetHostByNameCmd::GetHostByNameCmd(DnsProxyListener* dnsProxyListener) :
      NetdCommand("gethostbyname"),
      mDnsProxyListener(dnsProxyListener) {
}

对应的命令识别字符串是gethostbyname

DnsProxyListener startListener

// Prepares the listening socket and spawns the listener thread.
//
// A named listener fetches its fd with android_get_control_socket() and marks
// it close-on-exec; an unnamed listener must already have a valid mSock,
// otherwise errno is set to EINVAL. In listening mode the socket is put into
// listen(); otherwise the socket itself is wrapped in a SocketClient and added
// to mClients. Finally the control pipe is created and the thread running
// threadStart() is started.
//
// Returns 0 on success and -1 on any failure.
int SocketListener::startListener(int backlog) {
    if (mSocketName) {
        mSock = android_get_control_socket(mSocketName);
        if (mSock < 0) {
            return -1;
        }
        fcntl(mSock, F_SETFD, FD_CLOEXEC);
    } else if (mSock == -1) {
        errno = EINVAL;
        return -1;
    }

    if (mListen) {
        if (listen(mSock, backlog) < 0) {
            return -1;
        }
    } else {
        mClients->push_back(new SocketClient(mSock, false, mUseCmdNum));
    }

    if (pipe(mCtrlPipe) != 0) {
        return -1;
    }

    const int threadRc = pthread_create(&mThread, NULL, SocketListener::threadStart, this);
    if (threadRc != 0) {
        return -1;
    }

    return 0;
}

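能看到这个过程中:先通过android_get_control_socket拿到socket名字对应的句柄并设置FD_CLOEXEC;如果mListen为true则调用listen开始监听,否则直接把这个socket包装成SocketClient加入mClients;接着创建控制管道mCtrlPipe;最后通过pthread_create启动线程执行threadStart。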

MDnsSdListener 初始化

文件:/system/netd/server/MDnsSdListener.cpp

// Name of the control socket served by MDnsSdListener.
static constexpr const char* SOCKET_NAME = "mdns";
// Builds the listener on the "mdns" socket and registers a single Handler
// command that is given a freshly created Monitor and this listener.
MDnsSdListener::MDnsSdListener() : FrameworkListener(SOCKET_NAME, true) {
    Monitor *monitor = new Monitor();
    Handler *mdnsHandler = new Handler(monitor, this);
    registerCmd(mdnsHandler);
}

// Command object whose command string is "mdnssd"; remembers the Monitor and
// the listener it was created for.
MDnsSdListener::Handler::Handler(Monitor *m, MDnsSdListener *listener)
    : NetdCommand("mdnssd") {
    this->mMonitor = m;
    this->mListener = listener;
}

注册了一个mdnssd 命令监听。当调用了startListener方法后,就会在 mdns socket 上监听mdnssd命令的到来。

FwmarkServer 初始化

// Name of the control socket served by FwmarkServer.
static constexpr const char* SOCKET_NAME = "fwmarkd";
// Builds the server directly on SocketListener (not FrameworkListener) using
// the "fwmarkd" socket, and stores the three collaborators it is given.
FwmarkServer::FwmarkServer(NetworkController* networkController, EventReporter* eventReporter,
                           TrafficController* trafficCtrl)
    : SocketListener(SOCKET_NAME, true),
      mNetworkController(networkController),
      mEventReporter(eventReporter),
      mTrafficCtrl(trafficCtrl) {}

首先实例化一个 fwmarkd socket 对应的监听对象,并监听这个socket传递过来的内容。

注意TrafficController隶属于eBPF 网络流量监控模块。除了标记和取消标记socket外,还承担了删除流量数据的职责。同时可以从/sys/fs/bpf 下的其他文件中实时读取不同uid、appid的流量数据。

NetworkManagementService 初始化

初始化位置在文件SystemServer中:

            // Create the NetworkManagementService and publish it under
            // Context.NETWORKMANAGEMENT_SERVICE. Any Throwable is reported via
            // reportWtf() and does not propagate from this block.
            try {
                networkManagement = NetworkManagementService.create(context);
                ServiceManager.addService(Context.NETWORKMANAGEMENT_SERVICE, networkManagement);
            } catch (Throwable e) {
                reportWtf("starting NetworkManagement Service", e);
            }

文件:/frameworks/base/services/core/java/com/android/server/NetworkManagementService.java


    /** Socket name passed to the connector by {@link #create(Context)}. */
    static final String NETD_SERVICE_NAME = "netd";

    /**
     * Builds the service, starts its connector thread, waits for the
     * connected signal and then connects to the native netd service.
     *
     * @param context  context handed to the service
     * @param socket   name of the daemon socket to connect to
     * @param services system-service accessor handed to the service
     * @return the fully started service
     * @throws InterruptedException if interrupted while waiting for the connected signal
     */
    static NetworkManagementService create(Context context, String socket, SystemServices services)
            throws InterruptedException {
        final NetworkManagementService nms =
                new NetworkManagementService(context, socket, services);
        // The latch reference is read before the connector thread is started and
        // awaited afterwards; keep this order.
        final CountDownLatch daemonConnected = nms.mConnectedSignal;
        nms.mThread.start();
        daemonConnected.await();
        nms.connectNativeNetdService();
        return nms;
    }

    /**
     * Convenience factory: creates the service on the {@code NETD_SERVICE_NAME}
     * socket with a default {@code SystemServices} instance.
     *
     * @throws InterruptedException if interrupted while the service is starting
     */
    public static NetworkManagementService create(Context context) throws InterruptedException {
        final SystemServices defaultServices = new SystemServices();
        return create(context, NETD_SERVICE_NAME, defaultServices);
    }
    /**
     * Wires up the service: handlers on the FgThread looper, the
     * NativeDaemonConnector for {@code socket}, and the thread that will run it
     * (created here, not started). It also registers this object with the
     * Watchdog, publishes the local service and adds the netd tethering stats
     * provider.
     *
     * @param context  stored in mContext
     * @param socket   daemon socket name handed to the NativeDaemonConnector
     * @param services stored in mServices and used to register the local service
     */
    private NetworkManagementService(
            Context context, String socket, SystemServices services) {
        mContext = context;
        mServices = services;

        // make sure this is on the same looper as our NativeDaemonConnector for sync purposes
        mFgHandler = new Handler(FgThread.get().getLooper());


        // The wake lock is deliberately left null here (the PowerManager lookup is
        // commented out), so the connector is constructed without a wake lock.
        //PowerManager pm = (PowerManager)context.getSystemService(Context.POWER_SERVICE);
        PowerManager.WakeLock wl = null; //pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, NETD_TAG);

        mConnector = new NativeDaemonConnector(
                new NetdCallbackReceiver(), socket, 10, NETD_TAG, 160, wl,
                FgThread.get().getLooper());
        mThread = new Thread(mConnector, NETD_TAG);

        mDaemonHandler = new Handler(FgThread.get().getLooper());

        // Add ourself to the Watchdog monitors.
        Watchdog.getInstance().addMonitor(this);

        mServices.registerLocalService(new LocalService());

        synchronized (mTetheringStatsProviders) {
            mTetheringStatsProviders.put(new NetdTetheringStatsProvider(), "netd");
        }
    }

NativeDaemonConnector 实例化

注意 NativeDaemonConnector是一个Runnable对象,在上面一节中进行Thread的实例化和start后会启动run方法。

    /**
     * Convenience constructor that delegates to the seven-argument constructor,
     * using the FgThread looper.
     */
    NativeDaemonConnector(INativeDaemonConnectorCallbacks callbacks, String socket,
            int responseQueueSize, String logTag, int maxLogSize, PowerManager.WakeLock wl) {
        this(callbacks, socket, responseQueueSize, logTag, maxLogSize, wl,
                FgThread.get().getLooper());
    }

    /**
     * Stores the collaborators and creates the response queue, the sequence
     * counter and the local log.
     *
     * @param callbacks         receiver of daemon callbacks
     * @param socket            name of the daemon socket
     * @param responseQueueSize size passed to the {@code ResponseQueue}
     * @param logTag            log tag; {@code "NativeDaemonConnector"} is used when null
     * @param maxLogSize        size passed to the {@code LocalLog}
     * @param wl                optional wake lock; made reference-counted when non-null
     * @param looper            looper later used for the callback handler
     */
    NativeDaemonConnector(INativeDaemonConnectorCallbacks callbacks, String socket,
            int responseQueueSize, String logTag, int maxLogSize, PowerManager.WakeLock wl,
            Looper looper) {
        mCallbacks = callbacks;
        mSocket = socket;
        mLooper = looper;

        mWakeLock = wl;
        if (wl != null) {
            wl.setReferenceCounted(true);
        }

        mResponseQueue = new ResponseQueue(responseQueueSize);
        mSequenceNumber = new AtomicInteger(0);
        mLocalLog = new LocalLog(maxLogSize);

        if (logTag == null) {
            TAG = "NativeDaemonConnector";
        } else {
            TAG = logTag;
        }
    }



    /**
     * Thread body: creates the callback handler, then keeps calling
     * {@code listenToSocket()} until shutdown. When it throws, the error is
     * logged and, unless shutting down, the loop retries after 5 seconds.
     */
    @Override
    public void run() {
        mCallbackHandler = new Handler(mLooper, this);

        while (!isShuttingDown()) {
            try {
                listenToSocket();
            } catch (Exception failure) {
                loge("Error in NativeDaemonConnector: " + failure);
                if (isShuttingDown()) {
                    break;
                }
                SystemClock.sleep(5000);
            }
        }
    }
listenToSocket

    /**
     * Picks the address for {@code mSocket}: a plain address when the name starts
     * with {@code "__test__"} on a debuggable build, otherwise an address in the
     * {@code RESERVED} namespace.
     */
    private LocalSocketAddress determineSocketAddress() {
        final boolean useTestSocket = mSocket.startsWith("__test__") && Build.IS_DEBUGGABLE;
        if (!useTestSocket) {
            return new LocalSocketAddress(mSocket, LocalSocketAddress.Namespace.RESERVED);
        }
        return new LocalSocketAddress(mSocket);
    }

    /**
     * Connects to the daemon socket and processes incoming events until the
     * stream ends or an I/O error occurs.
     *
     * <p>The stream is a sequence of NUL-terminated UTF-8 messages. Each complete
     * message is parsed into a {@code NativeDaemonEvent}: unsolicited events are
     * posted to {@code mCallbackHandler} (optionally holding the wake lock),
     * all other events are added to {@code mResponseQueue} under their command
     * number. A trailing incomplete message is moved to the front of the buffer
     * and completed by the next read.
     *
     * @throws IOException on a communication error (logged, then rethrown)
     */
    private void listenToSocket() throws IOException {
        LocalSocket socket = null;

        try {
            socket = new LocalSocket();
            LocalSocketAddress address = determineSocketAddress();

            socket.connect(address);

            InputStream inputStream = socket.getInputStream();
            synchronized (mDaemonLock) {
                mOutputStream = socket.getOutputStream();
            }

            mCallbacks.onDaemonConnected();

            FileDescriptor[] fdList = null;
            byte[] buffer = new byte[BUFFER_SIZE];
            // Number of bytes of an incomplete message carried over at the front
            // of the buffer; new data is read in right after them.
            int start = 0;

            while (true) {
                int count = inputStream.read(buffer, start, BUFFER_SIZE - start);
                if (count < 0) {
                    loge("got " + count + " reading with start = " + start);
                    break;
                }
                fdList = socket.getAncillaryFileDescriptors();

                // Add our starting point to the count and reset the start.
                count += start;
                start = 0;

                // Split the valid region [0, count) on NUL bytes; 'start' tracks
                // the beginning of the message currently being scanned.
                for (int i = 0; i < count; i++) {
                    if (buffer[i] == 0) {
                        final String rawEvent = new String(
                                buffer, start, i - start, StandardCharsets.UTF_8);

                        boolean releaseWl = false;
                        try {
                            final NativeDaemonEvent event =
                                    NativeDaemonEvent.parseRawEvent(rawEvent, fdList);

                            log("RCV <- {" + event + "}");

                            if (event.isClassUnsolicited()) {
                                // TODO: migrate to sending NativeDaemonEvent instances
                                if (mCallbacks.onCheckHoldWakeLock(event.getCode())
                                        && mWakeLock != null) {
                                    mWakeLock.acquire();
                                    releaseWl = true;
                                }
                                Message msg = mCallbackHandler.obtainMessage(
                                        event.getCode(), uptimeMillisInt(), 0, event.getRawEvent());
                                // Once the message is queued, the wake lock is no
                                // longer released here.
                                if (mCallbackHandler.sendMessage(msg)) {
                                    releaseWl = false;
                                }
                            } else {
                                mResponseQueue.add(event.getCmdNumber(), event);
                            }
                        } catch (IllegalArgumentException e) {
                            log("Problem parsing message " + e);
                        } finally {
                            if (releaseWl) {
                                mWakeLock.release();
                            }
                        }

                        start = i + 1;
                    }
                }


                // Bytes [start, count) are an incomplete message. Move exactly
                // those bytes to the front and continue reading after them.
                // Using BUFFER_SIZE here instead of count would also carry over
                // stale bytes beyond the data actually read, and the next pass
                // would scan them as if they were part of the stream.
                if (start != count) {
                    final int remaining = count - start;
                    System.arraycopy(buffer, start, buffer, 0, remaining);
                    start = remaining;
                } else {
                    start = 0;
                }
            }
        } catch (IOException ex) {
            loge("Communications error: " + ex);
            throw ex;
        } finally {
  ...
        }
    }

这个过程实际上做的事情就是一件:

这里的 netd socket 实际上就是指netd进程启动的时候,由init进程解析netd.rc为它创建出来的socket。而netd进程内部也会基于这个socket构造监听对象(CommandListener),随时进行发送或者监听信息。

值得注意的是,这里面有一个有趣的对象PowerManager.WakeLock。每当从netd监听到消息,并且发现这个事件是netd主动上报的(unsolicited)事件时,如果回调要求持有唤醒锁且mWakeLock不为空,就会先通过PowerManager.WakeLock保持设备唤醒,再通过Handler把数据发送出去。

这也是耗电量能够被Android系统监听到的原因之一,这个WakeLock 唤醒锁是一个重要的依据。

InetAddress.getAllByName DNS 查询过程

对NetworkManagementService以及netd 两个服务都有了大致的了解后,我们才好展开本次的重点话题:Android是怎么通过域名进行DNS查询、最终得到ip地址的。

static final InetAddressImpl impl = new Inet6AddressImpl();

     */
    public static InetAddress[] getAllByName(String host)
        throws UnknownHostException {

        return impl.lookupAllHostAddr(host, NETID_UNSET).clone();
    }

这个过程实际上是交给了Inet6AddressImpl的lookupAllHostAddr处理。

Inet6AddressImpl lookupAllHostAddr

    /**
     * Resolves {@code host} to addresses.
     *
     * <ul>
     *   <li>null or empty host: the loopback addresses;</li>
     *   <li>numeric address: that single address, unless it uses a deprecated
     *       IPv4 format, in which case an {@link UnknownHostException} is thrown;</li>
     *   <li>anything else: delegated to {@code lookupHostByName(host, netId)}.</li>
     * </ul>
     */
    @Override
    public InetAddress[] lookupAllHostAddr(String host, int netId) throws UnknownHostException {
        if (host == null || host.isEmpty()) {
            return loopbackAddresses();
        }

        // Non-numeric input goes to the name lookup.
        final InetAddress parsed = InetAddress.parseNumericAddressNoThrow(host);
        if (parsed == null) {
            return lookupHostByName(host, netId);
        }

        final InetAddress accepted = InetAddress.disallowDeprecatedFormats(host, parsed);
        if (accepted == null) {
            throw new UnknownHostException("Deprecated IPv4 address format: " + host);
        }
        return new InetAddress[] { accepted };
    }
Inet6AddressImpl lookupHostByName
    /**
     * Resolves a host name for the given network id, consulting
     * {@code addressCache} first.
     *
     * <p>A cached {@code InetAddress[]} is returned as is; any other cached value
     * is treated as the message of a cached failure and rethrown as an
     * {@link UnknownHostException}. On a cache miss the lookup goes through
     * {@code Libcore.os.android_getaddrinfo} and the result is stored in the
     * cache. Handling of {@code GaiException} is elided ("...") in this excerpt.
     */
    private static InetAddress[] lookupHostByName(String host, int netId)
            throws UnknownHostException {
        BlockGuard.getThreadPolicy().onNetwork();
        // Do we have a result cached?
        Object cachedResult = addressCache.get(host, netId);
        if (cachedResult != null) {
            if (cachedResult instanceof InetAddress[]) {
                // A cached positive result.
                return (InetAddress[]) cachedResult;
            } else {
                // A cached negative result.
                throw new UnknownHostException((String) cachedResult);
            }
        }
        try {
            // Hints passed to the native lookup: AI_ADDRCONFIG flag, unspecified
            // address family, stream sockets.
            StructAddrinfo hints = new StructAddrinfo();
            hints.ai_flags = AI_ADDRCONFIG;
            hints.ai_family = AF_UNSPEC;

            hints.ai_socktype = SOCK_STREAM;
            InetAddress[] addresses = Libcore.os.android_getaddrinfo(host, hints, netId);

            // Record the name that was looked up on every returned address.
            for (InetAddress address : addresses) {
                address.holder().hostName = host;
                address.holder().originalHostName = host;
            }
            addressCache.put(host, netId, addresses);
            return addresses;
        } catch (GaiException gaiException) {
...
        }
    }

构建了一个StructAddrinfo对象,保存了三个十分重要的标志:ai_flags = AI_ADDRCONFIG、ai_family = AF_UNSPEC、ai_socktype = SOCK_STREAM。

并把StructAddrinfo作为参数传入方法Libcore.os.android_getaddrinfo中处理。

Libcore.os.android_getaddrinfo

这个方法实际上就是一个native方法,对应如下文件:
/libcore/luni/src/main/native/libcore_io_Linux.cpp

// JNI implementation behind Libcore.os.android_getaddrinfo.
//
// Copies ai_flags / ai_family / ai_socktype / ai_protocol from the Java hints
// object into a native addrinfo, calls android_getaddrinfofornet() for the
// given netId, and converts every AF_INET / AF_INET6 result into a Java
// InetAddress.
//
// Returns the array of addresses, or NULL when the node string is unavailable,
// the lookup fails (a GaiException is thrown first), no usable address was
// returned, or a Java allocation / conversion fails.
static jobjectArray Linux_android_getaddrinfo(JNIEnv* env, jobject, jstring javaNode,
        jobject javaHints, jint netId) {
    ScopedUtfChars node(env, javaNode);
    if (node.c_str() == NULL) {
        return NULL;
    }

    // Field IDs are looked up once and cached in function-local statics.
    static jfieldID flagsFid = env->GetFieldID(JniConstants::structAddrinfoClass, "ai_flags", "I");
    static jfieldID familyFid = env->GetFieldID(JniConstants::structAddrinfoClass, "ai_family", "I");
    static jfieldID socktypeFid = env->GetFieldID(JniConstants::structAddrinfoClass, "ai_socktype", "I");
    static jfieldID protocolFid = env->GetFieldID(JniConstants::structAddrinfoClass, "ai_protocol", "I");

    addrinfo nativeHints;
    memset(&nativeHints, 0, sizeof(nativeHints));
    nativeHints.ai_flags = env->GetIntField(javaHints, flagsFid);
    nativeHints.ai_family = env->GetIntField(javaHints, familyFid);
    nativeHints.ai_socktype = env->GetIntField(javaHints, socktypeFid);
    nativeHints.ai_protocol = env->GetIntField(javaHints, protocolFid);

    addrinfo* head = NULL;
    errno = 0;
    const int gaiStatus =
            android_getaddrinfofornet(node.c_str(), NULL, &nativeHints, netId, 0, &head);
    // Owns the result list from here on, including on the error path.
    std::unique_ptr<addrinfo, addrinfo_deleter> headOwner(head);
    if (gaiStatus != 0) {
        throwGaiException(env, "android_getaddrinfo", gaiStatus);
        return NULL;
    }

    const auto isInetFamily = [](const addrinfo* entry) {
        return entry->ai_family == AF_INET || entry->ai_family == AF_INET6;
    };

    // First pass: count the usable entries to size the Java array.
    int usableCount = 0;
    addrinfo* cursor = head;
    while (cursor != NULL) {
        if (isInetFamily(cursor)) {
            ++usableCount;
        } else {
            ALOGE("android_getaddrinfo unexpected ai_family %i", cursor->ai_family);
        }
        cursor = cursor->ai_next;
    }
    if (usableCount == 0) {
        return NULL;
    }

    jobjectArray javaAddresses =
            env->NewObjectArray(usableCount, JniConstants::inetAddressClass, NULL);
    if (javaAddresses == NULL) {
        return NULL;
    }

    // Second pass: convert each usable entry and store it in the next slot.
    int slot = 0;
    for (cursor = head; cursor != NULL; cursor = cursor->ai_next) {
        if (!isInetFamily(cursor)) {
            // Unknown address family. Skip this address.
            ALOGE("android_getaddrinfo unexpected ai_family %i", cursor->ai_family);
            continue;
        }

        sockaddr_storage& rawAddress = *reinterpret_cast<sockaddr_storage*>(cursor->ai_addr);
        ScopedLocalRef<jobject> javaAddress(env, sockaddrToInetAddress(env, rawAddress, NULL));
        if (javaAddress.get() == NULL) {
            return NULL;
        }
        env->SetObjectArrayElement(javaAddresses, slot, javaAddress.get());
        ++slot;
    }
    return javaAddresses;
}

这个过程实际上就是从StructAddrinfo中拿到ai_flags,ai_family,ai_socktype作为参数传递到android_getaddrinfofornet方法中。并把取到的数据保存到Java数组中并返回。

android_getaddrinfofornet 从netd进程中查找dns
// With bionic, android_getaddrinfofornet() is provided by the C library and
// only declared here. Elsewhere, a fallback forwards to the standard
// getaddrinfo() and ignores the netid and mark arguments.
#if defined(__BIONIC__)
extern "C" int android_getaddrinfofornet(const char*, const char*, const struct addrinfo*, unsigned, unsigned, struct addrinfo**);
#else
static inline int android_getaddrinfofornet(const char* hostname, const char* servname,
    const struct addrinfo* hints, unsigned /*netid*/, unsigned /*mark*/, struct addrinfo** res) {
  return getaddrinfo(hostname, servname, hints, res);
}
#endif

注意接下来分为2个分支:是调用libc中内置的getaddrinfo方法,还是调用bionic的android_getaddrinfofornet。

一般的jdk都是使用libc内置的方法,而android中都是使用bionic库进行处理。

文件:/bionic/libc/dns/net/getaddrinfo.c

/*
 * Standard getaddrinfo(): the same lookup as android_getaddrinfofornet() with
 * neither a network id nor a mark selected (NETID_UNSET / MARK_UNSET).
 */
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
int
getaddrinfo(const char *hostname, const char *servname,
    const struct addrinfo *hints, struct addrinfo **res)
{
    const int status = android_getaddrinfofornet(
        hostname, servname, hints, NETID_UNSET, MARK_UNSET, res);
    return status;
}

/*
 * Wraps netid and mark in an android_net_context and forwards to
 * android_getaddrinfofornetcontext(). The same netid / mark pair is used for
 * both the app_* and dns_* fields, and uid is NET_CONTEXT_INVALID_UID; every
 * other field of the context is zero.
 */
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
int
android_getaddrinfofornet(const char *hostname, const char *servname,
    const struct addrinfo *hints, unsigned netid, unsigned mark, struct addrinfo **res)
{
    struct android_net_context ctx;

    memset(&ctx, 0, sizeof(ctx));
    ctx.app_netid = netid;
    ctx.app_mark = mark;
    ctx.dns_netid = netid;
    ctx.dns_mark = mark;
    ctx.uid = NET_CONTEXT_INVALID_UID;

    return android_getaddrinfofornetcontext(hostname, servname, hints, &ctx, res);
}

生成一个android_net_context 结构体后,调用 android_getaddrinfofornetcontext.

android_getaddrinfofornetcontext
/*
 * Core of getaddrinfo() for a given network context.
 *
 * Validates the hints, checks the service/port combination, then (on Android)
 * first tries to resolve through the DNS proxy and, only when the proxy
 * reports EAI_SYSTEM, falls back to resolving locally via explore_fqdn() for
 * every matching entry of the explore table.
 *
 * On success *res receives the result list and SUCCESS is returned; on failure
 * any partial list is freed, *res is set to NULL and an EAI_* code is returned.
 * Lines consisting of "..." / "...." are elisions made by the article.
 * NOTE(review): ERR() is defined outside this excerpt; it presumably records
 * the error and jumps to the bad: label - confirm against the full file.
 */
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
int
android_getaddrinfofornetcontext(const char *hostname, const char *servname,
    const struct addrinfo *hints, const struct android_net_context *netcontext,
    struct addrinfo **res)
{
    struct addrinfo sentinel;
    struct addrinfo *cur;
    int error = 0;
    struct addrinfo ai;
    struct addrinfo ai0;
    struct addrinfo *pai;
    const struct explore *ex;

    /* hostname is allowed to be NULL */
    /* servname is allowed to be NULL */
    /* hints is allowed to be NULL */
    assert(res != NULL);
    assert(netcontext != NULL);
    /* sentinel is a dummy list head; results are chained after it via cur. */
    memset(&sentinel, 0, sizeof(sentinel));
    cur = &sentinel;
    /* pai points at the working copy of the hints, pre-filled with defaults. */
    pai = &ai;
    pai->ai_flags = 0;
    pai->ai_family = PF_UNSPEC;
    pai->ai_socktype = ANY;
    pai->ai_protocol = ANY;
    pai->ai_addrlen = 0;
    pai->ai_canonname = NULL;
    pai->ai_addr = NULL;
    pai->ai_next = NULL;

....
    if (hints) {
        /* error check for hints */
        if (hints->ai_addrlen || hints->ai_canonname ||
            hints->ai_addr || hints->ai_next)
            ERR(EAI_BADHINTS); /* xxx */
        if (hints->ai_flags & ~AI_MASK)
            ERR(EAI_BADFLAGS);
        switch (hints->ai_family) {
        case PF_UNSPEC:
        case PF_INET:
#ifdef INET6
        case PF_INET6:
#endif
            break;
        default:
            ERR(EAI_FAMILY);
        }
        /* The caller's hints replace the defaults set above. */
        memcpy(pai, hints, sizeof(*pai));

        /*
         * if both socktype/protocol are specified, check if they
         * are meaningful combination.
         */
        if (pai->ai_socktype != ANY && pai->ai_protocol != ANY) {
            for (ex = explore; ex->e_af >= 0; ex++) {
                if (pai->ai_family != ex->e_af)
                    continue;
                if (ex->e_socktype == ANY)
                    continue;
                if (ex->e_protocol == ANY)
                    continue;
                if (pai->ai_socktype == ex->e_socktype
                 && pai->ai_protocol != ex->e_protocol) {
                    ERR(EAI_BADHINTS);
                }
            }
        }
    }

    /*
     * check for special cases.  (1) numeric servname is disallowed if
     * socktype/protocol are left unspecified. (2) servname is disallowed
     * for raw and other inet{,6} sockets.
     */
    if (MATCH_FAMILY(pai->ai_family, PF_INET, 1)
#ifdef PF_INET6
     || MATCH_FAMILY(pai->ai_family, PF_INET6, 1)
#endif
        ) {
        ai0 = *pai; /* backup *pai */

        if (pai->ai_family == PF_UNSPEC) {
#ifdef PF_INET6
            pai->ai_family = PF_INET6;
#else
            pai->ai_family = PF_INET;
#endif
        }
        error = get_portmatch(pai, servname);
        if (error)
            ERR(error);

        *pai = ai0;
    }

    /* Snapshot of the validated hints; restored at the top of each loop pass. */
    ai0 = *pai;

    /* NULL hostname, or numeric hostname */
...
#if defined(__ANDROID__)
    /*
     * Ask the DNS proxy first. Its answer (success or failure) is final unless
     * it returns EAI_SYSTEM, which android_getaddrinfo_proxy() does when no
     * proxy connection could be opened; only then is the local path used.
     */
    int gai_error = android_getaddrinfo_proxy(
        hostname, servname, hints, res, netcontext->app_netid);
    if (gai_error != EAI_SYSTEM) {
        return gai_error;
    }
#endif

    /*
     * hostname as alphabetical name.
     * we would like to prefer AF_INET6 than AF_INET, so we'll make a
     * outer loop by AFs.
     */
    for (ex = explore; ex->e_af >= 0; ex++) {
        *pai = ai0;

        /* require exact match for family field */
        if (pai->ai_family != ex->e_af)
            continue;

        if (!MATCH(pai->ai_socktype, ex->e_socktype,
                WILD_SOCKTYPE(ex))) {
            continue;
        }
        if (!MATCH(pai->ai_protocol, ex->e_protocol,
                WILD_PROTOCOL(ex))) {
            continue;
        }

        /* Fill unspecified socktype/protocol from the explore entry. */
        if (pai->ai_socktype == ANY && ex->e_socktype != ANY)
            pai->ai_socktype = ex->e_socktype;
        if (pai->ai_protocol == ANY && ex->e_protocol != ANY)
            pai->ai_protocol = ex->e_protocol;

        error = explore_fqdn(
            pai, hostname, servname, &cur->ai_next, netcontext);

        /* Advance cur to the tail so the next results are appended. */
        while (cur && cur->ai_next)
            cur = cur->ai_next;
    }

    /* XXX */
    /* Any result at all counts as success, even if the last lookup failed. */
    if (sentinel.ai_next)
        error = 0;

    if (error)
        goto free;
    if (error == 0) {
        if (sentinel.ai_next) {
 good:
            *res = sentinel.ai_next;
            return SUCCESS;
        } else
            error = EAI_FAIL;
    }
 free:
 bad:
    if (sentinel.ai_next)
        freeaddrinfo(sentinel.ai_next);
    *res = NULL;
    return error;
}
android_getaddrinfo_proxy
/*
 * Resolves hostname/servname by sending a "getaddrinfo" command to the DNS
 * proxy obtained from android_open_proxy() and decoding the reply into a
 * linked list of addrinfo stored in *res.
 *
 * Returns EAI_SYSTEM when no proxy connection can be opened, 0 when the
 * complete reply was decoded, and EAI_NODATA on any other failure, in which
 * case the entries already attached to *res are freed and *res is set to NULL.
 * The "..." line is an elision made by the article.
 */
static int
android_getaddrinfo_proxy(
    const char *hostname, const char *servname,
    const struct addrinfo *hints, struct addrinfo **res, unsigned netid)
{
    int success = 0;

...

    FILE* proxy = android_open_proxy();
    if (proxy == NULL) {
        return EAI_SYSTEM;
    }

    netid = __netdClientDispatch.netIdForResolv(netid);

    // Send the request. Missing strings are sent as "^" and missing hints as -1.
    if (fprintf(proxy, "getaddrinfo %s %s %d %d %d %d %u",
            hostname == NULL ? "^" : hostname,
            servname == NULL ? "^" : servname,
            hints == NULL ? -1 : hints->ai_flags,
            hints == NULL ? -1 : hints->ai_family,
            hints == NULL ? -1 : hints->ai_socktype,
            hints == NULL ? -1 : hints->ai_protocol,
            netid) < 0) {
        goto exit;
    }
    // literal NULL byte at end, required by FrameworkListener
    if (fputc(0, proxy) == EOF ||
        fflush(proxy) != 0) {
        goto exit;
    }

    char buf[4];
    // read result code for getaddrinfo
    if (fread(buf, 1, sizeof(buf), proxy) != sizeof(buf)) {
        goto exit;
    }

    int result_code = (int)strtol(buf, NULL, 10);
    // verify the code itself
    if (result_code != DnsProxyQueryResult) {
        // Read four more bytes of the failure reply; the result is deliberately ignored.
        fread(buf, 1, sizeof(buf), proxy);
        goto exit;
    }

    // Entry currently being decoded; NULL whenever no partially built entry is
    // pending. It is assigned (never redeclared) inside the loop so that the
    // cleanup after the loop really sees an entry abandoned by a `break`.
    struct addrinfo* ai = NULL;
    struct addrinfo** nextres = res;
    while (1) {
        int32_t have_more;
        if (!readBE32(proxy, &have_more)) {
            break;
        }
        if (have_more == 0) {
            success = 1;
            break;
        }

        // One allocation holds the addrinfo followed by its sockaddr storage.
        ai = calloc(1, sizeof(struct addrinfo) + sizeof(struct sockaddr_storage));
        if (ai == NULL) {
            break;
        }
        ai->ai_addr = (struct sockaddr*)(ai + 1);

        // struct addrinfo {
        //  int ai_flags;   /* AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST */
        //  int ai_family;  /* PF_xxx */
        //  int ai_socktype;    /* SOCK_xxx */
        //  int ai_protocol;    /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
        //  socklen_t ai_addrlen;   /* length of ai_addr */
        //  char    *ai_canonname;  /* canonical name for hostname */
        //  struct  sockaddr *ai_addr;  /* binary address */
        //  struct  addrinfo *ai_next;  /* next structure in linked list */
        // };

        // Read the struct piece by piece because we might be a 32-bit process
        // talking to a 64-bit netd.
        int32_t addr_len;
        bool header_ok =
                readBE32(proxy, &ai->ai_flags) &&
                readBE32(proxy, &ai->ai_family) &&
                readBE32(proxy, &ai->ai_socktype) &&
                readBE32(proxy, &ai->ai_protocol) &&
                readBE32(proxy, &addr_len);
        if (!header_ok) {
            break;
        }

        // Set ai_addrlen and read the ai_addr data.
        ai->ai_addrlen = addr_len;
        if (addr_len != 0) {
            // The cast also rejects negative lengths, which become huge values.
            if ((size_t) addr_len > sizeof(struct sockaddr_storage)) {
                // Bogus; too big.
                break;
            }
            if (fread(ai->ai_addr, addr_len, 1, proxy) != 1) {
                break;
            }
        }

        // The string for ai_canonname.
        int32_t name_len;
        if (!readBE32(proxy, &name_len)) {
            break;
        }
        if (name_len < 0) {
            // Bogus; a negative length cannot be allocated or read.
            break;
        }
        if (name_len != 0) {
            ai->ai_canonname = (char*) malloc(name_len);
            if (ai->ai_canonname == NULL) {
                break;
            }
            if (fread(ai->ai_canonname, name_len, 1, proxy) != 1) {
                break;
            }
            if (ai->ai_canonname[name_len - 1] != '\0') {
                // The proxy should be returning this
                // NULL-terminated.
                break;
            }
        }

        // Entry is complete: append it to the result list and drop our reference.
        *nextres = ai;
        nextres = &ai->ai_next;
        ai = NULL;
    }

    if (ai != NULL) {
        // Clean up partially-built addrinfo that we never ended up
        // attaching to the response.
        freeaddrinfo(ai);
    }
exit:
    if (proxy != NULL) {
        fclose(proxy);
    }

    if (success) {
        return 0;
    }

    // Proxy failed;
    // clean up memory we might've allocated.
    if (*res) {
        freeaddrinfo(*res);
        *res = NULL;
    }
    return EAI_NODATA;
}

此时如果App进程的环境变量ANDROID_DNS_MODE没有被设置为local,就说明需要走代理:android_open_proxy会连接netd进程的/dev/socket/dnsproxyd 这个socket接口,并把它包装成FILE返回。


/*
 * Opens a stream to the DNS proxy socket at /dev/socket/dnsproxyd.
 *
 * Returns NULL when the ANDROID_DNS_MODE environment variable is "local"
 * (proxying disabled) or when connecting fails; otherwise the connected socket
 * wrapped in a FILE opened in "r+" mode. The check of the socket() result is
 * elided ("...") in this excerpt.
 * NOTE(review): if fdopen() fails, the descriptor s is not closed on the path
 * shown here - confirm against the full source.
 */
__LIBC_HIDDEN__ FILE* android_open_proxy() {
    const char* cache_mode = getenv("ANDROID_DNS_MODE");
    // The proxy is used unless the variable is set to exactly "local".
    bool use_proxy = (cache_mode == NULL || strcmp(cache_mode, "local") != 0);
    if (!use_proxy) {
        return NULL;
    }

    int s = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
...
    const int one = 1;
    setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));

    struct sockaddr_un proxy_addr;
    memset(&proxy_addr, 0, sizeof(proxy_addr));
    proxy_addr.sun_family = AF_UNIX;
    strlcpy(proxy_addr.sun_path, "/dev/socket/dnsproxyd", sizeof(proxy_addr.sun_path));

    if (TEMP_FAILURE_RETRY(connect(s, (const struct sockaddr*) &proxy_addr, sizeof(proxy_addr))) != 0) {
        close(s);
        return NULL;
    }

    return fdopen(s, "r+");
}

netd进程接受getaddrinfo命令

前面聊到了DnsProxyListener在内部中生成了dnsproxyd的socket。在DnsProxyListener构造函数中,就调用了registerCmd方法注册了不同命令的监听.

文件:/system/core/libsysutils/src/FrameworkListener.cpp

// Adds cmd to the end of this listener's command collection (mCommands).
void FrameworkListener::registerCmd(FrameworkCommand *cmd) {
    auto &commands = *mCommands;
    commands.push_back(cmd);
}

把FrameworkCommand添加到mCommands集合中。

FrameworkListener的父类就是SocketListener,而SocketListener在调用startListener方法后,就会启动一个线程执行threadStart方法。

void *SocketListener::threadStart(void *obj) {
    SocketListener *me = reinterpret_cast<SocketListener *>(obj);

    me->runListener();
    pthread_exit(NULL);
    return NULL;
}

// Main loop of the listener thread.
//
// Each iteration builds a read fd_set from the listening socket (when
// mListen is set), the read end of the control pipe, and every client in
// mClients; blocks in select(); accepts a new connection if the listening
// socket is readable; then calls onDataAvailable() for every client whose
// socket is readable. Parts of the original are elided ("...") in this
// excerpt.
void SocketListener::runListener() {

    // Clients with readable data for the current iteration; only touched by
    // this thread.
    SocketClientCollection pendingList;

    while(1) {
        SocketClientCollection::iterator it;
        fd_set read_fds;
        int rc = 0;
        int max = -1;

        FD_ZERO(&read_fds);

        if (mListen) {
            max = mSock;
            FD_SET(mSock, &read_fds);
        }

        // The control pipe is always watched, whether or not we are listening.
        FD_SET(mCtrlPipe[0], &read_fds);
        if (mCtrlPipe[0] > max)
            max = mCtrlPipe[0];

        // Add every connected client's socket, tracking the highest fd for
        // select()'s first argument.
        pthread_mutex_lock(&mClientsLock);
        for (it = mClients->begin(); it != mClients->end(); ++it) {
            // NB: calling out to an other object with mClientsLock held (safe)
            int fd = (*it)->getSocket();
            FD_SET(fd, &read_fds);
            if (fd > max) {
                max = fd;
            }
        }
        pthread_mutex_unlock(&mClientsLock);

        // Block with no timeout; EINTR and a zero return simply restart the
        // iteration, other errors back off for one second first.
        if ((rc = select(max + 1, &read_fds, NULL, NULL, NULL)) < 0) {
            if (errno == EINTR)
                continue;
...
            sleep(1);
            continue;
        } else if (!rc)
            continue;

...
        // New incoming connection: wrap it in a SocketClient and track it.
        if (mListen && FD_ISSET(mSock, &read_fds)) {
            int c = TEMP_FAILURE_RETRY(accept4(mSock, nullptr, nullptr, SOCK_CLOEXEC));
            if (c < 0) {

                sleep(1);
                continue;
            }
            pthread_mutex_lock(&mClientsLock);
            mClients->push_back(new SocketClient(c, true, mUseCmdNum));
            pthread_mutex_unlock(&mClientsLock);
        }

        /* Add all active clients to the pending list first */
        pendingList.clear();
        pthread_mutex_lock(&mClientsLock);
        for (it = mClients->begin(); it != mClients->end(); ++it) {
            SocketClient* c = *it;
            // NB: calling out to an other object with mClientsLock held (safe)
            int fd = c->getSocket();
            if (FD_ISSET(fd, &read_fds)) {
                pendingList.push_back(c);
                // Hold a reference while the client sits in pendingList; it is
                // dropped after the client has been processed below.
                c->incRef();
            }
        }
        pthread_mutex_unlock(&mClientsLock);

        /* Process the pending list, since it is owned by the thread,
         * there is no need to lock it */
        while (!pendingList.empty()) {
            /* Pop the first item from the list */
            it = pendingList.begin();
            SocketClient* c = *it;
            pendingList.erase(it);
            /* Process it, if false is returned, remove from list */
            if (!onDataAvailable(c)) {
                release(c, false);
            }
            c->decRef();
        }
    }
}

onDataAvailable 这个方法在FrameworkListener有实现。

FrameworkListener onDataAvailable

文件:/system/core/libsysutils/src/FrameworkListener.cpp

// Reads one buffer of data from the client's socket and dispatches every
// NUL-terminated command found in it.
//
// Returns true at the end of the visible path. Handling of the read()
// result is elided ("...") in this excerpt.
bool FrameworkListener::onDataAvailable(SocketClient *c) {
    char buffer[CMD_BUF_SIZE];
    int len;

    len = TEMP_FAILURE_RETRY(read(c->getSocket(), buffer, sizeof(buffer)));
...

    // offset marks the start of the command currently being scanned.
    int offset = 0;
    int i;

    for (i = 0; i < len; i++) {
        if (buffer[i] == '\0') {
            /* IMPORTANT: dispatchCommand() expects a zero-terminated string */
            if (mSkipToNextNullByte) {
                // This segment is skipped rather than dispatched; clear the
                // flag so the next segment is handled normally.
                mSkipToNextNullByte = false;
            } else {
                dispatchCommand(c, buffer + offset);
            }
            offset = i + 1;
        }
    }

    mSkipToNextNullByte = false;
    return true;
}

在onDataAvailable 中,调用了read方法读取socket传送来的数据,然后以'\0'为分隔符把缓冲区切分成一条条命令,逐条调用dispatchCommand分发出去。

FrameworkListener dispatchCommand

// Splits the NUL-terminated command line in data into argv[] and runs the
// first registered FrameworkCommand whose name equals argv[0].
//
// If no command matches, replies 500 "Command not recognized"; the overflow
// label replies 500 "Command too long". Every argv[] entry is free()d before
// returning. The tokenizing loop is elided ("...") in this excerpt.
void FrameworkListener::dispatchCommand(SocketClient *cli, char *data) {
    FrameworkCommandCollection::iterator i;
    int argc = 0;
    char *argv[FrameworkListener::CMD_ARGS_MAX];
    char tmp[CMD_BUF_SIZE];
    char *p = data;
    char *q = tmp;
    // Last writable position in tmp, leaving room for a terminating NUL.
    char *qlimit = tmp + sizeof(tmp) - 1;
    bool esc = false;
    bool quote = false;
    bool haveCmdNum = !mWithSeq;

    memset(argv, 0, sizeof(argv));
    memset(tmp, 0, sizeof(tmp));
    while(*p) {
...
    // Linear search through the registered commands; the first name match wins.
    for (i = mCommands->begin(); i != mCommands->end(); ++i) {
        FrameworkCommand *c = *i;

        if (!strcmp(argv[0], c->getCommand())) {
            // A non-zero return from runCommand() is only logged.
            if (c->runCommand(cli, argc, argv)) {
                SLOGW("Handler '%s' error (%s)", c->getCommand(), strerror(errno));
            }
            goto out;
        }
    }
    cli->sendMsg(500, "Command not recognized", false);
out:
    // Release every argument collected so far.
    int j;
    for (j = 0; j < argc; j++)
        free(argv[j]);
    return;

overflow:
    cli->sendMsg(500, "Command too long", false);
    goto out;
}

还记得FrameworkListener中的mCommands集合实际上是通过上面registerCmd设置进来的。

在这个过程中,会遍历所有设置进来的命令对象。校验每一个FrameworkCommand设置好的字符串,当命令的第一个字符串和FrameworkCommand匹配上了,才会执行对应FrameworkCommand的runCommand方法。

netd进程监听到App进程发送的DNS查询命令

注意,此时App进程调用了如下命令:

getaddrinfo %s %s %d %d %d %d %u

换句话说,此时netd进程需要匹配getaddrinfo命令

// Passes the command name "getaddrinfo" to the NetdCommand base and stores
// the owning DnsProxyListener in mDnsProxyListener.
DnsProxyListener::GetAddrInfoCmd::GetAddrInfoCmd(DnsProxyListener* dnsProxyListener)
    : NetdCommand("getaddrinfo"), mDnsProxyListener(dnsProxyListener) {}

此时会匹配上GetAddrInfoCmd中的getaddrinfo字符串,接着就会调用GetAddrInfoCmd的runCommand方法。

GetAddrInfoCmd runCommand

// Limits the number of outstanding DNS queries by client UID.
// MAX_QUERIES_PER_UID is the value handed to the limiter's constructor.
constexpr int MAX_QUERIES_PER_UID = 256;
// Limiter keyed by uid_t; GetAddrInfoHandler::run() calls start(uid) before
// a lookup and finish(uid) after it.
android::netdutils::OperationLimiter<uid_t> queryLimiter(MAX_QUERIES_PER_UID);


// Handles one "getaddrinfo" command from a client.
//
// Reads the arguments as: argv[1] host name, argv[2] service, argv[3..6]
// ai_flags / ai_family / ai_socktype / ai_protocol, argv[7] netId. Builds the
// network context and optional hints, then hands the lookup to a
// GetAddrInfoHandler launched through tryThreadOrError(). Returns 0 on the
// visible path. Argument validation is elided ("...") in this excerpt.
int DnsProxyListener::GetAddrInfoCmd::runCommand(SocketClient *cli,
                                            int argc, char **argv) {
...

    char* name = argv[1];
...
    char* service = argv[2];
...

    struct addrinfo* hints = NULL;
    int ai_flags = atoi(argv[3]);
    int ai_family = atoi(argv[4]);
    int ai_socktype = atoi(argv[5]);
    int ai_protocol = atoi(argv[6]);
    unsigned netId = strtoul(argv[7], NULL, 10);
    // netId is passed by pointer, so the helper may modify it.
    const bool useLocalNameservers = checkAndClearUseLocalNameserversFlag(&netId);
    const uid_t uid = cli->getUid();

    android_net_context netcontext;
    mDnsProxyListener->mNetCtrl->getNetworkContext(netId, uid, &netcontext);
    if (useLocalNameservers) {
        netcontext.flags |= NET_CONTEXT_FLAG_USE_LOCAL_NAMESERVERS;
    }

    // hints stays NULL only when all four fields are -1.
    if (ai_flags != -1 || ai_family != -1 ||
        ai_socktype != -1 || ai_protocol != -1) {
        // NOTE(review): the calloc() result is dereferenced without a NULL check.
        hints = (struct addrinfo*) calloc(1, sizeof(struct addrinfo));
        hints->ai_flags = ai_flags;
        hints->ai_family = ai_family;
        hints->ai_socktype = ai_socktype;
        hints->ai_protocol = ai_protocol;
    }


    const int metricsLevel = mDnsProxyListener->mEventReporter->getMetricsReportingLevel();

    // The handler receives everything needed for the lookup; presumably it
    // takes ownership of hints -- TODO confirm against the handler's destructor.
    DnsProxyListener::GetAddrInfoHandler* handler =
            new DnsProxyListener::GetAddrInfoHandler(cli, name, service, hints, netcontext,
                    metricsLevel, mDnsProxyListener->mEventReporter->getNetdEventListener());
    tryThreadOrError(cli, handler);
    return 0;
}
// Takes a reference on cli and launches handler through threadLaunch().
// When threadLaunch() returns 0 the function returns immediately; the
// failure path is elided ("...") in this excerpt.
template<typename T>
void tryThreadOrError(SocketClient* cli, T* handler) {
    // Keep the client alive while the handler uses it.
    cli->incRef();

    const int rval = threadLaunch(handler);
    if (rval == 0) {
        // SocketClient decRef() happens in the handler's run() method.
        return;
    }

...
}

注意threadLaunch这个方式实际上就是创建了一个线程,并执行了handler的run方法。

GetAddrInfoHandler run
// Performs the lookup for one getaddrinfo request and writes the reply to
// the client.
//
// On failure (rv != 0) the error code is sent with DnsProxyOperationFailed.
// On success DnsProxyQueryResult is sent, followed by each addrinfo prefixed
// with a big-endian 1, and finally a big-endian 0 as terminator. Several
// parts are elided ("...") in this excerpt.
void DnsProxyListener::GetAddrInfoHandler::run() {
...

    struct addrinfo* result = NULL;
    Stopwatch s;
    maybeFixupNetContext(&mNetContext);
    const uid_t uid = mClient->getUid();
    uint32_t rv = 0;
    // The lookup runs only if the per-UID limiter admits it; finish() pairs
    // with the successful start().
    if (queryLimiter.start(uid)) {
        rv = android_getaddrinfofornetcontext(mHost, mService, mHints, &mNetContext, &result);
        queryLimiter.finish(uid);
    } else {
...
    }
    const int latencyMs = lround(s.timeTaken());

    if (rv) {
        // getaddrinfo failed
        mClient->sendBinaryMsg(ResponseCode::DnsProxyOperationFailed, &rv, sizeof(rv));
    } else {
        // success is true when sendCode() returns 0.
        bool success = !mClient->sendCode(ResponseCode::DnsProxyQueryResult);
        struct addrinfo* ai = result;
        // Stop at the end of the list or at the first failed send.
        while (ai && success) {
            success = sendBE32(mClient, 1) && sendaddrinfo(mClient, ai);
            ai = ai->ai_next;
        }
        success = success && sendBE32(mClient, 0);
       ....
    }
    std::vector<String16> ip_addrs;
    int total_ip_addr_count = 0;
...
    // Drop a reference on the client (tryThreadOrError() took one before
    // launching this handler).
    mClient->decRef();
...
}

到这里又返回了android_getaddrinfofornetcontext。

explore_fqdn 查询DNS服务

文件:/bionic/libc/dns/net/getaddrinfo.c

值得注意的是,这里又调用了之前App进程调用的bionic库中的android_getaddrinfofornetcontext方法。由于netd进程在初始化的时候把ANDROID_DNS_MODE设置为local,此时就不会再一次走到android_getaddrinfo_proxy中,而是直接走到下半部分的 explore_fqdn方法。

/*
 * FQDN hostname, DNS lookup.
 *
 * Dispatches the lookup through nsdispatch() using a table of callbacks
 * (_files_getaddrinfo, _dns_getaddrinfo, _yp_getaddrinfo). On NS_SUCCESS,
 * GET_PORT is applied to every returned entry and the list is stored in
 * *res. NS_TRYAGAIN, NS_UNAVAIL and NS_NOTFOUND map to EAI_AGAIN, EAI_FAIL
 * and EAI_NODATA respectively, after freeing any partial result.
 *
 * Returns 0 on success, and also when servname does not match the
 * socktype/protocol (in which case *res is left untouched).
 */
static int
explore_fqdn(const struct addrinfo *pai, const char *hostname,
    const char *servname, struct addrinfo **res,
    const struct android_net_context *netcontext)
{
    /* Lookup callbacks handed to nsdispatch(), in table order. */
    static const ns_dtab dtab[] = {
        NS_FILES_CB(_files_getaddrinfo, NULL)
        { NSSRC_DNS, _dns_getaddrinfo, NULL },  /* force -DHESIOD */
        NS_NIS_CB(_yp_getaddrinfo, NULL)
        { 0, 0, 0 }
    };
    struct addrinfo *result = NULL;
    struct addrinfo *cur;
    int error = 0;
    int status;

    /* Only pai and res are mandatory; hostname and servname may be NULL. */
    assert(pai != NULL);
    assert(res != NULL);

    /* A servname that does not match socktype/protocol is ignored. */
    if (get_portmatch(pai, servname) != 0)
        return 0;

    status = nsdispatch(&result, dtab, NSDB_HOSTS, "getaddrinfo",
        default_dns_files, hostname, pai, netcontext);

    if (status == NS_TRYAGAIN) {
        error = EAI_AGAIN;
        goto free;
    }
    if (status == NS_UNAVAIL) {
        error = EAI_FAIL;
        goto free;
    }
    if (status == NS_NOTFOUND) {
        error = EAI_NODATA;
        goto free;
    }
    if (status == NS_SUCCESS) {
        error = 0;
        /* canonname should be filled already; only the port is patched in. */
        for (cur = result; cur; cur = cur->ai_next) {
            GET_PORT(cur, servname);
        }
    }

    /* Any other status falls through here and is reported like success. */
    *res = result;

    return 0;

free:
    if (result)
        freeaddrinfo(result);
    return error;
}

构建一个ns_dtab 数组,内含有三个结构体,这三个结构体实际上会被nsdispatch接受。这个过程会以传递到nsdispatch中的最后三个参数(hostname,pai,netcontext)作为参数,依次调用_files_getaddrinfo,_dns_getaddrinfo,_yp_getaddrinfo方法,直到找到该域名对应的ip地址。

我们分别来考量一下这三个方法都做了什么?

_files_getaddrinfo 从文件缓存中查询ip地址
/*
 * nsdispatch callback that collects addrinfo entries through the hosts
 * stream helpers.
 *
 * Reads two arguments from ap: the name to look up and the addrinfo
 * hints. Every entry returned by _gethtent() is appended to one list,
 * which is stored through rv. Returns NS_SUCCESS when at least one entry
 * was collected, NS_NOTFOUND otherwise. cb_data is not used.
 */
static int
_files_getaddrinfo(void *rv, void *cb_data, va_list ap)
{
    struct addrinfo head;           /* dummy list head */
    struct addrinfo *tail = &head;  /* last node appended so far */
    struct addrinfo *entry;
    FILE *hostf = NULL;
    const char *name = va_arg(ap, char *);
    const struct addrinfo *pai = va_arg(ap, struct addrinfo *);

    memset(&head, 0, sizeof(head));

    _sethtent(&hostf);
    for (;;) {
        entry = _gethtent(&hostf, name, pai);
        if (entry == NULL)
            break;
        tail->ai_next = entry;
        /* An entry may itself be a chain, so walk to its last node. */
        while (tail->ai_next != NULL)
            tail = tail->ai_next;
    }
    _endhtent(&hostf);

    *((struct addrinfo **)rv) = head.ai_next;
    return (head.ai_next != NULL) ? NS_SUCCESS : NS_NOTFOUND;
}


/*
 * Positions *hostf at the start of the hosts file: rewinds the stream if
 * one is already open, otherwise opens _PATH_HOSTS in "re" mode. *hostf is
 * left NULL if fopen() fails.
 */
static void
_sethtent(FILE **hostf)
{
    if (*hostf != NULL) {
        rewind(*hostf);
        return;
    }
    *hostf = fopen(_PATH_HOSTS, "re");
}

注意_PATH_HOSTS是指:

#define _PATH_HOSTS "/system/etc/hosts"

这个过程实际上就是打开/system/etc/hosts 系统环境文件,然后读取去该文件中的内容,转化addrinfo结构体。并和当前的域名进行匹配。

如果熟悉Linux的读者肯定知道/system/etc/hosts这个文件实际上就是缓存了每一次从网络中通过DNS服务器查询到的结果。

_dns_getaddrinfo

如果缓存查不到,那么调用 _dns_getaddrinfo查询DNS服务器,看看域名对应的ip地址是什么?

/*
 * nsdispatch callback that resolves a name through res_searchN().
 *
 * Reads three arguments from ap: the name, the addrinfo hints and the
 * android_net_context. Prepares one or two res_target queries depending on
 * pai->ai_family, runs the search, converts the answers with getanswer(),
 * sorts the combined list with _rfc6724_sort() and stores it through rv.
 *
 * Returns NS_SUCCESS when at least one address was produced; otherwise
 * NS_NOTFOUND, NS_TRYAGAIN or NS_UNAVAIL. cb_data is not used. The AF_INET
 * and AF_INET6 cases are elided ("...") in this excerpt.
 */
static int
_dns_getaddrinfo(void *rv, void *cb_data, va_list ap)
{
    struct addrinfo *ai;
    querybuf *buf, *buf2;
    const char *name;
    const struct addrinfo *pai;
    struct addrinfo sentinel, *cur;
    struct res_target q, q2;
    res_state res;
    const struct android_net_context *netcontext;

    name = va_arg(ap, char *);
    pai = va_arg(ap, const struct addrinfo *);
    netcontext = va_arg(ap, const struct android_net_context *);
    //fprintf(stderr, "_dns_getaddrinfo() name = '%s'\n", name);

    memset(&q, 0, sizeof(q));
    memset(&q2, 0, sizeof(q2));
    /* sentinel is a dummy list head; cur tracks the tail of the result list. */
    memset(&sentinel, 0, sizeof(sentinel));
    cur = &sentinel;

    /* One answer buffer per query target (q uses buf, q2 uses buf2). */
    buf = malloc(sizeof(*buf));
    if (buf == NULL) {
        h_errno = NETDB_INTERNAL;
        return NS_NOTFOUND;
    }
    buf2 = malloc(sizeof(*buf2));
    if (buf2 == NULL) {
        free(buf);
        h_errno = NETDB_INTERNAL;
        return NS_NOTFOUND;
    }

    switch (pai->ai_family) {
    case AF_UNSPEC:
        /* prefer IPv6 */
        q.name = name;
        q.qclass = C_IN;
        q.answer = buf->buf;
        q.anslen = sizeof(buf->buf);
        int query_ipv6 = 1, query_ipv4 = 1;
        /* With AI_ADDRCONFIG, each family is queried only if its probe succeeds. */
        if (pai->ai_flags & AI_ADDRCONFIG) {
            query_ipv6 = _have_ipv6(netcontext->app_mark, netcontext->uid);
            query_ipv4 = _have_ipv4(netcontext->app_mark, netcontext->uid);
        }
        if (query_ipv6) {
            q.qtype = T_AAAA;
            if (query_ipv4) {
                /* Chain the A query after the AAAA query. */
                q.next = &q2;
                q2.name = name;
                q2.qclass = C_IN;
                q2.qtype = T_A;
                q2.answer = buf2->buf;
                q2.anslen = sizeof(buf2->buf);
            }
        } else if (query_ipv4) {
            q.qtype = T_A;
        } else {
            /* Neither family is usable: nothing to query. */
            free(buf);
            free(buf2);
            return NS_NOTFOUND;
        }
        break;
    case AF_INET:
...
        break;
    case AF_INET6:
...
        break;
    default:
        free(buf);
        free(buf2);
        return NS_UNAVAIL;
    }

    res = __res_get_state();
    if (res == NULL) {
        free(buf);
        free(buf2);
        return NS_NOTFOUND;
    }


    res_setnetcontext(res, netcontext);
    /* A negative return is reported as NS_NOTFOUND regardless of h_errno. */
    if (res_searchN(name, &q, res) < 0) {
        __res_put_state(res);
        free(buf);
        free(buf2);
        return NS_NOTFOUND;
    }
    /* Convert the first answer and advance cur to the end of the list. */
    ai = getanswer(buf, q.n, q.name, q.qtype, pai);
    if (ai) {
        cur->ai_next = ai;
        while (cur && cur->ai_next)
            cur = cur->ai_next;
    }
    /* Append the second answer, if a second query was chained. */
    if (q.next) {
        ai = getanswer(buf2, q2.n, q2.name, q2.qtype, pai);
        if (ai)
            cur->ai_next = ai;
    }
    free(buf);
    free(buf2);
    /* No usable address: translate h_errno into an NS_* status. */
    if (sentinel.ai_next == NULL) {
        __res_put_state(res);
        switch (h_errno) {
        case HOST_NOT_FOUND:
            return NS_NOTFOUND;
        case TRY_AGAIN:
            return NS_TRYAGAIN;
        default:
            return NS_UNAVAIL;
        }
    }

    _rfc6724_sort(&sentinel, netcontext->app_mark, netcontext->uid);

    __res_put_state(res);

    *((struct addrinfo **)rv) = sentinel.ai_next;
    return NS_SUCCESS;
}

注意在Java层的StructInfo中,ai_family设置为AF_UNSPEC,ai_flags设置为AI_ADDRCONFIG。

那么就会从_have_ipv6以及_have_ipv4查询。

/*
 * Probes IPv6 availability for the given socket mark and uid.
 *
 * Asks _find_src_addr() for a source address towards 2000:: and returns 1
 * only when that call returns exactly 1; returns 0 otherwise.
 */
static int
_have_ipv6(unsigned mark, uid_t uid) {
    static const struct sockaddr_in6 probe6 = {
        .sin6_family = AF_INET6,
        .sin6_addr.s6_addr = {  // 2000::
            0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
        };
    sockaddr_union dest = { .in6 = probe6 };
    const int found = _find_src_addr(&dest.generic, NULL, mark, uid);

    return (found == 1) ? 1 : 0;
}


/*
 * Probes IPv4 availability for the given socket mark and uid.
 *
 * Asks _find_src_addr() for a source address towards 8.8.8.8 and returns 1
 * only when that call returns exactly 1; returns 0 otherwise.
 */
static int
_have_ipv4(unsigned mark, uid_t uid) {
    static const struct sockaddr_in probe4 = {
        .sin_family = AF_INET,
        .sin_addr.s_addr = __constant_htonl(0x08080808L)  // 8.8.8.8
    };
    sockaddr_union dest = { .in = probe4 };
    const int found = _find_src_addr(&dest.generic, NULL, mark, uid);

    return (found == 1) ? 1 : 0;
}

这两个方法主要是用来确定当前网络是否支持ipv6和ipv4的。AI_ADDRCONFIG标志位不是用来检测链接是否可用,而是应该检测在本地中是否进行了配置。然而bionic不支持getifaddrs,因此需要先尝试为一个测试地址查找源地址,以此判断对应的协议族是否可用。

8.8.8.8是google面向全球的查询的DNS服务器,2000::是ipv6全球单播地址。

核心来看看res_searchN都做了什么?

res_searchN
/*
 * Runs the resolver search strategy for name.
 *
 * Counts the dots in name and notes whether it ends with one. If the dot
 * count reaches res->ndots the name is first queried as-is. If the options
 * allow it (RES_DEFNAMES for dot-less names, RES_DNSRCH for names without a
 * trailing dot), each entry of res->dnsrch is then tried as a suffix.
 * Finally, if the name was never tried as-is, it is queried as-is.
 *
 * Returns the positive result of res_querydomainN() as soon as one query
 * succeeds. Returns -1 with h_errno = TRY_AGAIN when errno is ECONNREFUSED
 * after a search-list query. The remaining failure handling is elided
 * ("...") in this excerpt.
 */
static int
res_searchN(const char *name, struct res_target *target, res_state res)
{
    const char *cp, * const *domain;
    HEADER *hp;
    u_int dots;
    int trailing_dot, ret, saved_herrno;
    int got_nodata = 0, got_servfail = 0, tried_as_is = 0;


    hp = (HEADER *)(void *)target->answer;  /*XXX*/

    /*
     * dots is accumulated with += below, so it must start at zero;
     * without this the ndots comparison reads an indeterminate value.
     */
    dots = 0;
    for (cp = name; *cp; cp++)
        dots += (*cp == '.');
    trailing_dot = 0;
    /* cp is at the terminating NUL here; step back to the last character. */
    if (cp > name && *--cp == '.')
        trailing_dot++;

...


    /* Enough dots: try the name exactly as given first. */
    saved_herrno = -1;
    if (dots >= res->ndots) {
        ret = res_querydomainN(name, NULL, target, res);
        if (ret > 0)
            return (ret);
        saved_herrno = h_errno;
        tried_as_is++;
    }

    if ((!dots && (res->options & RES_DEFNAMES)) ||
        (dots && !trailing_dot && (res->options & RES_DNSRCH))) {
        int done = 0;

        _resolv_populate_res_for_net(res);

        /* Walk the search list until a query succeeds or done is set. */
        for (domain = (const char * const *)res->dnsrch;
           *domain && !done;
           domain++) {

            ret = res_querydomainN(name, *domain, target, res);
            if (ret > 0)
                return ret;

            if (errno == ECONNREFUSED) {
                h_errno = TRY_AGAIN;
                return -1;
            }

            switch (h_errno) {
            case NO_DATA:
                got_nodata++;
                /* FALLTHROUGH */
            case HOST_NOT_FOUND:
                /* keep trying */
                break;
            case TRY_AGAIN:
                if (hp->rcode == SERVFAIL) {
                    /* try next search element, if any */
                    got_servfail++;
                    break;
                }
                /* FALLTHROUGH */
            default:
                /* anything else implies that we're done */
                done++;
            }

            /* Without RES_DNSRCH only the first search entry is tried. */
            if (!(res->options & RES_DNSRCH))
                    done++;
        }
    }

    /* Last resort: the as-is query, if it has not been attempted yet. */
    if (!tried_as_is) {
        ret = res_querydomainN(name, NULL, target, res);
        if (ret > 0)
            return ret;
    }

...
}

能看到此时会把整个域名如www.baidu.com 通过.的符号计算有多少个。如果阅读过我之前写的序言,就知道其实DNS就是根据.划分的层级开始逐层遍历查找不同层级的DNS服务器。

这里前后进行了两次res_querydomainN查询。 第一次是判断当前域名是否有超过阈值,超出了阈值后会进行一次查询。剩下的部分会在下面的循环中根据res->dnsrch的分割好的域名层层一次进行查询。

res_querydomainN
/*
 * Builds the full name to query and passes it to res_queryN().
 *
 * With domain == NULL, name is used as-is, except that a single trailing
 * '.' is removed (via a copy in nbuf). Otherwise "name.domain" is formatted
 * into nbuf. If the result would not fit in nbuf, sets h_errno to
 * NO_RECOVERY and returns -1. Part of the original is elided ("...") in
 * this excerpt.
 */
static int
res_querydomainN(const char *name, const char *domain,
    struct res_target *target, res_state res)
{
    char nbuf[MAXDNAME];
    /* Points at nbuf unless name can be passed through unchanged. */
    const char *longname = nbuf;
    size_t n, d;
...

    if (domain == NULL) {
        /*
         * Check for trailing '.';
         * copy without '.' if present.
         */
        n = strlen(name);
        if (n + 1 > sizeof(nbuf)) {
            h_errno = NO_RECOVERY;
            return -1;
        }
        /* --n makes n the index of the last character, i.e. the copy length. */
        if (n > 0 && name[--n] == '.') {
            strncpy(nbuf, name, n);
            nbuf[n] = '\0';
        } else
            longname = name;
    } else {
        n = strlen(name);
        d = strlen(domain);
        /* Room needed: name + '.' + domain + terminating NUL. */
        if (n + 1 + d + 1 > sizeof(nbuf)) {
            h_errno = NO_RECOVERY;
            return -1;
        }
        snprintf(nbuf, sizeof(nbuf), "%s.%s", name, domain);
    }
    return res_queryN(longname, target, res);
}

能看到这个过程会根据域名如www.baidu.com从尾部开始把每一个层级划分出来设置到longname中。

res_queryN

/*
 * Issues one DNS query per entry in the target chain for the given name.
 *
 * For each target, a query packet is built with res_nmkquery() and sent
 * with res_nsend(); the answer is written into the target's own buffer.
 * Answer counts of successful targets are summed, and each successful
 * target records its response length in t->n.
 *
 * Returns the total answer count when it is non-zero. If it is zero, sets
 * h_errno from the most recently recorded rcode and returns -1. If
 * res_nmkquery() returns <= 0, sets h_errno to NO_RECOVERY and returns that
 * value. EDNS0 handling and other parts are elided ("...") in this excerpt.
 */
static int
res_queryN(const char *name, /* domain name */ struct res_target *target,
    res_state res)
{
    u_char buf[MAXPACKET];
    HEADER *hp;
    int n;
    struct res_target *t;
    int rcode;
    int ancount;

    assert(name != NULL);
    /* XXX: target may be NULL??? */

    rcode = NOERROR;
    ancount = 0;

    for (t = target; t; t = t->next) {
        int class, type;
        u_char *answer;
        int anslen;
        u_int oflags;

        /* The answer buffer starts with the DNS header. */
        hp = (HEADER *)(void *)t->answer;
        oflags = res->_flags;

again:
        hp->rcode = NOERROR;    /* default */

        /* make it easier... */
        class = t->qclass;
        type = t->qtype;
        answer = t->answer;
        anslen = t->anslen;


        n = res_nmkquery(res, QUERY, name, class, type, NULL, 0, NULL,
            buf, sizeof(buf));
#ifdef RES_USE_EDNS0
...
#endif
        if (n <= 0) {

            h_errno = NO_RECOVERY;
            return n;
        }
        n = res_nsend(res, buf, n, answer, anslen);
...

        /* Send failure, error rcode, or empty answer: remember rcode, try next target. */
        if (n < 0 || hp->rcode != NOERROR || ntohs(hp->ancount) == 0) {
            rcode = hp->rcode;  /* record most recent error */
#ifdef RES_USE_EDNS0
...
#endif
...
            continue;
        }

        ancount += ntohs(hp->ancount);

        t->n = n;
    }

    /* No target produced an answer: map the last rcode to h_errno. */
    if (ancount == 0) {
        switch (rcode) {
        case NXDOMAIN:
            h_errno = HOST_NOT_FOUND;
            break;
        case SERVFAIL:
            h_errno = TRY_AGAIN;
            break;
        case NOERROR:
            h_errno = NO_DATA;
            break;
        case FORMERR:
        case NOTIMP:
        case REFUSED:
        default:
            h_errno = NO_RECOVERY;
            break;
        }
        return -1;
    }
    return ancount;
}

关于DNS的扩展字段这里就不多聊,我们直接看如下几个关键步骤:

res_nsend

文件:/bionic/libc/dns/resolv/res_send.c

/*
 * Sends the query in buf to the configured nameservers and stores the
 * response in ans.
 *
 * The resolver cache is consulted first; a hit returns the cached answer
 * length immediately. With no nameservers configured the query is marked
 * failed in the cache, errno is set to ESRCH and -1 is returned. Otherwise
 * each usable nameserver is tried, up to statp->retry rounds. A response
 * obtained after a RESOLV_CACHE_NOTFOUND lookup is added to the cache.
 *
 * Returns the response length on success and -1 on failure. The virtual
 * circuit (TCP) path and other parts are elided ("...") in this excerpt.
 */
int
res_nsend(res_state statp,
      const u_char *buf, int buflen, u_char *ans, int anssiz)
{
    int gotsomewhere, terrno, try, v_circuit, resplen, ns, n;
    char abuf[NI_MAXHOST];
    ResolvCacheStatus     cache_status = RESOLV_CACHE_UNSUPPORTED;
...
    /* Use a virtual circuit when requested via RES_USEVC or when the query exceeds PACKETSZ. */
    v_circuit = (statp->options & RES_USEVC) || buflen > PACKETSZ;
    gotsomewhere = 0;
    terrno = ETIMEDOUT;

    int  anslen = 0;
    cache_status = _resolv_cache_lookup(
            statp->netid, buf, buflen,
            ans, anssiz, &anslen);

    if (cache_status == RESOLV_CACHE_FOUND) {
        return anslen;
    } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
        // had a cache miss for a known network, so populate the thread private
        // data so the normal resolve path can do its thing
        _resolv_populate_res_for_net(statp);
    }
    if (statp->nscount == 0) {
        // We have no nameservers configured, so there's no point trying.
        // Tell the cache the query failed, or any retries and anyone else asking the same
        // question will block for PENDING_REQUEST_TIMEOUT seconds instead of failing fast.
        _resolv_cache_query_failed(statp->netid, buf, buflen);
        errno = ESRCH;
        return (-1);
    }

    /*
     * If the ns_addr_list in the resolver context has changed, then
     * invalidate our cached copy and the associated timing data.
     */
    if (EXT(statp).nscount != 0) {
        int needclose = 0;
        struct sockaddr_storage peer;
        socklen_t peerlen;

        if (EXT(statp).nscount != statp->nscount) {
            needclose++;
        } else {
            for (ns = 0; ns < statp->nscount; ns++) {
                /* Configured address differs from the copy kept in EXT. */
                if (statp->nsaddr_list[ns].sin_family &&
                    !sock_eq((struct sockaddr *)(void *)&statp->nsaddr_list[ns],
                         (struct sockaddr *)(void *)&EXT(statp).ext->nsaddrs[ns])) {
                    needclose++;
                    break;
                }

                /* No socket open for this server: nothing more to compare. */
                if (EXT(statp).nssocks[ns] == -1)
                    continue;
                /* An open socket must still be connected to the expected peer. */
                peerlen = sizeof(peer);
                if (getpeername(EXT(statp).nssocks[ns],
                    (struct sockaddr *)(void *)&peer, &peerlen) < 0) {
                    needclose++;
                    break;
                }
                if (!sock_eq((struct sockaddr *)(void *)&peer,
                    get_nsaddr(statp, (size_t)ns))) {
                    needclose++;
                    break;
                }
            }
        }
        if (needclose) {
            res_nclose(statp);
            EXT(statp).nscount = 0;
        }
    }

...


    /*
     * Send request, RETRY times, or until successful.
     */
    for (try = 0; try < statp->retry; try++) {
        struct __res_stats stats[MAXNS];
        struct __res_params params;
        int revision_id = _resolv_cache_get_resolver_stats(statp->netid, &params, stats);
        /* Per-server flags filled in from the collected statistics. */
        bool usable_servers[MAXNS];
        android_net_res_stats_get_usable_servers(&params, stats, statp->nscount,
            usable_servers);

        for (ns = 0; ns < statp->nscount; ns++) {
        if (!usable_servers[ns]) continue;
        struct sockaddr *nsap;
        int nsaplen;
        time_t now = 0;
        int rcode = RCODE_INTERNAL_ERROR;
        int delay = 0;
        nsap = get_nsaddr(statp, (size_t)ns);
        nsaplen = get_salen(nsap);
        /* Record the index of the server being tried in the _flags field. */
        statp->_flags &= ~RES_F_LASTMASK;
        statp->_flags |= (ns << RES_F_LASTSHIFT);

....


        if (v_circuit) {
...
        } else {
            /* Use datagrams. */
...
            n = send_dg(statp, buf, buflen, ans, anssiz, &terrno,
                    ns, &v_circuit, &gotsomewhere, &now, &rcode, &delay);

            /* Only record stats the first time we try a query. See above. */
            if (try == 0) {
                struct __res_sample sample;
                _res_stats_set_sample(&sample, now, rcode, delay);
                _resolv_cache_add_resolver_stats_sample(statp->netid, revision_id,
                    ns, &sample, params.max_samples);
            }
...
            /* n < 0: give up; n == 0: move on to the next nameserver. */
            if (n < 0)
                goto fail;
            if (n == 0)
                goto next_ns;
...
            /* send_dg() may set v_circuit; the same_ns label is in an elided part. */
            if (v_circuit)
                goto same_ns;
            resplen = n;
        }


        if (cache_status == RESOLV_CACHE_NOTFOUND) {
            _resolv_cache_add(statp->netid, buf, buflen,
                      ans, resplen);
        }
        /*
         * If we have temporarily opened a virtual circuit,
         * or if we haven't been asked to keep a socket open,
         * close the socket.
         */
        if ((v_circuit && (statp->options & RES_USEVC) == 0U) ||
            (statp->options & RES_STAYOPEN) == 0U) {
            res_nclose(statp);
        }
...
        return (resplen);
 next_ns: ;
       } /*foreach ns*/
    } /*foreach retry*/
    /* Every server and retry was exhausted without a usable response. */
    res_nclose(statp);
    if (!v_circuit) {
        if (!gotsomewhere)
            errno = ECONNREFUSED;   /* no nameservers found */
        else
            errno = ETIMEDOUT;  /* no answer obtained */
    } else
        errno = terrno;

    _resolv_cache_query_failed(statp->netid, buf, buflen);

    return (-1);
 fail:

...
    return (-1);
}

这里我们就以经典的UDP请求为例子。

send_dg 发送之前设置好的HEADER数据。如果send_dg返回的结果是0,说明从当前这个DNS服务器没有拿到结果,此时会跳转到next_ns,换下一个DNS服务器继续尝试;如果返回值小于0,则直接跳转到fail结束查询。

send_dg
/*
 * Sends the query in buf to nameserver ns over UDP and waits for a reply
 * in ans.
 *
 * Outputs: *at is set to the current time, *rcode and *delay are reset on
 * entry. On a select() timeout *rcode becomes RCODE_TIMEOUT, *gotsomewhere
 * is set to 1 and 0 is returned. On the visible path the return value is
 * the length returned by recvfrom().
 *
 * NOTE(review): this excerpt is abridged and does not balance as shown: the
 * "if (EXT(statp).nssocks[ns] == -1) {" block is never closed, there is an
 * "#endif" without a visible matching "#if", and the local s is used by
 * sendto()/retrying_select()/recvfrom() without any visible assignment.
 * Consult the full res_send.c before relying on the details.
 */
static int
send_dg(res_state statp,
    const u_char *buf, int buflen, u_char *ans, int anssiz,
    int *terrno, int ns, int *v_circuit, int *gotsomewhere,
    time_t *at, int *rcode, int* delay)
{
    *at = time(NULL);
    *rcode = RCODE_INTERNAL_ERROR;
    *delay = 0;
    const HEADER *hp = (const HEADER *)(const void *)buf;
    HEADER *anhp = (HEADER *)(void *)ans;
    const struct sockaddr *nsap;
    int nsaplen;
    struct timespec now, timeout, finish, done;
    fd_set dsmask;
    struct sockaddr_storage from;
    socklen_t fromlen;
    int resplen, seconds, n, s;

    nsap = get_nsaddr(statp, (size_t)ns);
    nsaplen = get_salen(nsap);
    /* Create the per-nameserver UDP socket on first use. */
    if (EXT(statp).nssocks[ns] == -1) {
        EXT(statp).nssocks[ns] = socket(nsap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0);
        /* Descriptors above highestFD are rejected. */
        if (EXT(statp).nssocks[ns] > highestFD) {
            res_nclose(statp);
            errno = ENOTSOCK;
        }
        if (EXT(statp).nssocks[ns] < 0) {
            switch (errno) {
            case EPROTONOSUPPORT:
#ifdef EPFNOSUPPORT
...
#endif
            case EAFNOSUPPORT:
                /* Unsupported protocol/address family: return 0 rather than -1. */
                Perror(statp, stderr, "socket(dg)", errno);
                return (0);
            default:
                *terrno = errno;
                Perror(statp, stderr, "socket(dg)", errno);
                return (-1);
            }
        }

        fchown(EXT(statp).nssocks[ns], AID_DNS, -1);
        /* Apply the resolver's socket mark when one is set. */
        if (statp->_mark != MARK_UNSET) {
            if (setsockopt(EXT(statp).nssocks[ns], SOL_SOCKET,
                    SO_MARK, &(statp->_mark), sizeof(statp->_mark)) < 0) {
                res_nclose(statp);
                return -1;
            }
        }

    if (sendto(s, (const char*)buf, buflen, 0, nsap, nsaplen) != buflen)
    {
        Aerror(statp, stderr, "sendto", errno, nsap, nsaplen);
        res_nclose(statp);
        return (0);
    }
#endif /* !CANNOT_CONNECT_DGRAM */


    /* Compute the absolute deadline for this nameserver. */
    seconds = get_timeout(statp, ns);
    now = evNowTime();
    timeout = evConsTime((long)seconds, 0L);
    finish = evAddTime(now, timeout);
retry:
    n = retrying_select(s, &dsmask, NULL, &finish);

    if (n == 0) {
        *rcode = RCODE_TIMEOUT;
        *gotsomewhere = 1;
        return (0);
    }
    if (n < 0) {
        Perror(statp, stderr, "select", errno);
        res_nclose(statp);
        return (0);
    }
    errno = 0;
    fromlen = sizeof(from);
    resplen = recvfrom(s, (char*)ans, (size_t)anssiz,0,
               (struct sockaddr *)(void *)&from, &fromlen);
...
    return (resplen);
}

DNS 服务器的设置

看了半天好像没说到是怎么设置DNS查询的服务器的。可以看到send_dg方法中通过nsap = get_nsaddr(statp, (size_t)ns);方法获取需要请求的DNS地址。

其实这个数据是在netd第一次获取res_state的时候初始化好的,入口方法是__res_get_state。这个方法会保证每个线程只调用一次res_ninit方法,读取系统中设置好的配置。

文件: /bionic/libc/dns/resolv/res_init.c

下面是核心的阶段

    /*
     * Parse the resolver configuration file (_PATH_RESCONF) line by line.
     * Keywords handled in this excerpt: "domain", "search", "nameserver",
     * "sortlist" and "options". Lines starting with ';' or '#' are skipped.
     * The results are stored in *statp (defdname, dnsrch, nsaddr_list,
     * sort_list, nscount, nsort).
     */
    if ((fp = fopen(_PATH_RESCONF, "re")) != NULL) {
        /* read the config file */
        while (fgets(buf, sizeof(buf), fp) != NULL) {
        /* skip comment lines */
        if (*buf == ';' || *buf == '#')
            continue;
        /* "domain <name>": the first token becomes the default domain name */
        if (MATCH(buf, "domain")) {
            if (haveenv)    /* skip if have from environ */
                continue;
            /* step over the keyword and the blanks that follow it */
            cp = buf + sizeof("domain") - 1;
            while (*cp == ' ' || *cp == '\t')
                cp++;
            if ((*cp == '\0') || (*cp == '\n'))
                continue;
            /* bounded copy, always NUL-terminated */
            strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
            statp->defdname[sizeof(statp->defdname) - 1] = '\0';
            /* keep only the first whitespace-delimited token */
            if ((cp = strpbrk(statp->defdname, " \t\n")) != NULL)
                *cp = '\0';
            havesearch = 0;
            continue;
        }
        /* "search <d1> <d2> ...": build the search list in statp->dnsrch */
        if (MATCH(buf, "search")) {
            if (haveenv)    /* skip if have from environ */
                continue;
            cp = buf + sizeof("search") - 1;
            while (*cp == ' ' || *cp == '\t')
                cp++;
            if ((*cp == '\0') || (*cp == '\n'))
                continue;
            /* the whole list is stored in defdname and split in place below */
            strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
            statp->defdname[sizeof(statp->defdname) - 1] = '\0';
            if ((cp = strchr(statp->defdname, '\n')) != NULL)
                *cp = '\0';
            cp = statp->defdname;
            pp = statp->dnsrch;
            /* the first entry starts at the beginning of the copied string */
            *pp++ = cp;
            /*
             * Replace each blank with NUL; `n` flags "just saw a blank", so
             * the next non-blank character starts a new dnsrch entry.
             */
            for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) {
                if (*cp == ' ' || *cp == '\t') {
                    *cp = 0;
                    n = 1;
                } else if (n) {
                    *pp++ = cp;
                    n = 0;
                }
            }

            /* terminate the last entry and NULL-terminate the pointer list */
            while (*cp != '\0' && *cp != ' ' && *cp != '\t')
                cp++;
            *cp = '\0';
            *pp++ = 0;
            havesearch = 1;
            continue;
        }

        /* "nameserver <numeric address>": at most MAXNS entries are accepted */
        if (MATCH(buf, "nameserver") && nserv < MAXNS) {
            struct addrinfo hints, *ai;
            char sbuf[NI_MAXSERV];
            /* upper bound on the address size accepted below */
            const size_t minsiz =
                sizeof(statp->_u._ext.ext->nsaddrs[0]);

            cp = buf + sizeof("nameserver") - 1;
            while (*cp == ' ' || *cp == '\t')
            cp++;
            /* cut the address at the first comment character or whitespace */
            cp[strcspn(cp, ";# \t\n")] = '\0';
            if ((*cp != '\0') && (*cp != '\n')) {
            memset(&hints, 0, sizeof(hints));
            hints.ai_family = PF_UNSPEC;
            hints.ai_socktype = SOCK_DGRAM; /*dummy*/
            /* numeric addresses only: AI_NUMERICHOST */
            hints.ai_flags = AI_NUMERICHOST;
            sprintf(sbuf, "%u", NAMESERVER_PORT);
            if (getaddrinfo(cp, sbuf, &hints, &ai) == 0 &&
                ai->ai_addrlen <= minsiz) {
                /* store the address in the extended table when it exists */
                if (statp->_u._ext.ext != NULL) {
                memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
                    ai->ai_addr, ai->ai_addrlen);
                }
                /* also store it in nsaddr_list when it fits, else mark the slot with family 0 */
                if (ai->ai_addrlen <=
                    sizeof(statp->nsaddr_list[nserv])) {
                memcpy(&statp->nsaddr_list[nserv],
                    ai->ai_addr, ai->ai_addrlen);
                } else
                statp->nsaddr_list[nserv].sin_family = 0;
                freeaddrinfo(ai);
                nserv++;
            }
            }
            continue;
        }
        /*
         * "sortlist <addr>[<sep><mask>] ...": up to MAXRESOLVSORT entries.
         * NOTE(review): ISSORTMASK() presumably recognises the separator
         * between address and mask; it is defined outside this excerpt.
         */
        if (MATCH(buf, "sortlist")) {
            struct in_addr a;

            cp = buf + sizeof("sortlist") - 1;
            while (nsort < MAXRESOLVSORT) {
            while (*cp == ' ' || *cp == '\t')
                cp++;
            if (*cp == '\0' || *cp == '\n' || *cp == ';')
                break;
            /* isolate the address token; `n` remembers the delimiter */
            net = cp;
            while (*cp && !ISSORTMASK(*cp) && *cp != ';' &&
                   isascii(*cp) && !isspace((unsigned char)*cp))
                cp++;
            n = *cp;
            *cp = 0;
            if (inet_aton(net, &a)) {
                statp->sort_list[nsort].addr = a;
                if (ISSORTMASK(n)) {
                /* an explicit mask follows: restore the delimiter and parse it */
                *cp++ = n;
                net = cp;
                while (*cp && *cp != ';' &&
                    isascii(*cp) &&
                    !isspace((unsigned char)*cp))
                    cp++;
                n = *cp;
                *cp = 0;
                if (inet_aton(net, &a)) {
                    statp->sort_list[nsort].mask = a.s_addr;
                } else {
                    /* unparsable mask: fall back to net_mask() of the address */
                    statp->sort_list[nsort].mask =
                    net_mask(statp->sort_list[nsort].addr);
                }
                } else {
                /* no mask given: use net_mask() of the address */
                statp->sort_list[nsort].mask =
                    net_mask(statp->sort_list[nsort].addr);
                }
                nsort++;
            }
            /* put back the character that was overwritten with NUL */
            *cp = n;
            }
            continue;
        }
        /* "options ...": delegated to res_setoptions() */
        if (MATCH(buf, "options")) {
            res_setoptions(statp, buf + sizeof("options") - 1, "conf");
            continue;
        }
        }
        /* nscount is only overwritten when at least one nameserver was read */
        if (nserv > 0)
        statp->nscount = nserv;
        statp->nsort = nsort;
        (void) fclose(fp);
    }

在Android中对应的文件名为resolv.conf

_PATH_RESCONF        "/etc/ppp/resolv.conf"

下面是一个例子:

domain  51osos.com

search  www.51osos.com  51osos.com

nameserver 202.102.192.68

nameserver 202.102.192.69

这段代码就是对这个字符串进行解析:

对于我们来说,值得注意的是nameserver对应的数据,会注入到statp->nsaddr_list列表中,在后续发送时候就会根据这些DNS服务器进行解析。其中必定存在8.8.8.8google面向全球提供的DNS服务器。

这一段的知识来源于我对Linux系统的理解,并结合Android源码的流程进行推断。然而我在Android虚拟机中没找到对应的文件,如果有知道的朋友请告诉我一下,让我学习学习。

UDP 传输的DNS查询数据格式

既然知道这是怎么传送的,来看看传送的数据是什么。

一说起DNS查询报文,我们上一张大家十分熟悉的图

DNS查询报文.gif

DNS报文可以分为两种类型:

但是整个数据协议格式都是一致的。

DNS报文可以分为如下三个区域的:

DNS请求查询阶段中,会设置基础和问题部分。而资源记录部分则通过0进行占位。

基础部分

下面是一个查询的例子(数据来源于 http://c.biancheng.net/view/6457.html):

Domain Name System (query)
    Transaction ID: 0x9ad0                              #事务ID
    Flags: 0x0000 Standard query                        #报文中的标志字段
        0... .... .... .... = Response: Message is a query
                                                        #QR字段, 值为0, 因为是一个请求包
        .000 0... .... .... = Opcode: Standard query (0)
                                                        #Opcode字段, 值为0, 因为是标准查询
        .... ..0. .... .... = Truncated: Message is not truncated
                                                        #TC字段
        .... ...0 .... .... = Recursion desired: Don't do query recursively 
                                                        #RD字段
        .... .... .0.. .... = Z: reserved (0)           #保留字段, 值为0
        .... .... ...0 .... = Non-authenticated data: Unacceptable   
                                                        #保留字段, 值为0
    Questions: 1                                        #问题计数, 这里有1个问题
    Answer RRs: 0                                       #回答资源记录数
    Authority RRs: 0                                    #权威名称服务器计数
    Additional RRs: 0                                   #附加资源记录数

上面这个例子展示的就是基础部分。基础部分又称为报文首部,在bionic中用HEADER结构体进行表示。

DNS基础部分.gif

当DNS服务器响应查询后,就会在资源记录部分添加查询的结果。

下面是一个查询响应的例子:

Domain Name System (response)
    Transaction ID: 0x9ad0                                    #事务ID
    Flags: 0x8180 Standard query response, No error           #报文中的标志字段
        1... .... .... .... = Response: Message is a response
                                                              #QR字段, 值为1, 因为是一个响应包
        .000 0... .... .... = Opcode: Standard query (0)      # Opcode字段
        .... .0.. .... .... = Authoritative: Server is not an authority for
        domain                                                #AA字段
        .... ..0. .... .... = Truncated: Message is not truncated
                                                              #TC字段
        .... ...1 .... .... = Recursion desired: Do query recursively 
                                                              #RD字段
        .... .... 1... .... = Recursion available: Server can do recursive
        queries                                               #RA字段
        .... .... .0.. .... = Z: reserved (0)
        .... .... ..0. .... = Answer authenticated: Answer/authority portion
        was not authenticated by the server
        .... .... ...0 .... = Non-authenticated data: Unacceptable
        .... .... .... 0000 = Reply code: No error (0)        #返回码字段
    Questions: 1
    Answer RRs: 2
    Authority RRs: 5
    Additional RRs: 5

整个首部是通过如下结构体进行表示:


/*
 * DNS message header (the "basic part" of a DNS message), expressed as
 * bit-fields: 16-bit id, two bytes of flags, then four 16-bit counters.
 * NOTE(review): the order of the flag bit-fields shown here (rd first,
 * qr last) is presumably the little-endian variant of the declaration --
 * confirm against the full header file.
 */
typedef struct {
    unsigned    id :16;     /* query identification number */
            /* fields in third byte */
    unsigned    rd :1;      /* recursion desired */
    unsigned    tc :1;      /* truncated message */
    unsigned    aa :1;      /* authoritative answer */
    unsigned    opcode :4;  /* purpose of message */
    unsigned    qr :1;      /* response flag */
            /* fields in fourth byte */
    unsigned    rcode :4;   /* response code */
    unsigned    cd: 1;      /* checking disabled by resolver */
    unsigned    ad: 1;      /* authentic data from named */
    unsigned    unused :1;  /* unused bits (MBZ as of 4.9.3a3) */
    unsigned    ra :1;      /* recursion available */
            /* remaining bytes */
    unsigned    qdcount :16;    /* number of question entries */
    unsigned    ancount :16;    /* number of answer entries */
    unsigned    nscount :16;    /* number of authority entries */
    unsigned    arcount :16;    /* number of additional resource record entries */
} HEADER;

问题部分

DNS问题查询部分.gif

整个问题部分十分简单,分为如下几个部分:

举一个例子:

Domain Name System (query)                        #查询请求
    Queries                                       #问题部分
        baidu.com: type A, class IN
            Name: baidu.com                       #查询名字段, 这里请求域名baidu.com
            [Name Length: 9]
            [Label Count: 2]
            Type: A (Host Address) (1)            #查询类型字段, 这里为A类型
            Class: IN (0x0001)                    #查询类字段, 这里为互联网地址

那么基础部分和问题部分表现在源码中是什么形式呢?其实这部分工作是交给res_mkquery完成的。

res_mkquery 构建DNS查询的基础部分和问题部分

文件:/bionic/libc/dns/resolv/res_mkquery.c

        n = res_nmkquery(res, QUERY, name, class, type, NULL, 0, NULL,
            buf, sizeof(buf));
/*
 * res_nmkquery -- build a DNS query message in `buf`.
 *
 * Writes a zeroed HEADER followed by the question section (compressed
 * name, type, class). For NS_NOTIFY_OP with non-NULL `data`, one extra
 * record is appended and arcount is set to 1.
 *
 * Returns the number of bytes written (cp - buf), or -1 when the buffer is
 * NULL or too small, when dn_comp() fails, or when `op` is not handled.
 * NOTE: the IQUERY case is elided ("...") in this excerpt.
 */
int
res_nmkquery(res_state statp,
         int op,            /* opcode of query */
         const char *dname,     /* domain name */
         int class, int type,   /* class and type of query */
         const u_char *data,    /* resource record data */
         int datalen,       /* length of data */
         const u_char *newrr_in,    /* new rr for modify or append */
         u_char *buf,       /* buffer to put query */
         int buflen)        /* size of buffer */
{
    register HEADER *hp;
    register u_char *cp, *ep;
    register int n;
    u_char *dnptrs[20], **dpp, **lastdnptr;

    UNUSED(newrr_in);

    /* The buffer must hold at least the fixed-size header. */
    if ((buf == NULL) || (buflen < HFIXEDSZ))
        return (-1);
    /* Start from an all-zero header, then fill in the fields that differ. */
    memset(buf, 0, HFIXEDSZ);
    hp = (HEADER *)(void *)buf;
    hp->id = htons(res_randomid());
    hp->opcode = op;
    hp->rd = (statp->options & RES_RECURSE) != 0U;
    hp->ad = (statp->options & RES_USE_DNSSEC) != 0U;
    hp->rcode = NOERROR;
    /* cp: write cursor just past the header; ep: one past the end of buf. */
    cp = buf + HFIXEDSZ;
    ep = buf + buflen;
    /* Pointer table handed to dn_comp(): message start, then NULL. */
    dpp = dnptrs;
    *dpp++ = buf;
    *dpp++ = NULL;
    lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
    /*
     * perform opcode specific processing
     */
    switch (op) {
    case QUERY: /*FALLTHROUGH*/
    case NS_NOTIFY_OP:
        /* Question section: compressed name, then 16-bit type and class. */
        if (ep - cp < QFIXEDSZ)
            return (-1);
        if ((n = dn_comp(dname, cp, ep - cp - QFIXEDSZ, dnptrs,
            lastdnptr)) < 0)
            return (-1);
        cp += n;
        ns_put16(type, cp);
        cp += INT16SZ;
        ns_put16(class, cp);
        cp += INT16SZ;
        hp->qdcount = htons(1);
        /* A plain QUERY, or a NOTIFY without data, ends here. */
        if (op == QUERY || data == NULL)
            break;

        /*
         * Append one record named by `data`: name, T_NULL type, class,
         * then a 32-bit zero and a 16-bit zero.
         * NOTE(review): the two zeros are presumably the TTL and the
         * RDATA length of the record -- confirm against RFC 1035.
         */
        if ((ep - cp) < RRFIXEDSZ)
            return (-1);
        n = dn_comp((const char *)data, cp, ep - cp - RRFIXEDSZ,
                dnptrs, lastdnptr);
        if (n < 0)
            return (-1);
        cp += n;
        ns_put16(T_NULL, cp);
        cp += INT16SZ;
        ns_put16(class, cp);
        cp += INT16SZ;
        ns_put32(0, cp);
        cp += INT32SZ;
        ns_put16(0, cp);
        cp += INT16SZ;
        hp->arcount = htons(1);
        break;

    case IQUERY:
    ...
    default:
        return (-1);
    }
    /* Total length of the message built so far. */
    return (cp - buf);
}

此时调用的标志位为QUERY,此时会依次往HEADER中设置了如下的数据:

其他默认为0.接着cp 是缓冲区的地址指针。因为这个Header最后会记录到缓冲区buf中,此时向后移动一个HEADER的大小,就是设置问题区域的位置了。

因此下面先写入dname(查询名,dn_comp通过name_compress进行编码压缩),cp向后移动dn_comp返回的n个字节;接着每次向后移动16位,依次设置了type(查询类型)以及class(查询类)。

获取DNS服务器返回的查询结果

先来看看整个格式:


DNS资源记录部分.gif

这里分为如下6个部分:

下面是一个例子:

Answers                                                      #“回答问题区域”字段
    baidu.com: type A, class IN, addr 220.181.57.216         #资源记录部分
        Name: baidu.com                                      #域名字段, 这里请求的域名为baidu.com
        Type: A (Host Address) (1)                           #类型字段, 这里为A类型
        Class: IN (0x0001)                                   #类字段
        Time to live: 5                                      #生存时间
        Data length: 4                                       #数据长度
        Address: 220.181.57.216                              #资源数据, 这里为IP地址
    baidu.com: type A, class IN, addr 123.125.115.110        #资源记录部分
        Name: baidu.com
        Type: A (Host Address) (1)
        Class: IN (0x0001)
        Time to live: 5
        Data length: 4
        Address: 123.125.115.110

表现在代码中,核心的解析逻辑位于_dns_getaddrinfo方法里对getanswer的调用:

ai = getanswer(buf, q.n, q.name, q.qtype, pai);
/*
 * Buffer holding one DNS message. Being a union, the same bytes can be
 * accessed through `hdr` (as a HEADER) or through `buf` (as MAXPACKET raw
 * bytes).
 */
typedef union {
    HEADER hdr;
    u_char buf[MAXPACKET];
} querybuf;

注意此时的用于回应的结果是一个联合体。

/*
 * One pending query and the buffer that receives its answer.
 * Entries are chained through `next` to form a singly linked list.
 */
struct res_target {
    struct res_target *next;    /* following entry in the list */
    const char *name;           /* domain name being queried */
    int qclass;                 /* class of the query */
    int qtype;                  /* type of the query */
    u_char *answer;             /* where the answer is stored */
    int anslen;                 /* capacity of the answer buffer */
    int n;                      /* length of the result */
};

res_target 是一个链表项,记录了返回数据模块中的信息,用于辅助解析querybuf

/*
 * getanswer -- convert a raw DNS response into a linked list of addrinfo.
 *
 * Parameters:
 *   answer  response packet; read both as a HEADER and as raw bytes
 *   anslen  number of valid bytes in answer->buf
 *   qname   name that was queried
 *   qtype   type that was queried; only T_A, T_AAAA and T_ANY are handled
 *   pai     template addrinfo; copied for every result, never modified
 *
 * Returns the head of the list built from the accepted A/AAAA records, or
 * NULL when qtype is unsupported, the question section is malformed, or no
 * usable record was found. h_errno is set to NETDB_SUCCESS on success and
 * to NO_RECOVERY on the failure paths below that set it explicitly.
 *
 * Three cursors drive the parsing:
 *   cp   read position inside the response
 *   eom  one past the last valid byte of the response
 *   bp   write position inside hostbuf, where expanded names are stored
 *
 * NOTE(review): BOUNDED_INCR() and BOUNDS_CHECK() are defined outside this
 * excerpt; they presumably leave the function when the cursor would run
 * past eom -- confirm against their definitions.
 */
static struct addrinfo *
getanswer(const querybuf *answer, int anslen, const char *qname, int qtype,
    const struct addrinfo *pai)
{
    struct addrinfo sentinel, *cur;
    struct addrinfo ai;
    const struct afd *afd;
    char *canonname;
    const HEADER *hp;
    const u_char *cp;
    int n;
    const u_char *eom;
    char *bp, *ep;
    int type, class, ancount, qdcount;
    int haveanswer, had_error;
    char tbuf[MAXDNAME];
    int (*name_ok) (const char *);
    char hostbuf[8*1024];


    /* sentinel is a dummy list head; cur always points at the list tail. */
    memset(&sentinel, 0, sizeof(sentinel));
    cur = &sentinel;

    canonname = NULL;
    eom = answer->buf + anslen;
    switch (qtype) {
    case T_A:
    case T_AAAA:
    case T_ANY: /*use T_ANY only for T_A/T_AAAA lookup*/
        name_ok = res_hnok;
        break;
    default:
        return NULL;    /* XXX should be abort(); */
    }

    /* Read the record counts from the header, then step over the header. */
    hp = &answer->hdr;
    ancount = ntohs(hp->ancount);
    qdcount = ntohs(hp->qdcount);
    bp = hostbuf;
    ep = hostbuf + sizeof hostbuf;
    cp = answer->buf;
    BOUNDED_INCR(HFIXEDSZ);
    /* Exactly one question is expected in the response. */
    if (qdcount != 1) {
        h_errno = NO_RECOVERY;
        return (NULL);
    }
    /* Expand the question name into hostbuf and validate it. */
    n = dn_expand(answer->buf, eom, cp, bp, ep - bp);
    if ((n < 0) || !(*name_ok)(bp)) {
        h_errno = NO_RECOVERY;
        return (NULL);
    }
    BOUNDED_INCR(n + QFIXEDSZ);
    if (qtype == T_A || qtype == T_AAAA || qtype == T_ANY) {

        n = strlen(bp) + 1;     /* for the \0 */
        if (n >= MAXHOSTNAMELEN) {
            h_errno = NO_RECOVERY;
            return (NULL);
        }
        canonname = bp;
        bp += n;
        /* The qname can be abbreviated, but h_name is now absolute. */
        qname = canonname;
    }
    haveanswer = 0;
    had_error = 0;
    /* Walk the answer records until they run out or an error is seen. */
    while (ancount-- > 0 && cp < eom && !had_error) {
        /* Owner name of this record, expanded at bp. */
        n = dn_expand(answer->buf, eom, cp, bp, ep - bp);
        if ((n < 0) || !(*name_ok)(bp)) {
            had_error++;
            continue;
        }
        cp += n;            /* name */
        BOUNDS_CHECK(cp, 3 * INT16SZ + INT32SZ);
        type = _getshort(cp);
        cp += INT16SZ;          /* type */
        class = _getshort(cp);
        cp += INT16SZ + INT32SZ;    /* class, TTL */
        n = _getshort(cp);
        cp += INT16SZ;          /* len */
        BOUNDS_CHECK(cp, n);
        /* From here on, n is the length of this record's data. */
        if (class != C_IN) {
            cp += n;
            continue;
        }
        /* CNAME: remember the target as the canonical name and go on. */
        if ((qtype == T_A || qtype == T_AAAA || qtype == T_ANY) &&
            type == T_CNAME) {
            n = dn_expand(answer->buf, eom, cp, tbuf, sizeof tbuf);
            if ((n < 0) || !(*name_ok)(tbuf)) {
                had_error++;
                continue;
            }
            cp += n;
            /* Get canonical name. */
            n = strlen(tbuf) + 1;   /* for the \0 */
            if (n > ep - bp || n >= MAXHOSTNAMELEN) {
                had_error++;
                continue;
            }
            strlcpy(bp, tbuf, (size_t)(ep - bp));
            canonname = bp;
            bp += n;
            continue;
        }
        if (qtype == T_ANY) {
            if (!(type == T_A || type == T_AAAA)) {
                cp += n;
                continue;
            }
        } else if (type != qtype) {
            /*
             * A record of a type that was not asked for. Its data must be
             * skipped for every such type, T_KEY and T_SIG included;
             * otherwise the next iteration would parse this record's data
             * as the start of a new record.
             */
            cp += n;
            continue;       /* XXX - had_error++ ? */
        }
        switch (type) {
        case T_A:
        case T_AAAA:
            /* The record must belong to the canonical name. */
            if (strcasecmp(canonname, bp) != 0) {
                cp += n;
                continue;   /* XXX - had_error++ ? */
            }
            /* The data length must match the address family. */
            if (type == T_A && n != INADDRSZ) {
                cp += n;
                continue;
            }
            if (type == T_AAAA && n != IN6ADDRSZ) {
                cp += n;
                continue;
            }
            /* IPv4-mapped IPv6 addresses are not accepted as AAAA results. */
            if (type == T_AAAA) {
                struct in6_addr in6;
                memcpy(&in6, cp, IN6ADDRSZ);
                if (IN6_IS_ADDR_V4MAPPED(&in6)) {
                    cp += n;
                    continue;
                }
            }
            /* First accepted record: keep its owner name in hostbuf. */
            if (!haveanswer) {
                int nn;

                canonname = bp;
                nn = strlen(bp) + 1;    /* for the \0 */
                bp += nn;
            }

            /* don't overwrite pai */
            ai = *pai;
            ai.ai_family = (type == T_A) ? AF_INET : AF_INET6;
            afd = find_afd(ai.ai_family);
            if (afd == NULL) {
                cp += n;
                continue;
            }
            /* Append a node holding the address at cp, then move to the tail. */
            cur->ai_next = get_ai(&ai, afd, (const char *)cp);
            if (cur->ai_next == NULL)
                had_error++;
            while (cur && cur->ai_next)
                cur = cur->ai_next;
            cp += n;
            break;
        default:
            abort();
        }
        if (!had_error)
            haveanswer++;
    }
    if (haveanswer) {
        /* Attach the canonical name (or the query name) to the result. */
        if (!canonname)
            (void)get_canonname(pai, sentinel.ai_next, qname);
        else
            (void)get_canonname(pai, sentinel.ai_next, canonname);
        h_errno = NETDB_SUCCESS;
        return sentinel.ai_next;
    }

    h_errno = NO_RECOVERY;
    return NULL;
}

这里有三个重要的指针:

流程如下:

/*
 * One resolved address. Results form a singly linked list through
 * ai_next; ai_addr points at the binary socket address and ai_addrlen
 * gives its length.
 */
struct addrinfo {
    int ai_flags;   /* AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST */
    int ai_family;  /* PF_xxx */
    int ai_socktype;    /* SOCK_xxx */
    int ai_protocol;    /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
    socklen_t ai_addrlen;   /* length of ai_addr */
    char    *ai_canonname;  /* canonical name for hostname */
    struct  sockaddr *ai_addr;  /* binary address */
    struct  addrinfo *ai_next;  /* next structure in linked list */
};
static struct addrinfo *
get_ai(const struct addrinfo *pai, const struct afd *afd, const char *addr)
{
    char *p;
    struct addrinfo *ai;

    ai = (struct addrinfo *)malloc(sizeof(struct addrinfo)
        + (afd->a_socklen));
    if (ai == NULL)
        return NULL;

    memcpy(ai, pai, sizeof(struct addrinfo));
    ai->ai_addr = (struct sockaddr *)(void *)(ai + 1);
    memset(ai->ai_addr, 0, (size_t)afd->a_socklen);

    ai->ai_addrlen = afd->a_socklen;

    ai->ai_addr->sa_family = ai->ai_family = afd->a_af;
    p = (char *)(void *)(ai->ai_addr);
    memcpy(p + afd->a_off, addr, (size_t)afd->a_addrlen);
    return ai;
}

这说明cp指向的地址数据,会被拷贝到紧跟在addrinfo结构体后面的sockaddr之中(也就是ai_addr指向的内存)。当生成了addrinfo后就会返回到上一层,并从netd通过socket传送到App进程中,最后返回到Java层的api中。

总结

DNS发送过程.png

整个流程用图来表示就比较简单了。

在netd进程首先会从线程内存中查询是否有符合的目标,不存在则从文件/system/etc/hosts中读取缓存好的域名对应的ip地址,如果找不到就从配置文件/etc/ppp/resolv.conf 中每一个DNS服务器找到对应的服务器进行迭代查询,最终返回addrinfo结构体的结果,转化成Java对象返回。

也是一个经典的3层缓存模式.

上一篇 下一篇

猜你喜欢

热点阅读