summaryrefslogtreecommitdiffstats
path: root/luni/src/main/native/canonicalize_path.cpp
blob: b2a2a01ccffebc04894cd8c8091a2100f13b3f58 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
 * Copyright (c) 2003 Constantin S. Svintsoff <kostik@iclub.nsu.ru>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "readlink.h"

#include <string>

#include <errno.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <unistd.h>

/**
 * Removes the last path component (the final '/' and everything after it) from 'resolved',
 * except that the leading '/' is always kept. Erasing from the last slash unconditionally would
 * turn "/foo" into "" rather than "/", and the caller relies on 'resolved' staying a non-empty
 * absolute path.
 */
static void stripLastPathComponent(std::string& resolved) {
    const size_t lastSlash = resolved.rfind('/');
    if (lastSlash == std::string::npos) {
        return;
    }
    // When the only slash is the root slash, erase what follows it rather than the slash itself.
    resolved.erase(lastSlash == 0 ? 1 : lastSlash);
}

/**
 * This differs from realpath(3) mainly in its behavior when a path element does not exist or
 * cannot be searched. realpath(3) treats that as an error and gives up, but we have Java-compatible
 * behavior where we just assume the path element was not a symbolic link. This leads to a textual
 * treatment of ".." from that point in the path, which may actually lead us back to a path we
 * can resolve (as in "/tmp/does-not-exist/../blah.txt" which would be an error for realpath(3)
 * but "/tmp/blah.txt" under the traditional Java interpretation).
 *
 * This implementation also removes all the fixed-length buffers of the C original.
 *
 * 'path' must be an absolute path; a NULL or non-absolute 'path' fails with errno set to EINVAL.
 * 'resolved' receives the canonical path. It is overwritten as soon as the argument check passes,
 * so its contents are only meaningful when this function returns true.
 *
 * Returns true on success. Returns false with errno set to ELOOP if more than MAXSYMLINKS symbolic
 * links were followed, or false if the readlink helper fails (errno is presumably left as set by
 * that helper).
 */
bool canonicalize_path(const char* path, std::string& resolved) {
    // 'path' must be an absolute path.
    if (path == NULL || path[0] != '/') {
        errno = EINVAL;
        return false;
    }

    resolved = "/";
    if (path[1] == '\0') {
        return true;
    }

    // Iterate over path components in 'left'.
    int symlinkCount = 0;
    std::string left(path + 1);
    while (!left.empty()) {
        // Extract the next path component.
        size_t nextSlash = left.find('/');
        std::string nextPathComponent = left.substr(0, nextSlash);
        if (nextSlash != std::string::npos) {
            left.erase(0, nextSlash + 1);
        } else {
            left.clear();
        }
        if (nextPathComponent.empty()) {
            // Repeated or trailing slashes produce empty components; skip them.
            continue;
        } else if (nextPathComponent == ".") {
            continue;
        } else if (nextPathComponent == "..") {
            // Strip the last path component except when we have single "/". The helper keeps
            // the root slash, so "/foo/.." yields "/" rather than an empty string.
            stripLastPathComponent(resolved);
            continue;
        }

        // Append the next path component. 'resolved' is never empty here: it starts as "/" and
        // stripLastPathComponent never removes the leading slash.
        if (resolved[resolved.size() - 1] != '/') {
            resolved += '/';
        }
        resolved += nextPathComponent;

        // See if we've got a symbolic link, and resolve it if so. A failing lstat (for example
        // because the element does not exist) is deliberately treated as "not a symbolic link".
        struct stat sb;
        if (lstat(resolved.c_str(), &sb) == 0 && S_ISLNK(sb.st_mode)) {
            if (symlinkCount++ > MAXSYMLINKS) {
                errno = ELOOP;
                return false;
            }

            std::string symlink;
            if (!readlink(resolved.c_str(), symlink)) {
                return false;
            }
            if (!symlink.empty() && symlink[0] == '/') {
                // The symbolic link is absolute, so we need to start from scratch.
                resolved = "/";
            } else {
                // The symbolic link is relative, so we just lose the last path component (which
                // was the link).
                stripLastPathComponent(resolved);
            }

            // Push the link target back onto the front of the unprocessed input so that its
            // components are canonicalized (and any nested links followed) in turn.
            if (!left.empty()) {
                // Guard against an empty target before looking at its last character.
                const bool needsSlash = symlink.empty() || symlink[symlink.size() - 1] != '/';
                const char* maybeSlash = needsSlash ? "/" : "";
                left = symlink + maybeSlash + left;
            } else {
                left = symlink;
            }
        }
    }

    // Remove trailing slash except when the resolved pathname is a single "/".
    if (resolved.size() > 1 && resolved[resolved.size() - 1] == '/') {
        resolved.erase(resolved.size() - 1, 1);
    }
    return true;
}