FFmpeg
url.c
Go to the documentation of this file.
1 /*
2  * URL utility functions
3  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 
23 #include "avformat.h"
24 #include "internal.h"
25 #include "config.h"
26 #include "url.h"
27 #if CONFIG_NETWORK
28 #include "network.h"
29 #endif
30 #include "libavutil/avassert.h"
31 #include "libavutil/avstring.h"
32 
33 /**
34  * @file
35  * URL utility functions.
36  */
37 
/**
 * Assemble a URL of the form
 *     proto://authorization@hostname:port<formatted tail>
 * into str.
 *
 * @param str           destination buffer; its first byte is written
 *                      unconditionally
 * @param size          size of str in bytes
 * @param proto         scheme written before "://", skipped when NULL
 * @param authorization written before '@', skipped when NULL or empty
 * @param hostname      host name or numeric address; when networking with
 *                      IPv6 support is compiled in and getaddrinfo()
 *                      recognises it as a numeric IPv6 address, it is
 *                      wrapped in square brackets
 * @param port          written as ":port" when it is >= 0
 * @param fmt           printf-style format for the remainder of the URL,
 *                      skipped when NULL
 * @return the length of the string now held in str
 */
int ff_url_join(char *str, int size, const char *proto,
                const char *authorization, const char *hostname,
                int port, const char *fmt, ...)
{
#if CONFIG_NETWORK
    struct addrinfo hints = { 0 }, *ai;
#endif
    int bracket_host = 0;

    str[0] = '\0';

    if (proto)
        av_strlcatf(str, size, "%s://", proto);

    if (authorization && authorization[0])
        av_strlcatf(str, size, "%s@", authorization);

#if CONFIG_NETWORK && defined(AF_INET6)
    /* A numeric IPv6 host contains colons, so it has to be enclosed in []
     * to stay distinguishable from the port separator. */
    hints.ai_flags = AI_NUMERICHOST;
    if (!getaddrinfo(hostname, NULL, &hints, &ai)) {
        bracket_host = ai->ai_family == AF_INET6;
        freeaddrinfo(ai);
    }
#endif

    if (bracket_host) {
        av_strlcat(str, "[", size);
        av_strlcat(str, hostname, size);
        av_strlcat(str, "]", size);
    } else {
        /* Not an IPv6 address, just output the plain string. */
        av_strlcat(str, hostname, size);
    }

    if (port >= 0)
        av_strlcatf(str, size, ":%d", port);

    if (fmt) {
        va_list args;
        size_t used = strlen(str);
        size_t room = size > used ? size - used : 0;

        va_start(args, fmt);
        vsnprintf(str + used, room, fmt, args);
        va_end(args);
    }

    return strlen(str);
}
81 
/**
 * Scan [cur, end) for the first byte that strchr() finds in delim.
 *
 * Because strchr() also matches the terminator of delim, a NUL byte inside
 * the range stops the scan as well.
 *
 * @return pointer to the first matching byte, or end when there is none
 */
static const char *find_delim(const char *delim, const char *cur, const char *end)
{
    const char *pos;

    for (pos = cur; pos < end; pos++) {
        if (strchr(delim, *pos))
            break;
    }
    return pos;
}
88 
89 int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
90 {
91  const char *cur, *aend, *p;
92 
93  av_assert0(url);
94  if (!end)
95  end = url + strlen(url);
96  cur = uc->url = url;
97 
98  /* scheme */
99  uc->scheme = cur;
100  p = find_delim(":/?#", cur, end); /* lavf "schemes" can contain options but not some RFC 3986 delimiters */
101  if (*p == ':')
102  cur = p + 1;
103 
104  /* authority */
105  uc->authority = cur;
106  if (end - cur >= 2 && cur[0] == '/' && cur[1] == '/') {
107  cur += 2;
108  aend = find_delim("/?#", cur, end);
109 
110  /* userinfo */
111  uc->userinfo = cur;
112  p = find_delim("@", cur, aend);
113  if (*p == '@')
114  cur = p + 1;
115 
116  /* host */
117  uc->host = cur;
118  if (*cur == '[') { /* hello IPv6, thanks for using colons! */
119  p = find_delim("]", cur, aend);
120  if (*p != ']')
121  return AVERROR(EINVAL);
122  if (p + 1 < aend && p[1] != ':')
123  return AVERROR(EINVAL);
124  cur = p + 1;
125  } else {
126  cur = find_delim(":", cur, aend);
127  }
128 
129  /* port */
130  uc->port = cur;
131  cur = aend;
132  } else {
133  uc->userinfo = uc->host = uc->port = cur;
134  }
135 
136  /* path */
137  uc->path = cur;
138  cur = find_delim("?#", cur, end);
139 
140  /* query */
141  uc->query = cur;
142  if (*cur == '?')
143  cur = find_delim("#", cur, end);
144 
145  /* fragment */
146  uc->fragment = cur;
147 
148  uc->end = end;
149  return 0;
150 }
151 
/**
 * Tell whether path starts like a fully qualified DOS/Windows path:
 * either an ASCII drive letter followed by ':' and a slash or backslash,
 * or two leading slashes/backslashes in any combination.
 *
 * @param path NUL-terminated string; later bytes are only inspected when
 *             the preceding ones matched, so short strings are safe
 * @return 1 if it does, 0 otherwise
 */
static int is_fq_dos_path(const char *path)
{
    const char first = path[0];
    const int is_drive_letter = (first >= 'a' && first <= 'z') ||
                                (first >= 'A' && first <= 'Z');
    const int is_separator    = first == '/' || first == '\\';

    /* "X:/..." or "X:\..." */
    if (is_drive_letter && path[1] == ':')
        return path[2] == '/' || path[2] == '\\';

    /* "//...", "\\...", "/\..." or "\/..." */
    if (is_separator)
        return path[1] == '/' || path[1] == '\\';

    return 0;
}
163 
/**
 * Append the path segments of [in, in_end) to the output, simplifying
 * dot segments on the way.
 *
 * A "." segment is dropped. A ".." segment removes the last segment
 * already written, never going back beyond root. Any other segment is
 * copied together with its trailing '/', if it has one.
 *
 * @param root    start of the path in the output buffer; the byte just
 *                before the write position is expected to be '/' whenever
 *                a ".." segment is processed
 * @param out_end limit of the output buffer
 * @param rout    in: current write position; out: updated write position
 *                (only stored on success)
 * @param in      start of the path to append; one leading '/' is skipped
 * @param in_end  end of the path to append
 * @return 0 on success, AVERROR(ENOMEM) if a segment does not fit
 */
static int append_path(char *root, char *out_end, char **rout,
                       const char *in, const char *in_end)
{
    char *dst = *rout;
    const char *seg_end, *resume;

    /* the caller has already emitted the leading slash */
    if (in < in_end && *in == '/')
        in++;

    while (in < in_end) {
        seg_end = find_delim("/", in, in_end);
        /* continue after the slash when the segment is terminated by one */
        resume = seg_end;
        if (seg_end < in_end && *seg_end == '/')
            resume++;

        if (seg_end - in == 2 && in[0] == '.' && in[1] == '.') {
            /* parent directory: rewind to just after the previous slash */
            av_assert1(dst[-1] == '/');
            if (dst - root > 1) {
                while (dst > root) {
                    dst--;
                    if (dst[-1] == '/')
                        break;
                }
            }
        } else if (!(seg_end - in == 1 && in[0] == '.')) {
            /* regular segment; a lone "." falls through and is dropped */
            if (out_end - dst < resume - in)
                return AVERROR(ENOMEM);
            memmove(dst, in, resume - in);
            dst += resume - in;
        }

        in = resume;
    }

    *rout = dst;
    return 0;
}
192 
193 int ff_make_absolute_url2(char *buf, int size, const char *base,
194  const char *rel, int handle_dos_paths)
195 {
196  URLComponents ub, uc;
197  char *out, *out_end, *path;
198  const char *keep, *base_path_end;
199  int use_base_path, simplify_path = 0, ret;
200  const char *base_separators = "/";
201 
202  /* This is tricky.
203  For HTTP, http://server/site/page + ../media/file
204  should resolve into http://server/media/file
205  but for filesystem access, dir/playlist + ../media/file
206  should resolve into dir/../media/file
207  because dir could be a symlink, and .. points to
208  the actual parent of the target directory.
209 
210  We'll consider that URLs with an actual scheme and authority,
211  i.e. starting with scheme://, need parent dir simplification,
212  while bare paths or pseudo-URLs starting with proto: without
213  the double slash do not.
214 
215  For real URLs, the processing is similar to the algorithm described
216  here:
217  https://tools.ietf.org/html/rfc3986#section-5
218  */
219 
220  if (!size)
221  return AVERROR(ENOMEM);
222  out = buf;
223  out_end = buf + size - 1;
224 
225  if (!base)
226  base = "";
227  if (handle_dos_paths) {
228  if ((ret = ff_url_decompose(&ub, base, NULL)) < 0)
229  goto error;
230  if (is_fq_dos_path(base) || av_strstart(base, "file:", NULL) || ub.path == ub.url) {
231  base_separators = "/\\";
232  if (is_fq_dos_path(rel))
233  base = "";
234  }
235  }
236  if ((ret = ff_url_decompose(&ub, base, NULL)) < 0 ||
237  (ret = ff_url_decompose(&uc, rel, NULL)) < 0)
238  goto error;
239 
240  keep = ub.url;
241 #define KEEP(component, also) do { \
242  if (uc.url_component_end_##component == uc.url && \
243  ub.url_component_end_##component > keep) { \
244  keep = ub.url_component_end_##component; \
245  also \
246  } \
247  } while (0)
248  KEEP(scheme, );
249  KEEP(authority_full, simplify_path = 1;);
250  KEEP(path,);
251  KEEP(query,);
252  KEEP(fragment,);
253 #undef KEEP
254 #define COPY(start, end) do { \
255  size_t len = end - start; \
256  if (len > out_end - out) { \
257  ret = AVERROR(ENOMEM); \
258  goto error; \
259  } \
260  memmove(out, start, len); \
261  out += len; \
262  } while (0)
263  COPY(ub.url, keep);
264  COPY(uc.url, uc.path);
265 
266  use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
267  if (uc.path > uc.url)
268  use_base_path = 0;
269  if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/')
270  use_base_path = 0;
271  if (use_base_path) {
272  base_path_end = ub.url_component_end_path;
273  if (URL_COMPONENT_HAVE(uc, path))
274  while (base_path_end > ub.path && !strchr(base_separators, base_path_end[-1]))
275  base_path_end--;
276  }
277  if (keep > ub.path)
278  simplify_path = 0;
279  if (URL_COMPONENT_HAVE(uc, scheme))
280  simplify_path = 0;
281  if (URL_COMPONENT_HAVE(uc, authority))
282  simplify_path = 1;
283  /* No path at all, leave it */
284  if (!use_base_path && !URL_COMPONENT_HAVE(uc, path))
285  simplify_path = 0;
286 
287  if (simplify_path) {
288  const char *root = "/";
289  COPY(root, root + 1);
290  path = out;
291  if (use_base_path) {
292  ret = append_path(path, out_end, &out, ub.path, base_path_end);
293  if (ret < 0)
294  goto error;
295  }
296  if (URL_COMPONENT_HAVE(uc, path)) {
297  ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path);
298  if (ret < 0)
299  goto error;
300  }
301  } else {
302  if (use_base_path)
303  COPY(ub.path, base_path_end);
304  COPY(uc.path, uc.url_component_end_path);
305  }
306 
307  COPY(uc.url_component_end_path, uc.end);
308 #undef COPY
309  *out = 0;
310  return 0;
311 
312 error:
313  snprintf(buf, size, "invalid:%s",
314  ret == AVERROR(ENOMEM) ? "truncated" :
315  ret == AVERROR(EINVAL) ? "syntax_error" : "");
316  return ret;
317 }
318 
319 int ff_make_absolute_url(char *buf, int size, const char *base,
320  const char *rel)
321 {
322  return ff_make_absolute_url2(buf, size, base, rel, HAVE_DOS_PATHS);
323 }
324 
326 {
327  AVIODirEntry *entry = av_mallocz(sizeof(AVIODirEntry));
328  if (entry) {
329  entry->type = AVIO_ENTRY_UNKNOWN;
330  entry->size = -1;
331  entry->modification_timestamp = -1;
332  entry->access_timestamp = -1;
333  entry->status_change_timestamp = -1;
334  entry->user_id = -1;
335  entry->group_id = -1;
336  entry->filemode = -1;
337  }
338  return entry;
339 }
error
static void error(const char *err)
Definition: target_bsf_fuzzer.c:31
ff_make_absolute_url2
int ff_make_absolute_url2(char *buf, int size, const char *base, const char *rel, int handle_dos_paths)
Convert a relative url into an absolute url, given a base url.
Definition: url.c:193
AVERROR
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats. For video, that means pixel format. For audio, that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
out
FILE * out
Definition: movenc.c:54
URLComponents::url
const char * url
whole URL, for reference
Definition: url.h:350
URLComponents
Definition: url.h:349
find_delim
static const char * find_delim(const char *delim, const char *cur, const char *end)
Definition: url.c:82
AVIODirEntry::type
int type
Type of the entry.
Definition: avio.h:89
URLComponents::path
const char * path
Definition: url.h:356
base
uint8_t base
Definition: vp3data.h:128
av_strlcatf
size_t av_strlcatf(char *dst, size_t size, const char *fmt,...)
Definition: avstring.c:104
URL_COMPONENT_HAVE
#define URL_COMPONENT_HAVE(uc, component)
Definition: url.h:372
AVIO_ENTRY_UNKNOWN
@ AVIO_ENTRY_UNKNOWN
Definition: avio.h:68
URLComponents::fragment
const char * fragment
including initial '#' if present
Definition: url.h:358
fragment
Definition: dashdec.c:34
AVIODirEntry::access_timestamp
int64_t access_timestamp
Time of last access in microseconds since unix epoch, -1 if unknown.
Definition: avio.h:95
freeaddrinfo
#define freeaddrinfo
Definition: network.h:218
ub
#define ub(width, name)
Definition: cbs_h2645.c:264
AVIODirEntry::modification_timestamp
int64_t modification_timestamp
Time of last modification in microseconds since unix epoch, -1 if unknown.
Definition: avio.h:93
avassert.h
AI_NUMERICHOST
#define AI_NUMERICHOST
Definition: network.h:187
ff_url_join
int ff_url_join(char *str, int size, const char *proto, const char *authorization, const char *hostname, int port, const char *fmt,...)
Definition: url.c:38
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
URLComponents::query
const char * query
including initial '?' if present
Definition: url.h:357
internal.h
append_path
static int append_path(char *root, char *out_end, char **rout, const char *in, const char *in_end)
Definition: url.c:164
NULL
#define NULL
Definition: coverity.c:32
AVIODirEntry::size
int64_t size
File size in bytes, -1 if unknown.
Definition: avio.h:92
URLComponents::authority
const char * authority
"//" if it is a real URL
Definition: url.h:352
URLComponents::end
const char * end
Definition: url.h:359
AVIODirEntry::group_id
int64_t group_id
Group ID of owner, -1 if unknown.
Definition: avio.h:100
COPY
#define COPY(start, end)
AVIODirEntry::filemode
int64_t filemode
Unix file mode, -1 if unknown.
Definition: avio.h:101
URLComponents::host
const char * host
Definition: url.h:354
size
int size
Definition: twinvq_data.h:10344
AVIODirEntry
Describes single entry of the directory.
Definition: avio.h:87
AVIODirEntry::status_change_timestamp
int64_t status_change_timestamp
Time of last status change in microseconds since unix epoch, -1 if unknown.
Definition: avio.h:97
av_strstart
int av_strstart(const char *str, const char *pfx, const char **ptr)
Return non-zero if pfx is a prefix of str.
Definition: avstring.c:37
KEEP
#define KEEP(component, also)
getaddrinfo
#define getaddrinfo
Definition: network.h:217
AVIODirEntry::user_id
int64_t user_id
User ID of owner, -1 if unknown.
Definition: avio.h:99
ff_alloc_dir_entry
AVIODirEntry * ff_alloc_dir_entry(void)
Allocate directory entry with default values.
Definition: url.c:325
vsnprintf
#define vsnprintf
Definition: snprintf.h:36
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53
url.h
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:254
len
int len
Definition: vorbis_enc_data.h:426
URLComponents::port
const char * port
including initial ':' if present
Definition: url.h:355
URLComponents::userinfo
const char * userinfo
including final '@' if present
Definition: url.h:353
ret
ret
Definition: filter_design.txt:187
av_strlcat
size_t av_strlcat(char *dst, const char *src, size_t size)
Append the string src to the string dst, but to a total length of no more than size - 1 bytes,...
Definition: avstring.c:96
avformat.h
network.h
URLComponents::scheme
const char * scheme
possibly including lavf-specific options
Definition: url.h:351
addrinfo::ai_flags
int ai_flags
Definition: network.h:138
ff_make_absolute_url
int ff_make_absolute_url(char *buf, int size, const char *base, const char *rel)
Convert a relative url into an absolute url, given a base url.
Definition: url.c:319
d
d
Definition: ffmpeg_filter.c:156
convert_header.str
string str
Definition: convert_header.py:20
avstring.h
is_fq_dos_path
static int is_fq_dos_path(const char *path)
Definition: url.c:152
addrinfo
Definition: network.h:137
ff_url_decompose
int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
Parse an URL to find the components.
Definition: url.c:89
snprintf
#define snprintf
Definition: snprintf.h:34