Skip to content

Commit 102488d

Browse files
committed
FEAT: POSIX wildcard support in read's file argument
implements: Oldes/Rebol-issues#2443
1 parent 5060cea commit 102488d

File tree

3 files changed

+123
-9
lines changed

3 files changed

+123
-9
lines changed

src/include/reb-file.h

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ enum {
3939
RFM_NAME_MEM, // converted name allocated in mem
4040
RFM_DIR = 16,
4141
RFM_DRIVES, // used only on Windows to get logical drives letters (read %/)
42+
RFM_PATTERN, // used only on Posix for reading wildcard patterns (read %*.txt)
4243
};
4344

4445
// RFE - REBOL File Error

src/os/posix/dev-file.c

+103-8
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@
3838
***********************************************************************/
3939

4040
#include <stdio.h>
41+
#include <stdlib.h>
4142
#include <string.h>
4243
#include <unistd.h>
4344
#include <sys/types.h>
4445
#include <sys/stat.h>
4546
#include <fcntl.h>
4647
#include <dirent.h>
4748
#include <errno.h>
49+
#include <glob.h>
4850

4951
#include "reb-host.h"
5052
#include "host-lib.h"
@@ -191,21 +193,25 @@ static int Get_File_Info(REBREQ *file)
191193
**
192194
***********************************************************************/
193195
{
194-
struct stat info;
195196
struct dirent *d;
196197
char *cp;
197198
DIR *h;
198-
int n;
199-
200-
// Remove * from tail, if present. (Allowed because the
201-
// path was copied into to-local-path first).
202-
n = strlen(cp = dir->file.path);
203-
if (n > 0 && cp[n-1] == '*') cp[n-1] = 0;
199+
int len, n = 0;
204200

205201
// If no dir handle, open the dir:
206202
if (!(h = dir->handle)) {
203+
// Remove * from tail, if present. (Allowed because the
204+
// path was copied into to-local-path first).
205+
len = strlen((cp = dir->file.path));
206+
if (len > 0 && cp[len-1] == '*') {
207+
// keep track that we removed *
208+
n = len-1;
209+
cp[n] = 0;
210+
}
207211
h = opendir(dir->file.path);
208212
if (!h) {
213+
// revert back the * char as it may be part of pattern matching
214+
if (n > 0) cp[n] = '*';
209215
dir->error = errno;
210216
return DR_ERROR;
211217
}
@@ -237,6 +243,8 @@ static int Get_File_Info(REBREQ *file)
237243
// most efficient, because it does not require a separate
238244
// file system call for determining directories.
239245
if (d->d_type == DT_DIR) SET_FLAG(file->modes, RFM_DIR);
246+
// NOTE: DT_DIR may be enabled using _BSD_SOURCE define
247+
// https://stackoverflow.com/a/9241608/494472
240248
#else
241249
if (Is_Dir(dir->file.path, file->file.path)) SET_FLAG(file->modes, RFM_DIR);
242250
#endif
@@ -248,6 +256,85 @@ static int Get_File_Info(REBREQ *file)
248256
}
249257

250258

259+
/***********************************************************************
**
*/	static int Read_Pattern(REBREQ *dir, REBREQ *file)
/*
**		Report the file names matching a wildcard pattern, one entry
**		per call, releasing the glob state once every entry has been
**		reported.
**
**		dir  - request holding the pattern in dir->file.path; its
**		       handle, actual, length, clen, modes and flags fields
**		       carry the iteration state between calls.
**		file - request receiving the next entry in file->file.path.
**
**		Returns DR_DONE after storing an entry and also when the list
**		is exhausted (RRF_DONE is then set in dir->flags). Returns
**		DR_ERROR with dir->error set to a negated GLOB_* code when the
**		pattern is rejected, memory is not available or glob() fails.
**
**		Although glob() accepts patterns which match content through
**		multiple directories, that is intentionally rejected here:
**		a wildcard may appear only after the last directory separator.
**		Such patterns are not available on Windows, and the result
**		would have to be a full path, which may not be the best choice
**		from the user's point of view.
**
**		Only the part following the last separator of the pattern is
**		returned, never the complete path.
**
***********************************************************************/
{
	char *cp;
	glob_t *g;
	int n, p;
	int end = -1;	// index of the last '/', -1 when the pattern has none
	int wld = -1;	// index of the last wildcard seen, -1 when none so far

	if (!(g = dir->handle)) {
		// First call for this request: validate the pattern and run glob.
		n = strlen((cp = dir->file.path));
		for (p = 0; p < n; p++) {
			if (cp[p] == '/') {
				// store position of the directory separator
				end = p;
				if (wld >= 0) {
					// A wildcard precedes this separator (index 0
					// included), so the pattern would span multiple
					// directories, like: %../?/?.png
					dir->error = -GLOB_NOMATCH; // result will be []
					return DR_ERROR;
				}
			}
			else if (cp[p] == '*' || cp[p] == '?') wld = p;
		}
		// Keep the offset just past the last directory separator so the
		// result can be limited to a file name instead of a full path.
		// It is 0 when the pattern contains no separator at all.
		dir->clen = end + 1;

		g = MAKE_NEW(glob_t); // released below once all entries are reported
		if (!g) {
			dir->error = -GLOB_NOSPACE; // same code glob reports when out of memory
			return DR_ERROR;
		}
		n = glob(dir->file.path, GLOB_MARK, NULL, g);
		if (n) {
			globfree(g);
			OS_Free(g);
			dir->error = -n; // using negative number as on Windows
			return DR_ERROR;
		}
		// All matches are already in the glob buffer,
		// but they are not reported all at once.
		dir->handle = g;
		dir->actual = 0;
		dir->length = g->gl_pathc;
		dir->modes = 1 << RFM_PATTERN; // changing mode from RFM_DIR to RFM_PATTERN
		CLR_FLAG(dir->flags, RRF_DONE);
	}

	if (dir->actual >= g->gl_pathc) {
		globfree(g);
		OS_Free(g);
		dir->handle = NULL; // do not leave a pointer to the released state
		SET_FLAG(dir->flags, RRF_DONE); // no more files
		return DR_DONE;
	}

	file->modes = 0;
	//TODO: assert if: 0 <= dir->clen <= MAX_FILE_NAME ???
	// NOTE(review): the copy length below assumes dir->clen is smaller
	// than MAX_FILE_NAME - confirm against the callers.
	// only file part is returned...
	COPY_BYTES(file->file.path, g->gl_pathv[dir->actual++] + dir->clen, MAX_FILE_NAME - dir->clen);
	return DR_DONE;
}
337+
251338
/***********************************************************************
252339
**
253340
*/ DEVICE_CMD Open_File(REBREQ *file)
@@ -355,7 +442,15 @@ static int Get_File_Info(REBREQ *file)
355442
ssize_t num_bytes;
356443

357444
if (GET_FLAG(file->modes, RFM_DIR)) {
358-
return Read_Directory(file, (REBREQ*)file->data);
445+
int ret = Read_Directory(file, (REBREQ*)file->data);
446+
// If there is no id yet and reading failed, we will
447+
// try to use file as a pattern...
448+
if (ret == DR_ERROR && !file->id) goto init_pattern;
449+
return ret;
450+
}
451+
else if (GET_FLAG(file->modes, RFM_PATTERN)) {
452+
init_pattern:
453+
return Read_Pattern(file, (REBREQ*)file->data);
359454
}
360455

361456
if (!file->id) {

src/tests/units/file-test.r3

+19-1
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,27 @@ secure [%/ allow]
9898
]
9999
--test-- "wildcard *?*"
100100
--assert all [
101-
block? probe files: wildcard %units/files/ %*2186-UTF??*.txt
101+
block? files: wildcard %units/files/ %*2186-UTF??*.txt
102102
4 = length? files
103103
]
104+
--test-- "read wildcard"
105+
;@@ https://github.com/Oldes/Rebol-issues/issues/2443
106+
--assert all [
107+
block? files: try [read %units/files/issue-2186-*.txt]
108+
4 = length? files
109+
to logic! find files %issue-2186-UTF16-LE.txt
110+
]
111+
--assert all [
112+
block? files: try [read %units/files/issue-2186-UTF??-BE.txt]
113+
2 = length? files
114+
to logic! find files %issue-2186-UTF32-BE.txt
115+
]
116+
--assert all [
117+
block? files: try [read %units/files/*2186-UTF??*.txt]
118+
4 = length? files
119+
to logic! find files %issue-2186-UTF16-BE.txt
120+
]
121+
104122
===end-group===
105123

106124
~~~end-file~~~

0 commit comments

Comments
 (0)