diff --git a/docs/grep.md b/docs/grep.md
new file mode 100644
index 0000000..bcb89ad
--- /dev/null
+++ b/docs/grep.md
@@ -0,0 +1,70 @@
+# grep
+
+## NAME
+
+grep - print lines that match patterns
+
+## SYNOPSIS
+
+```
+grep [-R] [-F] [-v] _pattern_ [file...]
+```
+
+## DESCRIPTION
+
+`grep` searches for the specified pattern in each file. If no file is provided,
+`grep` matches against standard input. Patterns are a limited regular
+expression, using `^` for the beginning of a line, `$` for the end of a line,
+`.` to denote any character, and `*` for Kleene stars.
+
+If `-R` or multiple files are specified, then the path name is printed in red
+before the matching line.
+
+## OPTIONS
+
+`-R` Read all files under each directory recursively.
+
+`-F` Use a fixed string as the pattern rather than a regular expression. Can
+    yield a slight performance boost.
+
+`-v` Invert the matching, printing only the lines that _don't_ match.
+
+Any other flag will fail.
+
+## EXIT STATUS
+
+`grep` exits with 0 if at least one line matches (or doesn't, with `-v`), 1 if
+no lines match (or all do, with `-v`), and 2 if an error occurred.
+
+## BUGS/CONSIDERATIONS
+
+* Flags _must_ be specified before the pattern, and must be specified
+  individually.
+* 2 is returned if _any_ error occurs, even if there is a match. Generally a
+  problem when matching against multiple files, and one matches while another
+  errors.
+* Running `grep -R _pattern_` is **not** equivalent to `grep -R _pattern_ .`,
+  instead being equivalent to `grep _pattern_`. This is a bug.
+* Recursion is depth-first, and is ordered by inode creation time rather than
+  lexically.
+* Breaks up lines longer than 1024 characters. Shouldn't affect most people.
+
+## EXAMPLES
+
+`$ grep -R pattern dir`
+
+matches `pattern` against all files in `dir` and its subdirectories (that can
+be opened).
+
+`$ grep -F F.g README.md`
+
+matches README.md against the fixed string `F.g`. The pattern is not
+interpreted as a regular expression, so `Fog` does not match.
+
+`$ grep -v Fog README.md`
+
+matches README.md and prints the lines that _don't_ match the pattern `Fog`.
+
+`$ ls | grep README`
+
+finds a file in the current directory whose name matches the pattern `README`.
diff --git a/user/grep.c b/user/grep.c
index 2315a0c..428ae2f 100644
--- a/user/grep.c
+++ b/user/grep.c
@@ -2,27 +2,49 @@
 
 #include "kernel/types.h"
 #include "kernel/stat.h"
+#include "kernel/fs.h"
 #include "user/user.h"
 
 char buf[1024];
 int match(char*, char*);
+int dirgrep(char*, int, char*);
+
+// -F, -R, -v flags
+// No bool type, so use uint8 instead
+uint8 Fflag = 0;
+uint8 Rflag = 0;
+uint8 vflag = 0;
+
+uint8 printPath = 0;
+
+// 0 if a match, 1 if not (2 if error)
+uint8 exitVal = 1;
 
 void
-grep(char *pattern, int fd)
+grep(char *pattern, int fd, char *path)
 {
   int n, m;
   char *p, *q;
 
   m = 0;
+  // Write continuously to the buffer. If it runs out of space, move the
+  // current position to the beginning of the buffer and read again.
   while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){
     m += n;
     buf[m] = '\0';
     p = buf;
     while((q = strchr(p, '\n')) != 0){
       *q = 0;
-      if(match(pattern, p)){
+      // Print line if match and no -v flag or if no match and -v flag
+      // Simple xor operation
+      if (match(pattern, p) != vflag) {
         *q = '\n';
+        if (printPath)
+          fprintf(1, "\x1b[31m%s:\x1b[0m", path);
         write(1, p, q+1 - p);
+        // Record the match without masking an earlier error status (2)
+        if (exitVal == 1)
+          exitVal = 0;
       }
       p = q+1;
     }
@@ -40,25 +62,134 @@ main(int argc, char *argv[])
   char *pattern;
 
   if(argc <= 1){
-    fprintf(2, "usage: grep pattern [file ...]\n");
-    exit(1);
+    fprintf(2, "usage: grep [-F] [-R] [-v] pattern [file ...]\n");
+    exit(2);
+  }
+
+  // check for flags; stop at the first non-flag argument or at the end of argv
+  i = 1;
+  while(i < argc) {
+    if (argv[i][0] == '-') {
+      switch (argv[i][1]) {
+        case 'F':
+          Fflag = 1;
+          break;
+        case 'R':
+          Rflag = 1;
+          break;
+        case 'v':
+          vflag = 1;
+          break;
+        default:
+          fprintf(2, "Unrecognized flag: -%c\n", argv[i][1]);
+          exit(2);
+      }
+      i++;
+    } else {
+      break;
+    }
   }
-  pattern = argv[1];
 
-  if(argc <= 2){
-    grep(pattern, 0);
-    exit(0);
+  // A pattern is mandatory; flags alone are a usage error
+  if(i >= argc){
+    fprintf(2, "usage: grep [-F] [-R] [-v] pattern [file ...]\n");
+    exit(2);
+  }
+  pattern = argv[i++];
+
+  if(argc <= i){
+    printPath = 0;
+    grep(pattern, 0, "\0");
+    exit(exitVal);
   }
 
-  for(i = 2; i < argc; i++){
+  // Print path if recursion is used or if multiple files are passed
+  if(Rflag || i + 1 < argc) {
+    printPath = 1;
+  }
+
+  while(i < argc){
     if((fd = open(argv[i], 0)) < 0){
-      printf("grep: cannot open %s\n", argv[i]);
-      exit(1);
+      fprintf(2, "grep: cannot open %s\n", argv[i]);
+      // errors exit with 2; 1 is reserved for "no lines matched"
+      exit(2);
     }
-    grep(pattern, fd);
+
+    // Recursion for -R
+    if (Rflag) {
+      dirgrep(pattern, fd, argv[i]);
+    } else {
+      grep(pattern, fd, argv[i]);
+    }
+
+    // cleanup
     close(fd);
+    i++;
   }
-  exit(0);
+  exit(exitVal);
+}
+
+// Recursively grep `path`, already opened as `fd`.
+// A regular file is searched directly; a directory has each entry other than
+// "." and ".." opened and searched in turn. Any other type is skipped.
+// Failures are reported on stderr and recorded by setting exitVal to 2.
+// Returns 1 if `fd` cannot be stat'ed, 0 otherwise.
+int
+dirgrep(char *pattern, int fd, char *path) {
+  struct stat st;
+
+  if (fstat(fd, &st) < 0) {
+    fprintf(2, "grep: cannot stat %s\n", path);
+    exitVal = 2;
+    return 1;
+  }
+
+  switch (st.type) {
+    case T_FILE:
+      grep(pattern, fd, path);
+      break;
+    case T_DIR: {
+      // Braces give the declarations a scope; a case label cannot directly
+      // precede a declaration. Named child_path so it does not shadow the
+      // global read buffer `buf`.
+      char child_path[128], *p;
+      struct dirent de;
+      int child_fd;
+
+      // Need room for path + '/' + a DIRSIZ-byte name + terminating NUL
+      if (strlen(path) + 1 + DIRSIZ + 1 > sizeof(child_path)) {
+        fprintf(2, "grep: path too long: %s\n", path);
+        exitVal = 2;
+        break;
+      }
+
+      strcpy(child_path, path);
+      p = child_path + strlen(child_path);
+      *p++ = '/';
+
+      while (read(fd, &de, sizeof(de)) == sizeof(de)) {
+        // skip if not a proper file or directory
+        if (de.inum == 0 || strcmp(de.name, ".") == 0 || strcmp(de.name, "..") == 0)
+          continue;
+
+        // memmove copies exactly DIRSIZ bytes and writes no terminator
+        memmove(p, de.name, DIRSIZ);
+        p[DIRSIZ] = 0;
+
+        // run dirgrep on the new path
+        if ((child_fd = open(child_path, 0)) < 0){
+          fprintf(2, "grep: cannot open %s\n", child_path);
+          exitVal = 2;
+          continue;
+        }
+
+        dirgrep(pattern, child_fd, child_path);
+        close(child_fd);
+      }
+      break;
+    }
+  }
+  return 0;
 }
 
 // Regexp matcher from Kernighan & Pike,
@@ -71,7 +202,8 @@ int matchstar(int, char*, char*);
 int
 match(char *re, char *text)
 {
-  if(re[0] == '^')
+  // Ignore regex stuff when -F is passed
+  if(re[0] == '^' && !Fflag)
     return matchhere(re+1, text);
   do{  // must look at empty string
     if(matchhere(re, text))
@@ -85,11 +217,14 @@ int matchhere(char *re, char *text)
 {
   if(re[0] == '\0')
     return 1;
-  if(re[1] == '*')
-    return matchstar(re[0], re+2, text);
-  if(re[0] == '$' && re[1] == '\0')
-    return *text == '\0';
-  if(*text!='\0' && (re[0]=='.' || re[0]==*text))
+  // Ignore regex stuff when -F is passed
+  if (!Fflag){
+    if(re[1] == '*')
+      return matchstar(re[0], re+2, text);
+    if(re[0] == '$' && re[1] == '\0')
+      return *text == '\0';
+  }
+  if(*text!='\0' && ((!Fflag && re[0]=='.') || re[0]==*text))
     return matchhere(re+1, text+1);
   return 0;
 }