kitaev@213: /* kitaev@213: * Copyright (c) 2010-2011 TMate Software Ltd kitaev@213: * kitaev@213: * This program is free software; you can redistribute it and/or modify kitaev@213: * it under the terms of the GNU General Public License as published by kitaev@213: * the Free Software Foundation; version 2 of the License. kitaev@213: * kitaev@213: * This program is distributed in the hope that it will be useful, kitaev@213: * but WITHOUT ANY WARRANTY; without even the implied warranty of kitaev@213: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the kitaev@213: * GNU General Public License for more details. kitaev@213: * kitaev@213: * For information on how to redistribute this software under kitaev@213: * the terms of a license other than GNU General Public License kitaev@213: * contact TMate Software at support@hg4j.com kitaev@213: */ kitaev@213: package org.tmatesoft.hg.repo; kitaev@213: kitaev@213: import java.io.BufferedReader; kitaev@213: import java.io.File; kitaev@213: import java.io.FileReader; kitaev@213: import java.io.IOException; kitaev@213: import java.util.ArrayList; kitaev@213: import java.util.Collections; kitaev@213: import java.util.List; kitaev@213: import java.util.regex.Pattern; kitaev@213: kitaev@213: import org.tmatesoft.hg.util.Path; kitaev@213: kitaev@213: /** kitaev@213: * Handling of ignored paths according to .hgignore configuration kitaev@213: * kitaev@213: * @author Artem Tikhomirov kitaev@213: * @author TMate Software Ltd. kitaev@213: */ kitaev@213: public class HgIgnore { kitaev@213: kitaev@213: private List entries; kitaev@213: kitaev@213: HgIgnore() { kitaev@213: entries = Collections.emptyList(); kitaev@213: } kitaev@213: kitaev@213: /* package-local */void read(File hgignoreFile) throws IOException { kitaev@213: if (!hgignoreFile.exists()) { kitaev@213: return; kitaev@213: } kitaev@213: ArrayList result = new ArrayList(entries); // start with existing kitaev@213: String syntax = "regex"; // or "glob" kitaev@213: BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile)); kitaev@213: String line; kitaev@213: while ((line = fr.readLine()) != null) { kitaev@213: line = line.trim(); kitaev@213: if (line.startsWith("syntax:")) { kitaev@213: syntax = line.substring("syntax:".length()).trim(); kitaev@213: if (!"regex".equals(syntax) && !"glob".equals(syntax)) { kitaev@213: throw new IllegalStateException(line); kitaev@213: } kitaev@213: } else if (line.length() > 0) { kitaev@213: // shall I account for local paths in the file (i.e. kitaev@213: // back-slashed on windows)? kitaev@213: int x; kitaev@213: if ((x = line.indexOf('#')) >= 0) { kitaev@213: line = line.substring(0, x).trim(); kitaev@213: if (line.length() == 0) { kitaev@213: continue; kitaev@213: } kitaev@213: } kitaev@213: if ("glob".equals(syntax)) { kitaev@213: // hgignore(5) kitaev@213: // (http://www.selenic.com/mercurial/hgignore.5.html) says slashes '\' are escape characters, kitaev@213: // hence no special treatment of Windows path kitaev@213: // however, own attempts make me think '\' on Windows are not treated as escapes kitaev@213: line = glob2regex(line); kitaev@213: } kitaev@213: result.add(Pattern.compile(line)); // case-sensitive kitaev@213: } kitaev@213: } kitaev@213: result.trimToSize(); kitaev@213: entries = result; kitaev@213: } kitaev@213: kitaev@213: // note, #isIgnored(), even if queried for directories and returned positive reply, may still get kitaev@213: // a file from that ignored folder to get examined. Thus, patterns like "bin" shall match not only a folder, kitaev@213: // but any file under that folder as well kitaev@213: // Alternatively, file walker may memorize folder is ignored and uses this information for all nested files. However, kitaev@213: // this approach would require walker (a) return directories (b) provide nesting information. This may become kitaev@213: // troublesome when one walks not over io.File, but Eclipse's IResource or any other custom VFS. kitaev@213: // kitaev@213: // kitaev@213: // might be interesting, although looks like of no direct use in my case kitaev@213: // @see http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns kitaev@213: private String glob2regex(String line) { kitaev@213: assert line.length() > 0; kitaev@213: StringBuilder sb = new StringBuilder(line.length() + 10); kitaev@213: sb.append('^'); // help avoid matcher.find() to match 'bin' pattern in the middle of the filename kitaev@213: int start = 0, end = line.length() - 1; kitaev@213: // '*' at the beginning and end of a line are useless for Pattern kitaev@213: // XXX although how about **.txt - such globs can be seen in a config, are they valid for HgIgnore? kitaev@213: while (start <= end && line.charAt(start) == '*') start++; kitaev@213: while (end > start && line.charAt(end) == '*') end--; kitaev@213: kitaev@213: for (int i = start; i <= end; i++) { kitaev@213: char ch = line.charAt(i); kitaev@213: if (ch == '.' || ch == '\\') { kitaev@213: sb.append('\\'); kitaev@213: } else if (ch == '?') { kitaev@213: // simple '.' substitution might work out, however, more formally kitaev@213: // a char class seems more appropriate to avoid accidentally kitaev@213: // matching a subdirectory with ? char (i.e. /a/b?d against /a/bad, /a/bed and /a/b/d) kitaev@213: // @see http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_03 kitaev@213: // quote: "The slash character in a pathname shall be explicitly matched by using one or more slashes in the pattern; kitaev@213: // it shall neither be matched by the asterisk or question-mark special characters nor by a bracket expression" kitaev@213: sb.append("[^/]"); kitaev@213: continue; kitaev@213: } else if (ch == '*') { kitaev@213: sb.append("[^/]*?"); kitaev@213: continue; kitaev@213: } kitaev@213: sb.append(ch); kitaev@213: } kitaev@213: return sb.toString(); kitaev@213: } kitaev@213: kitaev@213: // TODO use PathGlobMatcher kitaev@213: public boolean isIgnored(Path path) { kitaev@213: for (Pattern p : entries) { kitaev@213: if (p.matcher(path).find()) { kitaev@213: return true; kitaev@213: } kitaev@213: } kitaev@213: return false; kitaev@213: } kitaev@213: }