Problem
How can I remove (update) anything in the index?
Solution
import org.egothor.dir.TankerImpl;
import org.egothor.parser.Tokenizer;
import org.egothor.data.*;
import org.egothor.parser.plain.Plain;
import org.egothor.parser.misc.*;
import java.io.StringReader;
/**
 * Example: appending documents to an index and then removing some of them.
 *
 * Each run appends 100 generated documents (metadata key "K" holds the
 * document number) to the index stored in "index/", reopens the index and
 * removes every document whose "K" value is even, printing per-barrel and
 * total removal counts.
 */
public class Updater {

    /**
     * Runs the append-then-remove demonstration.
     *
     * Every index, barrel and metadata reader opened here is closed in a
     * finally block, so a failure part-way through does not leave handles
     * open.
     *
     * @param args ignored
     * @throws Exception if any index operation fails or a "K" value is not
     *         a valid integer
     */
    public static void main(String[] args) throws Exception {
        TankerImpl index = new TankerImpl("index/",false,32,10);
        try {
            appendSampleDocs(index);
        } finally {
            index.close();
        }

        // remove docs 0,2,4,6,8,10, etc. -- see the "K" field
        // open index
        index = new TankerImpl("index/",false,32,10);
        long sum = 0;
        try {
            // get enumeration of all barrels
            java.util.Enumeration e = index.elements();
            while (e.hasMoreElements()) {
                Object o = e.nextElement();
                if (o instanceof BarrelShaker) {
                    // a barrel that is able to modify its data structure
                    sum += removeEvenKeyedDocs((BarrelShaker) o);
                } else if (o instanceof Barrel) {
                    // a different sort of a barrel (i.e. meta searcher):
                    // ...only close the object
                    ((Barrel) o).close();
                }
            }
        } finally {
            index.close();
        }
        System.out.println("Removed (total): "+sum);
    }

    /**
     * Generates 100 documents and appends them to the given index.
     * Each document carries the metadata keys "S", "L", "T" and "K";
     * "K" is the document number (0..99) as a decimal string.
     */
    private static void appendSampleDocs(TankerImpl index) throws Exception {
        for (int i=0;i<100;i++) {
            FTField field = new MyTxtField("This is my document no. "+i);
            DocMetadata dm = new DocMetadata();
            dm.put("S","Some summary...");
            dm.put("L","Location of this doc is on my HDD...");
            dm.put("T","Title of the document "+i);
            dm.put("K",Integer.toString(i));
            index.append(new Document(dm, field));
        }
    }

    /**
     * Removes from one barrel every document whose "K" metadata value is an
     * even integer, prints the barrel statistics before and after, and
     * closes the barrel.
     *
     * @param bs the barrel to process; it is closed before this method returns
     * @return number of documents removed by this call
     */
    private static long removeEvenKeyedDocs(BarrelShaker bs) throws Exception {
        long removed = 0;
        try {
            System.out.println("Barrel size: "+bs.size()+
                " (already removed "+bs.deleted()+
                " docs)");
            // iterate over meta data
            IMetaReader imr = bs.openDocMeta();
            try {
                while (imr.hasMoreElements()) {
                    DocMetadata mt = (DocMetadata) imr.nextElement();
                    // uid of the document in the barrel bs
                    long uid = imr.getUid();
                    // NOTE(review): the second argument of get() looks like a
                    // default for a missing key; if so, documents without "K"
                    // count as odd and are kept -- confirm against the API.
                    int id = Integer.parseInt(mt.get("K","1").toString());
                    if (id%2 == 0) {
                        bs.removeDoc(uid);
                        removed++;
                    }
                }
            } finally {
                imr.close();
            }
        } finally {
            bs.close();
        }
        // As in the original example, the deleted-count is read after close().
        System.out.println(" removed: "+removed+
            " (removed docs in the barrel: "+bs.deleted()+")");
        return removed;
    }
}
/**
 * A full-text field whose content is a plain in-memory string.
 */
class MyTxtField extends FTField {
    /** The raw text handed to the constructor. */
    String text;

    /**
     * @param text the content this field will tokenize
     */
    MyTxtField(String text) {
        this.text = text;
    }

    /**
     * Creates a fresh tokenizer chain over the stored text:
     * a StringReader wrapped in Plain, wrapped in LowerCase.
     *
     * @return the outermost (LowerCase) tokenizer
     */
    public Tokenizer words() {
        StringReader source = new StringReader(text);
        Plain plainTokens = new Plain(source);
        return new LowerCase(plainTokens);
    }
}
After the first run, you will see:
Barrel size: 64 (already removed 0 docs)
removed: 32 (removed docs in the barrel: 32)
Barrel size: 36 (already removed 0 docs)
removed: 18 (removed docs in the barrel: 18)
Removed (total): 50
After the second run (which appends another 100 documents to the existing index), the following lines are printed out:
Barrel size: 64 (already removed 32 docs)
removed: 0 (removed docs in the barrel: 32)
Barrel size: 36 (already removed 18 docs)
removed: 0 (removed docs in the barrel: 18)
Barrel size: 36 (already removed 0 docs)
removed: 18 (removed docs in the barrel: 18)
Barrel size: 64 (already removed 0 docs)
removed: 32 (removed docs in the barrel: 32)
Removed (total): 50
--
LeoGalambos - 26 May 2004