HADOOP-2350 Scanner api returns null row names, or skips row names if different column families do not have entries for some rows

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@602334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-12-08 06:54:31 +00:00
parent 3c80b8c75e
commit 870cb3b5cf
8 changed files with 265 additions and 48 deletions

View File

@ -64,6 +64,8 @@ Trunk (unreleased changes)
HADOOP-2338 Fix NullPointerException in master server. HADOOP-2338 Fix NullPointerException in master server.
HADOOP-2380 REST servlet throws NPE when any value node has an empty string HADOOP-2380 REST servlet throws NPE when any value node has an empty string
(Bryan Duxbury via Stack) (Bryan Duxbury via Stack)
HADOOP-2350 Scanner api returns null row names, or skips row names if
different column families do not have entries for some rows
IMPROVEMENTS IMPROVEMENTS
HADOOP-2401 Add convenience put method that takes writable HADOOP-2401 Add convenience put method that takes writable

View File

@ -332,7 +332,7 @@ class HMerge implements HConstants {
HRegion root = HRegion root =
new HRegion(dir, hlog,fs, conf, HRegionInfo.rootRegionInfo, null, null); new HRegion(dir, hlog,fs, conf, HRegionInfo.rootRegionInfo, null, null);
HInternalScannerInterface rootScanner = HScannerInterface rootScanner =
root.getScanner(META_COLS, new Text(), System.currentTimeMillis(), null); root.getScanner(META_COLS, new Text(), System.currentTimeMillis(), null);
try { try {

View File

@ -1088,7 +1088,7 @@ public class HRegion implements HConstants {
* @return HScannerInterface * @return HScannerInterface
* @throws IOException * @throws IOException
*/ */
public HInternalScannerInterface getScanner(Text[] cols, Text firstRow, public HScannerInterface getScanner(Text[] cols, Text firstRow,
long timestamp, RowFilterInterface filter) throws IOException { long timestamp, RowFilterInterface filter) throws IOException {
lock.readLock().lock(); lock.readLock().lock();
try { try {
@ -1485,33 +1485,21 @@ public class HRegion implements HConstants {
/** /**
* HScanner is an iterator through a bunch of rows in an HRegion. * HScanner is an iterator through a bunch of rows in an HRegion.
*/ */
private class HScanner implements HInternalScannerInterface { private class HScanner implements HScannerInterface {
private HInternalScannerInterface[] scanners; private HInternalScannerInterface[] scanners;
private boolean wildcardMatch = false; private TreeMap<Text, byte []>[] resultSets;
private boolean multipleMatchers = false; private HStoreKey[] keys;
/** Create an HScanner with a handle on many HStores. */ /** Create an HScanner with a handle on many HStores. */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
HScanner(Text[] cols, Text firstRow, long timestamp, HStore[] stores, HScanner(Text[] cols, Text firstRow, long timestamp, HStore[] stores,
RowFilterInterface filter) throws IOException { RowFilterInterface filter) throws IOException {
this.scanners = new HInternalScannerInterface[stores.length];
// Advance to the first key in each store. this.scanners = new HInternalScannerInterface[stores.length];
// All results will match the required column-set and scanTime.
try { try {
for (int i = 0; i < stores.length; i++) { for (int i = 0; i < stores.length; i++) {
HInternalScannerInterface scanner = scanners[i] = stores[i].getScanner(timestamp, cols, firstRow, filter);
scanners[i] = }
stores[i].getScanner(timestamp, cols, firstRow, filter);
if (scanner.isWildcardScanner()) {
this.wildcardMatch = true;
}
if (scanner.isMultipleMatchScanner()) {
this.multipleMatchers = true;
}
}
} catch(IOException e) { } catch(IOException e) {
for (int i = 0; i < this.scanners.length; i++) { for (int i = 0; i < this.scanners.length; i++) {
@ -1521,35 +1509,100 @@ public class HRegion implements HConstants {
} }
throw e; throw e;
} }
// Advance to the first key in each store.
// All results will match the required column-set and scanTime.
this.resultSets = new TreeMap[scanners.length];
this.keys = new HStoreKey[scanners.length];
for (int i = 0; i < scanners.length; i++) {
keys[i] = new HStoreKey();
resultSets[i] = new TreeMap<Text, byte []>();
if(scanners[i] != null && !scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
// As we have now successfully completed initialization, increment the // As we have now successfully completed initialization, increment the
// activeScanner count. // activeScanner count.
activeScannerCount.incrementAndGet(); activeScannerCount.incrementAndGet();
} }
/** @return true if the scanner is a wild card scanner */
public boolean isWildcardScanner() {
return wildcardMatch;
}
/** @return true if the scanner is a multiple match scanner */
public boolean isMultipleMatchScanner() {
return multipleMatchers;
}
/** {@inheritDoc} */ /** {@inheritDoc} */
public boolean next(HStoreKey key, SortedMap<Text, byte[]> results) public boolean next(HStoreKey key, SortedMap<Text, byte[]> results)
throws IOException { throws IOException {
boolean haveResults = false; boolean moreToFollow = false;
// Find the lowest-possible key.
Text chosenRow = null;
long chosenTimestamp = -1;
for (int i = 0; i < this.keys.length; i++) {
if (scanners[i] != null &&
(chosenRow == null ||
(keys[i].getRow().compareTo(chosenRow) < 0) ||
((keys[i].getRow().compareTo(chosenRow) == 0) &&
(keys[i].getTimestamp() > chosenTimestamp)))) {
chosenRow = new Text(keys[i].getRow());
chosenTimestamp = keys[i].getTimestamp();
}
}
// Store the key and results for each sub-scanner. Merge them as
// appropriate.
if (chosenTimestamp >= 0) {
// Here we are setting the passed in key with current row+timestamp
key.setRow(chosenRow);
key.setVersion(chosenTimestamp);
key.setColumn(HConstants.EMPTY_TEXT);
for (int i = 0; i < scanners.length; i++) {
if (scanners[i] != null && keys[i].getRow().compareTo(chosenRow) == 0) {
// NOTE: We used to do results.putAll(resultSets[i]);
// but this had the effect of overwriting newer
// values with older ones. So now we only insert
// a result if the map does not contain the key.
for (Map.Entry<Text, byte[]> e : resultSets[i].entrySet()) {
if (!results.containsKey(e.getKey())) {
results.put(e.getKey(), e.getValue());
}
}
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
}
}
for (int i = 0; i < scanners.length; i++) { for (int i = 0; i < scanners.length; i++) {
if (scanners[i] != null) { // If the current scanner is non-null AND has a lower-or-equal
if (scanners[i].next(key, results)) { // row label, then its timestamp is bad. We need to advance it.
haveResults = true; while ((scanners[i] != null) &&
} else { (keys[i].getRow().compareTo(chosenRow) <= 0)) {
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i); closeScanner(i);
} }
} }
} }
return haveResults;
moreToFollow = chosenTimestamp >= 0;
if (results == null || results.size() <= 0) {
// If we got no results, then there is no more to follow.
moreToFollow = false;
}
// Make sure scanners closed if no more results
if (!moreToFollow) {
for (int i = 0; i < scanners.length; i++) {
if (null != scanners[i]) {
closeScanner(i);
}
}
}
return moreToFollow;
} }
@ -1563,6 +1616,8 @@ public class HRegion implements HConstants {
} }
} finally { } finally {
scanners[i] = null; scanners[i] = null;
resultSets[i] = null;
keys[i] = null;
} }
} }

View File

@ -1373,7 +1373,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
String scannerName = String.valueOf(scannerId); String scannerName = String.valueOf(scannerId);
HInternalScannerInterface s = scanners.get(scannerName); HScannerInterface s = scanners.get(scannerName);
if (s == null) { if (s == null) {
throw new UnknownScannerException("Name: " + scannerName); throw new UnknownScannerException("Name: " + scannerName);
} }
@ -1433,7 +1433,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
try { try {
HRegion r = getRegion(regionName); HRegion r = getRegion(regionName);
long scannerId = -1L; long scannerId = -1L;
HInternalScannerInterface s = HScannerInterface s =
r.getScanner(cols, firstRow, timestamp, filter); r.getScanner(cols, firstRow, timestamp, filter);
scannerId = rand.nextLong(); scannerId = rand.nextLong();
String scannerName = String.valueOf(scannerId); String scannerName = String.valueOf(scannerId);
@ -1457,7 +1457,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
String scannerName = String.valueOf(scannerId); String scannerName = String.valueOf(scannerId);
HInternalScannerInterface s = null; HScannerInterface s = null;
synchronized(scanners) { synchronized(scanners) {
s = scanners.remove(scannerName); s = scanners.remove(scannerName);
} }
@ -1472,9 +1472,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
Map<String, HInternalScannerInterface> scanners = Map<String, HScannerInterface> scanners =
Collections.synchronizedMap(new HashMap<String, Collections.synchronizedMap(new HashMap<String, HScannerInterface>());
HInternalScannerInterface>());
/** /**
* Instantiated as a scanner lease. * Instantiated as a scanner lease.
@ -1490,7 +1489,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
/** {@inheritDoc} */ /** {@inheritDoc} */
public void leaseExpired() { public void leaseExpired() {
LOG.info("Scanner " + this.scannerName + " lease expired"); LOG.info("Scanner " + this.scannerName + " lease expired");
HInternalScannerInterface s = null; HScannerInterface s = null;
synchronized(scanners) { synchronized(scanners) {
s = scanners.remove(this.scannerName); s = scanners.remove(this.scannerName);
} }

View File

@ -283,7 +283,7 @@ public class TestHRegion extends HBaseTestCase implements RegionUnavailableListe
startTime = System.currentTimeMillis(); startTime = System.currentTimeMillis();
HInternalScannerInterface s = HScannerInterface s =
r.getScanner(cols, new Text(), System.currentTimeMillis(), null); r.getScanner(cols, new Text(), System.currentTimeMillis(), null);
int numFetched = 0; int numFetched = 0;
try { try {
@ -630,7 +630,7 @@ public class TestHRegion extends HBaseTestCase implements RegionUnavailableListe
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
HInternalScannerInterface s = HScannerInterface s =
r.getScanner(cols, new Text(), System.currentTimeMillis(), null); r.getScanner(cols, new Text(), System.currentTimeMillis(), null);
try { try {

View File

@ -69,7 +69,7 @@ public class TestScanner extends HBaseTestCase {
private void scan(boolean validateStartcode, String serverName) private void scan(boolean validateStartcode, String serverName)
throws IOException { throws IOException {
HInternalScannerInterface scanner = null; HScannerInterface scanner = null;
TreeMap<Text, byte []> results = new TreeMap<Text, byte []>(); TreeMap<Text, byte []> results = new TreeMap<Text, byte []>();
HStoreKey key = new HStoreKey(); HStoreKey key = new HStoreKey();
@ -108,7 +108,7 @@ public class TestScanner extends HBaseTestCase {
} }
} finally { } finally {
HInternalScannerInterface s = scanner; HScannerInterface s = scanner;
scanner = null; scanner = null;
if(s != null) { if(s != null) {
s.close(); s.close();

View File

@ -0,0 +1,161 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.hadoop.io.Text;
/**
 * Tests the scanner API at all levels: directly against an HRegion, through
 * the client-side scanner's next() method, and through the client-side
 * scanner's iterator.
 */
public class TestScannerAPI extends HBaseClusterTestCase {
  /** Column families created in the test table and requested by every scan. */
  private final Text[] columns = new Text[] {
    new Text("a:"),
    new Text("b:")
  };

  /** Row at which every scan starts. */
  private final Text startRow = new Text("0");

  /** Expected table content: row name -> (column name -> cell value). */
  private final TreeMap<Text, SortedMap<Text, byte[]>> values =
    new TreeMap<Text, SortedMap<Text, byte[]>>();

  /**
   * Builds the expected table content. Row "1" has a cell in family "a:" only;
   * row "2" has cells in both "a:" and "b:", so that the scan has to merge
   * results from column families that do not all contain every row.
   *
   * @throws Exception
   */
  public TestScannerAPI() throws Exception {
    super();
    try {
      TreeMap<Text, byte[]> rowOne = new TreeMap<Text, byte[]>();
      rowOne.put(new Text("a:1"), "1".getBytes(HConstants.UTF8_ENCODING));
      values.put(new Text("1"), rowOne);

      TreeMap<Text, byte[]> rowTwo = new TreeMap<Text, byte[]>();
      rowTwo.put(new Text("a:2"), "2".getBytes(HConstants.UTF8_ENCODING));
      rowTwo.put(new Text("b:2"), "2".getBytes(HConstants.UTF8_ENCODING));
      // Previously this second row was built but never registered, so it was
      // neither inserted into the table nor verified.
      values.put(new Text("2"), rowTwo);
    } catch (Exception e) {
      e.printStackTrace();
      throw e;
    }
  }

  /**
   * Creates the table, loads the expected values, then scans them back three
   * ways and checks each returned row against the expected content.
   *
   * @throws IOException
   */
  public void testApi() throws IOException {
    final String tableName = getName();

    // Create table
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTableDescriptor tableDesc = new HTableDescriptor(tableName);
    for (int i = 0; i < columns.length; i++) {
      tableDesc.addFamily(new HColumnDescriptor(columns[i].toString()));
    }
    admin.createTable(tableDesc);

    // Insert values
    HTable table = new HTable(conf, new Text(tableName));
    for (Map.Entry<Text, SortedMap<Text, byte[]>> row: values.entrySet()) {
      long lockid = table.startUpdate(row.getKey());
      for (Map.Entry<Text, byte[]> val: row.getValue().entrySet()) {
        table.put(lockid, val.getKey(), val.getValue());
      }
      table.commit(lockid);
    }

    // Find an online region that is not a meta region. If several qualify,
    // the last one in iteration order is used.
    HRegion region = null;
    try {
      SortedMap<Text, HRegion> regions =
        cluster.getRegionThreads().get(0).getRegionServer().getOnlineRegions();
      for (Map.Entry<Text, HRegion> e: regions.entrySet()) {
        if (!e.getValue().getRegionInfo().isMetaRegion()) {
          region = e.getValue();
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      IOException iox = new IOException("error finding region");
      iox.initCause(e);
      throw iox;
    }
    // Fail with a readable message rather than a bare NullPointerException.
    assertTrue("no non-meta region online", region != null);

    // Level 1: scan the region directly.
    @SuppressWarnings("null")
    HScannerInterface scanner =
      region.getScanner(columns, startRow, System.currentTimeMillis(), null);
    try {
      verify(scanner);
    } finally {
      scanner.close();
    }

    // Level 2: scan through the client API using next().
    scanner = table.obtainScanner(columns, startRow);
    try {
      verify(scanner);
    } finally {
      scanner.close();
    }

    // Level 3: scan through the client API using the iterator.
    scanner = table.obtainScanner(columns, startRow);
    try {
      for (Iterator<Map.Entry<HStoreKey, SortedMap<Text, byte[]>>> iterator =
        scanner.iterator();
        iterator.hasNext();
      ) {
        Map.Entry<HStoreKey, SortedMap<Text, byte[]>> row = iterator.next();
        verifyRow(row.getKey().getRow(), row.getValue());
      }
    } finally {
      scanner.close();
    }
  }

  /**
   * Drains the scanner with next() and checks every returned row.
   *
   * @param scanner scanner to read from; the caller is responsible for closing it
   * @throws IOException
   */
  private void verify(HScannerInterface scanner) throws IOException {
    HStoreKey key = new HStoreKey();
    SortedMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
    while (scanner.next(key, results)) {
      verifyRow(key.getRow(), results);
      // The same map is reused for every call to next().
      results.clear();
    }
  }

  /**
   * Asserts that a scanned row is one of the expected rows and that it holds
   * exactly the expected columns with the expected values.
   *
   * @param row name of the row returned by the scanner
   * @param results column name -> cell value as returned by the scanner
   */
  private void verifyRow(Text row, SortedMap<Text, byte[]> results) {
    assertTrue("row key", values.containsKey(row));

    SortedMap<Text, byte[]> columnValues = values.get(row);
    assertEquals(columnValues.size(), results.size());
    for (Map.Entry<Text, byte[]> e: columnValues.entrySet()) {
      Text column = e.getKey();
      assertTrue("column", results.containsKey(column));
      assertTrue("value", Arrays.equals(columnValues.get(column),
        results.get(column)));
    }
  }
}

View File

@ -228,7 +228,7 @@ public class TestSplit extends MultiRegionTable {
final Text firstValue) final Text firstValue)
throws IOException { throws IOException {
Text [] cols = new Text[] {new Text(column)}; Text [] cols = new Text[] {new Text(column)};
HInternalScannerInterface s = r.getScanner(cols, HScannerInterface s = r.getScanner(cols,
HConstants.EMPTY_START_ROW, System.currentTimeMillis(), null); HConstants.EMPTY_START_ROW, System.currentTimeMillis(), null);
try { try {
HStoreKey curKey = new HStoreKey(); HStoreKey curKey = new HStoreKey();