private Object readResolve() { // Look up the name to ensure singletons // This is not entirely safe; see Effective Java, 2nd Ed., #77 for details. // However, we do not depend on singletons for security, only for correctness. // It is acceptable if malicious serialization streams result in broken objects. return forName(name); }
/**
 * Parse an entity derivation from a JSON node.
 *
 * <p>The node must provide the fields {@code source_type}, {@code entity_type}, and
 * {@code source_attribute}. A field that is absent, or present with an explicit JSON
 * {@code null} value, is rejected as missing; otherwise an explicit null would be read
 * through {@code asText()} as the literal name {@code "null"}.
 *
 * @param node The JSON node.
 * @return The entity derivation.
 * @throws IllegalArgumentException if a required field is missing or JSON null.
 */
public static EntityDerivation fromJSON(JsonNode node) {
    JsonNode src = node.get("source_type");
    // get() returns Java null for an absent field but a NullNode for an explicit JSON null
    Preconditions.checkArgument(src != null && !src.isNull(), "missing source_type");
    JsonNode tgt = node.get("entity_type");
    Preconditions.checkArgument(tgt != null && !tgt.isNull(), "missing entity_type");
    JsonNode attr = node.get("source_attribute");
    Preconditions.checkArgument(attr != null && !attr.isNull(), "missing source_attribute");

    // The derived (target) type is passed first, then the source type and the source attribute.
    return create(EntityType.forName(tgt.asText()),
                  EntityType.forName(src.asText()),
                  TypedName.create(attr.asText(), Long.class));
}
}
/**
 * {@inheritDoc}
 *
 * This implementation uses an internal {@link Query} implementation to prepare queries for
 * {@link #streamEntities(EntityQuery)} and {@link #streamEntityGroups(EntityQuery, TypedName)}.
 * The entity type for a view class is read from its {@link DefaultEntityType} annotation
 * and remembered in {@code viewClassCache} for later calls.
 */
@Override
public <V extends Entity> Query<V> query(Class<V> type) {
    EntityType resolved = viewClassCache.get(type);
    if (resolved != null) {
        // cache hit: no need to inspect the annotation again
        return new JavaQuery<>(this, resolved, type);
    }

    DefaultEntityType annotation = type.getAnnotation(DefaultEntityType.class);
    if (annotation == null) {
        throw new IllegalArgumentException(type + " has no default entity type annotation");
    }

    resolved = EntityType.forName(annotation.value());
    viewClassCache.put(type, resolved);
    return new JavaQuery<>(this, resolved, type);
}
/**
 * Entity types looked up by the same name are equal and are the same instance;
 * types with different names are not equal.
 */
@Test
public void testCreate() {
    EntityType first = EntityType.forName("wombat");
    EntityType second = EntityType.forName("wombat");
    EntityType other = EntityType.forName("woozle");

    // names round-trip
    assertThat(first.getName(), equalTo("wombat"));
    assertThat(other.getName(), equalTo("woozle"));

    // same name: equal and identical
    assertThat(first, equalTo(second));
    assertThat(first, sameInstance(second));

    // different name: distinct
    assertThat(first, not(equalTo(other)));
}
@Test public void testMissingDefaults() { EntityDefaults defaults = EntityDefaults.lookup(EntityType.forName("wombat")); // we don't know anything about wombats assertThat(defaults, nullValue()); }
/**
 * Build a {@code like-batch} entity, assigning it the next entity ID.
 *
 * @param uid The user ID stored in the entity.
 * @param iid The item ID stored in the entity.
 * @param count The count stored in the entity.
 * @return The newly built entity.
 */
public Entity likeBatch(long uid, long iid, int count) {
    EntityType batchType = EntityType.forName("like-batch");
    return Entities.newBuilder(batchType)
                   .setId(++entityId) // pre-increment: the ID counter advances on every call
                   .setLongAttribute(CommonAttributes.USER_ID, uid)
                   .setLongAttribute(CommonAttributes.ITEM_ID, iid)
                   .setAttribute(CommonAttributes.COUNT, count)
                   .build();
}
}
@Test public void testLoadRatingsDeriveBobcats() throws IOException, URISyntaxException { URI baseURI = TextEntitySourceTest.class.getResource("ratings.csv").toURI(); JsonNode node = reader.readTree("[{\"file\": \"ratings.csv\", \"format\": \"csv\"}, {\"type\": \"derived\", \"source_type\": \"rating\", \"entity_type\": \"bobcat\", \"source_attribute\": \"item\"}]"); StaticDataSource daoProvider = StaticDataSource.fromJSON(node, baseURI); // we should have one text source for ratings; derived aren't sources assertThat(daoProvider.getSourcesForType(CommonTypes.RATING), contains(instanceOf(TextEntitySource.class))); DataAccessObject dao = daoProvider.get(); verifyRatingsCsvData(dao, EntityType.forName("bobcat")); // we should have have a bunch of bobcats LongSet bobcats = dao.getEntityIds(EntityType.forName("bobcat")); assertThat(bobcats, equalTo(dao.getEntityIds(CommonTypes.ITEM))); }
/**
 * A {@code base_id} setting in the JSON configuration is carried through to the
 * delimited-column format of the resulting source.
 */
@Test
public void testBaseIdConfig() throws IOException {
    JsonNode config = reader.readTree("{\"file\": \"ratings.tsv\", \"base_id\": 100}");
    TextEntitySource source = TextEntitySource.fromJSON("test", config, Paths.get("").toUri());

    // the source itself
    assertThat(source, notNullValue());
    assertThat(source.getURL(), equalTo(Paths.get("ratings.tsv").toUri().toURL()));
    assertThat(source.getFormat(), instanceOf(DelimitedColumnEntityFormat.class));

    // the format it carries
    DelimitedColumnEntityFormat columns = (DelimitedColumnEntityFormat) source.getFormat();
    assertThat(columns.getDelimiter(), equalTo("\t"));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(columns.getEntityType(), equalTo(ratingType));
    assertThat(columns.getBaseId(), equalTo(100L));
    assertThat(columns.getEntityBuilder(), equalTo((Class) RatingBuilder.class));
}
@Test public void testRatingDefaults() { EntityDefaults defaults = EntityDefaults.lookup(EntityType.forName("rating")); assertThat(defaults, notNullValue()); assertThat(defaults.getEntityType(), equalTo(EntityType.forName("rating"))); assertThat(defaults.getCommonAttributes(), containsInAnyOrder((TypedName) CommonAttributes.USER_ID, CommonAttributes.ITEM_ID, CommonAttributes.RATING, CommonAttributes.TIMESTAMP)); assertThat(defaults.getDefaultColumns(), contains((TypedName) CommonAttributes.USER_ID, CommonAttributes.ITEM_ID, CommonAttributes.RATING, CommonAttributes.TIMESTAMP)); // FIXME Re-enable this assert when rating builders work // assertThat(defaults.getDefaultBuilder(), // equalTo((Class) RatingBuilder.class)); } }
/**
 * A {@code delimited} format with a multi-character {@code ::} delimiter is configured
 * as a delimited-column format using that delimiter and no header.
 */
@Test
public void testWeirdDelimiterConfig() throws IOException {
    JsonNode config = reader.readTree("{\"file\": \"ratings.dat\", \"format\": \"delimited\", \"delimiter\": \"::\"}");
    TextEntitySource source = TextEntitySource.fromJSON("test", config, Paths.get("").toUri());

    // the source itself
    assertThat(source, notNullValue());
    assertThat(source.getURL(), equalTo(Paths.get("ratings.dat").toUri().toURL()));
    assertThat(source.getFormat(), instanceOf(DelimitedColumnEntityFormat.class));

    // the format it carries
    DelimitedColumnEntityFormat columns = (DelimitedColumnEntityFormat) source.getFormat();
    assertThat(columns.getDelimiter(), equalTo("::"));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(columns.getEntityType(), equalTo(ratingType));
    assertThat(columns.getHeaderLines(), equalTo(0));
    assertThat(columns.usesHeader(), equalTo(false));
    assertThat(columns.getEntityBuilder(), equalTo((Class) RatingBuilder.class));
}
/**
 * A numeric {@code header} setting configures that many header lines without
 * marking the format as using a header.
 */
@Test
public void testSkipHeaderConfig() throws IOException {
    JsonNode config = reader.readTree("{\"file\": \"ratings.tsv\", \"header\": 2}");
    TextEntitySource source = TextEntitySource.fromJSON("test", config, Paths.get("").toUri());

    // the source itself
    assertThat(source, notNullValue());
    assertThat(source.getURL(), equalTo(Paths.get("ratings.tsv").toUri().toURL()));
    assertThat(source.getFormat(), instanceOf(DelimitedColumnEntityFormat.class));

    // the format it carries
    DelimitedColumnEntityFormat columns = (DelimitedColumnEntityFormat) source.getFormat();
    assertThat(columns.getDelimiter(), equalTo("\t"));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(columns.getEntityType(), equalTo(ratingType));
    assertThat(columns.getHeaderLines(), equalTo(2));
    assertThat(columns.usesHeader(), equalTo(false));
    assertThat(columns.getEntityBuilder(), equalTo((Class) RatingBuilder.class));
}
/**
 * A boolean {@code header: true} setting configures one header line and marks
 * the format as using the header.
 */
@Test
public void testHeaderConfig() throws IOException {
    JsonNode config = reader.readTree("{\"file\": \"ratings.tsv\", \"header\": true}");
    TextEntitySource source = TextEntitySource.fromJSON("test", config, Paths.get("").toUri());

    // the source itself
    assertThat(source, notNullValue());
    assertThat(source.getURL(), equalTo(Paths.get("ratings.tsv").toUri().toURL()));
    assertThat(source.getFormat(), instanceOf(DelimitedColumnEntityFormat.class));

    // the format it carries
    DelimitedColumnEntityFormat columns = (DelimitedColumnEntityFormat) source.getFormat();
    assertThat(columns.getDelimiter(), equalTo("\t"));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(columns.getEntityType(), equalTo(ratingType));
    assertThat(columns.getHeaderLines(), equalTo(1));
    assertThat(columns.usesHeader(), equalTo(true));
    assertThat(columns.getEntityBuilder(), equalTo((Class) RatingBuilder.class));
}
/**
 * Cloning an entity type through serialization returns the very same instance.
 */
@Test
public void testSerialize() {
    EntityType original = EntityType.forName("wombat");
    EntityType roundTripped = SerializationUtils.clone(original);
    assertThat(roundTripped, sameInstance(original));
}
/**
 * A minimal CSV configuration yields a comma-delimited rating format with no header
 * and a base ID of zero.
 */
@Test
public void testMinimalCSVConfig() throws IOException {
    JsonNode config = reader.readTree("{\"file\": \"ratings.csv\", \"format\": \"csv\"}");
    TextEntitySource source = TextEntitySource.fromJSON("test", config, Paths.get("").toUri());

    // the source itself
    assertThat(source, notNullValue());
    assertThat(source.getURL(), equalTo(Paths.get("ratings.csv").toUri().toURL()));
    assertThat(source.getFormat(), instanceOf(DelimitedColumnEntityFormat.class));

    // the format it carries
    DelimitedColumnEntityFormat columns = (DelimitedColumnEntityFormat) source.getFormat();
    assertThat(columns.getDelimiter(), equalTo(","));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(columns.getEntityType(), equalTo(ratingType));
    assertThat(columns.getHeaderLines(), equalTo(0));
    assertThat(columns.usesHeader(), equalTo(false));
    assertThat(columns.getBaseId(), equalTo(0L));
    assertThat(columns.getEntityBuilder(), equalTo((Class) RatingBuilder.class));
}
/**
 * A freshly constructed format has no header lines, uses no header, is tab-delimited,
 * targets the {@code rating} entity type, and builds with {@code BasicEntityBuilder}.
 */
@Test
public void testDefaults() {
    DelimitedColumnEntityFormat fresh = new DelimitedColumnEntityFormat();

    // header handling
    assertThat(fresh.getHeaderLines(), equalTo(0));
    assertThat(fresh.usesHeader(), equalTo(false));

    // delimiter, entity type, and builder
    assertThat(fresh.getDelimiter(), equalTo("\t"));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(fresh.getEntityType(), equalTo(ratingType));
    assertThat(fresh.getEntityBuilder(), equalTo((Class) BasicEntityBuilder.class));
}
/**
 * A minimal TSV configuration with an explicit name yields a named text source with a
 * tab-delimited rating format and a layout listing the rating attributes.
 */
@Test
public void testMinimalTSVConfig() throws IOException {
    JsonNode config = reader.readTree("{\"file\": \"ratings.tsv\", \"name\": \"woozle\"}");
    EntitySource parsed = EntitySources.fromJSON(config, Paths.get("").toUri());
    assertThat(parsed, notNullValue());
    assertThat(parsed, instanceOf(TextEntitySource.class));

    // the source itself
    TextEntitySource textSource = (TextEntitySource) parsed;
    assertThat(textSource.getName(), equalTo("woozle"));
    assertThat(textSource.getURL(), equalTo(Paths.get("ratings.tsv").toUri().toURL()));
    assertThat(textSource.getFormat(), instanceOf(DelimitedColumnEntityFormat.class));

    // the format it carries
    DelimitedColumnEntityFormat columns = (DelimitedColumnEntityFormat) textSource.getFormat();
    assertThat(columns.getDelimiter(), equalTo("\t"));
    EntityType ratingType = EntityType.forName("rating");
    assertThat(columns.getEntityType(), equalTo(ratingType));
    assertThat(columns.getHeaderLines(), equalTo(0));
    assertThat(columns.usesHeader(), equalTo(false));
    assertThat(columns.getEntityBuilder(), equalTo((Class) RatingBuilder.class));

    // the layout it reports
    EntitySource.Layout sourceLayout = textSource.getLayout();
    assertThat(sourceLayout, notNullValue());
    assertThat(sourceLayout.getEntityType(), equalTo(CommonTypes.RATING));
    assertThat(sourceLayout.getAttributes(),
               containsInAnyOrder(CommonAttributes.ENTITY_ID,
                                  CommonAttributes.USER_ID,
                                  CommonAttributes.ITEM_ID,
                                  CommonAttributes.RATING,
                                  CommonAttributes.TIMESTAMP));
}
@Test public void testBasicParseLine() { DelimitedColumnEntityFormat format = new DelimitedColumnEntityFormat(); format.setDelimiter(","); EntityType pcType = EntityType.forName("pop_count"); format.setEntityType(pcType); format.addColumn(CommonAttributes.ITEM_ID); format.addColumn(CommonAttributes.COUNT); assertThat(format.getAttributes(), containsInAnyOrder(CommonAttributes.ENTITY_ID, CommonAttributes.ITEM_ID, CommonAttributes.COUNT)); LineEntityParser parser = format.makeParser(Collections.<String>emptyList()); assertThat(parser, notNullValue()); Entity pc = parser.parse("42,10"); assertThat(pc, notNullValue()); assertThat(pc.getId(), equalTo(1L)); assertThat(pc.get(CommonAttributes.ITEM_ID), equalTo(42L)); assertThat(pc.get(CommonAttributes.COUNT), equalTo(10)); // make sure the ID (row count) advances pc = parser.parse("78,2"); assertThat(pc, notNullValue()); assertThat(pc.getId(), equalTo(2L)); assertThat(pc.get(CommonAttributes.ITEM_ID), equalTo(78L)); assertThat(pc.get(CommonAttributes.COUNT), equalTo(2)); }
// Set the format's delimiter to a comma.
format.setDelimiter(",");
// Look up the "pop_count" entity type by name and assign it to the format.
EntityType pcType = EntityType.forName("pop_count");
format.setEntityType(pcType);
@Test public void testParseLineWithBaseId() { DelimitedColumnEntityFormat format = new DelimitedColumnEntityFormat(); format.setDelimiter(","); format.setBaseId(42); EntityType pcType = EntityType.forName("pop_count"); format.setEntityType(pcType); format.addColumn(CommonAttributes.ITEM_ID); format.addColumn(CommonAttributes.COUNT); assertThat(format.getAttributes(), containsInAnyOrder(CommonAttributes.ENTITY_ID, CommonAttributes.ITEM_ID, CommonAttributes.COUNT)); LineEntityParser parser = format.makeParser(Collections.<String>emptyList()); assertThat(parser, notNullValue()); Entity pc = parser.parse("42,10"); assertThat(pc, notNullValue()); assertThat(pc.getId(), equalTo(43L)); assertThat(pc.get(CommonAttributes.ITEM_ID), equalTo(42L)); assertThat(pc.get(CommonAttributes.COUNT), equalTo(10)); // make sure the ID (row count) advances pc = parser.parse("78,2"); assertThat(pc, notNullValue()); assertThat(pc.getId(), equalTo(44L)); assertThat(pc.get(CommonAttributes.ITEM_ID), equalTo(78L)); assertThat(pc.get(CommonAttributes.COUNT), equalTo(2)); }