mirror of https://github.com/apache/nifi.git
NIFI-505: Initial import of language translation nar
This commit is contained in:
parent
ff0bd2c669
commit
178c5cd287
|
@ -0,0 +1,36 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- groupId and version of this module are inherited from the bundle parent. -->
    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-language-translation-bundle</artifactId>
        <version>0.1.0-incubating-SNAPSHOT</version>
    </parent>

    <!-- This module is built with the "nar" packaging type. -->
    <artifactId>nifi-language-translation-nar</artifactId>
    <packaging>nar</packaging>

    <dependencies>
        <!-- The Yandex processors module of this bundle; its version is kept equal to the parent version above. -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-yandex-processors</artifactId>
            <version>0.1.0-incubating-SNAPSHOT</version>
        </dependency>
    </dependencies>

</project>
|
1
nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/.gitignore
vendored
Normal file
1
nifi/nifi-nar-bundles/nifi-language-translation-bundle/nifi-yandex-processors/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/target/
|
|
@ -0,0 +1,63 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- groupId and version of this module are inherited from the bundle parent. -->
    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-language-translation-bundle</artifactId>
        <version>0.1.0-incubating-SNAPSHOT</version>
    </parent>

    <artifactId>nifi-yandex-processors</artifactId>
    <packaging>jar</packaging>

    <!--
      No dependency below declares a version here.
      NOTE(review): presumably the versions come from a dependencyManagement section
      in a parent POM - confirm before changing any of them.
    -->
    <dependencies>
        <!-- NiFi API and processor utilities -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-api</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-processor-utils</artifactId>
        </dependency>

        <!-- Jersey client and JSON support -->
        <dependency>
            <groupId>com.sun.jersey</groupId>
            <artifactId>jersey-client</artifactId>
        </dependency>
        <dependency>
            <groupId>com.sun.jersey</groupId>
            <artifactId>jersey-json</artifactId>
        </dependency>

        <!-- Test-scoped dependencies -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-mock</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
|
|
@ -0,0 +1,325 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.yandex;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import javax.ws.rs.core.MediaType;
|
||||||
|
import javax.ws.rs.core.MultivaluedMap;
|
||||||
|
|
||||||
|
import org.apache.nifi.annotation.behavior.DynamicProperty;
|
||||||
|
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||||
|
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||||
|
import org.apache.nifi.annotation.documentation.Tags;
|
||||||
|
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||||
|
import org.apache.nifi.annotation.lifecycle.OnStopped;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.components.ValidationContext;
|
||||||
|
import org.apache.nifi.components.ValidationResult;
|
||||||
|
import org.apache.nifi.components.Validator;
|
||||||
|
import org.apache.nifi.flowfile.FlowFile;
|
||||||
|
import org.apache.nifi.processor.AbstractProcessor;
|
||||||
|
import org.apache.nifi.processor.ProcessContext;
|
||||||
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
|
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||||
|
import org.apache.nifi.processor.Relationship;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||||
|
import org.apache.nifi.processor.io.OutputStreamCallback;
|
||||||
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
|
import org.apache.nifi.processors.yandex.model.Translation;
|
||||||
|
import org.apache.nifi.processors.yandex.util.Languages;
|
||||||
|
import org.apache.nifi.processors.yandex.util.ObjectMapperResolver;
|
||||||
|
import org.apache.nifi.stream.io.StreamUtils;
|
||||||
|
import org.apache.nifi.util.StopWatch;
|
||||||
|
|
||||||
|
import com.sun.jersey.api.client.Client;
|
||||||
|
import com.sun.jersey.api.client.ClientResponse;
|
||||||
|
import com.sun.jersey.api.client.ClientResponse.Status;
|
||||||
|
import com.sun.jersey.api.client.WebResource;
|
||||||
|
import com.sun.jersey.api.client.config.ClientConfig;
|
||||||
|
import com.sun.jersey.api.client.config.DefaultClientConfig;
|
||||||
|
import com.sun.jersey.api.json.JSONConfiguration;
|
||||||
|
import com.sun.jersey.core.util.MultivaluedMapImpl;
|
||||||
|
|
||||||
|
@SupportsBatching
|
||||||
|
@Tags({"yandex", "translate", "translation", "language"})
|
||||||
|
@CapabilityDescription("Translates content and attributes from one language to another")
|
||||||
|
@WritesAttributes({
|
||||||
|
@WritesAttribute(attribute="yandex.translate.failure.reason", description="If the text cannot be translated, this attribute will be set indicating the reason for the failure"),
|
||||||
|
@WritesAttribute(attribute="language", description="When the translation succeeds, if the content was translated, this attribute will be set indicating the new language of the content")
|
||||||
|
})
|
||||||
|
@DynamicProperty(name="The name of an attribute to set that will contain the translated text of the value",
|
||||||
|
value="The value to translate",
|
||||||
|
supportsExpressionLanguage=true,
|
||||||
|
description="User-defined properties are used to translate arbitrary text based on attributes.")
|
||||||
|
public class YandexTranslate extends AbstractProcessor {
|
||||||
|
|
||||||
|
public static final PropertyDescriptor KEY = new PropertyDescriptor.Builder()
|
||||||
|
.name("Yandex API Key")
|
||||||
|
.description("The API Key that is registered with Yandex")
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.required(true)
|
||||||
|
.build();
|
||||||
|
public static final PropertyDescriptor SOURCE_LANGUAGE = new PropertyDescriptor.Builder()
|
||||||
|
.name("Input Language")
|
||||||
|
.description("The language of incoming data")
|
||||||
|
.required(true)
|
||||||
|
.defaultValue("sp")
|
||||||
|
.expressionLanguageSupported(true)
|
||||||
|
.addValidator(new LanguageNameValidator())
|
||||||
|
.build();
|
||||||
|
public static final PropertyDescriptor TARGET_LANGUAGE = new PropertyDescriptor.Builder()
|
||||||
|
.name("Target Language")
|
||||||
|
.description("The language to translate the text into")
|
||||||
|
.required(true)
|
||||||
|
.defaultValue("en")
|
||||||
|
.expressionLanguageSupported(true)
|
||||||
|
.addValidator(new LanguageNameValidator())
|
||||||
|
.build();
|
||||||
|
public static final PropertyDescriptor TRANSLATE_CONTENT = new PropertyDescriptor.Builder()
|
||||||
|
.name("Translate Content")
|
||||||
|
.description("Specifies whether or not the content should be translated. If false, only the text specified by user-defined properties will be translated.")
|
||||||
|
.required(true)
|
||||||
|
.allowableValues("true", "false")
|
||||||
|
.defaultValue("false")
|
||||||
|
.build();
|
||||||
|
public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder()
|
||||||
|
.name("Character Set")
|
||||||
|
.description("Specifies the character set of the data to be translated")
|
||||||
|
.required(true)
|
||||||
|
.defaultValue("UTF-8")
|
||||||
|
.expressionLanguageSupported(true)
|
||||||
|
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||||
|
.name("success")
|
||||||
|
.description("This relationship is used when the translation is successful")
|
||||||
|
.build();
|
||||||
|
public static final Relationship REL_COMMS_FAILURE = new Relationship.Builder()
|
||||||
|
.name("comms.failure")
|
||||||
|
.description("This relationship is used when the translation fails due to a problem such as a network failure, and for which the translation should be attempted again")
|
||||||
|
.build();
|
||||||
|
public static final Relationship REL_TRANSLATION_FAILED = new Relationship.Builder()
|
||||||
|
.name("translation.failure")
|
||||||
|
.description("This relationship is used if the translation cannot be performed for some reason other than communications failure")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
private List<PropertyDescriptor> descriptors;
|
||||||
|
private Set<Relationship> relationships;
|
||||||
|
|
||||||
|
private volatile Client client;
|
||||||
|
|
||||||
|
private static final String URL = "https://translate.yandex.net/api/v1.5/tr.json/translate";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void init(final ProcessorInitializationContext context) {
|
||||||
|
final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>();
|
||||||
|
descriptors.add(KEY);
|
||||||
|
descriptors.add(SOURCE_LANGUAGE);
|
||||||
|
descriptors.add(TARGET_LANGUAGE);
|
||||||
|
descriptors.add(TRANSLATE_CONTENT);
|
||||||
|
descriptors.add(CHARACTER_SET);
|
||||||
|
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||||
|
|
||||||
|
final Set<Relationship> relationships = new HashSet<Relationship>();
|
||||||
|
relationships.add(REL_SUCCESS);
|
||||||
|
relationships.add(REL_COMMS_FAILURE);
|
||||||
|
relationships.add(REL_TRANSLATION_FAILED);
|
||||||
|
this.relationships = Collections.unmodifiableSet(relationships);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Relationship> getRelationships() {
|
||||||
|
return this.relationships;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||||
|
return descriptors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
|
||||||
|
return new PropertyDescriptor.Builder()
|
||||||
|
.name(propertyDescriptorName)
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(true)
|
||||||
|
.dynamic(true)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
|
||||||
|
final List<ValidationResult> results = new ArrayList<>();
|
||||||
|
if ( validationContext.getProperty(TRANSLATE_CONTENT).asBoolean().equals(Boolean.FALSE) ) {
|
||||||
|
boolean foundDynamic = false;
|
||||||
|
for ( final PropertyDescriptor descriptor : validationContext.getProperties().keySet() ) {
|
||||||
|
if ( descriptor.isDynamic() ) {
|
||||||
|
foundDynamic = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !foundDynamic ) {
|
||||||
|
results.add(new ValidationResult.Builder().subject("Text to translate").input("<none>").valid(false).explanation("Must either set 'Translate Content' to true or add at least one user-defined property").build());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
@OnScheduled
|
||||||
|
public void onScheduled(final ProcessContext context) {
|
||||||
|
final ClientConfig config = new DefaultClientConfig();
|
||||||
|
config.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING, Boolean.TRUE);
|
||||||
|
config.getClasses().add(ObjectMapperResolver.class);
|
||||||
|
|
||||||
|
client = Client.create(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
@OnStopped
|
||||||
|
public void destroyClient() {
|
||||||
|
if ( client != null ) {
|
||||||
|
client.destroy();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||||
|
FlowFile flowFile = session.get();
|
||||||
|
if ( flowFile == null ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final StopWatch stopWatch = new StopWatch(true);
|
||||||
|
final String key = context.getProperty(KEY).getValue();
|
||||||
|
final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
|
||||||
|
final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
|
||||||
|
final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();
|
||||||
|
|
||||||
|
final List<String> attributeNames = new ArrayList<>();
|
||||||
|
final List<String> textValues = new ArrayList<>();
|
||||||
|
for ( final PropertyDescriptor descriptor : context.getProperties().keySet() ) {
|
||||||
|
if ( descriptor.isDynamic() ) {
|
||||||
|
attributeNames.add(descriptor.getName()); // add to list so that we know the order when the translations come back.
|
||||||
|
textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( context.getProperty(TRANSLATE_CONTENT).asBoolean() ) {
|
||||||
|
final byte[] buff = new byte[(int) flowFile.getSize()];
|
||||||
|
session.read(flowFile, new InputStreamCallback() {
|
||||||
|
@Override
|
||||||
|
public void process(final InputStream in) throws IOException {
|
||||||
|
StreamUtils.fillBuffer(in, buff);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
final String content = new String(buff, Charset.forName(encoding));
|
||||||
|
textValues.add(content);
|
||||||
|
}
|
||||||
|
|
||||||
|
WebResource webResource = client.resource(URL);
|
||||||
|
|
||||||
|
final MultivaluedMap<String, String> paramMap = new MultivaluedMapImpl();
|
||||||
|
paramMap.put("text", textValues);
|
||||||
|
paramMap.add("key", key);
|
||||||
|
paramMap.add("lang", sourceLanguage + "-" + targetLanguage);
|
||||||
|
|
||||||
|
WebResource.Builder builder = webResource
|
||||||
|
.accept(MediaType.APPLICATION_JSON)
|
||||||
|
.type(MediaType.APPLICATION_FORM_URLENCODED);
|
||||||
|
builder = builder.entity(paramMap);
|
||||||
|
|
||||||
|
final ClientResponse response;
|
||||||
|
try {
|
||||||
|
response = builder.post(ClientResponse.class);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
getLogger().error("Failed to make request to Yandex to transate text for {} due to {}; routing to comms.failure", new Object[] {flowFile, e});
|
||||||
|
session.transfer(flowFile, REL_COMMS_FAILURE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( response.getStatus() != Status.OK.getStatusCode() ) {
|
||||||
|
getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[] {
|
||||||
|
flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName()});
|
||||||
|
flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase());
|
||||||
|
session.transfer(flowFile, REL_TRANSLATION_FAILED);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final Map<String, String> newAttributes = new HashMap<>();
|
||||||
|
final Translation translation = response.getEntity(Translation.class);
|
||||||
|
final List<String> texts = translation.getText();
|
||||||
|
for (int i=0; i < texts.size(); i++) {
|
||||||
|
final String text = texts.get(i);
|
||||||
|
if ( i < attributeNames.size() ) {
|
||||||
|
final String attributeName = attributeNames.get(i);
|
||||||
|
newAttributes.put(attributeName, text);
|
||||||
|
} else {
|
||||||
|
flowFile = session.write(flowFile, new OutputStreamCallback() {
|
||||||
|
@Override
|
||||||
|
public void process(final OutputStream out) throws IOException {
|
||||||
|
out.write(text.getBytes(encoding));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
newAttributes.put("language", targetLanguage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( !newAttributes.isEmpty() ) {
|
||||||
|
flowFile = session.putAllAttributes(flowFile, newAttributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
stopWatch.stop();
|
||||||
|
session.transfer(flowFile, REL_SUCCESS);
|
||||||
|
getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success", new Object[] {texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration()});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static class LanguageNameValidator implements Validator {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
|
||||||
|
if ( context.isExpressionLanguagePresent(input) ) {
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).valid(true).explanation("Expression Language Present").build();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( Languages.getLanguageMap().keySet().contains(input.toLowerCase()) ) {
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(input + " is not a language that is supported by Yandex").build();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.yandex.model;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.xml.bind.annotation.XmlRootElement;
|
||||||
|
|
||||||
|
@XmlRootElement(name = "translation")
|
||||||
|
public class Translation {
|
||||||
|
private int code;
|
||||||
|
private String lang;
|
||||||
|
private List<String> text;
|
||||||
|
|
||||||
|
public int getCode() {
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCode(final int code) {
|
||||||
|
this.code = code;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLang() {
|
||||||
|
return lang;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLang(final String lang) {
|
||||||
|
this.lang = lang;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getText() {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setText(final List<String> text) {
|
||||||
|
this.text = text;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.yandex.util;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
 * Lookup table of languages. The map is bidirectional: every two-letter code maps to its
 * lower-case language name, and every language name maps back to its code.
 */
public class Languages {

    /** Code/name pairs, in the order in which they are inserted into the map. */
    private static final String[][] CODE_NAME_PAIRS = {
        {"ar", "arabic"},
        {"az", "azerbaijani"},
        {"be", "belarusian"},
        {"bg", "bulgarian"},
        {"bs", "bosnian"},
        {"ca", "catalan"},
        {"cs", "czech"},
        {"da", "danish"},
        {"de", "german"},
        {"el", "greek"},
        {"en", "english"},
        {"es", "spanish"},
        {"et", "estonian"},
        {"fi", "finnish"},
        {"fr", "french"},
        {"he", "hebrew"},
        {"hr", "croatian"},
        {"hu", "hungarian"},
        {"hy", "armenian"},
        {"id", "indonesian"},
        {"is", "icelandic"},
        {"it", "italian"},
        {"ja", "japanese"},
        {"ka", "georgian"},
        {"ko", "korean"},
        {"lt", "lithuanian"},
        {"lv", "latvian"},
        {"mk", "macedonian"},
        {"ms", "malay"},
        {"mt", "maltese"},
        {"nl", "dutch"},
        {"no", "norwegian"},
        {"pl", "polish"},
        {"pt", "portuguese"},
        {"ro", "romanian"},
        {"ru", "russian"},
        {"sk", "slovak"},
        {"sl", "slovenian"},
        {"sq", "albanian"},
        {"sr", "serbian"},
        {"sv", "swedish"},
        {"th", "thai"},
        {"tr", "turkish"},
        {"uk", "ukrainian"},
        {"vi", "vietnamese"},
        {"zh", "chinese"},
    };

    private static final Map<String, String> languageAbbreviationMap = new HashMap<>();

    static {
        // First pass: code -> name.
        for ( final String[] pair : CODE_NAME_PAIRS ) {
            languageAbbreviationMap.put(pair[0], pair[1]);
        }

        // Second pass: collect name -> code separately, then merge, so the map is not
        // modified while it is being iterated.
        final Map<String, String> nameToCode = new HashMap<>();
        for ( final Map.Entry<String, String> codeAndName : languageAbbreviationMap.entrySet() ) {
            nameToCode.put(codeAndName.getValue(), codeAndName.getKey());
        }

        languageAbbreviationMap.putAll(nameToCode);
    }

    /**
     * @return a read-only view of the bidirectional code/name map
     */
    public static Map<String, String> getLanguageMap() {
        return Collections.unmodifiableMap(languageAbbreviationMap);
    }
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.yandex.util;
|
||||||
|
|
||||||
|
import javax.ws.rs.ext.ContextResolver;
|
||||||
|
import javax.ws.rs.ext.Provider;
|
||||||
|
import org.codehaus.jackson.map.AnnotationIntrospector;
|
||||||
|
import org.codehaus.jackson.map.DeserializationConfig;
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
import org.codehaus.jackson.map.SerializationConfig;
|
||||||
|
import org.codehaus.jackson.map.annotate.JsonSerialize.Inclusion;
|
||||||
|
import org.codehaus.jackson.xc.JaxbAnnotationIntrospector;
|
||||||
|
|
||||||
|
@Provider
|
||||||
|
public class ObjectMapperResolver implements ContextResolver<ObjectMapper> {
|
||||||
|
|
||||||
|
private final ObjectMapper mapper;
|
||||||
|
|
||||||
|
public ObjectMapperResolver() throws Exception {
|
||||||
|
mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
final AnnotationIntrospector jaxbIntrospector = new JaxbAnnotationIntrospector();
|
||||||
|
final SerializationConfig serializationConfig = mapper.getSerializationConfig();
|
||||||
|
final DeserializationConfig deserializationConfig = mapper.getDeserializationConfig();
|
||||||
|
|
||||||
|
mapper.setSerializationConfig(serializationConfig.withSerializationInclusion(Inclusion.NON_NULL).withAnnotationIntrospector(jaxbIntrospector));
|
||||||
|
mapper.setDeserializationConfig(deserializationConfig.withAnnotationIntrospector(jaxbIntrospector));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ObjectMapper getContext(Class<?> objectType) {
|
||||||
|
return mapper;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
org.apache.nifi.processors.yandex.YandexTranslate
|
|
@ -0,0 +1,141 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.yandex;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.apache.nifi.processors.yandex.YandexTranslate;
|
||||||
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
|
import org.apache.nifi.util.TestRunner;
|
||||||
|
import org.apache.nifi.util.TestRunners;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@Ignore("For local testing only; requires local file to be populated with Yandex API Key")
|
||||||
|
public class TestYandexTranslate {
|
||||||
|
|
||||||
|
private TestRunner testRunner;
|
||||||
|
private String apiKey;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void init() throws IOException {
|
||||||
|
testRunner = TestRunners.newTestRunner(YandexTranslate.class);
|
||||||
|
|
||||||
|
final Properties properties = new Properties();
|
||||||
|
try (final InputStream in = new FileInputStream(new File("C:/dev/notes/yandex-info.txt"))) {
|
||||||
|
properties.load(in);
|
||||||
|
}
|
||||||
|
apiKey = properties.getProperty("api_key").trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTranslateContent() {
|
||||||
|
testRunner.setProperty(YandexTranslate.KEY, apiKey);
|
||||||
|
testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr");
|
||||||
|
testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en");
|
||||||
|
testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "true");
|
||||||
|
testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8");
|
||||||
|
|
||||||
|
testRunner.enqueue("bonjour".getBytes());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1);
|
||||||
|
final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0);
|
||||||
|
|
||||||
|
final String outText = new String(out.toByteArray());
|
||||||
|
assertEquals("hello", outText);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTranslateSingleAttribute() {
|
||||||
|
testRunner.setProperty(YandexTranslate.KEY, apiKey);
|
||||||
|
testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr");
|
||||||
|
testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en");
|
||||||
|
testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "false");
|
||||||
|
testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8");
|
||||||
|
testRunner.setProperty("translated", "bonjour");
|
||||||
|
|
||||||
|
testRunner.enqueue(new byte[0]);
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1);
|
||||||
|
final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0);
|
||||||
|
|
||||||
|
assertEquals(0, out.toByteArray().length);
|
||||||
|
out.assertAttributeEquals("translated", "hello");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTranslateMultipleAttributes() {
|
||||||
|
testRunner.setProperty(YandexTranslate.KEY, apiKey);
|
||||||
|
testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr");
|
||||||
|
testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en");
|
||||||
|
testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "false");
|
||||||
|
testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8");
|
||||||
|
testRunner.setProperty("hello", "bonjour");
|
||||||
|
testRunner.setProperty("translate", "traduire");
|
||||||
|
testRunner.setProperty("fun", "amusant");
|
||||||
|
|
||||||
|
testRunner.enqueue(new byte[0]);
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1);
|
||||||
|
final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0);
|
||||||
|
|
||||||
|
assertEquals(0, out.toByteArray().length);
|
||||||
|
out.assertAttributeEquals("hello", "hello");
|
||||||
|
out.assertAttributeEquals("translate", "translate");
|
||||||
|
out.assertAttributeEquals("fun", "fun");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTranslateContentAndMultipleAttributes() {
|
||||||
|
testRunner.setProperty(YandexTranslate.KEY, apiKey);
|
||||||
|
testRunner.setProperty(YandexTranslate.SOURCE_LANGUAGE, "fr");
|
||||||
|
testRunner.setProperty(YandexTranslate.TARGET_LANGUAGE, "en");
|
||||||
|
testRunner.setProperty(YandexTranslate.TRANSLATE_CONTENT, "true");
|
||||||
|
testRunner.setProperty(YandexTranslate.CHARACTER_SET, "UTF-8");
|
||||||
|
testRunner.setProperty("hello", "bonjour");
|
||||||
|
testRunner.setProperty("translate", "traduire");
|
||||||
|
testRunner.setProperty("fun", "amusant");
|
||||||
|
testRunner.setProperty("nifi", "nifi");
|
||||||
|
|
||||||
|
testRunner.enqueue("ordinateur".getBytes());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertAllFlowFilesTransferred(YandexTranslate.REL_SUCCESS, 1);
|
||||||
|
final MockFlowFile out = testRunner.getFlowFilesForRelationship(YandexTranslate.REL_SUCCESS).get(0);
|
||||||
|
|
||||||
|
out.assertContentEquals("computer");
|
||||||
|
|
||||||
|
out.assertAttributeEquals("hello", "hello");
|
||||||
|
out.assertAttributeEquals("translate", "translate");
|
||||||
|
out.assertAttributeEquals("fun", "fun");
|
||||||
|
out.assertAttributeEquals("nifi", "nifi");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Parent POM of this bundle. -->
    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-nar-bundles</artifactId>
        <version>0.1.0-incubating-SNAPSHOT</version>
    </parent>

    <!-- Aggregator only: packaging is "pom", so this module produces no artifact of its own. -->
    <artifactId>nifi-language-translation-bundle</artifactId>
    <packaging>pom</packaging>

    <!-- Child modules, listed with the processors before the NAR module. -->
    <modules>
        <module>nifi-yandex-processors</module>
        <module>nifi-language-translation-nar</module>
    </modules>

    <!--
        Managed versions for the Jersey artifacts. Both entries use the
        jersey.version property, which is not defined in this file and is
        presumably supplied by an ancestor POM (to be confirmed).
    -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>com.sun.jersey</groupId>
                <artifactId>jersey-client</artifactId>
                <version>${jersey.version}</version>
            </dependency>
            <dependency>
                <groupId>com.sun.jersey</groupId>
                <artifactId>jersey-json</artifactId>
                <version>${jersey.version}</version>
            </dependency>
        </dependencies>
    </dependencyManagement>

</project>
|
Loading…
Reference in New Issue