From f323e7b6eb3c25ca1dd74cf118938e5fe8495b65 Mon Sep 17 00:00:00 2001
From: Amrit Kaur
Date: Sun, 27 Oct 2019 17:21:10 +0100
Subject: [PATCH 1/7] Adding entity resolution feature to SANSA-ML. The feature uses minhashLSH based approach to identify and link similar entities in two datasets

---
 ...tyResolution_RDFData_CountVectorizer.scala | 362 ++++++++++++++++++
 .../EntityResolution_RDFData_HashingTF.scala  | 355 +++++++++++++++++
 2 files changed, 717 insertions(+)
 create mode 100644 sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala
 create mode 100644 sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala

diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala
new file mode 100644
index 0000000..27980c5
--- /dev/null
+++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala
@@ -0,0 +1,362 @@
+package net.sansa_stack.ml.spark.entity_resolution
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.rdd.RDD
+import org.apache.jena.graph.Triple
+import org.apache.spark.ml.feature.{ Tokenizer, HashingTF }
+import org.apache.spark.ml.feature.CountVectorizer
+import org.apache.spark.ml.feature.MinHashLSH
+import org.apache.spark.ml.feature.MinHashLSHModel
+import org.apache.spark.sql.Dataset
+import org.apache.spark.sql.functions._
+import org.apache.spark.ml.feature.RegexTokenizer
+import org.apache.spark.sql.Row
+import org.apache.spark.RangePartitioner
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.RangePartitioner
+import org.apache.spark.RangePartitioner
+
+/* A generic Entity Resolution approach for larger datasets (e.g. 24.8GB); subjects are vectorised with CountVectorizer and matched with MinHashLSH
+ * Dataset1 has 47 million entities, approx 19GB
+ * Dataset2 has 19 million entities, approx 5.8GB
+ * */
+class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple],
+  teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double,
+  threshold_object: Double, vocab_size: Long, output_path: String) extends Serializable {
+
+  /**
+   * Entry point that chains the other apis of this class.
+   * Step0- Build entity profiles for both inputs, by get_entity_profiles api
+   * Step1- Find matching entities based on LSH subjects, by lsh_subjects api
+   * Step2- Compare the predicates for matched entities, by get_similar_predicates api
+   * Step3- Compare the objects for intersecting predicates in matched entities, by get_similarity_objects api, then evaluate and save
+   */
+  def run(): RDD[(String, String)] = {
+
+    // Define entity profiles from triplesRDD1 and triplesRDD2
+    val entity_profiles1 = get_entity_profiles(triplesRDD1)
+    val entity_profiles2 = get_entity_profiles(triplesRDD2)
+
+    // Similar entities matched based on subjects
+    val ds_subjects1: RDD[(String, String, String, String)] = lsh_subjects(entity_profiles1, entity_profiles2)
+    val ds_subjects = ds_subjects1.repartition(600).persist(StorageLevel.MEMORY_AND_DISK)
+
+    // Compare the predicates for matched entities by subject
+    val refined_data_pred = get_similar_predicates(ds_subjects)
+    val ds_predicates = refined_data_pred.repartition(400).persist(StorageLevel.MEMORY_AND_DISK)
+
+    // Compare the objects for intersecting predicates in matched entities by predicate level knowledge
+    val refined_objects = get_similarity_objects(ds_predicates)
+
+    // Evaluate our results against the groundtruth data (also prints the metrics and saves the predicted pairs)
+    val output = evaluation(refined_objects)
+
+    output
+
+  }
+
+  /**
+   * Filters triples and defines entity profiles.
+   * The triplesRDD needs filtering. We perform data cleansing by removing common wikilinks predicates listed in removePredicates List
+   * and owl:sameAs triples
+   * This api broadcasts the List removePredicates and uses mapPartitions to find the triples in distinct_triples whose predicate URI contains a listed entry
+   * Finally, subtract those triples to get filtered_triples RDD
+   * Consider only triples with objects as URI or with "en" literal language or no literal language
+   * Group all triples of a particular subject to form the entity profiles in the format:
+   * subject -> "predicate1:object1 , predicate2:object2 , ..." (only the last path segment of each URI is kept)
+   * Triples whose subject is not a URI, or whose object is neither a URI nor a literal (e.g. blank nodes), are skipped
+   * @param triplesRDD contains RDF data in the form of triples
+   * @return ([(subject,predicate1:object1 , predicate2:object2 , ... , predicaten:objectn)]),
+   * where subject is the key and the group of paired predicate:object forms the value
+   * ex:- (Budapest_City, areaCode:1 , country:Hungary)
+   */
+  def get_entity_profiles(triplesRDD: RDD[Triple]): RDD[(String, String)] = {
+
+    //predicates to be filtered out from triples; entries are matched as substrings of the full predicate URI, hence "owl#sameAs" rather than the prefixed "owl:sameas"
+    val removePredicates: List[String] = List("owl#sameAs", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink",
+      "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted",
+      "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects")
+
+    val broadcastVar = spark.sparkContext.broadcast(removePredicates) // broadcast the small list to every executor
+    val distinct_triples = triplesRDD.distinct()
+    val remove_triples = distinct_triples.mapPartitions({ f =>
+      val k = broadcastVar.value
+      for {
+        x <- f
+        z <- k
+        if x.getPredicate.getURI().contains(z)
+      } yield (x)
+
+    })
+
+    val filtered_triples = distinct_triples.subtract(remove_triples)
+
+    //Define entity profiles; the node-kind checks come first because getURI / getLiteralLanguage are only valid on URI / literal nodes
+    val entity = filtered_triples.filter(f => f.getSubject.isURI() && (f.getObject().isURI() || (f.getObject.isLiteral() && (f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == ""))))
+      .map(f => {
+        val key = f.getSubject.getURI.split("/").last.trim()
+        val pred = f.getPredicate.getURI.split(Array('/', '#')).last.trim()
+        if (f.getObject.isURI()) {
+          val obj = f.getObject.getURI.split("/").last.trim()
+          val value = pred + ":" + obj //predicate and object are separated by ':'
+          (key, value)
+        } else {
+          val obj = f.getObject.getLiteral.toString().split(Array('^', '@')).head.trim()
+          val value = pred + ":" + obj.replace(":", "")
+          (key, value)
+        }
+      }).reduceByKey(_ + " , " + _) // triples separated by ' , '
+
+    entity
+  }
+
+  /**
+   * This api matches similar entities based on similarity of their subjects
+   * Get subject data from entites_RDD1 and entities_RDD2.
+   * Tokenise it by "_" to form ent_sub1 and ent_sub2 for comparison
+   * ex:- (Budapest_City, Array(Budapest, City))
+   *
+   * Apply LSH technique on the tokenised subjects to get matched pairs within threshold_subject (constructor parameter, specified by user)
+   * Join the predicate:object knowledge for each of the entity matches returned
+   *
+   * @param entites_RDD1 entity profiles of the first dataset, generated from get_entity_profiles
+   * @param entities_RDD2 entity profiles of the second dataset, generated from get_entity_profiles
+   * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject , entity2_predicates:objectspairs)]),
+   * where entity1_subject and entity2_subject are the matched pairs
+   */
+  def lsh_subjects(entites_RDD1: RDD[(String, String)], entities_RDD2: RDD[(String, String)]): RDD[(String, String, String, String)] = {
+
+    //Get subject data and tokenise it
+    val ent_sub1 = entites_RDD1.map(f => { (f._1, f._1.split("_")) })
+    val part_rdd1 = new RangePartitioner(400, ent_sub1)
+    val partitioned_rdd1 = ent_sub1.partitionBy(part_rdd1).persist(StorageLevel.MEMORY_AND_DISK)
+
+    val ent_sub2 = entities_RDD2.map(f => { (f._1, f._1.split("_")) })
+    val part_rdd2 = new RangePartitioner(400, ent_sub2)
+    val partitioned_rdd2 = ent_sub2.partitionBy(part_rdd2).persist(StorageLevel.MEMORY_AND_DISK)
+
+    val entities_Df1 = spark.createDataFrame(partitioned_rdd1).toDF("entities", "ent_sub")
+    val entities_Df2 = spark.createDataFrame(partitioned_rdd2).toDF("entities", "ent_sub")
+
+    //Apply LSH technique by CountVectorizer vectorisation
+    val (cvfeatured_entities_Df1: DataFrame, cvfeatured_entities_Df2: DataFrame) = applyCountVectorizer_sub("ent_sub", "features", entities_Df1, entities_Df2)
+    val (model_sub: MinHashLSHModel, transformed_sub_Df1: DataFrame, transformed_sub_Df2: DataFrame) = minHashLSH(cvfeatured_entities_Df1, cvfeatured_entities_Df2)
+    val ds_subjects = approxsimilarityjoin(model_sub, transformed_sub_Df1, transformed_sub_Df2)
+
+    //Combine predicate:object level knowledge for the matched pairs: join on entity1 first, then re-key by entity2 and join again
+    val ds_subjects_rdd = ds_subjects.rdd
+    val ds_subjects_data1 = ds_subjects_rdd.map(f => { (f.get(0).toString(), f.get(1).toString()) }).join(entites_RDD1)
+    val ds_subjects_data2 = ds_subjects_data1.map(f => { (f._2._1, (f._1, f._2._2)) }).join(entities_RDD2)
+    val ds_subjects_data = ds_subjects_data2.map(f => { (f._2._1._1, f._2._1._2, f._1, f._2._2) })
+
+    ds_subjects_data
+  }
+
+  /**
+   * This api vectorises the tokenised entity subjects to form features
+   *
+   * Fits one CountVectorizer on the distinct union of data1 and data2; setVocabSize keeps at most vocab_size terms of inp_col in the dictionary
+   *
+   * @param inp_col specifies the input column for vectorisation
+   * @param out_col specifies the output column containing features
+   * data1 and data2 are dataframes containing the tokenised subjects
+   * @return Dataframes with vectorised features i.e. tokenised subjects are vectorised here
+   */
+  def applyCountVectorizer_sub(inp_col: String, out_col: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = {
+    val data = data1.union(data2).distinct()
+    val countVectorizer = new CountVectorizer().setInputCol(inp_col).setOutputCol(out_col).setVocabSize(vocab_size.toInt).setMinDF(1).fit(data)
+    val cvfeatured_entities_Df1 = countVectorizer.transform(data1)
+    val cvfeatured_entities_Df2 = countVectorizer.transform(data2)
+    (cvfeatured_entities_Df1, cvfeatured_entities_Df2)
+  }
+
+  /**
+   * This api MinHashes the featured entity subjects
+   *
+   * setting setNumHashTables to 3 means 3 hash values are generated for each feature vector
+   *
+   * @param featured_entites_Df1 featured dataframe of the first dataset, generated by applyCountVectorizer_sub api; featured_entites_Df2 is its counterpart for the second dataset
+   * @return MinHashLSH model with Dataframes containing minhashes generated for the features
+   */
+  def minHashLSH(featured_entites_Df1: DataFrame, featured_entites_Df2: DataFrame): (MinHashLSHModel, DataFrame, DataFrame) = {
+    val mh = new MinHashLSH().setNumHashTables(3).setInputCol("features").setOutputCol("hashed_values")
+    val featured_data = featured_entites_Df1.union(featured_entites_Df2).distinct()
+    val model = mh.fit(featured_data)
+    val transformed_entities_Df1 = model.transform(featured_entites_Df1)
+    val transformed_entities_Df2 = model.transform(featured_entites_Df2)
+    // NOTE(review): MinHashLSH presumably rejects all-zero vectors; a subject whose tokens all fall outside the
+    // vocab_size-limited dictionary would produce one - confirm against the Spark docs and filter such rows if needed
+    (model, transformed_entities_Df1, transformed_entities_Df2)
+  }
+
+  /**
+   * This api applies approxSimilarityJoin to detect entity matches with subject similarity
+   *
+   * The join threshold is the constructor parameter threshold_subject, specified by user
+   * A lower threshold means the entity matches found are closely related
+   *
+   * @param model - MinHashLSHModel generated by minHashLSH api
+   * @param df1 dataframe of the first dataset generated by minHashLSH api; df2 is its counterpart for the second dataset
+   * Only the "entities" columns of both sides are kept, renamed to entity1 and entity2
+   * @return matched entity pairs
+   */
+  def approxsimilarityjoin(model: MinHashLSHModel, df1: DataFrame, df2: DataFrame): DataFrame = {
+    val dataset = model.approxSimilarityJoin(df1, df2, threshold_subject)
+    val refined_ds = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) //only for lsh1subjects
+    refined_ds
+  }
+
+  /**
+   * This api compares predicate level knowledge of similar entities matched pairs generated by lsh_subjects api
+   * Compute jaccard similarity on the predicates of paired entity matches
+   * Keep the entity matches whose similarity is at least jsimilarity_predicate (constructor parameter, specified by user)
+   *
+   * @param similar_subj_rdd contains the entity matches based on subjects with integrated attribute level knowledge, generated from lsh_subjects
+   * Predicates are compared as lists, so a predicate that occurs several times in a profile is counted several times
+   * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject, entity2_predicates:objectspairs, intersecting_predicates, jsimilarityofpredicates)]),
+   * where entity1_subject and entity2_subject are the matched pairs on predicate level knowledge
+   */
+  def get_similar_predicates(similar_subj_rdd: RDD[(String, String, String, String)]): RDD[(String, List[String], String, List[String], List[String], Double)] = {
+    val refined_data_sub = similar_subj_rdd.map(f => {
+      val sub1 = f._1 // entity1_subject
+      val s_data1 = f._2 // entity1_predicateobject_pairs
+      val sub2 = f._3 // entity2_subject
+      val s_data2 = f._4 // entity2_predicateobject_pairs
+
+      //segregate each of the predicate_object pairs for both the entities
+      val pred_obj1 = s_data1.split(" , ").toList
+      val pred_obj2 = s_data2.split(" , ").toList
+
+      //empty lists for predicates
+      var list_pred1 = List[String]()
+      var list_pred2 = List[String]()
+
+      //extract only predicates from the predicate_object for both entities for comparison
+      for (x <- pred_obj1) {
+        list_pred1 = list_pred1 :+ x.split(":").head
+      }
+      for (x <- pred_obj2) {
+        list_pred2 = list_pred2 :+ x.split(":").head
+      }
+
+      //Find common predicates among the entities
+      val intersect_pred = list_pred1.intersect(list_pred2)
+      val union_pred = list_pred1.length + list_pred2.length - intersect_pred.length
+
+      //calculate jaccard similarity on predicate level knowledge of both entities for comparison
+      val similarity = intersect_pred.length.toDouble / union_pred.toDouble
+
+      (sub1, pred_obj1, sub2, pred_obj2, intersect_pred, similarity)
+    })
+    similar_subj_rdd.unpersist() // NOTE(review): the map above is lazy, so this runs before any action materialises the input and appears to cancel the caller's persist()
+
+    //filter the entity pairs with jaccard similarities that fit or are above user defined jsimilarity for predicate level knowledge comparison
+    val refined_data_pred = refined_data_sub.filter(f => f._6 >= jsimilarity_predicate)
+
+    refined_data_pred
+  }
+
+  /**
+   * This api removes false positives by comparing object level knowledge of similar entities matched pairs generated by get_similar_predicates api
+   * Compute jaccard similarity on the objects of paired entity matches, only for the intersecting predicates
+   * Keep the entity matches whose similarity is at least threshold_object (constructor parameter, specified by user)
+   *
+   * @param ds_pred contains the entity matches based on predicate level knowledge
+   * Objects are split on spaces and de-duplicated before the jaccard similarity is computed
+   * @return ([(entity1_subject, entity2_subject, jsimilarityofobjects)]),
+   * where entity1_subject and entity2_subject are the matched pairs on object level knowledge
+   */
+  def get_similarity_objects(ds_pred: RDD[(String, List[String], String, List[String], List[String], Double)]): RDD[(String, String, Double)] = {
+    val mapped_objects = ds_pred.map(f => {
+      val sub1 = f._1 // entity1_subject
+      val pred_obj1 = f._2 // entity1_predicateobject_pairs
+      val sub2 = f._3 // entity2_subject
+      val pred_obj2 = f._4 // entity2_predicateobject_pairs
+      val common_pred = f._5 // intersecting_predicates of both entities for comparing their objects
+
+      var obj1: String = " "
+      var obj2: String = " "
+
+      // Segregate objects of only intersecting predicates among the two entities
+      for (x <- pred_obj1) {
+        val pred = x.split(":").head
+        val obj = x.split(":").last
+        if (common_pred.contains(pred))
+          obj1 = obj1 + " " + obj
+      }
+
+      for (x <- pred_obj2) {
+        val pred = x.split(":").head
+        val obj = x.split(":").last
+        if (common_pred.contains(pred))
+          obj2 = obj2 + " " + obj
+      }
+
+      val sub_obj1 = obj1.trim().split(" ").toList.distinct
+      val sub_obj2 = obj2.trim().split(" ").toList.distinct
+
+      //Compute jaccard similarity on the objects
+      val intersect_obj = sub_obj1.intersect(sub_obj2).length
+      val union_obj = sub_obj1.length + sub_obj2.length - intersect_obj
+
+      val similarity = intersect_obj.toDouble / union_obj.toDouble
+
+      (sub1, sub2, similarity)
+    })
+    ds_pred.unpersist() // NOTE(review): runs before the lazy map above is materialised, see get_similar_predicates
+    // Extract entity matches with similarity of at least threshold_object, specified by user
+    val results = mapped_objects.filter(f => f._3 >= threshold_object)
+
+    results
+  }
+
+  /**
+   * This api evaluates our results by comparing it with groundtruth
+   * Compute Precision, Recall and F1-Measure (as percentages; reported as 0.0 when the denominator is zero), print them and save the predicted pairs
+   *
+   * @param result contains the entity matches predicted by our algorithm, generated from get_similarity_objects api
+   * Uses the constructor parameter teacher (first two columns) as the groundtruth for comparison
+   * and the constructor parameter output_path as the path to save the result rdd
+   */
+  def evaluation(result: RDD[(String, String, Double)]): RDD[(String, String)] = {
+    val predicted_rdd = result.map(f => {
+      (f._1, f._2)
+    }).persist(StorageLevel.MEMORY_AND_DISK) // reused by three actions below (intersection, count, save); avoids recomputing the whole pipeline
+    val teacher_rdd = teacher.rdd
+    val actual_rdd = teacher_rdd.map(f => {
+      (f.get(0).toString(), f.get(1).toString())
+    })
+
+    //Calculate TruePositives for precision, recall and f1-measure
+    val truePositives = actual_rdd.intersection(predicted_rdd).count
+    println("***************************************************************************************")
+    println("***************************************************************************************")
+
+    val actual = actual_rdd.count()
+    val results = predicted_rdd.count()
+    println("Actual: " + actual)
+    println("Predicted: " + results)
+    println("True Positives: " + truePositives)
+    val precision = if (results == 0) 0.0 else (truePositives * 100.00) / results
+    println("Precision: " + precision)
+    val recall = if (actual == 0) 0.0 else (truePositives * 100.00) / actual
+    println("Recall: " + recall)
+    val f1_measure = if (precision + recall == 0) 0.0 else (2 * precision * recall) / (precision + recall)
+    println("F1-measure: " + f1_measure)
+    println("***************************************************************************************")
+    println("***************************************************************************************")
+    //Save the output_rdd
+    predicted_rdd.coalesce(1).saveAsTextFile(output_path)
+    println("Output Saved!")
+
+    predicted_rdd
+  }
+}
+
+object EntityResolution_RDFData_CountVectorizer {
+  def apply(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple],
+    teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double,
+    threshold_object: Double, vocab_size: Long, output_path: String): EntityResolution_RDFData_CountVectorizer = new EntityResolution_RDFData_CountVectorizer(spark, triplesRDD1, triplesRDD2,
+    teacher, threshold_subject, jsimilarity_predicate, threshold_object, vocab_size, output_path)
+}
diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala
new file mode 100644
index 0000000..e24ec6c
--- /dev/null
+++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala
@@ -0,0 +1,355 @@
+package net.sansa_stack.ml.spark.entity_resolution
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.rdd.RDD
+import org.apache.jena.graph.Triple
+import org.apache.spark.ml.feature.{ Tokenizer, HashingTF }
+import org.apache.spark.ml.feature.CountVectorizer
+import org.apache.spark.ml.feature.MinHashLSH
+import org.apache.spark.ml.feature.MinHashLSHModel
+import org.apache.spark.sql.Dataset
+import org.apache.spark.sql.functions._
+import org.apache.spark.ml.feature.RegexTokenizer
+import org.apache.spark.sql.Row
+import org.apache.spark.RangePartitioner
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.RangePartitioner
+import org.apache.spark.RangePartitioner
+
+/* A generic Entity Resolution approach for larger datasets (e.g. 24.8GB); subjects are vectorised with HashingTF and matched with MinHashLSH
+ * Dataset1 has 47 million entities, approx 19GB
+ * Dataset2 has 19 million entities, approx 5.8GB
+ * */
+class EntityResolution_RDFData_HashingTF(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple],
+  teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double,
+  threshold_object: Double, vocab_size: Long, output_path: String) extends Serializable {
+
+  /**
+   * Entry point that chains the other apis of this class.
+   * Step0- Build entity profiles for both inputs, by get_entity_profiles api
+   * Step1- Find matching entities based on LSH subjects, by lsh_subjects api
+   * Step2- Compare the predicates for matched entities, by get_similar_predicates api
+   * Step3- Compare the objects for intersecting predicates in matched entities, by get_similarity_objects api, then evaluate and save
+   */
+  def run(): RDD[(String, String)] = {
+
+    // Define entity profiles from triplesRDD1 and triplesRDD2
+    val entity_profiles1 = get_entity_profiles(triplesRDD1)
+    val entity_profiles2 = get_entity_profiles(triplesRDD2)
+
+    // Similar entities matched based on subjects
+    val ds_subjects1: RDD[(String, String, String, String)] = lsh_subjects(entity_profiles1, entity_profiles2)
+    val ds_subjects = ds_subjects1.repartition(600).persist(StorageLevel.MEMORY_AND_DISK)
+
+    // Compare the predicates for matched entities by subject
+    val refined_data_pred = get_similar_predicates(ds_subjects)
+    val ds_predicates = refined_data_pred.repartition(400).persist(StorageLevel.MEMORY_AND_DISK)
+
+    // Compare the objects for intersecting predicates in matched entities by predicate level knowledge
+    val refined_objects = get_similarity_objects(ds_predicates)
+
+    // Evaluate our results against the groundtruth data; evaluation reads teacher and output_path from the constructor
+    val output = evaluation(refined_objects)
+
+    output
+  }
+
+  /**
+   * Filters triples and defines entity profiles.
+   * The triplesRDD needs filtering. We perform data cleansing by removing common wikilinks predicates listed in removePredicates List
+   * and owl:sameAs triples
+   * This api broadcasts the List removePredicates and uses mapPartitions to find the triples in distinct_triples whose predicate URI contains a listed entry
+   * Finally, subtract those triples to get filtered_triples RDD
+   * Consider only triples with objects as URI or with "en" literal language or no literal language
+   * Group all triples of a particular subject to form the entity profiles in the format:
+   * subject -> "predicate1:object1 , predicate2:object2 , ..." (only the last path segment of each URI is kept)
+   * Triples whose subject is not a URI, or whose object is neither a URI nor a literal (e.g. blank nodes), are skipped
+   * @param triplesRDD contains RDF data in the form of triples
+   * @return ([(subject,predicate1:object1 , predicate2:object2 , ... , predicaten:objectn)]),
+   * where subject is the key and the group of paired predicate:object forms the value
+   * ex:- (Budapest_City, areaCode:1 , country:Hungary)
+   */
+  def get_entity_profiles(triplesRDD: RDD[Triple]): RDD[(String, String)] = {
+
+    //predicates to be filtered out from triples; entries are matched as substrings of the full predicate URI, hence "owl#sameAs" rather than the prefixed "owl:sameas"
+    val removePredicates: List[String] = List("owl#sameAs", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink",
+      "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted",
+      "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects")
+
+    val broadcastVar = spark.sparkContext.broadcast(removePredicates) // broadcast the small list to every executor
+    val distinct_triples = triplesRDD.distinct()
+    val remove_triples = distinct_triples.mapPartitions({ f =>
+      val k = broadcastVar.value
+      for {
+        x <- f
+        z <- k
+        if x.getPredicate.getURI().contains(z)
+      } yield (x)
+
+    })
+
+    val filtered_triples = distinct_triples.subtract(remove_triples)
+
+    //Define entity profiles; the node-kind checks come first because getURI / getLiteralLanguage are only valid on URI / literal nodes
+    val entity = filtered_triples.filter(f => f.getSubject.isURI() && (f.getObject().isURI() || (f.getObject.isLiteral() && (f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == ""))))
+      .map(f => {
+        val key = f.getSubject.getURI.split("/").last.trim()
+        val pred = f.getPredicate.getURI.split(Array('/', '#')).last.trim()
+        if (f.getObject.isURI()) {
+          val obj = f.getObject.getURI.split("/").last.trim()
+          val value = pred + ":" + obj //predicate and object are separated by ':'
+          (key, value)
+        } else {
+          val obj = f.getObject.getLiteral.toString().split(Array('^', '@')).head.trim()
+          val value = pred + ":" + obj.replace(":", "")
+          (key, value)
+        }
+      }).reduceByKey(_ + " , " + _) // triples separated by ' , '
+    entity
+  }
+
+  /**
+   * This api matches similar entities based on similarity of their subjects
+   * Get subject data from entites_RDD1 and entities_RDD2.
+   * Tokenise it by "_" to form ent_sub1 and ent_sub2 for comparison
+   * ex:- (Budapest_City, Array(Budapest, City))
+   *
+   * Apply LSH technique on the tokenised subjects to get matched pairs within threshold_subject (constructor parameter, specified by user)
+   * Join the predicate:object knowledge for each of the entity matches returned
+   *
+   * @param entites_RDD1 entity profiles of the first dataset, generated from get_entity_profiles
+   * @param entities_RDD2 entity profiles of the second dataset, generated from get_entity_profiles
+   * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject , entity2_predicates:objectspairs)]),
+   * where entity1_subject and entity2_subject are the matched pairs
+   */
+  def lsh_subjects(entites_RDD1: RDD[(String, String)], entities_RDD2: RDD[(String, String)]): RDD[(String, String, String, String)] = {
+
+    //Get subject data and tokenise it
+    val ent_sub1 = entites_RDD1.map(f => { (f._1, f._1.split("_")) })
+    val part_rdd1 = new RangePartitioner(400, ent_sub1)
+    val partitioned_rdd1 = ent_sub1.partitionBy(part_rdd1).persist(StorageLevel.MEMORY_AND_DISK)
+
+    val ent_sub2 = entities_RDD2.map(f => { (f._1, f._1.split("_")) })
+    val part_rdd2 = new RangePartitioner(400, ent_sub2)
+    val partitioned_rdd2 = ent_sub2.partitionBy(part_rdd2).persist(StorageLevel.MEMORY_AND_DISK)
+
+    val entities_Df1 = spark.createDataFrame(partitioned_rdd1).toDF("entities", "ent_sub")
+    val entities_Df2 = spark.createDataFrame(partitioned_rdd2).toDF("entities", "ent_sub")
+
+    //Apply LSH technique by HashingTF vectorisation
+    val (hashfeatured_entities_Df1: DataFrame, hashfeatured_entities_Df2: DataFrame) = applyHashingTf_sub("ent_sub", "features", entities_Df1, entities_Df2)
+    val (model_sub: MinHashLSHModel, transformed_sub_Df1: DataFrame, transformed_sub_Df2: DataFrame) = minHashLSH(hashfeatured_entities_Df1, hashfeatured_entities_Df2)
+    val ds_subjects = approxsimilarityjoin(model_sub, transformed_sub_Df1, transformed_sub_Df2)
+
+    //Combine predicate:object level knowledge for the matched pairs: join on entity1 first, then re-key by entity2 and join again
+    val ds_subjects_rdd = ds_subjects.rdd
+    val ds_subjects_data1 = ds_subjects_rdd.map(f => { (f.get(0).toString(), f.get(1).toString()) }).join(entites_RDD1)
+    val ds_subjects_data2 = ds_subjects_data1.map(f => { (f._2._1, (f._1, f._2._2)) }).join(entities_RDD2)
+    val ds_subjects_data = ds_subjects_data2.map(f => { (f._2._1._1, f._2._1._2, f._1, f._2._2) })
+    ds_subjects_data
+  }
+
+  /**
+   * This api vectorises the tokenised entity subjects to form features
+   *
+   * Apply HashingTF vectorisation on the tokenised subjects; setNumFeatures (vocab_size) is the number of hash buckets the terms of inp_col are mapped into
+   * We try to avoid collisions by keeping this value high.
+   *
+   * @param inp_col specifies the input column for vectorisation
+   * @param out_col specifies the output column containing features
+   * data1 and data2 are dataframes containing the tokenised subjects
+   * @return Dataframes with vectorised features i.e. tokenised subjects are vectorised here
+   */
+  def applyHashingTf_sub(inp_col: String, out_col: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = {
+    val hashingTf = new HashingTF().setInputCol(inp_col).setOutputCol(out_col).setNumFeatures(vocab_size.toInt)
+    val hashfeatured_entities_Df1 = hashingTf.transform(data1)
+    val hashfeatured_entities_Df2 = hashingTf.transform(data2)
+    (hashfeatured_entities_Df1, hashfeatured_entities_Df2)
+  }
+
+  /**
+   * This api MinHashes the featured entity subjects
+   *
+   * setting setNumHashTables to 3 means 3 hash values are generated for each feature vector
+   *
+   * @param featured_entites_Df1 featured dataframe of the first dataset, generated by applyHashingTf_sub api; featured_entites_Df2 is its counterpart for the second dataset
+   * @return MinHashLSH model with Dataframes containing minhashes generated for the features
+   */
+  def minHashLSH(featured_entites_Df1: DataFrame, featured_entites_Df2: DataFrame): (MinHashLSHModel, DataFrame, DataFrame) = {
+    val mh = new MinHashLSH().setNumHashTables(3).setInputCol("features").setOutputCol("hashed_values")
+    val featured_data = featured_entites_Df1.union(featured_entites_Df2).distinct()
+    val model = mh.fit(featured_data)
+    val transformed_entities_Df1 = model.transform(featured_entites_Df1)
+    val transformed_entities_Df2 = model.transform(featured_entites_Df2)
+    (model, transformed_entities_Df1, transformed_entities_Df2)
+  }
+
+  /**
+   * This api applies approxSimilarityJoin to detect entity matches with subject similarity
+   *
+   * The join threshold is the constructor parameter threshold_subject, specified by user
+   * A lower threshold means the entity matches found are closely related
+   *
+   * @param model - MinHashLSHModel generated by minHashLSH api
+   * @param df1 dataframe of the first dataset generated by minHashLSH api; df2 is its counterpart for the second dataset
+   * Only the "entities" columns of both sides are kept, renamed to entity1 and entity2
+   * @return matched entity pairs
+   */
+  def approxsimilarityjoin(model: MinHashLSHModel, df1: DataFrame, df2: DataFrame): DataFrame = {
+    val dataset = model.approxSimilarityJoin(df1, df2, threshold_subject)
+    val refined_ds = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) //only for lsh1subjects
+    refined_ds
+  }
+
+  /**
+   * This api compares predicate level knowledge of similar entities matched pairs generated by lsh_subjects api
+   * Compute jaccard similarity on the predicates of paired entity matches
+   * Keep the entity matches whose similarity is at least jsimilarity_predicate (constructor parameter, specified by user)
+   *
+   * @param similar_subj_rdd contains the entity matches based on subjects with integrated attribute level knowledge, generated from lsh_subjects
+   * Predicates are compared as lists, so a predicate that occurs several times in a profile is counted several times
+   * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject, entity2_predicates:objectspairs, intersecting_predicates, jsimilarityofpredicates)]),
+   * where entity1_subject and entity2_subject are the matched pairs on predicate level knowledge
+   */
+  def get_similar_predicates(similar_subj_rdd: RDD[(String, String, String, String)]): RDD[(String, List[String], String, List[String], List[String], Double)] = {
+    val refined_data_sub = similar_subj_rdd.map(f => {
+      val sub1 = f._1 // entity1_subject
+      val s_data1 = f._2 // entity1_predicateobject_pairs
+      val sub2 = f._3 // entity2_subject
+      val s_data2 = f._4 // entity2_predicateobject_pairs
+
+      //segregate each of the predicate_object pairs for both the entities
+      val pred_obj1 = s_data1.split(" , ").toList
+      val pred_obj2 = s_data2.split(" , ").toList
+
+      //empty lists for predicates
+      var list_pred1 = List[String]()
+      var list_pred2 = List[String]()
+
+      //extract only predicates from the predicate_object for both entities for comparison
+      for (x <- pred_obj1) {
+        list_pred1 = list_pred1 :+ x.split(":").head
+      }
+      for (x <- pred_obj2) {
+        list_pred2 = list_pred2 :+ x.split(":").head
+      }
+
+      //Find common predicates among the entities
+      val intersect_pred = list_pred1.intersect(list_pred2)
+      val union_pred = list_pred1.length + list_pred2.length - intersect_pred.length
+
+      //calculate jaccard similarity on predicate level knowledge of both entities for comparison
+      val similarity = intersect_pred.length.toDouble / union_pred.toDouble
+
+      (sub1, pred_obj1, sub2, pred_obj2, intersect_pred, similarity)
+    })
+    similar_subj_rdd.unpersist() // NOTE(review): the map above is lazy, so this runs before any action materialises the input and appears to cancel the caller's persist()
+
+    //filter the entity pairs with jaccard similarities that fit or are above user defined jsimilarity for predicate level knowledge comparison
+    val refined_data_pred = refined_data_sub.filter(f => f._6 >= jsimilarity_predicate)
+    refined_data_pred
+  }
+
+  /**
+   * This api removes false positives by comparing object level knowledge of similar entities matched pairs generated by get_similar_predicates api
+   * Compute jaccard similarity on the objects of paired entity matches, only for the intersecting predicates
+   * Keep the entity matches whose similarity is at least threshold_object (constructor parameter, specified by user)
+   *
+   * @param ds_pred contains the entity matches based on predicate level knowledge
+   * Objects are split on spaces and de-duplicated before the jaccard similarity is computed
+   * @return ([(entity1_subject, entity2_subject, jsimilarityofobjects)]),
+   * where entity1_subject and entity2_subject are the matched pairs on object level knowledge
+   */
+  def get_similarity_objects(ds_pred: RDD[(String, List[String], String, List[String], List[String], Double)]): RDD[(String, String, Double)] = {
+    val mapped_objects = ds_pred.map(f => {
+      val sub1 = f._1 // entity1_subject
+      val pred_obj1 = f._2 // entity1_predicateobject_pairs
+      val sub2 = f._3 // entity2_subject
+      val pred_obj2 = f._4 // entity2_predicateobject_pairs
+      val common_pred = f._5 // intersecting_predicates of both entities for comparing their objects
+
+      var obj1: String = " "
+      var obj2: String = " "
+
+      // Segregate objects of only intersecting predicates among the two entities
+      for (x <- pred_obj1) {
+        val pred = x.split(":").head
+        val obj = x.split(":").last
+        if (common_pred.contains(pred))
+          obj1 = obj1 + " " + obj
+      }
+
+      for (x <- pred_obj2) {
+        val pred = x.split(":").head
+        val obj = x.split(":").last
+        if (common_pred.contains(pred))
+          obj2 = obj2 + " " + obj
+      }
+
+      val sub_obj1 = obj1.trim().split(" ").toList.distinct
+      val sub_obj2 = obj2.trim().split(" ").toList.distinct
+
+      //Compute jaccard similarity on the objects
+      val intersect_obj = sub_obj1.intersect(sub_obj2).length
+      val union_obj = sub_obj1.length + sub_obj2.length - intersect_obj
+
+      val similarity = intersect_obj.toDouble / union_obj.toDouble
+
+      (sub1, sub2, similarity)
+    })
+    ds_pred.unpersist() // NOTE(review): runs before the lazy map above is materialised, see get_similar_predicates
+    // Extract entity matches with similarity of at least threshold_object, specified by user
+    val results = mapped_objects.filter(f => f._3 >= threshold_object)
+    results
+  }
+
+  /**
+   * This api evaluates our results by comparing it with groundtruth
+   * Compute Precision, Recall and F1-Measure (as percentages; reported as 0.0 when the denominator is zero), print them and save the predicted pairs
+   *
+   * @param result contains the entity matches predicted by our algorithm, generated from get_similarity_objects api
+   * Uses the constructor parameter teacher (first two columns) as the groundtruth for comparison
+   * and the constructor parameter output_path as the path to save the result rdd
+   */
+  def evaluation(result: RDD[(String, String, Double)]): RDD[(String, String)] = {
+    val predicted_rdd = result.map(f => {
+      (f._1, f._2)
+    }).persist(StorageLevel.MEMORY_AND_DISK) // reused by three actions below (intersection, count, save); avoids recomputing the whole pipeline
+    val teacher_rdd = teacher.rdd
+    val actual_rdd = teacher_rdd.map(f => {
+      (f.get(0).toString(), f.get(1).toString())
+    })
+
+    //Calculate TruePositives for precision, recall and f1-measure
+    val truePositives = actual_rdd.intersection(predicted_rdd).count
+    println("***************************************************************************************")
+    println("***************************************************************************************")
+
+    val actual = actual_rdd.count()
+    val results = predicted_rdd.count()
+    println("Actual: " + actual)
+    println("Predicted: " + results)
+    println("True Positives: " + truePositives)
+    val precision = if (results == 0) 0.0 else (truePositives * 100.00) / results
+    println("Precision: " + precision)
+    val recall = if (actual == 0) 0.0 else (truePositives * 100.00) / actual
+    println("Recall: " + recall)
+    val f1_measure = if (precision + recall == 0) 0.0 else (2 * precision * recall) / (precision + recall)
+    println("F1-measure: " + f1_measure)
+    println("***************************************************************************************")
+    println("***************************************************************************************")
+    //Save the output_rdd
+    predicted_rdd.coalesce(1).saveAsTextFile(output_path)
+    println("Output Saved!")
+
+    predicted_rdd
+  }
+}
+
+object EntityResolution_RDFData_HashingTF {
+  def apply(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple],
+    teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double,
+    threshold_object: Double, vocab_size: Long, output_path: String): EntityResolution_RDFData_HashingTF = new EntityResolution_RDFData_HashingTF(spark, triplesRDD1, triplesRDD2,
+    teacher, threshold_subject, jsimilarity_predicate, threshold_object, vocab_size, output_path)
+}
From 684cc6d990adb91ea2048dcdd052bb97856b1720 Mon Sep 17 00:00:00 2001
From: Amrit Kaur
Date: Tue, 31 Dec 2019 22:16:36 +0100
Subject: [PATCH 2/7] updated changes as per review

---
 ...ta_CountVectorizer.scala => Commons.scala} | 241 ++++++------
 .../entity_resolution/ERCountVectorizer.scala |  41 ++
 .../spark/entity_resolution/ERHashingTF.scala |  41 ++
 .../EntityResolution_RDFData_HashingTF.scala  | 355 ------------------
 .../predicatesfilteration.txt                 |  13 +
 5 files changed, 204 insertions(+), 487 deletions(-)
 rename sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/{EntityResolution_RDFData_CountVectorizer.scala => Commons.scala} (52%)
 create mode 100644 sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala
 create mode 100644
sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala delete mode 100644 sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala create mode 100644 sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala similarity index 52% rename from sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala rename to sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala index 27980c5..91ef919 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_CountVectorizer.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala @@ -4,8 +4,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.DataFrame import org.apache.spark.rdd.RDD import org.apache.jena.graph.Triple -import org.apache.spark.ml.feature.{ Tokenizer, HashingTF } -import org.apache.spark.ml.feature.CountVectorizer +import org.apache.spark.ml.feature.Tokenizer import org.apache.spark.ml.feature.MinHashLSH import org.apache.spark.ml.feature.MinHashLSHModel import org.apache.spark.sql.Dataset @@ -16,17 +15,22 @@ import org.apache.spark.RangePartitioner import org.apache.spark.storage.StorageLevel import org.apache.spark.RangePartitioner import org.apache.spark.RangePartitioner +import scala.io.Source +import java.io.Serializable /* A generic Entity Resolution Approach for larger datasets(for e.g. 
24.8GB) * Dataset1 has 47 million entities, approx 19GB * Dataset2 has 19 million entities, approx 5.8GB * */ -class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple], - teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double, - threshold_object: Double, vocab_size: Long, output_path: String) extends Serializable { +abstract class Commons(val spark: SparkSession, val sourceData1: RDD[Triple], val sourceData2: RDD[Triple], + val teacher: DataFrame, val thresholdSubject: Double, val jsimilarityPredicate: Double, + val thresholdObject: Double, val vocabSize: Long, val outputPath: String) extends Serializable { + +@transient lazy val consoleLog: Logger = Logger.getLogger(getClass.getName) +private val PARTITIONS = 400 +private val REPARTITION_NUMBER = 600 /** - * Simple Api to call other apis * Execute entity profile generation * Step1- Find matching entities based on LSH subjects, by lsh_subjects api * Step2- Compare the predicates for matched entities, by get_similar_predicates api @@ -35,22 +39,22 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: def run(): RDD[(String, String)] = { // Define entity profiles from triplesRDD1 and triplesRDD2 - val entity_profiles1 = get_entity_profiles(triplesRDD1) - val entity_profiles2 = get_entity_profiles(triplesRDD2) + val entityProfiles1 = getEntityProfiles(sourceData1) + val entityProfiles2 = getEntityProfiles(sourceData2) // Similar entities matched based on subjects - val ds_subjects1: RDD[(String, String, String, String)] = lsh_subjects(entity_profiles1, entity_profiles2) - val ds_subjects = ds_subjects1.repartition(600).persist(StorageLevel.MEMORY_AND_DISK) + val dsSubjects1: RDD[(String, String, String, String)] = lshSubjects(entityProfiles1, entityProfiles2) + val dsSubjects = dsSubjects1.repartition(REPARTITION_NUMBER).persist(StorageLevel.MEMORY_AND_DISK) // Compare the predicates for matched entities 
by subject - val refined_data_pred = get_similar_predicates(ds_subjects) - val ds_predicates = refined_data_pred.repartition(400).persist(StorageLevel.MEMORY_AND_DISK) + val refinedDataPred = getSimilarPredicates(dsSubjects) + val dsPredicates = refinedDataPred.repartition(PARTITIONS).persist(StorageLevel.MEMORY_AND_DISK) // Compare the objects for intersecting predicates in matched entities by predicate level knowledge - val refined_objects = get_similarity_objects(ds_predicates) + val refinedObjects = getSimilarityObjects(dsPredicates) //Evaluate our results with groundtruth data - val output = evaluation(refined_objects) + val output = evaluation(refinedObjects) output @@ -71,16 +75,19 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * where subject is the key and the group of paired predicate:object forms the value * ex:- (Budapest_City, areaCode:1 , country:Hungary) */ - def get_entity_profiles(triplesRDD: RDD[Triple]): RDD[(String, String)] = { + def getEntityProfiles(sourceData: RDD[Triple]): RDD[(String, String)] = { //predicates to be filtered out from triples - val removePredicates: List[String] = List("owl:sameas", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", + /** + * val rPredicates: List[String] = List("owl:sameas", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted", "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects") - + * */ + val filename = "predicatesfilteration.txt" + val removePredicates: List[String] = Source.fromFile(filename).getLines.toList val broadcastVar = spark.sparkContext.broadcast(removePredicates) // broadcast here small RDD - val distinct_triples = triplesRDD.distinct() - val remove_triples = distinct_triples.mapPartitions({ f => + val distinctTriples = sourceData.distinct() + val removeTriples = distinctTriples.mapPartitions({ f => val k 
= broadcastVar.value for { x <- f @@ -90,10 +97,10 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: }) - val filtered_triples = distinct_triples.subtract(remove_triples) + val filteredTriples = distinctTriples.subtract(removeTriples) //Define entity profiles - val entity = filtered_triples.filter(f => (f.getObject().isURI() || f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == "")) + val entity = filteredTriples.filter(f => (f.getObject().isURI() || f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == "")) .map(f => { val key = f.getSubject.getURI.split("/").last.trim() val pred = f.getPredicate.getURI.split(Array('/', '#')).last.trim() @@ -125,52 +132,36 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject , entity2_predicates:objectspairs)]), * where entity1_subject and entity2_subject are the matched pairs */ - def lsh_subjects(entites_RDD1: RDD[(String, String)], entities_RDD2: RDD[(String, String)]): RDD[(String, String, String, String)] = { - + def lshSubjects(entitesRDD1: RDD[(String, String)], entitiesRDD2: RDD[(String, String)]): RDD[(String, String, String, String)] = { + //Get subject data and tokenise it - val ent_sub1 = entites_RDD1.map(f => { (f._1, f._1.split("_")) }) - val part_rdd1 = new RangePartitioner(400, ent_sub1) - val partitioned_rdd1 = ent_sub1.partitionBy(part_rdd1).persist(StorageLevel.MEMORY_AND_DISK) + val entSub1 = entitesRDD1.map(f => { (f._1, f._1.split("_")) }) + val partrdd1 = new RangePartitioner(PARTITIONS, entSub1) + val partitionedrdd1 = entSub1.partitionBy(partrdd1).persist(StorageLevel.MEMORY_AND_DISK) - val ent_sub2 = entities_RDD2.map(f => { (f._1, f._1.split("_")) }) - val part_rdd2 = new RangePartitioner(400, ent_sub2) - val partitioned_rdd2 = ent_sub2.partitionBy(part_rdd2).persist(StorageLevel.MEMORY_AND_DISK) + val entSub2 = 
entitiesRDD2.map(f => { (f._1, f._1.split("_")) }) + val partrdd2 = new RangePartitioner(PARTITIONS, entSub2) + val partitionedrdd2 = entSub2.partitionBy(partrdd2).persist(StorageLevel.MEMORY_AND_DISK) - val entities_Df1 = spark.createDataFrame(partitioned_rdd1).toDF("entities", "ent_sub") - val entities_Df2 = spark.createDataFrame(partitioned_rdd2).toDF("entities", "ent_sub") + val entitiesDf1 = spark.createDataFrame(partitionedrdd1).toDF("entities", "ent_sub") + val entitiesDf2 = spark.createDataFrame(partitionedrdd2).toDF("entities", "ent_sub") - //Apply LSH technique by HashingTF vectorisation - val (cvfeatured_entities_Df1: DataFrame, cvfeatured_entities_Df2: DataFrame) = applyCountVectorizer_sub("ent_sub", "features", entities_Df1, entities_Df2) - val (model_sub: MinHashLSHModel, transformed_sub_Df1: DataFrame, transformed_sub_Df2: DataFrame) = minHashLSH(cvfeatured_entities_Df1, cvfeatured_entities_Df2) - val ds_subjects = approxsimilarityjoin(model_sub, transformed_sub_Df1, transformed_sub_Df2) + //Apply LSH technique by vectorisation through HashingTF or CountVectorizer + val (featuredEntitiesDf1: DataFrame, featuredEntitiesDf2: DataFrame) = vectorise("ent_sub", "features", entitiesDf1, entitiesDf2) + val (modelSub: MinHashLSHModel, transformedSubDf1: DataFrame, transformedSubDf2: DataFrame) = minHashLSH(featuredEntitiesDf1, featuredEntitiesDf2) + val dsSubjects = approxSimilarityJoin(modelSub, transformedSubDf1, transformedSubDf2) //Combine predicate:object level knowledge for the matched pairs - val ds_subjects_rdd = ds_subjects.rdd - val ds_subjects_data1 = ds_subjects_rdd.map(f => { (f.get(0).toString(), f.get(1).toString()) }).join(entites_RDD1) - val ds_subjects_data2 = ds_subjects_data1.map(f => { (f._2._1, (f._1, f._2._2)) }).join(entities_RDD2) - val ds_subjects_data = ds_subjects_data2.map(f => { (f._2._1._1, f._2._1._2, f._1, f._2._2) }) + val dsSubjectsRDD = dsSubjects.rdd + val dsSubjectsData1 = dsSubjectsRDD.map(f => { (f.get(0).toString(), 
f.get(1).toString()) }).join(entitesRDD1) + val dsSubjectsData2 = dsSubjectsData1.map(f => { (f._2._1, (f._1, f._2._2)) }).join(entitiesRDD2) + val dsSubjectsData = dsSubjectsData2.map(f => { (f._2._1._1, f._2._1._2, f._1, f._2._2) }) - ds_subjects_data - } - - /** - * This api vectroises the entity subjects tokenised to form features - * - * Apply CountVectorizer vectorisation on the tokenised subjects, setting our setVocabSize means that in our dictionary we will be adding approximately terms<=vocab_size. Terms are in the inp_column - * - * @param inp_col specifies the input column for vectorisation - * @param out_col specifies the output column containing features - * data1 and data2 are dataframes containing the tokenised subjects - * @return Dataframes with vectorised features i.e. tokenised subjects are vectorised here - */ - def applyCountVectorizer_sub(inp_col: String, out_col: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = { - val data = data1.union(data2).distinct() - val countVectorizer = new CountVectorizer().setInputCol(inp_col).setOutputCol(out_col).setVocabSize(vocab_size.toInt).setMinDF(1).fit(data) - val cvfeatured_entities_Df1 = countVectorizer.transform(data1) - val cvfeatured_entities_Df2 = countVectorizer.transform(data2) - return (cvfeatured_entities_Df1, cvfeatured_entities_Df2) + dsSubjectsData } + def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) //abstract method + /** * This api MinHashes the featured entity subjects * @@ -179,15 +170,13 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * @param featured_entites_Df1 and featured_entites_Df2 specifies the featured dataframes generated by applyHashingTf_sub api * @return MinHashLSH model with Dataframes containing minhashes generated for the features */ - def minHashLSH(featured_entites_Df1: DataFrame, featured_entites_Df2: DataFrame): (MinHashLSHModel, DataFrame, DataFrame) = { 
+ def minHashLSH(featuredEntitesDf1: DataFrame, featuredEntitesDf2: DataFrame): (MinHashLSHModel, DataFrame, DataFrame) = { val mh = new MinHashLSH().setNumHashTables(3).setInputCol("features").setOutputCol("hashed_values") - val featured_data = featured_entites_Df1.union(featured_entites_Df2).distinct() - val model = mh.fit(featured_data) - val transformed_entities_Df1 = model.transform(featured_entites_Df1) - val transformed_entities_Df2 = model.transform(featured_entites_Df2)/* - * - * */ - return (model, transformed_entities_Df1, transformed_entities_Df2) + val featuredData = featuredEntitesDf1.union(featuredEntitesDf2).distinct() + val model = mh.fit(featuredData) + val transformedEntitiesDf1 = model.transform(featuredEntitesDf1) + val transformedEntitiesDf2 = model.transform(featuredEntitesDf2) + return (model, transformedEntitiesDf1, transformedEntitiesDf2) } /** @@ -201,10 +190,10 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * @param threshold- threshold for subject similarity specified by user * @return matched entity pairs */ - def approxsimilarityjoin(model: MinHashLSHModel, df1: DataFrame, df2: DataFrame): DataFrame = { - val dataset = model.approxSimilarityJoin(df1, df2, threshold_subject) - val refined_ds = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) //only for lsh1subjects - refined_ds + def approxSimilarityJoin(model: MinHashLSHModel, df1: DataFrame, df2: DataFrame): DataFrame = { + val dataset = model.approxSimilarityJoin(df1, df2, thresholdSubject) + val refinedDs = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) //only for lsh1subjects + refinedDs } /** @@ -217,44 +206,44 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject, entity2_predicates:objectspairs, intersecting_predicates, 
jsimilarityofpredicates)]), * where entity1_subject and entity2_subject are the matched pairs on predicate level knowledge */ - def get_similar_predicates(similar_subj_rdd: RDD[(String, String, String, String)]): RDD[(String, List[String], String, List[String], List[String], Double)] = { - val refined_data_sub = similar_subj_rdd.map(f => { + def getSimilarPredicates(similarSubjRDD: RDD[(String, String, String, String)]): RDD[(String, List[String], String, List[String], List[String], Double)] = { + val refinedDataSub = similarSubjRDD.map(f => { val sub1 = f._1 // entity1_subject - val s_data1 = f._2 // entity1_predicateobject_pairs + val sdata1 = f._2 // entity1_predicateobject_pairs val sub2 = f._3 // entity2_subject - val s_data2 = f._4 //// entity2_predicateobject_pairs + val sdata2 = f._4 //// entity2_predicateobject_pairs //segregate each of the predicate_object pairs for both the entities - val pred_obj1 = s_data1.split(" , ").toList - val pred_obj2 = s_data2.split(" , ").toList + val predObj1 = sdata1.split(" , ").toList + val predObj2 = sdata2.split(" , ").toList //empty lists for predicates - var list_pred1 = List[String]() - var list_pred2 = List[String]() + var listPred1 = List[String]() + var listPred2 = List[String]() //extract only predicates from the predicate_object for both entities for comparison - for (x <- pred_obj1) { - list_pred1 = list_pred1 :+ x.split(":").head + for (x <- predObj1) { + listPred1 = listPred1 :+ x.split(":").head } - for (x <- pred_obj2) { - list_pred2 = list_pred2 :+ x.split(":").head + for (x <- predObj2) { + listPred2 = listPred2 :+ x.split(":").head } //Find common predicates among the entities - val intersect_pred = list_pred1.intersect(list_pred2) - val union_pred = list_pred1.length + list_pred2.length - intersect_pred.length + val intersectPred = listPred1.intersect(listPred2) + val unionPred = listPred1.length + listPred2.length - intersectPred.length //calculate jaccard similarity on predicate level knowledge of both 
entities for comparison - val similarity = intersect_pred.length.toDouble / union_pred.toDouble + val similarity = intersectPred.length.toDouble / unionPred.toDouble - (sub1, pred_obj1, sub2, pred_obj2, intersect_pred, similarity) + (sub1, predObj1, sub2, predObj2, intersectPred, similarity) }) - similar_subj_rdd.unpersist() + similarSubjRDD.unpersist() //filter the entity pairs with jaccard similarities that fit or are above user defined jsimilarity for predicate level knoledge comparison - val refined_data_pred = refined_data_sub.filter(f => f._6 >= jSimilartiy_predicate) + val refinedDataPred = refinedDataSub.filter(f => f._6 >= jSimilartiyPredicate) - refined_data_pred + refinedDataPred } /** @@ -267,46 +256,46 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * @return ([(entity1_subject, entity2_subject, jsimilarityofobjects)]), * where entity1_subject and entity2_subject are the matched pairs on object level knowledge */ - def get_similarity_objects(ds_pred: RDD[(String, List[String], String, List[String], List[String], Double)]): RDD[(String, String, Double)] = { - val mapped_objects = ds_pred.map(f => { + def getSimilarityObjects(dsPred: RDD[(String, List[String], String, List[String], List[String], Double)]): RDD[(String, String, Double)] = { + val mappedObjects = dsPred.map(f => { val sub1 = f._1 // entity1_subject - val pred_obj1 = f._2 // entity1_predicateobject_pairs + val predObj1 = f._2 // entity1_predicateobject_pairs val sub2 = f._3 // entity2_subject - val pred_obj2 = f._4 // entity2_predicateobject_pairs - val common_pred = f._5 // intersecting_predicates of both entites for comparing their objects + val predObj2 = f._4 // entity2_predicateobject_pairs + val commonPred = f._5 // intersecting_predicates of both entites for comparing their objects var obj1: String = " " var obj2: String = " " // Segregate objects of only intersecting predicates among the two entities - for (x <- pred_obj1) { + for (x <- predObj1) 
{ val pred = x.split(":").head val obj = x.split(":").last - if (common_pred.contains(pred)) + if (commonPred.contains(pred)) obj1 = obj1 + " " + obj } - for (x <- pred_obj2) { + for (x <- predObj2) { val pred = x.split(":").head val obj = x.split(":").last - if (common_pred.contains(pred)) + if (commonPred.contains(pred)) obj2 = obj2 + " " + obj } - val sub_obj1 = obj1.trim().split(" ").toList.distinct - val sub_obj2 = obj2.trim().split(" ").toList.distinct + val subObj1 = obj1.trim().split(" ").toList.distinct + val subObj2 = obj2.trim().split(" ").toList.distinct //Compute jaccard similarity on the objects - val intersect_obj = sub_obj1.intersect(sub_obj2).length - val union_obj = sub_obj1.length + sub_obj2.length - intersect_obj + val intersectObj = subObj1.intersect(subObj2).length + val unionObj = subObj1.length + subObj2.length - intersectObj - val similarity = intersect_obj.toDouble / union_obj.toDouble + val similarity = intersectObj.toDouble / unionObj.toDouble (sub1, sub2, similarity) }) - ds_pred.unpersist() + dsPred.unpersist() // Extract entity matches with similarity more than threshold_objects, specified by user - val results = mapped_objects.filter(f => f._3 >= threshold_object) + val results = mappedObjects.filter(f => f._3 >= thresholdObject) results } @@ -320,43 +309,31 @@ class EntityResolution_RDFData_CountVectorizer(spark: SparkSession, triplesRDD1: * @param output_path - path to save the result rdd */ def evaluation(result: RDD[(String, String, Double)]): RDD[(String, String)] = { - val predicted_rdd = result.map(f => { + val predictedRDD = result.map(f => { (f._1, f._2) }) - val teacher_rdd = teacher.rdd - val actual_rdd = teacher_rdd.map(f => { + val teacherRDD = teacher.rdd + val actualRDD = teacherRDD.map(f => { (f.get(0).toString(), f.get(1).toString()) }) //Calculate TruePostives for precision, recall and f1-measure - val truePositives = actual_rdd.intersection(predicted_rdd).count - 
println("***************************************************************************************") - println("***************************************************************************************") + val truePositives = actualRDD.intersection(predictedRDD).count + consoleLog.info("***************************************************************************************") - val actual = actual_rdd.count() - val results = predicted_rdd.count() - println("Actual: " + actual) - println("Predicted: " + results) - println("True Positives: " + truePositives) + val actual = actualRDD.count() + val results = predictedRDD.count() val precision = (truePositives * 100.00) / (results) - println("Precision: " + precision) + consoleLog.info(s"Precision: $precision") val recall = (truePositives * 100) / (actual) - println("Recall: " + recall) - val f1_measure = (2 * precision * recall) / (precision + recall) - println("F1-measure: " + f1_measure) - println("***************************************************************************************") - println("***************************************************************************************") + consoleLog.info(s"Recall: $recall") + val fMeasure = (2 * precision * recall) / (precision + recall) + consoleLog.info(s"F-Measure: $fMeasure") + consoleLog.info("***************************************************************************************") + //Save the output_rdd - predicted_rdd.coalesce(1).saveAsTextFile(output_path) - println("Output Saved!") + predictedRDD.coalesce(1).saveAsTextFile(outputPath) - predicted_rdd + predictedRDD } } - -object EntityResolution_RDFData_CountVectorizer { - def apply(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple], - teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double, - threshold_object: Double, vocab_size: Long, output_path: String): EntityResolution_RDFData_CountVectorizer = new EntityResolution_RDFData_CountVectorizer(spark, triplesRDD1, 
triplesRDD2, - teacher, threshold_subject, jsimilarity_predicate, threshold_object, vocab_size, output_path) -} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala new file mode 100644 index 0000000..c86cd64 --- /dev/null +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala @@ -0,0 +1,41 @@ +package net.sansa_stack.ml.spark.entity_resolution + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.DataFrame +import org.apache.spark.rdd.RDD +import org.apache.jena.graph.Triple +import org.apache.spark.ml.feature.CountVectorizer +import java.io.Serializable + +class ERCountVectorizer(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], + teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long, outputPath: String) extends Commons(spark, sourceData1, sourceData2, + teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) with Serializable { + + /** + * This api vectroises the entity subjects tokenised to form features + * + * Apply CountVectorizer vectorisation on the tokenised subjects, setting our setVocabSize means that in our dictionary we will be adding approximately terms<=vocab_size. Terms are in the inp_column + * + * @param inp_col specifies the input column for vectorisation + * @param out_col specifies the output column containing features + * data1 and data2 are dataframes containing the tokenised subjects + * @return Dataframes with vectorised features i.e. 
tokenised subjects are vectorised here + */ + override def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = { + val data = data1.union(data2).distinct() + val countVectorizer = new CountVectorizer().setInputCol(inpCol).setOutputCol(outCol).setVocabSize(vocabSize.toInt).setMinDF(1).fit(data) + val featuredEntitiesDf1 = countVectorizer.transform(data1) + val featuredEntitiesDf2 = countVectorizer.transform(data2) + return (featuredEntitiesDf1, featuredEntitiesDf2) + } + + run +} + +object ERCountVectorizer { + def apply(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], + teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long, outputPath: String): ERCountVectorizer = new ERCountVectorizer(spark, sourceData1, sourceData2, + teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala new file mode 100644 index 0000000..064832f --- /dev/null +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala @@ -0,0 +1,41 @@ +package net.sansa_stack.ml.spark.entity_resolution + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.DataFrame +import org.apache.spark.rdd.RDD +import org.apache.jena.graph.Triple +import org.apache.spark.ml.feature.HashingTF +import java.io.Serializable + +class ERHashingTF(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], + teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long, outputPath: String) extends Commons(spark, sourceData1, sourceData2, + teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) with 
Serializable { + + /** + * This api vectroises the entity subjects tokenised to form features + * + * Apply HashingTF vectorisation on the tokenised subjects, setting our setNumFeatures means that it would probably encounter those many different terms/words in the inp_column + * We try to avoid collisions by keeping this value high. + * + * @param inp_col specifies the input column for vectorisation + * @param out_col specifies the output column containing features + * data1 and data2 are dataframes containing the tokenised subjects + * @return Dataframes with vectorised features i.e. tokenised subjects are vectorised here + */ + override def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = { + val hashingTf = new HashingTF().setInputCol(inpCol).setOutputCol(outCol).setNumFeatures(vocabSize.toInt) + val featuredEntitiesDf1 = hashingTf.transform(data1) + val featuredEntitiesDf2 = hashingTf.transform(data2) + return (featuredEntitiesDf1, featuredEntitiesDf2) + } + + run +} + +object ERHashingTF { + def apply(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], + teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long, outputPath: String): ERHashingTF = new ERHashingTF(spark, sourceData1, sourceData2, + teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) +} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala deleted file mode 100644 index e24ec6c..0000000 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/EntityResolution_RDFData_HashingTF.scala +++ /dev/null @@ -1,355 +0,0 @@ -package net.sansa_stack.ml.spark.entity_resolution - -import org.apache.spark.sql.SparkSession -import 
org.apache.spark.sql.DataFrame -import org.apache.spark.rdd.RDD -import org.apache.jena.graph.Triple -import org.apache.spark.ml.feature.{ Tokenizer, HashingTF } -import org.apache.spark.ml.feature.CountVectorizer -import org.apache.spark.ml.feature.MinHashLSH -import org.apache.spark.ml.feature.MinHashLSHModel -import org.apache.spark.sql.Dataset -import org.apache.spark.sql.functions._ -import org.apache.spark.ml.feature.RegexTokenizer -import org.apache.spark.sql.Row -import org.apache.spark.RangePartitioner -import org.apache.spark.storage.StorageLevel -import org.apache.spark.RangePartitioner -import org.apache.spark.RangePartitioner - -/* A generic Entity Resolution Approach for larger datasets(for e.g. 24.8GB) - * Dataset1 has 47 million entities, approx 19GB - * Dataset2 has 19 million entities, approx 5.8GB - * */ -class EntityResolution_RDFData_HashingTF(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple], - teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double, - threshold_object: Double, vocab_size: Long, output_path: String) extends Serializable { - - /** - * Simple Api to call other apis - * Execute entity profile generation - * Step1- Find matching entities based on LSH subjects, by lsh_subjects api - * Step2- Compare the predicates for matched entities, by get_similar_predicates api - * Step3- Compare the objects for intersecting predicates in matched entities, by get_similar_objects api - */ - def run(): RDD[(String, String)] = { - - // Define entity profiles from triplesRDD1 and triplesRDD2 - val entity_profiles1 = get_entity_profiles(triplesRDD1) - val entity_profiles2 = get_entity_profiles(triplesRDD2) - - // Similar entities matched based on subjects - val ds_subjects1: RDD[(String, String, String, String)] = lsh_subjects(entity_profiles1, entity_profiles2) - val ds_subjects = ds_subjects1.repartition(600).persist(StorageLevel.MEMORY_AND_DISK) - - // Compare the predicates for matched entities by 
subject - val refined_data_pred = get_similar_predicates(ds_subjects) - val ds_predicates = refined_data_pred.repartition(400).persist(StorageLevel.MEMORY_AND_DISK) - - // Compare the objects for intersecting predicates in matched entities by predicate level knowledge - val refined_objects = get_similarity_objects(ds_predicates) - - //Evaluate our results with groundtruth data - val output = evaluation(refined_objects, teacher, output_path) - - output - } - - /** - * Filters triples and defines entity profiles. - * The triplesRDD needs filteration. We perform data cleansing by removing common wikilinks predicates listed in removePredicates List - * and owl:sameas triples - * This api further uses map partition broadcasting the List removePredicates, to find triples containing the listed predicates in distinct_triples rdd - * Finally, subtract those triples to get filtered_triples RDD - * Consider only triples with objects as URI or with "en" literal language or no literal language - * Group all triples of a particular subject to form the entity profiles in the format: - * - * - * @param triplesRDD contains RDF data in the form of triples - * @return ([(subject,predicate1:object1 , predicate2:object2 , ... 
, predicaten:objectn)]), - * where subject is the key and the group of paired predicate:object forms the value - * ex:- (Budapest_City, areaCode:1 , country:Hungary) - */ - def get_entity_profiles(triplesRDD: RDD[Triple]): RDD[(String, String)] = { - - //predicates to be filtered out from triples - val removePredicates: List[String] = List("owl:sameas", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", - "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted", - "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects") - - val broadcastVar = spark.sparkContext.broadcast(removePredicates) // broadcast here small RDD - val distinct_triples = triplesRDD.distinct() - val remove_triples = distinct_triples.mapPartitions({ f => - val k = broadcastVar.value - for { - x <- f - z <- k - if x.getPredicate.getURI().contains(z)) - } yield (x) - - }) - - val filtered_triples = distinct_triples.subtract(remove_triples) - - //Define entity profiles - val entity = filtered_triples.filter(f => (f.getObject().isURI() || f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == "")) - .map(f => { - val key = f.getSubject.getURI.split("/").last.trim() - val pred = f.getPredicate.getURI.split(Array('/', '#')).last.trim() - if (f.getObject.isURI()) { - val obj = f.getObject.getURI.split("/").last.trim() - val value = pred + ":" + obj //predicate and object are seperated by ':' - (key, value) - } else { - val obj = f.getObject.getLiteral.toString().split(Array('^', '@')).head.trim() - val value = pred + ":" + obj.replace(":", "") - (key, value) - } - }).reduceByKey(_ + " , " + _) // triples seperated by ' , ' - return entity - } - - /** - * This api matches similar entities based on similarity of their subjects - * Get subject data from entites_RDD1 and entites_RDD2. 
- * Tokenise it by "_" to form ent_sub1 and ent_sub2 for comparison - * ex:- (Budapest_City, Set(Budapest, City)) - * - * Apply LSH technique on the tokenised subjects to get matched pairs on threshold_subject, specified by user - * Join the predicate:object knowledge for each of the entity matches returned - * - * @param entites_RDD1 and entitites_RDD2 contains the entity profiles, generated from get_entity_profiles, to be compared for match - * @param threshold_subject - the similarity threshold set for approxsimilarityjoin - * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject , entity2_predicates:objectspairs)]), - * where entity1_subject and entity2_subject are the matched pairs - */ - def lsh_subjects(entites_RDD1: RDD[(String, String)], entities_RDD2: RDD[(String, String)]): RDD[(String, String, String, String)] = { - - //Get subject data and tokenise it - val ent_sub1 = entites_RDD1.map(f => { (f._1, f._1.split("_")) }) - val part_rdd1 = new RangePartitioner(400, ent_sub1) - val partitioned_rdd1 = ent_sub1.partitionBy(part_rdd1).persist(StorageLevel.MEMORY_AND_DISK) - - val ent_sub2 = entities_RDD2.map(f => { (f._1, f._1.split("_")) }) - val part_rdd2 = new RangePartitioner(400, ent_sub2) - val partitioned_rdd2 = ent_sub2.partitionBy(part_rdd2).persist(StorageLevel.MEMORY_AND_DISK) - - val entities_Df1 = spark.createDataFrame(partitioned_rdd1).toDF("entities", "ent_sub") - val entities_Df2 = spark.createDataFrame(partitioned_rdd2).toDF("entities", "ent_sub") - - //Apply LSH technique by HashingTF vectorisation - val (hashfeatured_entities_Df1: DataFrame, hashfeatured_entities_Df2: DataFrame) = applyHashingTf_sub("ent_sub", "features", entities_Df1, entities_Df2) - val (model_sub: MinHashLSHModel, transformed_sub_Df1: DataFrame, transformed_sub_Df2: DataFrame) = minHashLSH(hashfeatured_entities_Df1, hashfeatured_entities_Df2) - val ds_subjects = approxsimilarityjoin(model_sub, transformed_sub_Df1, transformed_sub_Df2) - - //Combine 
predicate:object level knowledge for the matched pairs - val ds_subjects_rdd = ds_subjects.rdd - val ds_subjects_data1 = ds_subjects_rdd.map(f => { (f.get(0).toString(), f.get(1).toString()) }).join(entites_RDD1) - val ds_subjects_data2 = ds_subjects_data1.map(f => { (f._2._1, (f._1, f._2._2)) }).join(entities_RDD2) - val ds_subjects_data = ds_subjects_data2.map(f => { (f._2._1._1, f._2._1._2, f._1, f._2._2) }) - return ds_subjects_data - } - - /** - * This api vectroises the entity subjects tokenised to form features - * - * Apply HashingTF vectorisation on the tokenised subjects, setting our setNumFeatures means that it would probably encounter those many different terms/words in the inp_column - * We try to avoid collisions by keeping this value high. - * - * @param inp_col specifies the input column for vectorisation - * @param out_col specifies the output column containing features - * data1 and data2 are dataframes containing the tokenised subjects - * @return Dataframes with vectorised features i.e. 
tokenised subjects are vectorised here - */ - def applyHashingTf_sub(inp_col: String, out_col: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = { - val hashingTf = new HashingTF().setInputCol(inp_col).setOutputCol(out_col).setNumFeatures(vocab_size.toInt) - val hashfeatured_entities_Df1 = hashingTf.transform(data1) - val hashfeatured_entities_Df2 = hashingTf.transform(data2) - return (hashfeatured_entities_Df1, hashfeatured_entities_Df2) - } - - /** - * This api MinHashes the featured entity subjects - * - * setting our setNumHashTables to 3 means 3 hashvalues to be generated for each feature - * - * @param featured_entites_Df1 and featured_entites_Df2 specifies the featured dataframes generated by applyHashingTf_sub api - * @return MinHashLSH model with Dataframes containing minhashes generated for the features - */ - def minHashLSH(featured_entites_Df1: DataFrame, featured_entites_Df2: DataFrame): (MinHashLSHModel, DataFrame, DataFrame) = { - val mh = new MinHashLSH().setNumHashTables(3).setInputCol("features").setOutputCol("hashed_values") - val featured_data = featured_entites_Df1.union(featured_entites_Df2).distinct() - val model = mh.fit(featured_data) - val transformed_entities_Df1 = model.transform(featured_entites_Df1) - val transformed_entities_Df2 = model.transform(featured_entites_Df2) - return (model, transformed_entities_Df1, transformed_entities_Df2) - } - - /** - * This api applies approxsimilarity join to detect entity matches with subject similarity - * - * Applying approxSimilarityJoin with threshold specified by user on subjects - * A lower threshold means the entity matches found are closely related - * - * @param model - MinHashLSHModel generated by minHashLSH api - * @param df1 and df2 specifies the dataframes, generated by minHashLSH api - * @param threshold- threshold for subject similarity specified by user - * @return matched entity pairs - */ - def approxsimilarityjoin(model: MinHashLSHModel, df1: DataFrame, df2: 
DataFrame): DataFrame = { - val dataset = model.approxSimilarityJoin(df1, df2, threshold_subject) - val refined_ds = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) //only for lsh1subjects - return refined_ds - } - - /** - * This api compares predicate level knowledge of similar entities matched pairs generated by lsh_subjects api - * Compute jaccard similarity on the predicates of paired entity matches - * Filter the entity matches with similarity more than jSimilarity, specified by user - * - * @param similar_subj_rdd contains the entity matches based on subjects with intergated attribute level knowledge, generated from lsh_subjects - * @param jSimilartiy - the Jaccard similarity threshold set for predicate level comparison - * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject, entity2_predicates:objectspairs, intersecting_predicates, jsimilarityofpredicates)]), - * where entity1_subject and entity2_subject are the matched pairs on predicate level knowledge - */ - def get_similar_predicates(similar_subj_rdd: RDD[(String, String, String, String)]): RDD[(String, List[String], String, List[String], List[String], Double)] = { - val refined_data_sub = similar_subj_rdd.map(f => { - val sub1 = f._1 // entity1_subject - val s_data1 = f._2 // entity1_predicateobject_pairs - val sub2 = f._3 // entity2_subject - val s_data2 = f._4 //// entity2_predicateobject_pairs - - //segregate each of the predicate_object pairs for both the entities - val pred_obj1 = s_data1.split(" , ").toList - val pred_obj2 = s_data2.split(" , ").toList - - //empty lists for predicates - var list_pred1 = List[String]() - var list_pred2 = List[String]() - - //extract only predicates from the predicate_object for both entities for comparison - for (x <- pred_obj1) { - list_pred1 = list_pred1 :+ x.split(":").head - } - for (x <- pred_obj2) { - list_pred2 = list_pred2 :+ x.split(":").head - } - - //Find common predicates 
among the entities - val intersect_pred = list_pred1.intersect(list_pred2) - val union_pred = list_pred1.length + list_pred2.length - intersect_pred.length - - //calculate jaccard similarity on predicate level knowledge of both entities for comparison - val similarity = intersect_pred.length.toDouble / union_pred.toDouble - - (sub1, pred_obj1, sub2, pred_obj2, intersect_pred, similarity) - }) - similar_subj_rdd.unpersist() - - //filter the entity pairs with jaccard similarities that fit or are above user defined jsimilarity for predicate level knoledge comparison - val refined_data_pred = refined_data_sub.filter(f => f._6 >= jSimilartiy_predicate) - return refined_data_pred - } - - /** - * This api removes false positives by compares object level knowledge of similar entities matched pairs generated by get_similar_predicates api - * Compute jaccard similarity on the objects of paired entity matches, only for the intersecting predicates - * Filter the entity matches with similarity more than threshold_objects, specified by user - * - * @param ds_pred contains the entity matches based on predicate level knowledge - * @param threshold_objects - the Jaccard similarity threshold set for object level comparison - * @return ([(entity1_subject, entity2_subject, jsimilarityofobjects)]), - * where entity1_subject and entity2_subject are the matched pairs on object level knowledge - */ - def get_similarity_objects(ds_pred: RDD[(String, List[String], String, List[String], List[String], Double)]): RDD[(String, String, Double)] = { - val mapped_objects = ds_pred.map(f => { - val sub1 = f._1 // entity1_subject - val pred_obj1 = f._2 // entity1_predicateobject_pairs - val sub2 = f._3 // entity2_subject - val pred_obj2 = f._4 // entity2_predicateobject_pairs - val common_pred = f._5 // intersecting_predicates of both entites for comparing their objects - - var obj1: String = " " - var obj2: String = " " - - // Segregate objects of only intersecting predicates among the two entities 
- for (x <- pred_obj1) { - val pred = x.split(":").head - val obj = x.split(":").last - if (common_pred.contains(pred)) - obj1 = obj1 + " " + obj - } - - for (x <- pred_obj2) { - val pred = x.split(":").head - val obj = x.split(":").last - if (common_pred.contains(pred)) - obj2 = obj2 + " " + obj - } - - val sub_obj1 = obj1.trim().split(" ").toList.distinct - val sub_obj2 = obj2.trim().split(" ").toList.distinct - - //Compute jaccard similarity on the objects - val intersect_obj = sub_obj1.intersect(sub_obj2).length - val union_obj = sub_obj1.length + sub_obj2.length - intersect_obj - - val similarity = intersect_obj.toDouble / union_obj.toDouble - - (sub1, sub2, similarity) - }) - ds_pred.unpersist() - // Extract entity matches with similarity more than threshold_objects, specified by user - val results = mapped_objects.filter(f => f._3 >= threshold_object) - return results - } - - /** - * This api evaluates our results by comparing it with groundtruth - * Compute Precision, Recall and F1-Measure - * - * @param result contains the entity matches predicted by our algorithm, generated from get_similarity_objects api - * @param teacher - the groundtruth for comparison - * @param output_path - path to save the result rdd - */ - def evaluation(result: RDD[(String, String, Double)]): RDD[(String, String)] = { - val predicted_rdd = result.map(f => { - (f._1, f._2) - }) - val teacher_rdd = teacher.rdd - val actual_rdd = teacher_rdd.map(f => { - (f.get(0).toString(), f.get(1).toString()) - }) - - //Calculate TruePostives for precision, recall and f1-measure - val truePositives = actual_rdd.intersection(predicted_rdd).count - println("***************************************************************************************") - println("***************************************************************************************") - - val actual = actual_rdd.count() - val results = predicted_rdd.count() - println("Actual: " + actual) - println("Predicted: " + results) - 
println("True Positives: " + truePositives) - val precision = (truePositives * 100.00) / (results) - println("Precision: " + precision) - val recall = (truePositives * 100) / (actual) - println("Recall: " + recall) - val f1_measure = (2 * precision * recall) / (precision + recall) - println("F1-measure: " + f1_measure) - println("***************************************************************************************") - println("***************************************************************************************") - //Save the output_rdd - predicted_rdd.coalesce(1).saveAsTextFile(output_path) - println("Output Saved!") - - predicted_rdd - } -} - -object EntityResolution_RDFData_HashingTF { - def apply(spark: SparkSession, triplesRDD1: RDD[Triple], triplesRDD2: RDD[Triple], - teacher: DataFrame, threshold_subject: Double, jsimilarity_predicate: Double, - threshold_object: Double, vocab_size: Long, output_path: String): EntityResolution_RDFData_HashingTF = new EntityResolution_RDFData_HashingTF(spark, triplesRDD1, triplesRDD2, - teacher, threshold_subject, jsimilarity_predicate, threshold_object, vocab_size, output_path) -} diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt new file mode 100644 index 0000000..6c90dfc --- /dev/null +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt @@ -0,0 +1,13 @@ +owl:sameas +wikiPageID +wikiPageRevisionID +wikiPageRevisionLink +wikiPageUsesTemplate +wikiPageHistoryLink +wikiPageExternalLink +wikiPageEditLink +wikiPageExtracted +wikiPageLength +wikiPageModified +wikiPageOutDegree +wikiPageRedirects \ No newline at end of file From 5e5aace898753253e7b5610741d926e7bddbb5e5 Mon Sep 17 00:00:00 2001 From: Amrit Kaur Date: Thu, 13 Feb 2020 00:15:05 +0100 Subject: [PATCH 3/7] Suggested changes made --- 
.../src/main/resources/application.conf | 5 + .../ml/spark/entity_resolution/Commons.scala | 212 ++++-------- .../entity_resolution/ERCountVectorizer.scala | 31 +- .../spark/entity_resolution/ERHashingTF.scala | 29 +- .../predicatesfilteration.txt | 13 - .../resources/entity_resolution/source1.nt | 325 ++++++++++++++++++ .../resources/entity_resolution/source2.nt | 281 +++++++++++++++ .../ml/spark/entity_resolution/ERTests.scala | 50 +++ 8 files changed, 762 insertions(+), 184 deletions(-) create mode 100644 sansa-ml-spark/src/main/resources/application.conf delete mode 100644 sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt create mode 100644 sansa-ml-spark/src/test/resources/entity_resolution/source1.nt create mode 100644 sansa-ml-spark/src/test/resources/entity_resolution/source2.nt create mode 100644 sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala diff --git a/sansa-ml-spark/src/main/resources/application.conf b/sansa-ml-spark/src/main/resources/application.conf new file mode 100644 index 0000000..8061177 --- /dev/null +++ b/sansa-ml-spark/src/main/resources/application.conf @@ -0,0 +1,5 @@ +sansa.entity_resolution.partitions = 400 +sansa.entity_resolution.repartition_number = 600 +sansa.entity_resolution.removePredicatesList =["owl:sameas", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", + "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted", + "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects"] \ No newline at end of file diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala index 91ef919..080d1e4 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala +++ 
b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala @@ -1,34 +1,28 @@ package net.sansa_stack.ml.spark.entity_resolution -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.DataFrame -import org.apache.spark.rdd.RDD +import java.io.Serializable + import org.apache.jena.graph.Triple -import org.apache.spark.ml.feature.Tokenizer +import org.apache.spark.RangePartitioner import org.apache.spark.ml.feature.MinHashLSH import org.apache.spark.ml.feature.MinHashLSHModel +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.Dataset -import org.apache.spark.sql.functions._ -import org.apache.spark.ml.feature.RegexTokenizer import org.apache.spark.sql.Row -import org.apache.spark.RangePartitioner +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions._ import org.apache.spark.storage.StorageLevel -import org.apache.spark.RangePartitioner -import org.apache.spark.RangePartitioner import scala.io.Source -import java.io.Serializable -/* A generic Entity Resolution Approach for larger datasets(for e.g. 24.8GB) +/** + * A generic Entity Resolution Approach for larger datasets(for e.g. 
24.8GB) * Dataset1 has 47 million entities, approx 19GB * Dataset2 has 19 million entities, approx 5.8GB - * */ + */ abstract class Commons(val spark: SparkSession, val sourceData1: RDD[Triple], val sourceData2: RDD[Triple], - val teacher: DataFrame, val thresholdSubject: Double, val jsimilarityPredicate: Double, - val thresholdObject: Double, val vocabSize: Long, val outputPath: String) extends Serializable { - -@transient lazy val consoleLog: Logger = Logger.getLogger(getClass.getName) -private val PARTITIONS = 400 -private val REPARTITION_NUMBER = 600 + val thresholdSubject: Double, val jsimilarityPredicate: Double, + val thresholdObject: Double, val vocabSize: Long) extends Serializable { /** * Execute entity profile generation @@ -36,30 +30,27 @@ private val REPARTITION_NUMBER = 600 * Step2- Compare the predicates for matched entities, by get_similar_predicates api * Step3- Compare the objects for intersecting predicates in matched entities, by get_similar_objects api */ - def run(): RDD[(String, String)] = { - + def run(): RDD[(String, String, Double)] = { + import com.typesafe.config._ + val conf = ConfigFactory.load("application.conf") + val rpredicates = conf.getStringList("sansa.entity_resolution.removePredicatesList") + import scala.collection.JavaConverters._ + val removePredicates = rpredicates.asScala.toList + val partitions = conf.getInt("sansa.entity_resolution.partitions") + val repartitionNumber = conf.getInt("sansa.entity_resolution.repartition_number") // Define entity profiles from triplesRDD1 and triplesRDD2 - val entityProfiles1 = getEntityProfiles(sourceData1) - val entityProfiles2 = getEntityProfiles(sourceData2) - + val entityProfiles1 = getEntityProfiles(sourceData1, removePredicates) + val entityProfiles2 = getEntityProfiles(sourceData2, removePredicates) // Similar entities matched based on subjects - val dsSubjects1: RDD[(String, String, String, String)] = lshSubjects(entityProfiles1, entityProfiles2) - val dsSubjects = 
dsSubjects1.repartition(REPARTITION_NUMBER).persist(StorageLevel.MEMORY_AND_DISK) - + val dsSubjects1: RDD[(String, String, String, String)] = lshSubjects(entityProfiles1, entityProfiles2, partitions) + val dsSubjects = dsSubjects1.repartition(repartitionNumber).persist(StorageLevel.MEMORY_AND_DISK) // Compare the predicates for matched entities by subject val refinedDataPred = getSimilarPredicates(dsSubjects) - val dsPredicates = refinedDataPred.repartition(PARTITIONS).persist(StorageLevel.MEMORY_AND_DISK) - + val dsPredicates = refinedDataPred.repartition(partitions).persist(StorageLevel.MEMORY_AND_DISK) // Compare the objects for intersecting predicates in matched entities by predicate level knowledge - val refinedObjects = getSimilarityObjects(dsPredicates) - - //Evaluate our results with groundtruth data - val output = evaluation(refinedObjects) - + val output = getSimilarityObjects(dsPredicates) output - - } - + } /** * Filters triples and defines entity profiles. * The triplesRDD needs filteration. We perform data cleansing by removing common wikilinks predicates listed in removePredicates List @@ -69,22 +60,13 @@ private val REPARTITION_NUMBER = 600 * Consider only triples with objects as URI or with "en" literal language or no literal language * Group all triples of a particular subject to form the entity profiles in the format: * - * * @param triplesRDD contains RDF data in the form of triples - * @return ([(subject,predicate1:object1 , predicate2:object2 , ... , predicaten:objectn)]), + * @return ([(subject,predicate1:object1 , predicate2:object2 , ... 
, predicaten:objectn)]), * where subject is the key and the group of paired predicate:object forms the value * ex:- (Budapest_City, areaCode:1 , country:Hungary) */ - def getEntityProfiles(sourceData: RDD[Triple]): RDD[(String, String)] = { - - //predicates to be filtered out from triples - /** - * val rPredicates: List[String] = List("owl:sameas", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", - "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted", - "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects") - * */ - val filename = "predicatesfilteration.txt" - val removePredicates: List[String] = Source.fromFile(filename).getLines.toList + def getEntityProfiles(sourceData: RDD[Triple], removePredicates: List[String]): RDD[(String, String)] = { + // predicates to be filtered out from triples val broadcastVar = spark.sparkContext.broadcast(removePredicates) // broadcast here small RDD val distinctTriples = sourceData.distinct() val removeTriples = distinctTriples.mapPartitions({ f => @@ -92,81 +74,68 @@ private val REPARTITION_NUMBER = 600 for { x <- f z <- k - if x.getPredicate.getURI().contains(z)) + if x.getPredicate.getURI().contains(z) } yield (x) - - }) - - val filteredTriples = distinctTriples.subtract(removeTriples) - - //Define entity profiles - val entity = filteredTriples.filter(f => (f.getObject().isURI() || f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == "")) + }) + val filteredTriples = distinctTriples.subtract(removeTriples) + // Define entity profiles + val entity = filteredTriples.filter(f => (f.getObject().isURI() || f.getObject.getLiteralLanguage == "en" || f.getObject.getLiteralLanguage == "")) .map(f => { - val key = f.getSubject.getURI.split("/").last.trim() - val pred = f.getPredicate.getURI.split(Array('/', '#')).last.trim() + val key = f.getSubject.getLocalName + val pred = f.getPredicate.getLocalName if 
(f.getObject.isURI()) { - val obj = f.getObject.getURI.split("/").last.trim() - val value = pred + ":" + obj //predicate and object are seperated by ':' + val obj = f.getObject.getLocalName + val value = pred + ":" + obj // predicate and object are seperated by ':' (key, value) } else { - val obj = f.getObject.getLiteral.toString().split(Array('^', '@')).head.trim() + val obj = f.getObject.getLiteralValue.toString() val value = pred + ":" + obj.replace(":", "") (key, value) } }).reduceByKey(_ + " , " + _) // triples seperated by ' , ' - entity } - /** * This api matches similar entities based on similarity of their subjects * Get subject data from entites_RDD1 and entites_RDD2. * Tokenise it by "_" to form ent_sub1 and ent_sub2 for comparison * ex:- (Budapest_City, Set(Budapest, City)) - * * Apply LSH technique on the tokenised subjects to get matched pairs on threshold_subject, specified by user * Join the predicate:object knowledge for each of the entity matches returned - * * @param entites_RDD1 and entitites_RDD2 contains the entity profiles, generated from get_entity_profiles, to be compared for match * @param threshold_subject - the similarity threshold set for approxsimilarityjoin - * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject , entity2_predicates:objectspairs)]), + * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject , entity2_predicates:objectspairs)]), * where entity1_subject and entity2_subject are the matched pairs */ - def lshSubjects(entitesRDD1: RDD[(String, String)], entitiesRDD2: RDD[(String, String)]): RDD[(String, String, String, String)] = { - - //Get subject data and tokenise it + def lshSubjects(entitesRDD1: RDD[(String, String)], entitiesRDD2: RDD[(String, String)], partitions: Int): RDD[(String, String, String, String)] = { + // Get subject data and tokenise it val entSub1 = entitesRDD1.map(f => { (f._1, f._1.split("_")) }) - val partrdd1 = new RangePartitioner(PARTITIONS, entSub1) 
+ val partrdd1 = new RangePartitioner(partitions, entSub1) val partitionedrdd1 = entSub1.partitionBy(partrdd1).persist(StorageLevel.MEMORY_AND_DISK) val entSub2 = entitiesRDD2.map(f => { (f._1, f._1.split("_")) }) - val partrdd2 = new RangePartitioner(PARTITIONS, entSub2) + val partrdd2 = new RangePartitioner(partitions, entSub2) val partitionedrdd2 = entSub2.partitionBy(partrdd2).persist(StorageLevel.MEMORY_AND_DISK) val entitiesDf1 = spark.createDataFrame(partitionedrdd1).toDF("entities", "ent_sub") val entitiesDf2 = spark.createDataFrame(partitionedrdd2).toDF("entities", "ent_sub") - //Apply LSH technique by vectorisation through HashingTF or CountVectorizer + // Apply LSH technique by vectorisation through HashingTF or CountVectorizer val (featuredEntitiesDf1: DataFrame, featuredEntitiesDf2: DataFrame) = vectorise("ent_sub", "features", entitiesDf1, entitiesDf2) val (modelSub: MinHashLSHModel, transformedSubDf1: DataFrame, transformedSubDf2: DataFrame) = minHashLSH(featuredEntitiesDf1, featuredEntitiesDf2) val dsSubjects = approxSimilarityJoin(modelSub, transformedSubDf1, transformedSubDf2) - - //Combine predicate:object level knowledge for the matched pairs + // Combine predicate:object level knowledge for the matched pairs val dsSubjectsRDD = dsSubjects.rdd val dsSubjectsData1 = dsSubjectsRDD.map(f => { (f.get(0).toString(), f.get(1).toString()) }).join(entitesRDD1) val dsSubjectsData2 = dsSubjectsData1.map(f => { (f._2._1, (f._1, f._2._2)) }).join(entitiesRDD2) val dsSubjectsData = dsSubjectsData2.map(f => { (f._2._1._1, f._2._1._2, f._1, f._2._2) }) - dsSubjectsData - } + } - def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) //abstract method - - /** + def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) // abstract method + /** * This api MinHashes the featured entity subjects - * * setting our setNumHashTables to 3 means 3 hashvalues to be 
generated for each feature - * * @param featured_entites_Df1 and featured_entites_Df2 specifies the featured dataframes generated by applyHashingTf_sub api * @return MinHashLSH model with Dataframes containing minhashes generated for the features */ @@ -181,10 +150,8 @@ private val REPARTITION_NUMBER = 600 /** * This api applies approxsimilarity join to detect entity matches with subject similarity - * * Applying approxSimilarityJoin with threshold specified by user on subjects * A lower threshold means the entity matches found are closely related - * * @param model - MinHashLSHModel generated by minHashLSH api * @param df1 and df2 specifies the dataframes, generated by minHashLSH api * @param threshold- threshold for subject similarity specified by user @@ -192,7 +159,7 @@ private val REPARTITION_NUMBER = 600 */ def approxSimilarityJoin(model: MinHashLSHModel, df1: DataFrame, df2: DataFrame): DataFrame = { val dataset = model.approxSimilarityJoin(df1, df2, thresholdSubject) - val refinedDs = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) //only for lsh1subjects + val refinedDs = dataset.select(col("datasetA.entities").alias("entity1"), col("datasetB.entities").alias("entity2")) // only for lsh1subjects refinedDs } @@ -200,10 +167,9 @@ private val REPARTITION_NUMBER = 600 * This api compares predicate level knowledge of similar entities matched pairs generated by lsh_subjects api * Compute jaccard similarity on the predicates of paired entity matches * Filter the entity matches with similarity more than jSimilarity, specified by user - * * @param similar_subj_rdd contains the entity matches based on subjects with intergated attribute level knowledge, generated from lsh_subjects * @param jSimilartiy - the Jaccard similarity threshold set for predicate level comparison - * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject, entity2_predicates:objectspairs, intersecting_predicates, 
jsimilarityofpredicates)]), + * @return ([(entity1_subject, entity1_predicates:objectspairs, entity2_subject, entity2_predicates:objectspairs, intersecting_predicates, jsimilarityofpredicates)]), * where entity1_subject and entity2_subject are the matched pairs on predicate level knowledge */ def getSimilarPredicates(similarSubjRDD: RDD[(String, String, String, String)]): RDD[(String, List[String], String, List[String], List[String], Double)] = { @@ -211,38 +177,31 @@ private val REPARTITION_NUMBER = 600 val sub1 = f._1 // entity1_subject val sdata1 = f._2 // entity1_predicateobject_pairs val sub2 = f._3 // entity2_subject - val sdata2 = f._4 //// entity2_predicateobject_pairs - - //segregate each of the predicate_object pairs for both the entities - val predObj1 = sdata1.split(" , ").toList + val sdata2 = f._4 // entity2_predicateobject_pairs + // segregate each of the predicate_object pairs for both the entities + val predObj1 = sdata1.split(" , ").toList val predObj2 = sdata2.split(" , ").toList - - //empty lists for predicates + // empty lists for predicates var listPred1 = List[String]() var listPred2 = List[String]() - - //extract only predicates from the predicate_object for both entities for comparison + // extract only predicates from the predicate_object for both entities for comparison for (x <- predObj1) { listPred1 = listPred1 :+ x.split(":").head } for (x <- predObj2) { listPred2 = listPred2 :+ x.split(":").head } - - //Find common predicates among the entities + // Find common predicates among the entities val intersectPred = listPred1.intersect(listPred2) val unionPred = listPred1.length + listPred2.length - intersectPred.length - - //calculate jaccard similarity on predicate level knowledge of both entities for comparison + // calculate jaccard similarity on predicate level knowledge of both entities for comparison val similarity = intersectPred.length.toDouble / unionPred.toDouble (sub1, predObj1, sub2, predObj2, intersectPred, similarity) }) 
similarSubjRDD.unpersist() - - //filter the entity pairs with jaccard similarities that fit or are above user defined jsimilarity for predicate level knoledge comparison - val refinedDataPred = refinedDataSub.filter(f => f._6 >= jSimilartiyPredicate) - + // filter the entity pairs with jaccard similarities that fit or are above user defined jsimilarity for predicate level knoledge comparison + val refinedDataPred = refinedDataSub.filter(f => f._6 >= jsimilarityPredicate) refinedDataPred } @@ -250,10 +209,9 @@ private val REPARTITION_NUMBER = 600 * This api removes false positives by compares object level knowledge of similar entities matched pairs generated by get_similar_predicates api * Compute jaccard similarity on the objects of paired entity matches, only for the intersecting predicates * Filter the entity matches with similarity more than threshold_objects, specified by user - * * @param ds_pred contains the entity matches based on predicate level knowledge * @param threshold_objects - the Jaccard similarity threshold set for object level comparison - * @return ([(entity1_subject, entity2_subject, jsimilarityofobjects)]), + * @return ([(entity1_subject, entity2_subject, jsimilarityofobjects)]), * where entity1_subject and entity2_subject are the matched pairs on object level knowledge */ def getSimilarityObjects(dsPred: RDD[(String, List[String], String, List[String], List[String], Double)]): RDD[(String, String, Double)] = { @@ -271,21 +229,23 @@ private val REPARTITION_NUMBER = 600 for (x <- predObj1) { val pred = x.split(":").head val obj = x.split(":").last - if (commonPred.contains(pred)) + if (commonPred.contains(pred)) { obj1 = obj1 + " " + obj + } } for (x <- predObj2) { val pred = x.split(":").head val obj = x.split(":").last - if (commonPred.contains(pred)) + if (commonPred.contains(pred)) { obj2 = obj2 + " " + obj + } } val subObj1 = obj1.trim().split(" ").toList.distinct val subObj2 = obj2.trim().split(" ").toList.distinct - //Compute jaccard 
similarity on the objects + // Compute jaccard similarity on the objects val intersectObj = subObj1.intersect(subObj2).length val unionObj = subObj1.length + subObj2.length - intersectObj @@ -296,44 +256,6 @@ private val REPARTITION_NUMBER = 600 dsPred.unpersist() // Extract entity matches with similarity more than threshold_objects, specified by user val results = mappedObjects.filter(f => f._3 >= thresholdObject) - results } - - /** - * This api evaluates our results by comparing it with groundtruth - * Compute Precision, Recall and F1-Measure - * - * @param result contains the entity matches predicted by our algorithm, generated from get_similarity_objects api - * @param teacher - the groundtruth for comparison - * @param output_path - path to save the result rdd - */ - def evaluation(result: RDD[(String, String, Double)]): RDD[(String, String)] = { - val predictedRDD = result.map(f => { - (f._1, f._2) - }) - val teacherRDD = teacher.rdd - val actualRDD = teacherRDD.map(f => { - (f.get(0).toString(), f.get(1).toString()) - }) - - //Calculate TruePostives for precision, recall and f1-measure - val truePositives = actualRDD.intersection(predictedRDD).count - consoleLog.info("***************************************************************************************") - - val actual = actualRDD.count() - val results = predictedRDD.count() - val precision = (truePositives * 100.00) / (results) - consoleLog.info(s"Precision: $precision") - val recall = (truePositives * 100) / (actual) - consoleLog.info(s"Recall: $recall") - val fMeasure = (2 * precision * recall) / (precision + recall) - consoleLog.info(s"F-Measure: $fMeasure") - consoleLog.info("***************************************************************************************") - - //Save the output_rdd - predictedRDD.coalesce(1).saveAsTextFile(outputPath) - - predictedRDD - } } diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala 
b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala index c86cd64..c2597f5 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala @@ -1,22 +1,24 @@ package net.sansa_stack.ml.spark.entity_resolution -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.DataFrame -import org.apache.spark.rdd.RDD +import java.io.Serializable + import org.apache.jena.graph.Triple import org.apache.spark.ml.feature.CountVectorizer -import java.io.Serializable +import org.apache.spark.ml.linalg.Vector +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.DataTypes class ERCountVectorizer(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], - teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, - thresholdObject: Double, vocabSize: Long, outputPath: String) extends Commons(spark, sourceData1, sourceData2, - teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) with Serializable { + thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long) extends Commons(spark, sourceData1, sourceData2, + thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize) with Serializable { /** * This api vectroises the entity subjects tokenised to form features - * * Apply CountVectorizer vectorisation on the tokenised subjects, setting our setVocabSize means that in our dictionary we will be adding approximately terms<=vocab_size. 
Terms are in the inp_column - * * @param inp_col specifies the input column for vectorisation * @param out_col specifies the output column containing features * data1 and data2 are dataframes containing the tokenised subjects @@ -25,8 +27,9 @@ class ERCountVectorizer(spark: SparkSession, sourceData1: RDD[Triple], sourceDat override def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = { val data = data1.union(data2).distinct() val countVectorizer = new CountVectorizer().setInputCol(inpCol).setOutputCol(outCol).setVocabSize(vocabSize.toInt).setMinDF(1).fit(data) - val featuredEntitiesDf1 = countVectorizer.transform(data1) - val featuredEntitiesDf2 = countVectorizer.transform(data2) + val isNoneZeroVector = udf({v: Vector => v.numNonzeros > 0}, DataTypes.BooleanType) + val featuredEntitiesDf1 = countVectorizer.transform(data1).filter(isNoneZeroVector(col(outCol))) + val featuredEntitiesDf2 = countVectorizer.transform(data2).filter(isNoneZeroVector(col(outCol))) return (featuredEntitiesDf1, featuredEntitiesDf2) } @@ -35,7 +38,7 @@ class ERCountVectorizer(spark: SparkSession, sourceData1: RDD[Triple], sourceDat object ERCountVectorizer { def apply(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], - teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, - thresholdObject: Double, vocabSize: Long, outputPath: String): ERCountVectorizer = new ERCountVectorizer(spark, sourceData1, sourceData2, - teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) + thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long): ERCountVectorizer = new ERCountVectorizer(spark, sourceData1, sourceData2, + thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize) } diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala 
b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala index 064832f..30e03cf 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala @@ -1,16 +1,20 @@ package net.sansa_stack.ml.spark.entity_resolution -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.DataFrame -import org.apache.spark.rdd.RDD +import java.io.Serializable + import org.apache.jena.graph.Triple import org.apache.spark.ml.feature.HashingTF -import java.io.Serializable +import org.apache.spark.ml.linalg.Vector +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.DataTypes class ERHashingTF(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], - teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, - thresholdObject: Double, vocabSize: Long, outputPath: String) extends Commons(spark, sourceData1, sourceData2, - teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) with Serializable { + thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long) extends Commons(spark, sourceData1, sourceData2, + thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize) with Serializable { /** * This api vectroises the entity subjects tokenised to form features @@ -25,8 +29,9 @@ class ERHashingTF(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RD */ override def vectorise(inpCol: String, outCol: String, data1: DataFrame, data2: DataFrame): (DataFrame, DataFrame) = { val hashingTf = new HashingTF().setInputCol(inpCol).setOutputCol(outCol).setNumFeatures(vocabSize.toInt) - val featuredEntitiesDf1 = hashingTf.transform(data1) - val featuredEntitiesDf2 = 
hashingTf.transform(data2) + val isNoneZeroVector = udf({v: Vector => v.numNonzeros > 0}, DataTypes.BooleanType) + val featuredEntitiesDf1 = hashingTf.transform(data1).filter(isNoneZeroVector(col(outCol))) + val featuredEntitiesDf2 = hashingTf.transform(data2).filter(isNoneZeroVector(col(outCol))) return (featuredEntitiesDf1, featuredEntitiesDf2) } @@ -35,7 +40,7 @@ class ERHashingTF(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RD object ERHashingTF { def apply(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RDD[Triple], - teacher: DataFrame, thresholdSubject: Double, jsimilarityPredicate: Double, - thresholdObject: Double, vocabSize: Long, outputPath: String): ERHashingTF = new ERHashingTF(spark, sourceData1, sourceData2, - teacher, thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize, outputPath) + thresholdSubject: Double, jsimilarityPredicate: Double, + thresholdObject: Double, vocabSize: Long): ERHashingTF = new ERHashingTF(spark, sourceData1, sourceData2, + thresholdSubject, jsimilarityPredicate, thresholdObject, vocabSize) } diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt deleted file mode 100644 index 6c90dfc..0000000 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/predicatesfilteration.txt +++ /dev/null @@ -1,13 +0,0 @@ -owl:sameas -wikiPageID -wikiPageRevisionID -wikiPageRevisionLink -wikiPageUsesTemplate -wikiPageHistoryLink -wikiPageExternalLink -wikiPageEditLink -wikiPageExtracted -wikiPageLength -wikiPageModified -wikiPageOutDegree -wikiPageRedirects \ No newline at end of file diff --git a/sansa-ml-spark/src/test/resources/entity_resolution/source1.nt b/sansa-ml-spark/src/test/resources/entity_resolution/source1.nt new file mode 100644 index 0000000..91d30aa --- /dev/null +++ 
b/sansa-ml-spark/src/test/resources/entity_resolution/source1.nt @@ -0,0 +1,325 @@ + . + "8014125"^^ . + "May 2006 – current"^^ . + . + "741159510"^^ . + . + "Ali Baban"@en . + . + . + "current"^^ . + "Minister of Planning and Development Co-operation"^^ . + "male"@en . + . + "2007"^^ . + "2002"^^ . + "719121594"^^ . + . + "male"@en . + . + . + "Ali Babacan"@nl . + . + . + "1945916"^^ . + . + "Ali Babacan"@de . + "Ali"@en . + . + "1967-4-4"^^ . + . + . + "Chief Negotiator for Turkish Accession to the European Union" . + . + . + "Deputy Prime Minister of Turkey" . + "Minister of Economic Affairs" . + "Ali Babacan"@en . + . + . + . + . + . + . + . + . + "Hilmi Emir"^^ . + "Babacan"@en . + "Ali Babacan"@es . + "Ali Babacan"@fr . + . + . + . + . + . + "Ali Baba"@de . + . + "743734575"^^ . + . + "Ali Baba"@en . + . + . + . + "Ali Baba"@en . + . + . + "Al\u00ED Bab\u00E1"@es . + . + "\u0639\u0644\u064A \u0628\u0627\u0628\u0627 \u0648\u0627\u0644\u0623\u0631\u0628\u0639\u0648\u0646 \u0644\u0635\u0627"@ar . + "927344"^^ . + . + . + . + "\u963F\u91CC\u5DF4\u5DF4"@zh . + . + "0.03"^^ . + "1.17"^^ . + . + "0.026"^^ . + . + . + "162"^^ . + "22"^^ . + . + "\u9ED1\u8393"@zh . + "0.646"^^ . + "Species"^^ . + . + "5.3"^^ . + . + . + "The blackberry is an edible fruit produced by many species in the Rubus genus in the Rosaceae family, hybrids among these species within the Rubus subgenus, and hybrids between the Rubus and Idaeobatus subgenera. The taxonomy of the blackberries has historically been confused because of hybridization and apomixis, so that species have often been grouped together and called species aggregates. For example, the entire subgenus Rubus has been called the Rubus fruticosus aggregate, although the species R. fruticosus is considered a synonym of R. plicatus."@en . + "Ripe, ripening, and unripe blackberries,"^^ . + . + . + . 
+ "La m\u00FBre, m\u00FBre sauvage ou parfois m\u00FBron est le fruit de la ronce commune, buisson \u00E9pineux tr\u00E8s envahissant du genre Rubus de la famille des Rosac\u00E9es comme le framboisier. En Suisse romande, en Haute-Savoie ainsi que dans le Nord-Pas-de-Calais, elle s'appelle aussi \u00AB meuron \u00BB. Les m\u00FBres sauvages sont pr\u00E9c\u00E9d\u00E9es de leurs fleurs, blanches ou blanc ros\u00E9, de deux \u00E0 trois centim\u00E8tres de diam\u00E8tre, regroup\u00E9es en corymbes ; elles ont cinq s\u00E9pales, cinq p\u00E9tales et de nombreuses \u00E9tamines."@fr . + . + . + "\u9ED1\u8393\u6A39\uFF0C\u6216\u7E3D\u7A31\u9ED1\u8393\uFF0C\u662F\u4E00\u7A2E\u5EE3\u6CDB\u5206\u5E03\u4E14\u77E5\u540D\u7684\u704C\u6728\uFF1B\u5C6C\u65BC\u6A39\u8393\u985E\uFF08\u8594\u8587\u79D1\u61F8\u9264\u5B50\u5C6C\uFF09\u7684\u690D\u7269\uFF0C\u53EF\u4EE5\u9577\u52303\u516C\u5C3A\uFF0810\u82F1\u544E\uFF09\u9AD8\uFF0C\u4E26\u751F\u7522\u8EDF\u8CEA\u7684\u679C\u5BE6\u2014\u2014\u5E38\u7528\u4F5C\u751C\u9EDE\u3001\u679C\u91AC\u3001\u7121\u5B50\u679C\u51CD\uFF0C\u6709\u6642\u4E5F\u7528\u4F86\u88FD\u6C34\u679C\u9152\u3002\u6709\u591A\u7A2E\u61F8\u9264\u5B50\u5C6C\u690D\u7269\u7A31\u4F5C\u9ED1\u8393\uFF0C\u800C\u4E14\u9019\u4E9B\u7A2E\u5F88\u5BB9\u6613\u767C\u751F\u96DC\u4EA4\uFF0C\u56E0\u6B64\u683D\u57F9\u7A2E\u7684\u7A2E\u6578\u6703\u8F03\u539F\u751F\u7A2E\u7684\u591A\u4E0A\u8A31\u591A\u3002\u9AD4\u578B\u8F03\u5C0F\u7684\u8FD1\u89AA\u2014\u2014\u6B27\u6D32\u6728\u8393\uFF08\u5B66\u540D\uFF1ARubus caesius\uFF09\u53EF\u4EE5\u900F\u904E\u5176\u679C\u5BE6\u4E0A\u767D\u8272\u81D8\u72C0\u7684\u5916\u5C64\u8207\u9ED1\u8393\u4F86\u4F5C\u5340\u5225\uFF0C\u800C\u4E14\u5B83\u5011\u7684\u5C0F\u6838\u679C\u6578\u91CF\u4E5F\u8F03\u5C11\u3002"@zh . + . + "180"^^ . + "744624673"^^ . + "*Rubus ursinus\n* Rubus laciniatus\u2014Evergreen blackberry\n*Rubus argutus\n* Rubus armeniacus\u2014Himalayan blackberry\n*Rubus plicatus\n*Rubus ulmifolius\nAnd hundreds more microspecies"^^ . + . 
+ "25"^^ . + "29"^^ . + "De cultuurbraam, die geteeld wordt voor de vruchten, is ontstaan uit kruisingen tussen verschillende braamsoorten, waardoor er geen soortnaam aan gegeven kan worden. Meestal worden de rassen van de cultuurbraam daarom in Rubus sectie Moriferi gerangschikt. De beroepsmatige teelt van bramen in Nederland bedraagt ongeveer 20 ha met een aanvoer van ongeveer 160.000 kg. Teelt van bramen in Nederland: Naast verse consumptie worden bramen ook diepgevroren of verwerkt tot sap, tot bramen op lichte siroop en tot jam. De verse bramen zijn maar enkele dagen houdbaar."@nl . + . + "The blackberry is an edible fruit produced by many species in the Rubus genus in the Rosaceae family, hybrids among these species within the Rubus subgenus, and hybrids between the Rubus and Idaeobatus subgenera. The taxonomy of the blackberries has historically been confused because of hybridization and apomixis, so that species have often been grouped together and called species aggregates. For example, the entire subgenus Rubus has been called the Rubus fruticosus aggregate, although the species R. fruticosus is considered a synonym of R. plicatus."@en . + "Blackberries, raw"^^ . + "Rubus"^^ . + "M\u00FBre (fruit de la ronce)"@fr . + "0.53"^^ . + . + . + . + . + . + "1"^^ . + . + . + . + . + . + "Braam (cultuurbraam)"@nl . + . + . + "1.39"^^ . + "21"^^ . + . + . + "Mora es el nombre que reciben diversos frutos comestibles de distintas especies bot\u00E1nicas. Son frutas o bayas que, a pesar de proceder de especies vegetales completamente diferentes, poseen aspecto similar y caracter\u00EDsticas comunes. En ocasiones, las distintas moras pueden ser confundidas e incluso obviadas, dado que al usar la palabra mora para hablar de dicha fruta, puede hacerse referencia, simplificando, a dos tipos de bayas procedentes de dos g\u00E9neros distintos de vegetales con rasgos fenot\u00EDpicos muy dispares entre s\u00ED, el g\u00E9nero Morus y el g\u00E9nero Rubus. 
Ambos g\u00E9neros dan moras, pero no son la misma fruta, unas vienen de unos \u00E1rboles com\u00FAnmente llamados moreras y morales (que son del g\u00E9nero Morus), y las otras provienen de unas plantas sarmentosas y espinosas com\u00FAnmente llamadas zarzas "@es . + "De cultuurbraam, die geteeld wordt voor de vruchten, is ontstaan uit kruisingen tussen verschillende braamsoorten, waardoor er geen soortnaam aan gegeven kan worden. Meestal worden de rassen van de cultuurbraam daarom in Rubus sectie Moriferi gerangschikt. De beroepsmatige teelt van bramen in Nederland bedraagt ongeveer 20 ha met een aanvoer van ongeveer 160.000 kg. Teelt van bramen in Nederland: Naast verse consumptie worden bramen ook diepgevroren of verwerkt tot sap, tot bramen op lichte siroop en tot jam. De verse bramen zijn maar enkele dagen houdbaar."@nl . + . + "of an unidentified blackberry species\n220pxBlackberry flower, Rubus fruticosus species aggregate."^^ . + "Mora (fruta)"@es . + "Blackberry"@en . + . + "214"^^ . + "Blackberry"@en . + "4.88"^^ . + "19.8"^^ . + . + "0.02"^^ . + . + . + "72339"^^ . + "9.61"^^ . + . + . + "La m\u00FBre, m\u00FBre sauvage ou parfois m\u00FBron est le fruit de la ronce commune, buisson \u00E9pineux tr\u00E8s envahissant du genre Rubus de la famille des Rosac\u00E9es comme le framboisier. En Suisse romande, en Haute-Savoie ainsi que dans le Nord-Pas-de-Calais, elle s'appelle aussi \u00AB meuron \u00BB. Les m\u00FBres sauvages sont pr\u00E9c\u00E9d\u00E9es de leurs fleurs, blanches ou blanc ros\u00E9, de deux \u00E0 trois centim\u00E8tres de diam\u00E8tre, regroup\u00E9es en corymbes ; elles ont cinq s\u00E9pales, cinq p\u00E9tales et de nombreuses \u00E9tamines. La m\u00FBre est de couleur noir bleu\u00E2tre \u00E0 maturit\u00E9, vers septembre. C'est un fruit compos\u00E9 form\u00E9 de l'agr\u00E9gation des carpelles modifi\u00E9s et transform\u00E9s en petites drupes (drup\u00E9oles) qui restent adh\u00E9rentes au r\u00E9ceptacle floral."@fr . + "20"^^ . 
+ "0.62"^^ . + "Mora es el nombre que reciben diversos frutos comestibles de distintas especies bot\u00E1nicas. Son frutas o bayas que, a pesar de proceder de especies vegetales completamente diferentes, poseen aspecto similar y caracter\u00EDsticas comunes. En ocasiones, las distintas moras pueden ser confundidas e incluso obviadas, dado que al usar la palabra mora para hablar de dicha fruta, puede hacerse referencia, simplificando, a dos tipos de bayas procedentes de dos g\u00E9neros distintos de vegetales con rasgos fenot\u00EDpicos muy dispares entre s\u00ED, el g\u00E9nero Morus y el g\u00E9nero Rubus. Ambos g\u00E9neros dan moras, pero no son la misma fruta, unas vienen de unos \u00E1rboles com\u00FAnmente llamados moreras y morales (que son del g\u00E9nero Morus), y las otras provienen de unas plantas sarmentosas y espinosas com\u00FAnmente llamadas zarzas (que son del g\u00E9nero Rubus). No obstante, la diferenciaci\u00F3n es en realidad a\u00FAn mayor, dado que dentro de ambos g\u00E9neros hay un importante n\u00FAmero de especies distintas. En total existen m\u00E1s de 300 especies de moras diferentes. Adem\u00E1s, la comercializaci\u00F3n de estas bayas ha propiciado la creaci\u00F3n de diversas hibridaciones que no existen en la naturaleza."@es . + "1"^^ . 
+ "\u9ED1\u8393\u6A39\uFF0C\u6216\u7E3D\u7A31\u9ED1\u8393\uFF0C\u662F\u4E00\u7A2E\u5EE3\u6CDB\u5206\u5E03\u4E14\u77E5\u540D\u7684\u704C\u6728\uFF1B\u5C6C\u65BC\u6A39\u8393\u985E\uFF08\u8594\u8587\u79D1\u61F8\u9264\u5B50\u5C6C\uFF09\u7684\u690D\u7269\uFF0C\u53EF\u4EE5\u9577\u52303\u516C\u5C3A\uFF0810\u82F1\u544E\uFF09\u9AD8\uFF0C\u4E26\u751F\u7522\u8EDF\u8CEA\u7684\u679C\u5BE6\u2014\u2014\u5E38\u7528\u4F5C\u751C\u9EDE\u3001\u679C\u91AC\u3001\u7121\u5B50\u679C\u51CD\uFF0C\u6709\u6642\u4E5F\u7528\u4F86\u88FD\u6C34\u679C\u9152\u3002\u6709\u591A\u7A2E\u61F8\u9264\u5B50\u5C6C\u690D\u7269\u7A31\u4F5C\u9ED1\u8393\uFF0C\u800C\u4E14\u9019\u4E9B\u7A2E\u5F88\u5BB9\u6613\u767C\u751F\u96DC\u4EA4\uFF0C\u56E0\u6B64\u683D\u57F9\u7A2E\u7684\u7A2E\u6578\u6703\u8F03\u539F\u751F\u7A2E\u7684\u591A\u4E0A\u8A31\u591A\u3002\u9AD4\u578B\u8F03\u5C0F\u7684\u8FD1\u89AA\u2014\u2014\u6B27\u6D32\u6728\u8393\uFF08\u5B66\u540D\uFF1ARubus caesius\uFF09\u53EF\u4EE5\u900F\u904E\u5176\u679C\u5BE6\u4E0A\u767D\u8272\u81D8\u72C0\u7684\u5916\u5C64\u8207\u9ED1\u8393\u4F86\u4F5C\u5340\u5225\uFF0C\u800C\u4E14\u5B83\u5011\u7684\u5C0F\u6838\u679C\u6578\u91CF\u4E5F\u8F03\u5C11\u3002"@zh . + . + . + "0.49"^^ . + . + . + . + "Superman, il cui nome kryptoniano \u00E8 Kal-El, mentre il suo nome terrestre \u00E8 Clark Kent, \u00E8 un personaggio dei fumetti creato da Jerry Siegel e Joe Shuster nel 1933, ma pubblicato dalla DC Comics soltanto nel 1938. \u00C8 soprannominato anche \"L'Uomo d'Acciaio\" (in originale: Man of Steel) oppure \"L'Uomo del Domani\" (The Man of Tomorrow). Un uomo in grado di sollevare un'auto, con un costume blu addosso ed un mantello rosso sulle spalle, contornato da un gruppo di passanti impauriti: \u00E8 questa la prima immagine di Superman, quella con cui fa il suo esordio nelle edicole statunitensi."@it . + . 
+ "Superman ou Super-Homem \u00E9 um super-her\u00F3i fict\u00EDcio de hist\u00F3ria em quadrinhos americanas publicado pela DC Comics, uma empresa subsidi\u00E1ria do grupo Time Warner. Superman, entretanto, j\u00E1 foi adaptado para diversos outros meios desde os anos 1930, como cinema, r\u00E1dio, televis\u00E3o, literatura e Video game. Superman \u00E9 um super-her\u00F3i criado pela dupla de autores de quadrinhos Joe Shuster e Jerry Siegel. Sua primeira apari\u00E7\u00E3o foi apresentada na revista Action Comics #1 em 1938, nos Estados Unidos.O personagem nasceu no fict\u00EDcio planeta Krypton e foi chamado pelos seus pais de Kal-El (que significaria Filho das Estrelas no idioma kryptoniano). Foi mandado \u00E0 Terra por seu pai, Jor-El, um cientista, momentos antes do planeta explodir. O foguete aterrissou na Terra na cidade de Smallville (por alguns anos, foi traduzida no Brasil como Pequenopolis), onde o jovem Kal-El foi descoberto pelo casal de fazendeiros Jonathan e Martha Kent. Conforme foi crescendo, ele descobriu que tinha habilidades diferentes dos humanos. Quando n\u00E3o est\u00E1 com o tradicional uniforme azul e vermelho, ele vive como Clark Kent, rep\u00F3rter profissional no Planeta Di\u00E1rio (Estrela Di\u00E1ria em Portugal). Clark trabalha como rep\u00F3rter/jornalista no Planeta Di\u00E1rio com Lois Lane e com Jimmy Olsen. \u00C9 um dos mais importantes personagens da cultura pop ocidental, sendo o primeiro her\u00F3i dos quadrinhos a ter uma revista intitulada com seu nome: Superman #1, publicada no ver\u00E3o de 1939. Al\u00E9m disso, Superman foi licenciado e adaptado para diversas m\u00EDdias, desde r\u00E1dio at\u00E9 televis\u00E3o e cinema. 
O filme Superman Returns foi lan\u00E7ado em 2006, com uma aceita\u00E7\u00E3o dos f\u00E3s abaixo das expectativas, entretanto recebeu avalia\u00E7\u00F5es positivas da critica especializada, apesar de ter tido um desempenho considerado fraco em sua bilheteria, tendo custado cerca de USD250 milh\u00F5es e arrecadado quase USD400 milh\u00F5es em n\u00EDvel global. Em 2013, depois de sete anos afastados das telas, foi lan\u00E7ado mundialmente um novo filme do personagem. Ao contr\u00E1rio do anterior e assim como o primeiro longa da franquia com Christopher Reeve nos anos 1970, conta a origem do mesmo, sendo o primeiro filme do Superman depois da reformula\u00E7\u00E3o sofrida pela DC Comics em 2011, intitulada Os Novos 52, que acarretou algumas altera\u00E7\u00F5es no personagem e em seu universo e que foram incorporadas pela produ\u00E7\u00E3o. Chamado simplesmente de Man of Steel (em portugu\u00EAs, Homem de A\u00E7o). Contou com a dire\u00E7\u00E3o de Zack Snyder e com a produ\u00E7\u00E3o de Christopher Nolan (este \u00FAltimo not\u00F3rio por ter sido o diretor da aclamada trilogia de filmes The Dark Knight, do tamb\u00E9m super-her\u00F3i da Comics Batman). Tem o ator brit\u00E2nico Henry Cavill como Superman/Clark Kent, e a atriz americana Amy Adams como Lois Lane. Mais bem recebido pela cr\u00EDtica e pelo p\u00FAblico em geral, o longa teve um or\u00E7amento estimado em USD225 milh\u00F5es e obteve algo em torno de USD700 milh\u00F5es de bilheteria mundialmente. Uma sequ\u00EAncia mais abrangente, intitulada Batman vs Superman: A Origem da Justi\u00E7a, que deve apresentar a intera\u00E7\u00E3o entre Superman e Batman, que com um or\u00E7amento superior a USD350 milh\u00F5es, ser\u00E1 lan\u00E7ado mundialmente em 2016. A origem e poderes do personagem foram sendo expandidos e alterados gradativamente ao longo dos anos para acompanhar a evolu\u00E7\u00E3o do p\u00FAblico. 
A hist\u00F3ria do Superman foi alterada para permitir as aventuras do Superboy e outros sobreviventes como a Supermo\u00E7a e Krypto, o superc\u00E3o. O personagem foi revisado e atualizado mais recentemente em 1986. John Byrne recriou o personagem, reduzindo os poderes do Superman e apagando diversas personagens da vers\u00E3o oficial das hist\u00F3rias, o que atraiu a aten\u00E7\u00E3o dos meios de comunica\u00E7\u00E3o. A cobertura da imprensa foi novamente recebida na d\u00E9cada de 1990, com A Morte do Superman, uma hist\u00F3ria na qual o personagem era dada como morto (Superman na verdade n\u00E3o morre, ele so tinha ficado inconsciente/em coma tempor\u00E1rio). A propriedade sobre o personagem foi objeto de disputa, com Siegel e Shuster reclamando o retorno de sua propriedade legal. Os direitos autorais s\u00E3o novamente objeto de disputa, com a mudan\u00E7a das leis de direitos autorais permitindo \u00E0 esposa e \u00E0 filha de Siegel reclamar uma parte dos direitos autorais, levando a uma disputa com a companhia Warner Bros. E \u00E9 at\u00E9 hoje um dos tr\u00EAs super-her\u00F3is mais populares do mundo, os outros s\u00E3o Batman (tamb\u00E9m da DC) e o Homem-Aranha (da Marvel Comics)."@pt . + "Action Comics #1"^^ . + . + . + "Superman ou Super-Homem \u00E9 um super-her\u00F3i fict\u00EDcio de hist\u00F3ria em quadrinhos americanas publicado pela DC Comics, uma empresa subsidi\u00E1ria do grupo Time Warner. Superman, entretanto, j\u00E1 foi adaptado para diversos outros meios desde os anos 1930, como cinema, r\u00E1dio, televis\u00E3o, literatura e Video game. Superman \u00E9 um super-her\u00F3i criado pela dupla de autores de quadrinhos Joe Shuster e Jerry Siegel. Sua primeira apari\u00E7\u00E3o foi apresentada na revista Action Comics #1 em 1938, nos Estados Unidos.O personagem nasceu no fict\u00EDcio planeta Krypton e foi chamado pelos seus pais de Kal-El (que significaria Filho das Estrelas no idioma kryptoniano). 
Foi mandado \u00E0 Terra por seu pai, Jor-El, um cientista, momentos antes do planeta explodir. O foguete aterrissou na Terra na cidade de Smallville (por alguns ano"@pt . + . + "y"^^ . + . + . + . + . + . + . + . + . + . + . + "no"^^ . + "Superman, ook wel 'De Man van Staal' genoemd, is een personage en imaginaire superheld die zijn eerste verschijning maakte in de Amerikaanse Action Comics #1 in 1938. Hij is als stripfiguur de eerste superheld die bovenmenselijke krachten bezit en lanceerde hiermee een genre waarin meer superhelden volgden, zoals Batman en Wonder Woman. Zijn alter ego is Clark Kent. Sindsdien is Superman een van de populairste en bekendste comic-figuren aller tijden geworden en wordt hij vaak gezien als een cultureel icoon van Amerika."@nl . + "Art by Alex Ross"^^ . + "400"^^ . + "Superman (jego alter ego \u2013 Clark Kent, urodzony jako Kal-El) \u2013 fikcyjna posta\u0107 (superbohater), znana z serii komiks\u00F3w wydawanych przez DC Comics, oraz wszelkich adaptacji zwi\u0105zanych z ta postaci\u0105. Zosta\u0142 stworzony przez Joe Shustera i Jerry\u2019ego Siegela. Po raz pierwszy pojawi\u0142 si\u0119 w magazynie Action Comics vol. 1 #1 (czerwiec 1938 roku). Superman jest na pierwszym miejscu najwa\u017Cniejszych bohater\u00F3w komiksowych w zestawieniu magazynu Empire, jak r\u00F3wnie\u017C serwisu internetowego IGN."@pl . + "2500"^^ . + . + "Superman ist der Name einer Comicfigur, die in den 1930er-Jahren von den beiden US-Amerikanern Jerry Siegel und Joe Shuster geschaffen wurde. Die Figur wird gemeinhin als der erste Superheld der Comicgeschichte betrachtet und z\u00E4hlt zu dem Kreis der fiktiven Charaktere mit dem weltweit h\u00F6chsten Wiedererkennungswert."@de . + . + . + . + . + . + . 
+ "\u30B9\u30FC\u30D1\u30FC\u30DE\u30F3(Superman)\u306F\u3001\u30A2\u30E1\u30EA\u30ABDC\u30B3\u30DF\u30C3\u30AF\u520A\u884C\u306E\u30B3\u30DF\u30C3\u30AF\u3002\u67B6\u7A7A\u306E\u30D2\u30FC\u30ED\u30FC\u3092\u4E3B\u5F79\u3068\u3057\u305F\u30B3\u30DF\u30C3\u30AF\u3001\u30A2\u30CB\u30E1\u3001\u6620\u753B\u3001\u30C9\u30E9\u30DE\u4F5C\u54C1\u3002 1938\u5E74\u306B\u539F\u4F5C\u30B8\u30A7\u30EA\u30FC\u30FB\u30B7\u30FC\u30B2\u30EB\u304A\u3088\u3073\u4F5C\u753B\u30B8\u30E7\u30FC\u30FB\u30B7\u30E3\u30B9\u30BF\u30FC\u306B\u3088\u308A\u3001\u30A2\u30AF\u30B7\u30E7\u30F3\u30FB\u30B3\u30DF\u30C3\u30AF\u30B9\u8A8C\u7B2C1\u53F7\u3067\u521D\u767B\u5834\u3057\u305F\u3002"@ja . + . + . + . + "Douglas Fairbanks and Harold Lloyd influenced the look of Superman and Clark Kent, respectively."^^ . + "\u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 \u0623\u0648 \u0633\u0648\u0628\u0631\u0645\u0627\u0646 (\u0628\u0627\u0644\u0625\u0646\u062C\u0644\u064A\u0632\u064A\u0629: Superman) \u0647\u0648 \u0634\u062E\u0635\u064A\u0629 \u0648\u0647\u0645\u064A\u0629 \u0648\u0628\u0637\u0644 \u062E\u0627\u0631\u0642 \u064A\u0638\u0647\u0631 \u0641\u064A \u0645\u0646\u0634\u0648\u0631\u0627\u062A \u062F\u064A \u0633\u064A \u0643\u0648\u0645\u0643\u0633. \u0623\u0646\u0647 \u064A\u062F\u0639\u0649 \u0628\u0627\u0644\u0628\u0637\u0644 \u0627\u0644\u062C\u0628\u0627\u0631 \u0648\u0628\u0632\u063A \u0646\u062C\u0645\u0647 \u0641\u064A \u0639\u0627\u0645 1938. \u064A\u0634\u062A\u0647\u0631 \u0628\u0644\u0642\u0628 \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u0641\u0648\u0644\u0627\u0630\u064A. 
\u0638\u0647\u0631 \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 \u0639\u0644\u0649 \u0635\u0641\u062D\u0627\u062A \u0627\u0644\u0639\u062F\u062F \u0627\u0644\u0623\u0648\u0644 \u0645\u0646 \u0642\u0635\u0635 \u0627\u0644\u062D\u0631\u0643\u0629 \u0627\u0644\u0645\u0635\u0648\u0631\u0629 (\u0623\u0643\u0634\u0646 \u0643\u0648\u0645\u0643\u0633) \u0628\u0634\u0647\u0631 \u064A\u0648\u0646\u064A\u0648 \u0645\u0646 \u0639\u0627\u0645 1938. \u0641\u0642\u062F \u0623\u0635\u0628\u062D \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 (\u0633\u0648\u0628\u0631\u0645\u0627\u0646) \u062A\u062F\u0631\u064A\u062C\u064A\u0627\u064B \u0623\u0634\u0647\u0631 \u0628\u0637\u0644 \u062E\u0627\u0631\u0642 \u0641\u064A \u0627\u0644\u0639\u0627\u0644\u0645. \u062C\u0639\u0644 \u0645\u062C\u0644\u0629 \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 \u0623\u0634\u0647\u0631 \u0645\u062C\u0644\u0629 \u0645\u0635\u0648\u0631\u0629 \u0641\u064A \u0627\u0644\u0639\u0627\u0644\u0645 \u0648\u062A\u0645\u062A \u062A\u0631\u062C\u0645\u062A\u0647\u0627 \u0644\u0623\u063A\u0644\u0628 \u0644\u063A\u0627\u062A \u0627\u0644\u0639\u0627\u0644\u0645."@ar . + . + . + . + . + "The \"S\" symbol that became iconic and a collection of Superman merchandise"^^ . + . + . + "restore indef semi"^^ . + . + . + "(This article is about the superhero. For other uses, see Superman (disambiguation).) Superman is a fictional superhero appearing in American comic books published by DC Comics. The character was created by writer Jerry Siegel and artist Joe Shuster, high school students living in Cleveland, Ohio, in 1933. They sold Superman to Detective Comics, the future DC Comics, in 1938. Superman debuted in Action Comics #1 (cover-dated June 1938) and subsequently appeared in various radio serials, newspaper strips, television programs, films, and video games. 
With this success, Superman helped to create the superhero archetype and establish its primacy within the American comic book. The character is also referred to by such epithets as the Man of Steel, the Man of Tomorrow, and The Last Son of Krypton. The origin story of Superman relates that he was born Kal-El on the alien planet Krypton, before being rocketed to Earth as an infant by his scientist father Jor-El, moments before Krypton's destruction. Discovered and adopted by a Kansas farmer and his wife, the child is raised as Clark Kent and imbued with a strong moral compass. Very early on he started to display various superhuman abilities, which, upon reaching maturity, he resolved to use for the benefit of humanity through a secret \"Superman\" identity. Superman resides and operates in the fictional American city of Metropolis. As Clark Kent, he is a journalist for the Daily Planet, a Metropolis newspaper. Superman's love interest is generally Lois Lane, and his archenemy is supervillain Lex Luthor. He is typically a member of the Justice League and close ally of Batman and Wonder Woman. Like other characters in the DC Universe, several alternate versions of Superman have been depicted over the years. Superman's appearance is distinctive and iconic; he usually wears a blue costume with a red-and-yellow emblem on the chest, consisting of the letter S in a shield shape, and a red cape. This shield is used in many media to symbolize the character. Superman is widely considered an American cultural icon. He has fascinated scholars, with cultural theorists, commentators, and critics alike exploring the character's impact and role in the United States and worldwide. The character's ownership has often been the subject of dispute, with Siegel and Shuster twice suing for the return of rights. The character has been adapted extensively and portrayed in other forms of media as well, including films, television series, and video games. 
Several actors have portrayed Superman in motion pictures and TV series including Kirk Alyn, George Reeves, Christopher Reeve, Tom Welling, Brandon Routh, Henry Cavill, and Tyler Hoechlin."@en . + . + "Superman"@en . + "Superman"^^ . + . + "3918"^^ . + . + . + "Personal_Superman_Collection.jpg"^^ . + "Douglas Fairbanks - A Pictorial History of the Silent Screen.jpg"^^ . + . + "1987"^^ . + . + . + "Clark Kent"@en . + "y"^^ . + "\u0633\u0648\u0628\u0631\u0645\u0627\u0646"@ar . + . + . + . + "\u0421\u0443\u043F\u0435\u0440\u043C\u0435\u043D"@ru . + "Superman"@pl . + "28381"^^ . + "\u30B9\u30FC\u30D1\u30FC\u30DE\u30F3"@ja . + . + . + . + . + . + "Superman"^^ . + . + . + "745132439"^^ . + "4029"^^ . + . + . + "Superman"@pt . + "no"^^ . + . + "Related topics"^^ . + "super"^^ . + "\u0421\u0443\u043F\u0435\u0440\u043C\u0435\u0301\u043D (\u0430\u043D\u0433\u043B. Superman) \u2014 \u0441\u0443\u043F\u0435\u0440\u0433\u0435\u0440\u043E\u0439 \u043A\u043E\u043C\u0438\u043A\u0441\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0432\u044B\u043F\u0443\u0441\u043A\u0430\u044E\u0442\u0441\u044F \u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0435\u0439 DC Comics. \u041E\u043D \u0441\u0447\u0438\u0442\u0430\u0435\u0442\u0441\u044F \u0438\u043A\u043E\u043D\u043E\u0439 \u0430\u043C\u0435\u0440\u0438\u043A\u0430\u043D\u0441\u043A\u043E\u0439 \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u044B. 
\u041F\u0440\u0438\u0434\u0443\u043C\u0430\u043D\u043D\u044B\u0439 \u043F\u0438\u0441\u0430\u0442\u0435\u043B\u0435\u043C \u0414\u0436\u0435\u0440\u0440\u0438 \u0421\u0438\u0433\u0435\u043B\u043E\u043C \u0438 \u0445\u0443\u0434\u043E\u0436\u043D\u0438\u043A\u043E\u043C \u0414\u0436\u043E \u0428\u0443\u0441\u0442\u0435\u0440\u043E\u043C \u0438 \u043F\u0440\u043E\u0434\u0430\u043D\u043D\u044B\u0439 \u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0438 Detective Comics (\u043F\u043E\u0437\u0434\u043D\u0435\u0435 DC Comics), \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436 \u0432\u043F\u0435\u0440\u0432\u044B\u0435 \u043F\u043E\u044F\u0432\u0438\u043B\u0441\u044F \u043D\u0430 \u0441\u0442\u0440\u0430\u043D\u0438\u0446\u0430\u0445 Action Comics #1 (\u0438\u044E\u043D\u044C 1938-\u0433\u043E), \u0430 \u0432\u043F\u043E\u0441\u043B\u0435\u0434\u0441\u0442\u0432\u0438\u0438 \u043F\u043E\u044F\u0432\u043B\u044F\u043B\u0441\u044F \u0432 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0445 \u0440\u0430\u0434\u0438\u043E\u043F\u0435\u0440\u0435\u0434\u0430\u0447\u0430\u0445, \u0442\u0435\u043B\u0435\u0432\u0438\u0437\u0438\u043E\u043D\u043D\u044B\u0445 \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u0430\u0445, \u0444\u0438\u043B\u044C\u043C\u0430\u0445, \u043D\u0430 \u0433\u0430\u0437\u0435\u0442\u043D\u044B\u0445 \u043F\u043E\u043B\u043E\u0441\u0430\u0445 \u0438 \u0432 \u0432\u0438\u0434\u0435\u043E\u0438\u0433\u0440\u0430\u0445. \u0421\u0432\u043E\u0438\u043C \u0443\u0441\u043F\u0435\u0445\u043E\u043C \u0421\u0443\u043F\u0435\u0440\u043C\u0435\u043D \u043F\u043E\u043C\u043E\u0433 \u0441\u043E\u0437\u0434\u0430\u0442\u044C \u0436\u0430\u043D\u0440 \u0441\u0443\u043F\u0435\u0440\u0433\u0435\u0440\u043E\u044F \u0438 \u0437\u0430\u043D\u044F\u043B \u043F\u0435\u0440\u0432\u0435\u043D\u0441\u0442\u0432\u043E \u0441\u0440\u0435\u0434\u0438 \u0430\u043C\u0435\u0440\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043A\u043E\u043C\u0438\u043A\u0441\u043E\u0432. 
\u0412\u043D\u0435\u0448\u043D\u0438\u0439 \u0432\u0438\u0434 \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436\u0430 \u044F\u0432\u043B\u044F\u0435\u0442\u0441\u044F \u043E\u0442\u043B\u0438\u0447\u0438\u0442\u0435\u043B\u044C\u043D\u044B\u043C: \u043A\u0440\u0430\u0441\u043D\u044B\u0439, \u0441\u0438\u043D\u0438\u0439 \u0438 \u0436\u0451\u043B\u0442\u044B\u0439 \u0446\u0432\u0435\u0442\u0430 \u043A\u043E\u0441\u0442\u044E\u043C\u0430, \u0432 \u043A\u043E\u043C\u043F\u043B\u0435\u043A\u0442\u0435 \u0441 \u043F\u043B\u0430\u0449\u043E\u043C \u0438 \u044D\u043C\u0431\u043B\u0435\u043C\u043E\u0439 \u0432 \u0432\u0438\u0434\u0435 \u0442\u0440\u0435\u0443\u0433\u043E\u043B\u044C\u043D\u043E\u0433\u043E \u0449\u0438\u0442\u0430 \u0441 \u0431\u0443\u043A\u0432\u043E\u0439 \u00ABS\u00BB \u0432 \u0446\u0435\u043D\u0442\u0440\u0435. \u0412 \u043D\u0430\u0441\u0442\u043E\u044F\u0449\u0435\u0435 \u0432\u0440\u0435\u043C\u044F \u044D\u0442\u0430 \u044D\u043C\u0431\u043B\u0435\u043C\u0430 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u0435\u0442\u0441\u044F \u0432 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0445 \u043C\u0435\u0434\u0438\u0430-\u043E\u0442\u0440\u0430\u0441\u043B\u044F\u0445 \u043A\u0430\u043A \u0441\u0438\u043C\u0432\u043E\u043B \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436\u0430. \u041E\u0440\u0438\u0433\u0438\u043D\u0430\u043B\u044C\u043D\u0430\u044F \u0438\u0441\u0442\u043E\u0440\u0438\u044F \u0440\u0430\u0441\u0441\u043A\u0430\u0437\u044B\u0432\u0430\u0435\u0442, \u0447\u0442\u043E \u0421\u0443\u043F\u0435\u0440\u043C\u0435\u043D \u043F\u043E\u044F\u0432\u0438\u043B\u0441\u044F \u043D\u0430 \u0441\u0432\u0435\u0442 \u043D\u0430 \u043F\u043B\u0430\u043D\u0435\u0442\u0435 \u041A\u0440\u0438\u043F\u0442\u043E\u043D \u0438 \u043F\u0440\u0438 \u0440\u043E\u0436\u0434\u0435\u043D\u0438\u0438 \u043F\u043E\u043B\u0443\u0447\u0438\u043B \u0438\u043C\u044F \u041A\u0430\u043B-\u042D\u043B. 
\u0415\u0449\u0451 \u043C\u043B\u0430\u0434\u0435\u043D\u0446\u0435\u043C \u043E\u043D \u0431\u044B\u043B \u043E\u0442\u043F\u0440\u0430\u0432\u043B\u0435\u043D \u043D\u0430 \u0417\u0435\u043C\u043B\u044E \u0441\u0432\u043E\u0438\u043C \u043E\u0442\u0446\u043E\u043C-\u0443\u0447\u0451\u043D\u044B\u043C \u0414\u0436\u043E\u0440-\u042D\u043B\u043E\u043C \u0437\u0430 \u043D\u0435\u0441\u043A\u043E\u043B\u044C\u043A\u043E \u043C\u0438\u043D\u0443\u0442 \u0434\u043E \u0443\u043D\u0438\u0447\u0442\u043E\u0436\u0435\u043D\u0438\u044F \u041A\u0440\u0438\u043F\u0442\u043E\u043D\u0430. \u0415\u0433\u043E \u043D\u0430\u0448\u043B\u0430 \u0438 \u043F\u0440\u0438\u044E\u0442\u0438\u043B\u0430 \u0441\u0435\u043C\u044C\u044F \u043A\u0430\u043D\u0437\u0430\u0441\u0441\u043A\u043E\u0433\u043E \u0444\u0435\u0440\u043C\u0435\u0440\u0430. \u0417\u0435\u043C\u043D\u044B\u0435 \u0440\u043E\u0434\u0438\u0442\u0435\u043B\u0438 \u0434\u0430\u043B\u0438 \u0440\u0435\u0431\u0451\u043D\u043A\u0443 \u0438\u043C\u044F \u041A\u043B\u0430\u0440\u043A \u041A\u0435\u043D\u0442. \u0415\u0449\u0451 \u0432 \u0440\u0430\u043D\u043D\u0435\u043C \u0432\u043E\u0437\u0440\u0430\u0441\u0442\u0435 \u0443 \u043C\u0430\u043B\u044C\u0447\u0438\u043A\u0430 \u043F\u0440\u043E\u044F\u0432\u0438\u043B\u0438\u0441\u044C \u0441\u0432\u0435\u0440\u0445\u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u043F\u043E\u0441\u043E\u0431\u043D\u043E\u0441\u0442\u0438, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u043E\u043D \u0440\u0435\u0448\u0438\u043B \u043F\u0440\u0438\u043C\u0435\u043D\u044F\u0442\u044C \u043D\u0430 \u0431\u043B\u0430\u0433\u043E \u0447\u0435\u043B\u043E\u0432\u0435\u0447\u0435\u0441\u0442\u0432\u0443. 
\u0412\u043B\u0438\u044F\u043D\u0438\u0435 \u0421\u0443\u043F\u0435\u0440\u043C\u0435\u043D\u0430 \u043D\u0430 \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u0443 \u0421\u0428\u0410 \u0438 \u043E\u0441\u0442\u0430\u043B\u044C\u043D\u043E\u0433\u043E \u043C\u0438\u0440\u0430 \u0438\u0437\u0443\u0447\u0430\u044E\u0442 \u0443\u0447\u0451\u043D\u044B\u0435 \u0438 \u043A\u0440\u0438\u0442\u0438\u043A\u0438. \u0423\u043C\u0431\u0435\u0440\u0442\u043E \u042D\u043A\u043E \u043E\u0431\u0441\u0443\u0436\u0434\u0430\u043B \u043C\u0438\u0444\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u043F\u043E\u0441\u043E\u0431\u043D\u043E\u0441\u0442\u0438 \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436\u0430, \u0430 \u041B\u0430\u0440\u0440\u0438 \u041D\u0438\u0432\u0435\u043D \u0438\u0437\u0443\u0447\u0430\u043B \u043F\u043E\u0441\u043B\u0435\u0434\u0441\u0442\u0432\u0438\u044F \u0435\u0433\u043E \u0441\u0435\u043A\u0441\u0443\u0430\u043B\u044C\u043D\u044B\u0445 \u043E\u0442\u043D\u043E\u0448\u0435\u043D\u0438\u0439 \u0441 \u041B\u043E\u0438\u0441 \u041B\u0435\u0439\u043D. \u0418\u0441\u043F\u043E\u043B\u044C\u0437\u043E\u0432\u0430\u043D\u0438\u0435 \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436\u0430 \u0447\u0430\u0441\u0442\u043E \u0431\u044B\u043B\u043E \u043F\u0440\u0435\u0434\u043C\u0435\u0442\u043E\u043C \u0441\u043F\u043E\u0440\u0430, \u0421\u0438\u0433\u0435\u043B \u0438 \u0428\u0443\u0441\u0442\u0435\u0440 \u0434\u0432\u0430\u0436\u0434\u044B \u043F\u043E\u0434\u0430\u0432\u0430\u043B\u0438 \u0438\u0441\u043A \u0434\u043B\u044F \u0432\u043E\u0437\u0432\u0440\u0430\u0449\u0435\u043D\u0438\u044F \u0438\u043C \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436\u0430."@ru . + . + . + "Superman"@it . + . + "Category:Superman"^^ . 
+ "\u30B9\u30FC\u30D1\u30FC\u30DE\u30F3(Superman)\u306F\u3001\u30A2\u30E1\u30EA\u30ABDC\u30B3\u30DF\u30C3\u30AF\u520A\u884C\u306E\u30B3\u30DF\u30C3\u30AF\u3002\u67B6\u7A7A\u306E\u30D2\u30FC\u30ED\u30FC\u3092\u4E3B\u5F79\u3068\u3057\u305F\u30B3\u30DF\u30C3\u30AF\u3001\u30A2\u30CB\u30E1\u3001\u6620\u753B\u3001\u30C9\u30E9\u30DE\u4F5C\u54C1\u3002 1938\u5E74\u306B\u539F\u4F5C\u30B8\u30A7\u30EA\u30FC\u30FB\u30B7\u30FC\u30B2\u30EB\u304A\u3088\u3073\u4F5C\u753B\u30B8\u30E7\u30FC\u30FB\u30B7\u30E3\u30B9\u30BF\u30FC\u306B\u3088\u308A\u3001\u30A2\u30AF\u30B7\u30E7\u30F3\u30FB\u30B3\u30DF\u30C3\u30AF\u30B9\u8A8C\u7B2C1\u53F7\u3067\u521D\u767B\u5834\u3057\u305F\u3002"@ja . + "Superman ist der Name einer Comicfigur, die in den 1930er-Jahren von den beiden US-Amerikanern Jerry Siegel und Joe Shuster geschaffen wurde. Die Figur wird gemeinhin als der erste Superheld der Comicgeschichte betrachtet und z\u00E4hlt zu dem Kreis der fiktiven Charaktere mit dem weltweit h\u00F6chsten Wiedererkennungswert."@de . + . + . + "yes"^^ . + . + "\u0421\u0443\u043F\u0435\u0440\u043C\u0435\u0301\u043D (\u0430\u043D\u0433\u043B. Superman) \u2014 \u0441\u0443\u043F\u0435\u0440\u0433\u0435\u0440\u043E\u0439 \u043A\u043E\u043C\u0438\u043A\u0441\u043E\u0432, \u043A\u043E\u0442\u043E\u0440\u044B\u0435 \u0432\u044B\u043F\u0443\u0441\u043A\u0430\u044E\u0442\u0441\u044F \u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0435\u0439 DC Comics. \u041E\u043D \u0441\u0447\u0438\u0442\u0430\u0435\u0442\u0441\u044F \u0438\u043A\u043E\u043D\u043E\u0439 \u0430\u043C\u0435\u0440\u0438\u043A\u0430\u043D\u0441\u043A\u043E\u0439 \u043A\u0443\u043B\u044C\u0442\u0443\u0440\u044B. 
\u041F\u0440\u0438\u0434\u0443\u043C\u0430\u043D\u043D\u044B\u0439 \u043F\u0438\u0441\u0430\u0442\u0435\u043B\u0435\u043C \u0414\u0436\u0435\u0440\u0440\u0438 \u0421\u0438\u0433\u0435\u043B\u043E\u043C \u0438 \u0445\u0443\u0434\u043E\u0436\u043D\u0438\u043A\u043E\u043C \u0414\u0436\u043E \u0428\u0443\u0441\u0442\u0435\u0440\u043E\u043C \u0438 \u043F\u0440\u043E\u0434\u0430\u043D\u043D\u044B\u0439 \u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0438 Detective Comics (\u043F\u043E\u0437\u0434\u043D\u0435\u0435 DC Comics), \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436 \u0432\u043F\u0435\u0440\u0432\u044B\u0435 \u043F\u043E\u044F\u0432\u0438\u043B\u0441\u044F \u043D\u0430 \u0441\u0442\u0440\u0430\u043D\u0438\u0446\u0430\u0445 Action Comics #1 (\u0438\u044E\u043D\u044C 1938-\u0433\u043E), \u0430 \u0432\u043F\u043E\u0441\u043B\u0435\u0434\u0441\u0442\u0432\u0438\u0438 \u043F\u043E\u044F\u0432\u043B\u044F\u043B\u0441\u044F \u0432 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0445 \u0440\u0430\u0434\u0438\u043E\u043F\u0435\u0440\u0435\u0434\u0430\u0447\u0430\u0445, \u0442\u0435\u043B\u0435\u0432\u0438\u0437\u0438\u043E\u043D\u043D\u044B\u0445 \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u0430\u0445, \u0444\u0438\u043B\u044C\u043C\u0430\u0445, \u043D\u0430 \u0433\u0430\u0437\u0435\u0442\u043D\u044B\u0445 \u043F\u043E\u043B\u043E\u0441\u0430\u0445 \u0438 \u0432 \u0432\u0438\u0434\u0435\u043E\u0438\u0433\u0440\u0430\u0445. \u0421\u0432\u043E\u0438\u043C \u0443\u0441\u043F\u0435\u0445\u043E\u043C \u0421\u0443\u043F\u0435\u0440\u043C\u0435\u043D \u043F\u043E\u043C\u043E\u0433 \u0441\u043E\u0437\u0434\u0430\u0442\u044C \u0436\u0430\u043D\u0440 \u0441\u0443\u043F\u0435\u0440\u0433\u0435\u0440\u043E\u044F \u0438 \u0437\u0430\u043D\u044F\u043B \u043F\u0435\u0440\u0432\u0435\u043D\u0441\u0442\u0432\u043E \u0441\u0440\u0435\u0434\u0438 \u0430\u043C\u0435\u0440\u0438\u043A\u0430\u043D\u0441\u043A\u0438\u0445 \u043A\u043E\u043C\u0438\u043A\u0441\u043E\u0432. 
\u0412\u043D\u0435\u0448\u043D\u0438\u0439 \u0432\u0438\u0434 \u043F\u0435\u0440\u0441\u043E\u043D\u0430\u0436\u0430 \u044F\u0432\u043B\u044F\u0435\u0442\u0441\u044F \u043E\u0442\u043B\u0438\u0447\u0438\u0442\u0435\u043B\u044C\u043D\u044B\u043C: \u043A\u0440\u0430\u0441\u043D\u044B\u0439, \u0441\u0438\u043D\u0438\u0439 \u0438 \u0436\u0451\u043B\u0442\u044B\u0439 \u0446\u0432\u0435\u0442\u0430 \u043A\u043E\u0441\u0442\u044E\u043C\u0430, \u0432 \u043A\u043E\u043C\u043F\u043B\u0435\u043A\u0442\u0435 \u0441 \u043F\u043B\u0430\u0449\u043E\u043C \u0438 \u044D\u043C\u0431\u043B\u0435\u043C\u043E\u0439 \u0432 \u0432\u0438\u0434\u0435 \u0442\u0440\u0435\u0443\u0433\u043E\u043B\u044C\u043D\u043E\u0433\u043E \u0449\u0438\u0442\u0430 \u0441 \u0431\u0443\u043A\u0432\u043E\u0439 \u00ABS\u00BB \u0432 \u0446\u0435\u043D\u0442\u0440\u0435. \u0412 \u043D\u0430\u0441\u0442\u043E\u044F\u0449\u0435\u0435 \u0432\u0440\u0435\u043C\u044F \u044D\u0442\u0430 \u044D\u043C\u0431\u043B\u0435\u043C\u0430 \u0438\u0441\u043F\u043E\u043B\u044C\u0437\u0443\u0435\u0442\u0441\u044F \u0432 \u0440\u0430\u0437\u043B\u0438\u0447\u043D\u044B\u0445 \u043C\u0435\u0434\u0438\u0430-\u043E\u0442\u0440\u0430\u0441\u043B\u044F\u0445 \u043A\u0430\u043A \u0441\u0438\u043C\u0432\u043E\u043B \u043F"@ru . + "Harold Lloyd - A Pictorial History of the Silent Screen.jpg"^^ . + . + "Superman"^^ . + . + . + . + "*"^^ . + "(This article is about the superhero. For other uses, see Superman (disambiguation).) Superman is a fictional superhero appearing in American comic books published by DC Comics. The character was created by writer Jerry Siegel and artist Joe Shuster, high school students living in Cleveland, Ohio, in 1933. They sold Superman to Detective Comics, the future DC Comics, in 1938. Superman debuted in Action Comics #1 (cover-dated June 1938) and subsequently appeared in various radio serials, newspaper strips, television programs, films, and video games. 
With this success, Superman helped to create the superhero archetype and establish its primacy within the American comic book. The character is also referred to by such epithets as the Man of Steel, the Man of Tomorrow, and The Last Son of Krypt"@en . + "Superman (cuyo nombre kryptoniano es Kal-El y su nombre terrestre es Clark Kent) es un personaje ficticio, un superh\u00E9roe de los c\u00F3mics que aparece en las publicaciones de DC Comics.Creado por el escritor estadounidense Jerry Siegel y el artista canadiense Joe Shuster en 1933, cuando ambos se encontraban viviendo en Cleveland, Ohio; lo vendieron a Detective Comics, Inc. en 1938 por USD 130 y la primera aventura del personaje fue publicada en Action Comics 1 (junio de 1938) para luego aparecer en varios seriales de radio, programas de televisi\u00F3n, pel\u00EDculas, tiras peri\u00F3dicas y videojuegos. Con el \u00E9xito de sus aventuras, Superman ayud\u00F3 a crear el g\u00E9nero del superh\u00E9roe y estableci\u00F3 su primac\u00EDa dentro del c\u00F3mic estadounidense. La apariencia del personaje es distintiva e ic\u00F3nica: un traje azul y rojo, con una capa y un escudo de \u201CS\u201D estilizado en su pecho,escudo que se ha convertido en un s\u00EDmbolo del personaje en todo tipo de medios de comunicaci\u00F3n. La historia original de Superman relata que naci\u00F3 con el nombre de Kal-El en el planeta Krypton; su padre, el cient\u00EDfico Jor-El, y su madre Lara Lor-Van, lo enviaron en una nave espacial con destino a la Tierra cuando era un ni\u00F1o, momentos antes de la destrucci\u00F3n de su planeta. Fue descubierto y adoptado por Jonathan Kent y Martha Kent, una pareja de granjeros de Smallville, Kansas, que lo criaron con el nombre de Clark Kent y le inculcaron un estricto c\u00F3digo moral. El joven Kent comenz\u00F3 a mostrar habilidades superhumanas, las mismas que al llegar a su madurez decidir\u00EDa usar para el beneficio de la humanidad. 
Aunque denominado, algunas veces, de manera poco halagadora, como \u00ABel gran Boy Scout azul\u00BB por otros superh\u00E9roes, Superman tambi\u00E9n es conocido como \u00ABEl Hombre de Acero\u00BB, \u00ABEl Hombre del Ma\u00F1ana\u00BB y \u00ABEl \u00DAltimo Hijo de Krypton\u00BB por el p\u00FAblico general de los c\u00F3mics. Bajo la identidad de Clark Kent, Superman vive en medio de los humanos como un \u00ABt\u00EDmido reportero\u00BB del diario Daily Planet de Metr\u00F3polis. Ah\u00ED trabaja junto a la reportera Lois Lane, con la cual ha sido vinculado rom\u00E1nticamente. DC Comics/Warner Bros. expandi\u00F3 lentamente el reparto de personajes secundarios, poderes y s\u00EDmbolos de Superman a trav\u00E9s de los a\u00F1os. Se modific\u00F3 su pasado para permitir sus aventuras como Superboy y se crearon otros supervivientes de Krypton, incluyendo a Supergirl y Krypto, el Superperro. Adem\u00E1s, Superman fue licenciado y adaptado a una gran variedad de medios, desde la radio a la televisi\u00F3n y el cine. En este \u00FAltimo se dio la que quiz\u00E1s sea su interpretaci\u00F3n m\u00E1s notable: Christopher Reeve tanto en Superman: la pel\u00EDcula (1978) como en su secuela Superman II (1981), pel\u00EDculas de Richard Donner que recibieron la aclamaci\u00F3n un\u00E1nime de la cr\u00EDtica y se volvieron las pel\u00EDculas m\u00E1s exitosas de Warner Bros. en su momento. No obstante, las dos siguientes secuelas, Superman III y Superman IV: The Quest for Peace no tuvieron tanto \u00E9xito a nivel de recaudaci\u00F3n. La pel\u00EDcula Superman Returns, que fue estrenada en 2006, fue poco exitosa dentro de los Estados Unidos pero en el resto del mundo sobrepas\u00F3 las expectativas iniciales de sus creadores. Desde el debut de Superman han transcurrido siete d\u00E9cadas, durante las cuales el personaje ha sido recreado y renovado en m\u00FAltiples ocasiones. 
Una modificaci\u00F3n mayor y significativa ocurri\u00F3 en 1986, cuando el autor John Byrne moderniz\u00F3 y realiz\u00F3 un masivo retcon al personaje, reduciendo los poderes de Superman y eliminando varios personajes del canon, en una estrategia que atrajo mucha atenci\u00F3n de los medios. Volvi\u00F3 a aparecer en la prensa en la d\u00E9cada de 1990, cuando DC Comics public\u00F3 La Muerte de Superman, una historia en la que el personaje falleci\u00F3 y luego revivi\u00F3. Superman ha resultado fascinante para los acad\u00E9micos y tanto tesistas culturales como comentaristas y cr\u00EDticos han explorado el impacto del personaje y su rol en los Estados Unidos y en el resto del mundo. Umberto Eco discuti\u00F3 las cualidades mitol\u00F3gicas del personaje a inicios de los a\u00F1os 1960 y Larry Niven escribi\u00F3 sobre las caracter\u00EDsticas de una hipot\u00E9tica relaci\u00F3n sexual entre el personaje y Lois Lane. La propiedad del personaje ha sido frecuentemente un objeto de disputa; Siegel y Shuster entablaron demandas en dos ocasiones para recuperar su posesi\u00F3n legal. Los derechos del personaje se encuentran de nuevo en discusi\u00F3n, debido a que los cambios a la ley de derechos de reproducci\u00F3n les permitieron a la esposa y a la hija de Siegel alegar por una parte de los derechos, algo que la compa\u00F1\u00EDa due\u00F1a de DC, Warner Bros., disputa."@es . + "Superman"@en . + . 
+ "\u8D85\u4EBA\uFF08\u82F1\u8BED\uFF1ASuperman\uFF09\u662F\u4E00\u540D\u51FA\u73FE\u65BCDC\u6F2B\u756B\u7684\u865B\u69CB\u8D85\u7D1A\u82F1\u96C4\u89D2\u8272\uFF0C\u540C\u6642\u666E\u904D\u4E5F\u88AB\u8A8D\u70BA\u662F\u7F8E\u570B\u7684\u6587\u5316\u5076\u50CF\u3002\u89D2\u8272\u7531\u4F5C\u5BB6\u5091\u745E\u00B7\u897F\u683C\u723E\u548C\u85DD\u8853\u5BB6\u55AC\u00B7\u8212\u65AF\u7279\u65BC1933\u5E74\u7684\u9AD8\u4E2D\u5B78\u751F\u6642\u671F\u6240\u5275\u4F5C\uFF0C\u7576\u6642\u4ED6\u5011\u751F\u6D3B\u65BC\u4FC4\u4EA5\u4FC4\u5DDE\u7684\u514B\u91CC\u592B\u862D\uFF1B\u5F8C\u4F86\uFF0C\u8D85\u4EBA\u65BC1938\u5E74\u8CE3\u7D66\u5075\u63A2\u6F2B\u756B\u516C\u53F8\uFF08\u5373\u73FE\u5728\u7684DC\u6F2B\u756B\uFF09\u3002\u8D85\u4EBA\u9996\u6B21\u767B\u5834\u65BC\u300A\u52D5\u4F5C\u6F2B\u756B\u300B#1\uFF081938\u5E746\u6708\uFF09\uFF0C\u4E26\u63A5\u8457\u767C\u5C55\u51FA\u5404\u7A2E\u5A92\u9AD4\uFF0C\u5982\u5EE3\u64AD\u5287\u3001\u5831\u7D19\u9023\u74B0\u6F2B\u756B\u3001\u96FB\u8996\u5287\u3001\u96FB\u5F71\u548C\u96FB\u5B50\u904A\u6232\u3002\u57FA\u65BC\u4ED6\u7684\u6210\u529F\uFF0C\u8D85\u4EBA\u6210\u4E86\u5275\u9020\u8D85\u7D1A\u82F1\u96C4\u98A8\u683C\u7684\u59CB\u7956\u548C\u78BA\u7ACB\u7F8E\u570B\u6F2B\u756B\u7684\u91CD\u8981\u5730\u4F4D\u3002\u8D85\u4EBA\u7684\u5916\u89C0\u662F\u4E00\u500B\u7368\u7279\u6027\u7684\u6A19\u8A8C\u3002\u4ED6\u5E38\u7A7F\u8457\u85CD\u8272\u670D\u88DD\u3001\u7D05\u8272\u6597\u7BF7\u8207\u7D05\u9EC3\u76F8\u9593\u7684\u76FE\u724C\u5F62\u300CS\u300D\u6A19\u8A8C\uFF0C\u4EE5\u53CA\u7D93\u5178\u7684\u7D05\u8272\u4E09\u89D2\u8932\uFF08\u65BC2011\u5E74\u7684\u65B052\u8A08\u5283\u4E2D\u53BB\u9664\uFF09\u3002\u9019\u7A2E\u5916\u89C0\u6210\u70BA\u88AB\u7121\u6578\u5A92\u9AD4\u63A1\u7528\u7684\u8C61\u5FB5\u3002 
\u8D85\u4EBA\u7684\u5916\u661F\u51FA\u751F\u540D\u70BA\u51F1\u00B7\u827E\u723E\uFF08\u82F1\u8BED\uFF1AKal-El\uFF09\u4E26\u8D77\u6E90\u65BC\u6C2A\u661F\uFF1B\u5728\u6C2A\u661F\u7206\u70B8\u524D\uFF0C\u9084\u662F\u5B30\u5152\u7684\u51F1\u00B7\u827E\u723E\u88AB\u8EAB\u70BA\u79D1\u5B78\u5BB6\u7684\u7236\u89AA\u55AC\u00B7\u827E\u723E\u7528\u592A\u7A7A\u8239\u9001\u5F80\u5730\u7403\u3002\u592A\u7A7A\u8239\u5230\u9054\u5730\u7403\u5F8C\u88AB\u582A\u85A9\u65AF\u5DDE\u7684\u80AF\u7279\u4E00\u5BB6\u767C\u73FE\uFF0C\u4E26\u70BA\u4ED6\u547D\u540D\u70BA\u514B\u62C9\u514B\u00B7\u80AF\u7279\uFF08\u82F1\u8BED\uFF1AClark Kent\uFF09\u800C\u6276\u990A\u4ED6\u9577\u5927\u3002\u514B\u62C9\u514B\u5F9E\u5C0F\u5C31\u5C55\u73FE\u4E86\u8D85\u4E4E\u5E38\u4EBA\u7684\u529B\u91CF\u52A0\u4E0A\u990A\u7236\u6BCD\u7684\u6559\u5C0E\u4E0B\uFF0C\u65E5\u6F38\u6210\u719F\u5F8C\uFF0C\u4ED6\u6C7A\u5FC3\u5229\u7528\u81EA\u5DF1\u7684\u80FD\u529B\u70BA\u4EBA\u985E\u9020\u798F\u3002\u9577\u5927\u5F8C\uFF0C\u8D85\u4EBA\u5C45\u4F4F\u5728\u7F8E\u570B\u865B\u69CB\u57CE\u5E02\u5927\u90FD\u6703\uFF0C\u4EE5\u514B\u62C9\u514B\u00B7\u80AF\u7279\u9019\u500B\u540D\u5B57\u5728\u300A\u661F\u7403\u65E5\u5831\u300B\u7576\u4E00\u540D\u8A18\u8005\u3002\u540C\u4E8B\u5973\u53CB\u70BA\u9732\u858F\u7D72\u00B7\u84EE\u6069\u3002\u4ED6\u5E38\u5E38\u9700\u8981\u9762\u5C0D\u5404\u7A2E\u5F37\u5927\u7684\u8D85\u7D1A\u53CD\u6D3E\uFF0C\u5982\u9054\u514B\u8CFD\u5FB7\u3001\u6BC0\u6EC5\u65E5\u3001\u8166\u9B54\u548C\u539F\u5B50\u9AB7\u9ACF\u7B49\uFF0C\u5176\u4E2D\u6301\u7E8C\u6700\u4E45\u7684\u5927\u6575\u70BA\u96F7\u514B\u65AF\u00B7\u8DEF\u745F\u3002\u8D85\u4EBA\u7684\u8D85\u80FD\u529B\u5305\u62EC\u8D85\u7D1A\u529B\u6C23\u3001\u8D85\u7D1A\u611F\u5B98\u3001\u8D85\u7D1A\u901F\u5EA6\u3001\u8D85\u7D1A\u8010\u529B\u3001\u8D85\u7D1A\u667A\u529B\u3001\u5200\u69CD\u4E0D\u5165\u3001\u71B1\u5C04\u7DDA\u3001\u51B7\u51CD\u547C\u5438\u548C\u98DB\u884C\u7B49\u5F37\u5927\u80FD\u529B\u3002 
\u8457\u8FF7\u65BC\u8D85\u4EBA\u7684\u5B78\u8005\u3001\u6587\u5316\u7814\u7A76\u8005\u548C\u8A55\u8AD6\u5BB6\u90FD\u66FE\u63A2\u8A0E\u904E\u8A72\u89D2\u8272\u7684\u5F71\u97FF\u8207\u4F5C\u7528\uFF1B\u6B64\u5916\uFF0C\u8D85\u4EBA\u7684\u7248\u6B0A\u4E5F\u5F15\u767C\u904E\u6578\u6B21\u722D\u8B70\uFF0C\u897F\u683C\u723E\u548C\u8212\u65AF\u7279\u7D93\u904E\u5169\u6B21\u8D77\u8A34\u800C\u5F97\u5230\u5176\u6240\u6709\u6B0A\u30022011\u5E745\u6708\uFF0C\u5728IGN\u8A55\u51FA\u7684100\u5F37\u6F2B\u756B\u82F1\u96C4\u4E2D\u8D85\u4EBA\u540D\u5217\u7B2C\u4E00\uFF0C\u9084\u8A8D\u70BA\u4ED6\u662F\u4ECA\u65E5\u6240\u77E5\u8D85\u7D1A\u82F1\u96C4\u7684\u85CD\u5716\u3002 2010\u5E742\u6708\uFF0C\u5728\u62CD\u8CE3\u6703\u4E0A\u539F\u7248\u7684\u300A\u52D5\u4F5C\u6F2B\u756B\u300B#1\u4EE5100\u842C\u7F8E\u5143\u7684\u9A5A\u4EBA\u552E\u50F9\u8CE3\u51FA\u3002\u8D85\u4EBA\u5728\u96FB\u5F71\u4E0A\u5DF2\u7531\u6F14\u54E1\u5BC7\u514B\u00B7\u827E\u6797\u3001\u55AC\u6CBB\u00B7\u674E\u7DAD\u3001\u514B\u91CC\u65AF\u591A\u798F\u00B7\u674E\u7DAD\u3001\u5E03\u862D\u767B\u00B7\u52DE\u65AF\u548C\u4EA8\u5229\u00B7\u5361\u7DAD\u723E\u8A6E\u91CB\u904E\u3002"@zh . + "Superman (cuyo nombre kryptoniano es Kal-El y su nombre terrestre es Clark Kent) es un personaje ficticio, un superh\u00E9roe de los c\u00F3mics que aparece en las publicaciones de DC Comics.Creado por el escritor estadounidense Jerry Siegel y el artista canadiense Joe Shuster en 1933, cuando ambos se encontraban viviendo en Cleveland, Ohio; lo vendieron a Detective Comics, Inc. en 1938 por USD 130 y la primera aventura del personaje fue publicada en Action Comics 1 (junio de 1938) para luego aparecer en varios seriales de radio, programas de televisi\u00F3n, pel\u00EDculas, tiras peri\u00F3dicas y videojuegos. Con el \u00E9xito de sus aventuras, Superman ayud\u00F3 a crear el g\u00E9nero del superh\u00E9roe y estableci\u00F3 su primac\u00EDa dentro del c\u00F3mic estadounidense. 
La apariencia del personaje es distintiva e ic\u00F3nica: un traje azul y"@es . + . + "Superman S symbol.svg"^^ . + "300"^^ . + . + "\u8D85\u4EBA\uFF08\u82F1\u8BED\uFF1ASuperman\uFF09\u662F\u4E00\u540D\u51FA\u73FE\u65BCDC\u6F2B\u756B\u7684\u865B\u69CB\u8D85\u7D1A\u82F1\u96C4\u89D2\u8272\uFF0C\u540C\u6642\u666E\u904D\u4E5F\u88AB\u8A8D\u70BA\u662F\u7F8E\u570B\u7684\u6587\u5316\u5076\u50CF\u3002\u89D2\u8272\u7531\u4F5C\u5BB6\u5091\u745E\u00B7\u897F\u683C\u723E\u548C\u85DD\u8853\u5BB6\u55AC\u00B7\u8212\u65AF\u7279\u65BC1933\u5E74\u7684\u9AD8\u4E2D\u5B78\u751F\u6642\u671F\u6240\u5275\u4F5C\uFF0C\u7576\u6642\u4ED6\u5011\u751F\u6D3B\u65BC\u4FC4\u4EA5\u4FC4\u5DDE\u7684\u514B\u91CC\u592B\u862D\uFF1B\u5F8C\u4F86\uFF0C\u8D85\u4EBA\u65BC1938\u5E74\u8CE3\u7D66\u5075\u63A2\u6F2B\u756B\u516C\u53F8\uFF08\u5373\u73FE\u5728\u7684DC\u6F2B\u756B\uFF09\u3002\u8D85\u4EBA\u9996\u6B21\u767B\u5834\u65BC\u300A\u52D5\u4F5C\u6F2B\u756B\u300B#1\uFF081938\u5E746\u6708\uFF09\uFF0C\u4E26\u63A5\u8457\u767C\u5C55\u51FA\u5404\u7A2E\u5A92\u9AD4\uFF0C\u5982\u5EE3\u64AD\u5287\u3001\u5831\u7D19\u9023\u74B0\u6F2B\u756B\u3001\u96FB\u8996\u5287\u3001\u96FB\u5F71\u548C\u96FB\u5B50\u904A\u6232\u3002\u57FA\u65BC\u4ED6\u7684\u6210\u529F\uFF0C\u8D85\u4EBA\u6210\u4E86\u5275\u9020\u8D85\u7D1A\u82F1\u96C4\u98A8\u683C\u7684\u59CB\u7956\u548C\u78BA\u7ACB\u7F8E\u570B\u6F2B\u756B\u7684\u91CD\u8981\u5730\u4F4D\u3002\u8D85\u4EBA\u7684\u5916\u89C0\u662F\u4E00\u500B\u7368\u7279\u6027\u7684\u6A19\u8A8C\u3002\u4ED6\u5E38\u7A7F\u8457\u85CD\u8272\u670D\u88DD\u3001\u7D05\u8272\u6597\u7BF7\u8207\u7D05\u9EC3\u76F8\u9593\u7684\u76FE\u724C\u5F62\u300CS\u300D\u6A19\u8A8C\uFF0C\u4EE5\u53CA\u7D93\u5178\u7684\u7D05\u8272\u4E09\u89D2\u8932\uFF08\u65BC2011\u5E74\u7684\u65B052\u8A08\u5283\u4E2D\u53BB\u9664\uFF09\u3002\u9019\u7A2E\u5916\u89C0\u6210\u70BA\u88AB\u7121\u6578\u5A92\u9AD4\u63A1\u7528\u7684\u8C61\u5FB5\u3002 
\u8457\u8FF7\u65BC\u8D85\u4EBA\u7684\u5B78\u8005\u3001\u6587\u5316\u7814\u7A76\u8005\u548C\u8A55\u8AD6\u5BB6\u90FD\u66FE\u63A2\u8A0E\u904E\u8A72\u89D2\u8272\u7684\u5F71\u97FF\u8207\u4F5C\u7528\uFF1B\u6B64\u5916\uFF0C\u8D85\u4EBA\u7684\u7248\u6B0A\u4E5F\u5F15\u767C\u904E\u6578\u6B21\u722D\u8B70\uFF0C\u897F\u683C\u723E\u548C\u8212\u65AF\u7279\u7D93\u904E\u5169\u6B21\u8D77\u8A34\u800C\u5F97\u5230\u5176\u6240\u6709\u6B0A\u30022011\u5E745\u6708\uFF0C\u5728IGN\u8A55\u51FA\u7684100\u5F37\u6F2B\u756B\u82F1\u96C4\u4E2D\u8D85\u4EBA\u540D\u5217\u7B2C\u4E00\uFF0C\u9084\u8A8D\u70BA\u4ED6\u662F\u4ECA\u65E5\u6240\u77E5\u8D85\u7D1A\u82F1\u96C4\u7684\u85CD\u5716\u3002 2010\u5E742\u6708\uFF0C\u5728\u62CD\u8CE3\u6703\u4E0A\u539F\u7248\u7684\u300A\u52D5\u4F5C\u6F2B\u756B\u300B#1\u4EE5100\u842C\u7F8E\u5143\u7684\u9A5A\u4EBA\u552E\u50F9\u8CE3\u51FA\u3002\u8D85\u4EBA\u5728\u96FB\u5F71\u4E0A\u5DF2\u7531\u6F14\u54E1\u5BC7\u514B\u00B7\u827E\u6797\u3001\u55AC\u6CBB\u00B7\u674E\u7DAD\u3001\u514B\u91CC\u65AF\u591A\u798F\u00B7\u674E\u7DAD\u3001\u5E03\u862D\u767B\u00B7\u52DE\u65AF\u548C\u4EA8\u5229\u00B7\u5361\u7DAD\u723E\u8A6E\u91CB\u904E\u3002"@zh . + . + . + . + . + "Superman"@de . + . + "male"@en . + "*Superhuman strength, speed, durability, and longevity\n*Flight\n*Heat vision\n*Freezing breath\n*Extrasensory and visual powers, including X-ray vision"^^ . + . + "\u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 \u0623\u0648 \u0633\u0648\u0628\u0631\u0645\u0627\u0646 (\u0628\u0627\u0644\u0625\u0646\u062C\u0644\u064A\u0632\u064A\u0629: Superman) \u0647\u0648 \u0634\u062E\u0635\u064A\u0629 \u0648\u0647\u0645\u064A\u0629 \u0648\u0628\u0637\u0644 \u062E\u0627\u0631\u0642 \u064A\u0638\u0647\u0631 \u0641\u064A \u0645\u0646\u0634\u0648\u0631\u0627\u062A \u062F\u064A \u0633\u064A \u0643\u0648\u0645\u0643\u0633. 
\u0623\u0646\u0647 \u064A\u062F\u0639\u0649 \u0628\u0627\u0644\u0628\u0637\u0644 \u0627\u0644\u062C\u0628\u0627\u0631 \u0648\u0628\u0632\u063A \u0646\u062C\u0645\u0647 \u0641\u064A \u0639\u0627\u0645 1938. \u064A\u0634\u062A\u0647\u0631 \u0628\u0644\u0642\u0628 \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u0641\u0648\u0644\u0627\u0630\u064A. \u0638\u0647\u0631 \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 \u0639\u0644\u0649 \u0635\u0641\u062D\u0627\u062A \u0627\u0644\u0639\u062F\u062F \u0627\u0644\u0623\u0648\u0644 \u0645\u0646 \u0642\u0635\u0635 \u0627\u0644\u062D\u0631\u0643\u0629 \u0627\u0644\u0645\u0635\u0648\u0631\u0629 (\u0623\u0643\u0634\u0646 \u0643\u0648\u0645\u0643\u0633) \u0628\u0634\u0647\u0631 \u064A\u0648\u0646\u064A\u0648 \u0645\u0646 \u0639\u0627\u0645 1938. \u0641\u0642\u062F \u0623\u0635\u0628\u062D \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 (\u0633\u0648\u0628\u0631\u0645\u0627\u0646) \u062A\u062F\u0631\u064A\u062C\u064A\u0627\u064B \u0623\u0634\u0647\u0631 \u0628\u0637\u0644 \u062E\u0627\u0631\u0642 \u0641\u064A \u0627\u0644\u0639\u0627\u0644\u0645. \u062C\u0639\u0644 \u0645\u062C\u0644\u0629 \u0627\u0644\u0631\u062C\u0644 \u0627\u0644\u062E\u0627\u0631\u0642 \u0623\u0634\u0647\u0631 \u0645\u062C\u0644\u0629 \u0645\u0635\u0648\u0631\u0629 \u0641\u064A \u0627\u0644\u0639\u0627\u0644\u0645 \u0648\u062A\u0645\u062A \u062A\u0631\u062C\u0645\u062A\u0647\u0627 \u0644\u0623\u063A\u0644\u0628 \u0644\u063A\u0627\u062A \u0627\u0644\u0639\u0627\u0644\u0645. 
\u0645\u062E\u062A\u0631\u0639\u0627 \u0634\u062E\u0635\u064A\u0629 \u0633\u0648\u0628\u0631\u0645\u0627\u0646 \u0647\u0645\u0627 \u062C\u064A\u0631\u064A \u0633\u064A\u063A\u0644 (Jerry Siegel) \u0648\u062C\u0648 \u0634\u0627\u0633\u062A\u0631 (Joe Shuster) \u0623\u062B\u0646\u0627\u0621 \u0639\u0645\u0644\u0647\u0627 \u0644\u062F\u0649 \u0634\u0631\u0643\u0629 \u0646\u0627\u0634\u064A\u0648\u0646\u0627\u0644 \u0643\u0648\u0645\u0643\u0633 (National Comics) \u0648\u0627\u0644\u062A\u064A \u0623\u0635\u0628\u062D\u062A \u0627\u0644\u064A\u0648\u0645 \u062A\u062D\u0645\u0644 \u0627\u0633\u0645 \u062F\u064A \u0633\u064A \u0643\u0648\u0645\u0643\u0633 (DC Comics). \u062A\u0637\u0648\u0631\u062A \u0642\u0635\u0635 \u0633\u0648\u0628\u0631\u0645\u0627\u0646 \u0645\u0646 \u0635\u0641\u062D\u0627\u062A \u0627\u0644\u0645\u062C\u0644\u0627\u062A \u0625\u0644\u0649 \u0645\u0633\u0644\u0633\u0644\u0627\u062A \u0627\u0644\u0625\u0630\u0627\u0639\u0629 \u062B\u0645 \u0627\u0644\u062A\u0644\u0641\u0632\u064A\u0648\u0646 \u062B\u0645 \u0627\u0644\u0623\u0641\u0644\u0627\u0645 \u0627\u0644\u0633\u064A\u0646\u0645\u0627\u0626\u064A\u0629 \u0648\u0627\u0644\u0639\u0627\u0628 \u0627\u0644\u0641\u064A\u062F\u064A\u0648."@ar . + "\u8D85\u4EBA"@zh . + "Superman est un super-h\u00E9ros de bande dessin\u00E9e am\u00E9ricaine appartenant au monde imaginaire de l\u2019Univers DC. Ce personnage de fiction est consid\u00E9r\u00E9 comme une ic\u00F4ne culturelle am\u00E9ricaine, aim\u00E9 par les enfant du monde entier. Cr\u00E9\u00E9 en janvier 1933 par l\u2019\u00E9crivain am\u00E9ricain Jerry Siegel et l\u2019artiste canadien Joe Shuster tandis qu\u2019ils habitaient tous deux \u00E0 Cleveland, le personnage fut vendu \u00E0 Detective Comics. Il appara\u00EEt pour la premi\u00E8re fois dans un Action Comics en juin 1938. Le personnage est ensuite repris dans plusieurs pi\u00E8ces radiophoniques, \u00E9missions de t\u00E9l\u00E9vision, films, comic strips et jeux vid\u00E9o. 
Au vu du succ\u00E8s et de la notori\u00E9t\u00E9 de ses aventures, Superman est essentiel dans la cr\u00E9ation d\u2019un genre propre aux super-h\u00E9ros. L\u2019apparence du personnage est particuli\u00E8rement iconique : un costume compos\u00E9 d\u2019un justaucorps rouge, bleu et jaune, de collants et de bottes moulantes, d\u2019une ceinture, avec une cape et sur le torse, le dessin d\u2019un \u2018S\u2019 rouge sur un \u00E9cu jaune. Cet \u00E9cu est maintenant tr\u00E8s souvent utilis\u00E9 pour symboliser le personnage. De nombreuses \u00E9tudes portent sur Superman, son impact et son r\u00F4le symboliques aux \u00C9tats-Unis et dans le reste du monde. Umberto Eco, par exemple, examine les qualit\u00E9s mythologiques du personnage. La propri\u00E9t\u00E9 l\u00E9gale du personnage a \u00E9t\u00E9 sujette \u00E0 de nombreuses controverses. Siegel et Shuster ont par deux fois intent\u00E9 un proc\u00E8s pour en r\u00E9cup\u00E9rer les droits."@fr . + . + "Superman"@nl . + "Superman est un super-h\u00E9ros de bande dessin\u00E9e am\u00E9ricaine appartenant au monde imaginaire de l\u2019Univers DC. Ce personnage de fiction est consid\u00E9r\u00E9 comme une ic\u00F4ne culturelle am\u00E9ricaine, aim\u00E9 par les enfant du monde entier. L\u2019apparence du personnage est particuli\u00E8rement iconique : un costume compos\u00E9 d\u2019un justaucorps rouge, bleu et jaune, de collants et de bottes moulantes, d\u2019une ceinture, avec une cape et sur le torse, le dessin d\u2019un \u2018S\u2019 rouge sur un \u00E9cu jaune. Cet \u00E9cu est maintenant tr\u00E8s souvent utilis\u00E9 pour symboliser le personnage."@fr . + . + . + "Superman"@fr . + . + "3008"^^ . + "Superman, il cui nome kryptoniano \u00E8 Kal-El, mentre il suo nome terrestre \u00E8 Clark Kent, \u00E8 un personaggio dei fumetti creato da Jerry Siegel e Joe Shuster nel 1933, ma pubblicato dalla DC Comics soltanto nel 1938. 
\u00C8 soprannominato anche \"L'Uomo d'Acciaio\" (in originale: Man of Steel) oppure \"L'Uomo del Domani\" (The Man of Tomorrow). Un uomo in grado di sollevare un'auto, con un costume blu addosso ed un mantello rosso sulle spalle, contornato da un gruppo di passanti impauriti: \u00E8 questa la prima immagine di Superman, quella con cui fa il suo esordio nelle edicole statunitensi. Sebbene venga spesso accostato al Superuomo nietzscheano, \u00E8 in realt\u00E0 pi\u00F9 vicino al \"Plus Man\" di Ralph Waldo Emerson, ovvero un uomo talmente potente da abbattere qualsiasi ostacolo gli si ponga davanti. Il titolo di \"\u00DCbermensch\" viene cos\u00EC assegnato a Batman, che utilizza la propria forza d'animo per sovrastare le ipocrisie e l'assenza di valori nel mondo. Cos\u00EC pu\u00F2 iniziare la storia di quello che inizialmente \u00E8 stato presentato in lingua italiana con il nome di Ciclone l'uomo d'acciaio e poi di Nembo Kid (il ragazzo delle nuvole). Il sito web IGN ha inserito Superman alla prima posizione nella classifica dei cento maggiori eroi della storia dei fumetti prima di Batman."@it . + . + . + . + . + "Kal-El (birth name)"@en . + "Superman (jego alter ego \u2013 Clark Kent, urodzony jako Kal-El) \u2013 fikcyjna posta\u0107 (superbohater), znana z serii komiks\u00F3w wydawanych przez DC Comics, oraz wszelkich adaptacji zwi\u0105zanych z ta postaci\u0105. Zosta\u0142 stworzony przez Joe Shustera i Jerry\u2019ego Siegela. Po raz pierwszy pojawi\u0142 si\u0119 w magazynie Action Comics vol. 1 #1 (czerwiec 1938 roku). Pierwowzorem Supermana by\u0142a posta\u0107 z\u0142oczy\u0144cy o telepatycznych zdolno\u015Bciach z historii The Reign of the Superman, kt\u00F3ra ukaza\u0142a si\u0119 w styczniu 1933 roku na \u0142amach trzeciego numeru wydawanego przez Shustera i Siegela fanzinu Science Fiction. 
Historia ta czerpa\u0142a z koncepcji nadcz\u0142owieka (\u00DCbermensch), opisanej przez Friedricha Nietzschego, w ksi\u0105\u017Cce pod tytu\u0142em Tako rzecze Zaratustra (Also Sprach Zarathustra). P\u00F3\u017Aniej jednak Siegel i Shuster postanowili uczyni\u0107 z tej postaci herosa, a w tworzeniu jego nowego wizerunku pomog\u0142y im kreacje aktorskie Douglasa Fairbanksa, odtw\u00F3rcy tytu\u0142owych r\u00F3l m.in. w takich filmach jak: Znak Zorro (The Mark of Zorro) z 1920 roku, czy te\u017C Robin Hood z 1922 roku. Du\u017C\u0105 rol\u0119 w kreacji postaci Supermana odegra\u0142a fantastyczno-naukowa powie\u015B\u0107 Philipa Wylie'a pod tytu\u0142em Gladiator z 1930 roku. Jej g\u0142\u00F3wny bohater \u2013 Hugo Danner, obdarzony nadludzk\u0105 si\u0142a i szybko\u015Bci\u0105 jest powszechnie uwa\u017Cany za protoplast\u0119 p\u00F3\u017Aniejszego przybysza z planety Krypton. Z kolei z innej powie\u015Bci Wylie'a pod tytu\u0142em When Worlds Collide z 1933 roku, zaczerpni\u0119to motyw ucieczki z planety, kt\u00F3r\u0105 czeka zag\u0142ada. Tak\u017Ce tw\u00F3rczo\u015B\u0107 innego pisarza science fiction, Edgara Rice\u2019a Burroughsa, autora powie\u015Bci o przygodach Johna Carter'a na Marsie m.in. Ksi\u0119\u017Cniczka Marsa (A Princess of Mars) z 1912 roku, mia\u0142a stanowi\u0107 wa\u017Cn\u0105 inspiracj\u0119 dla tw\u00F3rc\u00F3w Supermana. Tw\u00F3rcy czerpali r\u00F3wnie\u017C natchnienie z Judaizmu (Shuster i Siegel byli z pochodzenia \u017Bydami) i mitologii greckiej: Moj\u017Cesz (idea zbawiciela, pochodz\u0105cego z niemal wymar\u0142ego ludu), jak r\u00F3wnie\u017C Samson i Herakles (idea bohatera, dokonuj\u0105cych czyn\u00F3w niewykonalnych dla zwyk\u0142ego \u015Bmiertelnika). Prawdziwe, krypto\u0144skie nazwisko Supermana brzmi Kal-El (oryginalnie Kal-L), co przypomina hebrajskie \u201Eg\u0142os Boga\u201D. 
Sam przyrostek -El, jest w rzeczywisto\u015Bci nazwiskiem rodowym i pochodzi od semickiego s\u0142owa \u201EEl\u201D, oznaczaj\u0105cego boga. Ziemskie imi\u0119 Kal-Ela \u2013 Clark, zosta\u0142o zainspirowane imieniem ameryka\u0144skiego aktora Clarka Gable'a, natomiast jego nazwisko Kent, zosta\u0142o zapo\u017Cyczone od imienia innego aktora, Kenta Taylora. Pierwowzorem alter ego, Supermana, czyli nie\u015Bmia\u0142ego reportera Clarka Kenta pracuj\u0105cego na co dzie\u0144 w redakcji Daily Planet, by\u0142y r\u00F3\u017Cne kreacje filmowe ameryka\u0144skiego aktora komediowego Harolda Lloyda, jak r\u00F3wnie\u017C sam wsp\u00F3\u0142tw\u00F3rca postaci Joe Shuster, kt\u00F3ry w latach 30. XX wieku pracowa\u0142 dla gazety Toronto Star. Superman jest powszechnie uwa\u017Cany za ikon\u0119 ameryka\u0144skiej popkultury XX wieku. Jest obok Batmana i Spider-Mana najbardziej rozpoznawaln\u0105 postaci\u0105 komiksow\u0105. Jako przybysz z innej planety stanowi odzwierciedlenie przedstawiciela obcej kultury, imigranta, kt\u00F3ry zaaklimatyzowa\u0142 si\u0119 w ameryka\u0144skim spo\u0142ecze\u0144stwie, co jest z reszt\u0105 kolejn\u0105 aluzj\u0105 do przesz\u0142o\u015Bci Joe Shustera, gdy\u017C on sam by\u0142 imigrantem z Kanady. Superman obdarzony jest nadludzkimi zdolno\u015Bciami, dzi\u0119ki kt\u00F3rym jest w stanie nie\u015B\u0107 pomoc ludzko\u015Bci, stawa\u0107 w obronie s\u0142abszych i uci\u015Bnionych. Nosi on niebiesko-czerwony kostium z peleryn\u0105, wzorowany na stroju si\u0142acza cyrkowego. Na jego piersi widnieje charakterystyczny symbol \u2013 du\u017Ca \u0142aci\u0144ska litera \u201ES\u201D, wpisana w przypominaj\u0105ca diament tarcz\u0119. Znany jest tak\u017Ce pod przydomkami: \u201EThe Last Son of Krypton\u201D, \u201EThe Man of Steel\u201D i \u201EThe Man of Tomorrow\u201D. 
Superman poza komiksem pojawia\u0142 si\u0119 r\u00F3wnie\u017C w licznych serialach animowanych, filmach fabularnych i grach komputerowych bazuj\u0105cych na komiksach DC Comics. Pierwszy raz na w wersji aktorskiej pojawi\u0142 si\u0119 w 15-odcinkowym serialu kinowym Superman z 1948 roku, oraz w kontynuacji pod tytu\u0142em Atom Man vs. Superman z 1950 roku w g\u0142\u00F3wn\u0105 rol\u0119 wcieli\u0142 si\u0119 aktor Kirk Alyn. W serialu telewizyjnym Adventures of Superman z lat 1952\u20131958, oraz w pe\u0142nometra\u017Cowym filmie Superman and the Mole Men z 1951 roku, rol\u0119 Supermana zagra\u0142 aktor George Reeves. W 4-cz\u0119siowej serii film\u00F3w kinowych, zapocz\u0105tkowanej obrazem Superman (Superman: The Movie) z 1978 roku, w re\u017Cyserii Richarda Donnera, w posta\u0107 Supermana wcieli\u0142 si\u0119 aktor Christopher Reeve. W serialu telewizyjnym Superboy z lat 1988\u20131992 w tytu\u0142ow\u0105 rol\u0119 wcieli\u0142 si\u0119 aktor John Newton. W serialu telewizyjnym Nowe przygody Supermana (Lois & Clark: New Adventures of Superman) z lat 1993\u20131997 w rol\u0119 cz\u0142owieka ze stali wcieli\u0142 si\u0119 aktor Dean Cain. W serialu telewizyjnym Tajemnice Smallville (Smallville), w rol\u0119 m\u0142odego Clarka Kenta wcieli\u0142 si\u0119 aktor Tom Welling. W pseudo-sequelu do serii film\u00F3w z lat 1978\u20131987 pod tytu\u0142em Superman: Powr\u00F3t (Superman Returns) z 2006 roku (re\u017Cyseria Bryan Singer), nast\u0119pc\u0105 Christophera Reeve'a zosta\u0142 aktor Brandon Routh. W filmie Cz\u0142owiek ze stali (Man of Steel) w re\u017Cyserii Zacka Snydera, rol\u0119 Supermana zagra\u0142 aktor Henry Cavill. Superman jest na pierwszym miejscu najwa\u017Cniejszych bohater\u00F3w komiksowych w zestawieniu magazynu Empire, jak r\u00F3wnie\u017C serwisu internetowego IGN."@pl . + . + . + . + "Superman"@es . + "no"^^ . + "no"^^ . 
+ "Superman, ook wel 'De Man van Staal' genoemd, is een personage en imaginaire superheld die zijn eerste verschijning maakte in de Amerikaanse Action Comics #1 in 1938. Hij is als stripfiguur de eerste superheld die bovenmenselijke krachten bezit en lanceerde hiermee een genre waarin meer superhelden volgden, zoals Batman en Wonder Woman. Zijn alter ego is Clark Kent. Sindsdien is Superman een van de populairste en bekendste comic-figuren aller tijden geworden en wordt hij vaak gezien als een cultureel icoon van Amerika. Superman is een vast personage binnen het DC Universum en heeft model gestaan voor tal van andere superhelden. Door de jaren heen is het personage steeds verder ontwikkeld en kreeg hij meer superkrachten, een gedetailleerdere afkomst en uitgebreide cast van nevenpersonages, zoals een jonge Superman (Superboy), Supergirl, Krypto, Lois Lane en zijn aartsvijand Lex Luthor. Het personage heeft behalve in strips ook in tal van films, televisieseries en radiohoorspelen meegespeeld, en heeft een uitgebreide merchandising."@nl . + . + . + . + "DC Comics"^^ . + "superhero appearing in DC Comics publications and related media"@en . + . + . + . \ No newline at end of file diff --git a/sansa-ml-spark/src/test/resources/entity_resolution/source2.nt b/sansa-ml-spark/src/test/resources/entity_resolution/source2.nt new file mode 100644 index 0000000..68b3562 --- /dev/null +++ b/sansa-ml-spark/src/test/resources/entity_resolution/source2.nt @@ -0,0 +1,281 @@ + . + "Ali Baba"@de . + . + "743734575"^^ . + . + "Ali Baba"@en . + . + . + . + "Ali Baba en de veertig rovers is een sprookje uit Duizend-en-een nacht. Het verhaal werd echter pas in de 18e eeuw toegevoegd door de Fransman Antoine Galland, de eerste Europese vertaler van het werk. Het verhaal handelt over Ali Baba die toevallig ziet hoe een troep rovers hun schatkamer openen met de toverspreuk \"Sesam, open u\". Als de rovers weggaan, opent Ali Baba de schatkamer om zijn zakken te vullen. 
De rovers ontdekken hem, maar samen met zijn slimme slavin Morgiana verslaat Ali Baba de rovers."@nl . + . + "Ali Bab\u00E1"@pt . + . + "Al\u00EC Bab\u00E0 e i quaranta ladroni"@it . + "Jest on prostym rzemie\u015Blnikiem perskim (arabskim wed\u0142ug innych \u017Ar\u00F3de\u0142), kt\u00F3ry przez przypadek posiad\u0142 magiczn\u0105 formu\u0142\u0119 \u201ESezamie, otw\u00F3rz si\u0119!\", po wym\u00F3wieniu kt\u00F3rej dost\u0119pna stawa\u0142a si\u0119 grota, w kt\u00F3rej tytu\u0142owi rozb\u00F3jnicy przechowywali swe \u0142upy."@pl . + . + "Ali Baba"@en . + . + . + . + . + . + . + . + . + . + . + "Ali Baba (arabisch \u0639\u0644\u064A \u0628\u0627\u0628\u0627) ist eine Figur der 270. Geschichte aus der Geschichtensammlung Tausendundeine Nacht, die den Titel Ali Baba und die vierzig R\u00E4uber (arab. Ali Baba wal arba'een harami) tr\u00E4gt. In der arabischen Fassung von Tausendundeine Nacht war diese Geschichte noch nicht enthalten. Sie war in der ersten europ\u00E4ischen \u00DCbersetzung durch den franz\u00F6sischen Orientalisten Antoine Galland enthalten, der sie angeblich 1709 in Paris von einem aus Syrien stammenden M\u00E4rchenerz\u00E4hler geh\u00F6rt hat. Wahrscheinlich handelt es sich aber um Gallands eigenes Werk."@de . + "Jest on prostym rzemie\u015Blnikiem perskim (arabskim wed\u0142ug innych \u017Ar\u00F3de\u0142), kt\u00F3ry przez przypadek posiad\u0142 magiczn\u0105 formu\u0142\u0119 \u201ESezamie, otw\u00F3rz si\u0119!\", po wym\u00F3wieniu kt\u00F3rej dost\u0119pna stawa\u0142a si\u0119 grota, w kt\u00F3rej tytu\u0142owi rozb\u00F3jnicy przechowywali swe \u0142upy."@pl . + "Al\u00ED Bab\u00E0 e i quaranta ladroni (titolo completo: Storia di \u02BFAl\u012B B\u0101b\u0101 e dei quaranta ladroni, sterminati da una schiava) \u00E8 una storia d'origine persiana. 
Si tratta di un racconto che viene presentato come facente parte della silloge favolistica in lingua araba che va sotto il nome di Le mille e una notte (Alf layla wa layla), bench\u00E9 esso non ne abbia mai fatto davvero parte, come ben dimostrato da tutti i manoscritti che sono serviti alla collazione dell'opera."@it . + "Ali Baba et les Quarante Voleurs"@fr . + "\u0410\u043B\u0438-\u0411\u0430\u0431\u0430 \u0438 \u0441\u043E\u0440\u043E\u043A \u0440\u0430\u0437\u0431\u043E\u0439\u043D\u0438\u043A\u043E\u0432"@ru . + "Al\u00ED Bab\u00E1 (\u00E1rabe \u0639\u0644\u064A \u0628\u0627\u0628\u0627, persa \u0639\u0644\u06CC\u200C\u0628\u0627\u0628\u0627) es un personaje de ficci\u00F3n descrito en el cuento de aventuras Al\u00ED Bab\u00E1 y los cuarenta ladrones, perteneciente a Las mil y una noches. Algunos cr\u00EDticos creen que esta historia fue a\u00F1adida al libro por uno de sus transcriptores europeos, Antoine Galland, un orientalista franc\u00E9s del siglo XVIII que pudo haberla o\u00EDdo en forma oral de un cuentista maronita de Alepo. Sin embargo, Richard Francis Burton afirmaba que era parte del libro original de Las mil y una noches. Esta historia tambi\u00E9n ha sido popular como trama de pantomimas, siendo quiz\u00E1 la m\u00E1s famosa de ellas Chu Chin Chow (1916)."@es . + . + "Al\u00ED Bab\u00E1"@es . + . + "\u0639\u0644\u064A \u0628\u0627\u0628\u0627 \u0648\u0627\u0644\u0623\u0631\u0628\u0639\u0648\u0646 \u0644\u0635\u0627"@ar . + "927344"^^ . + . + . + "\u963F\u91CC\u5DF4\u5DF4"@zh . + . + "Ali Baba en de veertig rovers is een sprookje uit Duizend-en-een nacht. Het verhaal werd echter pas in de 18e eeuw toegevoegd door de Fransman Antoine Galland, de eerste Europese vertaler van het werk. Het verhaal handelt over Ali Baba die toevallig ziet hoe een troep rovers hun schatkamer openen met de toverspreuk \"Sesam, open u\". Als de rovers weggaan, opent Ali Baba de schatkamer om zijn zakken te vullen. 
De rovers ontdekken hem, maar samen met zijn slimme slavin Morgiana verslaat Ali Baba de rovers."@nl . + . + "Ali Baba (arabisch \u0639\u0644\u064A \u0628\u0627\u0628\u0627) ist eine Figur der 270. Geschichte aus der Geschichtensammlung Tausendundeine Nacht, die den Titel Ali Baba und die vierzig R\u00E4uber (arab. Ali Baba wal arba'een harami) tr\u00E4gt. In der arabischen Fassung von Tausendundeine Nacht war diese Geschichte noch nicht enthalten. Sie war in der ersten europ\u00E4ischen \u00DCbersetzung durch den franz\u00F6sischen Orientalisten Antoine Galland enthalten, der sie angeblich 1709 in Paris von einem aus Syrien stammenden M\u00E4rchenerz\u00E4hler geh\u00F6rt hat. Wahrscheinlich handelt es sich aber um Gallands eigenes Werk."@de . + "\u30A2\u30EA\u30D0\u30D0\u306840\u4EBA\u306E\u76D7\u8CCA"@ja . + . + . + "Ali Bab\u00E1 (em \u00E1rabe: \u0639\u0644\u064A \u0628\u0627\u0628\u0627) \u00E9 uma personagem fict\u00EDcia baseada na Ar\u00E1bia pr\u00E9-isl\u00E2mica. O conto est\u00E1 descrito nas aventuras de Ali Bab\u00E1 e os Quarenta Ladr\u00F5es, que faz parte do Livro das Mil e Uma Noites ou (Noites na Ar\u00E1bia). Alguns cr\u00EDticos acreditam que esta hist\u00F3ria tenha sido adicionada ao Livro das Mil e Uma Noites por um dos seus transcritores europeus, Antoine Galland, que foi um orientalista franc\u00EAs do s\u00E9culo XVIII que talvez a tenha ouvido, de um contador de hist\u00F3rias \u00E1rabe de Alepo. No entanto, Richard F. Burton garantiu que o conto faz parte do original Livro das Mil e Uma Noites. Esta hist\u00F3ria tamb\u00E9m tem sido utilizada como popular pantomima no famoso pantomima/musical Chu Chin Chow (1916)."@pt . + . + "Ali Bab\u00E1 (em \u00E1rabe: \u0639\u0644\u064A \u0628\u0627\u0628\u0627) \u00E9 uma personagem fict\u00EDcia baseada na Ar\u00E1bia pr\u00E9-isl\u00E2mica. 
O conto est\u00E1 descrito nas aventuras de Ali Bab\u00E1 e os Quarenta Ladr\u00F5es, que faz parte do Livro das Mil e Uma Noites ou (Noites na Ar\u00E1bia). Alguns cr\u00EDticos acreditam que esta hist\u00F3ria tenha sido adicionada ao Livro das Mil e Uma Noites por um dos seus transcritores europeus, Antoine Galland, que foi um orientalista franc\u00EAs do s\u00E9culo XVIII que talvez a tenha ouvido, de um contador de hist\u00F3rias \u00E1rabe de Alepo. No entanto, Richard F. Burton garantiu que o conto faz parte do original Livro das Mil e Uma Noites. Esta hist\u00F3ria tamb\u00E9m tem sido utilizada como popular pantomima no famoso pantomima/musical Chu Chin Chow (1916)."@pt . + . + . + "\u00AB\u0410\u043B\u0438\u0301-\u0411\u0430\u0431\u0430\u0301 \u0438 \u0441\u043E\u0301\u0440\u043E\u043A \u0440\u0430\u0437\u0431\u043E\u0301\u0439\u043D\u0438\u043A\u043E\u0432\u00BB \u2014 \u0432\u043E\u0441\u0442\u043E\u0447\u043D\u0430\u044F \u0441\u043A\u0430\u0437\u043A\u0430, \u043A\u043E\u0442\u043E\u0440\u0430\u044F \u0441\u043E \u0432\u0440\u0435\u043C\u0435\u043D\u0438 \u0410\u043D\u0442\u0443\u0430\u043D\u0430 \u0413\u0430\u043B\u043B\u0430\u043D\u0430 (1646\u20141715) \u0432\u043A\u043B\u044E\u0447\u0430\u0435\u0442\u0441\u044F \u0432 \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u0441\u0431\u043E\u0440\u043D\u0438\u043A\u0430 \u00AB\u0422\u044B\u0441\u044F\u0447\u0430 \u0438 \u043E\u0434\u043D\u0430 \u043D\u043E\u0447\u044C\u00BB. 
\u041D\u0430\u0438\u0431\u043E\u043B\u0435\u0435 \u0440\u0430\u043D\u043D\u044F\u044F \u0432\u0435\u0440\u0441\u0438\u044F \u043D\u0430\u043F\u0438\u0441\u0430\u043D\u0430 \u043D\u0430 \u0444\u0440\u0430\u043D\u0446\u0443\u0437\u0441\u043A\u043E\u043C \u044F\u0437\u044B\u043A\u0435 \u043F\u0435\u0440\u043E\u043C \u0441\u0430\u043C\u043E\u0433\u043E \u0413\u0430\u043B\u043B\u0430\u043D\u0430, \u0432 \u0447\u044C\u0451\u043C \u0440\u0430\u0441\u043F\u043E\u0440\u044F\u0436\u0435\u043D\u0438\u0438 \u043D\u0430\u0445\u043E\u0434\u0438\u043B\u0441\u044F \u0430\u0440\u0430\u0431\u0441\u043A\u0438\u0439 \u043E\u0440\u0438\u0433\u0438\u043D\u0430\u043B \u044D\u0442\u043E\u0439 \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0441\u043A\u0430\u0437\u043E\u043A, \u0432\u043F\u043E\u0441\u043B\u0435\u0434\u0441\u0442\u0432\u0438\u0438 \u0443\u0442\u0435\u0440\u044F\u043D\u043D\u044B\u0439. \u0412 \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0445 \u0438\u0437\u0434\u0430\u043D\u0438\u044F\u0445 \u0447\u0430\u0441\u0442\u043E \u0432\u043E\u0441\u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0441\u044F \u0441 \u0441\u043E\u043A\u0440\u0430\u0449\u0435\u043D\u0438\u044F\u043C\u0438. \u041F\u043E \u0410\u0430\u0440\u043D\u0435-\u0422\u043E\u043C\u043F\u0441\u043E\u043D\u0443, \u043E\u0442\u043D\u043E\u0441\u0438\u0442\u0441\u044F \u043A \u0442\u0438\u043F\u0443 \u0441\u043A\u0430\u0437\u043E\u043A \u2116 676. \u0412 \u0430\u0440\u0430\u0431\u0441\u043A\u0438\u0445 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 \u0441\u043B\u0435\u0434\u043E\u0432 \u0441\u044E\u0436\u0435\u0442\u0430 \u043D\u0435 \u0441\u043E\u0445\u0440\u0430\u043D\u0438\u043B\u043E\u0441\u044C. 
\u0415\u0441\u0442\u044C \u043F\u0440\u0435\u0434\u043F\u043E\u043B\u043E\u0436\u0435\u043D\u0438\u0435, \u0447\u0442\u043E \u0413\u0430\u043B\u043B\u0430\u043D \u0437\u0430\u043F\u0438\u0441\u0430\u043B \u0440\u0430\u0441\u0441\u043A\u0430\u0437 \u0432\u043E \u0432\u0440\u0435\u043C\u044F \u043F\u0443\u0442\u0435\u0448\u0435\u0441\u0442\u0432\u0438\u044F \u043F\u043E \u0411\u043B\u0438\u0436\u043D\u0435\u043C\u0443 \u0412\u043E\u0441\u0442\u043E\u043A\u0443. \u0420\u0443\u043A\u043E\u043F\u0438\u0441\u044C \u0441\u043A\u0430\u0437\u043A\u0438 \u043D\u0430 \u0430\u0440\u0430\u0431\u0441\u043A\u043E\u043C \u044F\u0437\u044B\u043A\u0435, \u043E\u0431\u043D\u0430\u0440\u0443\u0436\u0435\u043D\u043D\u0430\u044F \u0432 \u043D\u0430\u0447\u0430\u043B\u0435 XX \u0432\u0435\u043A\u0430 \u0432 \u0411\u043E\u0434\u043B\u0438\u0430\u043D\u0441\u043A\u043E\u0439 \u0431\u0438\u0431\u043B\u0438\u043E\u0442\u0435\u043A\u0435 \u041E\u043A\u0441\u0444\u043E\u0440\u0434\u0441\u043A\u043E\u0433\u043E \u0443\u043D\u0438\u0432\u0435\u0440\u0441\u0438\u0442\u0435\u0442\u0430, \u043E\u043A\u0430\u0437\u0430\u043B\u0430\u0441\u044C \u043C\u0438\u0441\u0442\u0438\u0444\u0438\u043A\u0430\u0446\u0438\u0435\u0439."@ru . + "Al\u00ED Bab\u00E1 (\u00E1rabe \u0639\u0644\u064A \u0628\u0627\u0628\u0627, persa \u0639\u0644\u06CC\u200C\u0628\u0627\u0628\u0627) es un personaje de ficci\u00F3n descrito en el cuento de aventuras Al\u00ED Bab\u00E1 y los cuarenta ladrones, perteneciente a Las mil y una noches. Algunos cr\u00EDticos creen que esta historia fue a\u00F1adida al libro por uno de sus transcriptores europeos, Antoine Galland, un orientalista franc\u00E9s del siglo XVIII que pudo haberla o\u00EDdo en forma oral de un cuentista maronita de Alepo. Sin embargo, Richard Francis Burton afirmaba que era parte del libro original de Las mil y una noches. Esta historia tambi\u00E9n ha sido popular como trama de pantomimas, siendo quiz\u00E1 la m\u00E1s famosa de ellas Chu Chin Chow (1916)."@es . + . + . 
+ . + . + "BlackBerry"@en . + . + . + . + "\u9ED1\u8393\u624B\u673A\uFF08\u82F1\u8BED\uFF1ABlackBerry\uFF09\u662F\u52A0\u62FF\u5927\u9ED1\u8393\u516C\u53F8\u7684\u624B\u63D0\u7121\u7DDA\u901A\u4FE1\u8A2D\u5099\u8207\u7121\u7DDA\u670D\u52D9\u54C1\u724C\uFF0C\u65BC1999\u5E74\u96A8\u8457\u7121\u7DDA\u884C\u52D5\u670D\u52D9\u8207\u9996\u652FBlackBerry\u88DD\u7F6E\u2014\u2014BlackBerry 850 \u547C\u53EB\u5668\u2014\u2014\u63A8\u51FA\u3002BlackBerry\u7684\u884C\u52D5\u88DD\u7F6E\u5728\u81FA\u7063\u88AB\u66B1\u7A31\u70BA\u9ED1\u8393\u673A\uFF0C\u5176\u6700\u5927\u7279\u8272\u662F\u652F\u63F4Push Mail\u670D\u52D9\u4EE5\u53CA\u9644\u6709\u5C0F\u578B\u4F46\u5B8C\u6574\u7684QWERTY\u9375\u76E4\u3002Push Mail\u670D\u52D9\u80FD\u4E3B\u52D5\u5C07\u4FE1\u4EF6\u4F3A\u670D\u5668\u4E0A\u7684\u65B0\u90F5\u4EF6\u50B3\u9001\u4E00\u4EFD\u5230BlackBerry\u624B\u6301\u88DD\u7F6E\u4E0A\uFF0C\u800C\u4E0D\u7528\u4F7F\u7528\u8005\u4E0A\u7DB2\u6AA2\u67E5\u662F\u5426\u6709\u65B0\u90F5\u4EF6\uFF0CQWERTY\u9375\u76E4\u5247\u65B9\u4FBF\u4F7F\u7528\u8005\u96A8\u6642\u96A8\u5730\u5EFA\u7ACB\u90F5\u4EF6\uFF0C\u52A0\u4E0A\u901A\u8A0A\u9304\u3001\u884C\u4E8B\u66C6\u529F\u80FD\uFF0C\u4F7F\u5F97BlackBerry\u6210\u70BA\u7576\u6642\u6700\u65B9\u4FBF\u7684\u884C\u52D5\u96FB\u5B50\u90F5\u4EF6\u89E3\u6C7A\u65B9\u6848\uFF0C\u56E0\u800C\u53D7\u5230\u5546\u52D9\u4EBA\u58EB\u559C\u611B\u8207\u5EE3\u6CDB\u63A1\u7528\uFF0C\u4E00\u4E9B\u5927\u578B\u4F01\u696D\u66F4\u63D0\u4F9B\u9ED1\u8393\u6A5F\u4E88\u5176\u884C\u653F\u4EBA\u54E1\u53CA\u5176\u4ED6\u50F1\u54E1\u4F7F\u7528\u3002 
2002\u5E74\u63A8\u51FA\u884C\u52D5\u96FB\u8A71\u5F8C\uFF0C\u9678\u7E8C\u52A0\u5165\u6587\u5B57\u77ED\u4FE1\u3001\u7DB2\u8DEF\u50B3\u771F\u3001\u7DB2\u9801\u700F\u89BD\u53CA\u5176\u4ED6\u7121\u7DDA\u8CC7\u8A0A\u670D\u52D9\u30022008\u5E74\u958B\u59CB\u63A8\u51FA\u5B8C\u5168\u89F8\u63A7\u7684\u6A5F\u578B\uFF0C\u4EE5\u865B\u64EC\u9375\u76E4\u53D6\u4EE3\u5BE6\u9AD4\u9375\u76E4\uFF0C\u4EE5\u53CA\u66F4\u9032\u4E00\u6B65\u7684\u591A\u9EDE\u89F8\u63A7\u529F\u80FD\u6A5F\u578B\u3002BlackBerry\u7684\u670D\u52D9\u4EA6\u7531\u6700\u958B\u59CB\u7684\u4E3B\u52D5\u63A8\u9001\u96FB\u5B50\u90F5\u4EF6\u64F4\u5C55\u5230\u5305\u542B\u901A\u8A0A\u9304\u3001\u884C\u4E8B\u66C6\u3001\u96FB\u5B50\u6587\u4EF6\u7B49\uFF0C\u4E26\u70BA\u5176\u4ED6\u54C1\u724C\u7684\u624B\u63D0\u96FB\u8A71\u53CA\u500B\u4EBA\u6578\u78BC\u52A9\u7406\u63A8\u51FABlackBerry Connect\u8EDF\u4EF6\uFF0C\u63D0\u4F9BPush Mail\u670D\u52D9\u3002"@zh . + . + "BlackBerry"@fr . + "BlackBerry"@pt . + . + . + "71996"^^ . + . + . + . + "BlackBerry (smartphone)"@nl . + "BlackBerry \u2013 smartfon wprowadzony w roku 1999, obs\u0142uguj\u0105cy wiadomo\u015Bci e-mail, rozmowy g\u0142osowe, wiadomo\u015Bci tekstowe (SMS), faksowanie przez internet, przegl\u0105danie stron WWW oraz inne us\u0142ugi informacyjne. BlackBerry jest produktem kanadyjskiej firmy Research In Motion. Urz\u0105dzenia BlackBerry przesy\u0142aj\u0105 dane bezprzewodowo poprzez sieci operator\u00F3w telefonii kom\u00F3rkowych lub Wi-Fi. BlackBerry przyczyni\u0142o si\u0119 do post\u0119pu na rynku urz\u0105dze\u0144 mobilnych, jako pierwsze koncentruj\u0105c si\u0119 na wiadomo\u015Bciach e-mail. Zastosowano tu po raz pierwszy metod\u0119 push e-mail pozwalaj\u0105c\u0105 na sta\u0142e po\u0142\u0105czenie terminala z serwerem BlackBerry i odbieranie poczty w czasie rzeczywistym. 
Tym co wyr\u00F3\u017Cnia BlackBerry, jest system operacyjny BlackBerry OS niestosowany przez innych producent\u00F3w i nieco sparta\u0144ski, je\u015Bli chodzi o rozrywk\u0119, ale przede wszystkim BIS (BlackBerry Internet Service) lub - dla klient\u00F3w biznesowych - BES (BlackBerry Enterprise Server) - p\u0142atne us\u0142ugi pozwalaj\u0105ce korzysta\u0107 z e-maila oraz (u niekt\u00F3rych operator\u00F3w za wi\u0119ksz\u0105 op\u0142at\u0105) daj\u0105ce nielimitowany dost\u0119p do internetu. Terminale Blackberry dzia\u0142aj\u0105 w wi\u0119kszo\u015Bci \u015Bwiatowych system\u00F3w GSM. Warunkiem pe\u0142nej funkcjonalno\u015Bci terminala BlackBerry jest w\u0142\u0105czona us\u0142uga BlackBerry u us\u0142ugodawcy, czyli operatora telefonii kom\u00F3rkowej. Dzi\u0119ki niej u\u017Cytkownik mo\u017Ce pobiera\u0107 i wysy\u0142a\u0107 dane za pomoc\u0105 bezpiecznego APN (punktu dost\u0119powego) blackberry.net. Dzi\u0119ki temu \u015Bci\u0105ganie poczty nie jest inicjowane przez u\u017Cytkownika, ale maile \u201Ewypychane\u201D s\u0105 przez serwer i przychodz\u0105 na urz\u0105dzenie samoczynnie, a ich obs\u0142uga jest taka sama jak obs\u0142uga SMS-\u00F3w. Us\u0142uga BlackBerry pozwala te\u017C na przegl\u0105danie stron WWW i WAP (blackberry internet service tylko WAP) oraz odbieranie wiadomo\u015Bci MMS. Bez w\u0142\u0105czonej us\u0142ugi BlackBerry telefon mo\u017Ce s\u0142u\u017Cy\u0107 jedynie do dzwonienia i pisania SMS-\u00F3w (da si\u0119 to wszystko omin\u0105\u0107, np. by korzysta\u0107 z internetu wystarczy zainstalowa\u0107 inn\u0105 przegl\u0105dark\u0119). Urz\u0105dzenie posiada certyfikat bezpiecze\u0144stwa NATO RESTRICTED.Od niedawna Telefony BlackBerry maj\u0105 wbudowan\u0105 funkcj\u0119 zwyk\u0142ego radia tak wi\u0119c u\u017Cytkownicy tej marki nie musz\u0105 zadowala\u0107 si\u0119 jedynie muzyk\u0105 p\u0142yn\u0105c\u0105 z radia internetowego. 
Przy intensywnym pobieraniu danych przez sie\u0107 3G operatora mocno nagrzewa si\u0119 r\u00F3wnie\u017C tylna cz\u0119\u015B\u0107 obudowy telefonu. Us\u0142uga BlackBerry jest dost\u0119pna w dw\u00F3ch wersjach: BlackBerry Internet Service (BIS) i Blackberry Enterprise Server (BES). BIS przeznaczona jest g\u0142\u00F3wnie dla u\u017Cytk\u00F3w indywidualnych. Umo\u017Cliwia obs\u0142ug\u0119 do 10 kont pocztowych typu POP3 lub IMAP4. BES zapewnia obs\u0142ug\u0119 poczty opartej na serwerach korporacyjnych w du\u017Cych przedsi\u0119biorstwach (Microsoft Exchange, Novell Groupwise, Lotus Domino). Poza mo\u017Cliwo\u015Bci\u0105 wysy\u0142ania i odbierania maili u\u017Cytkownik BES-a ma zdalny dost\u0119p ze swego urz\u0105dzenia do firmowej ksi\u0105\u017Cki adresowej oraz kalendarza.Us\u0142ugi BlackBerry s\u0105 r\u00F3wnie\u017C dost\u0119pne na niekt\u00F3rych telefonach innych producent\u00F3w, dla kt\u00F3rych jest dost\u0119pny program pocztowy BlackBerry. S\u0105 to: \n* AT&T Tilt \n* HTC Advantage X7500 \n* HTC TyTN \n* Motorola MPx220 (wybrane modele) \n* Nokia 6810 \n* Nokia 6820 \n* Nokia 9300 \n* Nokia 9300i \n* Nokia 9500 \n* Wszystkie telefony Nokia E-Series (poza modelami Nokia E71 i Nokia E66) \n* Qtek 9100 \n* Qtek 9000 \n* Samsung t719 \n* Siemens SK65, \n* Sony Ericsson P910 \n* Sony Ericsson P990 \n* Sony Ericsson M600i \n* Sony Ericsson P1i Terminale BlackBerry charakteryzuj\u0105 si\u0119 stylistyk\u0105 podobn\u0105 do urz\u0105dze\u0144 PDA, nowo\u015Bci\u0105 zastosowan\u0105 przez producenta jest unikatowa klawiatura z technologi\u0105 SureType pozwalaj\u0105ca na szybkie pisanie.W przypadku zagubienia lub kradzie\u017Cy terminala istnieje mo\u017Cliwo\u015B\u0107 zdalnego zablokowania i wymazania danych z urz\u0105dzenia.Podczas zaawansowanego kodowania wiadomo\u015Bci wykorzystywany jest algorytm Triple-DES z kluczem o d\u0142ugo\u015Bci 168 bit\u00F3w lub AES 256, jednak firma RIM udost\u0119pnia wybranym podmiotom (np. 
rz\u0105dom pa\u0144stw) technologi\u0119 umo\u017Cliwiaj\u0105c\u0105 dost\u0119p do przesy\u0142anych danych. Urz\u0105dzenie posiada sprz\u0119towe wspomaganie szyfrowania, aczkolwiek szczeg\u00F3\u0142y tego usprawnienia s\u0105 pilnie strze\u017Cone przez firm\u0119 RIM.Na \u015Bwiecie z rozwi\u0105zania korzysta oko\u0142o 12 milion\u00F3w u\u017Cytkownik\u00F3w (dane ze stycznia 2008 r.). Urz\u0105dzenia BlackBerry s\u0105 zaaprobowane i u\u017Cywane przez Bia\u0142y Dom, Kongres Stan\u00F3w Zjednoczonych, armi\u0119 i ameryka\u0144skie si\u0142y lotnicze, rz\u0105d kanadyjski (r\u00F3wnie\u017C lokalny), cz\u0142onk\u00F3w rz\u0105d\u00F3w Holandii, Niemiec, Austrii, policj\u0119, stra\u017C i s\u0142u\u017Cby miejskie w wielu krajach europejskich. Od 2007 roku rz\u0105dy Francji, Niemiec i Arabii Saudyjskiej zakazywa\u0142y u\u017Cywania aparat\u00F3w BlackBerry do korespondencji s\u0142u\u017Cbowej ze wzgl\u0119du na niedostateczne bezpiecze\u0144stwo transmisji, a Unia Europejska ich nie wybra\u0142a ze wzgl\u0119du na brak wspierania aplikacji zewn\u0119trznych i przysz\u0142ych technologii."@pl . + . + "10700000"^^ . + . + . + "BlackBerry"@es . + . + "BlackBerry \u2013 smartfon wprowadzony w roku 1999, obs\u0142uguj\u0105cy wiadomo\u015Bci e-mail, rozmowy g\u0142osowe, wiadomo\u015Bci tekstowe (SMS), faksowanie przez internet, przegl\u0105danie stron WWW oraz inne us\u0142ugi informacyjne. \n* AT&T Tilt \n* HTC Advantage X7500 \n* HTC TyTN \n* Motorola MPx220 (wybrane modele) \n* Nokia 6810 \n* Nokia 6820 \n* Nokia 9300 \n* Nokia 9300i \n* Nokia 9500 \n* Wszystkie telefony Nokia E-Series (poza modelami Nokia E71 i Nokia E66) \n* Qtek 9100 \n* Qtek 9000 \n* Samsung t719 \n* Siemens SK65, \n* Sony Ericsson P910 \n* Sony Ericsson P990 \n* Sony Ericsson M600i \n* Sony Ericsson P1i"@pl . + "Blackberry Logo.svg"^^ . 
+ "BlackBerry est une ligne de t\u00E9l\u00E9phones intelligents, cr\u00E9\u00E9e et d\u00E9velopp\u00E9e par Mike Lazaridis depuis 1999 puis rejoint par Jim Balsillie, d'abord sous le nom de RIM Research In Motion, puis du produit d\u00E9nomm\u00E9 BlackBerry, utilisant le syst\u00E8me d'exploitation propri\u00E9taire Blackberry OS, puis \u00E0 partir de janvier 2013 le passage sous le syst\u00E8me d'exploitation BlackBerry 10 fait que l'entreprise, dans un but de clart\u00E9, a adopt\u00E9 le nom unique de BlackBerry pour l'entreprise et les produits."@fr . + . + "BlackBerry"@it . + "BlackBerry (letteralmente \"mora\" in inglese) \u00E8 il marchio commerciale dei dispositivi portatili smartphone (i primi modelli, come l'850, erano cercapersone con funzionalit\u00E0 aggiuntive di messaggistica e agenda) prodotti dalla societ\u00E0 canadese BlackBerry Limited (precedentemente conosciuta come Research In Motion o RIM fino all'inizio del 2013) fondata dall'ingegnere elettrico Mike Lazaridis, oltre che dell'infrastruttura che permette il loro funzionamento."@it . + . + "BlackBerry"@pl . + . + "BlackBerry \u00E9 uma linha de smartphones e tablets criada pela empresa canadense BlackBerry (antiga Research in Motion). Tais aparelhos s\u00E3o conhecidos por serem focados no mercado corporativo, possuindo fun\u00E7\u00F5es que permitem a produ\u00E7\u00E3o de conte\u00FAdo profissional em qualquer lugar. A Blackberry desenvolveu um smartphone com a plataforma Android, procurando aumentar sua participa\u00E7\u00E3o no mercado de smartphones do mundo. O lan\u00E7amento do telem\u00F3vel foi no dia 6 de Novembro de 2015, nos Estados Unidos, pelo valor de US$ 699,00. Em 2016, foi anunciado que a BlackBerry vai deixar de fabricar smatphones. A produtora de telem\u00F3veis vai passar produ\u00E7\u00E3o dos equipamentos para as m\u00E3os de parceiros internacionais, permitindo focar todas as suas aten\u00E7\u00F5es no desenvolvimento de software."@pt . + . + . + . + . + . 
+ "BlackBerry \u2014 \u0431\u0435\u0441\u043F\u0440\u043E\u0432\u043E\u0434\u043D\u043E\u0439 \u043A\u043E\u043C\u043C\u0443\u043D\u0438\u043A\u0430\u0442\u043E\u0440, \u0432\u043F\u0435\u0440\u0432\u044B\u0435 \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043B\u0435\u043D\u043D\u044B\u0439 \u0432 1997 \u0433\u043E\u0434\u0443 \u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0435\u0439 Research In Motion."@ru . + "BlackBerry is a line of smartphones and services designed and marketed by BlackBerry Limited (formerly known as Research In Motion/RIM). The very first RIM device was the Inter@ctive Pager 900, a clamshell-type device that allowed two-way paging, announced on September 18, 1996. After the success of the 900, the Inter@ctive Pager 800 was created for IBM, which bought US$10 million worth of them on February 4, 1998. The next device to be released was the Inter@ctive Pager 950, on August 26, 1998. The very first device to carry the BlackBerry name was the BlackBerry 850, an email pager, released January 19, 1999. Although identical in appearance to the 950, the 850 was the first device to integrate email and the name Inter@ctive Pager was no longer used to brand the device."@en . + . + . 
+ "\u9ED1\u8393\u624B\u673A\uFF08\u82F1\u8BED\uFF1ABlackBerry\uFF09\u662F\u52A0\u62FF\u5927\u9ED1\u8393\u516C\u53F8\u7684\u624B\u63D0\u7121\u7DDA\u901A\u4FE1\u8A2D\u5099\u8207\u7121\u7DDA\u670D\u52D9\u54C1\u724C\uFF0C\u65BC1999\u5E74\u96A8\u8457\u7121\u7DDA\u884C\u52D5\u670D\u52D9\u8207\u9996\u652FBlackBerry\u88DD\u7F6E\u2014\u2014BlackBerry 850 \u547C\u53EB\u5668\u2014\u2014\u63A8\u51FA\u3002BlackBerry\u7684\u884C\u52D5\u88DD\u7F6E\u5728\u81FA\u7063\u88AB\u66B1\u7A31\u70BA\u9ED1\u8393\u673A\uFF0C\u5176\u6700\u5927\u7279\u8272\u662F\u652F\u63F4Push Mail\u670D\u52D9\u4EE5\u53CA\u9644\u6709\u5C0F\u578B\u4F46\u5B8C\u6574\u7684QWERTY\u9375\u76E4\u3002Push Mail\u670D\u52D9\u80FD\u4E3B\u52D5\u5C07\u4FE1\u4EF6\u4F3A\u670D\u5668\u4E0A\u7684\u65B0\u90F5\u4EF6\u50B3\u9001\u4E00\u4EFD\u5230BlackBerry\u624B\u6301\u88DD\u7F6E\u4E0A\uFF0C\u800C\u4E0D\u7528\u4F7F\u7528\u8005\u4E0A\u7DB2\u6AA2\u67E5\u662F\u5426\u6709\u65B0\u90F5\u4EF6\uFF0CQWERTY\u9375\u76E4\u5247\u65B9\u4FBF\u4F7F\u7528\u8005\u96A8\u6642\u96A8\u5730\u5EFA\u7ACB\u90F5\u4EF6\uFF0C\u52A0\u4E0A\u901A\u8A0A\u9304\u3001\u884C\u4E8B\u66C6\u529F\u80FD\uFF0C\u4F7F\u5F97BlackBerry\u6210\u70BA\u7576\u6642\u6700\u65B9\u4FBF\u7684\u884C\u52D5\u96FB\u5B50\u90F5\u4EF6\u89E3\u6C7A\u65B9\u6848\uFF0C\u56E0\u800C\u53D7\u5230\u5546\u52D9\u4EBA\u58EB\u559C\u611B\u8207\u5EE3\u6CDB\u63A1\u7528\uFF0C\u4E00\u4E9B\u5927\u578B\u4F01\u696D\u66F4\u63D0\u4F9B\u9ED1\u8393\u6A5F\u4E88\u5176\u884C\u653F\u4EBA\u54E1\u53CA\u5176\u4ED6\u50F1\u54E1\u4F7F\u7528\u3002 
2002\u5E74\u63A8\u51FA\u884C\u52D5\u96FB\u8A71\u5F8C\uFF0C\u9678\u7E8C\u52A0\u5165\u6587\u5B57\u77ED\u4FE1\u3001\u7DB2\u8DEF\u50B3\u771F\u3001\u7DB2\u9801\u700F\u89BD\u53CA\u5176\u4ED6\u7121\u7DDA\u8CC7\u8A0A\u670D\u52D9\u30022008\u5E74\u958B\u59CB\u63A8\u51FA\u5B8C\u5168\u89F8\u63A7\u7684\u6A5F\u578B\uFF0C\u4EE5\u865B\u64EC\u9375\u76E4\u53D6\u4EE3\u5BE6\u9AD4\u9375\u76E4\uFF0C\u4EE5\u53CA\u66F4\u9032\u4E00\u6B65\u7684\u591A\u9EDE\u89F8\u63A7\u529F\u80FD\u6A5F\u578B\u3002BlackBerry\u7684\u670D\u52D9\u4EA6\u7531\u6700\u958B\u59CB\u7684\u4E3B\u52D5\u63A8\u9001\u96FB\u5B50\u90F5\u4EF6\u64F4\u5C55\u5230\u5305\u542B\u901A\u8A0A\u9304\u3001\u884C\u4E8B\u66C6\u3001\u96FB\u5B50\u6587\u4EF6\u7B49\uFF0C\u4E26\u70BA\u5176\u4ED6\u54C1\u724C\u7684\u624B\u63D0\u96FB\u8A71\u53CA\u500B\u4EBA\u6578\u78BC\u52A9\u7406\u63A8\u51FABlackBerry Connect\u8EDF\u4EF6\uFF0C\u63D0\u4F9BPush Mail\u670D\u52D9\u3002"@zh . + . + . + "\u0628\u0644\u0627\u0643\u0628\u064A\u0631\u064A (\u0628\u0627\u0644\u0625\u0646\u062C\u0644\u064A\u0632\u064A\u0629: BlackBerry) \u0648\u064A\u0639\u0646\u064A (\u0627\u0644\u062A\u0648\u062A \u0627\u0644\u0623\u0633\u0648\u062F)\u060C \u0647\u0648 \u0646\u0648\u0639 \u0645\u0646 \u0627\u0644\u0647\u0648\u0627\u062A\u0641 \u0627\u0644\u0630\u0643\u064A\u0629 \u0627\u0644\u062A\u064A \u062A\u062F\u0639\u0645 \u062E\u062F\u0645\u0629 \u0627\u0644\u0628\u0631\u064A\u062F \u0627\u0644\u0625\u0644\u0643\u062A\u0631\u0648\u0646\u064A\u060C \u062A\u0645 \u062A\u0637\u0648\u064A\u0631\u0647 \u0645\u0646 \u0642\u0628\u0644 \u0634\u0631\u0643\u0629 \u0631\u064A\u0633\u0631\u0634 \u0625\u0646 \u0645\u0648\u0634\u0646 \u0627\u0644\u0643\u0646\u062F\u064A\u0629. 
\u064A\u062A\u0645\u064A\u0632 \u0627\u0644\u0628\u0644\u0627\u0643\u0628\u064A\u0631\u064A \u0628\u0634\u0643\u0644 \u0631\u0626\u064A\u0633\u064A \u0628\u0642\u062F\u0631\u062A\u0647 \u0639\u0644\u0649 \u0627\u0633\u062A\u0642\u0628\u0627\u0644 \u0648\u0625\u0631\u0633\u0627\u0644 \u0627\u0644\u0628\u0631\u064A\u062F \u0627\u0644\u0625\u0644\u0643\u062A\u0631\u0648\u0646\u064A \u062D\u064A\u062B\u0645\u0627 \u062A\u0648\u0641\u0631\u062A \u0634\u0628\u0643\u0629 \u0627\u062A\u0635\u0627\u0644\u0627\u062A \u062E\u0644\u0648\u064A\u0629 \u0644\u0639\u062F\u062F \u0643\u0628\u064A\u0631 \u0645\u0646 \u0634\u0631\u0643\u0627\u062A \u0627\u0644\u0627\u062A\u0635\u0627\u0644\u0627\u062A \u062D\u0648\u0644 \u0627\u0644\u0639\u0627\u0644\u0645\u060C \u0628\u0627\u0644\u0625\u0636\u0627\u0641\u0629 \u0625\u0644\u0649 \u062A\u0637\u0628\u064A\u0642\u0627\u062A \u0627\u0644\u0647\u0648\u0627\u062A\u0641 \u0627\u0644\u0630\u0643\u064A\u0629 \u0627\u0644\u062A\u0642\u0644\u064A\u062F\u064A\u0629 (\u062F\u0641\u062A\u0631 \u0627\u0644\u0639\u0646\u0627\u0648\u064A\u0646 \u0648\u0627\u0644\u062A\u0642\u0648\u064A\u0645 \u0648\u0642\u0648\u0627\u0626\u0645 \u0627\u0644\u0648\u0627\u062C\u0628\u0627\u062A \u0648\u0642\u062F\u0631\u0627\u062A \u0627\u0644\u0647\u0627\u062A\u0641 \u0627\u0644\u0645\u062A\u0639\u0627\u0631\u0641 \u0639\u0644\u064A\u0647\u0627\u060C \u0627\u0644\u062E). \u062A\u0634\u0643\u0644 \u0645\u0628\u064A\u0639\u0627\u062A \u0628\u0644\u0627\u0643\u0628\u064A\u0631\u064A 3% \u0645\u0646 \u0645\u0628\u064A\u0639\u0627\u062A \u0627\u0644\u0647\u0648\u0627\u062A\u0641 \u0627\u0644\u0630\u0643\u064A\u0629 \u062D\u0648\u0644 \u0627\u0644\u0639\u0627\u0644\u0645 \u0641\u064A \u0627\u0644\u0639\u0627\u0645 2011\u0645 . \u062A\u062A\u0648\u0641\u0631 \u062E\u062F\u0645\u0629 \u0628\u0644\u0627\u0643\u0628\u064A\u0631\u064A \u062D\u0627\u0644\u064A\u0627 \u0641\u064A \u0623\u0643\u062B\u0631 \u0645\u0646 90 \u062F\u0648\u0644\u0629."@ar . + "BlackBerry"@ja . 
+ "BlackBerry"@en . + "Blackberry"@de . + . + . + "Das Blackberry (Eigenschreibweise BlackBerry, Aussprache [\u02C8bl\u00E6kb\u0259ri], englisch f\u00FCr Brombeere) ist ein vom gleichnamigen kanadischen Unternehmen Blackberry (zuvor Research In Motion, kurz RIM) entwickeltes und vertriebenes Mobiltelefon (Smartphone). Charakteristisch ist die physikalische Tastatur, welche bis heute in fast allen Modellen Anwendung findet. 1999 brachte RIM das erste Ger\u00E4t mit der Bezeichnung Blackberry 850 auf den Markt, welches nachfolgend das Zeitalter der mobilen Kommunikation einl\u00E4utete. Erstmals war es m\u00F6glich, E-Mails \u00FCber eine mobile Internetverbindung abzurufen oder neue Termine drahtlos zu synchronisieren. Als Meilenstein der Technikgeschichte und erstes Smartphone gilt das 2002 vorgestellte Nachfolgemodell Blackberry 5810 mit einem integrierten Mobiltelefon."@de . + . + "BlackBerry es una marca de tel\u00E9fonos inteligentes desarrollada por la compa\u00F1\u00EDa canadiense BlackBerry, que integra el servicio de correo electr\u00F3nico m\u00F3vil desde 1999; aunque incluye las aplicaciones t\u00EDpicas de un tel\u00E9fono inteligente: libreta de direcciones, agenda, calendario, lista de tareas, bloc de notas, navegador, aplicaciones de redes sociales, as\u00ED como c\u00E1mara de fotograf\u00EDa integrada en todos los dispositivos. BlackBerry se hizo famosa por su teclado QWERTY incorporado, y por su capacidad para enviar y recibir correo electr\u00F3nico de Internet accediendo a las redes de las compa\u00F1\u00EDas de telefon\u00EDa celular que brindan este servicio. Debido a esta popularidad, el t\u00E9rmino blackberry tambi\u00E9n ha pasado a ser de uso com\u00FAn para hacer referencia a cualquier tel\u00E9fono celular inteligente que incor"@es . + . 
+ "BlackBerry\uFF08\u30D6\u30E9\u30C3\u30AF\u30D9\u30EA\u30FC\uFF09\u306F\u3001\u30AB\u30CA\u30C0\u306E\u30EA\u30B5\u30FC\u30C1\u30FB\u30A4\u30F3\u30FB\u30E2\u30FC\u30B7\u30E7\u30F3\u304C\u30011999\u5E74\u306B\u958B\u767A\u3057\u305F\u30B9\u30DE\u30FC\u30C8\u30D5\u30A9\u30F3\u3002\u6B27\u7C73\u306E\u30D3\u30B8\u30CD\u30B9\u30DE\u30F3\u3092\u4E2D\u5FC3\u306B\u5E83\u304F\u4F7F\u308F\u308C\u3066\u304A\u308A\u3001\u4E16\u754C\u3067175\u30AB\u56FD7000\u4E07\u4EBA\u4EE5\u4E0A\u304C\u5229\u7528\u3057\u30011\u51045\u5343\u4E07\u53F0\u4EE5\u4E0A\u304C\u8CA9\u58F2\u3055\u308C\u3066\u3044\u308B\u3002\u65E5\u672C\u3067\u3082\u3001\u7D044,000\u793E\u304C\u5C0E\u5165\u3092\u3057\u3066\u3044\u308B\u3002\u4E3B\u306B\u3001\u6CD5\u4EBA\u5411\u3051\u30B5\u30FC\u30D3\u30B9\u306EBlackBerry Enterprise Service (BES) \u3068\u3001\u500B\u4EBA\u30FB\u4E2D\u5C0F\u4F01\u696D\u5411\u3051\u30B5\u30FC\u30D3\u30B9\u306EBlackBerry Internet Service (BIS) \u306E2\u7A2E\u985E\u304C\u3042\u308B\u3002"@ja . + . + "BlackBerry es una marca de tel\u00E9fonos inteligentes desarrollada por la compa\u00F1\u00EDa canadiense BlackBerry, que integra el servicio de correo electr\u00F3nico m\u00F3vil desde 1999; aunque incluye las aplicaciones t\u00EDpicas de un tel\u00E9fono inteligente: libreta de direcciones, agenda, calendario, lista de tareas, bloc de notas, navegador, aplicaciones de redes sociales, as\u00ED como c\u00E1mara de fotograf\u00EDa integrada en todos los dispositivos. BlackBerry se hizo famosa por su teclado QWERTY incorporado, y por su capacidad para enviar y recibir correo electr\u00F3nico de Internet accediendo a las redes de las compa\u00F1\u00EDas de telefon\u00EDa celular que brindan este servicio. Debido a esta popularidad, el t\u00E9rmino blackberry tambi\u00E9n ha pasado a ser de uso com\u00FAn para hacer referencia a cualquier tel\u00E9fono celular inteligente que incorpore un teclado completo. 
BlackBerry usa un Sistema operativo propio, el BlackBerry OS, el cual adem\u00E1s de las prestaciones de un tel\u00E9fono inteligente incorpora su propio servicio de mensajer\u00EDa llamado BBM. El t\u00E9rmino fue acu\u00F1ado por la compa\u00F1\u00EDa de marcas Lexicon Branding, inspir\u00E1ndose en la semejanza de las teclas de los tel\u00E9fonos BlackBerry con las drupas de las que est\u00E1 compuesta la mora."@es . + . + "BlackBerry\uFF08\u30D6\u30E9\u30C3\u30AF\u30D9\u30EA\u30FC\uFF09\u306F\u3001\u30AB\u30CA\u30C0\u306E\u30EA\u30B5\u30FC\u30C1\u30FB\u30A4\u30F3\u30FB\u30E2\u30FC\u30B7\u30E7\u30F3\u304C\u30011999\u5E74\u306B\u958B\u767A\u3057\u305F\u30B9\u30DE\u30FC\u30C8\u30D5\u30A9\u30F3\u3002\u6B27\u7C73\u306E\u30D3\u30B8\u30CD\u30B9\u30DE\u30F3\u3092\u4E2D\u5FC3\u306B\u5E83\u304F\u4F7F\u308F\u308C\u3066\u304A\u308A\u3001\u4E16\u754C\u3067175\u30AB\u56FD7000\u4E07\u4EBA\u4EE5\u4E0A\u304C\u5229\u7528\u3057\u30011\u51045\u5343\u4E07\u53F0\u4EE5\u4E0A\u304C\u8CA9\u58F2\u3055\u308C\u3066\u3044\u308B\u3002\u65E5\u672C\u3067\u3082\u3001\u7D044,000\u793E\u304C\u5C0E\u5165\u3092\u3057\u3066\u3044\u308B\u3002\u4E3B\u306B\u3001\u6CD5\u4EBA\u5411\u3051\u30B5\u30FC\u30D3\u30B9\u306EBlackBerry Enterprise Service (BES) \u3068\u3001\u500B\u4EBA\u30FB\u4E2D\u5C0F\u4F01\u696D\u5411\u3051\u30B5\u30FC\u30D3\u30B9\u306EBlackBerry Internet Service (BIS) \u306E2\u7A2E\u985E\u304C\u3042\u308B\u3002"@ja . + . + "BlackBerry (letteralmente \"mora\" in inglese) \u00E8 il marchio commerciale dei dispositivi portatili smartphone (i primi modelli, come l'850, erano cercapersone con funzionalit\u00E0 aggiuntive di messaggistica e agenda) prodotti dalla societ\u00E0 canadese BlackBerry Limited (precedentemente conosciuta come Research In Motion o RIM fino all'inizio del 2013) fondata dall'ingegnere elettrico Mike Lazaridis, oltre che dell'infrastruttura che permette il loro funzionamento."@it . + . + "BlackBerry"@ru . 
+ "BlackBerry is a line of smartphones and services designed and marketed by BlackBerry Limited (formerly known as Research In Motion/RIM). The very first RIM device was the Inter@ctive Pager 900, a clamshell-type device that allowed two-way paging, announced on September 18, 1996. After the success of the 900, the Inter@ctive Pager 800 was created for IBM, which bought US$10 million worth of them on February 4, 1998. The next device to be released was the Inter@ctive Pager 950, on August 26, 1998. The very first device to carry the BlackBerry name was the BlackBerry 850, an email pager, released January 19, 1999. Although identical in appearance to the 950, the 850 was the first device to integrate email and the name Inter@ctive Pager was no longer used to brand the device. BlackBerry devices can record video, take photos, play music and also provide functions such as web browsing, email, instant messaging, and the multi-platform BlackBerry Messenger service. The BlackBerry line traditionally used proprietary operating systems developed by BlackBerry Limited known as BlackBerry OS. In 2013, BlackBerry introduced BlackBerry 10, a major revamp of the platform based on QNX operating system. BlackBerry 10 was meant to replace the aging BlackBerry OS platform with a new system that was more in line with the user experiences of modern smartphone operating systems. The first BB10 powered device was the BlackBerry Z10, which was followed by other all-touch and keyboard-equipped models; including the BlackBerry Q10, BlackBerry Classic, BlackBerry Passport, and the BlackBerry Leap. BlackBerry was considered one of the major smartphone vendors in the world, specializing in secure communications and mobile productivity. At its peak in September 2013, there were 85 million BlackBerry subscribers worldwide. 
However, BlackBerry has since lost its dominant position in the market due to the success of the Android and iOS platforms; the same numbers had fallen to 23 million in March 2016. In 2015, BlackBerry re-focused its business strategy and began to release Android-based smartphones, beginning with the BlackBerry Priv slider and then the BlackBerry DTEK50. However, BlackBerry COO Marty Beard told Bloomberg that \"The company\u2019s never said that we would not build another BB10 device.\" On September 28, 2016, Blackberry announced it will stop designing its own phones. Plus it has made a first licensing partnership with Indonesian company to set up a new joint venture company called \"BB Merah Putih\". Indonesia is the biggest market for BlackBerry devices and will make the devices in Indonesia."@en . + "BlackBerry \u2014 \u0431\u0435\u0441\u043F\u0440\u043E\u0432\u043E\u0434\u043D\u043E\u0439 \u043A\u043E\u043C\u043C\u0443\u043D\u0438\u043A\u0430\u0442\u043E\u0440, \u0432\u043F\u0435\u0440\u0432\u044B\u0435 \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043B\u0435\u043D\u043D\u044B\u0439 \u0432 1997 \u0433\u043E\u0434\u0443 \u043A\u043E\u043C\u043F\u0430\u043D\u0438\u0435\u0439 Research In Motion."@ru . + . + "Een BlackBerry is een smartphone en pda die gebruikt kan worden voor mobiele telefonie, mobiel internet (ontvangen en versturen van e-mail en internetsurfen), PIM (Personal Information Manager) en andere applicaties. De BlackBerry kwam in 1999 voor het eerst op de markt. Hij werd ontwikkeld door het gelijknamige, Canadese bedrijf BlackBerry (voorheen RIM). Hij ondersteunt push e-mail. Het draadloze netwerk dat wordt gebruikt komt van de reguliere mobieletelefonieproviders.In 2010 kondigde BlackBerry een tablet-pc aan met de naam BlackBerry PlayBook, die in juni 2011 op de markt kwam."@nl . + . + . + . 
+ "Das Blackberry (Eigenschreibweise BlackBerry, Aussprache [\u02C8bl\u00E6kb\u0259ri], englisch f\u00FCr Brombeere) ist ein vom gleichnamigen kanadischen Unternehmen Blackberry (zuvor Research In Motion, kurz RIM) entwickeltes und vertriebenes Mobiltelefon (Smartphone). Charakteristisch ist die physikalische Tastatur, welche bis heute in fast allen Modellen Anwendung findet. 1999 brachte RIM das erste Ger\u00E4t mit der Bezeichnung Blackberry 850 auf den Markt, welches nachfolgend das Zeitalter der mobilen Kommunikation einl\u00E4utete. Erstmals war es m\u00F6glich, E-Mails \u00FCber eine mobile Internetverbindung abzurufen oder neue Termine drahtlos zu synchronisieren. Als Meilenstein der Technikgeschichte und erstes Smartphone gilt das 2002 vorgestellte Nachfolgemodell Blackberry 5810 mit einem integrierten Mobiltelefon. Blackberry entwickelte im Verlauf unterschiedliche Smartphones auf Basis des hauseigenen Betriebssystems Blackberry OS, darunter die popul\u00E4ren Baureihen Bold und Curve. Das Unternehmen konzentrierte sich zu Beginn schwerpunktm\u00E4\u00DFig auf Gesch\u00E4ftskunden, erweiterte im Verlauf allerdings die Funktionalit\u00E4t, um das Segment der Privatanwender f\u00FCr sich zu gewinnen. Im Oktober 2011 stellte Blackberry auf der hauseigenen Entwicklerkonferenz das neue Betriebssystem Blackberry 10 vor, welches von aktuellen Modellen wie dem Blackberry Q10, Classic, Z10 oder Passport genutzt wird. Ende 2014 wurde der Amazon Appstore f\u00FCr alle Blackberry-10-Ger\u00E4te zur Verf\u00FCgung gestellt, wor\u00FCber alle bekannten Android-Apps installiert werden k\u00F6nnen. Ein Jahr sp\u00E4ter ver\u00F6ffentlichte Blackberry mit dem Modell Priv das erste eigene Smartphone mit Android-System. Am 28. September 2016 k\u00FCndigte Blackberry an, das Hardwaregesch\u00E4ft und die daran angeschlossene Entwicklung an externe Partner auszulagern um Kosten zu sparen. 
Der Vorstandsvorsitzende John Chen erkl\u00E4rte nachfolgend in einem Interview, dass es auch in Zukunft Smartphones der Marke Blackberry mit Tastatur geben wird."@de . + "BlackBerry est une ligne de t\u00E9l\u00E9phones intelligents, cr\u00E9\u00E9e et d\u00E9velopp\u00E9e par Mike Lazaridis depuis 1999 puis rejoint par Jim Balsillie, d'abord sous le nom de RIM Research In Motion, puis du produit d\u00E9nomm\u00E9 BlackBerry, utilisant le syst\u00E8me d'exploitation propri\u00E9taire Blackberry OS, puis \u00E0 partir de janvier 2013 le passage sous le syst\u00E8me d'exploitation BlackBerry 10 fait que l'entreprise, dans un but de clart\u00E9, a adopt\u00E9 le nom unique de BlackBerry pour l'entreprise et les produits. \u00C0 ce propos, le Syst\u00E8me d'exploitation BlackBerry 10 est fond\u00E9 sur le micro noyau QNX, logiciel de la soci\u00E9t\u00E9 \u00E9ponyme rachet\u00E9e en 2010 par la soci\u00E9t\u00E9 canadienne RIM. Le groupe BlackBerry annonce, le 23 septembre 2013, une proposition pour son \u00E9ventuel rachat par le fonds d'investissement canadien Fairfax, premier actionnaire du groupe avec 10 % des parts pour un montant d'environ 4,7 milliards de dollars. Finalement cette offre est abandonn\u00E9e le 4 novembre 2013. BlackBerry s'est depuis relanc\u00E9 avec une lev\u00E9e de fonds de 1 Milliard de dollars aupr\u00E8s de Fairfax Holdings, a restructur\u00E9 l'entreprise en remerciant 40% des effectifs, en vendant la majorit\u00E9 de son parc immobilier de Waterloo afin de se procurer des liquidit\u00E9s. Sous l'impulsion de son nouveau CEO John Chen, BlackBerry se recentre plut\u00F4t sur le monde professionnel, d\u00E9l\u00E8gue la partie entr\u00E9e de gamme de sa production de smartphone \u00E0 Foxconn tout en gardant le milieu et haut de gamme en interne. 
\u00C0 la surprise g\u00E9n\u00E9rale, le groupe annonce par un communiqu\u00E9 le 25 septembre 2015 le lancement d'ici \u00E0 la fin de l'ann\u00E9e de son premier appareil sous Android baptis\u00E9 Priv. Le BlackBerry Priv est disponible en pr\u00E9commande chez Orange et Sosh depuis d\u00E9but 2016 et \u00E9galement SFR depuis f\u00E9vrier 2016. Le 5 juillet 2016, le groupe annonce la fin du Blackberry classic pour se concentrer sur ses smartphones sous Android n'excluant pas de se retirer du march\u00E9 des smartphones pour se concentrer sur les logiciels. En effet le groupe a accumul\u00E9 les pertes au printemps 2016 (670 millions de dollars), n'arrivant \u00E0 vendre qu'un demi million de smartphones. En septembre 2016, Blackberry annonce qu'il va cesser la fabrication de t\u00E9l\u00E9phones pour se concentrer sur des activit\u00E9s de services aux entreprises et aux gouvernements."@fr . + . + . + . + "Een BlackBerry is een smartphone en pda die gebruikt kan worden voor mobiele telefonie, mobiel internet (ontvangen en versturen van e-mail en internetsurfen), PIM (Personal Information Manager) en andere applicaties. De BlackBerry kwam in 1999 voor het eerst op de markt. Hij werd ontwikkeld door het gelijknamige, Canadese bedrijf BlackBerry (voorheen RIM). Hij ondersteunt push e-mail. Het draadloze netwerk dat wordt gebruikt komt van de reguliere mobieletelefonieproviders.In 2010 kondigde BlackBerry een tablet-pc aan met de naam BlackBerry PlayBook, die in juni 2011 op de markt kwam. Op 30 januari 2013 onthulde het bedrijf twee nieuwe smartphones die, vergeleken met vroegere modellen, niet meer over het op Java-gebaseerde platform beschikken, maar een door het bedrijf nieuw ontworpen besturingssysteem, BlackBerry 10, dat gebouwd is op het QNX-besturingssysteem. Vanwege de veelal verslavende werking van de BlackBerry in de zakenwereld en onder consumenten, heeft BlackBerry als bijnaam \"Crackberry\" gekregen."@nl . + "\u9ED1\u8393\u624B\u6A5F"@zh . + . + . 
+ "745093944"^^ . + . + "BlackBerry \u00E9 uma linha de smartphones e tablets criada pela empresa canadense BlackBerry (antiga Research in Motion). Tais aparelhos s\u00E3o conhecidos por serem focados no mercado corporativo, possuindo fun\u00E7\u00F5es que permitem a produ\u00E7\u00E3o de conte\u00FAdo profissional em qualquer lugar. A Blackberry desenvolveu um smartphone com a plataforma Android, procurando aumentar sua participa\u00E7\u00E3o no mercado de smartphones do mundo. O lan\u00E7amento do telem\u00F3vel foi no dia 6 de Novembro de 2015, nos Estados Unidos, pelo valor de US$ 699,00."@pt . + . + "\u0628\u0644\u0627\u0643 \u0628\u064A\u0631\u064A"@ar . + "of an unidentified blackberry species\n220pxBlackberry flower, Rubus fruticosus species aggregate."^^ . + "Mora (fruta)"@es . + "Blackberry"@en . + . + "214"^^ . + "Blackberry"@en . + "4.88"^^ . + "19.8"^^ . + . + "0.02"^^ . + . + . + "72339"^^ . + "9.61"^^ . + . + . + "20"^^ . + "0.62"^^ . + "1"^^ . + . + "25"^^ . + "29"^^ . + "180"^^ . + "Rubus"^^ . + . + "1"^^ . + . + . + . + "0.03"^^ . + "1.17"^^ . + "*Rubus ursinus\n* Rubus laciniatus\u2014Evergreen blackberry\n*Rubus argutus\n* Rubus armeniacus\u2014Himalayan blackberry\n*Rubus plicatus\n*Rubus ulmifolius\nAnd hundreds more microspecies"^^ . + . + "0.49"^^ . + . + . + . + . + "Heat vision"^^ . + . + . + "Super-Mulher (AO 1990: Supermulher; Superwoman, no original) \u00E9 o nome de v\u00E1rias personagens fict\u00EDcias criadas pela DC Comics, com a inten\u00E7\u00E3o de criar uma vers\u00E3o feminina do Superman. No Brasil, a Mulher-Maravilha j\u00E1 foi chamada de Super-Mulher (assim como em Portugal), quando suas aventuras forma publicadas pela antiga Editora Orbis."@pt . + . + . + . + . + "Supermulher"@pt . + . + . + . + . + . + "Superwoman es una Supervillana de Superman y tal vez de la Mujer Maravilla del Universo DC."@es . + "Crime Syndicate of America, Crime Syndicate"^^ . + . + . + "Lois Lane"@en . + "Invulnerability"^^ . 
+ "Justice League of America #29"^^ . + "Superwoman \u00E8 il nome dato a numerosi personaggi pubblicati nel corso degli anni dalla DC Comics, molti dei quali, come la popolare Supergirl, sono donne con poteri simili a quelli di Superman. Il nome \"Superwoman\" fu originariamente un copyright di Detective Comics nello sforzo di prevenire che la concorrenza ne potesse fare uso. Come era prevedibile, fu creata una copia non ufficiale di un fumetto dal titolo Superwoman. La copertina fu una riproduzione di More Fun Comics con la ristampa di contenuti interni del terzo numero. La prima vera apparizione di Superwoman, che solitamente fu pensato come una pista di inizio per l'introduzione successiva di Kara Zor-El nei panni di Supergirl, fu pubblicata nella serie Action Comics."@it . + . + "Super-Mulher (AO 1990: Supermulher; Superwoman, no original) \u00E9 o nome de v\u00E1rias personagens fict\u00EDcias criadas pela DC Comics, com a inten\u00E7\u00E3o de criar uma vers\u00E3o feminina do Superman. No Brasil, a Mulher-Maravilha j\u00E1 foi chamada de Super-Mulher (assim como em Portugal), quando suas aventuras forma publicadas pela antiga Editora Orbis."@pt . + "[[#Dana Dearden"@en . + "Superwoman is the name of several fictional characters from DC Comics. Most of them are, like Supergirl, women with powers similar to those of Superman. The name was trademarked by Detective Comics, to prevent competitors from using it. As was the practice, an ashcan copy was created with the title of Superwoman. The cover was a reproduction of More Fun Comics, with the interior being a reprint of the third issue. The first true appearance of Superwoman was in Action Comics."@en . + . + . + . + . + "740445504"^^ . + . + . + "Superwoman (personaggio)"@it . + . + "Superwoman \u00E8 il nome dato a numerosi personaggi pubblicati nel corso degli anni dalla DC Comics, molti dei quali, come la popolare Supergirl, sono donne con poteri simili a quelli di Superman. 
Il nome \"Superwoman\" fu originariamente un copyright di Detective Comics nello sforzo di prevenire che la concorrenza ne potesse fare uso. Come era prevedibile, fu creata una copia non ufficiale di un fumetto dal titolo Superwoman. La copertina fu una riproduzione di More Fun Comics con la ristampa di contenuti interni del terzo numero. La prima vera apparizione di Superwoman, che solitamente fu pensato come una pista di inizio per l'introduzione successiva di Kara Zor-El nei panni di Supergirl, fu pubblicata nella serie Action Comics."@it . + . + "Superwoman"@en . + . + . + . + . + "Action Comics #60"^^ . + . + "Superwoman"@en . + . + . + . + "fictional character"@en . + . + . + "1266342"^^ . + . + "Superwoman"@en . + . + "Superwoman is the name of several fictional characters from DC Comics. Most of them are, like Supergirl, women with powers similar to those of Superman. The name was trademarked by Detective Comics, to prevent competitors from using it. As was the practice, an ashcan copy was created with the title of Superwoman. The cover was a reproduction of More Fun Comics, with the interior being a reprint of the third issue. The first true appearance of Superwoman was in Action Comics."@en . + . + . + "Energy manipulation"^^ . + . + "Lois Lane"^^ . + . + . + . + "*Super strength, speed, Heat Vision agility, reflexes, stamina, and endurance\n* Flight\n* Superior hand-to-hand combatant\n* Healing factor\n* Magical Weaponry, Lasso of submission , a weaponized [[#Skills and resources"^^ . + "y"^^ . + "super"^^ . + . + . + . + . + . + . + "Superwoman"^^ . + . + . + "DC Comics"^^ . + "y"^^ . + . + . + "Superhuman speed"^^ . + . + . + "The Antimatter Superwoman. Art by Frank Quitely."^^ . + "yes."^^ . + . + "Superwoman"@es . + . + "Superhuman strength"^^ . + "Luma Lynai"@en . + . + "Kristin Wells"@en . + . + . + . + . + "Superwoman es una Supervillana de Superman y tal vez de la Mujer Maravilla del Universo DC."@es . + "Damnation Island"^^ . 
+ "Lois Lane/Diana (Earth 3)"@en . + . \ No newline at end of file diff --git a/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala b/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala new file mode 100644 index 0000000..f94cbf6 --- /dev/null +++ b/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala @@ -0,0 +1,50 @@ +package net.sansa_stack.ml.spark.entity_resolution + +import com.holdenkarau.spark.testing.DataFrameSuiteBase +import org.apache.jena.riot.Lang +import org.apache.spark.rdd.RDD +import org.scalatest.FunSuite + +import net.sansa_stack.ml.spark.entity_resolution._ + +class ERTests extends FunSuite with DataFrameSuiteBase { + + import net.sansa_stack.rdf.spark.io._ + + val source1 = getClass.getResource("/entity_resolution/source1.nt").getPath + val source2 = getClass.getResource("/entity_resolution/source2.nt").getPath + val lang = Lang.NTRIPLES + val triplesSource1 = spark.rdf(lang)(source1) + val triplesSource2 = spark.rdf(lang)(source2) + val expected = Array("Ali_Baba : Ali_Baba", "Blackberry : Blackberry") + val teacherRDD = spark.sparkContext.parallelize(expected) + + test("performing entity resolution using HashingTF method should result in 2 matches") { + var outputRDD: RDD[(String, String, Double)] = null + + val erTest1 = new ERHashingTF(spark, triplesSource1, triplesSource2, 0.10, 0.50, 0.20, 6) + outputRDD = erTest1.run() + val predictedRDD = outputRDD.map(f => { + (f._1 + " : " + f._2) + }) + + val cnt = teacherRDD.intersection(predictedRDD).count() + assert(cnt==2) + } + + test("performing entity resolution using CountVetcorizerModel method should result in 2 matches") { + var outputRDD: RDD[(String, String, Double)] = null + + val erTest2 = new ERCountVectorizer(spark, triplesSource1, triplesSource2, 0.10, 0.50, 0.20, 8) + outputRDD = erTest2.run() + val predictedRDD = outputRDD.map(f => { + (f._1 + " : " + f._2) + }) + + val cnt = 
teacherRDD.intersection(predictedRDD).count() + assert(cnt==2) + } + +} + + From 4f9c7fea6ffd910404c2eb6a54c12bb53cc5ebbe Mon Sep 17 00:00:00 2001 From: Amrit Kaur Date: Thu, 13 Feb 2020 16:46:22 +0100 Subject: [PATCH 4/7] Final changes --- sansa-ml-spark/src/main/resources/application.conf | 2 +- .../ml/spark/entity_resolution/ERCountVectorizer.scala | 2 -- .../ml/spark/entity_resolution/ERHashingTF.scala | 2 -- .../sansa_stack/ml/spark/entity_resolution/ERTests.scala | 6 ++---- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/sansa-ml-spark/src/main/resources/application.conf b/sansa-ml-spark/src/main/resources/application.conf index 8061177..d4745d8 100644 --- a/sansa-ml-spark/src/main/resources/application.conf +++ b/sansa-ml-spark/src/main/resources/application.conf @@ -1,5 +1,5 @@ sansa.entity_resolution.partitions = 400 sansa.entity_resolution.repartition_number = 600 -sansa.entity_resolution.removePredicatesList =["owl:sameas", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", +sansa.entity_resolution.removePredicatesList =["sameAs", "wikiPageID", "wikiPageRevisionID", "wikiPageRevisionLink", "wikiPageUsesTemplate", "wikiPageHistoryLink", "wikiPageExternalLink", "wikiPageEditLink", "wikiPageExtracted", "wikiPageLength", "wikiPageModified", "wikiPageOutDegree", "wikiPageRedirects"] \ No newline at end of file diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala index c2597f5..7ad004e 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERCountVectorizer.scala @@ -32,8 +32,6 @@ class ERCountVectorizer(spark: SparkSession, sourceData1: RDD[Triple], sourceDat val featuredEntitiesDf2 = 
countVectorizer.transform(data2).filter(isNoneZeroVector(col(outCol))) return (featuredEntitiesDf1, featuredEntitiesDf2) } - - run } object ERCountVectorizer { diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala index 30e03cf..7f2fcf6 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/ERHashingTF.scala @@ -34,8 +34,6 @@ class ERHashingTF(spark: SparkSession, sourceData1: RDD[Triple], sourceData2: RD val featuredEntitiesDf2 = hashingTf.transform(data2).filter(isNoneZeroVector(col(outCol))) return (featuredEntitiesDf1, featuredEntitiesDf2) } - - run } object ERHashingTF { diff --git a/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala b/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala index f94cbf6..e5d1fc3 100644 --- a/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala +++ b/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala @@ -22,7 +22,7 @@ class ERTests extends FunSuite with DataFrameSuiteBase { test("performing entity resolution using HashingTF method should result in 2 matches") { var outputRDD: RDD[(String, String, Double)] = null - val erTest1 = new ERHashingTF(spark, triplesSource1, triplesSource2, 0.10, 0.50, 0.20, 6) + val erTest1 = new ERHashingTF(spark, triplesSource1, triplesSource2, 0.15, 0.30, 0.50, 3) outputRDD = erTest1.run() val predictedRDD = outputRDD.map(f => { (f._1 + " : " + f._2) @@ -31,11 +31,10 @@ class ERTests extends FunSuite with DataFrameSuiteBase { val cnt = teacherRDD.intersection(predictedRDD).count() assert(cnt==2) } - test("performing entity resolution using CountVetcorizerModel method should result in 2 matches") { 
var outputRDD: RDD[(String, String, Double)] = null - val erTest2 = new ERCountVectorizer(spark, triplesSource1, triplesSource2, 0.10, 0.50, 0.20, 8) + val erTest2 = new ERCountVectorizer(spark, triplesSource1, triplesSource2, 0.10, 0.40, 0.50, 5) outputRDD = erTest2.run() val predictedRDD = outputRDD.map(f => { (f._1 + " : " + f._2) @@ -44,7 +43,6 @@ class ERTests extends FunSuite with DataFrameSuiteBase { val cnt = teacherRDD.intersection(predictedRDD).count() assert(cnt==2) } - } From 6e92b0d3a04ad054f87c282359d8c27ec727ddd5 Mon Sep 17 00:00:00 2001 From: Gezim Sejdiu Date: Sat, 21 Mar 2020 00:24:46 +0100 Subject: [PATCH 5/7] Make ERTests suite works (still issue with the data DatatypeFormatException) --- .../ml/spark/entity_resolution/ERTests.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala b/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala index e5d1fc3..a5a835b 100644 --- a/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala +++ b/sansa-ml-spark/src/test/scala/net/sansa_stack/ml/spark/entity_resolution/ERTests.scala @@ -5,8 +5,6 @@ import org.apache.jena.riot.Lang import org.apache.spark.rdd.RDD import org.scalatest.FunSuite -import net.sansa_stack.ml.spark.entity_resolution._ - class ERTests extends FunSuite with DataFrameSuiteBase { import net.sansa_stack.rdf.spark.io._ @@ -14,19 +12,21 @@ class ERTests extends FunSuite with DataFrameSuiteBase { val source1 = getClass.getResource("/entity_resolution/source1.nt").getPath val source2 = getClass.getResource("/entity_resolution/source2.nt").getPath val lang = Lang.NTRIPLES - val triplesSource1 = spark.rdf(lang)(source1) - val triplesSource2 = spark.rdf(lang)(source2) val expected = Array("Ali_Baba : Ali_Baba", "Blackberry : Blackberry") - val teacherRDD = spark.sparkContext.parallelize(expected) test("performing 
entity resolution using HashingTF method should result in 2 matches") { var outputRDD: RDD[(String, String, Double)] = null + val triplesSource1 = spark.rdf(lang)(source1) + val triplesSource2 = spark.rdf(lang)(source2) + val erTest1 = new ERHashingTF(spark, triplesSource1, triplesSource2, 0.15, 0.30, 0.50, 3) outputRDD = erTest1.run() val predictedRDD = outputRDD.map(f => { (f._1 + " : " + f._2) }) + val teacherRDD = spark.sparkContext.parallelize(expected) + val cnt = teacherRDD.intersection(predictedRDD).count() assert(cnt==2) @@ -34,12 +34,17 @@ class ERTests extends FunSuite with DataFrameSuiteBase { test("performing entity resolution using CountVetcorizerModel method should result in 2 matches") { var outputRDD: RDD[(String, String, Double)] = null + val triplesSource1 = spark.rdf(lang)(source1) + val triplesSource2 = spark.rdf(lang)(source2) + val erTest2 = new ERCountVectorizer(spark, triplesSource1, triplesSource2, 0.10, 0.40, 0.50, 5) outputRDD = erTest2.run() val predictedRDD = outputRDD.map(f => { (f._1 + " : " + f._2) }) + val teacherRDD = spark.sparkContext.parallelize(expected) + val cnt = teacherRDD.intersection(predictedRDD).count() assert(cnt==2) } From 70ce1b7626a2df1d56e8b94bffeaac5f8f8e8a43 Mon Sep 17 00:00:00 2001 From: Amrit Kaur Date: Sat, 28 Mar 2020 22:23:51 +0100 Subject: [PATCH 6/7] Bugfix --- .../net/sansa_stack/ml/spark/entity_resolution/Commons.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala index 080d1e4..cb42739 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala @@ -88,7 +88,7 @@ abstract class Commons(val spark: SparkSession, val sourceData1: RDD[Triple], va val value = pred + ":" + obj // 
predicate and object are seperated by ':' (key, value) } else { - val obj = f.getObject.getLiteralValue.toString() + val obj = f.getObject.getLiteral.toString().split(Array('^', '@')).head.trim() val value = pred + ":" + obj.replace(":", "") (key, value) } From ee7120c7303ec43df17e79b4d91fdc32f93e9f1a Mon Sep 17 00:00:00 2001 From: Amrit Kaur Date: Sat, 4 Apr 2020 20:43:24 +0200 Subject: [PATCH 7/7] Bugfix --- .../net/sansa_stack/ml/spark/entity_resolution/Commons.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala index cb42739..246eff3 100644 --- a/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala +++ b/sansa-ml-spark/src/main/scala/net/sansa_stack/ml/spark/entity_resolution/Commons.scala @@ -88,7 +88,7 @@ abstract class Commons(val spark: SparkSession, val sourceData1: RDD[Triple], va val value = pred + ":" + obj // predicate and object are seperated by ':' (key, value) } else { - val obj = f.getObject.getLiteral.toString().split(Array('^', '@')).head.trim() + val obj = f.getObject.getLiteral.getLexicalForm() val value = pred + ":" + obj.replace(":", "") (key, value) }