Compare commits

...

5 Commits

5 changed files with 204 additions and 65 deletions

21
langs/en.js Normal file
View File

@ -0,0 +1,21 @@
module.exports = {
concept: {
create: '^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+)(?: that)?',
edit: '^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)',
parseValue: 'has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~',
parseParent: '^is an? ([a-zA-Z0-9 ]*)',
parseRel: '~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)',
parseSyn: '~ is expressed by ~ ([a-zA-Z0-9 ]*)'
},
instance: {
create: '^there is an? ([a-zA-Z0-9 ]*) named ([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')(?: that)?',
edit: 'the ([a-zA-Z0-9_ ]+) ([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')',
parseRel: '(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_\' ]*)',
parseRawVal: '^has ([a-zA-Z0-9]*|\'[^\'\]*(?:\\.[^\'\]*)*\') as ([a-zA-Z0-9 ]*)',
parseInstanceVal: 'has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_]*|\'[a-zA-Z0-9_ ]*\') as ([a-zzA-Z0-9 ]*)',
parseInstanceSubConcept: '(?:is| )?an? ([a-zA-Z0-9 ]*)',
parseInstanceSynonym: 'is expressed by (\'[a-zA-Z0-9 ]*\'|[a-zA-Z0-9]*)'
},
and: 'and',
value: 'value'
};

3
langs/index.js Normal file
View File

@ -0,0 +1,3 @@
// Registry of the bundled language packs, keyed by language code.
const en = require('./en.js');

module.exports = { en };

View File

@ -16,6 +16,7 @@
*/
'use strict';
const LanguageManager = require('./LanguageManager.js');
const CEAgent = require('./CEAgent.js');
const CEParser = require('./CEParser.js');
const QuestionParser = require('./QuestionParser.js');
@ -118,6 +119,7 @@ class CENode {
const descendants = concept.descendants.concat(concept);
const childrenIds = [];
for (const descendant of descendants) { childrenIds.push(descendant.id); }
for (const ancestor of concept.ancestors) { childrenIds.push(ancestor.id); }
for (const instance of this.instances) {
if (instance && childrenIds.indexOf(instance.concept.id) > -1) {
instanceList.push(instance);
@ -245,6 +247,7 @@ class CENode {
* sentence sets to be processed.
*/
constructor(...models) {
this.languageManager = new LanguageManager(this);
this.ceParser = new CEParser(this);
this.questionParser = new QuestionParser(this);
this.nlParser = new NLParser(this);

View File

@ -18,6 +18,7 @@
const CEConcept = require('./CEConcept.js');
const CEInstance = require('./CEInstance.js');
const en = require('../langs/en.js');
const quotes = {
escape(string) {
@ -28,6 +29,15 @@ const quotes = {
},
};
// Sentence-level matchers, compiled once from the English language pack.
const newConcept = new RegExp(en.concept.create, 'i');
// Deliberately case-sensitive: the concept variable is matched by [A-Z0-9]+,
// and with the `i` flag that class would also accept ordinary words (such as
// "as"), letting the greedy concept-name group swallow part of the sentence.
const editConcept = new RegExp(en.concept.edit);
// Case-insensitive so that capitalised sentences ("There is a ...",
// "The person ...") are still recognised.
const newInstance = new RegExp(en.instance.create, 'i');
const editInstance = new RegExp(en.instance.edit, 'i');
// Global flag: only ever used with String.prototype.replace, to turn every
// stand-alone conjunction into the '+' fact separator.
const andRegex = new RegExp(`\\b${en.and}\\b`, 'gi');
const and = en.and;
const value = en.value;
class CEParser {
/*
@ -43,20 +53,21 @@ class CEParser {
*/
parse(input, source) {
const t = input.replace(/\s+/g, ' ').replace(/\.+$/, '').trim(); // Whitespace -> single space
if (t.match(/^conceptualise an?/i)) {
if (newConcept.test(t)){
return this.newConcept(t, source);
} else if (t.match(/^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)/i)) {
} else if (editConcept.test(t)) {
return this.modifyConcept(t, source);
} else if (t.match(/^there is an? ([a-zA-Z0-9 ]*) named/i)) {
} else if (newInstance.test(t)) {
return this.newInstance(t, source);
} else if (t.match(/^the ([a-zA-Z0-9 ]*)/i)) {
} else if (editInstance.test(t)) {
return this.modifyInstance(t, source);
}
return [false, null];
}
newConcept(t, source) {
const match = t.match(/^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+)/i);
const match = newConcept.exec(t);
const conceptName = match[1];
const storedConcept = this.node.getConceptByName(conceptName);
let concept = null;
@ -65,16 +76,18 @@ class CEParser {
}
concept = new CEConcept(this.node, conceptName, source);
const remainder = t.replace(/^conceptualise an? ~ ([a-zA-Z0-9 ]*) ~ ([A-Z0-9]+) that/, '');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
for (const fact of facts) {
this.processConceptFact(concept, fact, source);
const remainder = t.replace(newConcept, '');
const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
if (facts){
for (const fact of facts) {
this.processConceptFact(concept, fact, source);
}
}
return [true, t, concept];
}
modifyConcept(t, source) {
const conceptInfo = t.match(/^conceptualise the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) (?:has|is|~)/);
const conceptInfo = editConcept.exec(t);
if (!conceptInfo) {
return [false, 'Unable to parse sentence'];
}
@ -87,7 +100,7 @@ class CEParser {
const remainderRegex = new RegExp(`^conceptualise the ${conceptName} ${conceptVar}`, 'i');
const remainder = t.replace(remainderRegex, '');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
for (const fact of facts) {
this.processConceptFact(concept, fact, source);
}
@ -95,27 +108,29 @@ class CEParser {
}
processConceptFact(concept, fact, source) {
const input = fact.trim().replace(/\+/g, 'and');
if (input.match(/has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~/g)) {
const re = /has the ([a-zA-Z0-9 ]*) ([A-Z0-9]+) as ~ ([a-zA-Z0-9 ]*) ~/g;
const match = re.exec(input);
const parseVal = new RegExp(en.concept.parseValue);
const parsePar = new RegExp(en.concept.parseParent);
const parseRel = new RegExp(en.concept.parseRel);
const parseSyn = new RegExp(en.concept.parseSyn);
const input = fact.trim().replace(/\+/g, and);
if (parseVal.test(input)){
const match = parseVal.exec(input);
const valConceptName = match[1];
const label = match[3];
const valConcept = valConceptName === 'value' ? 0 : this.node.getConceptByName(valConceptName);
const valConcept = valConceptName === value ? 0 : this.node.getConceptByName(valConceptName);
concept.addValue(label, valConcept, source);
}
if (input.match(/^is an? ([a-zA-Z0-9 ]*)/)) {
const re = /^is an? ([a-zA-Z0-9 ]*)/;
const match = re.exec(input);
if (parsePar.test(input)){
const match = parsePar.exec(input);
const parentConceptName = match[1];
const parentConcept = this.node.getConceptByName(parentConceptName);
if (parentConcept) {
concept.addParent(parentConcept);
}
}
if (input.match(/~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)/)) {
const re = /~ ([a-zA-Z0-9 ]*) ~ the ([a-zA-Z0-9 ]*) ([A-Z0-9]+)/;
const match = re.exec(input);
if (parseRel.test(input)){
const match = parseRel.exec(input);
const label = match[1];
const relConceptName = match[2];
const relConcept = this.node.getConceptByName(relConceptName);
@ -123,22 +138,17 @@ class CEParser {
concept.addRelationship(label, relConcept, source);
}
}
if (input.match(/~ is expressed by ~ ([a-zA-Z0-9 ]*)/)) {
const re = /~ is expressed by ~ ([a-zA-Z0-9 ]*)/;
const match = re.exec(input);
if (parseSyn.test(input)){
const match = parseSyn.exec(input);
const synonym = match[1];
concept.addSynonym(synonym);
}
}
newInstance(t, source) {
let names = t.match(/^there is an? ([a-zA-Z0-9 ]*) named '([^'\\]*(?:\\.[^'\\]*)*)'/i);
if (!names) {
names = t.match(/^there is an? ([a-zA-Z0-9 ]*) named ([a-zA-Z0-9_]*)/i);
if (!names) { return [false, 'Unable to determine name of instance.']; }
}
const names = newInstance.exec(t)
const conceptName = names[1];
const instanceName = names[2].replace(/\\/g, '');
const instanceName = names[2].replace(/\\/g, '').replace(/'/g, '');
const concept = this.node.getConceptByName(conceptName);
const currentInstance = this.node.getInstanceByName(instanceName, concept);
if (!concept) {
@ -150,10 +160,12 @@ class CEParser {
const instance = new CEInstance(this.node, concept, instanceName, source);
instance.sentences.push(t);
const remainder = t.replace(/^there is an? (?:[a-zA-Z0-9 ]*) named (?:[a-zA-Z0-9_]*|'[a-zA-Z0-9_ ]*') that/, '');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
for (const fact of facts) {
this.processInstanceFact(instance, fact, source);
const remainder = t.replace(newInstance, '');
const facts = remainder.replace(andRegex, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
if (facts){
for (const fact of facts) {
this.processInstanceFact(instance, fact, source);
}
}
return [true, t, instance];
}
@ -161,33 +173,42 @@ class CEParser {
modifyInstance(t, source) {
let concept;
let instance;
let instanceName;
if (t.match(/^the ([a-zA-Z0-9 ]*)/i)) {
const names = t.match(/^the ([a-zA-Z0-9 ]*)/i);
const names = editInstance.exec(t);
concept = this.node.getConceptByName(names[1]);
if (concept){
instance = this.node.getInstanceByName(names[2].replace(/\\/g, '').replace(/'/g, ''));
}
else {
const nameTokens = names[1].split(' ');
for (const conceptCheck of this.node.concepts) {
if (names[1].toLowerCase().indexOf(conceptCheck.name.toLowerCase()) === 0) {
concept = conceptCheck;
instanceName = nameTokens[concept.name.split(' ').length];
instance = this.node.getInstanceByName(instanceName, concept);
let currentName = '';
for (const index in nameTokens){
currentName += ' ' + nameTokens[index];
concept = this.node.getConceptByName(currentName.trim());
if (concept){
break;
}
}
}
if (!instance && t.match(/^the ([a-zA-Z0-9 ]*) '([^'\\]*(?:\\.[^'\\]*)*)'/i)) {
const names = t.match(/^the ([a-zA-Z0-9 ]*) '([^'\\]*(?:\\.[^'\\]*)*)'/i);
if (names) {
concept = this.node.getConceptByName(names[1]);
instanceName = names[2].replace(/\\/g, '');
instance = this.node.getInstanceByName(instanceName, concept);
if (concept){
const possibleInstances = this.node.getInstances(concept.name, true);
let lowestIndex = null;
for (const potential of possibleInstances){
const check = new RegExp('\\b(' + potential.name + (potential.synonyms.length ? '|' + potential.synonyms.join('|') : '') + ')\\b', 'i');
const match = check.exec(t);
if (match && (lowestIndex === null || match.index < lowestIndex)){
lowestIndex = match.index;
instance = potential;
}
}
}
}
if (!concept || !instance) {
return [false, `Unknown concept/instance combination in: ${t}`];
}
instance.sentences.push(t);
const tokens = t.split(' ');
tokens.splice(0, 1 + concept.name.split(' ').length + instanceName.split(' ').length);
tokens.splice(0, 1 + concept.name.split(' ').length + instance.name.split(' ').length);
const remainder = tokens.join(' ');
const facts = remainder.replace(/\band\b/g, '+').match(/(?:'(?:\\.|[^'])*'|[^+])+/g);
if (facts) {
@ -200,9 +221,14 @@ class CEParser {
processInstanceFact(instance, fact, source) {
const input = fact.trim().replace(/\+/g, 'and');
if (input.match(/^(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_' ]*)/)) {
const re = /^(?!has)([a-zA-Z0-9 ]*) the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_' ]*)/;
const match = re.exec(input);
const parseRel = new RegExp(en.instance.parseRel);
const parseRawVal = new RegExp(en.instance.parseRawVal);
const parseInstanceVal = new RegExp(en.instance.parseInstanceVal);
const parseInstanceSubConcept = new RegExp(en.instance.parseInstanceSubConcept);
const parseInstanceSynonym = new RegExp(en.instance.parseInstanceSynonym);
if (parseRel.test(input)){
const match = parseRel.exec(input);
const label = match[1];
const relConceptName = match[2];
const relInstanceName = match[3].replace(/'/g, '');
@ -216,16 +242,14 @@ class CEParser {
instance.addRelationship(label, relInstance, true, source);
}
}
if (input.match(/^has ([a-zA-Z0-9]*|'[^'\\]*(?:\\.[^'\\]*)*') as ([a-zA-Z0-9 ]*)/)) {
const re = /^has ([a-zA-Z0-9]*|'[^'\\]*(?:\\.[^'\\]*)*') as ([a-zA-Z0-9 ]*)/;
const match = re.exec(input);
if (parseRawVal.test(input)){
const match = parseRawVal.exec(input);
const value = quotes.unescape(match[1]);
const label = match[2];
instance.addValue(label, value, true, source);
}
if (input.match(/^has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9_]*|'[a-zA-Z0-9_ ]*') as ([a-zA-Z0-9 ]*)/)) {
const re = /^has the ([a-zA-Z0-9 ]*) ([a-zA-Z0-9]*|'[a-zA-Z0-9 ]*') as ([a-zA-Z0-9 ]*)/;
const match = re.exec(input);
if (parseInstanceVal.test(input)){
const match = parseInstanceVal.exec(input);
const valConceptName = match[1];
const valInstanceName = match[2].replace(/'/g, '');
const label = match[3];
@ -238,13 +262,12 @@ class CEParser {
instance.addValue(label, valInstance, true, source);
}
}
if (input.match(/(?:is| )?an? ([a-zA-Z0-9 ]*)/g)) {
const re = /(?:is| )?an? ([a-zA-Z0-9 ]*)/g;
const match = re.exec(input);
if (parseInstanceSubConcept.test(input)){
const match = parseInstanceSubConcept.exec(input);
instance.addSubConcept(this.node.getConceptByName(match && match[1] && match[1].trim()));
}
if (input.match(/is expressed by ('[a-zA-Z0-9 ]*'|[a-zA-Z0-9]*)/)) {
const match = input.match(/is expressed by ('[a-zA-Z0-9 ]*'|[a-zA-Z0-9]*)/);
if (parseInstanceSynonym.test(input)){
const match = parseInstanceSynonym.exec(input);
const synonym = match && match[1] && match[1].replace(/'/g, '').trim();
instance.addSynonym(synonym);
}

89
src/LanguageManager.js Normal file
View File

@ -0,0 +1,89 @@
/*
* Copyright 2017 W.M. Webberley & A.D. Preece (Cardiff University)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
'use strict';
const langs = require('../langs');
// Regular-expression fragments that replace `<name>` tokens in language patterns.
const placeholders = {
  conceptName: '([a-zA-Z0-9 ]*)',
  conceptVar: '([A-Z0-9]+)',
  instanceName: '([a-zA-Z0-9_]+|\'[a-zA-Z0-9_ ]+\')',
  relationshipLabel: '([a-zA-Z0-9 ]*)',
};

/**
 * Resolves pattern keys (e.g. `concept.create`) against the active language
 * pack and turns them into regular expressions.
 */
class LanguageManager {
  /**
   * @param {Object} node - The owning node; stored as `this.node`.
   */
  constructor(node) {
    this.node = node;
    this.lang = langs.en;
  }

  /**
   * Look up a nested entry using a dotted / bracketed path such as
   * `concept.create` or `items[0].name`.
   *
   * @param {Object} o - Object to search.
   * @param {string} s - Path to the wanted entry.
   * @returns {*} The entry, or `undefined` when any path segment is missing
   *   or the path tries to descend into a non-object value.
   */
  static getEntry(o, s) {
    const segments = s
      .replace(/\[(\w+)\]/g, '.$1') // a[0] -> a.0
      .replace(/^\./, '')
      .split('.');
    let current = o;
    for (const segment of segments) {
      // Guard first: the `in` operator (and property lookup on null) throws
      // for non-objects, which is what happens when a path walks into a string.
      const isContainer = current !== null
        && (typeof current === 'object' || typeof current === 'function');
      if (!isContainer || !Object.prototype.hasOwnProperty.call(current, segment)) {
        return undefined;
      }
      current = current[segment];
    }
    return current;
  }

  /**
   * Fetch the pattern stored under `key` in the active language and expand
   * every `<placeholder>` token into its regular-expression fragment.
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @returns {string|undefined} Regex source string, or `undefined` when the
   *   key does not resolve to a string.
   */
  getExpression(key) {
    const entry = LanguageManager.getEntry(this.lang, key);
    if (typeof entry !== 'string') {
      return undefined;
    }
    // split/join replaces *all* occurrences and treats the fragment literally
    // (no `$` replacement-pattern surprises).
    return Object.keys(placeholders).reduce(
      (pattern, name) => pattern.split(`<${name}>`).join(placeholders[name]),
      entry
    );
  }

  /**
   * Test whether `string` matches the pattern stored under `key`.
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @param {string} string - Text to test.
   * @returns {boolean} `false` when the key is unknown; an unknown key must
   *   not fall through to `new RegExp(undefined)`, which matches everything.
   */
  is(key, string) {
    const expression = this.getExpression(key);
    if (expression === undefined) {
      return false;
    }
    return new RegExp(expression, 'i').test(string);
  }

  /**
   * Execute the pattern stored under `key` against `string`.
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @param {string} string - Text to parse.
   * @returns {Array|null} The `RegExp.prototype.exec` result, or `null` when
   *   there is no match or the key is unknown.
   */
  parse(key, string) {
    const expression = this.getExpression(key);
    if (expression === undefined) {
      return null;
    }
    return new RegExp(expression, 'i').exec(string);
  }

  /**
   * Not implemented yet; currently returns `undefined`.
   *
   * @param {string} key - Path of the pattern within the language pack.
   * @param {string} string - Text to extract from.
   */
  extract(key, string) { // eslint-disable-line no-unused-vars
  }

  /**
   * Register (or overwrite) a language pack in the shared `langs` registry.
   *
   * @param {string} key - Language code to register under.
   * @param {Object} language - Language pack object.
   */
  addLanguage(key, language) {
    langs[key] = language;
  }

  /**
   * Switch the active language.
   *
   * @param {string} key - Code of a registered language.
   * @returns {boolean} `true` when the language was switched, `false` when
   *   `key` is not registered (the active language is then left unchanged).
   */
  setLanguage(key) {
    // Own-property check so inherited names such as "constructor" are rejected.
    if (!Object.prototype.hasOwnProperty.call(langs, key)) {
      return false;
    }
    this.lang = langs[key];
    return true;
  }
}
module.exports = LanguageManager;