javascript - Javascript - 逐字比較兩個句子,並返回與某些條件匹配的單詞數

下面是一段代碼,可以逐字比較兩個句子,並返回單詞與某些條件的匹配次數:

第一個句子是字元串:


 let speechResult ="they're were protecting him i knew that i was aware";



如您所見,它有兩個i,但第二句話只有一個i:


let expectSt = ['i was sent to earth to protect you'];



這種情況應該只算作一次匹配,而不是兩次;只有當第二個句子中也出現兩個i時,才應該把i算作兩次匹配。

下面的代碼目前輸出的匹配數是6(重複的i,以及him、protecting中包含的i都被計入了)。

這是代碼:


// Sentences we should compare word by word
const speechResult = "they're were protecting him i knew that i was aware";
const expectSt = ['i was sent to earth to protect you'];

// Create arrays of words from above sentences.
// The separator must be /\s+/ (one or more whitespace characters); without
// the backslash the regex would split on the letter "s" instead.
const speechResultWords = speechResult.split(/\s+/);
const expectStWords = expectSt[0].split(/\s+/);



// Here you are.. 


//console.log(speechResultWords)


//console.log(expectStWords)



// Count Matches between two sentences


/**
 * Count every (spoken word, expected word) pair in which the spoken word
 * contains the expected word as a substring.
 *
 * Reads the module-level arrays `speechResultWords` and `expectStWords`.
 * Each pair is counted independently, so one spoken word may contribute
 * several matches and repeated words are counted every time they occur.
 * Every match is also logged to the console.
 *
 * @returns {number} Total number of matching pairs.
 */
function includeWords() {
    let countMatches = 0;

    for (const spoken of speechResultWords) {
        for (const expected of expectStWords) {
            // Substring test, not whole-word equality.
            if (!spoken.includes(expected)) {
                continue;
            }
            console.log(`${spoken} includes in ${expected}`);
            countMatches += 1;
        }
    }

    return countMatches;
}



// Run the comparison once and report the total number of matches.
const matches = includeWords();
console.log(`Matched words: ${matches}`);


 

时间: 原作者:

你可以對想要的單詞進行計數,並通過檢查單詞計數來迭代給定的單詞。


/**
 * Count how many words of `seen` can be paired with a word of `wanted`.
 *
 * Each occurrence of a wanted word can be consumed only once, so a word that
 * appears once in `wanted` yields at most one match no matter how often it
 * appears in `seen`. Two words match when they are identical, or when both
 * are longer than three characters and one is a prefix of the other
 * (e.g. "protect" / "protecting"). Every match is logged to the console.
 *
 * @param {string} wanted - Sentence whose words form the pool of allowed matches.
 * @param {string} seen - Sentence whose words are checked against the pool.
 * @returns {number} Number of words in `seen` that consumed a wanted word.
 */
function includeWords(wanted, seen) {
    // Match runs of non-whitespace so that empty or padded input yields no
    // empty tokens (an empty token would otherwise "match" another one).
    const toWords = (sentence) => sentence.match(/\S+/g) || [];

    const isSameWord = (s, t) =>
        s === t || (s.length > 3 && t.length > 3 && (s.startsWith(t) || t.startsWith(s)));

    // wanted word -> number of occurrences still available for matching
    const remaining = new Map();
    for (const word of toWords(wanted)) {
        remaining.set(word, (remaining.get(word) || 0) + 1);
    }
    const wantedWords = Array.from(remaining.keys());

    let count = 0;
    for (const s of toWords(seen)) {
        // Skip exhausted keys so that a later key with quota left can still match.
        const key = wantedWords.find((t) => remaining.get(t) > 0 && isSameWord(s, t));
        if (key === undefined) {
            continue;
        }
        console.log(s, key);
        count += 1;
        remaining.set(key, remaining.get(key) - 1);
    }

    return count;
}



// The second sentence contains an apostrophe, so it is written with double
// quotes; inside single quotes the apostrophe would end the string early.
const matches = includeWords('i was sent to earth to protect you', "they're were protecting him i knew that i was aware");

console.log('Matched words: ' + matches);
.as-console-wrapper { max-height: 100% !important; top: 0; }

原作者:

我認為這應該有效:


// Sentence whose words are looked up in the expected sentence.
const speechResult = "they're were protecting him i knew that i was aware";

// Expected sentence(s); includeWords() below reads the first element.
const expectSt = ['i was sent to earth to protect you'];



/**
 * Count the distinct words of `speechResult` that also occur as whole words
 * in the first expected sentence, `expectSt[0]`.
 *
 * Reads the module-level variables `speechResult` and `expectSt`. Repeated
 * words in `speechResult` are counted once because they are deduplicated
 * with a Set before comparison.
 *
 * @returns {number} Number of distinct matching words.
 */
function includeWords() {
    // Split on whitespace (not on every character) and drop empty tokens.
    const toWords = (sentence) => sentence.split(/\s+/).filter(Boolean);

    const expectedWords = new Set(toWords(expectSt[0]));
    const spokenWords = new Set(toWords(speechResult));

    let countMatches = 0;
    // for...of iterates the Set's values; for...in would iterate keys and,
    // without a declaration, assign to an implicit global.
    for (const word of spokenWords) {
        // Whole-word membership: "a" must not match inside "was".
        if (expectedWords.has(word)) {
            countMatches += 1;
        }
    }

    return countMatches;
}



// Run the comparison and keep the resulting match count.
const matches = includeWords();



原作者:

遍歷單詞數組,把已匹配的預期單詞所在索引處的值替換為空字元串(避免同一個單詞被重複匹配),並將匹配項存儲在數組中。


const speechResult = "they're were protecting him i knew that i was aware";
const expectSt = ['i was sent to earth to protect you'];

// Create arrays of words from above sentences.
// /\s+/ splits on runs of whitespace; filter(Boolean) drops empty tokens that
// leading or trailing whitespace would otherwise produce.
const speechResultWords = speechResult.split(/\s+/).filter(Boolean);
const expectStWords = expectSt[0].split(/\s+/).filter(Boolean);

// Words that were matched, in the order they were found.
const matches = [];

speechResultWords.forEach((spoken) => {
    // First expected word that is still available (not blanked out) and shares
    // a prefix relationship with the spoken word.
    const index = expectStWords.findIndex(
        (expected) => expected !== '' && (spoken.startsWith(expected) || expected.startsWith(spoken))
    );
    if (index === -1) {
        return;
    }

    const expected = expectStWords[index];
    if (spoken.length >= expected.length) {
        matches.push(expected);
        // Blank out the consumed expected word so it cannot match again.
        expectStWords[index] = '';
    } else {
        // The spoken word is only a prefix of the expected word: record it but
        // leave the expected word available.
        matches.push(spoken);
    }
});

console.log(matches.length);

原作者:

通過使用stemming,可以知道詞幹相同的詞。

比如,

  • 對於動詞:protect, protected, protecting,...
  • 還有複數:ball, balls

  • 詞幹:使用一些詞幹分析器(例如,PorterStemmer,它有一個js實現),
  • 在「詞幹空間」(即詞幹化之後的單詞)上統計出現次數,這很簡單,

下例使用PorterStemmer


// Pairs of [a, b] sentences passed to countWords(a, b).
const examples = [
    ['protecting', 'i'],
    ['protecting', 'protect'],
    ['protect', 'protecting'],
    ['him', 'i'],
    ['i', 'i'],
    // Double quotes: the sentence contains an apostrophe, which would end a
    // single-quoted string early.
    ["they're were protecting him i knew that i was aware", 'i was sent to earth to protect you'],
    ['i i', 'i i i i i'],
];


/**
 * Split a sentence into whitespace-separated tokens.
 *
 * Tokens containing no letter, digit or apostrophe (e.g. "--") are dropped;
 * the remaining tokens are returned unchanged, including any punctuation
 * attached to them.
 *
 * @param {string} s - Sentence to split.
 * @returns {string[]} Tokens in their original order.
 */
function tokenize(s) {
    // this is not good, get yourself a good tokenizer
    // Split on runs of whitespace (/\s+/), then keep a token only if something
    // is left after removing every character outside [a-zA-Z0-9'].
    return s.split(/\s+/).filter((token) => token.replace(/[^a-zA-Z0-9']/g, '') !== '');
}



/**
 * Count how many stemmed words of `b` can be paired with a stemmed word of `a`.
 *
 * Both sentences are tokenized with `tokenize` and each token is reduced with
 * `stemmer` (expected to be provided by the Porter stemmer script loaded
 * alongside this snippet). Every stem occurrence in `a` can be consumed once.
 *
 * @param {string} a - Sentence providing the pool of available stems.
 * @param {string} b - Sentence whose stems are matched against the pool.
 * @returns {number} Number of stems in `b` that consumed a stem from `a`.
 */
function countWords(a, b) {
    const sa = tokenize(a).map((t) => stemmer(t));
    const sb = tokenize(b).map((t) => stemmer(t));

    // A Map (not a plain object) keeps words such as "constructor" or
    // "toString" from colliding with inherited Object.prototype properties.
    const available = new Map();
    for (const stem of sa) {
        available.set(stem, (available.get(stem) || 0) + 1);
    }

    let count = 0;
    for (const stem of sb) {
        const left = available.get(stem) || 0;
        if (left > 0) {
            available.set(stem, left - 1);
            count += 1;
        }
    }
    return count;
}


// Print the match count of every example pair, numbered from 1.
for (const [index, [a, b]] of examples.entries()) {
    console.log(`ex ${index + 1}: ${countWords(a, b)}`);
}
<script src="https://cdn.jsdelivr.net/gh/kristopolous/Porter-Stemmer/PorterStemmer1980.js"></script>

原作者:
...