Javascript filter HTML content to exclude italics tags and content

I am currently extracting html content from ckeditor. Normally it looks like

Category 1
News 1 (link> News content
John: This is my comment
News 2 (link> News content
Mark: This is my comment

Category 2
News 1 (link> News content
John: This is my comment
News 2 (link> News content
Mark: This is my comment

But CKEditor returns a lot of unnecessary HTML markup, and this is what it actually looks like:

<table>
<tbody>
<tr> <td>
  <table>
    <tr>
      <td>
        <div>
          <p style ="color:red">Category 1 </p>
           <p style ="color:black">
             <a href="abc.com"> <font>News 1 </font></a> <font>Content...</font>
              <br>
              <i>John </i><font><i>This is my comment </i></font> 
           </p>
           <p style ="color:black">
             <a href="abc.com"> <font>News 2 </font></a> <font>Content...</font>
              <br>
              <i>Mark </i><font><i>This is my comment </i></font> 
           </p>
         </div>
       </td>
      </tr>
     </table>
  </td>
 </tr>
</table>
<table>
<tbody>
<tr> <td>
  <table>
    <tr>
      <td>
        <div>
          <p style ="color:red">Category 2 </p>
           <p style ="color:black">
             <a href="abc.com"> <font>News 3 </font></a> <font>Content...</font>
              <br>
              <i>John </i><font><i>This is my comment </i></font> 
           </p>
           <p style ="color:black">
             <a href="abc.com"> <font>News 4 </font></a> <font>Content...</font>
              <br>
              <i>Mark </i><font><i>This is my comment </i></font> 
           </p>
         </div>
       </td>
      </tr>
     </table>
  </td>
 </tr>
</table>

I want to use an array to extract the information like this

var obj = { category 1: { News1: News1, News2: News 2 }, category 2: { News3: News 3, News4: News 4} };

I have used the JavaScript map function to extract the category name and the news, including the comment. Any idea how I can use filter to exclude the italic part of the HTML code?

This is what I have tried

  let doc = parser.parseFromString(this.text, 'text/html")
  let tableCon = doc.getElementByClassName ("tableclass")
  tableCon.map(element =>{
      let innerTable= Array.from(element.getElementbyTagName ('p'))
      categoryName = innerTable[0].innerText
        innerTable.shift();
        innerTable.map(ele =>{
          totalContent = ele.innerHTML
       }
     }

……

So I am able to get the category name and all content but couldn’t filter the italics content

Answer

OK, your code didn’t work for me at all, so I wrote it from scratch. Also, I gave each top-level table (the two outermost table elements that repeat the same nested structure) a class name of tableClass.

As for how the code below works: I made two functions. One (results) pulls the category, titles, and contents out of each top-level table; the other (newsObj) returns the data in your requested format (THE SOLUTION). Hope this helps 😀

// Collect every element carrying the "tableClass" class. Only the outermost
// table of each category is expected to be tagged with it, because results()
// walks down from that table to reach the category's paragraphs.
let tableCon = document.getElementsByClassName("tableClass")
/**
 * Extracts the category heading and every news title/content pair from one
 * outermost category table.
 *
 * Layout this function relies on: following the first child `depth` times
 * from `elem` must land on the container (the <div> in the sample markup)
 * whose first child holds the category text and whose remaining children are
 * news paragraphs shaped like
 * `<p><a><font>title</font></a><font>content</font> ... <i>comment</i></p>`.
 * Only the title node and the second child (content) are read, so the italic
 * comment nodes that follow never end up in the result.
 *
 * Paragraphs that lack the title node or the content node (for example an
 * empty spacer <p>) are skipped instead of aborting the whole extraction.
 *
 * @param {Element} elem - Outermost table of a single category.
 * @param {number} [depth=8] - Number of first-child hops from `elem` to the
 *   container. The default matches the sample markup once it is parsed into
 *   a DOM (presumably including the <tbody> the parser adds to the inner
 *   table; verify if the layout changes).
 * @returns {[string, string[], string[]]} `[category, newsTitles, newsContents]`
 *   where the two arrays are index-aligned.
 * @throws {TypeError} If the tree is shallower than `depth` or the container
 *   has no heading child.
 */
function results(elem, depth = 8) {
  // Walk down without reassigning the caller-visible parameter.
  let container = elem;
  for (let level = 0; level < depth; level++) {
    const next = container.children && container.children[0];
    if (!next) {
      throw new TypeError(
        `results: expected ${depth} nested first-child levels, but the tree ended at level ${level}`
      );
    }
    container = next;
  }

  const [heading, ...paragraphs] = Array.from(container.children || []);
  if (!heading) {
    throw new TypeError('results: container has no category heading element');
  }
  const category = heading.innerText;

  const newsTitles = [];
  const newsContents = [];
  for (const paragraph of paragraphs) {
    const link = paragraph.children[0];
    const titleNode = link && link.children[0];
    const contentNode = paragraph.children[1];
    // Skip malformed/spacer paragraphs; pushing only complete pairs keeps
    // the two arrays index-aligned.
    if (!titleNode || !contentNode) {
      continue;
    }
    newsTitles.push(titleNode.innerText);
    newsContents.push(contentNode.innerText);
  }

  return [category, newsTitles, newsContents];
}

/**
 * Builds the nested `{ category: { newsTitle: newsContent } }` object from a
 * collection of outermost category tables.
 *
 * @param {ArrayLike<Element>} table - Collection of tagged tables; each entry
 *   is handed to results().
 * @returns {Object<string, Object<string, string>>} One key per category,
 *   each mapping news titles to their content text.
 */
function newsObj(table){
  const grouped = {};
  Array.from(table).forEach((categoryTable) => {
    const [category, newsTitles, newsContents] = results(categoryTable);
    const entries = {};
    for (const [position, title] of newsTitles.entries()) {
      // To store the title itself as the value (e.g. "News 1" instead of
      // "Content..."), assign `title` here instead of the content.
      entries[title] = newsContents[position];
    }
    grouped[category] = entries;
  });
  return grouped;
}
// Build the { category: { title: content } } object for all tagged tables
// and print it to the console.
console.log(newsObj(tableCon))
<table class="tableClass">
  <tbody>
    <tr> <td>
      <table>
        <tr>
          <td>
            <div>
              <p style ="color:red">Category 1 </p>
                <p style ="color:black">
                  <a href="abc.com"> <font>News 1 </font></a> <font>Content...</font>
                  <br>
                  <i>John </i><font><i>This is my comment </i></font> 
               </p>
               <p style ="color:black">
                 <a href="abc.com"> <font>News 2 </font></a> <font>Content...</font>
                  <br>
                  <i>Mark </i><font><i>This is my comment </i></font> 
               </p>
             </div>
           </td>
          </tr>
         </table>
      </td>
     </tr>
   </table>
<table class="tableClass">
  <tbody>
    <tr> <td>
      <table>
        <tr>
          <td>
            <div>
              <p style ="color:red">Category 2 </p>
               <p style ="color:black">
                 <a href="abc.com"> <font>News 3 </font></a> <font>Content...</font>
                  <br>
                  <i>John </i><font><i>This is my comment </i></font> 
               </p>
               <p style ="color:black">
                 <a href="abc.com"> <font>News 4 </font></a> <font>Content...</font>
                  <br>
                  <i>Mark </i><font><i>This is my comment </i></font> 
               </p>
             </div>
           </td>
          </tr>
         </table>
      </td>
     </tr>
</table>

Now, as for the fiddle you showed me: you placed tableClass in places where it shouldn’t be. Look at the example below (I didn’t place it on EVERY table, only on the outermost, “top-level” ones).

I didn’t need to add newsTitle, newsContent, and category class names all over the place. But if you change your HTML layout again, or if the layout will keep changing (as in the HTML structure itself changing), then yes, use class names for them.

// Collect every element carrying the "MsoNormalTable" class; in the markup
// below only the outermost table of each category has it (the nested tables
// use "MsoNormal"), and results() walks down from that outermost table.
let tableCon = document.getElementsByClassName("MsoNormalTable")
/**
 * Extracts the category heading and the news title/content pairs from one
 * outermost category table.
 *
 * Layout this function relies on: following the first child `depth` times
 * from `elem` must land on the first paragraph of the innermost cell. That
 * paragraph's first child holds the category text, and each following
 * sibling paragraph is treated as one news item whose title is the text of
 * its first grandchild and whose content is the paragraph's full text with
 * the first `title.length` characters removed (this assumes the paragraph
 * text starts with the title).
 *
 * A pair is kept only when BOTH title and content contain at least one
 * letter. Filtering the pair as a unit (rather than each array on its own)
 * keeps the returned arrays index-aligned, so a title can never be matched
 * with another item's content. Paragraphs without a title node are skipped.
 *
 * @param {Element} elem - Outermost table of a single category.
 * @param {number} [depth=8] - Number of first-child hops from `elem` to the
 *   first paragraph.
 * @returns {[string, string[], string[]]} `[category, newsTitles, newsContents]`
 *   with both arrays the same length and index-aligned.
 * @throws {TypeError} If the tree is shallower than `depth`, or the first
 *   paragraph has no child element or no parent.
 */
function results(elem, depth = 8) {
  // Walk down without reassigning the caller-visible parameter.
  let firstParagraph = elem;
  for (let level = 0; level < depth; level++) {
    const next = firstParagraph.children && firstParagraph.children[0];
    if (!next) {
      throw new TypeError(
        `results: expected ${depth} nested first-child levels, but the tree ended at level ${level}`
      );
    }
    firstParagraph = next;
  }

  const headingNode = firstParagraph.children && firstParagraph.children[0];
  const cell = firstParagraph.parentNode;
  if (!headingNode || !cell) {
    throw new TypeError('results: category paragraph is missing its heading child or its parent');
  }
  const category = headingNode.innerText;

  // True when the text has at least one character whose upper- and
  // lower-case forms differ, i.e. it is not empty / whitespace / digits only.
  const hasLetters = (text) => text.toLowerCase() !== text.toUpperCase();

  const newsTitles = [];
  const newsContents = [];
  // The first child is the category paragraph, so news items start at index 1.
  for (const paragraph of Array.from(cell.children).slice(1)) {
    const lead = paragraph.children[0];
    const titleNode = lead && lead.children[0];
    if (!titleNode) {
      continue;
    }
    const title = titleNode.innerText;
    const content = paragraph.innerText.slice(title.length);
    // Drop the whole pair when either half is effectively empty.
    if (!hasLetters(title) || !hasLetters(content)) {
      continue;
    }
    newsTitles.push(title);
    newsContents.push(content);
  }

  return [category, newsTitles, newsContents];
}

/**
 * Turns a collection of outermost category tables into a nested lookup of
 * the form `{ category: { newsTitle: newsContent } }`.
 *
 * @param {ArrayLike<Element>} table - Collection of tagged tables; every
 *   entry is passed to results().
 * @returns {Object<string, Object<string, string>>} Categories mapped to
 *   their title-to-content objects.
 */
function newsObj(table){
  const byCategory = {};
  for (const section of Array.from(table)) {
    const [category, newsTitles, newsContents] = results(section);
    const items = {};
    byCategory[category] = items;
    let position = 0;
    while (position < newsTitles.length) {
      // Swap the right-hand side for newsTitles[position] to store the title
      // (e.g. "News 1") where the content text would otherwise go.
      items[newsTitles[position]] = newsContents[position];
      position += 1;
    }
  }
  return byCategory;
}
// Build the { category: { title: content } } object for all tagged tables
// and print it to the console.
console.log(newsObj(tableCon))
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" width="640" style="width: 480pt;">
    <tbody>
        <tr>
            <td style="border-top: none; border-right: none; border-left: none; border-image: initial; border-bottom: 1pt solid rgb(204, 204, 204); padding: 18.75pt 11.25pt;">
                <table class="MsoNormal" border="0" cellspacing="0" cellpadding="0">
                    <tbody>
                        <tr>
                            <td width="610" style="width: 457.5pt; padding: 0cm;">
                                <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; vertical-align: baseline;">
                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">PC

                                        <o:p></o:p>
                                    </span>
                                </p>
                                <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; vertical-align: baseline;">
                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                        <o:p>&nbsp;</o:p>
                                    </span>
                                </p>
                                <p class="MsoNormal" style="margin: 0cm 0cm 12pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">
                                    <b>
                                        <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">Brand1 </span>
                                    </b>
                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">is simply dummy</span>
                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">
                                        <a href="https://example.com/link.html" title="https://example.com/link.html">
                                            <span style="color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">text of the printing and typesetting industry</span>
                                        </a>
                                        <span style="background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;"> text of the printing and typesetting industry</span>
                                        <br>
                                            <a href="mailto:[email protected]" title="mailto:[email protected]">
                                                <i>
                                                    <span style="color: rgb(255, 255, 10); background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">Abc:</span>
                                                </i>
                                            </a>&nbsp;

                                            <i>
                                                <span style="background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;"> Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                            </i>
                                        </span>
                                        <span style="font-size: 10pt; font-family: Arial, sans-serif;">
                                            <o:p></o:p>
                                        </span>
                                    </p>
                                    <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; vertical-align: baseline;">
                                        <b>
                                            <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">Brand2 </span>
                                        </b>
                                        <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                            <a href="https://example.com" title="https://example.com">
                                                <span style="color: black;"> Lorem Ipsum has been the industry's standard dummy text ever since the 1500s</span>
                                            </a>
                                        </span>
                                        <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">  when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                        <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                            <br>
                                                <br>
                                                </span>
                                                <b>
                                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">Brand3 </span>
                                                </b>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">will </span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <a href="https://example.com">
                                                        <span style="color: black;"> Lorem Ipsum has been the </span>
                                                    </a>
                                                </span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;"> on 10 MAR</span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <o:p></o:p>
                                                </span>
                                            </p>
                                        </td>
                                    </tr>
                                </tbody>
                            </table>
                        </td>
                    </tr>
                </tbody>
            </table>
            <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; color: rgb(0, 0, 0); text-align: start;"></p>
            <table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" width="640" style="width: 480pt;">
                <tbody>
                    <tr>
                        <td style="border-top: none; border-right: none; border-left: none; border-image: initial; border-bottom: 1pt solid rgb(204, 204, 204); padding: 18.75pt 11.25pt;">
                            <table class="MsoNormal" border="0" cellspacing="0" cellpadding="0">
                                <tbody>
                                    <tr>
                                        <td width="610" style="width: 457.5pt; padding: 0cm;">
                                            <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; vertical-align: baseline;">
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">CATEGORY2

                                                    <o:p></o:p>
                                                </span>
                                            </p>
                                            <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; vertical-align: baseline;">
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <o:p>&nbsp;</o:p>
                                                </span>
                                            </p>
                                            <p class="MsoNormal" style="margin: 0cm 5pt 12pt 0cm; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt;">
                                                <b>
                                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">Bran3 </span>
                                                </b>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">has reportedly agreed to </span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <a href="https://example.com">
                                                        <span style="color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;"> Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                                    </a>
                                                </span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">;  Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <o:p></o:p>
                                                </span>
                                            </p>
                                            <p class="MsoNormal" style="margin: 0cm 5pt 0.0001pt 0cm; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt;">
                                                <b>
                                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">BRAND3 </span>
                                                </b>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">is </span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <a href="https://example.com">
                                                        <span style="color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;"> Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                                    </a>
                                                </span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black; background-image: initial; background-position: initial; background-size: initial; background-repeat: initial; background-attachment: initial; background-origin: initial; background-clip: initial;">  Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <o:p></o:p>
                                                </span>
                                            </p>
                                            <p class="MsoNormal" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: Calibri, sans-serif; line-height: 10.5pt; vertical-align: baseline;">
                                                <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                    <br>
                                                    </span>
                                                    <b>
                                                        <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">BRAND4</span>
                                                    </b>
                                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                        <a href="https://example.com">
                                                            <span style="color: black;">is looking at</span>
                                                        </a>
                                                    </span>
                                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: black;">  Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book</span>
                                                    <span style="font-size: 10pt; font-family: Arial, sans-serif; color: rgb(255, 102, 0);">
                                                        <o:p></o:p>
                                                    </span>
                                                </p>
                                            </td>
                                        </tr>
                                    </tbody>
                                </table>
                            </td>
                        </tr>
                    </tbody>
                </table>
                <div></div>

Leave a Reply

Your email address will not be published. Required fields are marked *