From news@news.msfc.nasa.gov Mon Mar 31 15:33:56 EST 1997
Article: 31434 of news.software.nntp
Path: news.math.psu.edu!news.ems.psu.edu!news3.cac.psu.edu!howland.erols.net!europa.clark.net!news.msfc.nasa.gov!not-for-mail
From: news@news.msfc.nasa.gov (Unknown News Administrator)
Newsgroups: news.software.nntp
Subject: Re: Duplicate articles with INN (bug?)
Date: 31 Mar 1997 19:18:01 GMT
Organization: NASA/MSFC
Lines: 166
Sender: news@hammer.msfc.nasa.gov
Message-ID: <5hp2l9$jqa$1@hammer.msfc.nasa.gov>
References: <5heo2g$eol$1@sun579.rz.ruhr-uni-bochum.de> <5hh1f6$64k$1@sun579.rz.ruhr-uni-bochum.de> <5hhpre$60$1@news.gv.tsc.tdk.com>
NNTP-Posting-Host: hammer.msfc.nasa.gov
Xref: news.math.psu.edu news.software.nntp:31434

gdonl@tsc.tdk.com (Don Lewis) writes:

>INN 1.5, FreeBSD 2.1.6, w/o MMAP, with a bunch of feeds.  I didn't see
>any duplicate article files though.  It appears that the duplicate history
>entries are added when innd rejects articles with "437 Duplicate article"
>responses.

This does indeed seem to be the problem if REMEMBER_TRASH is set to DO.

Below is a patch that reflects all of the changes I have made to my working
copy of art.c since INN-1.5.1 came out.  It does three things:

  1. Better checking for bad headers: a field with no name before the
  colon is now rejected.  I didn't come up with this one and can't
  remember where it came from.  INN-workers list, maybe?

  2. Check hop count *before* checking for path exclusion.  Mainly done
  as an experiment to see if it would produce a noticeable improvement
  in performance.  It might, if you have newsfeeds entries with H flags
  and long lists of path components to exclude.  It really can't hurt.

  3. Don't write a history record for articles rejected because they
  are duplicates (nor, in the patched spots, when Data.MessageID is NULL).

I would have separated out and cleaned up these patches, but it's been a
busy day.


*** innd/art.c.orig	Tue Dec 17 08:40:40 1996
--- innd/art.c	Mon Mar 31 12:02:51 1997
***************
*** 689,696 ****
  	    *errorp = "EOF in headers";
  	    return NULL;
  	case ':':
! 	    if (colon == NULL)
  		colon = out;
  	    break;
  	}
  	if ((*out++ = *in++) == '\n' && !ISWHITE(*in))
--- 689,701 ----
  	    *errorp = "EOF in headers";
  	    return NULL;
  	case ':':
! 	    if (colon == NULL) {
  		colon = out;
+ 		if (start == colon) {
+ 		    *errorp = "Field without name in header";
+ 		    return NULL;
+ 		}
+ 	    }
  	    break;
  	}
  	if ((*out++ = *in++) == '\n' && !ISWHITE(*in))
***************
*** 1590,1598 ****
--- 1595,1608 ----
  	    /* Too small for the site. */
  	    continue;
  
+ /*
  	if ((!sp->IgnorePath && ListHas(hops, sp->Name))
  	 || (sp->Hops && hopcount > sp->Hops)
  	 || (sp->Groupcount && Groupcount > sp->Groupcount))
+ */
+ 	if ((sp->Hops && hopcount > sp->Hops)
+ 	 || (!sp->IgnorePath && ListHas(hops, sp->Name))
+ 	 || (sp->Groupcount && Groupcount > sp->Groupcount))
  	    /* Site already saw the article; path too long; or too much
  	     * cross-posting. */
  	    continue;
***************
*** 1738,1743 ****
--- 1748,1754 ----
      char		ControlWord[SMBUF];
      int			ControlHeader;
      int			oerrno;
+     int			adupe;
  #if defined(DO_PERL)
      char		*perlrc;
  #endif /* DO_PERL */
***************
*** 1767,1774 ****
      Data.TimeReceivedLength = strlen(Data.TimeReceived);
  
      /* A duplicate? */
!     if (error == NULL && HIShavearticle(Data.MessageID))
  	error = "Duplicate article";
  
      /* And now check the path for unwanted sites -- Andy */
      for( j = 0 ; ME.Exclusions && ME.Exclusions[j] ; j++ ) {
--- 1778,1788 ----
      Data.TimeReceivedLength = strlen(Data.TimeReceived);
  
      /* A duplicate? */
!     adupe = 0;
!     if (error == NULL && HIShavearticle(Data.MessageID)) {
  	error = "Duplicate article";
+ 	adupe = 1;
+     }
  
      /* And now check the path for unwanted sites -- Andy */
      for( j = 0 ; ME.Exclusions && ME.Exclusions[j] ; j++ ) {
***************
*** 1784,1792 ****
  	(void)sprintf(buff, "%d %s", NNTP_REJECTIT_VAL, error);
  	ARTlog(&Data, ART_REJECT, buff);
  #if	defined(DO_REMEMBER_TRASH)
!         if (Mode == OMrunning && !HISwrite(&Data, ""))
!             syslog(L_ERROR, "%s cant write history %s %m",
!                    LogName, Data.MessageID);
  #endif	/* defined(DO_REMEMBER_TRASH) */
  	ARTreject(buff, article);
  	return buff;
--- 1798,1808 ----
  	(void)sprintf(buff, "%d %s", NNTP_REJECTIT_VAL, error);
  	ARTlog(&Data, ART_REJECT, buff);
  #if	defined(DO_REMEMBER_TRASH)
! 	if (adupe == 0) {
! 	    if (Data.MessageID && Mode == OMrunning && !HISwrite(&Data, ""))
! 		syslog(L_ERROR, "%s cant write history %s %m",
! 		       LogName, Data.MessageID);
! 	}
  #endif	/* defined(DO_REMEMBER_TRASH) */
  	ARTreject(buff, article);
  	return buff;
***************
*** 1799,1805 ****
          syslog(L_NOTICE, "rejecting[perl] %s %s", HDR(_message_id), buff);
          ARTlog(&Data, ART_REJECT, buff);
  #if	defined(DO_REMEMBER_TRASH)
!         if (Mode == OMrunning && !HISwrite(&Data, ""))
              syslog(L_ERROR, "%s cant write history %s %m",
                     LogName, Data.MessageID);
  #endif	/* defined(DO_REMEMBER_TRASH) */
--- 1815,1821 ----
          syslog(L_NOTICE, "rejecting[perl] %s %s", HDR(_message_id), buff);
          ARTlog(&Data, ART_REJECT, buff);
  #if	defined(DO_REMEMBER_TRASH)
!         if (Data.MessageID && Mode == OMrunning && !HISwrite(&Data, ""))
              syslog(L_ERROR, "%s cant write history %s %m",
                     LogName, Data.MessageID);
  #endif	/* defined(DO_REMEMBER_TRASH) */
***************
*** 1843,1849 ****
                         buff);
  		ARTlog(&Data, ART_REJECT, buff);
  #if	defined(DO_REMEMBER_TRASH)
!                 if (Mode == OMrunning && !HISwrite(&Data, ""))
                      syslog(L_ERROR, "%s cant write history %s %m",
                             LogName, Data.MessageID);
  #endif	/* defined(DO_REMEMBER_TRASH) */
--- 1859,1865 ----
                         buff);
  		ARTlog(&Data, ART_REJECT, buff);
  #if	defined(DO_REMEMBER_TRASH)
!                 if (Data.MessageID && Mode == OMrunning && !HISwrite(&Data, ""))
                      syslog(L_ERROR, "%s cant write history %s %m",
                             LogName, Data.MessageID);
  #endif	/* defined(DO_REMEMBER_TRASH) */
-- 
J. Porter Clark, d/b/a
+--+
|oo| The Unknown News Administrator
|  | news@news.msfc.nasa.gov
`^^'


