问题描述
我理解在PDF中,字符/单词可以被放置在页面上的任意位置。我也明白,
文本片段的一部分可能落在rect范围之外,而iText不会对文本片段进行裁剪。
我还找到了这个相关的问题作为参考:"Extracting text from a rectangle using iText (.Net) does give me the entire line"。
namespace iText
{
/// <summary>
/// Demo entry point: extracts the text located in a fixed region of the
/// first page of a PDF using <see cref="RectangleTextExtractionStrategy"/>.
/// </summary>
public static class Test
{
    /// <summary>
    /// Opens "mypdf.pdf", runs a region-restricted text extraction on page 1
    /// and closes the document again, even if extraction fails.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(String[] args)
    {
        var reader = new PdfReader("mypdf.pdf");
        PdfDocument pdfDoc = new PdfDocument(reader);
        try
        {
            // Region of interest given as (x, y, width, height). The
            // two-argument Rectangle constructor takes (width, height), so
            // Rectangle(0, 0) would describe a zero-sized region that no
            // character can meaningfully fall into.
            var addressRect = new Rectangle(62, 20, 6, 7);
            var filterListener = new RectangleTextExtractionStrategy(new LocationTextExtractionStrategy(), addressRect);

            // Text of page 1 restricted by the strategy; not consumed
            // further in this demo.
            var addresstext = PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(1), filterListener);
        }
        finally
        {
            // Always release the document, including on extraction errors.
            pdfDoc.Close();
        }
    }
}
/// <summary>
/// Text extraction strategy that splits every text render event into its
/// individual characters and forwards to an inner strategy only those
/// characters whose bounding box overlaps a configured rectangle.
/// </summary>
public class RectangleTextExtractionStrategy : ITextExtractionStrategy
{
    private readonly ITextExtractionStrategy innerStrategy;
    private readonly Rectangle rectangle;

    /// <summary>
    /// Creates a strategy that filters characters by region.
    /// </summary>
    /// <param name="strategy">Strategy that receives the accepted characters and builds the resulting text.</param>
    /// <param name="rectangle">Region a character's bounding box must overlap to be accepted.</param>
    public RectangleTextExtractionStrategy(ITextExtractionStrategy strategy,Rectangle rectangle)
    {
        this.innerStrategy = strategy;
        this.rectangle = rectangle;
    }

    /// <summary>
    /// Handles a parser event. Events other than <c>RENDER_TEXT</c> are
    /// ignored; text events are broken down per character and each character
    /// overlapping the configured rectangle is passed to the inner strategy.
    /// </summary>
    /// <param name="iEventData">Event payload; cast to <c>TextRenderInfo</c> for text events.</param>
    /// <param name="eventType">Type of the event being reported.</param>
    public void EventOccurred(IEventData iEventData,EventType eventType)
    {
        if (eventType != EventType.RENDER_TEXT)
        {
            return;
        }

        TextRenderInfo tri = (TextRenderInfo)iEventData;
        foreach (TextRenderInfo subTri in tri.GetCharacterRenderInfos())
        {
            // Filter per character so that a chunk only partly inside the
            // region contributes just the characters that are in it.
            Rectangle charBox = new CharacterRenderInfo(subTri).GetBoundingBox();
            if (Intersects(charBox))
            {
                innerStrategy.EventOccurred(subTri,EventType.RENDER_TEXT);
            }
        }
    }

    /// <summary>
    /// Returns the text assembled by the inner strategy from the accepted characters.
    /// </summary>
    public string GetResultantText()
    {
        return innerStrategy.GetResultantText();
    }

    /// <summary>
    /// Returns the event types the inner strategy declares as supported.
    /// </summary>
    public ICollection<EventType> GetSupportedEvents()
    {
        return innerStrategy.GetSupportedEvents();
    }

    /// <summary>
    /// Tells whether <paramref name="candidate"/> overlaps the rectangle this
    /// strategy was constructed with. Rectangles that merely touch along an
    /// edge, or that have no area, are not considered overlapping.
    /// </summary>
    /// <param name="candidate">Bounding box to test against the configured region.</param>
    /// <returns><c>true</c> when both rectangles share some area.</returns>
    private bool Intersects(Rectangle candidate)
    {
        // Two axis-aligned rectangles overlap exactly when their projections
        // overlap on both the horizontal and the vertical axis.
        return candidate.GetLeft() < rectangle.GetRight()
            && candidate.GetRight() > rectangle.GetLeft()
            && candidate.GetBottom() < rectangle.GetTop()
            && candidate.GetTop() > rectangle.GetBottom();
    }
}
}
解决方法
暂未找到可以解决该程序问题的有效方法,小编正在努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)