Documentation
¶
Index ¶
- Variables
- func IsRetryableError(err error) bool
- func IsValidRobotsPolicy(policy string) bool
- func LoadAnalysisRegexConfig() error
- func LoadAnalystInsightsRegexConfig() error
- func LoadFinancialsRegexConfig() error
- func LoadNewsRegexConfig() error
- func LoadRegexConfig() error
- func NewClient(config *Config, httpxPool *httpx.Client) *client
- func ParseNews(html []byte, baseURL string, now time.Time) ([]NewsItem, *NewsStats, error)
- func ParseYahooDate(ts any) (time.Time, bool)
- func ParseYahooPeriod(periodStr string) (time.Time, time.Time, bool)
- func StringToFloat64(s string) (float64, bool)
- func StringToInt64(s string) (int64, bool)
- type AnalysisDTO
- type AnalysisRegexConfig
- type AnalystInsightsDTO
- type AnalystInsightsRegexConfig
- type BackoffPolicy
- func (bp *BackoffPolicy) CalculateDelay(attempt int) time.Duration
- func (bp *BackoffPolicy) CalculateDelayWithRetryAfter(attempt int, retryAfter time.Duration) time.Duration
- func (bp *BackoffPolicy) CalculateDelays(maxAttempts int) []time.Duration
- func (bp *BackoffPolicy) GetStats() map[string]interface{}
- func (bp *BackoffPolicy) Validate() error
- type BackoffPolicyConfig
- type Client
- type ColumnPatterns
- type ComprehensiveAnalysisDTO
- type ComprehensiveFinancialsDTO
- type ComprehensiveKeyStatisticsDTO
- type ComprehensiveProfileDTO
- type Config
- type Currency
- type EndpointConfig
- type Executive
- type FetchMeta
- type FinancialDataPoint
- type FinancialsDTO
- type FinancialsRegexConfig
- type HistoricalQuarter
- type InflightTracker
- type KeyStatisticsDTO
- type LogEntry
- type Logger
- func (l *Logger) GetStats() map[string]interface{}
- func (l *Logger) LogBackoff(url, host string, delay time.Duration)
- func (l *Logger) LogConfig(config *Config)
- func (l *Logger) LogDebug(message string, fields map[string]interface{})
- func (l *Logger) LogError(message string, err error, fields map[string]interface{})
- func (l *Logger) LogInfo(message string, fields map[string]interface{})
- func (l *Logger) LogRateLimit(url, host, errorMsg string)
- func (l *Logger) LogRequest(url, host string, status, attempt int, duration time.Duration, bytes int, ...)
- func (l *Logger) LogRetry(url, host string, attempt int, reason, errorMsg string)
- func (l *Logger) LogRobotsDenied(url, host, errorMsg string)
- func (l *Logger) LogRobotsFetch(host string, success bool, errorMsg string)
- func (l *Logger) SetOutput(output interface{ ... })
- type Metrics
- func (m *Metrics) GetStats() map[string]interface{}
- func (m *Metrics) RecordBackoff(host, reason string)
- func (m *Metrics) RecordBackoffSleep(host, reason string, duration time.Duration)
- func (m *Metrics) RecordInflight(host string, count int)
- func (m *Metrics) RecordLatency(host string, duration time.Duration)
- func (m *Metrics) RecordNews(outcome string)
- func (m *Metrics) RecordNewsParseLatency(duration time.Duration)
- func (m *Metrics) RecordPageBytes(host string, bytes int)
- func (m *Metrics) RecordRequest(host, outcome, code string)
- func (m *Metrics) RecordRetry(host, reason string)
- func (m *Metrics) RecordRobotsDenied(host string)
- type NewsItem
- type NewsRegexConfig
- type NewsStats
- type Officer
- type PeriodLine
- type ProfileDTO
- type QuarterlyEPS
- type RateLimitConfig
- type RateLimiter
- type Recommendation
- type RegexConfig
- type RetryConfig
- type RobotsCache
- type RobotsManager
- type RobotsPolicy
- type RobotsRule
- type Scaled
- type ScrapeError
- type Tracer
- func (t *Tracer) EndSpan(span interface{})
- func (t *Tracer) GetStats() map[string]interface{}
- func (t *Tracer) RecordSpanError(span interface{}, err error)
- func (t *Tracer) StartFetchSpan(ctx context.Context, url, host string) (context.Context, interface{})
- func (t *Tracer) UpdateSpan(span interface{}, status, bytes int, duration time.Duration)
- type YahooFinanceData
- type YahooInt
- type YahooNum
- type YahooString
Constants ¶
This section is empty.
Variables ¶
var ( ErrRobotsDenied = &ScrapeError{Type: "robots_denied", Message: "robots.txt disallows this path"} ErrTimeout = &ScrapeError{Type: "timeout", Message: "request timeout"} ErrTooManyRedirects = &ScrapeError{Type: "too_many_redirects", Message: "exceeded maximum redirect limit"} ErrRetryExhausted = &ScrapeError{Type: "retry_exhausted", Message: "maximum retry attempts exceeded"} ErrRateLimited = &ScrapeError{Type: "rate_limited", Message: "rate limit exceeded"} ErrCircuitOpen = &ScrapeError{Type: "circuit_open", Message: "circuit breaker is open"} ErrInvalidURL = &ScrapeError{Type: "invalid_url", Message: "invalid URL format"} ErrContentTooLarge = &ScrapeError{Type: "content_too_large", Message: "response content exceeds size limit"} // Parse-specific errors ErrNoQuoteSummary = &ScrapeError{Type: "no_quote_summary", Message: "could not locate quoteSummary script payload"} ErrJSONUnescape = &ScrapeError{Type: "json_unescape", Message: "failed to unescape JSON from envelope body"} ErrJSONDecode = &ScrapeError{Type: "json_decode", Message: "failed to decode JSON structure"} ErrMissingFieldBase = &ScrapeError{Type: "missing_field", Message: "required field is missing"} ErrSchemaDriftBase = &ScrapeError{Type: "schema_drift", Message: "unexpected schema change detected"} // News-specific errors ErrNewsNoArticles = &ScrapeError{Type: "news_no_articles", Message: "no news articles found"} ErrNewsParse = &ScrapeError{Type: "news_parse", Message: "failed to parse news HTML"} )
Predefined error types
Functions ¶
func IsRetryableError ¶
IsRetryableError determines if an error should trigger a retry
func IsValidRobotsPolicy ¶
IsValidRobotsPolicy checks if a robots policy is valid
func LoadAnalysisRegexConfig ¶
func LoadAnalysisRegexConfig() error
LoadAnalysisRegexConfig loads the regex patterns from a YAML file
func LoadAnalystInsightsRegexConfig ¶
func LoadAnalystInsightsRegexConfig() error
LoadAnalystInsightsRegexConfig loads the regex patterns from a YAML file
func LoadFinancialsRegexConfig ¶
func LoadFinancialsRegexConfig() error
LoadFinancialsRegexConfig loads the regex patterns from a YAML file
func LoadNewsRegexConfig ¶
func LoadNewsRegexConfig() error
LoadNewsRegexConfig loads the news regex patterns from a YAML file
func LoadRegexConfig ¶
func LoadRegexConfig() error
LoadRegexConfig loads the regex patterns from a YAML file
func ParseNews ¶
ParseNews extracts news articles from HTML with robust error handling and deduplication
func ParseYahooDate ¶
ParseYahooDate parses various Yahoo date formats
func ParseYahooPeriod ¶
ParseYahooPeriod parses Yahoo's period format (e.g., "2023-12-31")
func StringToFloat64 ¶
StringToFloat64 safely converts a string to float64
func StringToInt64 ¶
StringToInt64 safely converts a string to int64
Types ¶
type AnalysisDTO ¶
// AnalysisDTO represents extracted analysis data.
//
// Every field is always emitted in JSON (no omitempty), so a nil RecTrends or
// EPSQuarterly slice is encoded as null rather than being dropped.
type AnalysisDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
// Currency uses the package's Currency alias (a plain string).
Currency Currency `json:"currency"`
// RecTrends holds Recommendation values; presumably recommendation trends — confirm against the parser.
RecTrends []Recommendation `json:"rec_trends"`
// EPSQuarterly holds QuarterlyEPS values, one per entry.
EPSQuarterly []QuarterlyEPS `json:"eps_quarterly"`
// AsOf is serialized as "as_of"; presumably the time the data was captured — TODO confirm.
AsOf time.Time `json:"as_of"`
}
AnalysisDTO represents extracted analysis data
type AnalysisRegexConfig ¶
// AnalysisRegexConfig holds the regex patterns for analysis extraction.
//
// The struct mirrors a YAML document with one mapping per page section
// (earnings_estimate, revenue_estimate, earnings_history, eps_trend,
// eps_revisions, growth_estimate). All patterns are stored as raw strings;
// presumably they are compiled by the loader — confirm against LoadAnalysisRegexConfig.
type AnalysisRegexConfig struct {
EarningsEstimate struct {
SectionPattern string `yaml:"section_pattern"`
CurrencyPattern string `yaml:"currency_pattern"`
TableRowPattern string `yaml:"table_row_pattern"`
} `yaml:"earnings_estimate"`
RevenueEstimate struct {
SectionPattern string `yaml:"section_pattern"`
CurrencyPattern string `yaml:"currency_pattern"`
TableRowPattern string `yaml:"table_row_pattern"`
} `yaml:"revenue_estimate"`
// EarningsHistory is the only section that also carries header and
// per-cell patterns in addition to the section/currency/row patterns.
EarningsHistory struct {
SectionPattern string `yaml:"section_pattern"`
CurrencyPattern string `yaml:"currency_pattern"`
HeaderPattern string `yaml:"header_pattern"`
TableRowPattern string `yaml:"table_row_pattern"`
TableCellPattern string `yaml:"table_cell_pattern"`
} `yaml:"earnings_history"`
EPSTrend struct {
SectionPattern string `yaml:"section_pattern"`
CurrencyPattern string `yaml:"currency_pattern"`
TableRowPattern string `yaml:"table_row_pattern"`
} `yaml:"eps_trend"`
EPSRevisions struct {
SectionPattern string `yaml:"section_pattern"`
CurrencyPattern string `yaml:"currency_pattern"`
TableRowPattern string `yaml:"table_row_pattern"`
} `yaml:"eps_revisions"`
// GrowthEstimate has no currency pattern, unlike the other sections.
GrowthEstimate struct {
SectionPattern string `yaml:"section_pattern"`
TableRowPattern string `yaml:"table_row_pattern"`
} `yaml:"growth_estimate"`
}
AnalysisRegexConfig holds the regex patterns for analysis extraction
type AnalystInsightsDTO ¶
// AnalystInsightsDTO represents analyst insights data from Yahoo Finance.
//
// Symbol, Market and AsOf are always serialized. Every other field is a
// pointer tagged omitempty, so a nil value is left out of the JSON output
// instead of being written as a zero.
type AnalystInsightsDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
AsOf time.Time `json:"as_of"`
// Price Targets
CurrentPrice *float64 `json:"current_price,omitempty"`
TargetMeanPrice *float64 `json:"target_mean_price,omitempty"`
TargetMedianPrice *float64 `json:"target_median_price,omitempty"`
TargetHighPrice *float64 `json:"target_high_price,omitempty"`
TargetLowPrice *float64 `json:"target_low_price,omitempty"`
// Analyst Opinions
NumberOfAnalysts *int `json:"number_of_analysts,omitempty"`
RecommendationMean *float64 `json:"recommendation_mean,omitempty"`
RecommendationKey *string `json:"recommendation_key,omitempty"`
}
AnalystInsightsDTO represents analyst insights data from Yahoo Finance
func ParseAnalystInsights ¶
func ParseAnalystInsights(html []byte, symbol, market string) (*AnalystInsightsDTO, error)
ParseAnalystInsights parses analyst insights data from Yahoo Finance HTML
type AnalystInsightsRegexConfig ¶
// AnalystInsightsRegexConfig holds the regex patterns for analyst insights extraction.
//
// The YAML layout has two mappings: financial_data with a single combined
// pattern, and individual_fields with one pattern per AnalystInsightsDTO value.
type AnalystInsightsRegexConfig struct {
FinancialData struct {
CombinedPattern string `yaml:"combined_pattern"`
} `yaml:"financial_data"`
// IndividualFields has one pattern string per extracted field.
// NOTE(review): presumably a fallback when the combined pattern does not match — confirm in the parser.
IndividualFields struct {
CurrentPrice string `yaml:"current_price"`
TargetMeanPrice string `yaml:"target_mean_price"`
TargetMedianPrice string `yaml:"target_median_price"`
TargetHighPrice string `yaml:"target_high_price"`
TargetLowPrice string `yaml:"target_low_price"`
RecommendationMean string `yaml:"recommendation_mean"`
RecommendationKey string `yaml:"recommendation_key"`
NumberOfAnalysts string `yaml:"number_of_analysts"`
} `yaml:"individual_fields"`
}
AnalystInsightsRegexConfig holds the regex patterns for analyst insights extraction
type BackoffPolicy ¶
// BackoffPolicy implements exponential backoff with jitter.
//
// The fields have no struct tags. NOTE(review): the per-field notes below are
// inferred from the names; confirm against CalculateDelay and Validate.
type BackoffPolicy struct {
// BaseDelay is presumably the delay used for the first retry.
BaseDelay time.Duration
// MaxDelay is presumably the upper bound on any computed delay.
MaxDelay time.Duration
// Multiplier is presumably the growth factor applied per attempt.
Multiplier float64
// JitterFactor is presumably the fraction of randomness added to a delay.
JitterFactor float64
}
BackoffPolicy implements exponential backoff with jitter
func DefaultBackoffPolicy ¶
func DefaultBackoffPolicy() *BackoffPolicy
DefaultBackoffPolicy returns a sensible default backoff policy
func NewBackoffPolicy ¶
func NewBackoffPolicy(baseDelay, maxDelay time.Duration, multiplier, jitterFactor float64) *BackoffPolicy
NewBackoffPolicy creates a new backoff policy with custom parameters
func (*BackoffPolicy) CalculateDelay ¶
func (bp *BackoffPolicy) CalculateDelay(attempt int) time.Duration
CalculateDelay calculates the backoff delay for a given attempt
func (*BackoffPolicy) CalculateDelayWithRetryAfter ¶
func (bp *BackoffPolicy) CalculateDelayWithRetryAfter(attempt int, retryAfter time.Duration) time.Duration
CalculateDelayWithRetryAfter calculates backoff delay considering Retry-After header
func (*BackoffPolicy) CalculateDelays ¶
func (bp *BackoffPolicy) CalculateDelays(maxAttempts int) []time.Duration
CalculateDelays calculates delays for multiple attempts (useful for testing)
func (*BackoffPolicy) GetStats ¶
func (bp *BackoffPolicy) GetStats() map[string]interface{}
GetStats returns statistics about the backoff policy
func (*BackoffPolicy) Validate ¶
func (bp *BackoffPolicy) Validate() error
Validate validates the backoff policy parameters
type BackoffPolicyConfig ¶
// BackoffPolicyConfig represents backoff configuration.
//
// It declares the same four fields, with the same types, as BackoffPolicy.
type BackoffPolicyConfig struct {
BaseDelay time.Duration
MaxDelay time.Duration
Multiplier float64
JitterFactor float64
}
BackoffPolicyConfig represents backoff configuration
func DefaultBackoffPolicyConfig ¶
func DefaultBackoffPolicyConfig() *BackoffPolicyConfig
DefaultBackoffPolicyConfig returns a sensible default backoff policy configuration
type ColumnPatterns ¶
// ColumnPatterns holds one pattern string per valuation column, read from
// YAML under the snake_case keys in the tags. The field set matches the
// Current section of ComprehensiveKeyStatisticsDTO.
// NOTE(review): presumably these are regex patterns used by the key-statistics
// parser — confirm where the type is consumed.
type ColumnPatterns struct {
MarketCap string `yaml:"market_cap"`
EnterpriseValue string `yaml:"enterprise_value"`
TrailingPE string `yaml:"trailing_pe"`
ForwardPE string `yaml:"forward_pe"`
PEGRatio string `yaml:"peg_ratio"`
PriceSales string `yaml:"price_sales"`
PriceBook string `yaml:"price_book"`
EnterpriseValueRevenue string `yaml:"enterprise_value_revenue"`
EnterpriseValueEBITDA string `yaml:"enterprise_value_ebitda"`
}
type ComprehensiveAnalysisDTO ¶
// ComprehensiveAnalysisDTO represents comprehensive analysis data from Yahoo Finance.
//
// Most sections repeat the same anonymous struct for four periods
// (current_qtr, next_qtr, current_year, next_year). Leaf values are pointers
// tagged omitempty, so a nil value is omitted from the JSON output; the
// section and period objects themselves are always emitted.
type ComprehensiveAnalysisDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
AsOf time.Time `json:"as_of"`
// Earnings Estimate
EarningsEstimate struct {
Currency string `json:"currency"`
CurrentQtr struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *float64 `json:"avg_estimate,omitempty"`
LowEstimate *float64 `json:"low_estimate,omitempty"`
HighEstimate *float64 `json:"high_estimate,omitempty"`
YearAgoEPS *float64 `json:"year_ago_eps,omitempty"`
} `json:"current_qtr"`
NextQtr struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *float64 `json:"avg_estimate,omitempty"`
LowEstimate *float64 `json:"low_estimate,omitempty"`
HighEstimate *float64 `json:"high_estimate,omitempty"`
YearAgoEPS *float64 `json:"year_ago_eps,omitempty"`
} `json:"next_qtr"`
CurrentYear struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *float64 `json:"avg_estimate,omitempty"`
LowEstimate *float64 `json:"low_estimate,omitempty"`
HighEstimate *float64 `json:"high_estimate,omitempty"`
YearAgoEPS *float64 `json:"year_ago_eps,omitempty"`
} `json:"current_year"`
NextYear struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *float64 `json:"avg_estimate,omitempty"`
LowEstimate *float64 `json:"low_estimate,omitempty"`
HighEstimate *float64 `json:"high_estimate,omitempty"`
YearAgoEPS *float64 `json:"year_ago_eps,omitempty"`
} `json:"next_year"`
} `json:"earnings_estimate"`
// Revenue Estimate
// Monetary and growth values in this section are strings, not floats.
RevenueEstimate struct {
Currency string `json:"currency"`
CurrentQtr struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *string `json:"avg_estimate,omitempty"` // Keep as string due to "B" suffix
LowEstimate *string `json:"low_estimate,omitempty"`
HighEstimate *string `json:"high_estimate,omitempty"`
YearAgoSales *string `json:"year_ago_sales,omitempty"`
SalesGrowthYearEst *string `json:"sales_growth_year_est,omitempty"`
} `json:"current_qtr"`
NextQtr struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *string `json:"avg_estimate,omitempty"`
LowEstimate *string `json:"low_estimate,omitempty"`
HighEstimate *string `json:"high_estimate,omitempty"`
YearAgoSales *string `json:"year_ago_sales,omitempty"`
SalesGrowthYearEst *string `json:"sales_growth_year_est,omitempty"`
} `json:"next_qtr"`
CurrentYear struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *string `json:"avg_estimate,omitempty"`
LowEstimate *string `json:"low_estimate,omitempty"`
HighEstimate *string `json:"high_estimate,omitempty"`
YearAgoSales *string `json:"year_ago_sales,omitempty"`
SalesGrowthYearEst *string `json:"sales_growth_year_est,omitempty"`
} `json:"current_year"`
NextYear struct {
NoOfAnalysts *int `json:"no_of_analysts,omitempty"`
AvgEstimate *string `json:"avg_estimate,omitempty"`
LowEstimate *string `json:"low_estimate,omitempty"`
HighEstimate *string `json:"high_estimate,omitempty"`
YearAgoSales *string `json:"year_ago_sales,omitempty"`
SalesGrowthYearEst *string `json:"sales_growth_year_est,omitempty"`
} `json:"next_year"`
} `json:"revenue_estimate"`
// Earnings History (dynamic dates)
// Unlike the other sections this one is a slice keyed by a Date string
// rather than four fixed periods. Data has no omitempty, so a nil slice
// is encoded as null.
EarningsHistory struct {
Currency string `json:"currency"`
Data []struct {
Date string `json:"date"`
EPSEst *float64 `json:"eps_est,omitempty"`
EPSActual *float64 `json:"eps_actual,omitempty"`
Difference *float64 `json:"difference,omitempty"`
SurprisePercent *string `json:"surprise_percent,omitempty"`
} `json:"data"`
} `json:"earnings_history"`
// EPS Trend
EPSTrend struct {
Currency string `json:"currency"`
CurrentQtr struct {
CurrentEstimate *float64 `json:"current_estimate,omitempty"`
Days7Ago *float64 `json:"days_7_ago,omitempty"`
Days30Ago *float64 `json:"days_30_ago,omitempty"`
Days60Ago *float64 `json:"days_60_ago,omitempty"`
Days90Ago *float64 `json:"days_90_ago,omitempty"`
} `json:"current_qtr"`
NextQtr struct {
CurrentEstimate *float64 `json:"current_estimate,omitempty"`
Days7Ago *float64 `json:"days_7_ago,omitempty"`
Days30Ago *float64 `json:"days_30_ago,omitempty"`
Days60Ago *float64 `json:"days_60_ago,omitempty"`
Days90Ago *float64 `json:"days_90_ago,omitempty"`
} `json:"next_qtr"`
CurrentYear struct {
CurrentEstimate *float64 `json:"current_estimate,omitempty"`
Days7Ago *float64 `json:"days_7_ago,omitempty"`
Days30Ago *float64 `json:"days_30_ago,omitempty"`
Days60Ago *float64 `json:"days_60_ago,omitempty"`
Days90Ago *float64 `json:"days_90_ago,omitempty"`
} `json:"current_year"`
NextYear struct {
CurrentEstimate *float64 `json:"current_estimate,omitempty"`
Days7Ago *float64 `json:"days_7_ago,omitempty"`
Days30Ago *float64 `json:"days_30_ago,omitempty"`
Days60Ago *float64 `json:"days_60_ago,omitempty"`
Days90Ago *float64 `json:"days_90_ago,omitempty"`
} `json:"next_year"`
} `json:"eps_trend"`
// EPS Revisions
EPSRevisions struct {
Currency string `json:"currency"`
CurrentQtr struct {
UpLast7Days *int `json:"up_last_7_days,omitempty"`
UpLast30Days *int `json:"up_last_30_days,omitempty"`
DownLast7Days *int `json:"down_last_7_days,omitempty"`
DownLast30Days *int `json:"down_last_30_days,omitempty"`
} `json:"current_qtr"`
NextQtr struct {
UpLast7Days *int `json:"up_last_7_days,omitempty"`
UpLast30Days *int `json:"up_last_30_days,omitempty"`
DownLast7Days *int `json:"down_last_7_days,omitempty"`
DownLast30Days *int `json:"down_last_30_days,omitempty"`
} `json:"next_qtr"`
CurrentYear struct {
UpLast7Days *int `json:"up_last_7_days,omitempty"`
UpLast30Days *int `json:"up_last_30_days,omitempty"`
DownLast7Days *int `json:"down_last_7_days,omitempty"`
DownLast30Days *int `json:"down_last_30_days,omitempty"`
} `json:"current_year"`
NextYear struct {
UpLast7Days *int `json:"up_last_7_days,omitempty"`
UpLast30Days *int `json:"up_last_30_days,omitempty"`
DownLast7Days *int `json:"down_last_7_days,omitempty"`
DownLast30Days *int `json:"down_last_30_days,omitempty"`
} `json:"next_year"`
} `json:"eps_revisions"`
// Growth Estimates (only ticker data, not S&P 500)
// One string per period; this section has no currency field.
GrowthEstimate struct {
CurrentQtr *string `json:"current_qtr,omitempty"`
NextQtr *string `json:"next_qtr,omitempty"`
CurrentYear *string `json:"current_year,omitempty"`
NextYear *string `json:"next_year,omitempty"`
} `json:"growth_estimate"`
}
ComprehensiveAnalysisDTO represents comprehensive analysis data from Yahoo Finance
func ParseAnalysis ¶
func ParseAnalysis(html []byte, symbol, market string) (*ComprehensiveAnalysisDTO, error)
ParseAnalysis parses analysis data from Yahoo Finance HTML
type ComprehensiveFinancialsDTO ¶
// ComprehensiveFinancialsDTO holds all financials data including historical.
//
// Current carries income-statement, balance-sheet and cash-flow values.
// Historical carries five fixed quarter slots, each with a Date string and
// the income-statement values only (no balance-sheet or cash-flow fields).
// Value fields are pointers tagged omitempty, so nil values are omitted from
// the JSON output; share counts are *int64 while the other values are *Scaled.
//
// NOTE(review): the Historical quarter names (Q2_2025 … Q2_2024) and their
// JSON keys are hard-coded in the type, whereas ComprehensiveKeyStatisticsDTO
// uses a []HistoricalQuarter slice. Renaming them would break callers, so
// they are left as-is here.
type ComprehensiveFinancialsDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
Currency string `json:"currency"`
AsOf time.Time `json:"as_of"`
// Current values (most recent quarter)
Current struct {
TotalRevenue *Scaled `json:"total_revenue,omitempty"`
CostOfRevenue *Scaled `json:"cost_of_revenue,omitempty"`
GrossProfit *Scaled `json:"gross_profit,omitempty"`
OperatingExpense *Scaled `json:"operating_expense,omitempty"`
OperatingIncome *Scaled `json:"operating_income,omitempty"`
NetNonOperatingInterestIncomeExpense *Scaled `json:"net_non_operating_interest_income_expense,omitempty"`
OtherIncomeExpense *Scaled `json:"other_income_expense,omitempty"`
PretaxIncome *Scaled `json:"pretax_income,omitempty"`
TaxProvision *Scaled `json:"tax_provision,omitempty"`
NetIncomeCommonStockholders *Scaled `json:"net_income_common_stockholders,omitempty"`
BasicEPS *Scaled `json:"basic_eps,omitempty"`
DilutedEPS *Scaled `json:"diluted_eps,omitempty"`
BasicAverageShares *int64 `json:"basic_average_shares,omitempty"`
DilutedAverageShares *int64 `json:"diluted_average_shares,omitempty"`
TotalExpenses *Scaled `json:"total_expenses,omitempty"`
NormalizedIncome *Scaled `json:"normalized_income,omitempty"`
EBIT *Scaled `json:"ebit,omitempty"`
EBITDA *Scaled `json:"ebitda,omitempty"`
ReconciledCostOfRevenue *Scaled `json:"reconciled_cost_of_revenue,omitempty"`
ReconciledDepreciation *Scaled `json:"reconciled_depreciation,omitempty"`
NormalizedEBITDA *Scaled `json:"normalized_ebitda,omitempty"`
// Balance Sheet fields
TotalAssets *Scaled `json:"total_assets,omitempty"`
TotalCapitalization *Scaled `json:"total_capitalization,omitempty"`
CommonStockEquity *Scaled `json:"common_stock_equity,omitempty"`
CapitalLeaseObligations *Scaled `json:"capital_lease_obligations,omitempty"`
NetTangibleAssets *Scaled `json:"net_tangible_assets,omitempty"`
WorkingCapital *Scaled `json:"working_capital,omitempty"`
InvestedCapital *Scaled `json:"invested_capital,omitempty"`
TangibleBookValue *Scaled `json:"tangible_book_value,omitempty"`
TotalDebt *Scaled `json:"total_debt,omitempty"`
ShareIssued *int64 `json:"share_issued,omitempty"`
// Cash Flow fields
OperatingCashFlow *Scaled `json:"operating_cash_flow,omitempty"`
InvestingCashFlow *Scaled `json:"investing_cash_flow,omitempty"`
FinancingCashFlow *Scaled `json:"financing_cash_flow,omitempty"`
EndCashPosition *Scaled `json:"end_cash_position,omitempty"`
CapitalExpenditure *Scaled `json:"capital_expenditure,omitempty"`
IssuanceOfDebt *Scaled `json:"issuance_of_debt,omitempty"`
RepaymentOfDebt *Scaled `json:"repayment_of_debt,omitempty"`
RepurchaseOfCapitalStock *Scaled `json:"repurchase_of_capital_stock,omitempty"`
FreeCashFlow *Scaled `json:"free_cash_flow,omitempty"`
} `json:"current"`
// Historical values
// Each quarter slot below repeats the same anonymous struct definition.
Historical struct {
Q2_2025 struct {
Date string `json:"date"`
TotalRevenue *Scaled `json:"total_revenue,omitempty"`
CostOfRevenue *Scaled `json:"cost_of_revenue,omitempty"`
GrossProfit *Scaled `json:"gross_profit,omitempty"`
OperatingExpense *Scaled `json:"operating_expense,omitempty"`
OperatingIncome *Scaled `json:"operating_income,omitempty"`
NetNonOperatingInterestIncomeExpense *Scaled `json:"net_non_operating_interest_income_expense,omitempty"`
OtherIncomeExpense *Scaled `json:"other_income_expense,omitempty"`
PretaxIncome *Scaled `json:"pretax_income,omitempty"`
TaxProvision *Scaled `json:"tax_provision,omitempty"`
NetIncomeCommonStockholders *Scaled `json:"net_income_common_stockholders,omitempty"`
BasicEPS *Scaled `json:"basic_eps,omitempty"`
DilutedEPS *Scaled `json:"diluted_eps,omitempty"`
BasicAverageShares *int64 `json:"basic_average_shares,omitempty"`
DilutedAverageShares *int64 `json:"diluted_average_shares,omitempty"`
TotalExpenses *Scaled `json:"total_expenses,omitempty"`
NormalizedIncome *Scaled `json:"normalized_income,omitempty"`
EBIT *Scaled `json:"ebit,omitempty"`
EBITDA *Scaled `json:"ebitda,omitempty"`
ReconciledCostOfRevenue *Scaled `json:"reconciled_cost_of_revenue,omitempty"`
ReconciledDepreciation *Scaled `json:"reconciled_depreciation,omitempty"`
NormalizedEBITDA *Scaled `json:"normalized_ebitda,omitempty"`
} `json:"q2_2025"`
Q1_2025 struct {
Date string `json:"date"`
TotalRevenue *Scaled `json:"total_revenue,omitempty"`
CostOfRevenue *Scaled `json:"cost_of_revenue,omitempty"`
GrossProfit *Scaled `json:"gross_profit,omitempty"`
OperatingExpense *Scaled `json:"operating_expense,omitempty"`
OperatingIncome *Scaled `json:"operating_income,omitempty"`
NetNonOperatingInterestIncomeExpense *Scaled `json:"net_non_operating_interest_income_expense,omitempty"`
OtherIncomeExpense *Scaled `json:"other_income_expense,omitempty"`
PretaxIncome *Scaled `json:"pretax_income,omitempty"`
TaxProvision *Scaled `json:"tax_provision,omitempty"`
NetIncomeCommonStockholders *Scaled `json:"net_income_common_stockholders,omitempty"`
BasicEPS *Scaled `json:"basic_eps,omitempty"`
DilutedEPS *Scaled `json:"diluted_eps,omitempty"`
BasicAverageShares *int64 `json:"basic_average_shares,omitempty"`
DilutedAverageShares *int64 `json:"diluted_average_shares,omitempty"`
TotalExpenses *Scaled `json:"total_expenses,omitempty"`
NormalizedIncome *Scaled `json:"normalized_income,omitempty"`
EBIT *Scaled `json:"ebit,omitempty"`
EBITDA *Scaled `json:"ebitda,omitempty"`
ReconciledCostOfRevenue *Scaled `json:"reconciled_cost_of_revenue,omitempty"`
ReconciledDepreciation *Scaled `json:"reconciled_depreciation,omitempty"`
NormalizedEBITDA *Scaled `json:"normalized_ebitda,omitempty"`
} `json:"q1_2025"`
Q4_2024 struct {
Date string `json:"date"`
TotalRevenue *Scaled `json:"total_revenue,omitempty"`
CostOfRevenue *Scaled `json:"cost_of_revenue,omitempty"`
GrossProfit *Scaled `json:"gross_profit,omitempty"`
OperatingExpense *Scaled `json:"operating_expense,omitempty"`
OperatingIncome *Scaled `json:"operating_income,omitempty"`
NetNonOperatingInterestIncomeExpense *Scaled `json:"net_non_operating_interest_income_expense,omitempty"`
OtherIncomeExpense *Scaled `json:"other_income_expense,omitempty"`
PretaxIncome *Scaled `json:"pretax_income,omitempty"`
TaxProvision *Scaled `json:"tax_provision,omitempty"`
NetIncomeCommonStockholders *Scaled `json:"net_income_common_stockholders,omitempty"`
BasicEPS *Scaled `json:"basic_eps,omitempty"`
DilutedEPS *Scaled `json:"diluted_eps,omitempty"`
BasicAverageShares *int64 `json:"basic_average_shares,omitempty"`
DilutedAverageShares *int64 `json:"diluted_average_shares,omitempty"`
TotalExpenses *Scaled `json:"total_expenses,omitempty"`
NormalizedIncome *Scaled `json:"normalized_income,omitempty"`
EBIT *Scaled `json:"ebit,omitempty"`
EBITDA *Scaled `json:"ebitda,omitempty"`
ReconciledCostOfRevenue *Scaled `json:"reconciled_cost_of_revenue,omitempty"`
ReconciledDepreciation *Scaled `json:"reconciled_depreciation,omitempty"`
NormalizedEBITDA *Scaled `json:"normalized_ebitda,omitempty"`
} `json:"q4_2024"`
Q3_2024 struct {
Date string `json:"date"`
TotalRevenue *Scaled `json:"total_revenue,omitempty"`
CostOfRevenue *Scaled `json:"cost_of_revenue,omitempty"`
GrossProfit *Scaled `json:"gross_profit,omitempty"`
OperatingExpense *Scaled `json:"operating_expense,omitempty"`
OperatingIncome *Scaled `json:"operating_income,omitempty"`
NetNonOperatingInterestIncomeExpense *Scaled `json:"net_non_operating_interest_income_expense,omitempty"`
OtherIncomeExpense *Scaled `json:"other_income_expense,omitempty"`
PretaxIncome *Scaled `json:"pretax_income,omitempty"`
TaxProvision *Scaled `json:"tax_provision,omitempty"`
NetIncomeCommonStockholders *Scaled `json:"net_income_common_stockholders,omitempty"`
BasicEPS *Scaled `json:"basic_eps,omitempty"`
DilutedEPS *Scaled `json:"diluted_eps,omitempty"`
BasicAverageShares *int64 `json:"basic_average_shares,omitempty"`
DilutedAverageShares *int64 `json:"diluted_average_shares,omitempty"`
TotalExpenses *Scaled `json:"total_expenses,omitempty"`
NormalizedIncome *Scaled `json:"normalized_income,omitempty"`
EBIT *Scaled `json:"ebit,omitempty"`
EBITDA *Scaled `json:"ebitda,omitempty"`
ReconciledCostOfRevenue *Scaled `json:"reconciled_cost_of_revenue,omitempty"`
ReconciledDepreciation *Scaled `json:"reconciled_depreciation,omitempty"`
NormalizedEBITDA *Scaled `json:"normalized_ebitda,omitempty"`
} `json:"q3_2024"`
Q2_2024 struct {
Date string `json:"date"`
TotalRevenue *Scaled `json:"total_revenue,omitempty"`
CostOfRevenue *Scaled `json:"cost_of_revenue,omitempty"`
GrossProfit *Scaled `json:"gross_profit,omitempty"`
OperatingExpense *Scaled `json:"operating_expense,omitempty"`
OperatingIncome *Scaled `json:"operating_income,omitempty"`
NetNonOperatingInterestIncomeExpense *Scaled `json:"net_non_operating_interest_income_expense,omitempty"`
OtherIncomeExpense *Scaled `json:"other_income_expense,omitempty"`
PretaxIncome *Scaled `json:"pretax_income,omitempty"`
TaxProvision *Scaled `json:"tax_provision,omitempty"`
NetIncomeCommonStockholders *Scaled `json:"net_income_common_stockholders,omitempty"`
BasicEPS *Scaled `json:"basic_eps,omitempty"`
DilutedEPS *Scaled `json:"diluted_eps,omitempty"`
BasicAverageShares *int64 `json:"basic_average_shares,omitempty"`
DilutedAverageShares *int64 `json:"diluted_average_shares,omitempty"`
TotalExpenses *Scaled `json:"total_expenses,omitempty"`
NormalizedIncome *Scaled `json:"normalized_income,omitempty"`
EBIT *Scaled `json:"ebit,omitempty"`
EBITDA *Scaled `json:"ebitda,omitempty"`
ReconciledCostOfRevenue *Scaled `json:"reconciled_cost_of_revenue,omitempty"`
ReconciledDepreciation *Scaled `json:"reconciled_depreciation,omitempty"`
NormalizedEBITDA *Scaled `json:"normalized_ebitda,omitempty"`
} `json:"q2_2024"`
} `json:"historical"`
}
ComprehensiveFinancialsDTO holds all financials data including historical
func ParseComprehensiveFinancials ¶
func ParseComprehensiveFinancials(html []byte, symbol, market string) (*ComprehensiveFinancialsDTO, error)
ParseComprehensiveFinancials extracts comprehensive financials data from HTML using JSON parsing
func ParseComprehensiveFinancialsWithCurrency ¶
func ParseComprehensiveFinancialsWithCurrency(html, financialsHTML []byte, symbol, market string) (*ComprehensiveFinancialsDTO, error)
ParseComprehensiveFinancialsWithCurrency parses financial data from html and the currency from financialsHTML
type ComprehensiveKeyStatisticsDTO ¶
// ComprehensiveKeyStatisticsDTO holds all key statistics data.
//
// Values in Current and Additional are pointers tagged omitempty, so nil
// values are omitted from the JSON output; the "current" and "additional"
// objects themselves are always emitted. Historical is omitted entirely when
// the slice is empty.
type ComprehensiveKeyStatisticsDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
Currency string `json:"currency"`
AsOf time.Time `json:"as_of"`
// Current values (most recent data)
Current struct {
MarketCap *Scaled `json:"market_cap,omitempty"`
EnterpriseValue *Scaled `json:"enterprise_value,omitempty"`
TrailingPE *Scaled `json:"trailing_pe,omitempty"`
ForwardPE *Scaled `json:"forward_pe,omitempty"`
PEGRatio *Scaled `json:"peg_ratio,omitempty"`
PriceSales *Scaled `json:"price_sales,omitempty"`
PriceBook *Scaled `json:"price_book,omitempty"`
EnterpriseValueRevenue *Scaled `json:"enterprise_value_revenue,omitempty"`
EnterpriseValueEBITDA *Scaled `json:"enterprise_value_ebitda,omitempty"`
} `json:"current"`
// Additional statistics (from other parts of the page)
Additional struct {
Beta *Scaled `json:"beta,omitempty"`
// SharesOutstanding is the only non-Scaled value in this section.
SharesOutstanding *int64 `json:"shares_outstanding,omitempty"`
ProfitMargin *Scaled `json:"profit_margin,omitempty"`
OperatingMargin *Scaled `json:"operating_margin,omitempty"`
ReturnOnAssets *Scaled `json:"return_on_assets,omitempty"`
ReturnOnEquity *Scaled `json:"return_on_equity,omitempty"`
} `json:"additional"`
// Historical values - dynamic quarters
Historical []HistoricalQuarter `json:"historical,omitempty"`
}
ComprehensiveKeyStatisticsDTO holds all key statistics data
func ParseComprehensiveKeyStatistics ¶
func ParseComprehensiveKeyStatistics(html []byte, symbol, market string) (*ComprehensiveKeyStatisticsDTO, error)
ParseComprehensiveKeyStatistics extracts comprehensive key statistics data from HTML
type ComprehensiveProfileDTO ¶
// ComprehensiveProfileDTO holds comprehensive profile data.
//
// Symbol, Market and AsOf are always serialized. Every other field is tagged
// omitempty: empty strings, nil pointers and empty slices are left out of the
// JSON output.
type ComprehensiveProfileDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
AsOf time.Time `json:"as_of"`
// Company Information
CompanyName string `json:"company_name,omitempty"`
ShortName string `json:"short_name,omitempty"`
Address1 string `json:"address1,omitempty"`
City string `json:"city,omitempty"`
State string `json:"state,omitempty"`
Zip string `json:"zip,omitempty"`
Country string `json:"country,omitempty"`
Phone string `json:"phone,omitempty"`
Website string `json:"website,omitempty"`
Industry string `json:"industry,omitempty"`
Sector string `json:"sector,omitempty"`
FullTimeEmployees *int64 `json:"full_time_employees,omitempty"`
BusinessSummary string `json:"business_summary,omitempty"`
// Key Executives
Executives []Executive `json:"executives,omitempty"`
// Additional Information
MaxAge *int64 `json:"max_age,omitempty"`
AuditRisk *int64 `json:"audit_risk,omitempty"`
BoardRisk *int64 `json:"board_risk,omitempty"`
CompensationRisk *int64 `json:"compensation_risk,omitempty"`
OverallRisk *int64 `json:"overall_risk,omitempty"`
// The two *EpochDate fields are raw integers, not time.Time.
// NOTE(review): presumably Unix epoch seconds — confirm against the payload.
GovernanceEpochDate *int64 `json:"governance_epoch_date,omitempty"`
CompensationAsOfEpochDate *int64 `json:"compensation_as_of_epoch_date,omitempty"`
}
ComprehensiveProfileDTO holds comprehensive profile data
func ParseComprehensiveProfile ¶
func ParseComprehensiveProfile(html []byte, symbol, market string) (*ComprehensiveProfileDTO, error)
ParseComprehensiveProfile extracts comprehensive profile data from HTML using JSON parsing
type Config ¶
type Config struct {
Enabled bool `yaml:"enabled"`
UserAgent string `yaml:"user_agent"`
TimeoutMs int `yaml:"timeout_ms"`
QPS float64 `yaml:"qps"`
Burst int `yaml:"burst"`
Retry RetryConfig `yaml:"retry"`
RobotsPolicy string `yaml:"robots_policy"`
CacheTTLMs int `yaml:"cache_ttl_ms"`
Endpoints EndpointConfig `yaml:"endpoints"`
}
Config represents the scraping configuration
func DefaultConfig ¶
func DefaultConfig() *Config
DefaultConfig returns a sensible default configuration
type Currency ¶
type Currency = string
Currency represents an ISO 4217 currency code
func CoerceCurrency ¶
CoerceCurrency extracts currency from various Yahoo formats
type EndpointConfig ¶
type EndpointConfig struct {
KeyStatistics bool `yaml:"key_statistics"`
Financials bool `yaml:"financials"`
Analysis bool `yaml:"analysis"`
Profile bool `yaml:"profile"`
News bool `yaml:"news"`
}
EndpointConfig represents endpoint-specific configuration
type Executive ¶
type Executive struct {
Name string `json:"name,omitempty"`
Title string `json:"title,omitempty"`
YearBorn *int `json:"year_born,omitempty"`
TotalPay *int64 `json:"total_pay,omitempty"`
ExercisedValue *int64 `json:"exercised_value,omitempty"`
UnexercisedValue *int64 `json:"unexercised_value,omitempty"`
}
Executive represents a company executive
type FetchMeta ¶
type FetchMeta struct {
URL string `json:"url"`
Host string `json:"host"`
Status int `json:"status"`
Attempt int `json:"attempt"`
Bytes int `json:"bytes"`
Gzip bool `json:"gzip"`
Redirects int `json:"redirects"`
Duration time.Duration `json:"duration"`
FromCache bool `json:"from_cache"` // reserved for optional HTML in-run cache
RobotsPolicy string `json:"robots_policy"`
}
FetchMeta contains metadata about a fetch operation
type FinancialDataPoint ¶
type FinancialDataPoint struct {
DataID int64 `json:"dataId"`
AsOfDate string `json:"asOfDate"`
PeriodType string `json:"periodType"`
CurrencyCode string `json:"currencyCode"`
ReportedValue struct {
Raw float64 `json:"raw"`
Fmt string `json:"fmt"`
} `json:"reportedValue"`
}
FinancialDataPoint represents a single financial data point from Yahoo Finance
type FinancialsDTO ¶
type FinancialsDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
Lines []PeriodLine `json:"lines"`
AsOf time.Time `json:"as_of"`
}
FinancialsDTO represents extracted financial statements data
type FinancialsRegexConfig ¶
type FinancialsRegexConfig struct {
Currency struct {
Pattern string `yaml:"pattern"`
} `yaml:"currency"`
IncomeStatement struct {
TotalRevenue string `yaml:"total_revenue"`
CostOfRevenue string `yaml:"cost_of_revenue"`
OperatingIncome string `yaml:"operating_income"`
NetIncome string `yaml:"net_income"`
BasicEPS string `yaml:"basic_eps"`
DilutedEPS string `yaml:"diluted_eps"`
EBITDA string `yaml:"ebitda"`
EBIT string `yaml:"ebit"`
TotalExpenses string `yaml:"total_expenses"`
NormalizedEBITDA string `yaml:"normalized_ebitda"`
} `yaml:"income_statement"`
Shares struct {
BasicAverageShares string `yaml:"basic_average_shares"`
DilutedAverageShares string `yaml:"diluted_average_shares"`
} `yaml:"shares"`
BalanceSheet struct {
TotalAssets string `yaml:"total_assets"`
TotalCapitalization string `yaml:"total_capitalization"`
CommonStockEquity string `yaml:"common_stock_equity"`
CapitalLeaseObligations string `yaml:"capital_lease_obligations"`
NetTangibleAssets string `yaml:"net_tangible_assets"`
WorkingCapital string `yaml:"working_capital"`
InvestedCapital string `yaml:"invested_capital"`
TangibleBookValue string `yaml:"tangible_book_value"`
TotalDebt string `yaml:"total_debt"`
ShareIssued string `yaml:"share_issued"`
} `yaml:"balance_sheet"`
CashFlow struct {
OperatingCashFlow string `yaml:"operating_cash_flow"`
InvestingCashFlow string `yaml:"investing_cash_flow"`
FinancingCashFlow string `yaml:"financing_cash_flow"`
EndCashPosition string `yaml:"end_cash_position"`
CapitalExpenditure string `yaml:"capital_expenditure"`
IssuanceOfDebt string `yaml:"issuance_of_debt"`
RepaymentOfDebt string `yaml:"repayment_of_debt"`
RepurchaseOfCapitalStock string `yaml:"repurchase_of_capital_stock"`
FreeCashFlow string `yaml:"free_cash_flow"`
} `yaml:"cash_flow"`
}
FinancialsRegexConfig holds the regex patterns for financials extraction
type HistoricalQuarter ¶
type HistoricalQuarter struct {
Date string `json:"date"`
MarketCap *Scaled `json:"market_cap,omitempty"`
EnterpriseValue *Scaled `json:"enterprise_value,omitempty"`
TrailingPE *Scaled `json:"trailing_pe,omitempty"`
ForwardPE *Scaled `json:"forward_pe,omitempty"`
PEGRatio *Scaled `json:"peg_ratio,omitempty"`
PriceSales *Scaled `json:"price_sales,omitempty"`
PriceBook *Scaled `json:"price_book,omitempty"`
EnterpriseValueRevenue *Scaled `json:"enterprise_value_revenue,omitempty"`
EnterpriseValueEBITDA *Scaled `json:"enterprise_value_ebitda,omitempty"`
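}
HistoricalQuarter holds the valuation metrics (market cap, enterprise value, and valuation ratios) listed for one dated historical entry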
type InflightTracker ¶
type InflightTracker struct {
// contains filtered or unexported fields
}
InflightTracker tracks in-flight requests per host
func NewInflightTracker ¶
func NewInflightTracker() *InflightTracker
NewInflightTracker creates a new in-flight tracker
func (*InflightTracker) Decrement ¶
func (it *InflightTracker) Decrement(host string)
Decrement decrements the in-flight count for a host
func (*InflightTracker) GetAllCounts ¶
func (it *InflightTracker) GetAllCounts() map[string]int
GetAllCounts returns all in-flight counts
func (*InflightTracker) GetCount ¶
func (it *InflightTracker) GetCount(host string) int
GetCount returns the current in-flight count for a host
func (*InflightTracker) Increment ¶
func (it *InflightTracker) Increment(host string)
Increment increments the in-flight count for a host
type KeyStatisticsDTO ¶
type KeyStatisticsDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
Currency Currency `json:"currency"`
// Market metrics (from summaryDetail - real-time data)
MarketCap *Scaled `json:"market_cap,omitempty"`
ForwardPE *Scaled `json:"forward_pe,omitempty"`
TrailingPE *Scaled `json:"trailing_pe,omitempty"`
Beta *Scaled `json:"beta,omitempty"`
PriceToSales *Scaled `json:"price_to_sales,omitempty"`
// Share data
ShortInterest *int64 `json:"short_interest,omitempty"`
// Financial metrics (from financialData)
EnterpriseValue *Scaled `json:"enterprise_value,omitempty"`
TotalCash *Scaled `json:"total_cash,omitempty"`
TotalDebt *Scaled `json:"total_debt,omitempty"`
QuickRatio *Scaled `json:"quick_ratio,omitempty"`
CurrentRatio *Scaled `json:"current_ratio,omitempty"`
DebtToEquity *Scaled `json:"debt_to_equity,omitempty"`
ReturnOnAssets *Scaled `json:"return_on_assets,omitempty"`
ReturnOnEquity *Scaled `json:"return_on_equity,omitempty"`
GrossMargins *Scaled `json:"gross_margins,omitempty"`
OperatingMargins *Scaled `json:"operating_margins,omitempty"`
ProfitMargins *Scaled `json:"profit_margins,omitempty"`
RevenueGrowth *Scaled `json:"revenue_growth,omitempty"`
EarningsGrowth *Scaled `json:"earnings_growth,omitempty"`
// Price data
FiftyTwoWeekHigh *Scaled `json:"fifty_two_week_high,omitempty"`
FiftyTwoWeekLow *Scaled `json:"fifty_two_week_low,omitempty"`
AverageVolume *int64 `json:"average_volume,omitempty"`
AverageVolume10Day *int64 `json:"average_volume_10_day,omitempty"`
AsOf time.Time `json:"as_of"`
}
KeyStatisticsDTO represents extracted key statistics data
type LogEntry ¶
type LogEntry struct {
Timestamp string `json:"timestamp"`
Level string `json:"level"`
Source string `json:"source"`
Message string `json:"message"`
Fields map[string]interface{} `json:"fields,omitempty"`
}
LogEntry represents a structured log entry
type Logger ¶
type Logger struct {
// contains filtered or unexported fields
}
Logger handles structured logging for scraping operations
func (*Logger) LogBackoff ¶
func (l *Logger) LogBackoff(url, host string, delay time.Duration)
LogBackoff logs a backoff event
func (*Logger) LogRateLimit ¶
LogRateLimit logs a rate limit event
func (*Logger) LogRequest ¶
func (l *Logger) LogRequest(url, host string, status, attempt int, duration time.Duration, bytes int, gzip bool, redirects int, errorMsg string)
LogRequest logs a scraping request
func (*Logger) LogRobotsDenied ¶
LogRobotsDenied logs a robots.txt denial
func (*Logger) LogRobotsFetch ¶
LogRobotsFetch logs a robots.txt fetch event
type Metrics ¶
type Metrics struct {
// contains filtered or unexported fields
}
Metrics handles Prometheus metrics for scraping operations
func (*Metrics) RecordBackoff ¶
RecordBackoff records a backoff event
func (*Metrics) RecordBackoffSleep ¶
RecordBackoffSleep records backoff sleep duration
func (*Metrics) RecordInflight ¶
RecordInflight records in-flight requests
func (*Metrics) RecordLatency ¶
RecordLatency records request latency
func (*Metrics) RecordNews ¶
RecordNews records a news parsing operation
func (*Metrics) RecordNewsParseLatency ¶
RecordNewsParseLatency records news parsing latency
func (*Metrics) RecordPageBytes ¶
RecordPageBytes records page size
func (*Metrics) RecordRequest ¶
RecordRequest records a scraping request
func (*Metrics) RecordRetry ¶
RecordRetry records a retry event
func (*Metrics) RecordRobotsDenied ¶
RecordRobotsDenied records a robots.txt denial
type NewsItem ¶
type NewsItem struct {
Title string `json:"title"`
URL string `json:"url"` // absolute; normalized
Source string `json:"source"`
PublishedAt *time.Time `json:"published_at"` // UTC if resolvable
ImageURL string `json:"image_url"`
RelatedTickers []string `json:"related_tickers"`
}
NewsItem represents a single news article extracted from Yahoo Finance
type NewsRegexConfig ¶
type NewsRegexConfig struct {
ArticleContainer string `yaml:"article_container"`
Title string `yaml:"title"`
ArticleLink string `yaml:"article_link"`
PublishingInfo string `yaml:"publishing_info"`
ImageURL string `yaml:"image_url"`
RelatedTickers string `yaml:"related_tickers"`
NextPageHint string `yaml:"next_page_hint"`
RelativeTime struct {
Minutes string `yaml:"minutes"`
Hours string `yaml:"hours"`
Days string `yaml:"days"`
Weeks string `yaml:"weeks"`
Yesterday string `yaml:"yesterday"`
} `yaml:"relative_time"`
URLCleanup struct {
UTMParams string `yaml:"utm_params"`
TrackingParams string `yaml:"tracking_params"`
Fragment string `yaml:"fragment"`
QuerySeparator string `yaml:"query_separator"`
} `yaml:"url_cleanup"`
}
NewsRegexConfig holds the regex patterns for news extraction
type NewsStats ¶
type NewsStats struct {
TotalFound int `json:"total_found"`
TotalReturned int `json:"total_returned"`
Deduped int `json:"deduped"`
NextPageHint string `json:"next_page_hint"` // e.g., a data-cursor or bool flag if detected
AsOf time.Time `json:"as_of"`
}
NewsStats represents statistics about news extraction
type Officer ¶
type Officer struct {
Name string `json:"name"`
Title string `json:"title"`
Age *int `json:"age,omitempty"`
Pay *Scaled `json:"pay,omitempty"`
}
Officer represents a company officer/executive
type PeriodLine ¶
type PeriodLine struct {
PeriodStart time.Time `json:"period_start"`
PeriodEnd time.Time `json:"period_end"`
Key string `json:"key"`
Value Scaled `json:"value"`
Currency Currency `json:"currency"`
}
PeriodLine represents a financial statement line item for a specific period
type ProfileDTO ¶
type ProfileDTO struct {
Symbol string `json:"symbol"`
Market string `json:"market"`
Company string `json:"company"`
Address1 string `json:"address1"`
City string `json:"city"`
State string `json:"state"`
Country string `json:"country"`
Phone string `json:"phone"`
Website string `json:"website"`
Industry string `json:"industry"`
Sector string `json:"sector"`
Employees *int `json:"employees,omitempty"`
Officers []Officer `json:"officers"`
AsOf time.Time `json:"as_of"`
}
ProfileDTO represents extracted company profile data
type QuarterlyEPS ¶
type QuarterlyEPS struct {
Date string `json:"date"`
Actual *Scaled `json:"actual,omitempty"`
Estimate *Scaled `json:"estimate,omitempty"`
}
QuarterlyEPS represents quarterly EPS estimates and actuals
type RateLimitConfig ¶
RateLimitConfig represents rate limiting configuration
func DefaultRateLimitConfig ¶
func DefaultRateLimitConfig() *RateLimitConfig
DefaultRateLimitConfig returns a sensible default rate limit configuration
type RateLimiter ¶
type RateLimiter struct {
// contains filtered or unexported fields
}
RateLimiter implements per-host rate limiting
func NewRateLimiter ¶
func NewRateLimiter(qps float64, burst int) *RateLimiter
NewRateLimiter creates a new rate limiter
type Recommendation ¶
type Recommendation struct {
Period string `json:"period"`
StrongBuy int `json:"strong_buy"`
Buy int `json:"buy"`
Hold int `json:"hold"`
Sell int `json:"sell"`
StrongSell int `json:"strong_sell"`
}
Recommendation represents analyst recommendation data for a period
type RegexConfig ¶
type RegexConfig struct {
Current struct {
MarketCap string `yaml:"market_cap"`
EnterpriseValue string `yaml:"enterprise_value"`
TrailingPE string `yaml:"trailing_pe"`
ForwardPE string `yaml:"forward_pe"`
PEGRatio string `yaml:"peg_ratio"`
PriceSales string `yaml:"price_sales"`
PriceBook string `yaml:"price_book"`
EnterpriseValueRevenue string `yaml:"enterprise_value_revenue"`
EnterpriseValueEBITDA string `yaml:"enterprise_value_ebitda"`
} `yaml:"current"`
Additional struct {
Beta string `yaml:"beta"`
SharesOutstanding string `yaml:"shares_outstanding"`
ProfitMargin string `yaml:"profit_margin"`
OperatingMargin string `yaml:"operating_margin"`
ReturnOnAssets string `yaml:"return_on_assets"`
ReturnOnEquity string `yaml:"return_on_equity"`
} `yaml:"additional"`
HistoricalColumns struct {
Column2 ColumnPatterns `yaml:"column_2"`
Column3 ColumnPatterns `yaml:"column_3"`
Column4 ColumnPatterns `yaml:"column_4"`
Column5 ColumnPatterns `yaml:"column_5"`
Column6 ColumnPatterns `yaml:"column_6"`
} `yaml:"historical_columns"`
DateHeaders string `yaml:"date_headers"`
}
RegexConfig holds the regex patterns for statistics extraction
type RetryConfig ¶
type RetryConfig struct {
Attempts int `yaml:"attempts"`
BaseMs int `yaml:"base_ms"`
MaxDelayMs int `yaml:"max_delay_ms"`
}
RetryConfig represents retry configuration
type RobotsCache ¶
RobotsCache represents cached robots.txt data
func (*RobotsCache) IsExpired ¶
func (rc *RobotsCache) IsExpired() bool
IsExpired checks if the robots cache is expired
type RobotsManager ¶
type RobotsManager struct {
// contains filtered or unexported fields
}
RobotsManager handles robots.txt fetching, caching, and policy enforcement
func NewRobotsManager ¶
func NewRobotsManager(policy string, ttl time.Duration) *RobotsManager
NewRobotsManager creates a new robots manager
func (*RobotsManager) CheckRobots ¶
func (rm *RobotsManager) CheckRobots(ctx context.Context, host, path string) error
CheckRobots checks if a path is allowed by robots.txt
func (*RobotsManager) ClearCache ¶
func (rm *RobotsManager) ClearCache()
ClearCache clears the robots.txt cache
func (*RobotsManager) GetCacheStats ¶
func (rm *RobotsManager) GetCacheStats() map[string]interface{}
GetCacheStats returns cache statistics
type RobotsPolicy ¶
type RobotsPolicy string
RobotsPolicy represents the robots.txt policy
const (
RobotsEnforce RobotsPolicy = "enforce"
RobotsWarn RobotsPolicy = "warn"
RobotsIgnore RobotsPolicy = "ignore"
)
type RobotsRule ¶
RobotsRule represents a robots.txt rule
type Scaled ¶
type Scaled struct {
Scaled int64 `json:"scaled"`
Scale int `json:"scale"` // e.g., 2 for cents, 6 for micro-units
}
Scaled represents a scaled decimal number with precision preservation
func IntToScaled ¶
IntToScaled converts a YahooInt to a Scaled value with the given scale
func NumToScaled ¶
NumToScaled converts a YahooNum to a Scaled value with the given scale
type ScrapeError ¶
ScrapeError represents a scraping-specific error
func ErrHTTP ¶
func ErrHTTP(status int, url string) *ScrapeError
ErrHTTP creates an HTTP status error
func ErrMissingField ¶
func ErrMissingField(field string) *ScrapeError
ErrMissingField creates a missing field error
func ErrSchemaDrift ¶
func ErrSchemaDrift(field string) *ScrapeError
ErrSchemaDrift creates a schema drift error
func (*ScrapeError) Error ¶
func (e *ScrapeError) Error() string
type Tracer ¶
type Tracer struct {
// contains filtered or unexported fields
}
Tracer handles OpenTelemetry tracing for scraping operations
func (*Tracer) RecordSpanError ¶
RecordSpanError records an error in the span
type YahooFinanceData ¶
type YahooFinanceData struct {
QuoteSummary struct {
Result []struct {
FinancialData struct {
TrailingTotalRevenue []FinancialDataPoint `json:"trailingTotalRevenue"`
AnnualTotalRevenue []FinancialDataPoint `json:"annualTotalRevenue"`
TrailingOperatingIncome []FinancialDataPoint `json:"trailingOperatingIncome"`
AnnualOperatingIncome []FinancialDataPoint `json:"annualOperatingIncome"`
TrailingNetIncome []FinancialDataPoint `json:"trailingNetIncome"`
AnnualNetIncome []FinancialDataPoint `json:"annualNetIncome"`
TrailingBasicEPS []FinancialDataPoint `json:"trailingBasicEPS"`
AnnualBasicEPS []FinancialDataPoint `json:"annualBasicEPS"`
TrailingDilutedEPS []FinancialDataPoint `json:"trailingDilutedEPS"`
AnnualDilutedEPS []FinancialDataPoint `json:"annualDilutedEPS"`
TrailingEBITDA []FinancialDataPoint `json:"trailingEBITDA"`
AnnualEBITDA []FinancialDataPoint `json:"annualEBITDA"`
TrailingGrossProfit []FinancialDataPoint `json:"trailingGrossProfit"`
AnnualGrossProfit []FinancialDataPoint `json:"annualGrossProfit"`
TrailingCostOfRevenue []FinancialDataPoint `json:"trailingCostOfRevenue"`
AnnualCostOfRevenue []FinancialDataPoint `json:"annualCostOfRevenue"`
TrailingOperatingExpense []FinancialDataPoint `json:"trailingOperatingExpense"`
AnnualOperatingExpense []FinancialDataPoint `json:"annualOperatingExpense"`
TrailingTotalExpenses []FinancialDataPoint `json:"trailingTotalExpenses"`
AnnualTotalExpenses []FinancialDataPoint `json:"annualTotalExpenses"`
TrailingTaxProvision []FinancialDataPoint `json:"trailingTaxProvision"`
AnnualTaxProvision []FinancialDataPoint `json:"annualTaxProvision"`
TrailingPretaxIncome []FinancialDataPoint `json:"trailingPretaxIncome"`
AnnualPretaxIncome []FinancialDataPoint `json:"annualPretaxIncome"`
TrailingOtherIncomeExpense []FinancialDataPoint `json:"trailingOtherIncomeExpense"`
AnnualOtherIncomeExpense []FinancialDataPoint `json:"annualOtherIncomeExpense"`
TrailingNetNonOperatingInterestIncomeExpense []FinancialDataPoint `json:"trailingNetNonOperatingInterestIncomeExpense"`
AnnualNetNonOperatingInterestIncomeExpense []FinancialDataPoint `json:"annualNetNonOperatingInterestIncomeExpense"`
TrailingBasicAverageShares []FinancialDataPoint `json:"trailingBasicAverageShares"`
AnnualBasicAverageShares []FinancialDataPoint `json:"annualBasicAverageShares"`
TrailingDilutedAverageShares []FinancialDataPoint `json:"trailingDilutedAverageShares"`
AnnualDilutedAverageShares []FinancialDataPoint `json:"annualDilutedAverageShares"`
TrailingEBIT []FinancialDataPoint `json:"trailingEBIT"`
AnnualEBIT []FinancialDataPoint `json:"annualEBIT"`
TrailingNormalizedIncome []FinancialDataPoint `json:"trailingNormalizedIncome"`
AnnualNormalizedIncome []FinancialDataPoint `json:"annualNormalizedIncome"`
TrailingNormalizedEBITDA []FinancialDataPoint `json:"trailingNormalizedEBITDA"`
AnnualNormalizedEBITDA []FinancialDataPoint `json:"annualNormalizedEBITDA"`
TrailingReconciledCostOfRevenue []FinancialDataPoint `json:"trailingReconciledCostOfRevenue"`
AnnualReconciledCostOfRevenue []FinancialDataPoint `json:"annualReconciledCostOfRevenue"`
TrailingReconciledDepreciation []FinancialDataPoint `json:"trailingReconciledDepreciation"`
AnnualReconciledDepreciation []FinancialDataPoint `json:"annualReconciledDepreciation"`
} `json:"financialData"`
} `json:"result"`
} `json:"quoteSummary"`
}
YahooFinanceData represents the JSON structure from Yahoo Finance
type YahooInt ¶
type YahooInt struct {
Raw *int64 `json:"raw,omitempty"`
Fmt string `json:"fmt,omitempty"`
LongFmt string `json:"longFmt,omitempty"`
}
YahooInt represents Yahoo's integer format with raw, fmt, and longFmt
func ToYahooInt ¶
ToYahooInt converts a raw struct to YahooInt
type YahooNum ¶
type YahooNum struct {
Raw *float64 `json:"raw,omitempty"`
Fmt string `json:"fmt,omitempty"`
LongFmt string `json:"longFmt,omitempty"`
}
YahooNum represents Yahoo's numeric format with raw, fmt, and longFmt
func ToYahooNum ¶
ToYahooNum converts a raw struct to YahooNum
type YahooString ¶
type YahooString struct {
Raw *string `json:"raw,omitempty"`
Fmt string `json:"fmt,omitempty"`
LongFmt string `json:"longFmt,omitempty"`
}
YahooString represents Yahoo's string format that might contain numbers