switch-coreboot/northbridge/amd/k8/dqs.c
Ronald G. Minnich 96e0fd18bf Fixes to make k8 and others work.
We need the sys_info struct in the global variables struct for 
cache as ram on k8. The sys_info struct is generally very useful
so it makes sense to start accommodating it.

This patch adds an (empty for now) sys_info struct for geode. 
It adds sys_info to the global variables struct.

It removes global variables from console.h to a new file, 
globalvars.h. Very little code needs to include this file. 

This patch is tested on the dbe62 and simnow with no problems.

k8 compilation is now broken but I'm working on it. I'm going through
the eyeballs-bleed code on k8 startup to document it and with any luck 
we'll have more functionality by the end of today. But it's hard ...

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>

Acked-by: Carl-Daniel Hailfinger <c-d.hailfinger.devel.2006@gmx.net>


git-svn-id: svn://coreboot.org/repository/coreboot-v3@828 f3766cd6-281f-0410-b1cd-43a5c92072e9
2008-08-27 22:43:18 +00:00

2030 lines
65 KiB
C

/*
* K8
* This file is part of the coreboot project.
* Copyright (C) 2005-7 YingHai Lu
* Copyright (C) 2005 Ollie Lo
* Copyright (C) 2005-2007 Stefan Reinauer <stepan@openbios.org>
* Copyright (C) 2008 Ronald G. Minnich <rminnich@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA, 02110-1301 USA
*/
#include <mainboard.h>
#include <console.h>
#include <string.h>
#include <mtrr.h>
#include <macros.h>
#include <spd.h>
#include <cpu.h>
#include <msr.h>
#include <device/pci.h>
#include <pci_ops.h>
#include <mc146818rtc.h>
#include <lib.h>
#include <amd/k8/k8.h>
#include <amd/k8/sysconf.h>
#include <spd_ddr2.h>
/*
yhlu 2005.10 dqs training
*/
//0: mean no debug info
#define DQS_TRAIN_DEBUG 1
// always undef this. We only support F2 and later.
#undef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
u32 pci_read_config32_index(u32 dev, u32 index_reg, u32 index);
void pci_write_config32_index(u32 dev, u32 index_reg, u32 index, u32 data);
u32 pci_read_config32_index_wait(u32 dev, u32 index_reg, u32 index);
void pci_write_config32_index_wait(u32 dev, u32 index_reg, u32 index, u32 data);
/*
 * Print a label followed by a hex value at BIOS_DEBUG level.
 *
 * Output is produced only when DQS_TRAIN_DEBUG is non-zero and strictly
 * greater than 'level'. No #ifdef is needed: when DQS_TRAIN_DEBUG is 0 the
 * first test is a compile-time constant and the compiler can drop the call.
 */
static inline void print_debug_dqs(const char *str, unsigned int val, unsigned level)
{
	if (!DQS_TRAIN_DEBUG)
		return;
	if (DQS_TRAIN_DEBUG <= level)
		return;
	printk(BIOS_DEBUG, "%s%x\n", str, val);
}
/*
 * Print two labelled 8-digit hex values on one line at BIOS_DEBUG level.
 * Emitted only when DQS_TRAIN_DEBUG is non-zero and greater than 'level'.
 */
static inline void print_debug_dqs_pair(const char *str, unsigned val, const char *str2, unsigned val2, unsigned level)
{
	const int enabled = DQS_TRAIN_DEBUG && DQS_TRAIN_DEBUG > level;

	if (enabled)
		printk(BIOS_DEBUG, "%s%08x%s%08x\n", str, val, str2, val2);
}
/*
 * Print "<str>[<i>]=<val><val2>" (index as 2 hex digits, the two values as
 * back-to-back 8-digit hex words) at BIOS_DEBUG level.
 * Emitted only when DQS_TRAIN_DEBUG is non-zero and greater than 'level'.
 */
static inline void print_debug_dqs_tsc(const char *str, unsigned i, unsigned val, unsigned val2, unsigned level)
{
	if (!(DQS_TRAIN_DEBUG && DQS_TRAIN_DEBUG > level))
		return;
	printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
}
/*
 * Unconditional variant of print_debug_dqs_tsc(): always prints
 * "<str>[<i>]=<val><val2>" at BIOS_DEBUG level, with no DQS_TRAIN_DEBUG
 * or level check.
 */
static inline void print_debug_dqs_tsc_x(const char *str, unsigned i, unsigned val, unsigned val2)
{
printk(BIOS_DEBUG, "%s[%02x]=%08x%08x\n", str, i, val, val2);
}
/*
 * Cache per-node memory-controller registers in sysinfo so that later code
 * can use them without touching PCI config space again:
 *   - mem_base[nodeid]         <- function 1, offset 0x40 + nodeid * 8
 *   - cs_base[nodeid * 8 + n]  <- function 2, offset 0x40 + n * 4, n = 0..7
 *   - hole_reg[nodeid]         <- function 1, offset 0xf0
 */
static void fill_mem_cs_sysinfo(unsigned nodeid, const struct mem_controller *ctrl, struct sys_info *sysinfo)
{
	int cs = 0;

	sysinfo->mem_base[nodeid] = pci_conf1_read_config32(ctrl->f1, 0x40 + (nodeid << 3));

	/* eight chip-select base registers per node */
	while (cs < 8) {
		sysinfo->cs_base[nodeid * 8 + cs] = pci_conf1_read_config32(ctrl->f2, 0x40 + (cs << 2));
		cs++;
	}

	sysinfo->hole_reg[nodeid] = pci_conf1_read_config32(ctrl->f1, 0xf0);
}
/*
 * Compute the test address for chip select 'cs_idx' on the node of 'ctrl',
 * using the register copies cached in sysinfo.
 *
 * The value is built from the chip-select base (low 4 bits dropped) plus the
 * node base (low 16 bits dropped), optionally remapped above the hardware
 * memory hole, plus a 1MB offset to stay clear of the compatibility area.
 *
 * The result is a system address shifted right by 8: callers in this file
 * split it as (addr >> 24) for the FS base high dword and (addr << 8) for
 * the low 32-bit offset.
 */
static unsigned Get_MCTSysAddr(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
{
	const unsigned node = ctrl->node_id;
	u32 sys_addr;

	/* local base address of the chip select */
	sys_addr = sysinfo->cs_base[node * 8 + cs_idx] & 0xfffffff0;

	/* system address = node base + local chip-select base */
	sys_addr += sysinfo->mem_base[node] & 0xffff0000;

#if HW_MEM_HOLE_SIZEK != 0
	{
		const u32 hole = sysinfo->hole_reg[node];

		/* bit 0 set: hole register is active for this node */
		if (hole & 1) {
			const unsigned hole_start_k = (hole & 0xff000000) >> 10;
			const int above_hole_start = sys_addr >= (hole_start_k << 2);
			const int below_4g = sys_addr < ((4 * 1024 * 1024) << 2);

			/* addresses inside [hole start, 4GB) are hoisted by the hole size */
			if (above_hole_start && below_4g)
				sys_addr += ((4 * 1024 * 1024 - hole_start_k) << 2);
		}
	}
#endif

	/* add 1MB offset to avoid the compat area */
	sys_addr += (1 << (20 - 8));

	return sys_addr;
}
/*
 * Return the test address used for receiver-enable training of chip select
 * 'cs_idx'. This simply forwards to Get_MCTSysAddr(); the 'channel' argument
 * is accepted but not used.
 */
static unsigned Get_RcvrSysAddr(const struct mem_controller * ctrl, unsigned channel, unsigned cs_idx, struct sys_info *sysinfo)
{
return Get_MCTSysAddr(ctrl, cs_idx, sysinfo);
}
/*
 * Read control register CR4 and return its value.
 * Uses a 32-bit "movl", so this is 32-bit x86 code only.
 */
static inline unsigned long read_cr4(void)
{
unsigned long cr4;
asm volatile ("movl %%cr4, %0" : "=r" (cr4));
return cr4;
}
/*
 * Load control register CR4 with 'cr4'.
 * Uses a 32-bit "movl", so this is 32-bit x86 code only.
 */
static inline void write_cr4(unsigned long cr4)
{
asm volatile ("movl %0, %%cr4" : : "r" (cr4));
}
/*
 * Set bit 9 of CR4 (OSFXSR in the AMD64/IA-32 manuals), which the training
 * code needs before it can use the SSE2 movdqa/movntdq instructions.
 */
static inline void enable_sse2(void)
{
	write_cr4(read_cr4() | (1 << 9));
}
/*
 * Clear bit 9 of CR4 (OSFXSR in the AMD64/IA-32 manuals), undoing
 * enable_sse2().
 *
 * CR4 is held in an unsigned long, matching read_cr4()/write_cr4() and
 * enable_sse2(), so the value is never narrowed on its way through.
 */
static inline void disable_sse2(void)
{
	unsigned long cr4;

	cr4 = read_cr4();
	cr4 &= ~(1UL << 9);
	write_cr4(cr4);
}
static void set_wrap32dis(void) {
struct msr msr;
msr = rdmsr(0xc0010015);
msr.lo |= (1<<17);
wrmsr(0xc0010015, msr);
}
static void clear_wrap32dis(void) {
struct msr msr;
msr = rdmsr(0xc0010015);
msr.lo &= ~(1<<17);
wrmsr(0xc0010015, msr);
}
/*
 * Program MSR 0xc0000100 (FS_BASE) with high dword 'addr_hi' and low dword 0.
 * Subsequent "%fs:"-prefixed accesses in this file are then relative to the
 * address addr_hi << 32.
 */
static void set_FSBASE(u32 addr_hi)
{
	struct msr fs_base = { .lo = 0, .hi = addr_hi };

	wrmsr(0xc0000100, fs_base); //FS_BASE
}
/*
 * Return 1 if chip select 'cs_idx' of this controller's node is enabled,
 * 0 otherwise. The answer is bit 0 of the cached chip-select base register
 * in sysinfo->cs_base[].
 */
static unsigned ChipSelPresent(const struct mem_controller *ctrl, unsigned cs_idx, struct sys_info *sysinfo)
{
	const unsigned slot = ctrl->node_id * 8 + cs_idx;
	unsigned cs_base = sysinfo->cs_base[slot];

	return cs_base & 1;
}
/*
 * Tell whether the rank behind chip select 'cs_idx' should be trained on
 * 'channel'. When the controller is not in 128-bit mode there is no
 * channel B, so any non-zero channel yields 0; otherwise the result is
 * ChipSelPresent() for that chip select.
 */
static unsigned RcvrRankEnabled(const struct mem_controller *ctrl, int channel, int cs_idx, unsigned is_Width128, struct sys_info *sysinfo)
{
	/* FIXME: process 64Muxed */
	if (!is_Width128 && channel)
		return 0; // no channel b

	return ChipSelPresent(ctrl, cs_idx, sysinfo);
}
/*
 * Copy 'line_num' 64-byte lines from buf_a to the FS-relative offset addr_lo.
 *
 * The loop runs line_num * 4 times; each iteration loads 16 bytes from the
 * source with movdqa and stores them to %fs:(addr_lo) with movntdq, then
 * advances both pointers by 16. movdqa requires the source to be 16-byte
 * aligned. Callers in this file set the FS base (set_FSBASE) beforehand.
 *
 * NOTE(review): the asm modifies %0, %3 and ecx although all are declared
 * as input-only operands, and neither xmm0 nor memory is listed as a
 * clobber. The GCC extended-asm documentation does not allow modifying
 * inputs; confirm whether this needs "+"-constraints and clobbers.
 * NOTE(review): line_num == 0 would make ecx 0, for which "loop" iterates
 * 2^32 times; callers visible here never pass 0.
 */
static void WriteLNTestPattern(unsigned addr_lo, u8 *buf_a, unsigned line_num)
{
__asm__ volatile (
"pushl %%ebx\n\t"
"1:\n\t"
"movdqa (%3), %%xmm0\n\t"
"movntdq %%xmm0, %%fs:(%0)\n\t" /* xmm0 is 128 bit */
"addl %1, %0\n\t"
"addl %1, %3\n\t"
"loop 1b\n\t"
"popl %%ebx\n\t"
:: "a" (addr_lo), "d" (16), "c" (line_num * 4), "r"(buf_a)
);
}
/*
 * Write one 64-byte test line to the address 'addr' (system address >> 8).
 * 'p' selects the source buffer: 1 uses buf_b, anything else uses buf_a.
 * The FS base is set to the upper address bits and the line is written at
 * the low 32-bit offset.
 */
void Write1LTestPattern(unsigned addr, unsigned p, u8 *buf_a, u8 *buf_b)
{
	u8 *src = (p == 1) ? buf_b : buf_a;

	set_FSBASE(addr >> 24);
	WriteLNTestPattern(addr << 8, src, 1);
}
/*
 * Perform a single dword read at the address 'addr' (system address >> 8)
 * through the FS segment. The value read is discarded; the read is done
 * only for its side effect of bringing the line into the cache.
 */
void Read1LTestPattern(unsigned addr)
{
unsigned value;
set_FSBASE(addr>>24);
/* 1st move causes read fill (to exclusive or shared)*/
__asm__ volatile (
"pushl %%ebx\n\tmovl %%fs:(%1), %0\n\tpopl %%ebx\n\t"
:"=r"(value): "a" (addr<<8)
);
}
/* Result codes produced by the pattern-compare helpers. */
#define DQS_PASS 0
#define DQS_FAIL 1
/* Training pass selector ('Pass' argument). Note that DQS_FIRST_PASS has
 * the same numeric value as DQS_FAIL. */
#define DQS_FIRST_PASS 1
#define DQS_SECOND_PASS 2
/*
 * SB_* codes are OR-ed directly into the 'Errors' accumulators of the
 * training routines in this file.
 * NOTE(review): the values look like bit positions; confirm whether
 * (1 << SB_x) was intended instead of OR-ing the raw number.
 */
#define SB_NORCVREN 11
/* Only referenced from commented-out expressions in TrainRcvrEn. */
#define RCVREN_MARGIN 6
#define SB_SmallRCVR 13
#define SB_CHA2BRCVREN 12
#define SB_NODQSPOS 14
/* Minimum width (in delay steps) of an acceptable DQS passing window. */
#define MIN_DQS_WNDW 3
#define SB_SMALLDQS 15
/*
 * Compare the first quadword (two dwords) at test address 'addr' against
 * the expected test pattern and return DQS_PASS or DQS_FAIL.
 *
 * Expected data: on DQS_FIRST_PASS, TestPattern1 when pattern == 1 and
 * TestPattern0 otherwise; on any other pass, TestPattern2.
 * In 128-bit mode, channel 1 data is taken 8 bytes further on, in both
 * memory and the pattern.
 * The high dword is only read if the low dword matched.
 * On DQS_SECOND_PASS the result is inverted before being returned.
 */
static unsigned CompareTestPatternQW0(unsigned channel, unsigned addr, unsigned pattern, const u32 *TestPattern0, const u32 *TestPattern1, const u32 *TestPattern2, unsigned Pass, unsigned is_Width128)
{
u32 addr_lo;
u32 *test_buf;
u32 value;
u32 value_test;
unsigned result = DQS_FAIL;
/* pick the reference pattern for this pass */
if(Pass == DQS_FIRST_PASS) {
if(pattern==1) {
test_buf = (u32 *)TestPattern1;
}
else {
test_buf = (u32 *)TestPattern0;
}
}
else {
test_buf = (u32 *)TestPattern2;
}
/* upper address bits go into FS base, the rest is the FS-relative offset */
set_FSBASE(addr>>24);
addr_lo = addr<<8;
if(is_Width128 && (channel == 1)) {
addr_lo += 8; //second channel
test_buf += 2;
}
/* read low dword of the quadword from memory */
__asm__ volatile (
"movl %%fs:(%1), %0\n\t"
:"=c"(value): "a" (addr_lo)
);
value_test = *test_buf;
print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
print_debug_dqs_pair("\t\t\t\t\t\tQW0.lo : addr_lo = ", addr_lo, " value = ", value, 4);
if(value == value_test) {
/* low dword matched: check the high dword as well */
addr_lo += 4;
test_buf++;
__asm__ volatile (
"movl %%fs:(%1), %0\n\t"
:"=c"(value): "a" (addr_lo)
);
value_test = *test_buf;
print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : test_buf= ", (unsigned)test_buf, " value = ", value_test, 4);
print_debug_dqs_pair("\t\t\t\t\t\tQW0.hi : addr_lo = ", addr_lo, " value = ", value, 4);
if(value == value_test){
result = DQS_PASS;
}
}
if(Pass == DQS_SECOND_PASS) { // second pass need to be inverted
if(result==DQS_PASS) {
result = DQS_FAIL;
}
else {
result = DQS_PASS;
}
}
return result;
}
/*
 * Program the MaxAsyncLat field of DRAM_CONFIG_HIGH from a receiver-enable
 * delay 'dly'.
 *
 * The delay is divided by 20 rounding up (the original comment describes
 * this as converting 50ps units to 1ns), 6 is added, and the result minus
 * DCH_MaxAsyncLat_BASE is written into the field.
 */
static void SetMaxAL_RcvrDly(const struct mem_controller *ctrl, unsigned dly)
{
	const unsigned max_async_lat = (dly + (20 - 1)) / 20 + 6;
	u32 dch = pci_conf1_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);

	dch &= ~(DCH_MaxAsyncLat_MASK <<DCH_MaxAsyncLat_SHIFT);
	dch |= ((max_async_lat - DCH_MaxAsyncLat_BASE) << DCH_MaxAsyncLat_SHIFT);

	pci_conf1_write_config32(ctrl->f2, DRAM_CONFIG_HIGH, dch);
}
/*
Set the Target range to WT IO (using an IORR overlapping the already existing
WB dram type). Use IORR0
*/
static void SetTargetWTIO(unsigned addr)
{
struct msr msr;
msr.hi = addr>>24;
msr.lo = addr<<8;
wrmsr(0xc0010016, msr); //IORR0 BASE
msr.hi = 0xff;
msr.lo = 0xfc000800; // 64MB Mask
wrmsr(0xc0010017, msr); // IORR0 Mask
}
static void ResetTargetWTIO(void)
{
struct msr msr;
msr.hi = 0;
msr.lo = 0;
wrmsr(0xc0010017, msr); // IORR0 Mask
}
/*
 * Flush the cache line containing the address 'addr' (system address >> 8).
 * The upper address bits are put in the FS base and clflush is issued on
 * the FS-relative low 32-bit offset.
 */
static void proc_CLFLUSH(unsigned addr)
{
set_FSBASE(addr>>24);
/* flush the line at fs:[eax]; no data is read here */
__asm__ volatile (
/* clflush fs:[eax] */
"clflush %%fs:(%0)\n\t"
::"a" (addr<<8)
);
}
/*
 * Flush the cache line at 'addr' while the range is temporarily marked via
 * IORR0: SetTargetWTIO(), then proc_CLFLUSH(), then ResetTargetWTIO().
 */
static void proc_IOCLFLUSH(unsigned addr)
{
SetTargetWTIO(addr);
proc_CLFLUSH(addr);
ResetTargetWTIO();
}
/*
 * Read indexed register 0x10 (through the function 2 index register at
 * 0x98) and write the same value straight back.
 * NOTE(review): presumably the write itself is what resets the DCT write
 * pointer, as the function name suggests; confirm against the BKDG.
 */
static void ResetDCTWrPtr(const struct mem_controller *ctrl)
{
	const unsigned idx = 0x10;
	u32 current = pci_read_config32_index_wait(ctrl->f2, 0x98, idx);

	pci_write_config32_index_wait(ctrl->f2, 0x98, idx, current);
}
/*
 * Return the "T1000" figure (cycle time in ns * 1000, per the caller's
 * comment) for memory-clock selector 'i'.
 *
 * 'i' selects the column (200/266/333/400 per the table comments) and is
 * expected to be 0..3; it is not range-checked here.
 * The row is chosen from the low 6 bits of MSR 0xc0010042 shifted right by
 * one. TT_a has one row per value labelled 4..15 in the table comments,
 * i.e. 12 rows with valid row indices 0..11. Any row index beyond the table
 * falls back to the nominal values in T1000_a.
 *
 * The bound is derived from the table size. The previous "index > 12" test
 * let index 12 read one row past the end of TT_a.
 */
static u16 get_exact_T1000(unsigned i)
{
	// 200 266, 333, 400
	static const u16 T1000_a[]= { 5000, 3759, 3003, 2500 };
	static const u16 TT_a[] = {
		/*200 266 333 400 */
		/*4 */ 6250, 6250, 6250, 6250,
		/*5 */ 5000, 5000, 5000, 2500,
		/*6 */ 5000, 4166, 4166, 2500,
		/*7 */ 5000, 4285, 3571, 2500,
		/*8 */ 5000, 3750, 3125, 2500,
		/*9 */ 5000, 3888, 3333, 2500,
		/*10*/ 5000, 4000, 3000, 2500,
		/*11*/ 5000, 4090, 3181, 2500,
		/*12*/ 5000, 3750, 3333, 2500,
		/*13*/ 5000, 3846, 3076, 2500,
		/*14*/ 5000, 3928, 3214, 2500,
		/*15*/ 5000, 4000, 3000, 2500,
	};
	/* number of complete 4-column rows in TT_a */
	const unsigned tt_rows = sizeof(TT_a) / sizeof(TT_a[0]) / 4;
	unsigned fid_cur;
	unsigned index;
	struct msr msr;

	msr = rdmsr(0xc0010042);
	fid_cur = msr.lo & 0x3f;
	index = fid_cur >> 1;

	/* outside the table: use the nominal cycle time */
	if (index >= tt_rows)
		return T1000_a[i];

	return TT_a[index * 4 + i];
}
/*
 * Put the DQS timing registers into a known state before receiver-enable
 * training. All writes go through the function 2 index register at 0x98:
 *   - indices 0x01-0x03 and 0x21-0x23 are set to 0x00 in every byte
 *   - indices 0x05-0x07 and 0x25-0x27 are set to 0x2f in every byte
 * Elsewhere in this file (SetDQSDelayCSR) the +4 index offset corresponds
 * to the read direction and the +0x20 offset to the second channel.
 */
static void InitDQSPos4RcvrEn(const struct mem_controller *ctrl)
{
	int idx;

	/* first group, both channels: all bytes 0x00 */
	idx = 1;
	while (idx <= 3) {
		pci_write_config32_index_wait(ctrl->f2, 0x98, idx, 0x00000000);
		pci_write_config32_index_wait(ctrl->f2, 0x98, idx + 0x20, 0x00000000);
		idx++;
	}

	/* second group, both channels: all bytes 0x2f */
	idx = 5;
	while (idx <= 7) {
		pci_write_config32_index_wait(ctrl->f2, 0x98, idx, 0x2f2f2f2f);
		pci_write_config32_index_wait(ctrl->f2, 0x98, idx + 0x20, 0x2f2f2f2f);
		idx++;
	}
}
/*
 * DQS receiver-enable training for one memory controller.
 *
 * For each channel (0..1) and each receiver pair (chip selects 0,2,4,6)
 * whose rank is enabled, the receiver-enable delay is swept upwards until
 * the first setting at which the test pattern reads back correctly after a
 * failing setting. That delay is stored in sysinfo->dqs_rcvr_dly_a[] and
 * programmed through the function 2 index register (0x98).
 *
 * Pass == DQS_FIRST_PASS: DQS positions are initialised, DC_DqsRcvEnTrain
 * is set for the duration of the training, TestPattern0/1 are used and the
 * sweep starts at 0.
 * Any other pass: TestPattern2 is used, the sweep starts from the value
 * saved earlier, and (for DQS_SECOND_PASS) the result is averaged with the
 * saved value.
 *
 * Side effects that are undone before returning: SSE2 is enabled via CR4,
 * wrap32dis is set, and DCL_DimmEccEn is cleared and later restored.
 * Note that the function ends with disable_sse2() regardless of the CR4
 * state on entry.
 *
 * Returns 1 if the largest delay found equals 0xae, otherwise 0. The
 * 'Errors' accumulator only terminates the loops early; it is not returned.
 */
static unsigned TrainRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
{
static const u32 TestPattern0[] = {
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
};
static const u32 TestPattern1[] = {
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
0x55555555, 0x55555555, 0x55555555, 0x55555555,
};
static const u32 TestPattern2[] = {
0x12345678, 0x87654321, 0x23456789, 0x98765432,
0x59385824, 0x30496724, 0x24490795, 0x99938733,
0x40385642, 0x38465245, 0x29432163, 0x05067894,
0x12349045, 0x98723467, 0x12387634, 0x34587623,
};
/* Room for buf_a (64 bytes used) and buf_b at buf_a + 128, plus 16 spare
 * bytes so that buf_a can be rounded up to a 16-byte boundary. */
u8 pattern_buf_x[64 * 4 + 16];
u8 *buf_a, *buf_b;
u32 ecc_bit;
u32 dword;
/* this node's slice of the saved delays: 2 channels x 8 receivers */
u8 *dqs_rcvr_dly_a = &sysinfo->dqs_rcvr_dly_a[ctrl->node_id * 2* 8] ;
int i;
unsigned channel, receiver;
unsigned Errors;
unsigned CTLRMaxDelay;
unsigned T1000;
unsigned LastTest;
unsigned CurrTest;
unsigned Test0, Test1;
unsigned RcvrEnDlyRmin;
unsigned two_ranks;
unsigned RcvrEnDly;
unsigned PatternA;
unsigned PatternB;
unsigned long TestAddr0, TestAddr0B;
unsigned long TestAddr1 = 0;/* warning: this was not set in the original code */
unsigned long TestAddr1B = 0;/* warning: this was not set in the original code */
unsigned long CurrRcvrCHADelay = 0; /* warning: this was not set in the original code */
unsigned tmp;
unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
if(Pass == DQS_FIRST_PASS) {
InitDQSPos4RcvrEn(ctrl);
}
//enable SSE2
enable_sse2();
//wrap32dis
set_wrap32dis();
//disable ECC temp
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
/* remember the ECC enable bit so it can be restored at the end */
ecc_bit = dword & DCL_DimmEccEn;
dword &= ~(DCL_DimmEccEn);
pci_conf1_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
if(Pass == DQS_FIRST_PASS) {
/* K8_REV_F_SUPPORT_F0_F1_WORKAROUND is #undef'ed in this file, so the
 * cpu_f0_f1 code below is never compiled (cpu_f0_f1 is not declared in
 * this function). */
#ifdef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
cpu_f0_f1 = is_cpu_pre_f2_in_bsp(ctrl->node_id);
if(!cpu_f0_f1)
#endif
{
#if 1
/* Set the DqsRcvEnTrain bit */
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CTRL);
dword |= DC_DqsRcvEnTrain;
pci_conf1_write_config32(ctrl->f2, DRAM_CTRL, dword);
#endif
}
}
//get T1000 figures (cycle time (ns)) * 1K
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CONFIG_HIGH);
dword &= DCH_MemClkFreq_MASK;
T1000 = get_exact_T1000(dword);
// SetupRcvrPattern
/* round up to the next 16-byte boundary inside pattern_buf_x */
buf_a = (u8 *)(((u32)(&pattern_buf_x[0]) + 0x10) & (0xfffffff0));
buf_b = buf_a + 128; //??
if(Pass==DQS_FIRST_PASS) {
for(i=0;i<16;i++) {
*((u32 *)(buf_a + i*4)) = TestPattern0[i];
*((u32 *)(buf_b + i*4)) = TestPattern1[i];
}
}
else {
for(i=0;i<16;i++) {
*((u32 *)(buf_a + i*4)) = TestPattern2[i];
*((u32 *)(buf_b + i*4)) = TestPattern2[i];
}
}
printk(BIOS_DEBUG, "\nTrainRcvEn: 0 ctrl 0x%x %d\n", ctrl->node_id, 0);
/* prints only the first byte of buf_a */
printk(BIOS_DEBUG, "TrainRcvEn: buf_a:0x%x\n", *buf_a);
Errors = 0;
/* for each channel */
CTLRMaxDelay = 0;
for(channel = 0; (channel < 2) && (!Errors); channel++)
{
printk(BIOS_DEBUG, "\tTrainRcvEn51: channel 0x%x %d\n",channel, 1);
/* for each rank */
/* there are four receiver pairs, loosely associated with CS */
for( receiver = 0; (receiver < 8) && (!Errors); receiver+=2)
{
/* indexed register holding this receiver pair's delay (channel A) */
unsigned index=(receiver>>1) * 3 + 0x10;
printk(BIOS_DEBUG, "\t\tTrainRcvEn52: index 0x%x %d\n", index, 2);
if(is_Width128) {
if(channel) {
/* save channel A's current delay; it is written back after
 * channel B has been trained */
dword = pci_read_config32_index_wait(ctrl->f2, 0x98, index);
CurrRcvrCHADelay= dword & 0xff;
}
}
else {
if(channel) {
index += 0x20;
}
}
LastTest = DQS_FAIL;
/* 0xaf doubles as the "no passing delay found" marker */
RcvrEnDlyRmin = 0xaf;
if(!RcvrRankEnabled(ctrl, channel, receiver, is_Width128, sysinfo)) continue;
/* for each DQS receiver enable setting */
TestAddr0 = Get_RcvrSysAddr(ctrl, channel, receiver, sysinfo);
TestAddr0B = TestAddr0 + (1<<(20+2-8)); // 4MB
if(RcvrRankEnabled(ctrl, channel, receiver+1, is_Width128, sysinfo)) {
TestAddr1 = Get_RcvrSysAddr(ctrl, channel, receiver+1, sysinfo);
TestAddr1B = TestAddr1 + (1<<(20+2-8)); //4MB
two_ranks = 1;
}
else {
two_ranks = 0;
}
printk(BIOS_DEBUG, "\t\tTrainRcvEn53: TestAddr0B 0x%lx %d\n", TestAddr0B, 2);
Write1LTestPattern(TestAddr0, 0, buf_a, buf_b); // rank0 of dimm, test p0
Write1LTestPattern(TestAddr0B, 1, buf_a, buf_b); //rank0 of dimm, test p1
if(two_ranks == 1) {
Write1LTestPattern(TestAddr1, 0, buf_a, buf_b); //rank 1 of dimm
Write1LTestPattern(TestAddr1B, 1, buf_a, buf_b);//rank 1 of dimm
}
/* first pass sweeps from 0; later passes resume from the saved delay */
if(Pass == DQS_FIRST_PASS) {
RcvrEnDly = 0;
} else {
RcvrEnDly = dqs_rcvr_dly_a[channel * 8 + receiver];
}
while ( RcvrEnDly < 0xaf) { // Sweep Delay value here
printk(BIOS_DEBUG, "\t\t\tTrainRcvEn541: RcvrEnDly 0x%x %d\n", RcvrEnDly, 3);
if(RcvrEnDly & 1) {
/* Odd steps get another pattern such that even
and odd steps alternate.
The pointers to the patterns will be swapped
at the end of the loop so that they correspond
*/
PatternA = 1;
PatternB = 0;
}
else {
/* Even step */
PatternA = 0;
PatternB = 1;
}
/* Program current Receiver enable delay */
pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
/* FIXME: 64bit MUX */
if(is_Width128) {
/* Program current Receiver enable delay channel b */
pci_write_config32_index_wait(ctrl->f2, 0x98, index+ 0x20, RcvrEnDly);
}
/* Program the MaxAsyncLat field with the
current DQS receiver enable setting plus 6ns
*/
/* Program MaxAsyncLat to correspond with current delay */
SetMaxAL_RcvrDly(ctrl, RcvrEnDly);
CurrTest = DQS_FAIL;
Read1LTestPattern(TestAddr0); //Cache Fill
/* ROM vs cache compare */
Test0 = CompareTestPatternQW0(channel, TestAddr0, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
proc_IOCLFLUSH(TestAddr0);
ResetDCTWrPtr(ctrl);
printk(BIOS_DEBUG, "\t\t\tTrainRcvEn542: Test0 0x%x %d\n", Test0, 3);
/* the setting passes only if every address tested (two per rank)
 * compares equal; later tests are skipped after the first failure */
if(Test0 == DQS_PASS) {
Read1LTestPattern(TestAddr0B);
Test1 = CompareTestPatternQW0(channel, TestAddr0B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
proc_IOCLFLUSH(TestAddr0B);
ResetDCTWrPtr(ctrl);
printk(BIOS_DEBUG, "\t\t\tTrainRcvEn543: Test1 0x%x %d\n", Test1, 3);
if(Test1 == DQS_PASS) {
if(two_ranks) {
Read1LTestPattern(TestAddr1);
Test0 = CompareTestPatternQW0(channel, TestAddr1, PatternA, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
proc_IOCLFLUSH(TestAddr1);
ResetDCTWrPtr(ctrl);
if(Test0 == DQS_PASS) {
Read1LTestPattern(TestAddr1B);
Test1 = CompareTestPatternQW0(channel, TestAddr1B, PatternB, TestPattern0, TestPattern1, TestPattern2, Pass, is_Width128);
proc_IOCLFLUSH(TestAddr1B);
ResetDCTWrPtr(ctrl);
if(Test1 == DQS_PASS) {
CurrTest = DQS_PASS;
}
}
printk(BIOS_DEBUG, "\t\t\tTrainRcvEn544: Test0 0x%x %d\n", Test0, 3);
}
else {
CurrTest = DQS_PASS;
}
}
}
printk(BIOS_DEBUG, "\t\t\tTrainRcvEn55: RcvrEnDly 0x%x %d\n", RcvrEnDly, 3);
/* stop at the first fail -> pass transition */
if(CurrTest == DQS_PASS) {
if(LastTest == DQS_FAIL) {
RcvrEnDlyRmin = RcvrEnDly;
break;
}
}
LastTest = CurrTest;
/* swap the rank 0 pointers */
tmp = TestAddr0;
TestAddr0 = TestAddr0B;
TestAddr0B = tmp;
/* swap the rank 1 pointers */
tmp = TestAddr1;
TestAddr1 = TestAddr1B;
TestAddr1B = tmp;
printk(BIOS_DEBUG, "\t\t\tTrainRcvEn56: RcvrEnDly 0x%x %d\n", RcvrEnDly, 3);
RcvrEnDly++;
} // while RcvrEnDly
printk(BIOS_DEBUG, "\t\tTrainRcvEn61: RcvrEnDly 0x%x %d\n", RcvrEnDly, 2);
if(RcvrEnDlyRmin == 0xaf) {
//no passing window
Errors |= SB_NORCVREN;
}
if(Pass == DQS_FIRST_PASS) {
// We need a better value for DQSPos training
RcvrEnDly = RcvrEnDlyRmin /* + RCVREN_MARGIN * T1000/64/50 */;
} else {
RcvrEnDly = RcvrEnDlyRmin;
}
if(RcvrEnDly > 0xae) {
//passing window too narrow, too far delayed
Errors |= SB_SmallRCVR;
RcvrEnDly = 0xae;
}
if(Pass == DQS_SECOND_PASS) { //second pass must average values
RcvrEnDly += dqs_rcvr_dly_a[channel * 8 + receiver] /* - (RCVREN_MARGIN * T1000/64/50)*/;
RcvrEnDly >>= 1;
}
dqs_rcvr_dly_a[channel * 8 + receiver] = RcvrEnDly;
//Set final RcvrEnDly for this DIMM and Channel
pci_write_config32_index_wait(ctrl->f2, 0x98, index, RcvrEnDly);
if(is_Width128) {
pci_write_config32_index_wait(ctrl->f2, 0x98, index+0x20, RcvrEnDly); // channel B
if(channel) {
/* restore channel A's delay, then check that A and B do not
 * differ by more than one memory clock (difference * 50 vs T1000) */
pci_write_config32_index_wait(ctrl->f2, 0x98, index, CurrRcvrCHADelay);
if(RcvrEnDly > CurrRcvrCHADelay) {
dword = RcvrEnDly - CurrRcvrCHADelay;
}
else {
dword = CurrRcvrCHADelay - RcvrEnDly;
}
dword *= 50;
if(dword > T1000) {
Errors |= SB_CHA2BRCVREN;
}
}
}
printk(BIOS_DEBUG, "\t\tTrainRcvEn63: RcvrEnDly 0x%x %d\n", RcvrEnDly, 2);
/* track the largest delay seen on this controller */
if(RcvrEnDly > CTLRMaxDelay) {
CTLRMaxDelay = RcvrEnDly;
}
printk(BIOS_DEBUG, "\t\tTrainRcvEn64: CTLRMaxDelay 0x%x %d\n", CTLRMaxDelay, 2);
} /* receiver */
} /* channel */
printk(BIOS_DEBUG, "\tTrainRcvEn65: CTLRMaxDelay 0x%x %d\n", CTLRMaxDelay, 1);
/* Program the MaxAsyncLat field with the largest DQS Receiver Enable setting */
SetMaxAL_RcvrDly(ctrl, CTLRMaxDelay);
ResetDCTWrPtr(ctrl);
//Enable ECC again
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
dword &= ~(DCL_DimmEccEn);
dword |= ecc_bit;
pci_conf1_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
if(Pass == DQS_FIRST_PASS) {
#ifdef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
if(!cpu_f0_f1)
#endif
{
/* clear the DqsRcvEnTrain bit set at the start of the first pass */
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CTRL);
dword &= ~DC_DqsRcvEnTrain;
pci_conf1_write_config32(ctrl->f2, DRAM_CTRL, dword);
}
}
//Clear wrap32dis
clear_wrap32dis();
//restore SSE2 setting
disable_sse2();
#if MEM_TRAIN_SEQ != 1
/* We need tidy output for type 1 */
printk(BIOS_DEBUG, " CTLRMaxDelay=%02x", CTLRMaxDelay);
#endif
/* 0xae is the clamp value applied above when no usable delay was found */
return (CTLRMaxDelay==0xae)?1:0;
}
#define DQS_READDIR 1
#define DQS_WRITEDIR 0
/*
 * Program the DQS delay of a single byte lane.
 *
 * bytelane may be 0-8 (the last one is for ECC). Each indexed register
 * (accessed through function 2, index register 0x98) holds four byte lanes,
 * one per byte:
 *   register index = bytelane / 4 + 1 + channel * 0x20 + direction * 4
 *   bit offset     = (bytelane % 4) * 8
 * The register is read, the lane's low 6 bits are cleared, and the delay
 * (masked to 8 bits) is OR-ed in.
 * NOTE(review): the clear mask is 0x3f but the value mask is 0xff; delays
 * above 0x3f would leave stale/extra bits. Callers here pass values < 48.
 */
static void SetDQSDelayCSR(const struct mem_controller *ctrl, unsigned channel, unsigned bytelane, unsigned direction, unsigned dqs_delay)
{
	const unsigned reg_index = (bytelane >> 2) + 1 + channel * 0x20 + (direction << 2);
	const unsigned bit_offset = (bytelane & 3) << 3; // 8 bit per lane
	u32 reg;

	reg = pci_read_config32_index_wait(ctrl->f2, 0x98, reg_index);
	reg &= ~(0x3f << bit_offset);
	reg |= ((dqs_delay & 0xff) << bit_offset);
	pci_write_config32_index_wait(ctrl->f2, 0x98, reg_index, reg);
}
/*
 * Program the same DQS delay into byte lanes 0-7 of one channel/direction.
 *
 * The delay (masked to 8 bits) is replicated into all four bytes of a dword,
 * which is then written to the two consecutive indexed registers starting at
 * 1 + channel * 0x20 + direction * 4 (through function 2, index register
 * 0x98). The third register of the group (byte lane 8) is not written.
 */
static void SetDQSDelayAllCSR(const struct mem_controller *ctrl, unsigned channel, unsigned direction, unsigned dqs_delay)
{
	const unsigned first_index = 1 + channel * 0x20 + direction * 4;
	/* same byte in all four lanes of the dword */
	const u32 all_lanes = (dqs_delay & 0xff) * 0x01010101;

	pci_write_config32_index_wait(ctrl->f2, 0x98, first_index, all_lanes);
	pci_write_config32_index_wait(ctrl->f2, 0x98, first_index + 1, all_lanes);
}
/*
 * Return the midpoint of the window [min_d, max_d], rounding up when the
 * window width is odd.
 */
static unsigned MiddleDQS(unsigned min_d, unsigned max_d)
{
	const unsigned width = max_d - min_d;

	/* half the width, plus one more step if the width is odd */
	return min_d + (width >> 1) + (width & 1);
}
/*
 * Record a trained DQS delay in the caller-supplied table.
 * Layout of dqs_delay_a: [channel][direction][bytelane] with 2 directions
 * and 9 byte lanes per direction, i.e. 18 entries per channel.
 */
static inline void save_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, u8 *dqs_delay_a, u8 dqs_delay)
{
	const unsigned slot = channel * 2*9 + direction * 9 + bytelane;

	dqs_delay_a[slot] = dqs_delay;
}
/*
 * Write the DQS test pattern from buf_a to FS-relative offset addr_lo.
 * The length depends on 'pattern': (pattern + 1) * 9 lines, i.e. 9 lines
 * for pattern 0 and 18 lines for pattern 1.
 */
static void WriteDQSTestPattern(unsigned addr_lo, unsigned pattern , u8 *buf_a)
{
WriteLNTestPattern(addr_lo, buf_a, (pattern+1) * 9);
}
/*
 * Read one dword from each of 18 consecutive 64-byte lines starting at
 * FS-relative offset addr_lo. The data is discarded (every load targets
 * eax); the reads are done only to pull the lines into the cache.
 *
 * Five base registers are preloaded, each covering four lines via the
 * displacements -128, -64, 0 and +64 (the last base covers two lines).
 *
 * NOTE(review): the operand at addr_lo+128+8*64 uses the generic "r"
 * constraint, but the asm text hard-codes %ebx for it; this relies on the
 * compiler happening to pick ebx. eax is also overwritten although it is
 * declared as an input only. Confirm against the GCC extended-asm rules.
 */
static void ReadL18TestPattern(unsigned addr_lo)
{
//set fs and use fs prefix to access the mem
__asm__ volatile (
"pushl %%ebx\n\t"
"movl %%fs:-128(%%esi), %%eax\n\t" //TestAddr cache line
"movl %%fs:-64(%%esi), %%eax\n\t" //+1
"movl %%fs:(%%esi), %%eax\n\t" //+2
"movl %%fs:64(%%esi), %%eax\n\t" //+3
"movl %%fs:-128(%%edi), %%eax\n\t" //+4
"movl %%fs:-64(%%edi), %%eax\n\t" //+5
"movl %%fs:(%%edi), %%eax\n\t" //+6
"movl %%fs:64(%%edi), %%eax\n\t" //+7
"movl %%fs:-128(%%ebx), %%eax\n\t" //+8
"movl %%fs:-64(%%ebx), %%eax\n\t" //+9
"movl %%fs:(%%ebx), %%eax\n\t" //+10
"movl %%fs:64(%%ebx), %%eax\n\t" //+11
"movl %%fs:-128(%%ecx), %%eax\n\t" //+12
"movl %%fs:-64(%%ecx), %%eax\n\t" //+13
"movl %%fs:(%%ecx), %%eax\n\t" //+14
"movl %%fs:64(%%ecx), %%eax\n\t" //+15
"movl %%fs:-128(%%edx), %%eax\n\t" //+16
"movl %%fs:-64(%%edx), %%eax\n\t" //+17
"popl %%ebx\n\t"
:: "a"(0), "r" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "S"(addr_lo+128), "D"(addr_lo+128+4*64)
);
}
/*
 * Read one dword from each of 9 consecutive 64-byte lines starting at
 * FS-relative offset addr_lo. The data is discarded (every load targets
 * eax); the reads are done only to pull the lines into the cache.
 *
 * NOTE(review): the operand at addr_lo+128+8*64 uses the generic "r"
 * constraint, but the asm text hard-codes %ebx for it. With only eax, ecx
 * and edx pinned, the compiler is free to choose another register.
 * Confirm against the GCC extended-asm rules.
 */
static void ReadL9TestPattern(unsigned addr_lo)
{
//set fs and use fs prefix to access the mem
__asm__ volatile (
"pushl %%ebx\n\t"
"movl %%fs:-128(%%ecx), %%eax\n\t" //TestAddr cache line
"movl %%fs:-64(%%ecx), %%eax\n\t" //+1
"movl %%fs:(%%ecx), %%eax\n\t" //+2
"movl %%fs:64(%%ecx), %%eax\n\t" //+3
"movl %%fs:-128(%%edx), %%eax\n\t" //+4
"movl %%fs:-64(%%edx), %%eax\n\t" //+5
"movl %%fs:(%%edx), %%eax\n\t" //+6
"movl %%fs:64(%%edx), %%eax\n\t" //+7
"movl %%fs:-128(%%ebx), %%eax\n\t" //+8
"popl %%ebx\n\t"
:: "a"(0), "r" (addr_lo+128+8*64), "c"(addr_lo+128), "d"(addr_lo+128+4*64)
);
}
/*
 * Read back the DQS test pattern at FS-relative offset addr_lo:
 * 9 lines for pattern 0, 18 lines for any other pattern value.
 */
static void ReadDQSTestPattern(unsigned addr_lo, unsigned pattern)
{
	if (pattern != 0) {
		ReadL18TestPattern(addr_lo);
		return;
	}
	ReadL9TestPattern(addr_lo);
}
/*
 * clflush each of the 9 consecutive 64-byte lines starting at FS-relative
 * offset addr_lo (the lines touched by ReadL9TestPattern).
 *
 * NOTE(review): the operand at addr_lo+128+8*64 uses the generic "r"
 * constraint, but the asm text hard-codes %ebx for it. Confirm that the
 * compiler is guaranteed to allocate ebx here.
 */
static void FlushDQSTestPattern_L9(unsigned addr_lo)
{
__asm__ volatile (
"pushl %%ebx\n\t"
"clflush %%fs:-128(%%ecx)\n\t"
"clflush %%fs:-64(%%ecx)\n\t"
"clflush %%fs:(%%ecx)\n\t"
"clflush %%fs:64(%%ecx)\n\t"
"clflush %%fs:-128(%%eax)\n\t"
"clflush %%fs:-64(%%eax)\n\t"
"clflush %%fs:(%%eax)\n\t"
"clflush %%fs:64(%%eax)\n\t"
"clflush %%fs:-128(%%ebx)\n\t"
"popl %%ebx\n\t"
:: "r" (addr_lo+128+8*64), "c"(addr_lo+128), "a"(addr_lo+128+4*64)
);
}
/*
 * clflush each of the 18 consecutive 64-byte lines starting at FS-relative
 * offset addr_lo (the lines touched by ReadL18TestPattern).
 * The function is marked noinline.
 *
 * NOTE(review): the operand at addr_lo+128+8*64 uses the generic "r"
 * constraint, but the asm text hard-codes %ebx for it. Confirm that the
 * compiler is guaranteed to allocate ebx here.
 */
static __attribute__((noinline)) void FlushDQSTestPattern_L18(unsigned addr_lo)
{
__asm__ volatile (
"pushl %%ebx\n\t"
"clflush %%fs:-128(%%eax)\n\t"
"clflush %%fs:-64(%%eax)\n\t"
"clflush %%fs:(%%eax)\n\t"
"clflush %%fs:64(%%eax)\n\t"
"clflush %%fs:-128(%%edi)\n\t"
"clflush %%fs:-64(%%edi)\n\t"
"clflush %%fs:(%%edi)\n\t"
"clflush %%fs:64(%%edi)\n\t"
"clflush %%fs:-128(%%ebx)\n\t"
"clflush %%fs:-64(%%ebx)\n\t"
"clflush %%fs:(%%ebx)\n\t"
"clflush %%fs:64(%%ebx)\n\t"
"clflush %%fs:-128(%%ecx)\n\t"
"clflush %%fs:-64(%%ecx)\n\t"
"clflush %%fs:(%%ecx)\n\t"
"clflush %%fs:64(%%ecx)\n\t"
"clflush %%fs:-128(%%edx)\n\t"
"clflush %%fs:-64(%%edx)\n\t"
"popl %%ebx\n\t"
:: "r" (addr_lo+128+8*64), "c" (addr_lo+128+12*64), "d" (addr_lo +128+16*64), "a"(addr_lo+128), "D"(addr_lo+128+4*64)
);
}
/*
 * Flush the cache lines covered by the DQS test pattern at FS-relative
 * offset addr_lo: 9 lines for pattern 0, 18 lines for any other value.
 */
static void FlushDQSTestPattern(unsigned addr_lo, unsigned pattern )
{
	if (pattern != 0) {
		FlushDQSTestPattern_L18(addr_lo);
		return;
	}
	FlushDQSTestPattern_L9(addr_lo);
}
/*
 * Compare memory at FS-relative offset addr_lo against the reference
 * pattern in buf_a and return a per-byte-lane pass bitmap.
 *
 * The bitmap starts as 0xff (all eight lanes passing); bit N is cleared as
 * soon as any byte belonging to lane N differs. 9*64/4 dwords are compared;
 * each dword carries four lanes, so the lane counter wraps every two dwords
 * (one quadword).
 *
 * When 'pattern' is 1 (dual channel), quadwords of the two channels are
 * interleaved: channel 1 starts 8 bytes in, and after each quadword the
 * other channel's 8 bytes are skipped in both memory and buf_a.
 */
static unsigned CompareDQSTestPattern(unsigned channel, unsigned addr_lo, unsigned pattern, u8 *buf_a)
{
u32 *test_buf;
unsigned bitmap = 0xff;
unsigned bytelane;
int i;
u32 value;
int j;
u32 value_test;
test_buf = (u32 *)buf_a;
if(pattern && channel) {
addr_lo += 8; //second channel
test_buf+= 2;
}
bytelane = 0;
for(i=0;i<9*64/4;i++) {
/* fetch the next dword from memory through FS */
__asm__ volatile (
"pushl %%ebx\n\tmovl %%fs:(%1), %0\n\tpopl %%ebx\n\t"
:"=r"(value): "a" (addr_lo)
);
value_test = *test_buf;
print_debug_dqs_pair("\t\t\t\t\t\ttest_buf= ", (unsigned)test_buf, " value = ", value_test, 7);
print_debug_dqs_pair("\t\t\t\t\t\ttaddr_lo = ",addr_lo, " value = ", value, 7);
/* compare byte by byte; each byte belongs to the next lane */
for(j=0;j<4*8;j+=8) {
if(((value>>j)&0xff) != ((value_test>>j)& 0xff)) {
bitmap &= ~(1<<bytelane);
}
bytelane++;
bytelane &= 0x7;
}
/* unlike the print_debug_dqs_* helpers, this print is unconditional */
printk(BIOS_DEBUG, "\t\t\t\t\t\tbitmap = 0x%x %d\n", bitmap, 7);
/* lane counter wrapped: a full quadword has been compared */
if(bytelane == 0) {
if(pattern == 1) { //dual channel
addr_lo += 8; //skip over other channel's data
test_buf += 2;
}
}
addr_lo += 4;
test_buf +=1;
}
return bitmap;
}
/*
 * Train the DQS position for one channel and one direction
 * (DQS_READDIR or DQS_WRITEDIR).
 *
 * Phase 1: for every present chip select, sweep the DQS delay over 0..47.
 * At each delay the test pattern is written (once up front for reads, at
 * every step for writes), read back and compared; the per-byte-lane pass
 * bitmap is AND-ed into MutualCSPassW[delay], so a lane only counts as
 * passing at a delay if it passed on every chip select.
 *
 * Phase 2: for every byte lane 0..7, find the widest run of consecutive
 * passing delays. If it is at least MIN_DQS_WNDW wide, its midpoint is
 * programmed (SetDQSDelayCSR) and stored in dqs_delay_a (save_dqs_delay).
 *
 * Returns the accumulated error value: 0 on success, otherwise SB_NODQSPOS
 * and/or SB_SMALLDQS OR-ed in. If no chip select is present, phase 2 is
 * skipped and 0 is returned.
 *
 * The MutualCSPassW debug print is issued after the array is initialised;
 * it used to read the array while its contents were still indeterminate.
 */
static unsigned TrainDQSPos(const struct mem_controller *ctrl, unsigned channel, unsigned Direction, unsigned Pattern, u8 *buf_a, u8 *dqs_delay_a, struct sys_info *sysinfo)
{
	unsigned ByteLane;
	unsigned Errors;
	unsigned BanksPresent;
	unsigned MutualCSPassW[48];
	unsigned ChipSel;
	unsigned DQSDelay;
	unsigned TestAddr;
	unsigned LastTest;
	unsigned RnkDlyFilterMax, RnkDlyFilterMin;
	unsigned RnkDlySeqPassMax, RnkDlySeqPassMin = 0; /* warning: this was left unset in original code */

	Errors = 0;
	BanksPresent = 0;
	printk(BIOS_DEBUG, "\t\t\tTrainDQSPos begin 0x%x %d\n", 0, 3);
	for (DQSDelay = 0; DQSDelay < 48; DQSDelay++) {
		MutualCSPassW[DQSDelay] = 0xff; // Bitmapped status per delay setting, 0xff=All positions passing (1= PASS)
	}
	/* printed only after initialisation so the value is well defined */
	printk(BIOS_DEBUG, "TrainDQSPos: MutualCSPassW[48] : 0x%x\n", *MutualCSPassW);

	for (ChipSel = 0; ChipSel < 8; ChipSel++) { //logical register chipselects 0..7
		printk(BIOS_DEBUG, "\t\t\t\tTrainDQSPos: 11 ChipSel 0x%x %d\n", ChipSel, 4);
		//FIXME: process 64MUXedMode
		if (!ChipSelPresent(ctrl, ChipSel, sysinfo))
			continue;
		BanksPresent = 1;
		TestAddr = Get_MCTSysAddr(ctrl, ChipSel, sysinfo);
		printk(BIOS_DEBUG,"\t\t\t\tTrainDQSPos: 12 TestAddr 0x%x %d\n", TestAddr, 4);
		//set fs and use fs prefix to access the mem
		set_FSBASE(TestAddr>>24);
		if (Direction == DQS_READDIR) {
			/* read training: the pattern only needs to be written once */
			printk(BIOS_DEBUG,"\t\t\t\tTrainDQSPos: 13 for read so write at first %d %d\n", 0, 4);
			WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
		}
		for (DQSDelay = 0; DQSDelay < 48; DQSDelay++) {
			printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 141 DQSDelay 0x%x %d\n", DQSDelay, 5);
			if (MutualCSPassW[DQSDelay] == 0)
				continue; //skip current delay value if other chipselects have failed all 8 bytelanes
			SetDQSDelayAllCSR(ctrl, channel, Direction, DQSDelay);
			printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 142 MutualCSPassW 0x%x %d\n", MutualCSPassW[DQSDelay], 5);
			if (Direction == DQS_WRITEDIR) {
				/* write training: rewrite the pattern at every delay step */
				printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 143 for write 0x%x %d\n", 0, 5);
				WriteDQSTestPattern(TestAddr<<8, Pattern, buf_a);
			}
			printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 144 Pattern 0x%x %d\n", Pattern, 5);
			ReadDQSTestPattern(TestAddr<<8, Pattern);
			printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 145 MutualCSPassW 0x%x %d\n", MutualCSPassW[DQSDelay], 5);
			MutualCSPassW[DQSDelay] &= CompareDQSTestPattern(channel, TestAddr<<8, Pattern, buf_a); //0: fail, 1=pass
			printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 146 MutualCSPassW 0x%x %d\n", MutualCSPassW[DQSDelay], 5);
			/* flush the test lines so the next step reads from DRAM again */
			SetTargetWTIO(TestAddr);
			FlushDQSTestPattern(TestAddr<<8, Pattern);
			ResetTargetWTIO();
		}
	}

	if (BanksPresent) {
		for (ByteLane = 0; ByteLane < 8; ByteLane++) {
			printk(BIOS_DEBUG, "\t\t\t\tTrainDQSPos: 31 ByteLane 0x%x %d\n",ByteLane, 4);
			LastTest = DQS_FAIL;
			RnkDlySeqPassMax = 0;
			RnkDlyFilterMax = 0;
			RnkDlyFilterMin = 0;
			/* find the widest run of consecutive passing delays for this lane */
			for (DQSDelay = 0; DQSDelay < 48; DQSDelay++) {
				if (MutualCSPassW[DQSDelay] & (1<<ByteLane)) {
					printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 321 DQSDelay 0x%x %d\n", DQSDelay, 5);
					printk(BIOS_DEBUG, "\t\t\t\t\tTrainDQSPos: 322 MutualCSPassW 0x%x %d\n", MutualCSPassW[DQSDelay], 5);
					RnkDlySeqPassMax = DQSDelay;
					if (LastTest == DQS_FAIL) {
						RnkDlySeqPassMin = DQSDelay; //start sequential run
					}
					if ((RnkDlySeqPassMax - RnkDlySeqPassMin)>(RnkDlyFilterMax-RnkDlyFilterMin)) {
						RnkDlyFilterMin = RnkDlySeqPassMin;
						RnkDlyFilterMax = RnkDlySeqPassMax;
					}
					LastTest = DQS_PASS;
				}
				else {
					LastTest = DQS_FAIL;
				}
			}
			printk(BIOS_DEBUG, "\t\t\t\tTrainDQSPos: 33 RnkDlySeqPassMax 0x%x %d\n", RnkDlySeqPassMax, 4);
			if (RnkDlySeqPassMax == 0) {
				Errors |= SB_NODQSPOS; // no passing window
			}
			else {
				printk(BIOS_DEBUG, "\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMax 0x%x %d\n", RnkDlyFilterMax, 4);
				printk(BIOS_DEBUG, "\t\t\t\tTrainDQSPos: 34 RnkDlyFilterMin 0x%x %d\n", RnkDlyFilterMin, 4);
				if ((RnkDlyFilterMax - RnkDlyFilterMin)< MIN_DQS_WNDW) {
					Errors |= SB_SMALLDQS;
				}
				else {
					unsigned middle_dqs;
					/* centre the DQS in the widest passing window */
					middle_dqs = MiddleDQS(RnkDlyFilterMin, RnkDlyFilterMax);
					printk(BIOS_DEBUG, "\t\t\t\tTrainDQSPos: 35 middle_dqs 0x%x %d\n",middle_dqs, 4);
					SetDQSDelayCSR(ctrl, channel, ByteLane, Direction, middle_dqs);
					save_dqs_delay(channel, ByteLane, Direction, dqs_delay_a, middle_dqs);
				}
			}
		}
	}
	printk(BIOS_DEBUG, "\t\t\tTrainDQSPos: end 0x%x %d\n", 0xff, 3);
	return Errors;
}
/*
 * Train the read-direction DQS position for 'channel': logs a debug line
 * and forwards to TrainDQSPos() with DQS_READDIR. Returns its error value.
 */
static unsigned TrainReadDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, u8 *buf_a, u8 *dqs_delay_a, struct sys_info *sysinfo)
{
printk(BIOS_DEBUG, "\t\tTrainReadPos 0x%x %d\n", 0, 2);
return TrainDQSPos(ctrl, channel, DQS_READDIR, pattern, buf_a, dqs_delay_a, sysinfo);
}
/*
 * Train the write-direction DQS position for 'channel': logs a debug line
 * and forwards to TrainDQSPos() with DQS_WRITEDIR. Returns its error value.
 */
static unsigned TrainWriteDQS(const struct mem_controller *ctrl, unsigned channel, unsigned pattern, u8 *buf_a, u8 *dqs_delay_a, struct sys_info *sysinfo)
{
printk(BIOS_DEBUG, "\t\tTrainWritePos 0x%x %d\n", 0, 2);
return TrainDQSPos(ctrl, channel, DQS_WRITEDIR, pattern, buf_a, dqs_delay_a, sysinfo);
}
/*
 * Train the read and write DQS positions of one memory controller.
 *
 * Sequence:
 *  - call enable_sse2() and set_wrap32dis(), and clear DCL_DimmEccEn in
 *    DRAM_CONFIG_LOW (the previous ECC bit is remembered in ecc_bit);
 *  - copy one of the two test patterns below into a 16-byte aligned buffer
 *    on the stack (TestPatternJD1b when is_Width128 is set, otherwise
 *    TestPatternJD1a);
 *  - for each trained channel, step the write DQS delay from 0 to 47 until
 *    TrainReadDQS() reports no error, then run TrainWriteDQS();
 *  - put the saved ECC bit back, call clear_wrap32dis() and disable_sse2().
 *
 * Returns the error flags left in Errors; the caller in this file treats
 * any non-zero value as failure.
 */
static unsigned TrainDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
{
// Pattern used when is_Width128 is 0: 36 rows of 4 dwords (16*9 dwords).
static const u32 TestPatternJD1a[] = {
0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW0-1, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW2-3, ALL-EVEN
0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFF, // QW4-5, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW6-7, ALL-EVEN
0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW0-1, DQ0-ODD
0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW2-3, DQ0-ODD
0x01010101,0x01010101,0xFeFeFeFe,0xFeFeFeFe, // QW4-5, DQ0-ODD
0xFeFeFeFe,0xFeFeFeFe,0x01010101,0x01010101, // QW6-7, DQ0-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW0-1, DQ1-ODD
0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2-3, DQ1-ODD
0xFdFdFdFd,0xFdFdFdFd,0x02020202,0x02020202, // QW4-5, DQ1-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW6-7, DQ1-ODD
0x04040404,0x04040404,0xfBfBfBfB,0xfBfBfBfB, // QW0-1, DQ2-ODD
0x04040404,0x04040404,0x04040404,0x04040404, // QW2-3, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4-5, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6-7, DQ2-ODD
0x08080808,0x08080808,0xF7F7F7F7,0xF7F7F7F7, // QW0-1, DQ3-ODD
0x08080808,0x08080808,0x08080808,0x08080808, // QW2-3, DQ3-ODD
0xF7F7F7F7,0xF7F7F7F7,0x08080808,0x08080808, // QW4-5, DQ3-ODD
0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6-7, DQ3-ODD
0x10101010,0x10101010,0x10101010,0x10101010, // QW0-1, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW2-3, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4-5, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0x10101010,0x10101010, // QW6-7, DQ4-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0-1, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0x20202020,0x20202020, // QW2-3, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4-5, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6-7, DQ5-ODD
0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0-1, DQ6-ODD
0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW2-3, DQ6-ODD
0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW4-5, DQ6-ODD
0x40404040,0x40404040,0xBfBfBfBf,0xBfBfBfBf, // QW6-7, DQ6-ODD
0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW0-1, DQ7-ODD
0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW2-3, DQ7-ODD
0x80808080,0x80808080,0x7F7F7F7F,0x7F7F7F7F, // QW4-5, DQ7-ODD
0x80808080,0x80808080,0x80808080,0x80808080 // QW6-7, DQ7-ODD
};
// Pattern used when is_Width128 is set: 72 rows of 4 dwords (16*18 dwords).
static const u32 TestPatternJD1b[] = {
0x00000000,0x00000000,0x00000000,0x00000000, // QW0,CHA-B, ALL-EVEN
0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW1,CHA-B, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW2,CHA-B, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW3,CHA-B, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW4,CHA-B, ALL-EVEN
0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF, // QW5,CHA-B, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW6,CHA-B, ALL-EVEN
0x00000000,0x00000000,0x00000000,0x00000000, // QW7,CHA-B, ALL-EVEN
0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW0,CHA-B, DQ0-ODD
0x01010101,0x01010101,0x01010101,0x01010101, // QW1,CHA-B, DQ0-ODD
0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW2,CHA-B, DQ0-ODD
0x01010101,0x01010101,0x01010101,0x01010101, // QW3,CHA-B, DQ0-ODD
0x01010101,0x01010101,0x01010101,0x01010101, // QW4,CHA-B, DQ0-ODD
0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW5,CHA-B, DQ0-ODD
0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe,0xFeFeFeFe, // QW6,CHA-B, DQ0-ODD
0x01010101,0x01010101,0x01010101,0x01010101, // QW7,CHA-B, DQ0-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW0,CHA-B, DQ1-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW1,CHA-B, DQ1-ODD
0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW2,CHA-B, DQ1-ODD
0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW3,CHA-B, DQ1-ODD
0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd,0xFdFdFdFd, // QW4,CHA-B, DQ1-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW5,CHA-B, DQ1-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW6,CHA-B, DQ1-ODD
0x02020202,0x02020202,0x02020202,0x02020202, // QW7,CHA-B, DQ1-ODD
0x04040404,0x04040404,0x04040404,0x04040404, // QW0,CHA-B, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW1,CHA-B, DQ2-ODD
0x04040404,0x04040404,0x04040404,0x04040404, // QW2,CHA-B, DQ2-ODD
0x04040404,0x04040404,0x04040404,0x04040404, // QW3,CHA-B, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW4,CHA-B, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW5,CHA-B, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW6,CHA-B, DQ2-ODD
0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB,0xfBfBfBfB, // QW7,CHA-B, DQ2-ODD
0x08080808,0x08080808,0x08080808,0x08080808, // QW0,CHA-B, DQ3-ODD
0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW1,CHA-B, DQ3-ODD
0x08080808,0x08080808,0x08080808,0x08080808, // QW2,CHA-B, DQ3-ODD
0x08080808,0x08080808,0x08080808,0x08080808, // QW3,CHA-B, DQ3-ODD
0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW4,CHA-B, DQ3-ODD
0x08080808,0x08080808,0x08080808,0x08080808, // QW5,CHA-B, DQ3-ODD
0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW6,CHA-B, DQ3-ODD
0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7,0xF7F7F7F7, // QW7,CHA-B, DQ3-ODD
0x10101010,0x10101010,0x10101010,0x10101010, // QW0,CHA-B, DQ4-ODD
0x10101010,0x10101010,0x10101010,0x10101010, // QW1,CHA-B, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW2,CHA-B, DQ4-ODD
0x10101010,0x10101010,0x10101010,0x10101010, // QW3,CHA-B, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW4,CHA-B, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW5,CHA-B, DQ4-ODD
0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF,0xeFeFeFeF, // QW6,CHA-B, DQ4-ODD
0x10101010,0x10101010,0x10101010,0x10101010, // QW7,CHA-B, DQ4-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW0,CHA-B, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW1,CHA-B, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW2,CHA-B, DQ5-ODD
0x20202020,0x20202020,0x20202020,0x20202020, // QW3,CHA-B, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW4,CHA-B, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW5,CHA-B, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW6,CHA-B, DQ5-ODD
0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF,0xdFdFdFdF, // QW7,CHA-B, DQ5-ODD
0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW0,CHA-B, DQ6-ODD
0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW1,CHA-B, DQ6-ODD
0x40404040,0x40404040,0x40404040,0x40404040, // QW2,CHA-B, DQ6-ODD
0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW3,CHA-B, DQ6-ODD
0x40404040,0x40404040,0x40404040,0x40404040, // QW4,CHA-B, DQ6-ODD
0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW5,CHA-B, DQ6-ODD
0x40404040,0x40404040,0x40404040,0x40404040, // QW6,CHA-B, DQ6-ODD
0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf,0xBfBfBfBf, // QW7,CHA-B, DQ6-ODD
0x80808080,0x80808080,0x80808080,0x80808080, // QW0,CHA-B, DQ7-ODD
0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW1,CHA-B, DQ7-ODD
0x80808080,0x80808080,0x80808080,0x80808080, // QW2,CHA-B, DQ7-ODD
0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW3,CHA-B, DQ7-ODD
0x80808080,0x80808080,0x80808080,0x80808080, // QW4,CHA-B, DQ7-ODD
0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F,0x7F7F7F7F, // QW5,CHA-B, DQ7-ODD
0x80808080,0x80808080,0x80808080,0x80808080, // QW6,CHA-B, DQ7-ODD
0x80808080,0x80808080,0x80808080,0x80808080 // QW7,CHA-B, DQ7-ODD
};
u8 pattern_buf_x[64 * 18 + 16]; // Room for the larger pattern (64*18 bytes) plus 16 spare bytes so the start can be rounded up to a 16-byte boundary
u8 *buf_a;
unsigned pattern;
u32 dword;
u32 ecc_bit;
unsigned Errors;
unsigned channel;
int i;
unsigned DQSWrDelay;
unsigned is_Width128 = sysinfo->meminfo[ctrl->node_id].is_Width128;
u8 *dqs_delay_a = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9]; //channel 2, direction 2 , bytelane *9
//enable SSE2
enable_sse2();
//wrap32dis
set_wrap32dis();
//disable ECC temp
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
// Remember the current ECC enable bit so it can be put back after training.
ecc_bit = dword & DCL_DimmEccEn;
dword &= ~(DCL_DimmEccEn);
pci_conf1_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
//SetupDqsPattern
// Round the buffer address up to the next multiple of 16 (always advances by 1..16 bytes,
// which is what the 16 spare bytes in pattern_buf_x are for).
// NOTE(review): the (u32) cast of the pointer assumes 32-bit addresses.
buf_a = (u8 *)(((u32)(&pattern_buf_x[0]) + 0x10) & (~0xf));
if(is_Width128){
// 128-bit mode: pattern id 1, copy all 16*18 dwords of TestPatternJD1b.
pattern = 1;
for(i=0;i<16*18;i++) {
*((u32 *)(buf_a + i*4)) = TestPatternJD1b[i];
}
}
else {
// Otherwise: pattern id 0, copy all 16*9 dwords of TestPatternJD1a.
pattern = 0;
for(i=0; i<16*9;i++) {
*((u32 *)(buf_a + i*4)) = TestPatternJD1a[i];
}
}
printk(BIOS_DEBUG, "\nTrainDQSRdWrPos: 0 ctrl 0x%x %d\n", ctrl->node_id, 0);
printk(BIOS_DEBUG, "TrainDQSRdWrPos: buf_a: %02x\n", *buf_a);
Errors = 0;
channel = 0;
// Stop as soon as a channel leaves a non-zero error mask.
while( (channel<2) && (!Errors)) {
printk(BIOS_DEBUG, "\tTrainDQSRdWrPos: 1 channel 0x%x %d\n",channel, 1);
// Step the write DQS delay applied to all byte lanes until read training passes.
for(DQSWrDelay = 0; DQSWrDelay < 48; DQSWrDelay++) {
unsigned err;
SetDQSDelayAllCSR(ctrl, channel, DQS_WRITEDIR, DQSWrDelay);
printk(BIOS_DEBUG, "\t\tTrainDQSRdWrPos: 21 DQSWrDelay 0x%x %d\n", DQSWrDelay, 2);
err= TrainReadDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
printk(BIOS_DEBUG, "\t\tTrainDQSRdWrPos: 22 err 0x%x %d\n",err, 2);
// First write delay with a clean read training result ends the sweep.
if(err == 0) break;
Errors |= err;
}
printk(BIOS_DEBUG, "\tTrainDQSRdWrPos: 3 DQSWrDelay 0x%x %d\n", DQSWrDelay, 1);
// A usable write delay was found: the errors collected during the sweep are
// replaced by the result of write training. If the sweep ran to 48, Errors
// keeps the accumulated read-training flags and the outer loop terminates.
if(DQSWrDelay < 48) {
Errors = TrainWriteDQS(ctrl, channel, pattern, buf_a, dqs_delay_a, sysinfo);
printk(BIOS_DEBUG, "\tTrainDQSRdWrPos: 4 Errors 0x%x %d\n", Errors, 1);
}
channel++;
if(!is_Width128){
//FIXME: 64MuxMode??
channel++; // skip channel if 64-bit mode
}
}
//Enable ECC again
dword = pci_conf1_read_config32(ctrl->f2, DRAM_CONFIG_LOW);
dword &= ~(DCL_DimmEccEn);
// ecc_bit holds only the DCL_DimmEccEn bit as it was on entry.
dword |= ecc_bit;
pci_conf1_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dword);
//Clear wrap32dis
clear_wrap32dis();
//restore SSE2 setting
disable_sse2();
printk(BIOS_DEBUG, "TrainDQSRdWrPos: 0x%x %d\n", 5, 0);
return Errors;
}
/*
 * Fetch one saved DQS delay from a per-node delay table.
 *
 * The table is indexed as [channel][direction][bytelane], with 2 direction
 * slots per channel and 9 byte-lane slots per direction.
 */
static inline u8 get_dqs_delay(unsigned channel, unsigned bytelane, unsigned direction, u8 *dqs_delay_a)
{
	const unsigned dirs_per_channel = 2;
	const unsigned lanes_per_dir = 9;
	unsigned slot = (channel * dirs_per_channel + direction) * lanes_per_dir + bytelane;

	return dqs_delay_a[slot];
}
/*
 * Interpolate a DQS delay between the saved delays of two byte lanes.
 *
 * InterFactor selects the blend (applied in 1/256 steps):
 *   0:    100% ByteLane 0
 *   0x80: 50% between ByteLane 0 and 1
 *   0xff: 99.6% ByteLane 1 and 0.4% like 0
 *
 * The computation always starts from the smaller of the two delays and adds
 * a fraction of their difference; when ByteLane0 holds the larger delay the
 * factor is mirrored (0xff - InterFactor) so the weighting still refers to
 * ByteLane1. The product is scaled with a right shift by 8, i.e. a division
 * by 256 with truncation.
 */
static unsigned CalcEccDQSPos(unsigned channel,unsigned ByteLane0, unsigned ByteLane1, unsigned InterFactor, unsigned Direction, u8 *dqs_delay_a)
{
	unsigned d0 = get_dqs_delay(channel, ByteLane0, Direction, dqs_delay_a);
	unsigned d1 = get_dqs_delay(channel, ByteLane1, Direction, dqs_delay_a);
	unsigned low;
	unsigned span;
	unsigned weight;

	if (d0 > d1) {
		low = d1;
		span = d0 - d1;
		weight = 0xff - InterFactor;
	} else {
		low = d0;
		span = d1 - d0;
		weight = InterFactor;
	}

	return low + ((span * weight) >> 8);
}
/*
 * Derive and program the DQS delay of byte lane 8 for both channels and
 * both directions of one node.
 *
 * For every (channel, direction) pair the delay is computed by
 * CalcEccDQSPos() from byte lanes 4 and 5 with an interpolation factor of 0
 * (which CalcEccDQSPos documents as 100% of the first lane), written to the
 * controller with SetDQSDelayCSR() and stored in the node's delay table with
 * save_dqs_delay().
 */
static void SetEccDQSRdWrPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
{
	const unsigned dirs[2] = { DQS_READDIR, DQS_WRITEDIR };
	const unsigned target_lane = 8;
	const unsigned src_lane0 = 4;
	const unsigned src_lane1 = 5;
	const unsigned blend = 0;
	/* per-node table: 2 channels x 2 directions x 9 byte lanes */
	u8 *node_delays = &sysinfo->dqs_delay_a[ctrl->node_id * 2*2*9];
	unsigned ch;
	unsigned d;

	for (ch = 0; ch < 2; ch++) {
		for (d = 0; d < 2; d++) {
			unsigned dir = dirs[d];
			unsigned pos = CalcEccDQSPos(ch, src_lane0, src_lane1, blend, dir, node_delays);

			print_debug_dqs_pair("\t\tSetEccDQSRdWrPos: channel ", ch, dir==DQS_READDIR? " R dqs_delay":" W dqs_delay", pos, 2);
			SetDQSDelayCSR(ctrl, ch, target_lane, dir, pos);
			save_dqs_delay(ch, target_lane, dir, node_delays, pos);
		}
	}
}
/*
 * Run one pass of receiver-enable training on a controller.
 *
 * Returns 1 when TrainRcvrEn() reports a non-zero result and 0 otherwise.
 * The closing trace line is only printed on success.
 */
static unsigned train_DqsRcvrEn(const struct mem_controller *ctrl, unsigned Pass, struct sys_info *sysinfo)
{
	unsigned failed;

	printk(BIOS_DEBUG, "\ntrain_DqsRcvrEn: begin ctrl 0x%x %d\n", ctrl->node_id, 0);
	failed = TrainRcvrEn(ctrl, Pass, sysinfo) ? 1 : 0;
	if (!failed) {
		printk(BIOS_DEBUG, "\ntrain_DqsRcvrEn: end ctrl 0x%x %d\n", ctrl->node_id, 0);
	}
	return failed;
}
/*
 * Train the DQS read/write positions of one controller and, on success,
 * derive the settings for byte lane 8 via SetEccDQSRdWrPos().
 *
 * Returns 0 on success, 1 when TrainDQSRdWrPos() reports any error (an
 * error message is logged and the closing trace line is skipped).
 */
static unsigned train_DqsPos(const struct mem_controller *ctrl, struct sys_info *sysinfo)
{
	printk(BIOS_DEBUG, "\ntrain_DqsPos: begin ctrl %d\n", ctrl->node_id);

	if (TrainDQSRdWrPos(ctrl, sysinfo) == 0) {
		SetEccDQSRdWrPos(ctrl, sysinfo);
		printk(BIOS_DEBUG, "\ntrain_DqsPos: end ctrl 0x%x %d\n", ctrl->node_id, 0);
		return 0;
	}

	printk(BIOS_ERR, "\nDQS Training Rd Wr failed ctrl %d\n", ctrl->node_id);
	return 1;
}
#ifdef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
/*
 * Workaround applied between receiver-enable pass 1 and DQS position
 * training for CPUs that is_cpu_pre_f2_in_bsp() identifies.
 *
 * First loop, for every present controller that has DIMMs and sits on such
 * a CPU:
 *  - clear DC_DqsRcvEnTrain in DRAM_CTRL;
 *  - set and then clear DI_EnDramInit in DRAM_INIT;
 *  - compute a deadline: current cycle counter plus tsc0[i].
 * Second loop: busy-wait on each of those controllers until the cycle
 * counter has reached its deadline.
 *
 * @controllers: number of entries in ctrl[] / tsc0[]; the local arrays hold
 *               8 entries, so this assumes controllers <= 8 (TODO confirm
 *               against callers).
 * @ctrl:        array of memory controllers.
 * @tsc0:        per-controller cycle count to wait for.
 * @sysinfo:     supplies ctrl_present[] and meminfo[].dimm_mask.
 *
 * NOTE(review): u64 is treated as a plain 64-bit integer (cycles() result is
 * added and compared directly); the previous code accessed .hi/.lo members
 * on it and passed the tsc1 array itself to a %llx conversion.
 */
static void f0_svm_workaround(int controllers, const struct mem_controller *ctrl, u64 *tsc0, struct sys_info *sysinfo)
{
	u64 tsc1[8];
	unsigned cpu_f0_f1[8];
	int i;

	/* Trace the address of the deadline array. */
	printk(BIOS_DEBUG, "dqs_timing: tsc1[8] :0x%lx", (unsigned long)tsc1);

	for(i = 0; i < controllers; i++) {
		u32 dword;

		if (!sysinfo->ctrl_present[i])
			continue;

		/* Skip everything if I don't have any memory on this controller */
		if(sysinfo->meminfo[i].dimm_mask==0x00) continue;

		cpu_f0_f1[i] = is_cpu_pre_f2_in_bsp(i);

		if(!cpu_f0_f1[i]) continue;

		dword = pci_conf1_read_config32(ctrl[i].f2, DRAM_CTRL);
		dword &= ~DC_DqsRcvEnTrain;
		pci_conf1_write_config32(ctrl[i].f2, DRAM_CTRL, dword);

		/* Pulse DI_EnDramInit: write it set, then write it cleared. */
		dword = pci_conf1_read_config32(ctrl[i].f2, DRAM_INIT);
		dword |= DI_EnDramInit;
		pci_conf1_write_config32(ctrl[i].f2, DRAM_INIT, dword);
		dword &= ~DI_EnDramInit;
		pci_conf1_write_config32(ctrl[i].f2, DRAM_INIT, dword);

		tsc1[i] = cycles();
		print_debug_dqs_tsc("begin: tsc1", i, (unsigned)(tsc1[i] >> 32), (unsigned)tsc1[i], 2);

		/* Deadline = now + requested delay (64-bit add carries automatically). */
		tsc1[i] += tsc0[i];
		print_debug_dqs_tsc("end : tsc1", i, (unsigned)(tsc1[i] >> 32), (unsigned)tsc1[i], 2);
	}

	for(i = 0; i < controllers; i++) {
		u64 tsc;

		/* Same filters as above, so only entries written in the first loop are read. */
		if (!sysinfo->ctrl_present[i])
			continue;

		/* Skip everything if I don't have any memory on this controller */
		if(sysinfo->meminfo[i].dimm_mask==0x00) continue;

		if(!cpu_f0_f1[i]) continue;

		/* Spin until the cycle counter reaches the deadline. */
		do {
			tsc = cycles();
		} while (tsc1[i] > tsc);

		print_debug_dqs_tsc("end : tsc ", i, (unsigned)(tsc >> 32), (unsigned)tsc, 2);
	}
}
#endif
/* setting variable mtrr, comes from linux kernel source */
static void set_var_mtrr_dqs(
unsigned int reg, unsigned long basek, unsigned long sizek,
unsigned char type, unsigned address_bits)
{
struct msr base, mask;
unsigned address_mask_high;
address_mask_high = ((1u << (address_bits - 32u)) - 1u);
base.hi = basek >> 22;
base.lo = basek << 10;
if (sizek < 4*1024*1024) {
mask.hi = address_mask_high;
mask.lo = ~((sizek << 10) -1);
}
else {
mask.hi = address_mask_high & (~((sizek >> 22) -1));
mask.lo = 0;
}
if (reg >= 8)
return;
if (sizek == 0) {
struct msr zero;
zero.lo = zero.hi = 0;
/* The invalid bit is kept in the mask, so we simply clear the
relevant mask register to disable a range. */
wrmsr (MTRRphysMask_MSR(reg), zero);
} else {
/* Bit 32-35 of MTRRphysMask should be set to 1 */
base.lo |= type;
mask.lo |= 0x800;
wrmsr (MTRRphysBase_MSR(reg), base);
wrmsr (MTRRphysMask_MSR(reg), mask);
}
}
/*
 * fms: find most significant bit set.
 *
 * Returns the zero-based index of the highest set bit of x, or 0 when x is
 * 0 (so the results for x == 0 and x == 1 are the same).
 *
 * Uses the compiler's count-leading-zeros builtin instead of hand-written
 * x86 "bsrl" assembly; __builtin_clz() is undefined for 0, hence the
 * explicit zero check.
 */
static inline unsigned int fms(unsigned int x)
{
	if (x == 0)
		return 0;
	return (unsigned int)(8 * sizeof(x)) - 1u - (unsigned int)__builtin_clz(x);
}
/*
 * fls: find least significant bit set.
 *
 * Returns the zero-based index of the lowest set bit of x, or 32 (the bit
 * width of unsigned int here) when x is 0.
 *
 * Note that this differs from the Linux helper of the same name, which
 * finds the LAST set bit.
 *
 * Uses the compiler's count-trailing-zeros builtin instead of hand-written
 * x86 "bsfl" assembly; __builtin_ctz() is undefined for 0, hence the
 * explicit zero check.
 */
static inline unsigned int fls(unsigned int x)
{
	if (x == 0)
		return (unsigned int)(8 * sizeof(x));
	return (unsigned int)__builtin_ctz(x);
}
/*
 * Cover [range_startk, range_startk + range_sizek) with variable MTRRs.
 *
 * The range is split into power-of-two sized pieces: each piece is as large
 * as the alignment of the current start address (fls) and the remaining
 * size (fms) allow. One MTRR is programmed per piece via
 * set_var_mtrr_dqs(), starting at index reg.
 *
 * @reg:               first variable MTRR index to use.
 * @range_startk:      start of the range in KiB.
 * @range_sizek:       size of the range in KiB.
 * @next_range_startk: not used by this function.
 * @type:              memory type for the range.
 * @address_bits:      physical address width, passed to set_var_mtrr_dqs().
 *
 * Returns the next unused MTRR index. Stops early once index 8 is reached,
 * in which case part of the range stays uncovered.
 */
static unsigned int range_to_mtrr(unsigned int reg,
	unsigned long range_startk, unsigned long range_sizek,
	unsigned long next_range_startk, unsigned char type, unsigned address_bits)
{
	if (!range_sizek || (reg >= 8)) {
		return reg;
	}
	while(range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;
		/* Compute the maximum size I can make a range */
		max_align = fls(range_startk);
		align = fms(range_sizek);
		if (align > max_align) {
			align = max_align;
		}
		/* Unsigned long shift: a plain int "1 << 31" would be undefined. */
		sizek = 1UL << align;
#if MEM_TRAIN_SEQ != 1
		printk(BIOS_DEBUG, "Setting variable MTRR %d, base: %4ldMB, range: %4ldMB, type %s\n",
			reg, range_startk >>10, sizek >> 10,
			(type==MTRR_TYPE_UNCACHEABLE)?"UC":
			((type==MTRR_TYPE_WRBACK)?"WB":"Other")
			);
#endif
		set_var_mtrr_dqs(reg++, range_startk, sizek, type, address_bits);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= 8)
			break;
	}
	return reg;
}
void set_top_mem_ap(unsigned tom_k, unsigned tom2_k)
{
struct msr msr;
/* Now set top of memory */
msr.lo = (tom2_k & 0x003fffff) << 10;
msr.hi = (tom2_k & 0xffc00000) >> 22;
wrmsr(TOP_MEM2, msr);
msr.lo = (tom_k & 0x003fffff) << 10;
msr.hi = (tom_k & 0xffc00000) >> 22;
wrmsr(TOP_MEM, msr);
}
/*
 * Set up MTRRs so that DQS training can access its test addresses.
 *
 *  - MSRs 0x250 and 0x258 (per the original note: the fixed ranges
 *    [0,512k) and [512k,640k)) get 0x1e in every byte.
 *  - Variable MTRRs starting at index 2 cover [0, tom_k) as
 *    MTRR_TYPE_WRBACK with 40 address bits.
 *  - When tom2_k is non-zero, bits 21 and 22 of SYSCFG_MSR are set
 *    (MtrrTom2En and Tom2ForceMemTypeWB).
 *
 * SYSCFG_MSR_MtrrFixDramModEn is not touched here; the original note says
 * it is still enabled from cache_as_ram.inc.
 */
static void setup_mtrr_dqs(unsigned tom_k, unsigned tom2_k){
	struct msr msr;

	//[0,512k), [512k, 640k)
	msr.lo = 0x1e1e1e1e;
	msr.hi = 0x1e1e1e1e;
	wrmsr(0x250, msr);
	wrmsr(0x258, msr);

	// [0, TOM) through the variable MTRRs; the returned next-free index is not needed
	(void)range_to_mtrr(2, 0, tom_k,4*1024*1024, MTRR_TYPE_WRBACK, 40);

	//[4G, TOM2)
	if(!tom2_k)
		return;

	//enable tom2 and type
	msr = rdmsr(SYSCFG_MSR);
	msr.lo |= (1<<21) | (1<<22); //MtrrTom2En and Tom2ForceMemTypeWB
	wrmsr(SYSCFG_MSR, msr);
}
/*
 * Undo the MTRR setup done for DQS training.
 *
 *  - Set SYSCFG_MSR_MtrrFixDramModEn in SYSCFG_MSR.
 *  - Write zero to MSRs 0x250 and 0x258 (per the original note: the fixed
 *    ranges [0,512k) and [512k,640k)).
 *  - Write zero to MSRs 0x204..0x20f (presumably the base/mask pairs of
 *    variable MTRRs 2..7 -- TODO confirm against MTRRphysBase_MSR()).
 *  - When tom2_k is non-zero, clear bits 21 and 22 of SYSCFG_MSR
 *    (MtrrTom2En and Tom2ForceMemTypeWB).
 */
static void clear_mtrr_dqs(unsigned tom2_k){
	struct msr msr;
	unsigned idx;

	//still enable from cache_as_ram.inc
	msr = rdmsr(SYSCFG_MSR);
	msr.lo |= SYSCFG_MSR_MtrrFixDramModEn;
	wrmsr(SYSCFG_MSR,msr);

	// everything below writes an all-zero value
	msr.lo = 0;
	msr.hi = 0;

	//[0,512k), [512k, 640k)
	wrmsr(0x250, msr);
	wrmsr(0x258, msr);

	//[1M, TOM)
	idx = 0x204;
	while (idx < 0x210) {
		wrmsr(idx, msr);
		idx++;
	}

	//[4G, TOM2)
	if(!tom2_k)
		return;

	//disable tom2 and forced type
	msr = rdmsr(SYSCFG_MSR);
	msr.lo &= ~((1<<21) | (1<<22)); //MtrrTom2En and Tom2ForceMemTypeWB
	wrmsr(SYSCFG_MSR, msr);
}
/*
 * Set or clear a single bit in the HT_INIT_CONTROL register of a node.
 *
 * @i:   node index; the register is read from PCI device 0x18+i on bus 0,
 *       function 0.
 * @val: only bit 0 is used: 1 sets the target bit, 0 clears it.
 * @bit: bit position inside the 32-bit register.
 */
static void set_htic_bit(unsigned i, unsigned val, unsigned bit)
{
	u32 htic;

	htic = pci_conf1_read_config32(PCI_BDF(0, 0x18+i, 0), HT_INIT_CONTROL);
	if (val & 1) {
		htic |= (1<<bit);
	} else {
		htic &= ~(1<<bit);
	}
	pci_conf1_write_config32(PCI_BDF(0, 0x18+i, 0), HT_INIT_CONTROL, htic);
}
/*
 * Read a single bit of the HT_INIT_CONTROL register of a node.
 *
 * @i:   node index; the register is read from PCI device 0x18+i on bus 0,
 *       function 0.
 * @bit: bit position inside the 32-bit register.
 *
 * Returns the register value masked to that bit, i.e. 0 when the bit is
 * clear and (1 << bit) -- not 1 -- when it is set.
 */
static unsigned get_htic_bit(unsigned i, unsigned bit)
{
	u32 htic = pci_conf1_read_config32(PCI_BDF(0, 0x18+i, 0), HT_INIT_CONTROL);

	return htic & (1<<bit);
}
/*
 * Busy-wait until bit 9 of node 0's HT_INIT_CONTROL register reads as set.
 * That bit is the one set_sysinfo_in_ram() writes. There is no timeout.
 */
void wait_till_sysinfo_in_ram(void)
{
	while (!get_htic_bit(0, 9)) {
		/* keep polling */
	}
}
/*
 * Write bit 0 of val into bit 9 of node 0's HT_INIT_CONTROL register, the
 * flag that wait_till_sysinfo_in_ram() polls.
 */
void set_sysinfo_in_ram(unsigned val)
{
	const unsigned node = 0;
	const unsigned flag_bit = 9;

	set_htic_bit(node, val, flag_bit);
}
#if MEM_TRAIN_SEQ == 0
/*
 * Run the complete DQS training sequence over all controllers.
 *
 * Controllers that are not present or have an empty dimm_mask are skipped
 * in every phase. Phases, each run over all controllers before the next
 * one starts:
 *   1. fill_mem_cs_sysinfo()
 *   2. receiver-enable training, pass 1
 *   3. f0_svm_workaround() (only with K8_REV_F_SUPPORT_F0_F1_WORKAROUND)
 *   4. DQS position training
 *   5. receiver-enable training, pass 2; mem_trained[i] is set to 1 after
 *      a controller completes this pass
 *
 * The first failing training step aborts the whole sequence (remaining
 * controllers and phases are skipped). MTRRs are set up before phase 1 and
 * cleared again on every exit path.
 *
 * Cycle counts are sampled between phases into tsc[], but printing them is
 * currently disabled.
 */
#ifdef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
static void dqs_timing(int controllers, const struct mem_controller *ctrl, u64 *tsc0, struct sys_info *sysinfo)
#else
void dqs_timing(int controllers, const struct mem_controller *ctrl, struct sys_info *sysinfo)
#endif
{
	u64 tsc[5];
	int node;

	//need to enable mtrr, so dqs training could access the test address
	setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);

	for (node = 0; node < controllers; node++) {
		/* Skip absent controllers and controllers without memory */
		if (!sysinfo->ctrl_present[node] || sysinfo->meminfo[node].dimm_mask == 0x00)
			continue;

		fill_mem_cs_sysinfo(node, &ctrl[node], sysinfo);
	}

	tsc[0] = cycles();

	for (node = 0; node < controllers; node++) {
		if (!sysinfo->ctrl_present[node] || sysinfo->meminfo[node].dimm_mask == 0x00)
			continue;

		printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass1: %d", node);
		if (train_DqsRcvrEn(&ctrl[node], 1, sysinfo))
			goto out;
		printk(BIOS_DEBUG, " done\n");
	}

	tsc[1] = cycles();

#ifdef K8_REV_F_SUPPORT_F0_F1_WORKAROUND
	f0_svm_workaround(controllers, ctrl, tsc0, sysinfo);
#endif

	tsc[2] = cycles();

	for (node = 0; node < controllers; node++) {
		if (!sysinfo->ctrl_present[node] || sysinfo->meminfo[node].dimm_mask == 0x00)
			continue;

		printk(BIOS_DEBUG, "DQS Training:DQSPos: %d", node);
		if (train_DqsPos(&ctrl[node], sysinfo))
			goto out;
		printk(BIOS_DEBUG, " done\n");
	}

	tsc[3] = cycles();

	for (node = 0; node < controllers; node++) {
		if (!sysinfo->ctrl_present[node] || sysinfo->meminfo[node].dimm_mask == 0x00)
			continue;

		printk(BIOS_DEBUG, "DQS Training:RcvrEn:Pass2: %d", node);
		if (train_DqsRcvrEn(&ctrl[node], 2, sysinfo))
			goto out;
		printk(BIOS_DEBUG, " done\n");

		sysinfo->mem_trained[node] = 1;
	}

out:
	tsc[4] = cycles();

	clear_mtrr_dqs(sysinfo->tom2_k);

	/* The per-phase timestamps in tsc[0..4] are collected but not printed
	   (the "DQS Training:tsc" report is disabled). */
}
#endif
#if MEM_TRAIN_SEQ > 0
/*
 * Run the DQS training sequence for a single node.
 *
 * @i:       node index into sysinfo->mem_trained[].
 * @ctrl:    memory controller of that node.
 * @sysinfo: system information block.
 * @v:       non-zero enables progress messages and the timestamp report.
 *
 * Nothing happens unless mem_trained[i] is 0x80 on entry. On return
 * mem_trained[i] is:
 *   1    all three steps succeeded,
 *   0x81 receiver-enable pass 1 failed,
 *   0x82 DQS position training failed,
 *   0x83 receiver-enable pass 2 failed.
 *
 * With MEM_TRAIN_SEQ == 1 the MTRRs are set up before and cleared after
 * training.
 *
 * tsc[] is zero-initialised so the timestamp report prints 0 for steps that
 * were never reached after an early failure instead of reading
 * uninitialised stack.
 *
 * NOTE(review): u64 is treated as a plain 64-bit integer and split with
 * shifts for print_debug_dqs_tsc_x(); the previous code accessed .hi/.lo
 * members on it.
 */
static void dqs_timing(int i, const struct mem_controller *ctrl, struct sys_info *sysinfo, unsigned int v)
{
	int ii;
	u64 tsc[4] = { 0, 0, 0, 0 };

	if(sysinfo->mem_trained[i] != 0x80) return;

#if MEM_TRAIN_SEQ == 1
	//need to enable mtrr, so dqs training could access the test address
	setup_mtrr_dqs(sysinfo->tom_k, sysinfo->tom2_k);
#endif

	fill_mem_cs_sysinfo(i, ctrl, sysinfo);

	if(v) {
		tsc[0] = cycles();
		printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass1: 0x%x\n", i);
	}
	if(train_DqsRcvrEn(ctrl, 1, sysinfo)) {
		sysinfo->mem_trained[i]=0x81; // pass 1 failed
		goto out;
	}

	if(v) {
		printk(BIOS_DEBUG, " done\n");
		tsc[1] = cycles();
		printk(BIOS_DEBUG, "set DQS timing:DQSPos: ");
		print_debug_hex8(i);
	}
	if(train_DqsPos(ctrl, sysinfo)) {
		sysinfo->mem_trained[i]=0x82; // position training failed
		goto out;
	}

	if(v) {
		printk(BIOS_DEBUG, " done\n");
		tsc[2] = cycles();
		printk(BIOS_DEBUG, "set DQS timing:RcvrEn:Pass2: ");
		print_debug_hex8(i);
	}
	if(train_DqsRcvrEn(ctrl, 2, sysinfo)){
		sysinfo->mem_trained[i]=0x83; // pass 2 failed
		goto out;
	}

	if(v) {
		printk(BIOS_DEBUG, " done\n");
		tsc[3] = cycles();
	}

out:
#if MEM_TRAIN_SEQ == 1
	clear_mtrr_dqs(sysinfo->tom2_k);
#endif

	if(v) {
		for(ii=0;ii<4;ii++) {
			print_debug_dqs_tsc_x("Total DQS Training : tsc ", ii, (unsigned)(tsc[ii] >> 32), (unsigned)tsc[ii]);
		}
	}

	/* Still 0x80 means no step recorded a failure code: mark as trained. */
	if(sysinfo->mem_trained[i] == 0x80) {
		sysinfo->mem_trained[i]=1;
	}
}
#endif
#if MEM_TRAIN_SEQ == 1
/*
 * Train the memory of one node and report the outcome.
 *
 * Runs dqs_timing() on the node's controller from sysinfo with progress
 * output disabled, then copies the resulting mem_trained[] entry for that
 * node into sysinfox. Only that status is copied; the copies of the
 * receiver-delay and DQS-delay tables are currently disabled.
 */
static void train_ram(unsigned nodeid, struct sys_info *sysinfo, struct sys_info *sysinfox)
{
	const struct mem_controller *node_ctrl = &sysinfo->ctrl[nodeid];

	/* verbosity 0: keep the output tidy */
	dqs_timing(nodeid, node_ctrl, sysinfo, 0);

	sysinfox->mem_trained[nodeid] = sysinfo->mem_trained[nodeid];
}
static void copy_and_run_ap_code_in_car(unsigned ret_addr);
/*
 * Entry point for training a node's memory from one of its own cores.
 *
 * @nodeid:  node to train.
 * @coreid:  calling core; only core 0 does any work.
 * @sysinfo: local sys_info block of the calling core.
 * @retcall: return address handed to copy_and_run_ap_code_in_car() when
 *           CONFIG_AP_CODE_IN_CAR is non-zero.
 *
 * Waits until wait_till_sysinfo_in_ram() returns, then looks at the
 * sys_info copy located at (CONFIG_LB_MEM_TOPK << 10) -
 * DCACHE_RAM_GLOBAL_VAR_SIZE. If that copy marks the node with
 * mem_trained == 0x80, DCACHE_RAM_GLOBAL_VAR_SIZE bytes of it are copied
 * into sysinfo, the top-of-memory MSRs are set from it, and training is
 * run either directly via train_ram() or through
 * copy_and_run_ap_code_in_car().
 */
static inline void train_ram_on_node(unsigned nodeid, unsigned coreid, struct sys_info *sysinfo, unsigned retcall)
{
	struct sys_info *sysinfox;

	if(coreid) return; // only do it on core0

	/* Explicit cast: the address is computed as an integer expression. */
	sysinfox = (struct sys_info *)((CONFIG_LB_MEM_TOPK<<10) - DCACHE_RAM_GLOBAL_VAR_SIZE);

	wait_till_sysinfo_in_ram(); // use pci to get it

	if(sysinfox->mem_trained[nodeid] == 0x80) {
#if 0
		sysinfo->tom_k = sysinfox->tom_k;
		sysinfo->tom2_k = sysinfox->tom2_k;
		sysinfo->meminfo[nodeid].is_Width128 = sysinfox->meminfo[nodeid].is_Width128;
		sysinfo->mem_trained[nodeid] = sysinfox->mem_trained[nodeid];
		memcpy(&sysinfo->ctrl[nodeid], &sysinfox->ctrl[nodeid], sizeof(struct mem_controller));
#else
		memcpy(sysinfo, sysinfox, DCACHE_RAM_GLOBAL_VAR_SIZE);
#endif
		set_top_mem_ap(sysinfo->tom_k, sysinfo->tom2_k); // keep the ap's tom consistent with bsp's
#if CONFIG_AP_CODE_IN_CAR == 0
		printk(BIOS_DEBUG, "CODE IN ROM AND RUN ON NODE:"); print_debug_hex8(nodeid); printk(BIOS_DEBUG, "\n");
		train_ram(nodeid, sysinfo, sysinfox);
#else
		/* Can copy dqs_timing to ap cache and run from cache?
		 * we need coreboot_ap_car.rom? and treat it as coreboot_ram.rom for ap ?
		 */
		copy_and_run_ap_code_in_car(retcall);
		// will go back by jump
#endif
	}
}
#endif